Impala
Impalaistheopensource,nativeanalyticdatabaseforApacheHadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
DisjointSet.java
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 package com.cloudera.impala.util;
16 
17 import java.util.Collection;
18 import java.util.Iterator;
19 import java.util.Map;
20 import java.util.Set;
21 
22 import com.google.common.collect.Maps;
23 import com.google.common.collect.Sets;
24 
32 public class DisjointSet<T> {
33  // Maps from an item to its item set.
34  private final Map<T, Set<T>> itemSets_ = Maps.newHashMap();
35  private final Set<Set<T>> uniqueSets_ = Sets.newHashSet();
36 
41  public Set<T> get(T item) { return itemSets_.get(item); }
42 
43  public Set<Set<T>> getSets() { return uniqueSets_; }
44 
49  public Set<T> makeSet(T item) {
50  if (itemSets_.containsKey(item)) {
51  throw new IllegalStateException(
52  "Item set for item already exists: " + item.toString());
53  }
54  Set<T> s = Sets.newHashSet(item);
55  itemSets_.put(item, s);
56  uniqueSets_.add(s);
57  return s;
58  }
59 
66  public boolean union(T a, T b) {
67  Set<T> aItems = itemSets_.get(a);
68  Set<T> bItems = itemSets_.get(b);
69  // check if the sets are already identical
70  if (aItems != null && bItems != null && aItems == bItems) return false;
71 
72  // union(x, x) is equivalent to makeSet(x)
73  if (a.equals(b) && aItems == null) {
74  makeSet(a);
75  return true;
76  }
77 
78  // create sets for a or b if not present already
79  if (aItems == null) aItems = makeSet(a);
80  if (bItems == null) bItems = makeSet(b);
81 
82  // will contain the union of aItems and bItems
83  Set<T> mergedItems = aItems;
84  // always the smaller of the two sets to be merged
85  Set<T> updateItems = bItems;
86  if (bItems.size() > aItems.size()) {
87  mergedItems = bItems;
88  updateItems = aItems;
89  }
90  for (T item: updateItems) {
91  mergedItems.add(item);
92  itemSets_.put(item, mergedItems);
93  }
94  uniqueSets_.remove(updateItems);
95  return true;
96  }
97 
102  public boolean bulkUnion(Collection<T> items) {
103  if (items.isEmpty()) return false;
104  Iterator<T> it = items.iterator();
105  T head = it.next();
106  // bulkUnion(x) is equivalent to makeSet(x)
107  if (!it.hasNext()) {
108  if (get(head) != null) return false;
109  makeSet(head);
110  return true;
111  }
112  boolean result = false;
113  while(it.hasNext()) {
114  boolean changed = union(head, it.next());
115  result = result || changed;
116  }
117  return result;
118  }
119 
124  public void checkConsistency() {
125  Set<Set<T>> validatedSets = Sets.newHashSet();
126  for (Set<T> itemSet: itemSets_.values()) {
127  // Avoid checking the same item set multiple times.
128  if (validatedSets.contains(itemSet)) continue;
129  // Validate that all items in this set are properly mapped to
130  // the set itself.
131  for (T item: itemSet) {
132  if (itemSet != itemSets_.get(item)) {
133  throw new IllegalStateException("DisjointSet is in an inconsistent state.");
134  }
135  }
136  validatedSets.add(itemSet);
137  }
138  }
139 }
boolean bulkUnion(Collection< T > items)