Impala
Impalaistheopensource,nativeanalyticdatabaseforApacheHadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
old-hash-table.inline.h
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 
16 #ifndef IMPALA_EXEC_OLD_HASH_TABLE_INLINE_H
17 #define IMPALA_EXEC_OLD_HASH_TABLE_INLINE_H
18 
19 #include "exec/old-hash-table.h"
20 
21 namespace impala {
22 
24  uint32_t hash;
25  if (!EvalAndHashProbe(probe_row, &hash)) return End();
26  int64_t bucket_idx = hash & (num_buckets_ - 1);
27  Bucket* bucket = &buckets_[bucket_idx];
28  Node* node = bucket->node;
29  while (node != NULL) {
30  if (node->hash == hash && Equals(GetRow(node))) {
31  return Iterator(this, bucket_idx, node, hash);
32  }
33  node = node->next;
34  }
35  return End();
36 }
37 
39  int64_t bucket_idx = -1;
40  Bucket* bucket = NextBucket(&bucket_idx);
41  if (bucket != NULL) return Iterator(this, bucket_idx, bucket->node, 0);
42  return End();
43 }
44 
46  int64_t bucket_idx = -1;
47  Bucket* bucket = NextBucket(&bucket_idx);
48  while (bucket != NULL) {
49  Node* node = bucket->node;
50  while (node != NULL && node->matched) {
51  node = node->next;
52  }
53  if (node == NULL) {
54  bucket = NextBucket(&bucket_idx);
55  } else {
56  DCHECK(!node->matched);
57  return Iterator(this, bucket_idx, node, 0);
58  }
59  }
60  return End();
61 }
62 
63 inline OldHashTable::Bucket* OldHashTable::NextBucket(int64_t* bucket_idx) {
64  ++*bucket_idx;
65  for (; *bucket_idx < num_buckets_; ++*bucket_idx) {
66  if (buckets_[*bucket_idx].node != NULL) return &buckets_[*bucket_idx];
67  }
68  *bucket_idx = -1;
69  return NULL;
70 }
71 
72 inline bool OldHashTable::InsertImpl(void* data) {
73  uint32_t hash = HashCurrentRow();
74  int64_t bucket_idx = hash & (num_buckets_ - 1);
76  GrowNodeArray();
77  if (UNLIKELY(mem_limit_exceeded_)) return false;
78  }
79  next_node_->hash = hash;
80  next_node_->data = data;
81  next_node_->matched = false;
82  AddToBucket(&buckets_[bucket_idx], next_node_);
83  DCHECK_GT(node_remaining_current_page_, 0);
85  ++next_node_;
86  ++num_nodes_;
87  return true;
88 }
89 
90 inline void OldHashTable::AddToBucket(Bucket* bucket, Node* node) {
91  num_filled_buckets_ += (bucket->node == NULL);
92  node->next = bucket->node;
93  bucket->node = node;
94 }
95 
96 inline bool OldHashTable::EvalAndHashBuild(TupleRow* row, uint32_t* hash) {
97  bool has_null = EvalBuildRow(row);
98  if (!stores_nulls_ && has_null) return false;
99  *hash = HashCurrentRow();
100  return true;
101 }
102 
103 inline bool OldHashTable::EvalAndHashProbe(TupleRow* row, uint32_t* hash) {
104  bool has_null = EvalProbeRow(row);
105  if ((!stores_nulls_ || !finds_nulls_) && has_null) return false;
106  *hash = HashCurrentRow();
107  return true;
108 }
109 
110 inline void OldHashTable::MoveNode(Bucket* from_bucket, Bucket* to_bucket,
111  Node* node, Node* previous_node) {
112  if (previous_node != NULL) {
113  previous_node->next = node->next;
114  } else {
115  // Update bucket directly
116  from_bucket->node = node->next;
117  num_filled_buckets_ -= (node->next == NULL);
118  }
119  AddToBucket(to_bucket, node);
120 }
121 
122 template<bool check_match>
124  if (bucket_idx_ == -1) return;
125 
126  // TODO: this should prefetch the next tuplerow
127  // Iterator is not from a full table scan, evaluate equality now. Only the current
128  // bucket needs to be scanned. 'expr_values_buffer_' contains the results
129  // for the current probe row.
130  if (check_match) {
131  // TODO: this should prefetch the next node
132  Node* node = node_->next;
133  while (node != NULL) {
134  if (node->hash == scan_hash_ && table_->Equals(table_->GetRow(node))) {
135  node_ = node;
136  return;
137  }
138  node = node->next;
139  }
140  *this = table_->End();
141  } else {
142  // Move onto the next chained node
143  if (node_->next != NULL) {
144  node_ = node_->next;
145  return;
146  }
147 
148  // Move onto the next bucket
149  Bucket* bucket = table_->NextBucket(&bucket_idx_);
150  if (bucket == NULL) {
151  bucket_idx_ = -1;
152  node_ = NULL;
153  } else {
154  node_ = bucket->node;
155  }
156  }
157 }
158 
160  if (bucket_idx_ == -1) return false;
161  while (true) {
162  while (node_->next != NULL && node_->next->matched) {
163  node_ = node_->next;
164  }
165  if (node_->next == NULL) {
166  // Move onto the next bucket.
167  Bucket* bucket = table_->NextBucket(&bucket_idx_);
168  if (bucket == NULL) {
169  bucket_idx_ = -1;
170  node_ = NULL;
171  return false;
172  } else {
173  node_ = bucket->node;
174  if (node_ != NULL && !node_->matched) return true;
175  }
176  } else {
177  DCHECK(!node_->next->matched);
178  node_ = node_->next;
179  return true;
180  }
181  }
182 }
183 
184 }
185 
186 #endif
stl-like iterator interface.
std::vector< Bucket > buckets_
uint32_t IR_NO_INLINE HashCurrentRow()
int64_t num_filled_buckets_
Number of non-empty buckets. Used to determine when to grow and rehash.
Node * node_
Current node idx (within current bucket)
const StringSearch UrlParser::hash_search & hash
Definition: url-parser.cc:41
TupleRow * GetRow(Node *node) const
uint32_t scan_hash_
Cached hash value for the row passed to Find()
bool IR_NO_INLINE EvalProbeRow(TupleRow *row)
Bucket * NextBucket(int64_t *bucket_idx)
bool IR_NO_INLINE EvalBuildRow(TupleRow *row)
bool IR_ALWAYS_INLINE EvalAndHashProbe(TupleRow *row, uint32_t *hash)
int node_remaining_current_page_
Number of nodes left in the current page.
Node * next_node_
Next node to insert.
bool IR_ALWAYS_INLINE InsertImpl(void *data)
Insert row into the hash table.
void GrowNodeArray()
Grow the node array.
int64_t num_buckets_
equal to buckets_.size() but more efficient than the size function
Iterator End()
Returns end marker.
void AddToBucket(Bucket *bucket, Node *node)
void IR_ALWAYS_INLINE Next()
Iterator IR_ALWAYS_INLINE Find(TupleRow *probe_row)
#define UNLIKELY(expr)
Definition: compiler-util.h:33
bool IR_ALWAYS_INLINE EvalAndHashBuild(TupleRow *row, uint32_t *hash)
bool Equals(TupleRow *build_row)
void MoveNode(Bucket *from_bucket, Bucket *to_bucket, Node *node, Node *previous_node)
int64_t num_nodes_
number of nodes stored (i.e. size of hash table)
int64_t bucket_idx_
Current bucket idx.