#ifndef IMPALA_EXEC_HASH_TABLE_INLINE_H
#define IMPALA_EXEC_HASH_TABLE_INLINE_H
DCHECK_NOTNULL(buckets);
DCHECK_GT(num_buckets, 0);
// 'num_buckets' is always a power of two, so masking with (num_buckets - 1)
// is equivalent to (and cheaper than) a modulo.
int64_t bucket_idx = hash & (num_buckets - 1);
// ...
do {
  Bucket* bucket = &buckets[bucket_idx];
  if (!bucket->filled) return bucket_idx;
  if (hash == bucket->hash) {
    // ...
  }
  // ...
  if (quadratic_probing_) {
    // Quadratic probing: 'step' grows by one each iteration, so the i-th
    // probe is offset from the first by the triangular number i * (i + 1) / 2.
    bucket_idx = (bucket_idx + step) & (num_buckets - 1);
  } else {
    // Linear probing: advance one bucket at a time.
    bucket_idx = (bucket_idx + 1) & (num_buckets - 1);
  }
} while (LIKELY(step < num_buckets));
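A minimal, self-contained sketch of the probe scheme above (illustrative only: the Bucket struct is simplified and the row-equality check through HashTableCtx is elided):

#include <cstdint>
#include <vector>

// Simplified stand-in for HashTable::Bucket; the real struct also carries
// 'matched', 'hasDuplicates', and the entry payload.
struct Bucket {
  bool filled;
  uint32_t hash;
};

// 'buckets.size()' must be a power of two so that masking with
// (num_buckets - 1) wraps the index. Returns the first empty or
// hash-matching bucket, or -1 if every bucket was visited without success.
int64_t ProbeSketch(const std::vector<Bucket>& buckets, uint32_t hash,
                    bool quadratic) {
  const int64_t num_buckets = static_cast<int64_t>(buckets.size());
  int64_t bucket_idx = hash & (num_buckets - 1);
  int64_t step = 0;
  do {
    const Bucket& bucket = buckets[bucket_idx];
    if (!bucket.filled) return bucket_idx;       // empty: the key is absent
    if (hash == bucket.hash) return bucket_idx;  // candidate match (rows would be compared here)
    ++step;
    bucket_idx = quadratic ? (bucket_idx + step) & (num_buckets - 1)
                           : (bucket_idx + 1) & (num_buckets - 1);
  } while (step < num_buckets);
  return -1;  // the real code returns Iterator::BUCKET_NOT_FOUND
}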
// InsertDuplicateNode() may fail to allocate memory.
if (UNLIKELY(new_node == NULL)) return NULL;
// ...
// Insert succeeded: store the tuple stream index.
if (LIKELY(htdata != NULL)) {
// ...
// Insert succeeded: store the tuple pointer.
if (LIKELY(htdata != NULL)) {
  htdata->tuple = tuple;
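The Insert() paths above share one pattern: InsertInternal() hands back a pointer into the table's entry storage, and NULL signals an out-of-memory failure while adding a duplicate node. A hedged, standalone sketch of that contract (names mirror the listing; types are simplified):

#include <cstddef>
#include <cstdint>

struct Tuple;  // opaque row payload

// Simplified HtData: the real union also holds a BufferedTupleStream::RowIdx.
union HtData {
  Tuple* tuple;
};

// Stand-in for HashTable::InsertInternal(): probes for a slot, appending a
// DuplicateNode when the key already exists, and returns NULL on OOM.
HtData* InsertInternal(uint32_t hash);

// The tuple-pointer variant of Insert(); the RowIdx variant assigns
// 'htdata->idx' instead.
bool InsertSketch(Tuple* tuple, uint32_t hash) {
  HtData* htdata = InsertInternal(hash);
  if (htdata == NULL) return false;  // allocation failed; caller handles OOM
  htdata->tuple = tuple;
  return true;
}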
return Iterator(this, ctx->row(), bucket_idx, node, 0);
// ...
Iterator it(this, ctx->row(), bucket_idx, node, 0);
// ...
// An entry counts as matched via the bucket flag when there are no
// duplicates, and via the node flag otherwise.
if ((!bucket->hasDuplicates && bucket->matched) ||
    (bucket->hasDuplicates && node->matched)) {
DCHECK_GE(bucket_idx, 0);
// ...
DCHECK_GE(bucket_idx, 0);
// ...
// Need one node for the new duplicate, plus one to relocate the bucket's
// pre-existing entry when it does not have a duplicate list yet.
while (node_remaining_current_page_ < 1 + !bucket->hasDuplicates) {
DCHECK_NOTNULL(bucket);
// ...
DCHECK_NOTNULL(duplicate);
// ...
DCHECK_NOTNULL(row_);
// ...
DCHECK_NOTNULL(node_);
DCHECK(table_->stores_tuples_);
Bucket* bucket = &table_->buckets_[bucket_idx_];
// ...
DCHECK_NOTNULL(node_);
return node_->htdata.tuple;
Bucket* bucket = &table_->buckets_[bucket_idx_];
// ...
table_->has_matches_ = true;
Bucket* bucket = &table_->buckets_[bucket_idx_];
// ...
bucket_idx_ = BUCKET_NOT_FOUND;
if (table_->buckets_[bucket_idx_].hasDuplicates && node_->next != NULL) {
// ...
table_->NextFilledBucket(&bucket_idx_, &node_);
// ...
if (table_->buckets_[bucket_idx_].hasDuplicates && node_->next != NULL) {
// ...
bucket_idx_ = BUCKET_NOT_FOUND;
Bucket* bucket = &table_->buckets_[bucket_idx_];
// ...
while (node_->next != NULL) {
// ...
if (!node_->matched) return;
// ...
table_->NextFilledBucket(&bucket_idx_, &node_);
// ...
bucket = &table_->buckets_[bucket_idx_];
// ...
while (node_->matched && node_->next != NULL) {
// ...
if (!node_->matched) return;
// ...
table_->NextFilledBucket(&bucket_idx_, &node_);
DCHECK_LT(level, seeds_.size());
void set_level(int level)
STL-like iterator interface.
bool AtEnd() const
Returns true if this iterator is at the end, i.e. GetRow() cannot be called.
Iterator End()
Return end marker.
bool IR_NO_INLINE Equals(TupleRow *build_row)
bool GrowNodeArray()
Grow the node array. Returns false on OOM.
void SetAtEnd()
Resets everything but the pointer to the hash table.
Tuple * GetTuple(int tuple_idx)
A tuple with 0 materialised slots is represented as NULL.
uint32_t hash
Cached hash value for the entries in this bucket.
DuplicateNode *IR_ALWAYS_INLINE AppendNextNode(Bucket *bucket)
HtData *IR_ALWAYS_INLINE InsertInternal(HashTableCtx *ht_ctx, uint32_t hash)
DuplicateNode * duplicates
static const int64_t BUCKET_NOT_FOUND
Bucket index value returned when a probe is not successful.
Iterator Begin(HashTableCtx *ht_ctx)
union impala::HashTable::Bucket::(anonymous) bucketData
Either the data for this bucket or the linked list of duplicates.
Either the row in the tuple stream or a pointer to the single tuple of this row.
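Reading the bucket members in this listing together, the implied layout is roughly the following (a sketch: flag packing and the exact RowIdx type are simplified):

#include <cstdint>

struct Tuple;
struct DuplicateNode;

// Entry payload: one word that is either an index into the tuple stream or a
// pointer to a single tuple, selected by stores_tuples_. RowIdx is shown as a
// plain integer here.
union HtData {
  uint64_t idx;  // stand-in for BufferedTupleStream::RowIdx
  Tuple* tuple;
};

struct Bucket {
  bool filled;         // bucket holds a valid entry
  bool matched;        // entry was matched by a probe
  bool hasDuplicates;  // bucketData holds a duplicate list, not a single entry
  uint32_t hash;       // cached hash, checked before the expensive row comparison
  union {
    HtData htdata;              // the single entry
    DuplicateNode* duplicates;  // head of the linked list of duplicates
  } bucketData;
};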
const bool quadratic_probing_
Quadratic probing enabled (as opposed to linear).
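A small standalone check of why this scheme is safe with power-of-two tables: advancing by 'step' each iteration lands on the triangular numbers i * (i + 1) / 2 modulo num_buckets, which enumerate every bucket exactly once before the probe loop gives up.

#include <cstdint>
#include <iostream>
#include <set>

int main() {
  const int64_t num_buckets = 16;  // any power of two works
  std::set<int64_t> visited;
  int64_t bucket_idx = 0;
  for (int64_t step = 1; step <= num_buckets; ++step) {
    visited.insert(bucket_idx);
    bucket_idx = (bucket_idx + step) & (num_buckets - 1);
  }
  // Prints "visited 16 of 16 buckets": no bucket is probed twice.
  std::cout << "visited " << visited.size() << " of " << num_buckets
            << " buckets" << std::endl;
  return 0;
}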
void IR_ALWAYS_INLINE NextDuplicate()
int64_t num_duplicate_nodes_
Number of duplicate nodes.
Linked list of entries used for duplicates.
int64_t num_filled_buckets_
Number of non-empty buckets. Used to determine when to resize.
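A hypothetical sketch of how this counter can drive resizing; the listing does not show Impala's actual threshold, so the 75% fill factor below is purely an assumption:

#include <cstdint>

// Assumed load-factor cap; NOT taken from the Impala source.
static const double kMaxFillFactor = 0.75;

// Returns true when the table should grow (e.g. by doubling num_buckets, so
// the power-of-two masking in Probe() keeps working).
bool NeedsResize(int64_t num_filled_buckets, int64_t num_buckets) {
  return num_filled_buckets >= static_cast<int64_t>(num_buckets * kMaxFillFactor);
}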
bool filled
Whether this bucket contains a valid entry or is empty.
const bool stores_tuples_
Whether entries are stored as Tuple pointers ('htdata.tuple') or as indices into the tuple stream ('htdata.idx').
BufferedTupleStream::RowIdx idx
void IR_ALWAYS_INLINE Next()
Iterates to the next element. It should be called only if !AtEnd().
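Putting the iterator members together, a typical full scan might look like the following (a usage sketch; 'table' and 'ht_ctx' are assumed to be initialized elsewhere, and the header path is assumed from the include guard):

#include "exec/hash-table.h"

// Walks every entry: Begin() positions at the first filled bucket, Next()
// advances through buckets and duplicate lists, and AtEnd() terminates.
void ScanAll(impala::HashTable* table, impala::HashTableCtx* ht_ctx) {
  for (impala::HashTable::Iterator it = table->Begin(ht_ctx); !it.AtEnd();
       it.Next()) {
    impala::TupleRow* row = it.GetRow();
    (void)row;  // process the row here
  }
}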
TupleRow * GetRow(HtData &htdata, TupleRow *row) const
Return the TupleRow pointed to by 'htdata'.
int64_t num_buckets_with_duplicates_
DuplicateNode *IR_ALWAYS_INLINE InsertDuplicateNode(int64_t bucket_idx)
int64_t num_hash_collisions_
void IR_ALWAYS_INLINE PrepareBucketForInsert(int64_t bucket_idx, uint32_t hash)
int64_t num_buckets_
Total number of buckets (filled and empty).
int node_remaining_current_page_
Number of nodes left in the current page.
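Together with next_node_ and GrowNodeArray(), this member suggests a page-based allocator for duplicate nodes. A hypothetical sketch of that scheme; the page capacity and the use of std::vector are assumptions, not Impala's implementation:

#include <cstddef>
#include <vector>

// Simplified node: the real DuplicateNode also carries the entry payload.
struct DuplicateNode {
  DuplicateNode* next;
};

static const int kNodesPerPage = 256;  // assumed capacity, not Impala's

class NodePagesSketch {
 public:
  NodePagesSketch() : next_node_(NULL), node_remaining_current_page_(0) {}

  // Mirrors GrowNodeArray(): allocates a fresh page; a real implementation
  // would return false on allocation failure instead of letting it throw.
  bool GrowNodeArray() {
    pages_.push_back(std::vector<DuplicateNode>(kNodesPerPage));
    next_node_ = &pages_.back()[0];
    node_remaining_current_page_ = kNodesPerPage;
    return true;
  }

  // Mirrors the allocation step inside InsertDuplicateNode(): nodes are
  // carved sequentially from the current page.
  DuplicateNode* AllocateNode() {
    if (node_remaining_current_page_ == 0 && !GrowNodeArray()) return NULL;
    --node_remaining_current_page_;
    return next_node_++;
  }

 private:
  std::vector<std::vector<DuplicateNode> > pages_;
  DuplicateNode* next_node_;
  int node_remaining_current_page_;
};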
uint32_t IR_NO_INLINE HashCurrentRow()
Iterator FirstUnmatched(HashTableCtx *ctx)
TupleRow * row_
Scratch buffer to generate rows on the fly.
DuplicateNode * next_node_
Next duplicate node to insert. Valid when node_remaining_current_page_ > 0.
bool IR_NO_INLINE EvalProbeRow(TupleRow *row)
Iterator IR_ALWAYS_INLINE Find(HashTableCtx *ht_ctx, uint32_t hash)
void NextFilledBucket(int64_t *bucket_idx, DuplicateNode **node)
int64_t IR_ALWAYS_INLINE Probe(Bucket *buckets, int64_t num_buckets, HashTableCtx *ht_ctx, uint32_t hash, bool *found)
There are wrappers of this function that perform the Find and Insert logic.
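A hedged reconstruction of what such a wrapper looks like, pieced together from the excerpts at the top of this page (the Iterator constructor arguments follow that excerpt; this is a sketch, not the verbatim source):

// Find-style wrapper: Probe() locates the bucket, 'found' distinguishes a hit
// from the first empty slot, and a hit is wrapped in an Iterator positioned at
// the bucket (descending into the duplicate list when one exists).
HashTable::Iterator HashTable::Find(HashTableCtx* ht_ctx, uint32_t hash) {
  bool found = false;
  int64_t bucket_idx = Probe(buckets_, num_buckets_, ht_ctx, hash, &found);
  if (!found) return End();
  Bucket* bucket = &buckets_[bucket_idx];
  DuplicateNode* node =
      bucket->hasDuplicates ? bucket->bucketData.duplicates : NULL;
  return Iterator(this, ht_ctx->row(), bucket_idx, node, 0);
}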
BufferedTupleStream * tuple_stream_
bool IR_ALWAYS_INLINE EvalAndHashBuild(TupleRow *row, uint32_t *hash)
DuplicateNode * node_
Pointer to the current duplicate node.
TupleRow * GetRow() const
bool IR_NO_INLINE EvalBuildRow(TupleRow *row)
bool IR_ALWAYS_INLINE Insert(HashTableCtx *ht_ctx, const BufferedTupleStream::RowIdx &idx, TupleRow *row, uint32_t hash)
bool IR_ALWAYS_INLINE EvalAndHashProbe(TupleRow *row, uint32_t *hash)
void GetTupleRow(const RowIdx &idx, TupleRow *row) const