Impala
Impalaistheopensource,nativeanalyticdatabaseforApacheHadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
hash-table.cc
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "exec/hash-table.inline.h"
16 
17 #include "codegen/codegen-anyval.h"
18 #include "codegen/llvm-codegen.h"
19 #include "exprs/expr.h"
20 #include "exprs/expr-context.h"
21 #include "exprs/slot-ref.h"
23 #include "runtime/mem-tracker.h"
24 #include "runtime/raw-value.h"
25 #include "runtime/runtime-state.h"
27 #include "util/debug-util.h"
28 #include "util/impalad-metrics.h"
29 
30 #include "common/names.h"
31 
32 using namespace impala;
33 using namespace llvm;
34 
35 DEFINE_bool(enable_quadratic_probing, false, "Enable quadratic probing hash table");
36 
37 const char* HashTableCtx::LLVM_CLASS_NAME = "class.impala::HashTableCtx";
38 
39 // Page sizes used only for BE test. For non-testing, we use the io buffer size.
40 static const int64_t TEST_PAGE_SIZE = 8 * 1024 * 1024;
41 
42 // Random primes to multiply the seed with.
43 static uint32_t SEED_PRIMES[] = {
44  1, // First seed must be 1, level 0 is used by other operators in the fragment.
45  1431655781,
46  1183186591,
47  622729787,
48  472882027,
49  338294347,
50  275604541,
51  41161739,
52  29999999,
53  27475109,
54  611603,
55  16313357,
56  11380003,
57  21261403,
58  33393119,
59  101,
60  71043403
61 };
62 
63 // Put a non-zero constant in the result location for NULL.
64 // We don't want(NULL, 1) to hash to the same as (0, 1).
65 // This needs to be as big as the biggest primitive type since the bytes
66 // get copied directly.
67 // TODO find a better approach, since primitives like CHAR(N) can be up to 128 bytes
75  HashUtil::FNV_SEED, HashUtil::FNV_SEED };
76 
77 // The first NUM_SMALL_BLOCKS of nodes_ are made of blocks less than the IO size (of 8MB)
78 // to reduce the memory footprint of small queries. In particular, we always first use a
79 // 64KB and a 512KB block before starting using IO-sized blocks.
80 static const int64_t INITIAL_DATA_PAGE_SIZES[] = { 64 * 1024, 512 * 1024 };
81 static const int NUM_SMALL_DATA_PAGES = sizeof(INITIAL_DATA_PAGE_SIZES) / sizeof(int64_t);
82 
83 HashTableCtx::HashTableCtx(const vector<ExprContext*>& build_expr_ctxs,
84  const vector<ExprContext*>& probe_expr_ctxs, bool stores_nulls, bool finds_nulls,
85  int32_t initial_seed, int max_levels, int num_build_tuples)
86  : build_expr_ctxs_(build_expr_ctxs),
87  probe_expr_ctxs_(probe_expr_ctxs),
88  stores_nulls_(stores_nulls),
89  finds_nulls_(finds_nulls),
90  level_(0),
91  row_(reinterpret_cast<TupleRow*>(malloc(sizeof(Tuple*) * num_build_tuples))) {
92  // Compute the layout and buffer size to store the evaluated expr results
93  DCHECK_EQ(build_expr_ctxs_.size(), probe_expr_ctxs_.size());
94  DCHECK(!build_expr_ctxs_.empty());
98  memset(expr_values_buffer_, 0, sizeof(uint8_t) * results_buffer_size_);
99  expr_value_null_bits_ = new uint8_t[build_expr_ctxs.size()];
100 
101  // Populate the seeds to use for all the levels. TODO: revisit how we generate these.
102  DCHECK_GE(max_levels, 0);
103  DCHECK_LT(max_levels, sizeof(SEED_PRIMES) / sizeof(SEED_PRIMES[0]));
104  DCHECK_NE(initial_seed, 0);
105  seeds_.resize(max_levels + 1);
106  seeds_[0] = initial_seed;
107  for (int i = 1; i <= max_levels; ++i) {
108  seeds_[i] = seeds_[i - 1] * SEED_PRIMES[i];
109  }
110 }
111 
113  // TODO: use tr1::array?
114  DCHECK_NOTNULL(expr_values_buffer_);
115  delete[] expr_values_buffer_;
116  expr_values_buffer_ = NULL;
117  DCHECK_NOTNULL(expr_value_null_bits_);
118  delete[] expr_value_null_bits_;
119  expr_value_null_bits_ = NULL;
120  free(row_);
121  row_ = NULL;
122 }
123 
124 bool HashTableCtx::EvalRow(TupleRow* row, const vector<ExprContext*>& ctxs) {
125  bool has_null = false;
126  for (int i = 0; i < ctxs.size(); ++i) {
128  void* val = ctxs[i]->GetValue(row);
129  if (val == NULL) {
130  // If the table doesn't store nulls, no reason to keep evaluating
131  if (!stores_nulls_) return true;
132 
133  expr_value_null_bits_[i] = true;
134  val = reinterpret_cast<void*>(&NULL_VALUE);
135  has_null = true;
136  } else {
137  expr_value_null_bits_[i] = false;
138  }
139  DCHECK_LE(build_expr_ctxs_[i]->root()->type().GetSlotSize(),
140  sizeof(NULL_VALUE));
141  RawValue::Write(val, loc, build_expr_ctxs_[i]->root()->type(), NULL);
142  }
143  return has_null;
144 }
145 
147  uint32_t hash = seeds_[level_];
148  // Hash the non-var length portions (if there are any)
149  if (var_result_begin_ != 0) {
151  }
152 
153  for (int i = 0; i < build_expr_ctxs_.size(); ++i) {
154  // non-string and null slots are already part of expr_values_buffer
155  if (build_expr_ctxs_[i]->root()->type().type != TYPE_STRING &&
156  build_expr_ctxs_[i]->root()->type().type != TYPE_VARCHAR) continue;
157 
159  if (expr_value_null_bits_[i]) {
160  // Hash the null random seed values at 'loc'
161  hash = Hash(loc, sizeof(StringValue), hash);
162  } else {
163  // Hash the string
164  StringValue* str = reinterpret_cast<StringValue*>(loc);
165  hash = Hash(str->ptr, str->len, hash);
166  }
167  }
168  return hash;
169 }
170 
171 bool HashTableCtx::Equals(TupleRow* build_row) {
172  for (int i = 0; i < build_expr_ctxs_.size(); ++i) {
173  void* val = build_expr_ctxs_[i]->GetValue(build_row);
174  if (val == NULL) {
175  if (!stores_nulls_) return false;
176  if (!expr_value_null_bits_[i]) return false;
177  continue;
178  } else {
179  if (expr_value_null_bits_[i]) return false;
180  }
181 
183  if (!RawValue::Eq(loc, val, build_expr_ctxs_[i]->root()->type())) {
184  return false;
185  }
186  }
187  return true;
188 }
189 
190 const double HashTable::MAX_FILL_FACTOR = 0.75f;
191 
193  int num_build_tuples, BufferedTupleStream* stream, int64_t max_num_buckets,
194  int64_t num_buckets)
195  : state_(state),
196  block_mgr_client_(client),
197  tuple_stream_(stream),
198  data_page_pool_(NULL),
199  stores_tuples_(num_build_tuples == 1),
200  quadratic_probing_(FLAGS_enable_quadratic_probing),
201  total_data_page_size_(0),
202  next_node_(NULL),
203  node_remaining_current_page_(0),
204  num_duplicate_nodes_(0),
205  max_num_buckets_(max_num_buckets),
206  buckets_(NULL),
207  num_buckets_(num_buckets),
208  num_filled_buckets_(0),
209  num_buckets_with_duplicates_(0),
210  num_build_tuples_(num_build_tuples),
211  has_matches_(false),
212  num_probes_(0), num_failed_probes_(0), travel_length_(0), num_hash_collisions_(0),
213  num_resizes_(0) {
214  DCHECK_EQ((num_buckets & (num_buckets-1)), 0) << "num_buckets must be a power of 2";
215  DCHECK_GT(num_buckets, 0) << "num_buckets must be larger than 0";
216  DCHECK(stores_tuples_ || stream != NULL);
217 }
218 
219 HashTable::HashTable(MemPool* pool, bool quadratic_probing, int num_buckets)
220  : state_(NULL),
221  block_mgr_client_(NULL),
222  tuple_stream_(NULL),
223  data_page_pool_(pool),
224  stores_tuples_(true),
225  quadratic_probing_(quadratic_probing),
226  total_data_page_size_(0),
227  next_node_(NULL),
228  node_remaining_current_page_(0),
229  num_duplicate_nodes_(0),
230  max_num_buckets_(-1),
231  buckets_(NULL),
232  num_buckets_(num_buckets),
233  num_filled_buckets_(0),
234  num_buckets_with_duplicates_(0),
235  num_build_tuples_(1),
236  has_matches_(false),
237  num_probes_(0), num_failed_probes_(0), travel_length_(0), num_hash_collisions_(0),
238  num_resizes_(0) {
239  DCHECK_EQ((num_buckets & (num_buckets-1)), 0) << "num_buckets must be a power of 2";
240  DCHECK_GT(num_buckets, 0) << "num_buckets must be larger than 0";
241  bool ret = Init();
242  DCHECK(ret);
243 }
244 
246  int64_t buckets_byte_size = num_buckets_ * sizeof(Bucket);
247  if (block_mgr_client_ != NULL &&
248  !state_->block_mgr()->ConsumeMemory(block_mgr_client_, buckets_byte_size)) {
249  num_buckets_ = 0;
250  return false;
251  }
252  buckets_ = reinterpret_cast<Bucket*>(malloc(buckets_byte_size));
253  memset(buckets_, 0, buckets_byte_size);
254  return GrowNodeArray();
255 }
256 
258  // Print statistics only for the large or heavily used hash tables.
259  // TODO: Tweak these numbers/conditions, or print them always?
260  const int64_t LARGE_HT = 128 * 1024;
261  const int64_t HEAVILY_USED = 1024 * 1024;
262  // TODO: These statistics should go to the runtime profile as well.
263  if ((num_buckets_ > LARGE_HT) || (num_probes_ > HEAVILY_USED)) VLOG(2) << PrintStats();
264  for (int i = 0; i < data_pages_.size(); ++i) {
265  data_pages_[i]->Delete();
266  }
269  }
270  data_pages_.clear();
271  if (buckets_ != NULL) free(buckets_);
272  if (block_mgr_client_ != NULL) {
274  num_buckets_ * sizeof(Bucket));
275  }
276 }
277 
278 int64_t HashTable::CurrentMemSize() const {
279  return num_buckets_ * sizeof(Bucket) + num_duplicate_nodes_ * sizeof(DuplicateNode);
280 }
281 
282 bool HashTable::CheckAndResize(uint64_t buckets_to_fill, HashTableCtx* ht_ctx) {
283  uint64_t shift = 0;
284  while (num_filled_buckets_ + buckets_to_fill >
285  (num_buckets_ << shift) * MAX_FILL_FACTOR) {
286  // TODO: next prime instead of double?
287  ++shift;
288  }
289  if (shift > 0) return ResizeBuckets(num_buckets_ << shift, ht_ctx);
290  return true;
291 }
292 
293 bool HashTable::ResizeBuckets(int64_t num_buckets, HashTableCtx* ht_ctx) {
294  DCHECK_EQ((num_buckets & (num_buckets-1)), 0)
295  << "num_buckets=" << num_buckets << " must be a power of 2";
296  DCHECK_GT(num_buckets, num_filled_buckets_) << "Cannot shrink the hash table to "
297  "smaller number of buckets than the number of filled buckets.";
298  VLOG(2) << "Resizing hash table from "
299  << num_buckets_ << " to " << num_buckets << " buckets.";
300  if (max_num_buckets_ != -1 && num_buckets > max_num_buckets_) return false;
301  ++num_resizes_;
302 
303  // All memory that can grow proportional to the input should come from the block mgrs
304  // mem tracker.
305  // Note that while we copying over the contents of the old hash table, we need to have
306  // allocated both the old and the new hash table. Once we finish, we return the memory
307  // of the old hash table.
308  int64_t old_size = num_buckets_ * sizeof(Bucket);
309  int64_t new_size = num_buckets * sizeof(Bucket);
310  if (block_mgr_client_ != NULL &&
312  return false;
313  }
314  Bucket* new_buckets = reinterpret_cast<Bucket*>(malloc(new_size));
315  DCHECK_NOTNULL(new_buckets);
316  memset(new_buckets, 0, new_size);
317 
318  // Walk the old table and copy all the filled buckets to the new (resized) table.
319  // We do not have to do anything with the duplicate nodes. This operation is expected
320  // to succeed.
321  for (HashTable::Iterator iter = Begin(ht_ctx); !iter.AtEnd();
322  NextFilledBucket(&iter.bucket_idx_, &iter.node_)) {
323  Bucket* bucket_to_copy = &buckets_[iter.bucket_idx_];
324  bool found = false;
325  int64_t bucket_idx = Probe(new_buckets, num_buckets, NULL, bucket_to_copy->hash,
326  &found);
327  DCHECK(!found);
328  DCHECK_NE(bucket_idx, Iterator::BUCKET_NOT_FOUND) << " Probe failed even though "
329  " there are free buckets. " << num_buckets << " " << num_filled_buckets_;
330  Bucket* dst_bucket = &new_buckets[bucket_idx];
331  *dst_bucket = *bucket_to_copy;
332  }
333 
335  free(buckets_);
336  buckets_ = new_buckets;
337  // TODO: Remove this check, i.e. block_mgr_client_ should always be != NULL,
338  // see IMPALA-1656.
339  if (block_mgr_client_ != NULL) {
341  }
342  return true;
343 }
344 
346  int64_t page_size = 0;
347  if (block_mgr_client_ != NULL) {
348  page_size = state_->block_mgr()->max_block_size();;
349  if (data_pages_.size() < NUM_SMALL_DATA_PAGES) {
350  page_size = min(page_size, INITIAL_DATA_PAGE_SIZES[data_pages_.size()]);
351  }
352  BufferedBlockMgr::Block* block = NULL;
353  Status status = state_->block_mgr()->GetNewBlock(
354  block_mgr_client_, NULL, &block, page_size);
355  DCHECK(status.ok() || block == NULL);
356  if (block == NULL) return false;
357  data_pages_.push_back(block);
358  next_node_ = block->Allocate<DuplicateNode>(page_size);
359  ImpaladMetrics::HASH_TABLE_TOTAL_BYTES->Increment(page_size);
360  } else {
361  // Only used for testing.
362  DCHECK_NOTNULL(data_page_pool_);
363  page_size = TEST_PAGE_SIZE;
364  next_node_ = reinterpret_cast<DuplicateNode*>(data_page_pool_->Allocate(page_size));
365  if (data_page_pool_->mem_tracker()->LimitExceeded()) return false;
366  DCHECK(next_node_ != NULL);
367  }
368  node_remaining_current_page_ = page_size / sizeof(DuplicateNode);
369  total_data_page_size_ += page_size;
370  return true;
371 }
372 
373 void HashTable::DebugStringTuple(stringstream& ss, HtData& htdata,
374  const RowDescriptor* desc) {
375  if (stores_tuples_) {
376  ss << "(" << htdata.tuple << ")";
377  } else {
378  ss << "(" << htdata.idx.block() << ", " << htdata.idx.idx()
379  << ", " << htdata.idx.offset() << ")";
380  }
381  if (desc != NULL) {
382  Tuple* row[num_build_tuples_];
383  ss << " " << PrintRow(GetRow(htdata, reinterpret_cast<TupleRow*>(row)), *desc);
384  }
385 }
386 
387 string HashTable::DebugString(bool skip_empty, bool show_match,
388  const RowDescriptor* desc) {
389  stringstream ss;
390  ss << endl;
391  for (int i = 0; i < num_buckets_; ++i) {
392  if (skip_empty && !buckets_[i].filled) continue;
393  ss << i << ": ";
394  if (show_match) {
395  if (buckets_[i].matched) {
396  ss << " [M]";
397  } else {
398  ss << " [U]";
399  }
400  }
401  if (buckets_[i].hasDuplicates) {
403  bool first = true;
404  ss << " [D] ";
405  while (node != NULL) {
406  if (!first) ss << ",";
407  DebugStringTuple(ss, node->htdata, desc);
408  node = node->next;
409  first = false;
410  }
411  } else {
412  ss << " [B] ";
413  if (buckets_[i].filled) {
414  DebugStringTuple(ss, buckets_[i].bucketData.htdata, desc);
415  } else {
416  ss << " - ";
417  }
418  }
419  ss << endl;
420  }
421  return ss.str();
422 }
423 
424 string HashTable::PrintStats() const {
425  double curr_fill_factor = (double)num_filled_buckets_/(double)num_buckets_;
426  double avg_travel = (double)travel_length_/(double)num_probes_;
427  double avg_collisions = (double)num_hash_collisions_/(double)num_filled_buckets_;
428  stringstream ss;
429  ss << "Buckets: " << num_buckets_ << " " << num_filled_buckets_ << " "
430  << curr_fill_factor << endl;
431  ss << "Duplicates: " << num_buckets_with_duplicates_ << " buckets "
432  << num_duplicate_nodes_ << " nodes" << endl;
433  ss << "Probes: " << num_probes_ << endl;
434  ss << "FailedProbes: " << num_failed_probes_ << endl;
435  ss << "Travel: " << travel_length_ << " " << avg_travel << endl;
436  ss << "HashCollisions: " << num_hash_collisions_ << " " << avg_collisions << endl;
437  ss << "Resizes: " << num_resizes_ << endl;
438  return ss.str();
439 }
440 
441 // Helper function to store a value into the results buffer if the expr
442 // evaluated to NULL. We don't want (NULL, 1) to hash to the same as (0,1) so
443 // we'll pick a more random value.
444 static void CodegenAssignNullValue(LlvmCodeGen* codegen,
445  LlvmCodeGen::LlvmBuilder* builder, Value* dst, const ColumnType& type) {
446  int64_t fvn_seed = HashUtil::FNV_SEED;
447 
448  if (type.type == TYPE_STRING || type.type == TYPE_VARCHAR) {
449  Value* dst_ptr = builder->CreateStructGEP(dst, 0, "string_ptr");
450  Value* dst_len = builder->CreateStructGEP(dst, 1, "string_len");
451  Value* null_len = codegen->GetIntConstant(TYPE_INT, fvn_seed);
452  Value* null_ptr = builder->CreateIntToPtr(null_len, codegen->ptr_type());
453  builder->CreateStore(null_ptr, dst_ptr);
454  builder->CreateStore(null_len, dst_len);
455  } else {
456  Value* null_value = NULL;
457  // Get a type specific representation of fvn_seed
458  switch (type.type) {
459  case TYPE_BOOLEAN:
460  // In results, booleans are stored as 1 byte
461  dst = builder->CreateBitCast(dst, codegen->ptr_type());
462  null_value = codegen->GetIntConstant(TYPE_TINYINT, fvn_seed);
463  break;
464  case TYPE_TINYINT:
465  case TYPE_SMALLINT:
466  case TYPE_INT:
467  case TYPE_BIGINT:
468  null_value = codegen->GetIntConstant(type.type, fvn_seed);
469  break;
470  case TYPE_FLOAT: {
471  // Don't care about the value, just the bit pattern
472  float fvn_seed_float = *reinterpret_cast<float*>(&fvn_seed);
473  null_value = ConstantFP::get(codegen->context(), APFloat(fvn_seed_float));
474  break;
475  }
476  case TYPE_DOUBLE: {
477  // Don't care about the value, just the bit pattern
478  double fvn_seed_double = *reinterpret_cast<double*>(&fvn_seed);
479  null_value = ConstantFP::get(codegen->context(), APFloat(fvn_seed_double));
480  break;
481  }
482  default:
483  DCHECK(false);
484  }
485  builder->CreateStore(null_value, dst);
486  }
487 }
488 
489 // Codegen for evaluating a tuple row over either build_expr_ctxs_ or probe_expr_ctxs_.
490 // For the case where we are joining on a single int, the IR looks like
491 // define i1 @EvalBuildRow(%"class.impala::HashTableCtx"* %this_ptr,
492 // %"class.impala::TupleRow"* %row) #20 {
493 // entry:
494 // %result = call i64 @GetSlotRef1(%"class.impala::ExprContext"* inttoptr
495 // (i64 67971664 to %"class.impala::ExprContext"*),
496 // %"class.impala::TupleRow"* %row)
497 // %is_null = trunc i64 %result to i1
498 // %0 = zext i1 %is_null to i8
499 // store i8 %0, i8* inttoptr (i64 95753144 to i8*)
500 // br i1 %is_null, label %null, label %not_null
501 //
502 // null: ; preds = %entry
503 // store i32 -2128831035, i32* inttoptr (i64 95753128 to i32*)
504 // br label %continue
505 //
506 // not_null: ; preds = %entry
507 // %1 = ashr i64 %result, 32
508 // %2 = trunc i64 %1 to i32
509 // store i32 %2, i32* inttoptr (i64 95753128 to i32*)
510 // br label %continue
511 //
512 // continue: ; preds = %not_null, %null
513 // ret i1 true
514 // }
515 // For each expr, we create 3 code blocks. The null, not null and continue blocks.
516 // Both the null and not null branch into the continue block. The continue block
517 // becomes the start of the next block for codegen (either the next expr or just the
518 // end of the function).
519 Function* HashTableCtx::CodegenEvalRow(RuntimeState* state, bool build) {
520  // TODO: CodegenAssignNullValue() can't handle TYPE_TIMESTAMP or TYPE_DECIMAL yet
521  const vector<ExprContext*>& ctxs = build ? build_expr_ctxs_ : probe_expr_ctxs_;
522  for (int i = 0; i < ctxs.size(); ++i) {
523  PrimitiveType type = ctxs[i]->root()->type().type;
524  if (type == TYPE_TIMESTAMP || type == TYPE_DECIMAL || type == TYPE_CHAR) return NULL;
525  }
526 
527  LlvmCodeGen* codegen;
528  if (!state->GetCodegen(&codegen).ok()) return NULL;
529 
530  // Get types to generate function prototype
531  Type* tuple_row_type = codegen->GetType(TupleRow::LLVM_CLASS_NAME);
532  DCHECK(tuple_row_type != NULL);
533  PointerType* tuple_row_ptr_type = PointerType::get(tuple_row_type, 0);
534 
535  Type* this_type = codegen->GetType(HashTableCtx::LLVM_CLASS_NAME);
536  DCHECK(this_type != NULL);
537  PointerType* this_ptr_type = PointerType::get(this_type, 0);
538 
539  LlvmCodeGen::FnPrototype prototype(codegen, build ? "EvalBuildRow" : "EvalProbeRow",
540  codegen->GetType(TYPE_BOOLEAN));
541  prototype.AddArgument(LlvmCodeGen::NamedVariable("this_ptr", this_ptr_type));
542  prototype.AddArgument(LlvmCodeGen::NamedVariable("row", tuple_row_ptr_type));
543 
544  LLVMContext& context = codegen->context();
545  LlvmCodeGen::LlvmBuilder builder(context);
546  Value* args[2];
547  Function* fn = prototype.GeneratePrototype(&builder, args);
548 
549  Value* row = args[1];
550  Value* has_null = codegen->false_value();
551 
552  for (int i = 0; i < ctxs.size(); ++i) {
553  // TODO: refactor this to somewhere else? This is not hash table specific except for
554  // the null handling bit and would be used for anyone that needs to materialize a
555  // vector of exprs
556  // Convert result buffer to llvm ptr type
558  Value* llvm_loc = codegen->CastPtrToLlvmPtr(
559  codegen->GetPtrType(ctxs[i]->root()->type()), loc);
560 
561  BasicBlock* null_block = BasicBlock::Create(context, "null", fn);
562  BasicBlock* not_null_block = BasicBlock::Create(context, "not_null", fn);
563  BasicBlock* continue_block = BasicBlock::Create(context, "continue", fn);
564 
565  // Call expr
566  Function* expr_fn;
567  Status status = ctxs[i]->root()->GetCodegendComputeFn(state, &expr_fn);
568  if (!status.ok()) {
569  VLOG_QUERY << "Problem with CodegenEvalRow: " << status.GetDetail();
570  fn->eraseFromParent(); // deletes function
571  return NULL;
572  }
573 
574  Value* ctx_arg = codegen->CastPtrToLlvmPtr(
575  codegen->GetPtrType(ExprContext::LLVM_CLASS_NAME), ctxs[i]);
576  Value* expr_fn_args[] = { ctx_arg, row };
578  codegen, &builder, ctxs[i]->root()->type(), expr_fn, expr_fn_args, "result");
579  Value* is_null = result.GetIsNull();
580 
581  // Set null-byte result
582  Value* null_byte = builder.CreateZExt(is_null, codegen->GetType(TYPE_TINYINT));
583  uint8_t* null_byte_loc = &expr_value_null_bits_[i];
584  Value* llvm_null_byte_loc =
585  codegen->CastPtrToLlvmPtr(codegen->ptr_type(), null_byte_loc);
586  builder.CreateStore(null_byte, llvm_null_byte_loc);
587 
588  builder.CreateCondBr(is_null, null_block, not_null_block);
589 
590  // Null block
591  builder.SetInsertPoint(null_block);
592  if (!stores_nulls_) {
593  // hash table doesn't store nulls, no reason to keep evaluating exprs
594  builder.CreateRet(codegen->true_value());
595  } else {
596  CodegenAssignNullValue(codegen, &builder, llvm_loc, ctxs[i]->root()->type());
597  builder.CreateBr(continue_block);
598  }
599 
600  // Not null block
601  builder.SetInsertPoint(not_null_block);
602  result.ToNativePtr(llvm_loc);
603  builder.CreateBr(continue_block);
604 
605  // Continue block
606  builder.SetInsertPoint(continue_block);
607  if (stores_nulls_) {
608  // Update has_null
609  PHINode* is_null_phi = builder.CreatePHI(codegen->boolean_type(), 2, "is_null_phi");
610  is_null_phi->addIncoming(codegen->true_value(), null_block);
611  is_null_phi->addIncoming(codegen->false_value(), not_null_block);
612  has_null = builder.CreateOr(has_null, is_null_phi, "has_null");
613  }
614  }
615  builder.CreateRet(has_null);
616 
617  return codegen->FinalizeFunction(fn);
618 }
619 
620 // Codegen for hashing the current row. In the case with both string and non-string data
621 // (group by int_col, string_col), the IR looks like:
622 // define i32 @HashCurrentRow(%"class.impala::HashTableCtx"* %this_ptr) #20 {
623 // entry:
624 // %seed = call i32 @GetHashSeed(%"class.impala::HashTableCtx"* %this_ptr)
625 // %0 = call i32 @CrcHash16(i8* inttoptr (i64 119151296 to i8*), i32 16, i32 %seed)
626 // %1 = load i8* inttoptr (i64 119943721 to i8*)
627 // %2 = icmp ne i8 %1, 0
628 // br i1 %2, label %null, label %not_null
629 //
630 // null: ; preds = %entry
631 // %3 = call i32 @CrcHash161(i8* inttoptr (i64 119151312 to i8*), i32 16, i32 %0)
632 // br label %continue
633 //
634 // not_null: ; preds = %entry
635 // %4 = load i8** getelementptr inbounds (%"struct.impala::StringValue"* inttoptr
636 // (i64 119151312 to %"struct.impala::StringValue"*), i32 0, i32 0)
637 // %5 = load i32* getelementptr inbounds (%"struct.impala::StringValue"* inttoptr
638 // (i64 119151312 to %"struct.impala::StringValue"*), i32 0, i32 1)
639 // %6 = call i32 @IrCrcHash(i8* %4, i32 %5, i32 %0)
640 // br label %continue
641 //
642 // continue: ; preds = %not_null, %null
643 // %7 = phi i32 [ %6, %not_null ], [ %3, %null ]
644 // call void @set_hash(%"class.impala::HashTableCtx"* %this_ptr, i32 %7)
645 // ret i32 %7
646 // }
647 Function* HashTableCtx::CodegenHashCurrentRow(RuntimeState* state, bool use_murmur) {
648  for (int i = 0; i < build_expr_ctxs_.size(); ++i) {
649  // Disable codegen for CHAR
650  if (build_expr_ctxs_[i]->root()->type().type == TYPE_CHAR) return NULL;
651  }
652 
653  LlvmCodeGen* codegen;
654  if (!state->GetCodegen(&codegen).ok()) return NULL;
655 
656  // Get types to generate function prototype
657  Type* this_type = codegen->GetType(HashTableCtx::LLVM_CLASS_NAME);
658  DCHECK(this_type != NULL);
659  PointerType* this_ptr_type = PointerType::get(this_type, 0);
660 
661  LlvmCodeGen::FnPrototype prototype(codegen,
662  (use_murmur ? "MurmurHashCurrentRow" : "HashCurrentRow"),
663  codegen->GetType(TYPE_INT));
664  prototype.AddArgument(LlvmCodeGen::NamedVariable("this_ptr", this_ptr_type));
665 
666  LLVMContext& context = codegen->context();
667  LlvmCodeGen::LlvmBuilder builder(context);
668  Value* this_arg;
669  Function* fn = prototype.GeneratePrototype(&builder, &this_arg);
670 
671  // Call GetHashSeed() to get seeds_[level_]
672  Function* get_hash_seed_fn = codegen->GetFunction(IRFunction::HASH_TABLE_GET_HASH_SEED);
673  Value* seed = builder.CreateCall(get_hash_seed_fn, this_arg, "seed");
674 
675  Value* hash_result = seed;
676  Value* data = codegen->CastPtrToLlvmPtr(codegen->ptr_type(), expr_values_buffer_);
677  if (var_result_begin_ == -1) {
678  // No variable length slots, just hash what is in 'expr_values_buffer_'
679  if (results_buffer_size_ > 0) {
680  Function* hash_fn = use_murmur ?
683  Value* len = codegen->GetIntConstant(TYPE_INT, results_buffer_size_);
684  hash_result = builder.CreateCall3(hash_fn, data, len, hash_result, "hash");
685  }
686  } else {
687  if (var_result_begin_ > 0) {
688  Function* hash_fn = use_murmur ?
691  Value* len = codegen->GetIntConstant(TYPE_INT, var_result_begin_);
692  hash_result = builder.CreateCall3(hash_fn, data, len, hash_result, "hash");
693  }
694 
695  // Hash string slots
696  for (int i = 0; i < build_expr_ctxs_.size(); ++i) {
697  if (build_expr_ctxs_[i]->root()->type().type != TYPE_STRING
698  && build_expr_ctxs_[i]->root()->type().type != TYPE_VARCHAR) continue;
699 
700  BasicBlock* null_block = NULL;
701  BasicBlock* not_null_block = NULL;
702  BasicBlock* continue_block = NULL;
703  Value* str_null_result = NULL;
704 
706 
707  // If the hash table stores nulls, we need to check if the stringval
708  // evaluated to NULL
709  if (stores_nulls_) {
710  null_block = BasicBlock::Create(context, "null", fn);
711  not_null_block = BasicBlock::Create(context, "not_null", fn);
712  continue_block = BasicBlock::Create(context, "continue", fn);
713 
714  uint8_t* null_byte_loc = &expr_value_null_bits_[i];
715  Value* llvm_null_byte_loc =
716  codegen->CastPtrToLlvmPtr(codegen->ptr_type(), null_byte_loc);
717  Value* null_byte = builder.CreateLoad(llvm_null_byte_loc, "null_byte");
718  Value* is_null = builder.CreateICmpNE(null_byte,
719  codegen->GetIntConstant(TYPE_TINYINT, 0), "is_null");
720  builder.CreateCondBr(is_null, null_block, not_null_block);
721 
722  // For null, we just want to call the hash function on the portion of
723  // the data
724  builder.SetInsertPoint(null_block);
725  Function* null_hash_fn = use_murmur ?
726  codegen->GetMurmurHashFunction(sizeof(StringValue)) :
727  codegen->GetHashFunction(sizeof(StringValue));
728  Value* llvm_loc = codegen->CastPtrToLlvmPtr(codegen->ptr_type(), loc);
729  Value* len = codegen->GetIntConstant(TYPE_INT, sizeof(StringValue));
730  str_null_result =
731  builder.CreateCall3(null_hash_fn, llvm_loc, len, hash_result, "str_null");
732  builder.CreateBr(continue_block);
733 
734  builder.SetInsertPoint(not_null_block);
735  }
736 
737  // Convert expr_values_buffer_ loc to llvm value
738  Value* str_val = codegen->CastPtrToLlvmPtr(codegen->GetPtrType(TYPE_STRING), loc);
739 
740  Value* ptr = builder.CreateStructGEP(str_val, 0);
741  Value* len = builder.CreateStructGEP(str_val, 1);
742  ptr = builder.CreateLoad(ptr, "ptr");
743  len = builder.CreateLoad(len, "len");
744 
745  // Call hash(ptr, len, hash_result);
746  Function* general_hash_fn = use_murmur ? codegen->GetMurmurHashFunction() :
747  codegen->GetHashFunction();
748  Value* string_hash_result =
749  builder.CreateCall3(general_hash_fn, ptr, len, hash_result, "string_hash");
750 
751  if (stores_nulls_) {
752  builder.CreateBr(continue_block);
753  builder.SetInsertPoint(continue_block);
754  // Use phi node to reconcile that we could have come from the string-null
755  // path and string not null paths.
756  PHINode* phi_node = builder.CreatePHI(codegen->GetType(TYPE_INT), 2, "hash_phi");
757  phi_node->addIncoming(string_hash_result, not_null_block);
758  phi_node->addIncoming(str_null_result, null_block);
759  hash_result = phi_node;
760  } else {
761  hash_result = string_hash_result;
762  }
763  }
764  }
765 
766  builder.CreateRet(hash_result);
767  return codegen->FinalizeFunction(fn);
768 }
769 
770 // Codegen for HashTableCtx::Equals. For a hash table with two exprs (string,int),
771 // the IR looks like:
772 //
773 // define i1 @Equals(%"class.impala::HashTableCtx"* %this_ptr,
774 // %"class.impala::TupleRow"* %row) {
775 // entry:
776 // %result = call i64 @GetSlotRef(%"class.impala::ExprContext"* inttoptr
777 // (i64 146381856 to %"class.impala::ExprContext"*),
778 // %"class.impala::TupleRow"* %row)
779 // %0 = trunc i64 %result to i1
780 // br i1 %0, label %null, label %not_null
781 //
782 // false_block: ; preds = %not_null2, %null1, %not_null, %null
783 // ret i1 false
784 //
785 // null: ; preds = %entry
786 // br i1 false, label %continue, label %false_block
787 //
788 // not_null: ; preds = %entry
789 // %1 = load i32* inttoptr (i64 104774368 to i32*)
790 // %2 = ashr i64 %result, 32
791 // %3 = trunc i64 %2 to i32
792 // %cmp_raw = icmp eq i32 %3, %1
793 // br i1 %cmp_raw, label %continue, label %false_block
794 //
795 // continue: ; preds = %not_null, %null
796 // %result4 = call { i64, i8* } @GetSlotRef1(
797 // %"class.impala::ExprContext"* inttoptr
798 // (i64 146381696 to %"class.impala::ExprContext"*),
799 // %"class.impala::TupleRow"* %row)
800 // %4 = extractvalue { i64, i8* } %result4, 0
801 // %5 = trunc i64 %4 to i1
802 // br i1 %5, label %null1, label %not_null2
803 //
804 // null1: ; preds = %continue
805 // br i1 false, label %continue3, label %false_block
806 //
807 // not_null2: ; preds = %continue
808 // %6 = extractvalue { i64, i8* } %result4, 0
809 // %7 = ashr i64 %6, 32
810 // %8 = trunc i64 %7 to i32
811 // %result5 = extractvalue { i64, i8* } %result4, 1
812 // %cmp_raw6 = call i1 @_Z11StringValEQPciPKN6impala11StringValueE(
813 // i8* %result5, i32 %8, %"struct.impala::StringValue"* inttoptr
814 // (i64 104774384 to %"struct.impala::StringValue"*))
815 // br i1 %cmp_raw6, label %continue3, label %false_block
816 //
817 // continue3: ; preds = %not_null2, %null1
818 // ret i1 true
819 // }
821  for (int i = 0; i < build_expr_ctxs_.size(); ++i) {
822  // Disable codegen for CHAR
823  if (build_expr_ctxs_[i]->root()->type().type == TYPE_CHAR) return NULL;
824  }
825 
826  LlvmCodeGen* codegen;
827  if (!state->GetCodegen(&codegen).ok()) return NULL;
828  // Get types to generate function prototype
829  Type* tuple_row_type = codegen->GetType(TupleRow::LLVM_CLASS_NAME);
830  DCHECK(tuple_row_type != NULL);
831  PointerType* tuple_row_ptr_type = PointerType::get(tuple_row_type, 0);
832 
833  Type* this_type = codegen->GetType(HashTableCtx::LLVM_CLASS_NAME);
834  DCHECK(this_type != NULL);
835  PointerType* this_ptr_type = PointerType::get(this_type, 0);
836 
837  LlvmCodeGen::FnPrototype prototype(codegen, "Equals", codegen->GetType(TYPE_BOOLEAN));
838  prototype.AddArgument(LlvmCodeGen::NamedVariable("this_ptr", this_ptr_type));
839  prototype.AddArgument(LlvmCodeGen::NamedVariable("row", tuple_row_ptr_type));
840 
841  LLVMContext& context = codegen->context();
842  LlvmCodeGen::LlvmBuilder builder(context);
843  Value* args[2];
844  Function* fn = prototype.GeneratePrototype(&builder, args);
845  Value* row = args[1];
846 
847  BasicBlock* false_block = BasicBlock::Create(context, "false_block", fn);
848  for (int i = 0; i < build_expr_ctxs_.size(); ++i) {
849  BasicBlock* null_block = BasicBlock::Create(context, "null", fn);
850  BasicBlock* not_null_block = BasicBlock::Create(context, "not_null", fn);
851  BasicBlock* continue_block = BasicBlock::Create(context, "continue", fn);
852 
853  // call GetValue on build_exprs[i]
854  Function* expr_fn;
855  Status status = build_expr_ctxs_[i]->root()->GetCodegendComputeFn(state, &expr_fn);
856  if (!status.ok()) {
857  VLOG_QUERY << "Problem with CodegenEquals: " << status.GetDetail();
858  fn->eraseFromParent(); // deletes function
859  return NULL;
860  }
861 
862  Value* ctx_arg = codegen->CastPtrToLlvmPtr(
864  Value* expr_fn_args[] = { ctx_arg, row };
865  CodegenAnyVal result = CodegenAnyVal::CreateCallWrapped(codegen, &builder,
866  build_expr_ctxs_[i]->root()->type(), expr_fn, expr_fn_args, "result");
867  Value* is_null = result.GetIsNull();
868 
869  // Determine if probe is null (i.e. expr_value_null_bits_[i] == true). In
870  // the case where the hash table does not store nulls, this is always false.
871  Value* probe_is_null = codegen->false_value();
872  uint8_t* null_byte_loc = &expr_value_null_bits_[i];
873  if (stores_nulls_) {
874  Value* llvm_null_byte_loc =
875  codegen->CastPtrToLlvmPtr(codegen->ptr_type(), null_byte_loc);
876  Value* null_byte = builder.CreateLoad(llvm_null_byte_loc);
877  probe_is_null = builder.CreateICmpNE(null_byte,
878  codegen->GetIntConstant(TYPE_TINYINT, 0));
879  }
880 
881  // Get llvm value for probe_val from 'expr_values_buffer_'
883  Value* probe_val = codegen->CastPtrToLlvmPtr(
884  codegen->GetPtrType(build_expr_ctxs_[i]->root()->type()), loc);
885 
886  // Branch for GetValue() returning NULL
887  builder.CreateCondBr(is_null, null_block, not_null_block);
888 
889  // Null block
890  builder.SetInsertPoint(null_block);
891  builder.CreateCondBr(probe_is_null, continue_block, false_block);
892 
893  // Not-null block
894  builder.SetInsertPoint(not_null_block);
895  if (stores_nulls_) {
896  BasicBlock* cmp_block = BasicBlock::Create(context, "cmp", fn);
897  // First need to compare that probe expr[i] is not null
898  builder.CreateCondBr(probe_is_null, false_block, cmp_block);
899  builder.SetInsertPoint(cmp_block);
900  }
901  // Check result == probe_val
902  Value* is_equal = result.EqToNativePtr(probe_val);
903  builder.CreateCondBr(is_equal, continue_block, false_block);
904 
905  builder.SetInsertPoint(continue_block);
906  }
907  builder.CreateRet(codegen->true_value());
908 
909  builder.SetInsertPoint(false_block);
910  builder.CreateRet(codegen->false_value());
911 
912  return codegen->FinalizeFunction(fn);
913 }
uint32_t Hash(const void *input, int len, int32_t hash)
Wrapper function for calling correct HashUtil function in non-codegen'd case.
Definition: hash-table.h:192
stl-like iterator interface.
Definition: hash-table.h:450
static uint32_t SEED_PRIMES[]
Definition: hash-table.cc:43
static const int64_t TEST_PAGE_SIZE
Definition: hash-table.cc:40
The underlying memory management is done by the BufferedBlockMgr.
RuntimeState * state_
Definition: hash-table.h:585
bool AtEnd() const
Returns true if this iterator is at the end, i.e. GetRow() cannot be called.
Definition: hash-table.h:492
std::string PrintStats() const
Update and print some statistics that can be used for performance debugging.
Definition: hash-table.cc:424
bool IR_NO_INLINE Equals(TupleRow *build_row)
Definition: hash-table.cc:171
bool GrowNodeArray()
Grow the node array. Returns false on OOM.
Definition: hash-table.cc:345
const std::string GetDetail() const
Definition: status.cc:184
static CodegenAnyVal CreateCallWrapped(LlvmCodeGen *cg, LlvmCodeGen::LlvmBuilder *builder, const ColumnType &type, llvm::Function *fn, llvm::ArrayRef< llvm::Value * > args, const char *name="", llvm::Value *result_ptr=NULL)
Same as above but wraps the result in a CodegenAnyVal.
llvm::PointerType * GetPtrType(llvm::Type *type)
Return a pointer type to 'type'.
int64_t travel_length_
Definition: hash-table.h:657
void DebugStringTuple(std::stringstream &ss, HtData &htdata, const RowDescriptor *desc)
Print the content of a bucket or node.
Definition: hash-table.cc:373
bool EvalRow(TupleRow *row, const std::vector< ExprContext * > &ctxs)
Definition: hash-table.cc:124
static bool Eq(const void *v1, const void *v2, const ColumnType &type)
Definition: raw-value.h:106
BufferedBlockMgr * block_mgr()
Utility struct that wraps a variable name and llvm type.
Definition: llvm-codegen.h:149
llvm::Function * CodegenHashCurrentRow(RuntimeState *state, bool use_murmur)
Definition: hash-table.cc:647
uint8_t * expr_values_buffer_
Definition: hash-table.h:259
A tuple with 0 materialised slots is represented as NULL.
Definition: tuple.h:48
T * Allocate(int size)
Allocates the specified number of bytes from this block.
const StringSearch UrlParser::hash_search & hash
Definition: url-parser.cc:41
BufferedBlockMgr::Client * block_mgr_client_
Client to allocate data pages with.
Definition: hash-table.h:588
static const double MAX_FILL_FACTOR
Definition: hash-table.h:583
void ReleaseMemory(Client *client, int64_t size)
const std::vector< ExprContext * > & build_expr_ctxs_
Definition: hash-table.h:227
Status GetNewBlock(Client *client, Block *unpin_block, Block **block, int64_t len=-1)
DuplicateNode * duplicates
Definition: hash-table.h:324
static const int64_t BUCKET_NOT_FOUND
Bucket index value when probe is not successful.
Definition: hash-table.h:453
bool Init()
Allocates the initial bucket structure. Returns false if OOM.
Definition: hash-table.cc:245
Iterator Begin(HashTableCtx *ht_ctx)
bool ConsumeMemory(Client *client, int64_t size)
static const char * LLVM_CLASS_NAME
Definition: hash-table.h:169
int64_t max_block_size() const
union impala::HashTable::Bucket::@6 bucketData
Either the data for this bucket or the linked list of duplicates.
bool CheckAndResize(uint64_t buckets_to_fill, HashTableCtx *ht_ctx)
Definition: hash-table.cc:282
llvm::Type * boolean_type()
Simple wrappers to reduce code verbosity.
Definition: llvm-codegen.h:385
std::vector< BufferedBlockMgr::Block * > data_pages_
Data pages for all nodes. These are always pinned.
Definition: hash-table.h:608
Either the row in the tuple stream or a pointer to the single tuple of this row.
Definition: hash-table.h:285
const std::vector< ExprContext * > & probe_expr_ctxs_
Definition: hash-table.h:228
int64_t num_duplicate_nodes_
Number of duplicate nodes.
Definition: hash-table.h:620
Linked list of entries used for duplicates.
Definition: hash-table.h:291
int64_t num_filled_buckets_
Number of non-empty buckets. Used to determine when to resize.
Definition: hash-table.h:632
LLVM code generator. This is the top level object to generate jitted code.
Definition: llvm-codegen.h:107
static const char * LLVM_CLASS_NAME
Definition: tuple-row.h:76
#define VLOG_QUERY
Definition: logging.h:57
const bool stores_nulls_
Definition: hash-table.h:234
static const char * LLVM_CLASS_NAME
Definition: expr-context.h:126
PrimitiveType type
Definition: types.h:60
llvm::Value * CastPtrToLlvmPtr(llvm::Type *type, const void *ptr)
void AddArgument(const NamedVariable &var)
Add argument.
Definition: llvm-codegen.h:171
std::vector< int > expr_values_buffer_offsets_
Definition: hash-table.h:246
const bool stores_tuples_
Definition: hash-table.h:602
MemTracker * mem_tracker()
Definition: mem-pool.h:151
llvm::Function * GetHashFunction(int num_bytes=-1)
PrimitiveType
Definition: types.h:27
ObjectPool pool
llvm::Function * CodegenEquals(RuntimeState *state)
Definition: hash-table.cc:820
BufferedTupleStream::RowIdx idx
Definition: hash-table.h:286
llvm::Function * GetFunction(IRFunction::Type)
static int64_t NULL_VALUE[]
Definition: hash-table.cc:68
static void Write(const void *value, Tuple *tuple, const SlotDescriptor *slot_desc, MemPool *pool)
Definition: raw-value.cc:303
int64_t num_resizes_
How many times this table has resized so far.
Definition: hash-table.h:664
llvm::Function * CodegenEvalRow(RuntimeState *state, bool build_row)
Definition: hash-table.cc:519
bool ResizeBuckets(int64_t num_buckets, HashTableCtx *ht_ctx)
Resize the hash table to 'num_buckets'. Returns false on OOM.
Definition: hash-table.cc:293
static int ComputeResultsLayout(const std::vector< Expr * > &exprs, std::vector< int > *offsets, int *var_result_begin)
int64_t CurrentMemSize() const
Definition: hash-table.cc:278
uint32_t seed(int level)
Definition: hash-table.h:125
llvm::Value * true_value()
Returns true/false constants (bool type)
Definition: llvm-codegen.h:380
TupleRow * GetRow(HtData &htdata, TupleRow *row) const
Return the TupleRow pointed by 'htdata'.
int64_t num_probes_
Definition: hash-table.h:650
int64_t num_buckets_with_duplicates_
Definition: hash-table.h:636
static const uint32_t FNV_SEED
Definition: hash-util.h:99
static void CodegenAssignNullValue(LlvmCodeGen *codegen, LlvmCodeGen::LlvmBuilder *builder, Value *dst, const ColumnType &type)
Definition: hash-table.cc:444
int64_t num_hash_collisions_
Definition: hash-table.h:661
static const int64_t INITIAL_DATA_PAGE_SIZES[]
Definition: hash-table.cc:80
int64_t num_buckets_
Total number of buckets (filled and empty).
Definition: hash-table.h:629
HashTableCtx(const std::vector< ExprContext * > &build_expr_ctxs, const std::vector< ExprContext * > &probe_expr_ctxs, bool stores_nulls, bool finds_nulls, int32_t initial_seed, int max_levels, int num_build_tuples)
Definition: hash-table.cc:83
int node_remaining_current_page_
Number of nodes left in the current page.
Definition: hash-table.h:617
uint8_t * expr_value_null_bits_
Definition: hash-table.h:263
llvm::Value * EqToNativePtr(llvm::Value *native_ptr)
MemPool * data_page_pool_
Only used for tests to allocate data pages instead of the block mgr.
Definition: hash-table.h:596
static const int NUM_SMALL_DATA_PAGES
Definition: hash-table.cc:81
std::vector< uint32_t > seeds_
The seeds to use for hashing. Indexed by the level.
Definition: hash-table.h:242
TupleRow * row_
Scratch buffer to generate rows on the fly.
Definition: hash-table.h:266
int64_t num_buckets() const
Returns the number of buckets.
Definition: hash-table.h:388
DuplicateNode * next_node_
Next duplicate node to insert. Vaild when node_remaining_current_page_ > 0.
Definition: hash-table.h:614
llvm::Value * false_value()
Definition: llvm-codegen.h:381
int64_t total_data_page_size_
Byte size of all buffers in data_pages_.
Definition: hash-table.h:611
HashTable(RuntimeState *state, BufferedBlockMgr::Client *client, int num_build_tuples, BufferedTupleStream *tuple_stream, int64_t max_num_buckets, int64_t initial_num_buckets=1024)
Definition: hash-table.cc:192
llvm::Type * GetType(const ColumnType &type)
Returns llvm type for the column type.
uint32_t HashVariableLenRow()
Definition: hash-table.cc:146
Status GetCodegen(LlvmCodeGen **codegen, bool initialize=true)
llvm::Value * GetIsNull(const char *name="is_null")
Gets the 'is_null' field of the *Val.
void NextFilledBucket(int64_t *bucket_idx, DuplicateNode **node)
llvm::Value * GetIntConstant(PrimitiveType type, int64_t val)
Returns the constant 'val' of 'type'.
llvm::Function * FinalizeFunction(llvm::Function *function)
int64_t IR_ALWAYS_INLINE Probe(Bucket *buckets, int64_t num_buckets, HashTableCtx *ht_ctx, uint32_t hash, bool *found)
There are wrappers of this function that perform the Find and Insert logic.
void Close()
Call to cleanup any resources.
Definition: hash-table.cc:112
static IntGauge * HASH_TABLE_TOTAL_BYTES
void Close()
Call to cleanup any resources. Must be called once.
Definition: hash-table.cc:257
const int64_t max_num_buckets_
Definition: hash-table.h:622
llvm::Function * GetMurmurHashFunction(int num_bytes=-1)
string PrintRow(TupleRow *row, const RowDescriptor &d)
Definition: debug-util.cc:192
bool ok() const
Definition: status.h:172
DEFINE_bool(enable_quadratic_probing, false,"Enable quadratic probing hash table")
llvm::LLVMContext & context()
Definition: llvm-codegen.h:214
void ToNativePtr(llvm::Value *native_ptr)
std::string DebugString(bool skip_empty, bool show_match, const RowDescriptor *build_desc)
Definition: hash-table.cc:387
uint8_t * Allocate(int size)
Definition: mem-pool.h:92
TupleRow * row() const
Definition: hash-table.h:127
llvm::PointerType * ptr_type()
Definition: llvm-codegen.h:393
int64_t num_failed_probes_
Number of probes that failed and had to fall back to linear probing without cap.
Definition: hash-table.h:653
Bucket * buckets_
Definition: hash-table.h:626