32 using namespace impala;
55 const vector<ExprContext*>& probe_expr_ctxs,
int num_build_tuples,
bool stores_nulls,
56 bool finds_nulls, int32_t initial_seed,
MemTracker* mem_tracker,
bool stores_tuples,
59 build_expr_ctxs_(build_expr_ctxs),
60 probe_expr_ctxs_(probe_expr_ctxs),
61 num_build_tuples_(num_build_tuples),
62 stores_nulls_(stores_nulls),
63 finds_nulls_(finds_nulls),
64 stores_tuples_(stores_tuples),
65 initial_seed_(initial_seed),
66 num_filled_buckets_(0),
68 mem_pool_(new
MemPool(mem_tracker)),
71 node_remaining_current_page_(0),
72 mem_tracker_(mem_tracker),
73 mem_limit_exceeded_(false) {
74 DCHECK(mem_tracker != NULL);
77 DCHECK_EQ((num_buckets & (num_buckets-1)), 0) <<
"num_buckets must be a power of 2";
108 TupleRow* row,
const vector<ExprContext*>& ctxs) {
109 bool has_null =
false;
110 for (
int i = 0; i < ctxs.size(); ++i) {
112 void* val = ctxs[i]->GetValue(row);
130 vector<pair<SlotId, Bitmap*> > bitmaps;
138 bitmaps[i].second = NULL;
144 while (iter !=
End()) {
147 if (bitmaps[i].second == NULL)
continue;
151 bitmaps[i].second->Set<
true>(h,
true);
157 bool acquired_ownership =
false;
158 for (
int i = 0; i < bitmaps.size(); ++i) {
159 if (bitmaps[i].second == NULL)
continue;
161 VLOG(2) <<
"Bitmap filter added on slot: " << bitmaps[i].first;
162 if (!acquired_ownership)
delete bitmaps[i].second;
174 Value* dst_ptr = builder->CreateStructGEP(dst, 0,
"string_ptr");
175 Value* dst_len = builder->CreateStructGEP(dst, 1,
"string_len");
177 Value* null_ptr = builder->CreateIntToPtr(null_len, codegen->
ptr_type());
178 builder->CreateStore(null_ptr, dst_ptr);
179 builder->CreateStore(null_len, dst_len);
182 Value* null_value = NULL;
187 dst = builder->CreateBitCast(dst, codegen->
ptr_type());
198 float fvn_seed_float = *
reinterpret_cast<float*
>(&fvn_seed);
199 null_value = ConstantFP::get(codegen->
context(), APFloat(fvn_seed_float));
204 double fvn_seed_double = *
reinterpret_cast<double*
>(&fvn_seed);
205 null_value = ConstantFP::get(codegen->
context(), APFloat(fvn_seed_double));
211 builder->CreateStore(null_value, dst);
245 for (
int i = 0; i < ctxs.size(); ++i) {
255 DCHECK(tuple_row_type != NULL);
256 PointerType* tuple_row_ptr_type = PointerType::get(tuple_row_type, 0);
259 DCHECK(this_type != NULL);
260 PointerType* this_ptr_type = PointerType::get(this_type, 0);
267 LLVMContext& context = codegen->
context();
270 Function* fn = prototype.GeneratePrototype(&builder, args);
272 Value* row = args[1];
279 for (
int i = 0; i < ctxs.size(); ++i) {
286 codegen->
GetPtrType(ctxs[i]->root()->type()), loc);
288 BasicBlock* null_block = BasicBlock::Create(context,
"null", fn);
289 BasicBlock* not_null_block = BasicBlock::Create(context,
"not_null", fn);
290 BasicBlock* continue_block = BasicBlock::Create(context,
"continue", fn);
294 Status status = ctxs[i]->root()->GetCodegendComputeFn(state, &expr_fn);
297 ss <<
"Problem with codegen: " << status.
GetDetail();
299 fn->eraseFromParent();
305 Value* expr_fn_args[] = { ctx_arg, row };
307 codegen, &builder, ctxs[i]->root()->type(), expr_fn, expr_fn_args,
"result");
313 Value* llvm_null_byte_loc =
315 builder.CreateStore(null_byte, llvm_null_byte_loc);
317 builder.CreateCondBr(is_null, null_block, not_null_block);
320 builder.SetInsertPoint(null_block);
327 builder.CreateBr(continue_block);
331 builder.SetInsertPoint(not_null_block);
333 builder.CreateBr(continue_block);
335 builder.SetInsertPoint(continue_block);
338 builder.CreateRet(has_null);
408 DCHECK(this_type != NULL);
409 PointerType* this_ptr_type = PointerType::get(this_type, 0);
415 LLVMContext& context = codegen->
context();
418 Function* fn = prototype.GeneratePrototype(&builder, &this_arg);
427 hash_result = builder.CreateCall3(hash_fn, data, len, hash_result);
433 hash_result = builder.CreateCall3(hash_fn, data, len, hash_result);
441 BasicBlock* null_block = NULL;
442 BasicBlock* not_null_block = NULL;
443 BasicBlock* continue_block = NULL;
444 Value* str_null_result = NULL;
451 null_block = BasicBlock::Create(context,
"null", fn);
452 not_null_block = BasicBlock::Create(context,
"not_null", fn);
453 continue_block = BasicBlock::Create(context,
"continue", fn);
456 Value* llvm_null_byte_loc =
458 Value* null_byte = builder.CreateLoad(llvm_null_byte_loc);
459 Value* is_null = builder.CreateICmpNE(null_byte,
461 builder.CreateCondBr(is_null, null_block, not_null_block);
465 builder.SetInsertPoint(null_block);
469 str_null_result = builder.CreateCall3(null_hash_fn, llvm_loc, len, hash_result);
470 builder.CreateBr(continue_block);
472 builder.SetInsertPoint(not_null_block);
478 Value* ptr = builder.CreateStructGEP(str_val, 0,
"ptr");
479 Value* len = builder.CreateStructGEP(str_val, 1,
"len");
480 ptr = builder.CreateLoad(ptr);
481 len = builder.CreateLoad(len);
485 Value* string_hash_result =
486 builder.CreateCall3(general_hash_fn, ptr, len, hash_result);
489 builder.CreateBr(continue_block);
490 builder.SetInsertPoint(continue_block);
494 phi_node->addIncoming(string_hash_result, not_null_block);
495 phi_node->addIncoming(str_null_result, null_block);
496 hash_result = phi_node;
498 hash_result = string_hash_result;
503 builder.CreateRet(hash_result);
586 DCHECK(tuple_row_type != NULL);
587 PointerType* tuple_row_ptr_type = PointerType::get(tuple_row_type, 0);
590 DCHECK(this_type != NULL);
591 PointerType* this_ptr_type = PointerType::get(this_type, 0);
597 LLVMContext& context = codegen->
context();
600 Function* fn = prototype.GeneratePrototype(&builder, args);
601 Value* row = args[1];
604 BasicBlock* false_block = BasicBlock::Create(context,
"false_block", fn);
607 BasicBlock* null_block = BasicBlock::Create(context,
"null", fn);
608 BasicBlock* not_null_block = BasicBlock::Create(context,
"not_null", fn);
609 BasicBlock* continue_block = BasicBlock::Create(context,
"continue", fn);
616 ss <<
"Problem with codegen: " << status.
GetDetail();
618 fn->eraseFromParent();
624 Value* expr_fn_args[] = { ctx_arg, row };
634 Value* llvm_null_byte_loc =
636 Value* null_byte = builder.CreateLoad(llvm_null_byte_loc);
637 probe_is_null = builder.CreateICmpNE(null_byte,
647 builder.CreateCondBr(is_null, null_block, not_null_block);
650 builder.SetInsertPoint(null_block);
651 builder.CreateCondBr(probe_is_null, continue_block, false_block);
654 builder.SetInsertPoint(not_null_block);
656 BasicBlock* cmp_block = BasicBlock::Create(context,
"cmp", fn);
658 builder.CreateCondBr(probe_is_null, false_block, cmp_block);
659 builder.SetInsertPoint(cmp_block);
663 builder.CreateCondBr(is_equal, continue_block, false_block);
665 builder.SetInsertPoint(continue_block);
669 builder.SetInsertPoint(false_block);
679 DCHECK_EQ((num_buckets & (num_buckets-1)), 0)
680 <<
"num_buckets=" << num_buckets <<
" must be a power of 2";
685 int64_t delta_size = (num_buckets - old_num_buckets) *
sizeof(
Bucket);
696 bool doubled_buckets = (num_buckets == old_num_buckets * 2);
700 Node* last_node = NULL;
703 while (node != NULL) {
709 if (doubled_buckets) {
710 node_must_move = ((hash & old_num_buckets) != 0);
711 move_to = sister_bucket;
713 int64_t bucket_idx = hash & (num_buckets - 1);
714 node_must_move = (bucket_idx != i);
718 if (node_must_move) {
719 MoveNode(bucket, move_to, node, last_node);
751 for (
int i = 0; i <
buckets_.size(); ++i) {
754 if (skip_empty && node == NULL)
continue;
756 while (node != NULL) {
757 if (!first) ss <<
",";
758 ss << node <<
"(" << node->
data <<
")";
STL-like iterator interface.
uint32_t slot_filter_bitmap_size() const
std::vector< Bucket > buckets_
const std::string GetDetail() const
static CodegenAnyVal CreateCallWrapped(LlvmCodeGen *cg, LlvmCodeGen::LlvmBuilder *builder, const ColumnType &type, llvm::Function *fn, llvm::ArrayRef< llvm::Value * > args, const char *name="", llvm::Value *result_ptr=NULL)
Same as above but wraps the result in a CodegenAnyVal.
OldHashTable(RuntimeState *state, const std::vector< ExprContext * > &build_expr_ctxs, const std::vector< ExprContext * > &probe_expr_ctxs, int num_build_tuples, bool stores_nulls, bool finds_nulls, int32_t initial_seed, MemTracker *mem_tracker, bool stores_tuples=false, int64_t num_buckets=1024)
llvm::PointerType * GetPtrType(llvm::Type *type)
Return a pointer type to 'type'.
static bool Eq(const void *v1, const void *v2, const ColumnType &type)
bool TryConsume(int64_t bytes)
uint32_t HashVariableLenRow()
Utility struct that wraps a variable name and LLVM type.
llvm::Function * CodegenHashCurrentRow(RuntimeState *state)
static const int PAGE_SIZE
const StringSearch UrlParser::hash_search & hash
static int64_t NULL_VALUE[]
std::vector< int > expr_values_buffer_offsets_
TupleRow * GetRow(Node *node) const
uint8_t * expr_values_buffer_
void AddBitmapFilter(SlotId slot, Bitmap *bitmap, bool *acquired_ownership)
const int32_t initial_seed_
LLVM code generator. This is the top level object to generate jitted code.
static const char * LLVM_CLASS_NAME
llvm::Function * CodegenEvalTupleRow(RuntimeState *state, bool build_row)
static const float MAX_BUCKET_OCCUPANCY_FRACTION
static const char * LLVM_CLASS_NAME
int node_remaining_current_page_
Number of nodes left in the current page.
llvm::Value * CastPtrToLlvmPtr(llvm::Type *type, const void *ptr)
void AddArgument(const NamedVariable &var)
Add argument.
MemTracker * mem_tracker_
Node * next_node_
Next node to insert.
bool LogError(const ErrorMsg &msg)
llvm::Function * GetHashFunction(int num_bytes=-1)
const std::vector< ExprContext * > & probe_expr_ctxs_
std::string DebugString(bool skip_empty, bool show_match, const RowDescriptor *build_desc)
static uint32_t Hash(const void *data, int32_t bytes, uint32_t seed)
int var_result_begin_
Byte offset into expr_values_buffer_ at which the variable-length results begin.
void GrowNodeArray()
Grow the node array.
void MemLimitExceeded(int64_t allocation_size)
static void Write(const void *value, Tuple *tuple, const SlotDescriptor *slot_desc, MemPool *pool)
boost::scoped_ptr< MemPool > mem_pool_
MemPool used to allocate data pages.
static int ComputeResultsLayout(const std::vector< Expr * > &exprs, std::vector< int > *offsets, int *var_result_begin)
This class is thread-safe.
static const char * LLVM_CLASS_NAME
void Release(int64_t bytes)
Decreases consumption of this tracker and its ancestors by 'bytes'.
llvm::Value * true_value()
Returns true/false constants (bool type)
int64_t num_buckets_
Equal to buckets_.size(), but cheaper to read than calling size().
int num_data_pages_
Number of data pages for nodes.
Iterator End()
Returns end marker.
static const uint32_t FNV_SEED
void IR_ALWAYS_INLINE Next()
void Close()
Call to cleanup any resources. Must be called once.
uint8_t * expr_value_null_bits_
llvm::Value * EqToNativePtr(llvm::Value *native_ptr)
Status SetMemLimitExceeded(MemTracker *tracker=NULL, int64_t failed_allocation_size=0)
const std::vector< ExprContext * > & build_expr_ctxs_
void ResizeBuckets(int64_t num_buckets)
Resize the hash table to 'num_buckets'.
llvm::Value * false_value()
Reference to a single slot of a tuple.
bool Equals(TupleRow *build_row)
void Consume(int64_t bytes)
Increases consumption of this tracker and its ancestors by 'bytes'.
llvm::Type * GetType(const ColumnType &type)
Returns llvm type for the column type.
void MoveNode(Bucket *from_bucket, Bucket *to_bucket, Node *node, Node *previous_node)
Status GetCodegen(LlvmCodeGen **codegen, bool initialize=true)
bool EvalRow(TupleRow *row, const std::vector< ExprContext * > &ctxs)
llvm::Value * GetIsNull(const char *name="is_null")
Gets the 'is_null' field of the *Val.
static void CodegenAssignNullValue(LlvmCodeGen *codegen, LlvmCodeGen::LlvmBuilder *builder, Value *dst, const ColumnType &type)
llvm::Value * GetIntConstant(PrimitiveType type, int64_t val)
Returns the constant 'val' of 'type'.
llvm::Function * FinalizeFunction(llvm::Function *function)
llvm::Function * CodegenEquals(RuntimeState *state)
static IntGauge * HASH_TABLE_TOTAL_BYTES
int64_t num_buckets() const
Returns the number of buckets.
string PrintRow(TupleRow *row, const RowDescriptor &d)
llvm::LLVMContext & context()
int64_t num_buckets_till_resize_
The number of filled buckets to trigger a resize. This is cached for efficiency.
void ToNativePtr(llvm::Value *native_ptr)
int results_buffer_size_
Size of 'expr_values_buffer_' in bytes.
static uint32_t GetHashValue(const void *v, const ColumnType &type, uint32_t seed=0)
llvm::PointerType * ptr_type()