16 #ifndef IMPALA_EXEC_OLD_HASH_TABLE_H
17 #define IMPALA_EXEC_OLD_HASH_TABLE_H
20 #include <boost/cstdint.hpp>
105 const std::vector<ExprContext*>& probe_expr_ctxs,
int num_build_tuples,
106 bool stores_nulls,
bool finds_nulls, int32_t initial_seed,
135 bool has_null =
EvalBuildRow(reinterpret_cast<TupleRow*>(&tuple));
181 return num_buckets *
sizeof(
Bucket) + num_rows *
sizeof(
Node);
243 std::string
DebugString(
bool skip_empty,
bool show_match,
255 template<
bool check_match>
362 void MoveNode(Bucket* from_bucket, Bucket* to_bucket, Node* node,
363 Node* previous_node);
stl-like iterator interface.
std::vector< Bucket > buckets_
OldHashTable(RuntimeState *state, const std::vector< ExprContext * > &build_expr_ctxs, const std::vector< ExprContext * > &probe_expr_ctxs, int num_build_tuples, bool stores_nulls, bool finds_nulls, int32_t initial_seed, MemTracker *mem_tracker, bool stores_tuples=false, int64_t num_buckets=1024)
bool operator!=(const Iterator &rhs)
uint32_t IR_NO_INLINE HashCurrentRow()
int64_t num_filled_buckets_
Number of non-empty buckets. Used to determine when to grow and rehash.
void * last_expr_value(int expr_idx) const
uint32_t HashVariableLenRow()
Node * node_
Current node (within the current bucket)
llvm::Function * CodegenHashCurrentRow(RuntimeState *state)
A tuple with 0 materialised slots is represented as NULL.
const StringSearch UrlParser::hash_search & hash
bool AtEnd() const
Returns true if this iterator is at the end, i.e. GetRow() cannot be called.
std::vector< int > expr_values_buffer_offsets_
TupleRow * GetRow(Node *node) const
uint8_t * expr_values_buffer_
uint32_t scan_hash_
Cached hash value for the row passed to Find()
bool IR_NO_INLINE EvalProbeRow(TupleRow *row)
int64_t size() const
Returns number of elements in the hash table.
Bucket * NextBucket(int64_t *bucket_idx)
Iterator FirstUnmatched()
const int32_t initial_seed_
bool IR_NO_INLINE EvalBuildRow(TupleRow *row)
bool IR_ALWAYS_INLINE EvalAndHashProbe(TupleRow *row, uint32_t *hash)
llvm::Function * CodegenEvalTupleRow(RuntimeState *state, bool build_row)
static const float MAX_BUCKET_OCCUPANCY_FRACTION
int node_remaining_current_page_
Number of nodes left in the current page.
const bool stores_tuples_
bool last_expr_value_null(int expr_idx) const
Returns true if the expr at 'expr_idx' evaluated to NULL for the last row.
MemTracker * mem_tracker_
Node * next_node_
Next node to insert.
bool IR_ALWAYS_INLINE InsertImpl(void *data)
Insert row into the hash table.
bool mem_limit_exceeded() const
const std::vector< ExprContext * > & probe_expr_ctxs_
std::string DebugString(bool skip_empty, bool show_match, const RowDescriptor *build_desc)
The hash table does not support removes. The hash table is not thread safe.
static uint32_t Hash(const void *data, int32_t bytes, uint32_t seed)
int var_result_begin_
Byte offset into expr_values_buffer_ at which the variable-length results begin.
void GrowNodeArray()
Grow the node array.
void MemLimitExceeded(int64_t allocation_size)
boost::scoped_ptr< MemPool > mem_pool_
MemPool used to allocate data pages.
bool IR_ALWAYS_INLINE Insert(TupleRow *row)
This class is thread-safe.
bool operator==(const Iterator &rhs)
static const char * LLVM_CLASS_NAME
int64_t num_buckets_
Equal to buckets_.size(), but more efficient than calling the size function.
int num_data_pages_
Number of data pages for nodes.
Iterator End()
Returns end marker.
void AddToBucket(Bucket *bucket, Node *node)
int64_t byte_size() const
Returns the number of bytes allocated to the hash table.
void IR_ALWAYS_INLINE Next()
void Close()
Call to clean up any resources. Must be called once.
uint8_t * expr_value_null_bits_
const std::vector< ExprContext * > & build_expr_ctxs_
void ResizeBuckets(int64_t num_buckets)
Resize the hash table to 'num_buckets'.
Iterator IR_ALWAYS_INLINE Find(TupleRow *probe_row)
bool IR_ALWAYS_INLINE EvalAndHashBuild(TupleRow *row, uint32_t *hash)
bool Equals(TupleRow *build_row)
void MoveNode(Bucket *from_bucket, Bucket *to_bucket, Node *node, Node *previous_node)
bool EvalRow(TupleRow *row, const std::vector< ExprContext * > &ctxs)
int64_t num_nodes_
Number of nodes stored (i.e. the size of the hash table).
llvm::Function * CodegenEquals(RuntimeState *state)
int64_t num_buckets() const
Returns the number of buckets.
bool IR_ALWAYS_INLINE Insert(Tuple *tuple)
const int num_build_tuples_
Number of Tuple* in the build tuple row.
float load_factor() const
Returns the load factor (derived from the number of non-empty buckets).
int64_t bucket_idx_
Current bucket idx.
int64_t num_buckets_till_resize_
The number of filled buckets to trigger a resize. This is cached for efficiency.
static int64_t EstimateSize(int64_t num_rows)
int results_buffer_size_
byte size of 'expr_values_buffer_'
Iterator(OldHashTable *table, int bucket_idx, Node *node, uint32_t hash)