doc/html/hash-table_8h_source.html

 // Copyright 2012 Cloudera Inc.

 //

 // Licensed under the Apache License, Version 2.0 (the "License");

 // you may not use this file except in compliance with the License.

 // You may obtain a copy of the License at

 //

 // http://www.apache.org/licenses/LICENSE-2.0

 //

 // Unless required by applicable law or agreed to in writing, software

 // distributed under the License is distributed on an "AS IS" BASIS,

 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

 // See the License for the specific language governing permissions and

 // limitations under the License.


 #ifndef IMPALA_EXEC_HASH_TABLE_H

 #define IMPALA_EXEC_HASH_TABLE_H


 #include <vector>

 #include <boost/cstdint.hpp>

 #include <boost/scoped_ptr.hpp>

 #include "codegen/impala-ir.h"

 #include "common/logging.h"

 #include "runtime/buffered-block-mgr.h"

 #include "runtime/buffered-tuple-stream.h"

 #include "runtime/buffered-tuple-stream.inline.h"

 #include "runtime/mem-tracker.h"

 #include "runtime/tuple-row.h"

 #include "util/bitmap.h"

 #include "util/hash-util.h"


 // Forward declaration only: this header refers to llvm::Function solely through
 // pointers (the return type of the HashTableCtx::Codegen*() methods), so the
 // full LLVM definition is not needed here.
 namespace llvm {

   class Function;

 }


 namespace impala {


 // Forward declarations of impala types. Where this header uses them, it is only
 // through pointers, references or friend declarations, so complete definitions
 // are not required at this point.
 class Expr;

 class ExprContext;

 class LlvmCodeGen;

 class MemTracker;

 class RowDescriptor;

 class RuntimeState;

 class Tuple;

 class TupleRow;

 // Declared early so that HashTableCtx can name it as a friend before its definition.
 class HashTable;


 //

 //

 //

 //

 //


 // Evaluation/hashing context used together with HashTable (which is a friend and
 // therefore has access to the private helpers below). The context keeps references
 // to the build and probe expression contexts, a buffer holding the most recently
 // evaluated expression values, and one hash seed per level.
 class HashTableCtx {
  public:
   // 'build_expr_ctxs' and 'probe_expr_ctxs' are stored by reference, so the vectors
   // passed in must outlive this object.
   // NOTE(review): 'stores_nulls'/'finds_nulls' presumably control whether rows with
   // NULL expr values are stored/matched, 'initial_seed' and 'max_levels' presumably
   // determine the contents of the per-level seed vector, and 'num_build_tuples' is
   // presumably the number of tuples in a build row -- confirm in hash-table.cc.
   HashTableCtx(const std::vector<ExprContext*>& build_expr_ctxs,
       const std::vector<ExprContext*>& probe_expr_ctxs, bool stores_nulls,
       bool finds_nulls, int32_t initial_seed, int max_levels,
       int num_build_tuples);

   // Call to cleanup any resources.
   void Close();

   // Setter (defined in hash-table.inline.h) and getter for the current level. The
   // level indexes the seed vector and selects the hash function used by Hash().
   void set_level(int level);
   int level() const { return level_; }

   // Returns the hash seed for 'level'. Uses the bounds-checked vector accessor.
   uint32_t seed(int level) { return seeds_.at(level); }

   // Returns the scratch row used to generate rows on the fly.
   TupleRow* row() const { return row_; }

   // Returns a pointer to the value slot of expression 'expr_idx' inside the
   // expression values buffer.
   void* last_expr_value(int expr_idx) const {
     const int value_offset = expr_values_buffer_offsets_[expr_idx];
     return expr_values_buffer_ + value_offset;
   }

   // Returns if the expr at 'expr_idx' evaluated to NULL for the last row.
   bool last_expr_value_null(int expr_idx) const {
     return expr_value_null_bits_[expr_idx] != 0;
   }

   // Defined in hash-table.inline.h.
   // NOTE(review): presumably these evaluate the build/probe exprs over 'row', write
   // the resulting hash to '*hash' and return false when the row must be skipped --
   // confirm against the inline definitions.
   bool IR_ALWAYS_INLINE EvalAndHashBuild(TupleRow* row, uint32_t* hash);
   bool IR_ALWAYS_INLINE EvalAndHashProbe(TupleRow* row, uint32_t* hash);

   // Size passed as the input length when hashing the expression values buffer.
   int results_buffer_size() const { return results_buffer_size_; }

   // Code generation entry points; each returns an llvm::Function*.
   // NOTE(review): presumably these produce codegen'd replacements for EvalRow(),
   // Equals() and HashCurrentRow() respectively -- confirm in hash-table.cc.
   llvm::Function* CodegenEvalRow(RuntimeState* state, bool build_row);
   llvm::Function* CodegenEquals(RuntimeState* state);
   llvm::Function* CodegenHashCurrentRow(RuntimeState* state, bool use_murmur);

   static const char* LLVM_CLASS_NAME;

  private:
   friend class HashTable;

   // Hashes the values currently held in the expression values buffer using the seed
   // of the current level. When 'var_result_begin_' is -1 the whole buffer is hashed
   // in one call; otherwise the work is delegated to HashVariableLenRow().
   uint32_t IR_NO_INLINE HashCurrentRow() {
     DCHECK_LT(level_, seeds_.size());
     if (var_result_begin_ != -1) return HashTableCtx::HashVariableLenRow();
     return Hash(expr_values_buffer_, results_buffer_size_, seeds_[level_]);
   }

   // Wrapper function for calling correct HashUtil function in non-codegen'd case:
   // HashUtil::Hash() at level 0 and HashUtil::MurmurHash2_64() at every other level.
   // Despite its name, the 'hash' argument is forwarded as the seed.
   uint32_t inline Hash(const void* input, int len, int32_t hash) {
     if (level_ != 0) return HashUtil::MurmurHash2_64(input, len, hash);
     return HashUtil::Hash(input, len, hash);
   }

   // Evaluates 'row' over the build expression contexts; forwards EvalRow()'s result.
   bool IR_NO_INLINE EvalBuildRow(TupleRow* row) {
     return EvalRow(row, build_expr_ctxs_);
   }

   // Evaluates 'row' over the probe expression contexts; forwards EvalRow()'s result.
   bool IR_NO_INLINE EvalProbeRow(TupleRow* row) {
     return EvalRow(row, probe_expr_ctxs_);
   }

   // Hash path taken by HashCurrentRow() when 'var_result_begin_' is not -1.
   uint32_t HashVariableLenRow();

   // Evaluates 'row' over 'ctxs'.
   // NOTE(review): presumably fills the expression values buffer and the NULL bits
   // and returns whether any value was NULL -- confirm in hash-table.cc.
   bool EvalRow(TupleRow* row, const std::vector<ExprContext*>& ctxs);

   // NOTE(review): presumably compares 'build_row' against the values of the last
   // evaluated row -- confirm in hash-table.cc.
   bool IR_NO_INLINE Equals(TupleRow* build_row);

   // Expression contexts, held by reference (not owned).
   const std::vector<ExprContext*>& build_expr_ctxs_;
   const std::vector<ExprContext*>& probe_expr_ctxs_;

   // Copies of the constructor flags of the same names.
   const bool stores_nulls_;
   const bool finds_nulls_;

   // Current level; see level() and set_level().
   int level_;

   // The seeds to use for hashing. Indexed by the level.
   std::vector<uint32_t> seeds_;

   // Offset into 'expr_values_buffer_' of each expression's value, indexed by expr.
   std::vector<int> expr_values_buffer_offsets_;

   // -1 selects the single-call hashing path in HashCurrentRow().
   // NOTE(review): presumably the byte offset at which variable-length results
   // begin -- confirm in hash-table.cc.
   int var_result_begin_;

   // Length handed to Hash() for 'expr_values_buffer_'.
   int results_buffer_size_;

   // Buffer holding the most recently evaluated expression values.
   uint8_t* expr_values_buffer_;

   // One entry per expression, read by last_expr_value_null().
   uint8_t* expr_value_null_bits_;

   // Scratch buffer to generate rows on the fly.
   TupleRow* row_;

   // Cross-compiled functions to access member variables used in
   // CodegenHashCurrentRow().
   uint32_t GetHashSeed() const;
 };


 // Hash table made of an array of Buckets. A bucket holds either a single entry
 // directly or the head of a linked list of DuplicateNodes (see Bucket). Hashing and
 // expression evaluation are delegated to the HashTableCtx passed to each operation.
 class HashTable {
  private:
   // Either the row in the tuple stream or a pointer to the single tuple of this row.
   union HtData {
     BufferedTupleStream::RowIdx idx;
     Tuple* tuple;
   };

   // Linked list of entries used for duplicates.
   struct DuplicateNode {
     // NOTE(review): presumably the flag behind Iterator::SetMatched()/IsMatched() for
     // entries stored in a duplicate chain -- confirm in hash-table.inline.h.
     bool matched;

     DuplicateNode* next; // Chain to next duplicate node, NULL when end of list.
     HtData htdata;
   };

   struct Bucket {
     // Whether this bucket contains a valid entry, or it is empty.
     bool filled;

     // NOTE(review): presumably the flag behind Iterator::SetMatched()/IsMatched() for
     // a bucket without duplicates -- confirm in hash-table.inline.h.
     bool matched;

     // NOTE(review): presumably selects which member of 'bucketData' is active
     // ('duplicates' when true, 'htdata' otherwise) -- confirm in hash-table.inline.h.
     bool hasDuplicates;

     // Hash value stored alongside the bucket's data.
     uint32_t hash;

     // Either the data for this bucket or the linked list of duplicates.
     union {
       HtData htdata;
       DuplicateNode* duplicates;
     } bucketData;
   };

  public:
   class Iterator;

   // NOTE(review): 'client' presumably becomes block_mgr_client_ (the client used to
   // allocate data pages) and 'tuple_stream' becomes tuple_stream_; 'max_num_buckets'
   // presumably caps growth. Confirm in hash-table.cc.
   HashTable(RuntimeState* state, BufferedBlockMgr::Client* client,
       int num_build_tuples, BufferedTupleStream* tuple_stream,
       int64_t max_num_buckets, int64_t initial_num_buckets = 1024);

   // NOTE(review): presumably the test-only constructor, since data_page_pool_ is only
   // used by tests to allocate data pages instead of the block mgr -- confirm.
   HashTable(MemPool* pool, bool quadratic_probing, int num_buckets);

   // Allocates the initial bucket structure. Returns false if OOM.
   bool Init();

   // Call to cleanup any resources. Must be called once.
   void Close();

   // Insert overloads, defined in hash-table.inline.h. One takes the row's index in
   // the tuple stream, the other a tuple pointer (the two alternatives of HtData).
   // NOTE(review): presumably return false when the entry could not be inserted.
   bool IR_ALWAYS_INLINE Insert(HashTableCtx* ht_ctx,
       const BufferedTupleStream::RowIdx& idx, TupleRow* row, uint32_t hash);

   bool IR_ALWAYS_INLINE Insert(HashTableCtx* ht_ctx, Tuple* tuple, uint32_t hash);

   // Defined in hash-table.inline.h.
   // NOTE(review): presumably returns an iterator to the first entry matching the row
   // last evaluated by 'ht_ctx', or End() when there is none -- confirm.
   Iterator IR_ALWAYS_INLINE Find(HashTableCtx* ht_ctx, uint32_t hash);

   // Returns number of elements inserted in the hash table: every filled bucket that
   // has no duplicate chain counts once, plus every duplicate node.
   int64_t size() const {
     return num_filled_buckets_ - num_buckets_with_duplicates_ + num_duplicate_nodes_;
   }

   // Returns the number of empty buckets.
   int64_t EmptyBuckets() const { return num_buckets_ - num_filled_buckets_; }

   // Returns the number of buckets.
   int64_t num_buckets() const { return num_buckets_; }

   // Returns the load factor: the fraction of buckets that are non-empty.
   double load_factor() const {
     return static_cast<double>(num_filled_buckets_) / num_buckets_;
   }

   // Bucket count estimate for 'num_rows' rows: 1.5x the row count (integer
   // arithmetic) passed through BitUtil::NextPowerOfTwo().
   static int64_t EstimateNumBuckets(int64_t num_rows) {
     return BitUtil::NextPowerOfTwo(3 * num_rows / 2);
   }

   // Byte size of the bucket array for the estimated bucket count. Does not include
   // any duplicate nodes.
   static int64_t EstimateSize(int64_t num_rows) {
     int64_t num_buckets = EstimateNumBuckets(num_rows);
     return num_buckets * sizeof(Bucket);
   }

   // Defined in hash-table.cc.
   int64_t CurrentMemSize() const;

   // Defined in hash-table.cc.
   // NOTE(review): presumably grows the table if filling 'buckets_to_fill' more
   // buckets would exceed MAX_FILL_FACTOR, returning false on failure -- confirm.
   bool CheckAndResize(uint64_t buckets_to_fill, HashTableCtx* ht_ctx);

   // Returns the number of bytes allocated to the hash table.
   int64_t byte_size() const { return total_data_page_size_; }

   // Iteration entry points, defined in hash-table.inline.h.
   Iterator Begin(HashTableCtx* ht_ctx);

   Iterator FirstUnmatched(HashTableCtx* ctx);

   // Return true if there was at least one match.
   bool HasMatches() const { return has_matches_; }

   // Return end marker.
   Iterator End() { return Iterator(); }

   // Debugging helpers, defined in hash-table.cc.
   std::string DebugString(bool skip_empty, bool show_match,
       const RowDescriptor* build_desc);

   // Print the content of a bucket or node.
   void DebugStringTuple(std::stringstream& ss, HtData& htdata, const RowDescriptor* desc);

   // Update and print some statistics that can be used for performance debugging.
   std::string PrintStats() const;

   // stl-like iterator interface.
   class Iterator {
    private:
     // Bucket index value when probe is not successful.
     static const int64_t BUCKET_NOT_FOUND = -1;

    public:
     // Constructs an iterator that is at the end (AtEnd() returns true).
     Iterator() : table_(NULL), row_(NULL), bucket_idx_(BUCKET_NOT_FOUND), node_(NULL) { }

     // Iterates to the next element. It should be called only if !AtEnd().
     void IR_ALWAYS_INLINE Next();

     // Defined in hash-table.inline.h.
     void IR_ALWAYS_INLINE NextDuplicate();

     // Defined in hash-table.inline.h.
     void NextUnmatched();

     // Accessors for the current entry, defined in hash-table.inline.h. GetRow() cannot
     // be called when AtEnd().
     TupleRow* GetRow() const;
     Tuple* GetTuple() const;

     // Defined in hash-table.inline.h.
     void SetMatched();

     // Defined in hash-table.inline.h.
     bool IsMatched() const;

     // Resets everything but the pointer to the hash table.
     void SetAtEnd();

     // Returns true if this iterator is at the end, i.e. GetRow() cannot be called.
     bool AtEnd() const { return bucket_idx_ == BUCKET_NOT_FOUND; }

    private:
     friend class HashTable;

     // Used by HashTable to build an iterator positioned at 'bucket_idx' / 'node'.
     // 'bucket_idx' is int64_t to match bucket_idx_, BUCKET_NOT_FOUND and the bucket
     // indexes produced by HashTable; a plain int would silently narrow indexes that
     // do not fit in 32 bits. 'hash' is not used and is kept only so existing call
     // sites keep compiling.
     Iterator(HashTable* table, TupleRow* row, int64_t bucket_idx, DuplicateNode* node,
              uint32_t hash)
       : table_(table),
         row_(row),
         bucket_idx_(bucket_idx),
         node_(node) {
     }

     // The table being iterated over.
     HashTable* table_;

     TupleRow* row_;

     // Index of the current bucket, or BUCKET_NOT_FOUND when at the end.
     int64_t bucket_idx_;

     // Pointer to the current duplicate node.
     DuplicateNode* node_;
   };

  private:
   friend class Iterator;
   friend class HashTableTest;

   // There are wrappers of this function that perform the Find and Insert logic.
   // Defined in hash-table.inline.h.
   // NOTE(review): presumably returns the index of the bucket for 'hash' (or
   // Iterator::BUCKET_NOT_FOUND) and sets '*found' when an equal entry already
   // exists -- confirm.
   int64_t IR_ALWAYS_INLINE Probe(Bucket* buckets, int64_t num_buckets,
       HashTableCtx* ht_ctx, uint32_t hash,  bool* found);

   // Defined in hash-table.inline.h.
   HtData* IR_ALWAYS_INLINE InsertInternal(HashTableCtx* ht_ctx, uint32_t hash);

   // Defined in hash-table.inline.h.
   void NextFilledBucket(int64_t* bucket_idx, DuplicateNode** node);

   // Resize the hash table to 'num_buckets'. Returns false on OOM.
   bool ResizeBuckets(int64_t num_buckets, HashTableCtx* ht_ctx);

   // Duplicate-chain and bucket bookkeeping helpers, defined in hash-table.inline.h.
   DuplicateNode* IR_ALWAYS_INLINE AppendNextNode(Bucket* bucket);

   DuplicateNode* IR_ALWAYS_INLINE InsertDuplicateNode(int64_t bucket_idx);

   void IR_ALWAYS_INLINE PrepareBucketForInsert(int64_t bucket_idx, uint32_t hash);

   // Return the TupleRow pointed by 'htdata'.
   TupleRow* GetRow(HtData& htdata, TupleRow* row) const;

   // Bucket flavour of GetRow(), defined in hash-table.inline.h.
   TupleRow* GetRow(Bucket* bucket, TupleRow* row) const;

   // Grow the node array. Returns false on OOM.
   bool GrowNodeArray();

   static const double MAX_FILL_FACTOR;

   RuntimeState* state_;

   // Client to allocate data pages with.
   BufferedBlockMgr::Client* block_mgr_client_;

   BufferedTupleStream* tuple_stream_;

   // Only used for tests to allocate data pages instead of the block mgr.
   MemPool* data_page_pool_;

   // NOTE(review): presumably true when entries are stored as Tuple* (HtData::tuple)
   // rather than as a tuple stream index -- confirm in hash-table.cc.
   const bool stores_tuples_;

   // Quadratic probing enabled (as opposed to linear).
   const bool quadratic_probing_;

   // Data pages for all nodes. These are always pinned.
   std::vector<BufferedBlockMgr::Block*> data_pages_;

   // Byte size of all buffers in data_pages_.
   int64_t total_data_page_size_;

   // Next duplicate node to insert. Valid when node_remaining_current_page_ > 0.
   DuplicateNode* next_node_;

   // Number of nodes left in the current page.
   int node_remaining_current_page_;

   // Number of duplicate nodes.
   int64_t num_duplicate_nodes_;

   const int64_t max_num_buckets_;

   // The bucket array.
   Bucket* buckets_;

   // Total number of buckets (filled and empty).
   int64_t num_buckets_;

   // Number of non-empty buckets. Used to determine when to resize.
   int64_t num_filled_buckets_;

   // Number of filled buckets counted out of size() because their entries are
   // counted through num_duplicate_nodes_ instead.
   int64_t num_buckets_with_duplicates_;

   int num_build_tuples_;

   // Value returned by HasMatches().
   bool has_matches_;

   // Statistics counters (see PrintStats()).
   int64_t num_probes_;

   // Number of probes that failed and had to fall back to linear probing without cap.
   int64_t num_failed_probes_;

   int64_t travel_length_;

   int64_t num_hash_collisions_;

   // How many times this table has resized so far.
   int64_t num_resizes_;
 };


 }


 #endif

impala::HashTableCtx::set_level
void set_level(int level)
Definition: hash-table.inline.h:329

impala::HashTableCtx::Hash
uint32_t Hash(const void *input, int len, int32_t hash)
Wrapper function for calling correct HashUtil function in non-codegen'd case.
Definition: hash-table.h:192

impala::HashTable::Iterator
stl-like iterator interface.
Definition: hash-table.h:450

IR_NO_INLINE
#define IR_NO_INLINE
Definition: impala-ir.h:30

impala::BufferedTupleStream
The underlying memory management is done by the BufferedBlockMgr.
Definition: buffered-tuple-stream.h:109

impala::HashTable::state_
RuntimeState * state_
Definition: hash-table.h:585

impala::HashTable::Iterator::AtEnd
bool AtEnd() const
Returns true if this iterator is at the end, i.e. GetRow() cannot be called.
Definition: hash-table.h:492

impala::HashTable::PrintStats
std::string PrintStats() const
Update and print some statistics that can be used for performance debugging.
Definition: hash-table.cc:424

impala::HashTable::End
Iterator End()
Return end marker.
Definition: hash-table.h:434

impala::HashTableCtx::Equals
bool IR_NO_INLINE Equals(TupleRow *build_row)
Definition: hash-table.cc:171

impala::HashTable::GrowNodeArray
bool GrowNodeArray()
Grow the node array. Returns false on OOM.
Definition: hash-table.cc:345

impala::HashTableCtx::results_buffer_size
int results_buffer_size() const
Definition: hash-table.h:151

impala::HashTable::Iterator::GetTuple
Tuple * GetTuple() const
Definition: hash-table.inline.h:243

impala::HashTable::EstimateNumBuckets
static int64_t EstimateNumBuckets(int64_t num_rows)
Definition: hash-table.h:398

impala::HashTable::travel_length_
int64_t travel_length_
Definition: hash-table.h:657

impala::HashTable::DebugStringTuple
void DebugStringTuple(std::stringstream &ss, HtData &htdata, const RowDescriptor *desc)
Print the content of a bucket or node.
Definition: hash-table.cc:373

impala::HashTableCtx::EvalRow
bool EvalRow(TupleRow *row, const std::vector< ExprContext * > &ctxs)
Definition: hash-table.cc:124

impala::HashTable::Iterator::SetAtEnd
void SetAtEnd()
Resets everything but the pointer to the hash table.
Definition: hash-table.inline.h:278

impala::HashTable::Iterator::NextUnmatched
void NextUnmatched()
Definition: hash-table.inline.h:302

impala::HashTable::Iterator::row_
TupleRow * row_
Definition: hash-table.h:506

impala::HashTable::byte_size
int64_t byte_size() const
Returns the number of bytes allocated to the hash table.
Definition: hash-table.h:419

impala::HashTable::Iterator::Iterator
Iterator()
Definition: hash-table.h:457

impala::HashTableCtx::CodegenHashCurrentRow
llvm::Function * CodegenHashCurrentRow(RuntimeState *state, bool use_murmur)
Definition: hash-table.cc:647

impala::HashTableCtx::expr_values_buffer_
uint8_t * expr_values_buffer_
Definition: hash-table.h:259

impala::Tuple
A tuple with 0 materialised slots is represented as NULL.
Definition: tuple.h:48

mem-tracker.h

impala::hash
const StringSearch UrlParser::hash_search & hash
Definition: url-parser.cc:41

impala::HashTable::block_mgr_client_
BufferedBlockMgr::Client * block_mgr_client_
Client to allocate data pages with.
Definition: hash-table.h:588

impala::HashTable::DuplicateNode::next
DuplicateNode * next
Definition: hash-table.h:299

buffered-tuple-stream.h

impala::HashTable::MAX_FILL_FACTOR
static const double MAX_FILL_FACTOR
Definition: hash-table.h:583

impala::HashTable::AppendNextNode
DuplicateNode *IR_ALWAYS_INLINE AppendNextNode(Bucket *bucket)
Definition: hash-table.inline.h:175

impala::HashTableCtx::build_expr_ctxs_
const std::vector< ExprContext * > & build_expr_ctxs_
Definition: hash-table.h:227

impala::HashTable::InsertInternal
HtData *IR_ALWAYS_INLINE InsertInternal(HashTableCtx *ht_ctx, uint32_t hash)
Definition: hash-table.inline.h:77

impala::HashTable::Bucket::duplicates
DuplicateNode * duplicates
Definition: hash-table.h:324

impala::HashTable::Iterator::BUCKET_NOT_FOUND
static const int64_t BUCKET_NOT_FOUND
Bucket index value when probe is not successful.
Definition: hash-table.h:453

impala::HashTable::Init
bool Init()
Allocates the initial bucket structure. Returns false if OOM.
Definition: hash-table.cc:245

impala::HashTableCtx::last_expr_value
void * last_expr_value(int expr_idx) const
Definition: hash-table.h:134

impala::HashTable::Begin
Iterator Begin(HashTableCtx *ht_ctx)
Definition: hash-table.inline.h:128

impala::HashTableCtx::LLVM_CLASS_NAME
static const char * LLVM_CLASS_NAME
Definition: hash-table.h:169

impala::HashTable::Bucket::bucketData
union impala::HashTable::Bucket::@6 bucketData
Either the data for this bucket or the linked list of duplicates.

impala::BufferedBlockMgr::Client
Definition: buffered-block-mgr.cc:45

impala::RowDescriptor
Definition: descriptors.h:373

impala::HashTable::CheckAndResize
bool CheckAndResize(uint64_t buckets_to_fill, HashTableCtx *ht_ctx)
Definition: hash-table.cc:282

buffered-tuple-stream.inline.h

impala::HashTable::EmptyBuckets
int64_t EmptyBuckets() const
Returns the number of empty buckets.
Definition: hash-table.h:385

hash-util.h

impala::HashTable::data_pages_
std::vector< BufferedBlockMgr::Block * > data_pages_
Data pages for all nodes. These are always pinned.
Definition: hash-table.h:608

impala::HashTable::HtData
Either the row in the tuple stream or a pointer to the single tuple of this row.
Definition: hash-table.h:285

impala::HashTable::Bucket
Definition: hash-table.h:303

impala::HashTable::HasMatches
bool HasMatches() const
Return true if there was at least one match.
Definition: hash-table.h:431

impala::HashTable::quadratic_probing_
const bool quadratic_probing_
Quadratic probing enabled (as opposed to linear).
Definition: hash-table.h:605

impala::HashTable::HtData::tuple
Tuple * tuple
Definition: hash-table.h:287

impala::HashTableCtx::probe_expr_ctxs_
const std::vector< ExprContext * > & probe_expr_ctxs_
Definition: hash-table.h:228

impala::HashTable::Iterator::NextDuplicate
void IR_ALWAYS_INLINE NextDuplicate()
Definition: hash-table.inline.h:292

impala::HashTable::num_duplicate_nodes_
int64_t num_duplicate_nodes_
Number of duplicate nodes.
Definition: hash-table.h:620

logging.h

impala::TupleRow
Definition: tuple-row.h:28

impala::HashTable::Bucket::matched
bool matched
Definition: hash-table.h:311

impala::HashTable::DuplicateNode
Linked list of entries used for duplicates.
Definition: hash-table.h:291

IR_ALWAYS_INLINE
#define IR_ALWAYS_INLINE
Definition: impala-ir.h:31

impala::HashTableCtx::level_
int level_
Definition: hash-table.h:239

impala::HashTable::num_filled_buckets_
int64_t num_filled_buckets_
Number of non-empty buckets. Used to determine when to resize.
Definition: hash-table.h:632

impala::HashTable::Iterator::IsMatched
bool IsMatched() const
Definition: hash-table.inline.h:269

impala::HashTable::Iterator::bucket_idx_
int64_t bucket_idx_
Definition: hash-table.h:510

impala::HashTableCtx::stores_nulls_
const bool stores_nulls_
Definition: hash-table.h:234

impala::HashTable::Bucket::filled
bool filled
Whether this bucket contains a valid entry, or it is empty.
Definition: hash-table.h:305

impala::MemPool
Definition: mem-pool.h:77

impala::HashTable::Iterator::table_
HashTable * table_
Definition: hash-table.h:505

impala::HashTableCtx::GetHashSeed
uint32_t GetHashSeed() const
Cross-compiled functions to access member variables used in CodegenHashCurrentRow().

impala::HashTableCtx::expr_values_buffer_offsets_
std::vector< int > expr_values_buffer_offsets_
Definition: hash-table.h:246

impala::HashTable::stores_tuples_
const bool stores_tuples_
Definition: hash-table.h:602

impala::RuntimeState
Definition: runtime-state.h:69

impala::HashTable
Definition: hash-table.h:281

impala::HashUtil::Hash
static uint32_t Hash(const void *data, int32_t bytes, uint32_t seed)
Definition: hash-util.h:135

impala::HashTableCtx::var_result_begin_
int var_result_begin_
Definition: hash-table.h:251

impala::HashTableCtx::level
int level() const
Definition: hash-table.h:124

pool
ObjectPool pool
Definition: expr-benchmark.cc:89

impala::HashTableCtx::CodegenEquals
llvm::Function * CodegenEquals(RuntimeState *state)
Definition: hash-table.cc:820

impala::HashTable::HtData::idx
BufferedTupleStream::RowIdx idx
Definition: hash-table.h:286

impala::HashTable::Bucket::htdata
HtData htdata
Definition: hash-table.h:323

uint64_t

impala::HashTable::Iterator::Next
void IR_ALWAYS_INLINE Next()
Iterates to the next element. It should be called only if !AtEnd().
Definition: hash-table.inline.h:283

impala::HashTable::num_resizes_
int64_t num_resizes_
How many times this table has resized so far.
Definition: hash-table.h:664

impala::HashTableCtx::CodegenEvalRow
llvm::Function * CodegenEvalRow(RuntimeState *state, bool build_row)
Definition: hash-table.cc:519

impala::HashTable::ResizeBuckets
bool ResizeBuckets(int64_t num_buckets, HashTableCtx *ht_ctx)
Resize the hash table to 'num_buckets'. Returns false on OOM.
Definition: hash-table.cc:293

impala::HashTable::CurrentMemSize
int64_t CurrentMemSize() const
Definition: hash-table.cc:278

impala::HashTableCtx::seed
uint32_t seed(int level)
Definition: hash-table.h:125

impala::HashTable::Bucket::hasDuplicates
bool hasDuplicates
Definition: hash-table.h:315

impala::HashTable::GetRow
TupleRow * GetRow(HtData &htdata, TupleRow *row) const
Return the TupleRow pointed by 'htdata'.
Definition: hash-table.inline.h:210

impala::HashTable::num_probes_
int64_t num_probes_
Definition: hash-table.h:650

impala::HashTable::num_buckets_with_duplicates_
int64_t num_buckets_with_duplicates_
Definition: hash-table.h:636

impala::HashTable::InsertDuplicateNode
DuplicateNode *IR_ALWAYS_INLINE InsertDuplicateNode(int64_t bucket_idx)
Definition: hash-table.inline.h:183

impala::HashTable::num_hash_collisions_
int64_t num_hash_collisions_
Definition: hash-table.h:661

impala::HashTable::PrepareBucketForInsert
void IR_ALWAYS_INLINE PrepareBucketForInsert(int64_t bucket_idx, uint32_t hash)
Definition: hash-table.inline.h:163

impala::HashTable::num_buckets_
int64_t num_buckets_
Total number of buckets (filled and empty).
Definition: hash-table.h:629

impala::HashTableCtx::HashTableCtx
HashTableCtx(const std::vector< ExprContext * > &build_expr_ctxs, const std::vector< ExprContext * > &probe_expr_ctxs, bool stores_nulls, bool finds_nulls, int32_t initial_seed, int max_levels, int num_build_tuples)
Definition: hash-table.cc:83

impala::HashTable::node_remaining_current_page_
int node_remaining_current_page_
Number of nodes left in the current page.
Definition: hash-table.h:617

impala::HashTableCtx::expr_value_null_bits_
uint8_t * expr_value_null_bits_
Definition: hash-table.h:263

impala::HashTable::data_page_pool_
MemPool * data_page_pool_
Only used for tests to allocate data pages instead of the block mgr.
Definition: hash-table.h:596

impala::HashTableCtx::HashCurrentRow
uint32_t IR_NO_INLINE HashCurrentRow()
Definition: hash-table.h:177

impala::HashTable::FirstUnmatched
Iterator FirstUnmatched(HashTableCtx *ctx)
Definition: hash-table.inline.h:135

impala::HashTable::Bucket::hash
uint32_t hash
Definition: hash-table.h:319

impala::HashTableCtx::seeds_
std::vector< uint32_t > seeds_
The seeds to use for hashing. Indexed by the level.
Definition: hash-table.h:242

bitmap.h

impala::HashTable::Iterator
friend class Iterator
Definition: hash-table.h:517

impala::HashTableCtx::row_
TupleRow * row_
Scratch buffer to generate rows on the fly.
Definition: hash-table.h:266

impala::BitUtil::NextPowerOfTwo
static int64_t NextPowerOfTwo(int64_t v)
Definition: bit-util.h:50

impala::HashTable::num_buckets
int64_t num_buckets() const
Returns the number of buckets.
Definition: hash-table.h:388

impala::HashTable::next_node_
DuplicateNode * next_node_
Next duplicate node to insert. Valid when node_remaining_current_page_ > 0.
Definition: hash-table.h:614

impala::HashTableCtx::EvalProbeRow
bool IR_NO_INLINE EvalProbeRow(TupleRow *row)
Definition: hash-table.h:209

impala::HashTable::total_data_page_size_
int64_t total_data_page_size_
Byte size of all buffers in data_pages_.
Definition: hash-table.h:611

impala::HashTable::Find
Iterator IR_ALWAYS_INLINE Find(HashTableCtx *ht_ctx, uint32_t hash)
Definition: hash-table.inline.h:117

impala::HashTableCtx::finds_nulls_
const bool finds_nulls_
Definition: hash-table.h:235

impala::HashTable::HashTable
HashTable(RuntimeState *state, BufferedBlockMgr::Client *client, int num_build_tuples, BufferedTupleStream *tuple_stream, int64_t max_num_buckets, int64_t initial_num_buckets=1024)
Definition: hash-table.cc:192

impala::HashTableTest
Definition: hash-table-test.cc:38

impala::HashTable::has_matches_
bool has_matches_
Definition: hash-table.h:645

impala::HashTableCtx::HashVariableLenRow
uint32_t HashVariableLenRow()
Definition: hash-table.cc:146

impala::HashTable::NextFilledBucket
void NextFilledBucket(int64_t *bucket_idx, DuplicateNode **node)
Definition: hash-table.inline.h:150

impala::HashTableCtx::last_expr_value_null
bool last_expr_value_null(int expr_idx) const
Returns if the expr at 'expr_idx' evaluated to NULL for the last row.
Definition: hash-table.h:139

impala::HashTable::num_build_tuples_
int num_build_tuples_
Definition: hash-table.h:640

impala::HashTable::EstimateSize
static int64_t EstimateSize(int64_t num_rows)
Definition: hash-table.h:402

impala::HashTable::Probe
int64_t IR_ALWAYS_INLINE Probe(Bucket *buckets, int64_t num_buckets, HashTableCtx *ht_ctx, uint32_t hash, bool *found)
There are wrappers of this function that perform the Find and Insert logic.
Definition: hash-table.inline.h:37

tuple-row.h

impala::HashTable::load_factor
double load_factor() const
Returns the load factor (the fraction of buckets that are non-empty).
Definition: hash-table.h:391

impala::HashTableCtx::Close
void Close()
Call to cleanup any resources.
Definition: hash-table.cc:112

impala::HashTable::tuple_stream_
BufferedTupleStream * tuple_stream_
Definition: hash-table.h:593

impala::HashTable::DuplicateNode::htdata
HtData htdata
Definition: hash-table.h:300

impala::HashTable::Close
void Close()
Call to cleanup any resources. Must be called once.
Definition: hash-table.cc:257

impala::HashTable::max_num_buckets_
const int64_t max_num_buckets_
Definition: hash-table.h:622

impala::HashTableCtx::EvalAndHashBuild
bool IR_ALWAYS_INLINE EvalAndHashBuild(TupleRow *row, uint32_t *hash)
Definition: hash-table.inline.h:23

buffered-block-mgr.h

gen_ir_descriptions.idx
int idx
Definition: gen_ir_descriptions.py:215

impala::HashTable::DuplicateNode::matched
bool matched
Definition: hash-table.h:297

impala::BufferedTupleStream::RowIdx
Definition: buffered-tuple-stream.h:121

impala::HashTable::Iterator::SetMatched
void SetMatched()
Definition: hash-table.inline.h:256

impala::HashTable::Iterator::node_
DuplicateNode * node_
Pointer to the current duplicate node.
Definition: hash-table.h:513

impala::HashTableCtx::results_buffer_size_
int results_buffer_size_
Definition: hash-table.h:255

impala::HashTable::Iterator::GetRow
TupleRow * GetRow() const
Definition: hash-table.inline.h:230

impala::HashTable::Iterator::Iterator
Iterator(HashTable *table, TupleRow *row, int bucket_idx, DuplicateNode *node, uint32_t hash)
Definition: hash-table.h:497

impala::HashTableCtx::EvalBuildRow
bool IR_NO_INLINE EvalBuildRow(TupleRow *row)
Definition: hash-table.h:203

impala-ir.h

impala::HashTableCtx
Definition: hash-table.h:104

impala::HashTable::Insert
bool IR_ALWAYS_INLINE Insert(HashTableCtx *ht_ctx, const BufferedTupleStream::RowIdx &idx, TupleRow *row, uint32_t hash)
Definition: hash-table.inline.h:94

impala::HashTable::size
int64_t size() const
Returns number of elements inserted in the hash table.
Definition: hash-table.h:380

impala::HashUtil::MurmurHash2_64
static uint64_t MurmurHash2_64(const void *input, int len, uint64_t seed)
Murmur2 hash implementation returning 64-bit hashes.
Definition: hash-util.h:64

impala::HashTable::DebugString
std::string DebugString(bool skip_empty, bool show_match, const RowDescriptor *build_desc)
Definition: hash-table.cc:387

impala::HashTableCtx::row
TupleRow * row() const
Definition: hash-table.h:127

impala::HashTableCtx::EvalAndHashProbe
bool IR_ALWAYS_INLINE EvalAndHashProbe(TupleRow *row, uint32_t *hash)
Definition: hash-table.inline.h:30

impala::HashTable::num_failed_probes_
int64_t num_failed_probes_
Number of probes that failed and had to fall back to linear probing without cap.
Definition: hash-table.h:653

impala::HashTable::buckets_
Bucket * buckets_
Definition: hash-table.h:626