Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
old-hash-table.h
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 
16 #ifndef IMPALA_EXEC_OLD_HASH_TABLE_H
17 #define IMPALA_EXEC_OLD_HASH_TABLE_H
18 
19 #include <vector>
20 #include <boost/cstdint.hpp>
21 #include "codegen/impala-ir.h"
22 #include "common/logging.h"
23 #include "runtime/mem-pool.h"
24 #include "util/bitmap.h"
25 #include "util/hash-util.h"
26 #include "util/runtime-profile.h"
27 
28 namespace llvm {
29  class Function;
30 }
31 
32 namespace impala {
33 
34 class Expr;
35 class ExprContext;
36 class LlvmCodeGen;
37 class MemTracker;
38 class RowDescriptor;
39 class RuntimeState;
40 class Tuple;
41 class TupleRow;
42 
46 
55 //
60 //
62 //
76 //
// NOTE(review): this is a scraped Doxygen source listing, not the original
// header.  Jumps in the embedded line numbers (e.g. 90 -> 104) mark where the
// original comment lines were dropped, and several *code* lines (function
// signatures, member declarations) are missing as well -- each gap is flagged
// inline below; confirm against the original old-hash-table.h before editing.
// Per the member index at the bottom of this page: "The hash table does not
// support removes. The hash table is not thread safe."
84 class OldHashTable {
85  private:
86  struct Node;
87 
88  public:
89  class Iterator;
90 
// Constructor.  'build_expr_ctxs'/'probe_expr_ctxs' are evaluated against
// build/probe rows; 'stores_nulls'/'finds_nulls' control NULL handling;
// 'initial_seed' seeds the hash; 'stores_tuples' switches the payload from
// TupleRow* to Tuple* (default false); 'num_buckets' is the initial bucket
// count (default 1024).
104  OldHashTable(RuntimeState* state, const std::vector<ExprContext*>& build_expr_ctxs,
105  const std::vector<ExprContext*>& probe_expr_ctxs, int num_build_tuples,
106  bool stores_nulls, bool finds_nulls, int32_t initial_seed,
107  MemTracker* mem_tracker, bool stores_tuples = false, int64_t num_buckets = 1024);
108 
// Call to cleanup any resources. Must be called once (brief from member index).
110  void Close();
111 
// NOTE(review): body of 'bool IR_ALWAYS_INLINE Insert(TupleRow* row)' (see
// member index); the signature line and the opening of the resize/occupancy
// block closed by the '}' on listing line 129 are missing from this scrape.
// Returns false once the memory limit has been exceeded; returns true without
// inserting when the row has a NULL key and NULLs are not stored.
121  if (UNLIKELY(mem_limit_exceeded_)) return false;
122  bool has_null = EvalBuildRow(row);
123  if (!stores_nulls_ && has_null) return true;
124 
128  if (UNLIKELY(mem_limit_exceeded_)) return false;
129  }
130  return InsertImpl(row);
131  }
132 
// NOTE(review): body of 'bool IR_ALWAYS_INLINE Insert(Tuple* tuple)' (see
// member index); same missing-signature caveat as above.  The Tuple* is
// reinterpreted as a single-tuple TupleRow for expr evaluation.
134  if (UNLIKELY(mem_limit_exceeded_)) return false;
135  bool has_null = EvalBuildRow(reinterpret_cast<TupleRow*>(&tuple));
136  if (!stores_nulls_ && has_null) return true;
137 
141  if (UNLIKELY(mem_limit_exceeded_)) return false;
142  }
143  return InsertImpl(tuple);
144  }
145 
// Evaluate the build/probe exprs over 'row' and compute its hash into '*hash'.
// The bool return presumably mirrors EvalRow's NULL signal -- TODO confirm.
151  bool IR_ALWAYS_INLINE EvalAndHashBuild(TupleRow* row, uint32_t* hash);
152  bool IR_ALWAYS_INLINE EvalAndHashProbe(TupleRow* row, uint32_t* hash);
153 
// NOTE(review): listing lines 154-165 are missing here (at least one member
// plus its comments was dropped by the scrape).
164 
// Returns number of elements in the hash table (brief from member index).
166  int64_t size() const { return num_nodes_; }
167 
// Returns the number of buckets.
169  int64_t num_buckets() const { return buckets_.size(); }
170 
// Returns the fraction of buckets that are non-empty.
172  float load_factor() const {
173  return num_filled_buckets_ / static_cast<float>(buckets_.size());
174  }
175 
// Estimated bytes for 'num_rows': assumes two buckets per row plus one chain
// Node per row.
178  static int64_t EstimateSize(int64_t num_rows) {
180  int64_t num_buckets = num_rows * 2;
181  return num_buckets * sizeof(Bucket) + num_rows * sizeof(Node);
182  }
183 
// Returns the number of bytes allocated to the hash table: all Node pages
// (used + remaining on the current page) plus the bucket vector's capacity.
185  int64_t byte_size() const {
186  int64_t nodes_mem = (num_nodes_ + node_remaining_current_page_) * sizeof(Node);
187  return nodes_mem + sizeof(Bucket) * buckets_.capacity();
188  }
189 
190  bool mem_limit_exceeded() const { return mem_limit_exceeded_; }
191 
// Returns the result of the expr at 'expr_idx' evaluated over the last row.
// NOTE(review): the body (listing line 198) is missing from this scrape --
// presumably indexes expr_values_buffer_ via expr_values_buffer_offsets_;
// confirm against the original header.
197  void* last_expr_value(int expr_idx) const {
199  }
200 
// Returns if the expr at 'expr_idx' evaluated to NULL for the last row
// (brief from member index).
202  bool last_expr_value_null(int expr_idx) const {
203  return expr_value_null_bits_[expr_idx];
204  }
205 
// Presumably publishes bitmap filters built from this table (util/bitmap.h is
// included above) -- no documentation survives in this listing; TODO confirm.
212  void AddBitmapFilters();
213 
// "stl-like iterator interface" (member index): iterator over all rows.
216  Iterator Begin();
217 
// NOTE(review): listing lines 218-222 are missing; the member index declares
// 'Iterator IR_ALWAYS_INLINE Find(TupleRow* probe_row)', which likely lived
// in this gap -- confirm.
221 
// Returns end marker.
223  Iterator End() { return Iterator(); }
224 
// Codegen entry points: each returns a generated llvm::Function replacing the
// corresponding interpreted path (see member index).
228  llvm::Function* CodegenEvalTupleRow(RuntimeState* state, bool build_row);
229 
232  llvm::Function* CodegenHashCurrentRow(RuntimeState* state);
233 
236  llvm::Function* CodegenEquals(RuntimeState* state);
237 
// Class name as registered with the codegen module -- TODO confirm.
238  static const char* LLVM_CLASS_NAME;
239 
// Debug dump; 'build_desc' is needed to print row contents -- TODO confirm.
243  std::string DebugString(bool skip_empty, bool show_match,
244  const RowDescriptor* build_desc);
245 
// stl-like iterator interface (brief from member index).
247  class Iterator {
248  public:
249  Iterator() : table_(NULL), bucket_idx_(-1), node_(NULL) {
250  }
251 
// Advances the iterator; 'check_match' presumably filters on the per-node
// matched flag -- TODO confirm.
255  template<bool check_match>
256  void IR_ALWAYS_INLINE Next();
257 
// Presumably advances to the next node whose matched flag is false -- confirm.
261  bool NextUnmatched();
262 
// NOTE(review): body of the row accessor (likely 'TupleRow* GetRow() const');
// the signature line (listing lines 263-265) is missing from this scrape.
// Valid only when the table stores rows, not tuples (per the DCHECK).
266  DCHECK(!AtEnd());
267  DCHECK(!table_->stores_tuples_);
268  return reinterpret_cast<TupleRow*>(node_->data);
269  }
270 
// NOTE(review): body of the tuple accessor (likely 'Tuple* GetTuple() const');
// signature line (listing line 271) missing.  Valid only when stores_tuples_.
272  DCHECK(!AtEnd());
273  DCHECK(table_->stores_tuples_);
274  return reinterpret_cast<Tuple*>(node_->data);
275  }
276 
// Sets the matched flag on the current node.
277  void set_matched(bool v) {
278  DCHECK(!AtEnd());
279  node_->matched = v;
280  }
281 
282  bool matched() const {
283  DCHECK(!AtEnd());
284  return node_->matched;
285  }
286 
// Resets to the default (end) state.
287  void reset() {
288  bucket_idx_ = -1;
289  node_ = NULL;
290  }
291 
// Returns true if this iterator is at the end, i.e. GetRow() cannot be called
// (brief from member index).
293  bool AtEnd() const { return node_ == NULL; }
294  bool operator!=(const Iterator& rhs) { return !(*this == rhs); }
295 
296  bool operator==(const Iterator& rhs) {
297  return bucket_idx_ == rhs.bucket_idx_ && node_ == rhs.node_;
298  }
299 
300  private:
301  friend class OldHashTable;
302 
303  Iterator(OldHashTable* table, int bucket_idx, Node* node, uint32_t hash) :
304  table_(table),
305  bucket_idx_(bucket_idx),
306  node_(node),
307  scan_hash_(hash) {
308  }
309 
// NOTE(review): the 'OldHashTable* table_;' and 'Node* node_;' member
// declarations (listing lines ~310-312 and ~315-318) are missing from this
// scrape; both are initialized by the constructors above, and the member
// index documents node_ as "Current node (within current bucket)".
311 
// Current bucket idx (brief from member index).
313  int64_t bucket_idx_;
314 
317 
// Cached hash value for the row passed to Find() (brief from member index).
319  uint32_t scan_hash_;
320  };
321 
322  private:
323  friend class Iterator;
324  friend class OldHashTableTest;
325 
// Chain node for rows/tuples hashing to the same bucket.
// NOTE(review): listing lines 329-333 (comments for 'matched') are missing.
328  struct Node {
// Whether this node's row has been matched (set via Iterator::set_matched).
334  bool matched;
335 
336  uint32_t hash; // Cache of the hash for data_
337  Node* next; // Chain to next node for collisions
338  void* data; // Either the Tuple* or TupleRow*
339  };
340 
// NOTE(review): the 'Node* node;' member declaration (listing line 342) is
// missing from this scrape; the default constructor initializes it to NULL.
341  struct Bucket {
343  Bucket() : node(NULL) { }
344  };
345 
// Presumably returns the next non-empty bucket at/after '*bucket_idx',
// updating the index -- no documentation survives here; TODO confirm.
348  Bucket* NextBucket(int64_t* bucket_idx);
349 
// Resize the hash table to 'num_buckets' (brief from member index).
351  void ResizeBuckets(int64_t num_buckets);
352 
// Insert row into the hash table (brief from member index); 'data' is the
// Tuple* or TupleRow* depending on stores_tuples_.
354  bool IR_ALWAYS_INLINE InsertImpl(void* data);
355 
// Prepends 'node' to 'bucket's chain -- presumed from name; confirm.
358  void AddToBucket(Bucket* bucket, Node* node);
359 
// Moves 'node' between bucket chains ('previous_node' is its predecessor in
// the source chain) -- presumably used during resize; TODO confirm.
362  void MoveNode(Bucket* from_bucket, Bucket* to_bucket, Node* node,
363  Node* previous_node);
364 
// Evaluates 'ctxs' over 'row'; the return value is used as a has-null signal
// by the Insert paths above ('bool has_null = EvalBuildRow(row)').
368  bool EvalRow(TupleRow* row, const std::vector<ExprContext*>& ctxs);
369 
// NOTE(review): body of 'bool IR_NO_INLINE EvalBuildRow(TupleRow* row)' (see
// member index); the signature line is missing from this scrape.
375  return EvalRow(row, build_expr_ctxs_);
376  }
377 
// NOTE(review): body of 'bool IR_NO_INLINE EvalProbeRow(TupleRow* row)'
// (see member index); the signature line is missing from this scrape.
381  return EvalRow(row, probe_expr_ctxs_);
382  }
383 
// NOTE(review): body of 'uint32_t IR_NO_INLINE HashCurrentRow()' (see member
// index); the signature and the fixed-length fast path (listing lines
// 389-391, taken when there are no variable-length results) are missing --
// only the variable-length branch survives.  var_result_begin_ is documented
// as "byte offset into expr_values_buffer_ that begins the variable length
// results"; -1 evidently means none.
388  if (var_result_begin_ == -1) {
392  } else {
393  return HashVariableLenRow();
394  }
395  }
396 
// Returns the row stored in 'node'.  When storing tuples, the address of the
// stored Tuple* is reinterpreted as a single-tuple TupleRow -- confirm.
397  TupleRow* GetRow(Node* node) const {
398  if (stores_tuples_) {
399  return reinterpret_cast<TupleRow*>(&node->data);
400  } else {
401  return reinterpret_cast<TupleRow*>(node->data);
402  }
403  }
404 
// Hash path for rows with variable-length values -- presumed from name.
407  uint32_t HashVariableLenRow();
408 
// Presumably compares 'build_row' against the last evaluated expr values --
// no documentation survives here; TODO confirm.
412  bool Equals(TupleRow* build_row);
413 
// Grow the node array (brief from member index).
415  void GrowNodeArray();
416 
// Handles hitting the memory limit for an allocation of 'allocation_size'
// bytes -- presumably sets mem_limit_exceeded_; TODO confirm.
420  void MemLimitExceeded(int64_t allocation_size);
421 
// Occupancy fraction that triggers a resize -- presumed from name; the value
// is defined outside this header.
424  static const float MAX_BUCKET_OCCUPANCY_FRACTION;
425 
// NOTE(review): 'RuntimeState* state_;' (listing line ~426, per member index)
// is missing from this scrape.
427 
428  const std::vector<ExprContext*>& build_expr_ctxs_;
429  const std::vector<ExprContext*>& probe_expr_ctxs_;
430 
// Number of Tuple* in the build tuple row (brief from member index).
432  const int num_build_tuples_;
433 
437  const bool stores_nulls_;
438  const bool finds_nulls_;
439  const bool stores_tuples_;
440 
441  const int32_t initial_seed_;
442 
// NOTE(review): several private member declarations are missing in the gaps
// below (listing lines 443-446, 452-466, 472-476, 479-491).  The member index
// names, among others: MemTracker* mem_tracker_; bool mem_limit_exceeded_;
// int num_data_pages_ ("Number of data pages for nodes"); Node* next_node_
// ("Next node to insert"); int node_remaining_current_page_; int64_t
// num_filled_buckets_; int64_t num_buckets_till_resize_; uint8_t*
// expr_values_buffer_; uint8_t* expr_value_null_bits_; int var_result_begin_;
// int results_buffer_size_ ("byte size of 'expr_values_buffer_'").
445 
// number of nodes stored (i.e. size of hash table) (brief from member index).
447  int64_t num_nodes_;
448 
// MemPool used to allocate data pages (brief from member index).
450  boost::scoped_ptr<MemPool> mem_pool_;
451 
454 
457 
460 
462 
466 
467  std::vector<Bucket> buckets_;
468 
// equal to buckets_.size() but more efficient than the size function
// (brief from member index).
470  int64_t num_buckets_;
471 
474 
// Byte offset into expr_values_buffer_ for each expr's result -- confirm.
477  std::vector<int> expr_values_buffer_offsets_;
478 
481 
484 
488 
492 };
493 
494 }
495 
496 #endif
STL-like iterator interface.
#define IR_NO_INLINE
Definition: impala-ir.h:30
std::vector< Bucket > buckets_
OldHashTable(RuntimeState *state, const std::vector< ExprContext * > &build_expr_ctxs, const std::vector< ExprContext * > &probe_expr_ctxs, int num_build_tuples, bool stores_nulls, bool finds_nulls, int32_t initial_seed, MemTracker *mem_tracker, bool stores_tuples=false, int64_t num_buckets=1024)
bool operator!=(const Iterator &rhs)
uint32_t IR_NO_INLINE HashCurrentRow()
int64_t num_filled_buckets_
Number of non-empty buckets. Used to determine when to grow and rehash.
void * last_expr_value(int expr_idx) const
uint32_t HashVariableLenRow()
Node * node_
Current node (within current bucket); NULL when the iterator is at the end.
llvm::Function * CodegenHashCurrentRow(RuntimeState *state)
A tuple with 0 materialised slots is represented as NULL.
Definition: tuple.h:48
const StringSearch UrlParser::hash_search & hash
Definition: url-parser.cc:41
bool AtEnd() const
Returns true if this iterator is at the end, i.e. GetRow() cannot be called.
std::vector< int > expr_values_buffer_offsets_
TupleRow * GetRow(Node *node) const
uint8_t * expr_values_buffer_
uint32_t scan_hash_
Cached hash value for the row passed to Find()
bool IR_NO_INLINE EvalProbeRow(TupleRow *row)
int64_t size() const
Returns number of elements in the hash table.
Bucket * NextBucket(int64_t *bucket_idx)
#define IR_ALWAYS_INLINE
Definition: impala-ir.h:31
const int32_t initial_seed_
bool IR_NO_INLINE EvalBuildRow(TupleRow *row)
bool IR_ALWAYS_INLINE EvalAndHashProbe(TupleRow *row, uint32_t *hash)
llvm::Function * CodegenEvalTupleRow(RuntimeState *state, bool build_row)
static const float MAX_BUCKET_OCCUPANCY_FRACTION
int node_remaining_current_page_
Number of nodes left in the current page.
bool last_expr_value_null(int expr_idx) const
Returns if the expr at 'expr_idx' evaluated to NULL for the last row.
MemTracker * mem_tracker_
Node * next_node_
Next node to insert.
bool IR_ALWAYS_INLINE InsertImpl(void *data)
Insert row into the hash table.
bool mem_limit_exceeded() const
const std::vector< ExprContext * > & probe_expr_ctxs_
std::string DebugString(bool skip_empty, bool show_match, const RowDescriptor *build_desc)
The hash table does not support removes. The hash table is not thread safe.
static uint32_t Hash(const void *data, int32_t bytes, uint32_t seed)
Definition: hash-util.h:135
int var_result_begin_
byte offset into expr_values_buffer_ that begins the variable length results
void GrowNodeArray()
Grow the node array.
void MemLimitExceeded(int64_t allocation_size)
boost::scoped_ptr< MemPool > mem_pool_
MemPool used to allocate data pages.
bool IR_ALWAYS_INLINE Insert(TupleRow *row)
This class is thread-safe.
Definition: mem-tracker.h:61
bool operator==(const Iterator &rhs)
static const char * LLVM_CLASS_NAME
int64_t num_buckets_
equal to buckets_.size() but more efficient than the size function
int num_data_pages_
Number of data pages for nodes.
Iterator End()
Returns end marker.
void AddToBucket(Bucket *bucket, Node *node)
int64_t byte_size() const
Returns the number of bytes allocated to the hash table.
void IR_ALWAYS_INLINE Next()
void Close()
Call to cleanup any resources. Must be called once.
uint8_t * expr_value_null_bits_
const std::vector< ExprContext * > & build_expr_ctxs_
void ResizeBuckets(int64_t num_buckets)
Resize the hash table to 'num_buckets'.
Iterator IR_ALWAYS_INLINE Find(TupleRow *probe_row)
RuntimeState * state_
#define UNLIKELY(expr)
Definition: compiler-util.h:33
bool IR_ALWAYS_INLINE EvalAndHashBuild(TupleRow *row, uint32_t *hash)
bool Equals(TupleRow *build_row)
void MoveNode(Bucket *from_bucket, Bucket *to_bucket, Node *node, Node *previous_node)
bool EvalRow(TupleRow *row, const std::vector< ExprContext * > &ctxs)
int64_t num_nodes_
number of nodes stored (i.e. size of hash table)
llvm::Function * CodegenEquals(RuntimeState *state)
int64_t num_buckets() const
Returns the number of buckets.
bool IR_ALWAYS_INLINE Insert(Tuple *tuple)
const int num_build_tuples_
Number of Tuple* in the build tuple row.
float load_factor() const
Returns the load factor (the fraction of buckets that are non-empty).
int64_t bucket_idx_
Current bucket idx.
int64_t num_buckets_till_resize_
The number of filled buckets to trigger a resize. This is cached for efficiency.
static int64_t EstimateSize(int64_t num_rows)
int results_buffer_size_
byte size of 'expr_values_buffer_'
Iterator(OldHashTable *table, int bucket_idx, Node *node, uint32_t hash)