Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
hash-join-node.h
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 
16 #ifndef IMPALA_EXEC_HASH_JOIN_NODE_H
17 #define IMPALA_EXEC_HASH_JOIN_NODE_H
18 
19 #include <boost/scoped_ptr.hpp>
20 #include <boost/thread.hpp>
21 #include <string>
22 
23 #include "exec/exec-node.h"
24 #include "exec/old-hash-table.h"
26 #include "util/promise.h"
27 
28 #include "gen-cpp/PlanNodes_types.h" // for TJoinOp
29 
30 namespace impala {
31 
32 class MemPool;
33 class RowBatch;
34 class TupleRow;
35 
41 //
49  public:
50  HashJoinNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs);
51 
52  virtual Status Init(const TPlanNode& tnode);
53  virtual Status Prepare(RuntimeState* state);
54  // Open() implemented in BlockingJoinNode
55  virtual Status GetNext(RuntimeState* state, RowBatch* row_batch, bool* eos);
56  virtual Status Reset(RuntimeState* state);
57  virtual void Close(RuntimeState* state);
58 
59  static const char* LLVM_CLASS_NAME;
60 
61  protected:
62  virtual void AddToDebugString(int indentation_level, std::stringstream* out) const;
63  virtual Status InitGetNext(TupleRow* first_probe_row);
64  virtual Status ConstructBuildSide(RuntimeState* state);
65 
66  private:
67  boost::scoped_ptr<OldHashTable> hash_tbl_;
69 
72  std::vector<ExprContext*> probe_expr_ctxs_;
73  std::vector<ExprContext*> build_expr_ctxs_;
74 
76  std::vector<ExprContext*> other_join_conjunct_ctxs_;
77 
82 
85 
89 
92 
97 
102 
103  RuntimeProfile::Counter* build_buckets_counter_; // num buckets in hash table
105 
108  Status LeftJoinGetNext(RuntimeState* state, RowBatch* row_batch, bool* eos);
109 
116  int ProcessProbeBatch(RowBatch* out_batch, RowBatch* probe_batch, int max_added_rows);
117 
119  void ProcessBuildBatch(RowBatch* build_batch);
120 
122  llvm::Function* CodegenCreateOutputRow(LlvmCodeGen* codegen);
123 
128  llvm::Function* CodegenProcessBuildBatch(RuntimeState* state, llvm::Function* hash_fn);
129 
134  llvm::Function* CodegenProcessProbeBatch(RuntimeState* state, llvm::Function* hash_fn);
135 };
136 
137 }
138 
139 #endif
STL-like iterator interface.
virtual void AddToDebugString(int indentation_level, std::stringstream *out) const
RuntimeProfile::Counter * hash_tbl_load_factor_counter_
static const char * LLVM_CLASS_NAME
OldHashTable::Iterator hash_tbl_iterator_
llvm::Function * CodegenProcessProbeBatch(RuntimeState *state, llvm::Function *hash_fn)
HashJoinNode(ObjectPool *pool, const TPlanNode &tnode, const DescriptorTbl &descs)
std::vector< ExprContext * > probe_expr_ctxs_
void(* ProcessBuildBatchFn)(HashJoinNode *, RowBatch *)
llvm::Function * CodegenProcessBuildBatch(RuntimeState *state, llvm::Function *hash_fn)
int(* ProcessProbeBatchFn)(HashJoinNode *, RowBatch *, RowBatch *, int)
Function pointer type for the jitted probe function; its signature must match HashJoinNode::ProcessProbeBatch() exactly.
boost::scoped_ptr< OldHashTable > hash_tbl_
virtual Status Reset(RuntimeState *state)
LLVM code generator. This is the top level object to generate jitted code.
Definition: llvm-codegen.h:107
virtual Status Prepare(RuntimeState *state)
int ProcessProbeBatch(RowBatch *out_batch, RowBatch *probe_batch, int max_added_rows)
virtual Status Init(const TPlanNode &tnode)
RuntimeProfile::Counter * build_buckets_counter_
ProcessProbeBatchFn process_probe_batch_fn_
Jitted ProcessProbeBatch function pointer. Null if codegen is disabled.
std::vector< ExprContext * > other_join_conjunct_ctxs_
non-equi-join conjuncts from the JOIN clause
ObjectPool pool
void ProcessBuildBatch(RowBatch *build_batch)
Construct the build hash table, adding all the rows in 'build_batch'.
llvm::Function * codegen_process_build_batch_fn_
llvm function for build batch
llvm::Function * CodegenCreateOutputRow(LlvmCodeGen *codegen)
Codegen function to create output row.
virtual Status InitGetNext(TupleRow *first_probe_row)
virtual Status GetNext(RuntimeState *state, RowBatch *row_batch, bool *eos)
std::vector< ExprContext * > build_expr_ctxs_
virtual Status ConstructBuildSide(RuntimeState *state)
virtual void Close(RuntimeState *state)
Status LeftJoinGetNext(RuntimeState *state, RowBatch *row_batch, bool *eos)
ProcessBuildBatchFn process_build_batch_fn_
bool match_one_build_
Match at most one build row to each probe row. Used in LEFT_SEMI_JOIN.