Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
hash-join-node-ir.cc
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "codegen/impala-ir.h"
16 #include "exec/hash-join-node.h"
18 #include "runtime/row-batch.h"
19 
20 #include "common/names.h"
21 
22 using namespace impala;
23 
24 // Functions in this file are cross compiled to IR with clang.
25 
26 // Wrapper around ExecNode's eval conjuncts with a different function name.
27 // This lets us distinguish between the join conjuncts vs. non-join conjuncts
28 // for codegen.
29 // Note: don't declare this static. LLVM will pick the fastcc calling convention and
30 // we will not be able to replace the functions with codegen'd versions.
31 // TODO: explicitly set the calling convention?
32 // TODO: investigate using fastcc for all codegen internal functions?
// NOTE(review): the line that opens this definition (listing line 33) is missing from
// this extract. The index at the end of this page lists the signature as
// `bool IR_NO_INLINE EvalOtherJoinConjuncts2(ExprContext *const *ctxs, int num_ctxs,
// TupleRow *row)` -- confirm against the original file before relying on it.
// The body forwards its three arguments unchanged to ExecNode::EvalConjuncts and
// returns that call's result.
34  ExprContext* const* ctxs, int num_ctxs, TupleRow* row) {
35  return ExecNode::EvalConjuncts(ctxs, num_ctxs, row);
36 }
37 
38 // CreateOutputRow, EvalOtherJoinConjuncts, and EvalConjuncts are replaced by
39 // codegen.
// Produces join output rows for 'probe_batch' into 'out_batch'.
//
// Reserves 'max_added_rows' rows in 'out_batch' via AddRows(), fills them one after
// another, commits the number actually produced via CommitRows(), and returns that
// count. The function leaves through the 'end' label either when 'max_added_rows' rows
// have been produced or when probe_batch_pos_ has reached probe_batch->num_rows().
//
// Member state read and updated here: hash_tbl_iterator_, current_probe_row_,
// probe_batch_pos_ and matched_probe_.
//
// NOTE(review): this extract is missing listing lines 56, 69, 71, 78, 96, 102, 118 and
// 124 (the listing numbers skip them). Each gap is flagged inline below; the code as
// shown here is therefore incomplete and must be checked against the original file.
40 int HashJoinNode::ProcessProbeBatch(RowBatch* out_batch, RowBatch* probe_batch,
41  int max_added_rows) {
42  // This path does not handle full outer or right outer joins
43  DCHECK(!match_all_build_);
44 
    // Reserve the output rows up front; 'out_row_mem' is a byte cursor over them so the
    // next row can be reached by adding out_batch->row_byte_size().
45  int row_idx = out_batch->AddRows(max_added_rows);
46  DCHECK(row_idx != RowBatch::INVALID_ROW_INDEX);
47  uint8_t* out_row_mem = reinterpret_cast<uint8_t*>(out_batch->GetRow(row_idx));
48  TupleRow* out_row = reinterpret_cast<TupleRow*>(out_row_mem);
49 
50  int rows_returned = 0;
51  int probe_rows = probe_batch->num_rows();
52 
    // Raw pointer + count views of the conjunct vectors, in the form taken by
    // EvalOtherJoinConjuncts2() and EvalConjuncts().
53  ExprContext* const* other_conjunct_ctxs = &other_join_conjunct_ctxs_[0];
54  const int num_other_conjunct_ctxs = other_join_conjunct_ctxs_.size();
55 
    // NOTE(review): listing line 56 is missing here. 'conjunct_ctxs' is used further
    // down but has no visible declaration; presumably it is declared on that line in
    // the same way as 'other_conjunct_ctxs' above -- confirm.
57  const int num_conjunct_ctxs = conjunct_ctxs_.size();
58 
59  while (true) {
60  // Create output row for each matching build row
61  while (!hash_tbl_iterator_.AtEnd()) {
      // Fetch the row at the iterator, then advance the iterator before any of the
      // 'continue' statements below can run.
62  TupleRow* matched_build_row = hash_tbl_iterator_.GetRow();
63  hash_tbl_iterator_.Next<true>();
64 
65  if (join_op_ == TJoinOp::LEFT_SEMI_JOIN) {
66  // Evaluate the non-equi-join conjuncts against a temp row assembled from all
67  // build and probe tuples.
68  if (num_other_conjunct_ctxs > 0) {
          // NOTE(review): listing lines 69 and 71 are missing. Line 69 presumably
          // assembles the temp row described above, and line 71 holds the last argument
          // and the closing of the 'if (' started on line 70 -- confirm.
70  if (!EvalOtherJoinConjuncts2(other_conjunct_ctxs, num_other_conjunct_ctxs,
72  continue;
73  }
74  }
        // For this join op the output row is a copy of the probe row only.
75  out_batch->CopyRow(current_probe_row_, out_row);
76  } else {
77  CreateOutputRow(out_row, current_probe_row_, matched_build_row);
        // NOTE(review): listing line 78 is missing; the argument list on line 79 is the
        // continuation of a call opened there, presumably another
        // 'if (!EvalOtherJoinConjuncts2(' -- confirm.
79  other_conjunct_ctxs, num_other_conjunct_ctxs, out_row)) {
80  continue;
81  }
82  }
83  matched_probe_ = true;
84 
      // The output slot is only consumed (counted and advanced past) when the row
      // passes EvalConjuncts; otherwise the same slot is reused for the next candidate.
85  if (EvalConjuncts(conjunct_ctxs, num_conjunct_ctxs, out_row)) {
86  ++rows_returned;
87  // Filled up out batch or hit limit
88  if (UNLIKELY(rows_returned == max_added_rows)) goto end;
89  // Advance to next out row
90  out_row_mem += out_batch->row_byte_size();
91  out_row = reinterpret_cast<TupleRow*>(out_row_mem);
92  }
93 
94  // Handle left semi-join
95  if (match_one_build_) {
        // NOTE(review): listing line 96 is missing here, immediately before the break;
        // its content cannot be determined from this extract.
97  break;
98  }
99  }
100 
101  // Handle left outer-join
    // NOTE(review): listing line 102 is missing. The closing brace on line 112 has no
    // visible opener, so line 102 presumably holds the condition guarding this block --
    // confirm. The block builds an output row with a NULL build side.
103  CreateOutputRow(out_row, current_probe_row_, NULL);
104  matched_probe_ = true;
105  if (EvalConjuncts(conjunct_ctxs, num_conjunct_ctxs, out_row)) {
106  ++rows_returned;
107  if (UNLIKELY(rows_returned == max_added_rows)) goto end;
108  // Advance to next out row
109  out_row_mem += out_batch->row_byte_size();
110  out_row = reinterpret_cast<TupleRow*>(out_row_mem);
111  }
112  }
113 
114  if (hash_tbl_iterator_.AtEnd()) {
115  // Advance to the next probe row
116  if (UNLIKELY(probe_batch_pos_ == probe_rows)) goto end;
117  current_probe_row_ = probe_batch->GetRow(probe_batch_pos_++);
      // NOTE(review): listing line 118 is missing. Presumably it repositions
      // hash_tbl_iterator_ for the new probe row; nothing visible does so -- confirm.
119  matched_probe_ = false;
120  }
121  }
122 
123 end:
    // NOTE(review): listing line 124 is missing here; its content cannot be determined
    // from this extract.
    // Commit only the rows actually produced, which may be fewer than were reserved.
125  out_batch->CommitRows(rows_returned);
126  return rows_returned;
127 }
128 
// NOTE(review): the signature line of this definition (listing line 129) is missing
// from this extract. The index at the end of this page lists it as
// `void ProcessBuildBatch(RowBatch *build_batch)` -- confirm against the original file.
// The body passes every row of 'build_batch', in index order, to hash_tbl_->Insert().
130  // insert build row into our hash table
131  for (int i = 0; i < build_batch->num_rows(); ++i) {
132  hash_tbl_->Insert(build_batch->GetRow(i));
133  }
134 }
#define IR_NO_INLINE
Definition: impala-ir.h:30
int num_rows() const
Definition: row-batch.h:215
int AddRows(int n)
Definition: row-batch.h:94
OldHashTable::Iterator hash_tbl_iterator_
void CreateOutputRow(TupleRow *out_row, TupleRow *probe_row, TupleRow *build_row)
TupleRow * GetRow(int row_idx)
Definition: row-batch.h:140
bool AtEnd() const
Returns true if this iterator is at the end, i.e. GetRow() cannot be called.
int row_byte_size()
Definition: row-batch.h:147
const std::vector< ExprContext * > & conjunct_ctxs() const
Definition: exec-node.h:152
boost::scoped_ptr< OldHashTable > hash_tbl_
int ProcessProbeBatch(RowBatch *out_batch, RowBatch *probe_batch, int max_added_rows)
std::vector< ExprContext * > other_join_conjunct_ctxs_
non-equi-join conjuncts from the JOIN clause
void ProcessBuildBatch(RowBatch *build_batch)
Construct the build hash table, adding all the rows in 'build_batch'.
bool IR_NO_INLINE EvalOtherJoinConjuncts2(ExprContext *const *ctxs, int num_ctxs, TupleRow *row)
int64_t rows_returned() const
Definition: exec-node.h:157
void IR_ALWAYS_INLINE Next()
#define UNLIKELY(expr)
Definition: compiler-util.h:33
void CopyRow(TupleRow *src, TupleRow *dest)
Definition: row-batch.h:173
static bool EvalConjuncts(ExprContext *const *ctxs, int num_ctxs, TupleRow *row)
Definition: exec-node.cc:393
std::vector< ExprContext * > conjunct_ctxs_
Definition: exec-node.h:212
void CommitRows(int n)
Definition: row-batch.h:102
bool match_one_build_
Match at most one build row to each probe row. Used in LEFT_SEMI_JOIN.
static const int INVALID_ROW_INDEX
Definition: row-batch.h:87