Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
aggregation-node.h
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 
16 #ifndef IMPALA_EXEC_AGGREGATION_NODE_H
17 #define IMPALA_EXEC_AGGREGATION_NODE_H
18 
19 #include <functional>
20 #include <boost/scoped_ptr.hpp>
21 
22 #include "exec/exec-node.h"
23 #include "exec/old-hash-table.h"
24 #include "runtime/descriptors.h" // for TupleId
25 #include "runtime/mem-pool.h"
26 #include "runtime/string-value.h"
27 
// Forward declaration of llvm::Function. This header only refers to it
// through pointers (the Codegen* member functions of AggregationNode take
// and return llvm::Function*), so the full LLVM definition is not needed here.
28 namespace llvm {
29  class Function;
30 }
31 
32 namespace impala {
33 
// Forward declarations of impala types that this header names without
// needing their full definitions.
// NOTE(review): `struct StringValue;` looks redundant because
// "runtime/string-value.h" is already included above -- confirm before removing.
// NOTE(review): TupleRow is used by AggregationNode::UpdateTuple() but is not
// forward-declared here; presumably it arrives via one of the includes -- verify.
34 class AggFnEvaluator;
35 class LlvmCodeGen;
36 class RowBatch;
37 class RuntimeState;
38 struct StringValue;
39 class Tuple;
40 class TupleDescriptor;
41 class SlotDescriptor;
42 
47 //
// Aggregation plan node: an ExecNode subclass that declares the Init / Prepare /
// Open / GetNext / Reset / Close entry points and keeps an OldHashTable,
// a list of AggFnEvaluators, and helpers that generate LLVM IR for the
// per-row update path.
// NOTE(review): this listing is incomplete. Its line numbers skip (e.g.
// 81 -> 85 -> 90 -> 94) and the member index at the end of the page names
// members that do not appear below (intermediate_tuple_id_, output_tuple_desc_,
// output_iterator_, build_timer_, get_results_timer_, process_row_batch_fn_,
// ProcessRowBatchWithGrouping(), ...). Consult the real header before editing.
50 class AggregationNode : public ExecNode {
51  public:
52  AggregationNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs);
53 
    // Node lifecycle entry points. All are declared virtual; Close() returns
    // void while the others return a Status.
54  virtual Status Init(const TPlanNode& tnode);
55  virtual Status Prepare(RuntimeState* state);
56  virtual Status Open(RuntimeState* state);
57  virtual Status GetNext(RuntimeState* state, RowBatch* row_batch, bool* eos);
58  virtual Status Reset(RuntimeState* state);
59  virtual void Close(RuntimeState* state);
60 
    // Declared here, defined out of line.
    // NOTE(review): presumably the class name used to look this type up in
    // the LLVM IR module -- confirm against the .cc file.
61  static const char* LLVM_CLASS_NAME;
62 
63  protected:
    // NOTE(review): presumably writes a description of this node to `out`,
    // indented by `indentation_level` -- confirm against the definition.
64  virtual void DebugString(int indentation_level, std::stringstream* out) const;
65 
66  private:
    // Hash table held through scoped_ptr, so it is released with this node.
67  boost::scoped_ptr<OldHashTable> hash_tbl_;
69 
    // Raw pointers to the aggregate function evaluators; ownership is not
    // visible from this header.
70  std::vector<AggFnEvaluator*> aggregate_evaluators_;
71 
    // FunctionContext for each agg fn and backing pool.
73  std::vector<impala_udf::FunctionContext*> agg_fn_ctxs_;
74  boost::scoped_ptr<MemPool> agg_fn_pool_;
75 
    // Exprs used to evaluate input rows.
77  std::vector<ExprContext*> probe_expr_ctxs_;
    // NOTE(review): counterpart of probe_expr_ctxs_; its role is not described
    // anywhere in this listing -- check the original header.
80  std::vector<ExprContext*> build_expr_ctxs_;
81 
85 
90 
94 
    // MemPool held through scoped_ptr.
    // NOTE(review): presumably backs the tuples this node allocates -- confirm.
95  boost::scoped_ptr<MemPool> tuple_pool_;
96 
99 
103 
109 
118 
123 
    // NOTE(review): presumably folds `row` into the aggregation state stored
    // in `tuple` -- confirm against aggregation-node.cc.
126  void UpdateTuple(Tuple* tuple, TupleRow* row);
127 
    // NOTE(review): presumably produces the final output tuple from `tuple`,
    // allocating from `pool` when needed -- confirm against aggregation-node.cc.
134  Tuple* FinalizeTuple(Tuple* tuple, MemPool* pool);
135 
    // Do the aggregation for all tuple rows in the batch.
137  void ProcessRowBatchNoGrouping(RowBatch* batch);
139 
    // Takes the IR function for the tuple update (`update_tuple_fn`) and
    // returns an llvm::Function*.
    // NOTE(review): presumably returns NULL on codegen failure, like
    // CodegenUpdateTuple() -- confirm.
144  llvm::Function* CodegenProcessRowBatch(
145  RuntimeState* state, llvm::Function* update_tuple_fn);
146 
    // NOTE(review): presumably generates IR that applies `evaluator` to the
    // slot described by `slot_desc` -- confirm.
149  llvm::Function* CodegenUpdateSlot(
150  RuntimeState* state, AggFnEvaluator* evaluator, SlotDescriptor* slot_desc);
151 
    // Codegen UpdateTuple(). Returns NULL if codegen is unsuccessful.
153  llvm::Function* CodegenUpdateTuple(RuntimeState* state);
154 };
155 
156 }
157 
158 #endif
stl-like iterator interface.
static const char * LLVM_CLASS_NAME
TupleDescriptor * output_tuple_desc_
void(* ProcessRowBatchFn)(AggregationNode *, RowBatch *)
virtual Status Prepare(RuntimeState *state)
std::vector< ExprContext * > build_expr_ctxs_
llvm::Function * CodegenUpdateSlot(RuntimeState *state, AggFnEvaluator *evaluator, SlotDescriptor *slot_desc)
AggregationNode(ObjectPool *pool, const TPlanNode &tnode, const DescriptorTbl &descs)
RuntimeProfile::Counter * build_timer_
Time spent processing the child rows.
A tuple with 0 materialised slots is represented as NULL.
Definition: tuple.h:48
virtual Status GetNext(RuntimeState *state, RowBatch *row_batch, bool *eos)
TupleId intermediate_tuple_id_
Tuple into which Update()/Merge()/Serialize() results are stored.
int TupleId
Definition: global-types.h:23
virtual Status Reset(RuntimeState *state)
llvm::Function * CodegenUpdateTuple(RuntimeState *state)
Codegen UpdateTuple(). Returns NULL if codegen is unsuccessful.
void ProcessRowBatchWithGrouping(RowBatch *batch)
std::string DebugString() const
Returns a string representation in DFS order of the plan rooted at this.
Definition: exec-node.cc:345
std::vector< ExprContext * > probe_expr_ctxs_
Exprs used to evaluate input rows.
ObjectPool pool
void UpdateTuple(Tuple *tuple, TupleRow *row)
void ProcessRowBatchNoGrouping(RowBatch *batch)
Do the aggregation for all tuple rows in the batch.
llvm::Function * codegen_process_row_batch_fn_
IR for process row batch. NULL if codegen is disabled.
virtual Status Open(RuntimeState *state)
std::vector< AggFnEvaluator * > aggregate_evaluators_
std::vector< impala_udf::FunctionContext * > agg_fn_ctxs_
FunctionContext for each agg fn and backing pool.
Tuple * FinalizeTuple(Tuple *tuple, MemPool *pool)
OldHashTable::Iterator output_iterator_
boost::scoped_ptr< MemPool > agg_fn_pool_
RuntimeProfile::Counter * get_results_timer_
Time spent returning the aggregated rows.
RuntimeProfile::Counter * hash_table_load_factor_counter_
Load factor in hash table.
ProcessRowBatchFn process_row_batch_fn_
Jitted ProcessRowBatch function pointer. Null if codegen is disabled.
TupleDescriptor * intermediate_tuple_desc_
RuntimeProfile::Counter * hash_table_buckets_counter_
Num buckets in hash table.
boost::scoped_ptr< MemPool > tuple_pool_
llvm::Function * CodegenProcessRowBatch(RuntimeState *state, llvm::Function *update_tuple_fn)
virtual void Close(RuntimeState *state)
boost::scoped_ptr< OldHashTable > hash_tbl_
virtual Status Init(const TPlanNode &tnode)