Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
analytic-eval-node.h
Go to the documentation of this file.
1 // Copyright 2014 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef IMPALA_EXEC_ANALYTIC_EVAL_NODE_H
16 #define IMPALA_EXEC_ANALYTIC_EVAL_NODE_H
17 
18 #include "exec/exec-node.h"
19 #include "exprs/expr.h"
20 #include "exprs/expr-context.h"
23 #include "runtime/tuple.h"
24 
25 namespace impala {
26 
27 class AggFnEvaluator;
28 
35 //
39 //
49 //
58 class AnalyticEvalNode : public ExecNode {
59  public:
60  AnalyticEvalNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs);
61 
62  virtual Status Init(const TPlanNode& tnode);
63  virtual Status Prepare(RuntimeState* state);
64  virtual Status Open(RuntimeState* state);
65  virtual Status GetNext(RuntimeState* state, RowBatch* row_batch, bool* eos);
66  virtual Status Reset(RuntimeState* state);
67  virtual void Close(RuntimeState* state);
68 
69  protected:
71  virtual Status QueryMaintenance(RuntimeState* state);
72 
73  virtual void DebugString(int indentation_level, std::stringstream* out) const;
74 
75  private:
88 
97 
109  };
110 
118 
122 
125  Status GetNextOutputBatch(RuntimeState* state, RowBatch* row_batch, bool* eos);
126 
128  Status AddRow(int64_t stream_idx, TupleRow* row);
129 
134  void TryAddResultTupleForPrevRow(bool next_partition, int64_t stream_idx,
135  TupleRow* row);
136 
140  void TryAddResultTupleForCurrRow(int64_t stream_idx, TupleRow* row);
141 
145  void TryAddRemainingResults(int64_t partition_idx, int64_t prev_partition_idx);
146 
151  void TryRemoveRowsBeforeWindow(int64_t stream_idx);
152 
155  void InitNextPartition(int64_t stream_idx);
156 
160  void AddResultTuple(int64_t stream_idx);
161 
164  int64_t NumOutputRowsReady() const;
165 
168  void ResetLeadFnSlots();
169 
172  bool PrevRowCompare(ExprContext* pred_ctx);
173 
175  std::string DebugStateString(bool detailed) const;
176 
177  std::string DebugEvaluatedRowsString() const;
178 
180  std::string DebugWindowString() const;
181 
185  const TAnalyticWindow window_;
186 
189 
192 
197 
202 
206 
210 
215 
221 
223  std::vector<AggFnEvaluator*> evaluators_;
224 
227  std::vector<bool> is_lead_fn_;
228 
233 
236  std::vector<impala_udf::FunctionContext*> fn_ctxs_;
237 
250  std::list<std::pair<int64_t, Tuple*> > result_tuples_;
251 
254 
260  std::list<std::pair<int64_t, Tuple*> > window_tuples_;
262 
269  boost::scoped_ptr<MemPool> curr_tuple_pool_;
270  boost::scoped_ptr<MemPool> prev_tuple_pool_;
271 
277 
283 
290 
295 
298 
302 
308  boost::scoped_ptr<RowBatch> prev_child_batch_;
309  boost::scoped_ptr<RowBatch> curr_child_batch_;
310 
313 
322  boost::scoped_ptr<BufferedTupleStream> input_stream_;
323 
325  boost::scoped_ptr<MemPool> mem_pool_;
326 
329 
332 };
333 
334 }
335 
336 #endif
void TryAddRemainingResults(int64_t partition_idx, int64_t prev_partition_idx)
std::string DebugStateString(bool detailed) const
Debug string containing current state. If 'detailed', per-row state is included.
std::vector< bool > is_lead_fn_
void TryAddResultTupleForPrevRow(bool next_partition, int64_t stream_idx, TupleRow *row)
virtual Status QueryMaintenance(RuntimeState *state)
Frees local allocations from evaluators_.
virtual Status Init(const TPlanNode &tnode)
Status GetNextOutputBatch(RuntimeState *state, RowBatch *row_batch, bool *eos)
bool input_eos_
True when there are no more input rows to consume from our child.
boost::scoped_ptr< RowBatch > curr_child_batch_
A tuple with 0 materialised slots is represented as NULL.
Definition: tuple.h:48
virtual Status Reset(RuntimeState *state)
const TAnalyticWindow window_
std::string DebugWindowString() const
Debug string containing the window definition.
int64_t NumOutputRowsReady() const
virtual void Close(RuntimeState *state)
void TryRemoveRowsBeforeWindow(int64_t stream_idx)
std::string DebugString() const
Returns a string representation in DFS order of the plan rooted at this.
Definition: exec-node.cc:345
void InitNextPartition(int64_t stream_idx)
boost::scoped_ptr< MemPool > mem_pool_
Pool used for O(1) allocations that live until close.
TupleDescriptor * child_tuple_desc_
ObjectPool pool
Status ProcessChildBatches(RuntimeState *state)
bool PrevRowCompare(ExprContext *pred_ctx)
BufferedBlockMgr::Client * client_
Block manager client used by input_stream_. Not owned.
const TupleDescriptor * intermediate_tuple_desc_
Tuple descriptor for storing intermediate values of analytic fn evaluation.
std::vector< AggFnEvaluator * > evaluators_
Analytic function evaluators.
std::list< std::pair< int64_t, Tuple * > > result_tuples_
RuntimeProfile::Counter * evaluation_timer_
Time spent processing the child rows.
Status ProcessChildBatch(RuntimeState *state)
ExprContext * partition_by_eq_expr_ctx_
boost::scoped_ptr< BufferedTupleStream > input_stream_
std::string DebugEvaluatedRowsString() const
int64_t last_result_idx_
Index in input_stream_ of the most recently added result tuple.
TupleDescriptor * buffered_tuple_desc_
int64_t curr_partition_idx_
Index of the row in input_stream_ at which the current partition started.
boost::scoped_ptr< RowBatch > prev_child_batch_
std::vector< impala_udf::FunctionContext * > fn_ctxs_
boost::scoped_ptr< MemPool > curr_tuple_pool_
void AddResultTuple(int64_t stream_idx)
boost::scoped_ptr< MemPool > prev_tuple_pool_
Status AddRow(int64_t stream_idx, TupleRow *row)
Adds the row to the evaluators and the tuple stream.
void TryAddResultTupleForCurrRow(int64_t stream_idx, TupleRow *row)
std::list< std::pair< int64_t, Tuple * > > window_tuples_
virtual Status Open(RuntimeState *state)
AnalyticEvalNode(ObjectPool *pool, const TPlanNode &tnode, const DescriptorTbl &descs)
virtual Status Prepare(RuntimeState *state)
virtual Status GetNext(RuntimeState *state, RowBatch *row_batch, bool *eos)
const TupleDescriptor * result_tuple_desc_
Tuple descriptor for storing results of analytic fn evaluation.