Impala
Impala is the open source, native analytic database for Apache Hadoop.
|
#include <analytic-eval-node.h>
Public Member Functions | |
AnalyticEvalNode (ObjectPool *pool, const TPlanNode &tnode, const DescriptorTbl &descs) | |
virtual Status | Init (const TPlanNode &tnode) |
virtual Status | Prepare (RuntimeState *state) |
virtual Status | Open (RuntimeState *state) |
virtual Status | GetNext (RuntimeState *state, RowBatch *row_batch, bool *eos) |
virtual Status | Reset (RuntimeState *state) |
virtual void | Close (RuntimeState *state) |
void | CollectNodes (TPlanNodeType::type node_type, std::vector< ExecNode * > *nodes) |
void | CollectScanNodes (std::vector< ExecNode * > *nodes) |
Collect all scan node types. More... | |
std::string | DebugString () const |
Returns a string representation in DFS order of the plan rooted at this. More... | |
const std::vector< ExprContext * > & | conjunct_ctxs () const |
int | id () const |
TPlanNodeType::type | type () const |
const RowDescriptor & | row_desc () const |
int64_t | rows_returned () const |
int64_t | limit () const |
bool | ReachedLimit () |
RuntimeProfile * | runtime_profile () |
MemTracker * | mem_tracker () |
MemTracker * | expr_mem_tracker () |
Static Public Member Functions | |
static Status | CreateTree (ObjectPool *pool, const TPlan &plan, const DescriptorTbl &descs, ExecNode **root) |
static void | SetDebugOptions (int node_id, TExecNodePhase::type phase, TDebugAction::type action, ExecNode *tree) |
Set debug action for node with given id in 'tree'. More... | |
static bool | EvalConjuncts (ExprContext *const *ctxs, int num_ctxs, TupleRow *row) |
static llvm::Function * | CodegenEvalConjuncts (RuntimeState *state, const std::vector< ExprContext * > &conjunct_ctxs, const char *name="EvalConjuncts") |
static int | GetNodeIdFromProfile (RuntimeProfile *p) |
Extract node id from p->name(). More... | |
Static Public Attributes | |
static const std::string | ROW_THROUGHPUT_COUNTER = "RowsReturnedRate" |
Names of counters shared by all exec nodes. More... | |
Protected Member Functions | |
virtual Status | QueryMaintenance (RuntimeState *state) |
Frees local allocations from evaluators_. More... | |
virtual void | DebugString (int indentation_level, std::stringstream *out) const |
ExecNode * | child (int i) |
bool | is_closed () |
virtual bool | IsScanNode () const |
void | InitRuntimeProfile (const std::string &name) |
Status | ExecDebugAction (TExecNodePhase::type phase, RuntimeState *state) |
void | AddRuntimeExecOption (const std::string &option) |
Appends option to 'runtime_exec_options_'. More... | |
void | AddExprCtxToFree (ExprContext *ctx) |
void | AddExprCtxsToFree (const std::vector< ExprContext * > &ctxs) |
void | AddExprCtxsToFree (const SortExecExprs &sort_exec_exprs) |
Static Protected Member Functions | |
static Status | CreateNode (ObjectPool *pool, const TPlanNode &tnode, const DescriptorTbl &descs, ExecNode **node) |
Create a single exec node derived from thrift node; place exec node in 'pool'. More... | |
static Status | CreateTreeHelper (ObjectPool *pool, const std::vector< TPlanNode > &tnodes, const DescriptorTbl &descs, ExecNode *parent, int *node_idx, ExecNode **root) |
Protected Attributes | |
int | id_ |
TPlanNodeType::type | type_ |
ObjectPool * | pool_ |
std::vector< ExprContext * > | conjunct_ctxs_ |
std::vector< ExecNode * > | children_ |
RowDescriptor | row_descriptor_ |
TExecNodePhase::type | debug_phase_ |
TDebugAction::type | debug_action_ |
int64_t | limit_ |
int64_t | num_rows_returned_ |
boost::scoped_ptr< RuntimeProfile > | runtime_profile_ |
RuntimeProfile::Counter * | rows_returned_counter_ |
RuntimeProfile::Counter * | rows_returned_rate_ |
boost::scoped_ptr< MemTracker > | mem_tracker_ |
Account for peak memory used by this node. More... | |
boost::scoped_ptr< MemTracker > | expr_mem_tracker_ |
MemTracker that should be used for ExprContexts. More... | |
boost::mutex | exec_options_lock_ |
std::string | runtime_exec_options_ |
Private Types | |
enum | AnalyticFnScope { PARTITION, RANGE, ROWS } |
Private Member Functions | |
Status | ProcessChildBatch (RuntimeState *state) |
Status | ProcessChildBatches (RuntimeState *state) |
Status | GetNextOutputBatch (RuntimeState *state, RowBatch *row_batch, bool *eos) |
Status | AddRow (int64_t stream_idx, TupleRow *row) |
Adds the row to the evaluators and the tuple stream. More... | |
void | TryAddResultTupleForPrevRow (bool next_partition, int64_t stream_idx, TupleRow *row) |
void | TryAddResultTupleForCurrRow (int64_t stream_idx, TupleRow *row) |
void | TryAddRemainingResults (int64_t partition_idx, int64_t prev_partition_idx) |
void | TryRemoveRowsBeforeWindow (int64_t stream_idx) |
void | InitNextPartition (int64_t stream_idx) |
void | AddResultTuple (int64_t stream_idx) |
int64_t | NumOutputRowsReady () const |
void | ResetLeadFnSlots () |
bool | PrevRowCompare (ExprContext *pred_ctx) |
std::string | DebugStateString (bool detailed) const |
Debug string containing current state. If 'detailed', per-row state is included. More... | |
std::string | DebugEvaluatedRowsString () const |
std::string | DebugWindowString () const |
Debug string containing the window definition. More... | |
Private Attributes | |
const TAnalyticWindow | window_ |
const TupleDescriptor * | intermediate_tuple_desc_ |
Tuple descriptor for storing intermediate values of analytic fn evaluation. More... | |
const TupleDescriptor * | result_tuple_desc_ |
Tuple descriptor for storing results of analytic fn evaluation. More... | |
TupleDescriptor * | buffered_tuple_desc_ |
TupleRow * | child_tuple_cmp_row_ |
ExprContext * | partition_by_eq_expr_ctx_ |
ExprContext * | order_by_eq_expr_ctx_ |
AnalyticFnScope | fn_scope_ |
int64_t | rows_start_offset_ |
int64_t | rows_end_offset_ |
std::vector< AggFnEvaluator * > | evaluators_ |
Analytic function evaluators. More... | |
std::vector< bool > | is_lead_fn_ |
bool | has_first_val_null_offset_ |
long | first_val_null_offset_ |
std::vector < impala_udf::FunctionContext * > | fn_ctxs_ |
std::list< std::pair< int64_t, Tuple * > > | result_tuples_ |
int64_t | last_result_idx_ |
Index in input_stream_ of the most recently added result tuple. More... | |
std::list< std::pair< int64_t, Tuple * > > | window_tuples_ |
TupleDescriptor * | child_tuple_desc_ |
boost::scoped_ptr< MemPool > | curr_tuple_pool_ |
boost::scoped_ptr< MemPool > | prev_tuple_pool_ |
int64_t | prev_pool_last_result_idx_ |
int64_t | prev_pool_last_window_idx_ |
Tuple * | curr_tuple_ |
Tuple * | dummy_result_tuple_ |
int64_t | curr_partition_idx_ |
Index of the row in input_stream_ at which the current partition started. More... | |
TupleRow * | prev_input_row_ |
boost::scoped_ptr< RowBatch > | prev_child_batch_ |
boost::scoped_ptr< RowBatch > | curr_child_batch_ |
BufferedBlockMgr::Client * | client_ |
Block manager client used by input_stream_. Not owned. More... | |
boost::scoped_ptr < BufferedTupleStream > | input_stream_ |
boost::scoped_ptr< MemPool > | mem_pool_ |
Pool used for O(1) allocations that live until close. More... | |
bool | input_eos_ |
True when there are no more input rows to consume from our child. More... | |
RuntimeProfile::Counter * | evaluation_timer_ |
Time spent processing the child rows. More... | |
Evaluates analytic functions with a single pass over input rows. It is assumed that the input has already been sorted on all of the partition exprs and then the order by exprs. If there is no order by clause or partition clause, the input is unsorted. Uses a BufferedTupleStream to buffer input rows which are returned in a streaming fashion as entire row batches of output are ready to be returned, though in some cases the entire input must actually be consumed to produce any output rows. The output row is composed of the tuples from the child node followed by a single result tuple that holds the values of the evaluated analytic functions (one slot per analytic function). When enough input rows have been consumed to produce the results of all analytic functions for one or more rows (e.g. because the order by values are different for a RANGE window), the results of all the analytic functions for those rows are produced in a result tuple by calling GetValue()/Finalize() on the evaluators and storing the tuple in result_tuples_. Input row batches are fetched from the BufferedTupleStream, copied into output row batches, and the associated result tuple is set in each corresponding row. Result tuples may apply to many rows (e.g. an arbitrary number or an entire partition) so result_tuples_ stores a pair of the stream index (the last row in the stream it applies to) and the tuple. Input rows are consumed in a streaming fashion until enough input has been consumed in order to produce enough output rows. In some cases, this may mean that only a single input batch is needed to produce the results for an output batch, e.g. "SELECT RANK OVER (ORDER BY unique_col) ... ", but in other cases, an arbitrary number of rows may need to be buffered before result rows can be produced, e.g. if multiple rows have the same values for the order by exprs. The number of buffered rows may be an entire partition or even the entire input. 
Therefore, the output rows are buffered and may spill to disk via the BufferedTupleStream.
Definition at line 58 of file analytic-eval-node.h.
|
private |
The scope over which analytic functions are evaluated. Functions are either evaluated over a window (specified by a TAnalyticWindow) or an entire partition. This is used to avoid more complex logic where we often branch based on these cases, e.g. whether or not there is a window (i.e. no window = PARTITION) is stored separately from the window type (assuming there is a window).
Enumerator | |
---|---|
PARTITION |
Analytic functions are evaluated over an entire partition (or the entire data set if no partition clause was specified). Every row within a partition is added to curr_tuple_ and buffered in the input_stream_. Once all rows in a partition have been consumed, a single result tuple is added to result_tuples_ for all rows in that partition. |
RANGE |
Functions are evaluated over windows specified with range boundaries. Currently only supports the 'default window', i.e. UNBOUNDED PRECEDING to CURRENT ROW. In this case, when the values of the order by expressions change between rows a result tuple is added to result_tuples_ for the previous rows with the same values for the order by expressions. This happens in TryAddResultTupleForPrevRow() because we determine if the order by expression values changed between the previous and current row. |
ROWS |
Functions are evaluated over windows specified with rows boundaries. A result tuple is added for every input row (except for some cases where the window extends before or after the partition). When the end boundary is offset from the current row, input rows are consumed and result tuples are produced for the associated preceding or following row. When the start boundary is offset from the current row, the first tuple (i.e. the input to the analytic functions) from each input row is buffered in window_tuples_ because it must later be removed from the window (by calling AggFnEvaluator::Remove() with the expired tuple to remove it from the current row). When either the start or end boundaries are offset from the current row, there is special casing around partition boundaries. |
Definition at line 81 of file analytic-eval-node.h.
impala::AnalyticEvalNode::AnalyticEvalNode | ( | ObjectPool * | pool, |
const TPlanNode & | tnode, | ||
const DescriptorTbl & | descs | ||
) |
Definition at line 30 of file analytic-eval-node.cc.
References buffered_tuple_desc_, DebugWindowString(), fn_scope_, impala::DescriptorTbl::GetTupleDescriptor(), impala::ExecNode::id(), PARTITION, RANGE, ROWS, rows_end_offset_, rows_start_offset_, VLOG_FILE, and window_.
|
protectedinherited |
Referenced by impala::ExecNode::AddExprCtxsToFree(), impala::UnionNode::Prepare(), impala::SortNode::Prepare(), impala::TopNNode::Prepare(), impala::ExchangeNode::Prepare(), impala::PartitionedHashJoinNode::Prepare(), impala::ExecNode::Prepare(), impala::PartitionedAggregationNode::Prepare(), and impala::HdfsScanNode::Prepare().
|
protectedinherited |
Definition at line 410 of file exec-node.cc.
References impala::ExecNode::AddExprCtxsToFree(), impala::SortExecExprs::lhs_ordering_expr_ctxs(), impala::SortExecExprs::rhs_ordering_expr_ctxs(), and impala::SortExecExprs::sort_tuple_slot_expr_ctxs().
|
inlineprotectedinherited |
Add an ExprContext to have its local allocations freed by QueryMaintenance(). Exprs that are evaluated in the main execution thread should be added. Exprs evaluated in a separate thread are generally not safe to add, since a local allocation may be freed while it's being used. Rather than using this mechanism, threads should call FreeLocalAllocations() on local ExprContexts periodically.
Definition at line 276 of file exec-node.h.
References impala::ExecNode::expr_ctxs_to_free_.
Referenced by Prepare().
|
private |
Produces a result tuple with analytic function results by calling GetValue() or Finalize() for curr_tuple_ on the evaluators_. The result tuple is stored in result_tuples_ with the index into input_stream_ specified by stream_idx.
Definition at line 337 of file analytic-eval-node.cc.
References impala::TupleDescriptor::byte_size(), impala::Tuple::Create(), curr_tuple_, curr_tuple_pool_, DebugStateString(), evaluators_, fn_ctxs_, impala::AggFnEvaluator::GetValue(), impala::ExecNode::id(), last_result_idx_, result_tuple_desc_, result_tuples_, and VLOG_ROW.
Referenced by InitNextPartition(), TryAddRemainingResults(), TryAddResultTupleForCurrRow(), and TryAddResultTupleForPrevRow().
Adds the row to the evaluators and the tuple stream.
Definition at line 306 of file analytic-eval-node.cc.
References impala::AggFnEvaluator::Add(), child_tuple_desc_, curr_partition_idx_, curr_tuple_, curr_tuple_pool_, impala::Tuple::DeepCopy(), evaluators_, fn_ctxs_, fn_scope_, impala::TupleRow::GetTuple(), impala::ExecNode::id(), input_stream_, impala::Status::OK, RETURN_IF_ERROR, ROWS, rows_start_offset_, UNLIKELY, VLOG_FILE, VLOG_ROW, window_, and window_tuples_.
Referenced by ProcessChildBatch().
|
protectedinherited |
Appends option to 'runtime_exec_options_'.
Definition at line 188 of file exec-node.cc.
References impala::RuntimeProfile::AddInfoString(), impala::ExecNode::exec_options_lock_, impala::ExecNode::runtime_exec_options_, and impala::ExecNode::runtime_profile().
Referenced by impala::PartitionedHashJoinNode::AttachProbeFilters(), impala::HashJoinNode::ConstructBuildSide(), impala::BlockingJoinNode::Open(), impala::HashJoinNode::Prepare(), impala::AggregationNode::Prepare(), impala::PartitionedHashJoinNode::Prepare(), impala::PartitionedAggregationNode::Prepare(), impala::HdfsScanNode::Prepare(), and impala::HdfsScanNode::StopAndFinalizeCounters().
|
inlineprotectedinherited |
Definition at line 241 of file exec-node.h.
References impala::ExecNode::children_.
Referenced by impala::CrossJoinNode::BuildListDebugString(), impala::BlockingJoinNode::BuildSideThread(), impala::HashJoinNode::CodegenCreateOutputRow(), impala::PartitionedHashJoinNode::CodegenCreateOutputRow(), impala::CrossJoinNode::ConstructBuildSide(), impala::HashJoinNode::ConstructBuildSide(), impala::PartitionedHashJoinNode::ConstructBuildSide(), impala::BlockingJoinNode::GetLeftChildRowString(), impala::SelectNode::GetNext(), impala::UnionNode::GetNext(), impala::CrossJoinNode::GetNext(), impala::HashJoinNode::GetNext(), GetNextOutputBatch(), impala::PartitionedAggregationNode::Partition::InitStreams(), impala::HashJoinNode::LeftJoinGetNext(), impala::PartitionedHashJoinNode::NextProbeRowBatch(), impala::SelectNode::Open(), impala::SortNode::Open(), impala::TopNNode::Open(), impala::BlockingJoinNode::Open(), impala::AggregationNode::Open(), Open(), impala::PartitionedAggregationNode::Open(), impala::UnionNode::OpenCurrentChild(), impala::SelectNode::Prepare(), impala::SortNode::Prepare(), impala::UnionNode::Prepare(), impala::TopNNode::Prepare(), impala::BlockingJoinNode::Prepare(), impala::HashJoinNode::Prepare(), impala::AggregationNode::Prepare(), Prepare(), impala::PartitionedHashJoinNode::Prepare(), impala::PartitionedAggregationNode::Prepare(), impala::PartitionedHashJoinNode::ProcessBuildInput(), ProcessChildBatches(), and impala::SortNode::SortInput().
|
virtual |
Close() will get called for every exec node, regardless of what else is called and the status of these calls (i.e. Prepare() may never have been called, or Prepare()/Open()/GetNext() returned with an error). Close() releases all resources that were allocated in Open()/GetNext(), even if the latter ended with an error. Close() can be called if the node has been prepared or the node is closed. The default implementation updates runtime profile counters and calls Close() on the children. Subclasses should check if the node has already been closed (is_closed()), then close themselves, then call the base Close(). Nodes that are using tuples returned by a child may call Close() on their children before their own Close() if the child node has returned eos. It is only safe to call Close() on the child node while the parent node is still returning rows if the parent node fully materializes the child's input.
Reimplemented from impala::ExecNode.
Definition at line 738 of file analytic-eval-node.cc.
References impala::ExprContext::Close(), impala::ExecNode::Close(), curr_child_batch_, curr_tuple_, curr_tuple_pool_, dummy_result_tuple_, evaluators_, fn_ctxs_, input_stream_, impala::ExecNode::is_closed(), mem_pool_, order_by_eq_expr_ctx_, partition_by_eq_expr_ctx_, prev_child_batch_, and prev_tuple_pool_.
|
staticinherited |
Returns a codegen'd version of EvalConjuncts(), or NULL if the function couldn't be codegen'd. The codegen'd version uses inlined, codegen'd GetBooleanVal() functions.
Definition at line 452 of file exec-node.cc.
References impala::LlvmCodeGen::FnPrototype::AddArgument(), impala::LlvmCodeGen::context(), impala::CodegenAnyVal::CreateCallWrapped(), impala::LlvmCodeGen::false_value(), impala::LlvmCodeGen::FinalizeFunction(), impala::RuntimeState::GetCodegen(), impala::Status::GetDetail(), impala::CodegenAnyVal::GetIsNull(), impala::LlvmCodeGen::GetType(), impala::CodegenAnyVal::GetVal(), impala::TupleRow::LLVM_CLASS_NAME, impala::ExprContext::LLVM_CLASS_NAME, impala::Status::ok(), impala::LlvmCodeGen::true_value(), impala::ExecNode::type(), impala::TYPE_BOOLEAN, impala::TYPE_INT, and VLOG_QUERY.
Referenced by impala::HdfsAvroScanner::CodegenDecodeAvroData(), impala::HashJoinNode::CodegenProcessProbeBatch(), and impala::PartitionedHashJoinNode::CodegenProcessProbeBatch().
|
inherited |
Collect all nodes of given 'node_type' that are part of this subtree, and return in 'nodes'.
Definition at line 359 of file exec-node.cc.
References impala::ExecNode::children_, and impala::ExecNode::type_.
Referenced by impala::ExecNode::CollectScanNodes(), and impala::PlanFragmentExecutor::Prepare().
|
inherited |
Collect all scan node types.
Definition at line 366 of file exec-node.cc.
References impala::ExecNode::CollectNodes().
Referenced by impala::PlanFragmentExecutor::Prepare().
|
inlineinherited |
Definition at line 152 of file exec-node.h.
References impala::ExecNode::conjunct_ctxs_.
Referenced by impala::HdfsScanNode::ComputeSlotMaterializationOrder(), impala::SelectNode::CopyRows(), impala::UnionNode::EvalAndMaterializeExprs(), impala::HashJoinNode::GetNext(), impala::PartitionedHashJoinNode::OutputUnmatchedBuild(), impala::HashJoinNode::ProcessProbeBatch(), and impala::PartitionedHashJoinNode::ProcessProbeBatch().
|
staticprotectedinherited |
Create a single exec node derived from thrift node; place exec node in 'pool'.
Definition at line 260 of file exec-node.cc.
References impala::ObjectPool::Add(), impala::Status::OK, and RETURN_IF_ERROR.
Referenced by impala::ExecNode::CreateTreeHelper().
|
staticinherited |
Creates exec node tree from list of nodes contained in plan via depth-first traversal. All nodes are placed in pool. Returns error if 'plan' is corrupted, otherwise success.
Definition at line 199 of file exec-node.cc.
References impala::ExecNode::CreateTreeHelper(), impala::Status::OK, and impala::Status::ok().
Referenced by impala::PlanFragmentExecutor::Prepare().
|
staticprotectedinherited |
Definition at line 218 of file exec-node.cc.
References impala::RuntimeProfile::AddChild(), impala::ExecNode::children_, impala::ExecNode::CreateNode(), impala::Status::OK, RETURN_IF_ERROR, and impala::ExecNode::runtime_profile().
Referenced by impala::ExecNode::CreateTree().
|
private |
|
private |
Debug string containing current state. If 'detailed', per-row state is included.
Definition at line 264 of file analytic-eval-node.cc.
References curr_partition_idx_, fn_scope_, input_stream_, last_result_idx_, result_tuples_, ROWS, window_, and window_tuples_.
Referenced by AddResultTuple(), GetNext(), GetNextOutputBatch(), InitNextPartition(), ProcessChildBatch(), TryAddRemainingResults(), and TryRemoveRowsBeforeWindow().
|
protectedvirtual |
Recursive helper method for generating a string for DebugString(). Implementations should call DebugString(int, std::stringstream) on their children. Input parameters: indentation_level: Current level in plan tree. Output parameters: out: Stream to accumulate debug string.
Reimplemented from impala::ExecNode.
Definition at line 766 of file analytic-eval-node.cc.
References impala::AggFnEvaluator::DebugString(), impala::ExecNode::DebugString(), impala::Expr::DebugString(), DebugWindowString(), evaluators_, order_by_eq_expr_ctx_, partition_by_eq_expr_ctx_, and impala::ExprContext::root().
|
inherited |
Returns a string representation in DFS order of the plan rooted at this.
Definition at line 345 of file exec-node.cc.
Referenced by impala::SortNode::DebugString(), impala::TopNNode::DebugString(), impala::ExchangeNode::DebugString(), impala::AggregationNode::DebugString(), DebugString(), impala::PartitionedAggregationNode::DebugString(), impala::BlockingJoinNode::DebugString(), and impala::PlanFragmentExecutor::Prepare().
|
private |
Debug string containing the window definition.
Definition at line 236 of file analytic-eval-node.cc.
References impala::DebugWindowBoundString(), fn_scope_, PARTITION, RANGE, and window_.
Referenced by AnalyticEvalNode(), and DebugString().
|
staticinherited |
Evaluate ExprContexts over row. Returns true if all exprs return true. TODO: This doesn't use the vector<Expr*> signature because I haven't figured out how to deal with declaring a templated std::vector type in IR.
Definition at line 393 of file exec-node.cc.
References impala::ExprContext::GetBooleanVal(), impala_udf::AnyVal::is_null, and impala_udf::BooleanVal::val.
Referenced by impala::SelectNode::CopyRows(), impala::UnionNode::EvalAndMaterializeExprs(), impala::HdfsScanner::EvalConjuncts(), EvalOtherJoinConjuncts(), EvalOtherJoinConjuncts2(), impala::PartitionedHashJoinNode::EvaluateNullProbe(), impala::HBaseScanNode::GetNext(), impala::HashJoinNode::GetNext(), impala::AggregationNode::GetNext(), impala::PartitionedAggregationNode::GetNext(), GetNextOutputBatch(), impala::PartitionedHashJoinNode::OutputNullAwareProbeRows(), impala::PartitionedHashJoinNode::OutputUnmatchedBuild(), impala::CrossJoinNode::ProcessLeftChildBatch(), impala::HashJoinNode::ProcessProbeBatch(), and impala::PartitionedHashJoinNode::ProcessProbeBatch().
|
protectedinherited |
Executes debug_action_ if phase matches debug_phase_. 'phase' must not be INVALID.
Definition at line 378 of file exec-node.cc.
References impala::Status::CANCELLED, impala::ExecNode::debug_action_, impala::ExecNode::debug_phase_, impala::RuntimeState::is_cancelled(), and impala::Status::OK.
Referenced by impala::SelectNode::GetNext(), impala::SortNode::GetNext(), impala::UnionNode::GetNext(), impala::HBaseScanNode::GetNext(), impala::TopNNode::GetNext(), impala::ExchangeNode::GetNext(), impala::CrossJoinNode::GetNext(), impala::HashJoinNode::GetNext(), impala::AggregationNode::GetNext(), GetNext(), impala::PartitionedHashJoinNode::GetNext(), impala::PartitionedAggregationNode::GetNext(), impala::HdfsScanNode::GetNextInternal(), impala::ExecNode::Open(), and impala::ExecNode::Prepare().
|
inlineinherited |
Definition at line 163 of file exec-node.h.
References impala::ExecNode::expr_mem_tracker_.
Referenced by impala::PartitionedAggregationNode::Partition::InitStreams(), impala::SortNode::Prepare(), impala::UnionNode::Prepare(), impala::TopNNode::Prepare(), impala::ExchangeNode::Prepare(), impala::HashJoinNode::Prepare(), impala::AggregationNode::Prepare(), Prepare(), impala::PartitionedHashJoinNode::Prepare(), impala::ExecNode::Prepare(), impala::PartitionedAggregationNode::Prepare(), and impala::HdfsScanNode::Prepare().
|
virtual |
Retrieves rows and returns them via row_batch. Sets eos to true if subsequent calls will not retrieve any more rows. Data referenced by any tuples returned in row_batch must not be overwritten by the callee until Close() is called. The memory holding that data can be returned via row_batch's tuple_data_pool (in which case it may be deleted by the caller) or held on to by the callee. The row_batch, including its tuple_data_pool, will be destroyed by the caller at some point prior to the final Close() call. In other words, if the memory holding the tuple data will be referenced by the callee in subsequent GetNext() calls, it must not be attached to the row_batch's tuple_data_pool. Caller must not be holding any io buffers. This will cause deadlock. TODO: AggregationNode and HashJoinNode cannot be "re-opened" yet.
Implements impala::ExecNode.
Definition at line 696 of file analytic-eval-node.cc.
References impala::MemPool::AcquireData(), COUNTER_SET, curr_child_batch_, DebugStateString(), impala::ExecNode::ExecDebugAction(), GetNextOutputBatch(), impala::ExecNode::id(), impala::ExecNode::num_rows_returned_, impala::Status::OK, prev_pool_last_result_idx_, prev_pool_last_window_idx_, prev_tuple_pool_, ProcessChildBatches(), QueryMaintenance(), impala::ExecNode::ReachedLimit(), RETURN_IF_CANCELLED, RETURN_IF_ERROR, impala::ExecNode::rows_returned(), impala::ExecNode::rows_returned_counter_, impala::ExecNode::runtime_profile_, SCOPED_TIMER, impala::RowBatch::tuple_data_pool(), VLOG_FILE, and window_tuples_.
Referenced by ProcessChildBatches().
|
private |
Returns a batch of output rows from input_stream_ with the analytic function results (from result_tuples_) set as the last tuple.
Definition at line 634 of file analytic-eval-node.cc.
References impala::RowBatch::AddRow(), impala::RowBatch::AtCapacity(), impala::RowBatch::capacity(), impala::ExecNode::child(), impala::RowBatch::CommitLastRow(), impala::ExecNode::conjunct_ctxs_, curr_tuple_pool_, DebugStateString(), impala::ExecNode::EvalConjuncts(), evaluation_timer_, impala::RowBatch::GetRow(), impala::ExecNode::id(), input_stream_, impala::ExecNode::mem_tracker(), impala::ExecNode::num_rows_returned_, impala::Status::OK, impala::ExecNode::ReachedLimit(), result_tuples_, RETURN_IF_ERROR, impala::ExecNode::row_desc(), SCOPED_TIMER, impala::TupleRow::SetTuple(), impala::RowDescriptor::tuple_descriptors(), VLOG_FILE, and VLOG_ROW.
Referenced by GetNext().
|
staticinherited |
Extract node id from p->name().
Definition at line 62 of file exec-node.cc.
References impala::RuntimeProfile::metadata().
|
inlineinherited |
Definition at line 154 of file exec-node.h.
References impala::ExecNode::id_.
Referenced by AddResultTuple(), AddRow(), AnalyticEvalNode(), GetNext(), GetNextOutputBatch(), impala::HdfsScanner::InitializeWriteTuplesFn(), impala::HdfsAvroScanner::InitNewRange(), InitNextPartition(), impala::PartitionedAggregationNode::MoveHashPartitions(), impala::PartitionedHashJoinNode::NodeDebugString(), Open(), impala::HdfsScanNode::Open(), impala::PlanFragmentExecutor::Prepare(), impala::PartitionedHashJoinNode::ProcessBuildInput(), ProcessChildBatch(), impala::HdfsScanNode::ScannerThread(), TryAddRemainingResults(), TryAddResultTupleForCurrRow(), TryAddResultTupleForPrevRow(), and TryRemoveRowsBeforeWindow().
|
virtual |
Initializes this object from the thrift tnode desc. The subclass should do any initialization that can fail in Init() rather than the ctor. If overridden in subclass, must first call superclass's Init().
Reimplemented from impala::ExecNode.
Definition at line 94 of file analytic-eval-node.cc.
References impala::AggFnEvaluator::Create(), impala::Expr::CreateExprTree(), evaluators_, fn_scope_, impala::ExecNode::Init(), is_lead_fn_, impala::Status::OK, order_by_eq_expr_ctx_, PARTITION, partition_by_eq_expr_ctx_, impala::ExecNode::pool_, RETURN_IF_ERROR, and window_.
|
inlineprivate |
Initializes state at the start of a new partition. stream_idx is the index of the current input row from input_stream_.
Definition at line 435 of file analytic-eval-node.cc.
References AddResultTuple(), impala::TupleDescriptor::byte_size(), curr_partition_idx_, curr_tuple_, DebugStateString(), dummy_result_tuple_, evaluators_, impala::AggFnEvaluator::Finalize(), first_val_null_offset_, fn_ctxs_, fn_scope_, has_first_val_null_offset_, impala::ExecNode::id(), impala::Tuple::Init(), impala::AggFnEvaluator::Init(), input_stream_, intermediate_tuple_desc_, last_result_idx_, result_tuples_, ROWS, rows_end_offset_, TryAddRemainingResults(), VLOG_FILE, VLOG_ROW, window_, and window_tuples_.
Referenced by Open(), and ProcessChildBatch().
|
protectedinherited |
Definition at line 371 of file exec-node.cc.
References impala::ExecNode::id_, impala::ExecNode::pool_, and impala::ExecNode::runtime_profile_.
Referenced by impala::ExecNode::ExecNode().
|
inlineprotectedinherited |
Definition at line 242 of file exec-node.h.
References impala::ExecNode::is_closed_.
Referenced by impala::SelectNode::Close(), impala::SortNode::Close(), impala::UnionNode::Close(), impala::TopNNode::Close(), impala::ExchangeNode::Close(), impala::HBaseScanNode::Close(), impala::CrossJoinNode::Close(), impala::HashJoinNode::Close(), impala::AggregationNode::Close(), impala::BlockingJoinNode::Close(), Close(), impala::PartitionedHashJoinNode::Close(), impala::PartitionedAggregationNode::Close(), impala::HdfsScanNode::Close(), impala::PartitionedAggregationNode::Partition::Close(), impala::PartitionedHashJoinNode::Partition::Close(), impala::PartitionedHashJoinNode::ReserveTupleStreamBlocks(), impala::PartitionedHashJoinNode::SpillPartition(), impala::PartitionedAggregationNode::SpillPartition(), and impala::PartitionedHashJoinNode::Partition::~Partition().
|
inlineprotectedvirtualinherited |
Reimplemented in impala::ScanNode.
Definition at line 251 of file exec-node.h.
|
inlineinherited |
Definition at line 158 of file exec-node.h.
References impala::ExecNode::limit_.
Referenced by impala::CrossJoinNode::GetNext(), and impala::HashJoinNode::LeftJoinGetNext().
|
inlineinherited |
Definition at line 162 of file exec-node.h.
References impala::ExecNode::mem_tracker_.
Referenced by impala::ExecNode::Close(), impala::CrossJoinNode::ConstructBuildSide(), impala::HashJoinNode::ConstructBuildSide(), impala::HdfsScanNode::EnoughMemoryForScannerThread(), GetNextOutputBatch(), impala::SortNode::Open(), impala::TopNNode::Open(), impala::AggregationNode::Open(), Open(), impala::PartitionedAggregationNode::Open(), impala::HdfsScanNode::Open(), impala::UnionNode::OpenCurrentChild(), impala::SelectNode::Prepare(), impala::HBaseScanNode::Prepare(), impala::TopNNode::Prepare(), impala::BlockingJoinNode::Prepare(), impala::HashJoinNode::Prepare(), impala::AggregationNode::Prepare(), Prepare(), impala::PartitionedHashJoinNode::Prepare(), impala::PartitionedAggregationNode::Prepare(), impala::HdfsScanNode::Prepare(), impala::PartitionedHashJoinNode::PrepareNextPartition(), impala::PartitionedHashJoinNode::ProcessBuildInput(), impala::PartitionedAggregationNode::ProcessStream(), impala::HdfsRCFileScanner::ReadRowGroup(), impala::HdfsAvroScanner::ResolveSchemas(), impala::SortNode::SortInput(), and impala::HdfsScanner::StartNewRowBatch().
|
inlineprivate |
Gets the number of rows that are ready to be returned by subsequent calls to GetNextOutputBatch().
Definition at line 679 of file analytic-eval-node.cc.
References input_stream_, last_result_idx_, and result_tuples_.
Referenced by ProcessChildBatches().
|
virtual |
Performs any preparatory work prior to calling GetNext(). Caller must not be holding any io buffers. This will cause deadlock. If overridden in subclass, must first call superclass's Open(). If a parent exec node adds slot filters (see RuntimeState::AddBitmapFilter()), they need to be added before calling Open() on the child that will consume them.
Reimplemented from impala::ExecNode.
Definition at line 166 of file analytic-eval-node.cc.
References impala::RuntimeState::batch_size(), impala::RuntimeState::block_mgr(), impala::TupleDescriptor::byte_size(), impala::ExecNode::child(), client_, impala::Tuple::Create(), curr_child_batch_, curr_tuple_, dummy_result_tuple_, evaluators_, first_val_null_offset_, fn_ctxs_, gen_ir_descriptions::fn_name, has_first_val_null_offset_, impala::ExecNode::id(), impala::AggFnEvaluator::Init(), InitNextPartition(), input_stream_, intermediate_tuple_desc_, mem_pool_, impala::ExecNode::mem_tracker(), impala::Status::OK, impala::ExprContext::Open(), impala::ExecNode::Open(), order_by_eq_expr_ctx_, partition_by_eq_expr_ctx_, prev_child_batch_, QueryMaintenance(), impala::BufferedBlockMgr::RegisterClient(), result_tuple_desc_, RETURN_IF_CANCELLED, RETURN_IF_ERROR, impala::ExecNode::row_desc(), impala::ExecNode::runtime_profile(), impala::ExecNode::runtime_profile_, SCOPED_TIMER, and VLOG_FILE.
|
virtual |
Sets up internal structures, etc., without doing any actual work. Must be called prior to Open(). Will only be called once in this node's lifetime. All code generation (adding functions to the LlvmCodeGen object) must happen in Prepare(). Retrieving the jit compiled function pointer must happen in Open(). If overridden in subclass, must first call superclass's Prepare().
Reimplemented from impala::ExecNode.
Definition at line 124 of file analytic-eval-node.cc.
References impala::ObjectPool::Add(), ADD_TIMER, impala::ExecNode::AddExprCtxToFree(), buffered_tuple_desc_, impala::ExecNode::child(), child_tuple_cmp_row_, child_tuple_desc_, curr_tuple_pool_, impala::RuntimeState::desc_tbl(), evaluation_timer_, evaluators_, impala::ExecNode::expr_mem_tracker(), fn_ctxs_, impala::TupleDescriptor::id(), intermediate_tuple_desc_, mem_pool_, impala::ExecNode::mem_tracker(), impala::RuntimeState::obj_pool(), impala::Status::OK, order_by_eq_expr_ctx_, partition_by_eq_expr_ctx_, impala::ExprContext::Prepare(), impala::ExecNode::Prepare(), prev_tuple_pool_, result_tuple_desc_, RETURN_IF_ERROR, impala::ExecNode::row_desc(), impala::ExecNode::runtime_profile(), impala::ExecNode::runtime_profile_, SCOPED_TIMER, impala::TupleDescriptor::slots(), and impala::RowDescriptor::tuple_descriptors().
|
inlineprivate |
Evaluates the predicate pred_ctx over child_tuple_cmp_row_, which is a TupleRow* containing the previous row and the current row set during ProcessChildBatch().
Definition at line 518 of file analytic-eval-node.cc.
References child_tuple_cmp_row_, impala::ExprContext::GetBooleanVal(), impala_udf::AnyVal::is_null, and impala_udf::BooleanVal::val.
Referenced by ProcessChildBatch(), and TryAddResultTupleForPrevRow().
|
private |
Evaluates analytic functions over curr_child_batch_. Each input row is passed to the evaluators and added to input_stream_, where it is stored until a tuple containing the results of the analytic functions for that row is ready to be returned. When enough rows have been processed so that results can be produced for one or more rows, a tuple containing those results is stored in result_tuples_. That tuple gets set in the associated output row(s) later in GetNextOutputBatch().
Definition at line 548 of file analytic-eval-node.cc.
References AddRow(), child_tuple_cmp_row_, curr_child_batch_, curr_partition_idx_, curr_tuple_pool_, DebugStateString(), evaluation_timer_, impala::TupleRow::GetTuple(), impala::ExecNode::id(), InitNextPartition(), input_eos_, input_stream_, last_result_idx_, MAX_TUPLE_POOL_SIZE, impala::Status::OK, order_by_eq_expr_ctx_, partition_by_eq_expr_ctx_, prev_input_row_, prev_pool_last_result_idx_, prev_pool_last_window_idx_, prev_tuple_pool_, PrevRowCompare(), RETURN_IF_ERROR, SCOPED_TIMER, impala::TupleRow::SetTuple(), TryAddRemainingResults(), TryAddResultTupleForCurrRow(), TryAddResultTupleForPrevRow(), TryRemoveRowsBeforeWindow(), UNLIKELY, VLOG_FILE, and window_tuples_.
Referenced by ProcessChildBatches().
|
private |
Processes child batches (calling ProcessChildBatch()) until enough output rows are ready to return an output batch.
Definition at line 525 of file analytic-eval-node.cc.
References impala::RuntimeState::batch_size(), impala::ExecNode::child(), curr_child_batch_, GetNext(), input_eos_, NumOutputRowsReady(), impala::Status::OK, prev_child_batch_, ProcessChildBatch(), QueryMaintenance(), RETURN_IF_CANCELLED, and RETURN_IF_ERROR.
Referenced by GetNext().
|
protectedvirtual |
Frees local allocations from evaluators_.
Reimplemented from impala::ExecNode.
Definition at line 781 of file analytic-eval-node.cc.
References evaluators_, impala::ExprContext::FreeLocalAllocations(), and impala::ExecNode::QueryMaintenance().
Referenced by GetNext(), Open(), and ProcessChildBatches().
|
inlineinherited |
Definition at line 159 of file exec-node.h.
References impala::ExecNode::limit_, and impala::ExecNode::num_rows_returned_.
Referenced by impala::HdfsParquetScanner::AssembleRows(), impala::SelectNode::CopyRows(), impala::UnionNode::EvalAndMaterializeExprs(), impala::HdfsTextScanner::FinishScanRange(), impala::SelectNode::GetNext(), impala::UnionNode::GetNext(), impala::SortNode::GetNext(), impala::HBaseScanNode::GetNext(), impala::ExchangeNode::GetNext(), impala::CrossJoinNode::GetNext(), impala::HashJoinNode::GetNext(), impala::AggregationNode::GetNext(), impala::PartitionedHashJoinNode::GetNext(), GetNext(), impala::PartitionedAggregationNode::GetNext(), impala::HdfsScanNode::GetNextInternal(), impala::ExchangeNode::GetNextMerging(), GetNextOutputBatch(), impala::HashJoinNode::LeftJoinGetNext(), impala::HdfsSequenceScanner::ProcessBlockCompressedScanRange(), impala::HdfsTextScanner::ProcessRange(), impala::HdfsAvroScanner::ProcessRange(), impala::HdfsSequenceScanner::ProcessRange(), impala::HdfsRCFileScanner::ProcessRange(), and impala::PlanFragmentExecutor::ReachedLimit().
|
virtual |
Resets all data-specific state, returning this node to the state it was in after calling Prepare() and before calling Open(). Prepare() must have already been called before calling Reset(). Open() and GetNext() may have optionally been called. Close() must not have been called. If overridden in a subclass, must call superclass's Reset() at the end. The default implementation calls Reset() on children. Note that this function may be called many times, so it should be fast. For example, accumulated memory does not need to be freed on every call if it's expensive.
Reimplemented from impala::ExecNode.
Definition at line 733 of file analytic-eval-node.cc.
|
private |
Resets the slots in curr_tuple_ that store the intermediate results for lead(). This is necessary to produce the default value (set by Init()).
|
inlineinherited |
Definition at line 156 of file exec-node.h.
References impala::ExecNode::row_descriptor_.
Referenced by impala::CrossJoinNode::BuildListDebugString(), impala::HashJoinNode::CodegenCreateOutputRow(), impala::PartitionedHashJoinNode::CodegenCreateOutputRow(), impala::CrossJoinNode::ConstructBuildSide(), impala::HashJoinNode::ConstructBuildSide(), impala::BlockingJoinNode::GetLeftChildRowString(), impala::HashJoinNode::GetNext(), impala::AggregationNode::GetNext(), GetNextOutputBatch(), impala::PartitionedAggregationNode::Partition::InitStreams(), impala::TopNNode::Open(), impala::AggregationNode::Open(), Open(), impala::PartitionedAggregationNode::Open(), impala::UnionNode::OpenCurrentChild(), impala::SelectNode::Prepare(), impala::SortNode::Prepare(), impala::UnionNode::Prepare(), impala::TopNNode::Prepare(), impala::BlockingJoinNode::Prepare(), impala::HashJoinNode::Prepare(), impala::AggregationNode::Prepare(), Prepare(), impala::PartitionedHashJoinNode::Prepare(), impala::ExecNode::Prepare(), impala::PlanFragmentExecutor::Prepare(), impala::PartitionedAggregationNode::Prepare(), impala::HdfsScanNode::Prepare(), impala::PartitionedHashJoinNode::ProcessBuildInput(), impala::PartitionedAggregationNode::ProcessStream(), impala::PlanFragmentExecutor::row_desc(), impala::SortNode::SortInput(), and impala::HdfsScanner::StartNewRowBatch().
|
inlineinherited |
Definition at line 157 of file exec-node.h.
References impala::ExecNode::num_rows_returned_.
Referenced by impala::CrossJoinNode::GetNext(), GetNext(), impala::HashJoinNode::LeftJoinGetNext(), impala::PartitionedAggregationNode::Open(), impala::HdfsSequenceScanner::ProcessDecompressedBlock(), impala::CrossJoinNode::ProcessLeftChildBatch(), impala::HashJoinNode::ProcessProbeBatch(), and impala::HdfsTextScanner::WriteFields().
|
inlineinherited |
Definition at line 161 of file exec-node.h.
References impala::ExecNode::runtime_profile_.
Referenced by impala::ExecNode::AddRuntimeExecOption(), impala::BlockingJoinNode::BuildSideThread(), impala::ExecNode::CreateTreeHelper(), impala::PartitionedAggregationNode::Partition::InitStreams(), impala::SortNode::Open(), Open(), impala::PartitionedAggregationNode::Open(), impala::HdfsScanNode::Open(), impala::HdfsTextScanner::Prepare(), impala::HBaseScanNode::Prepare(), impala::BaseSequenceScanner::Prepare(), impala::ExchangeNode::Prepare(), impala::HdfsParquetScanner::Prepare(), impala::BlockingJoinNode::Prepare(), impala::HashJoinNode::Prepare(), impala::AggregationNode::Prepare(), Prepare(), impala::PartitionedHashJoinNode::Prepare(), impala::ExecNode::Prepare(), impala::ScanNode::Prepare(), impala::PlanFragmentExecutor::Prepare(), impala::PartitionedAggregationNode::Prepare(), impala::HdfsScanner::Prepare(), impala::HdfsScanNode::Prepare(), and impala::PartitionedHashJoinNode::ProcessBuildInput().
|
staticinherited |
Set debug action for node with given id in 'tree'.
Definition at line 332 of file exec-node.cc.
References impala::ExecNode::children_, impala::ExecNode::debug_action_, impala::ExecNode::debug_phase_, and impala::ExecNode::id_.
Referenced by impala::PlanFragmentExecutor::Prepare().
|
inlineprivate |
Adds additional result tuples at the end of a partition, e.g. if the end bound is FOLLOWING. partition_idx is the index into input_stream_ of the new partition; prev_partition_idx is the index of the previous partition.
Definition at line 391 of file analytic-eval-node.cc.
References AddResultTuple(), curr_tuple_, DebugStateString(), evaluators_, fn_ctxs_, fn_scope_, impala::ExecNode::id(), is_lead_fn_, last_result_idx_, impala::AggFnEvaluator::Remove(), ROWS, rows_start_offset_, VLOG_ROW, window_, and window_tuples_.
Referenced by InitNextPartition(), and ProcessChildBatch().
|
inlineprivate |
Determines if there is a window ending at the current row, and if so, calls AddResultTuple() with the index of the current row in input_stream_. stream_idx is the index of the current input row from input_stream_.
Definition at line 364 of file analytic-eval-node.cc.
References AddResultTuple(), curr_partition_idx_, fn_scope_, impala::ExecNode::id(), ROWS, rows_end_offset_, VLOG_ROW, and window_.
Referenced by ProcessChildBatch().
|
inlineprivate |
Determines if there is a window ending at the previous row, and if so, calls AddResultTuple() with the index of the previous row in input_stream_. next_partition indicates if the current row is the start of a new partition. stream_idx is the index of the current input row from input_stream_.
Definition at line 350 of file analytic-eval-node.cc.
References AddResultTuple(), fn_scope_, impala::ExecNode::id(), order_by_eq_expr_ctx_, PrevRowCompare(), RANGE, ROWS, VLOG_ROW, and window_.
Referenced by ProcessChildBatch().
|
inlineprivate |
Removes rows from curr_tuple_ (by calling AggFnEvaluator::Remove()) that are no longer in the window (i.e. they are before the window start boundary). stream_idx is the index of the row in input_stream_ that is currently being processed in ProcessChildBatch().
Definition at line 375 of file analytic-eval-node.cc.
References curr_partition_idx_, curr_tuple_, DebugStateString(), evaluators_, fn_ctxs_, fn_scope_, impala::ExecNode::id(), impala::AggFnEvaluator::Remove(), ROWS, rows_end_offset_, rows_start_offset_, VLOG_ROW, window_, and window_tuples_.
Referenced by ProcessChildBatch().
|
inlineinherited |
Definition at line 155 of file exec-node.h.
References impala::ExecNode::type_.
Referenced by impala::ExecNode::CodegenEvalConjuncts(), impala::PartitionedAggregationNode::CodegenUpdateTuple(), and impala::PlanFragmentExecutor::Prepare().
|
private |
Tuple descriptor of the buffered tuple (identical to the input child tuple, which is assumed to come from a single SortNode). NULL if both partition_exprs and order_by_exprs are empty.
Definition at line 196 of file analytic-eval-node.h.
Referenced by AnalyticEvalNode(), and Prepare().
|
private |
TupleRow* composed of the first child tuple and the buffered tuple, used by partition_by_eq_expr_ctx_ and order_by_eq_expr_ctx_. Set in Prepare() if buffered_tuple_desc_ is not NULL, allocated from mem_pool_.
Definition at line 201 of file analytic-eval-node.h.
Referenced by Prepare(), PrevRowCompare(), and ProcessChildBatch().
|
private |
Definition at line 261 of file analytic-eval-node.h.
|
protectedinherited |
Definition at line 214 of file exec-node.h.
Referenced by impala::ExecNode::child(), impala::ExecNode::Close(), impala::ExecNode::CollectNodes(), impala::ExecNode::CreateTreeHelper(), impala::HBaseScanNode::DebugString(), impala::UnionNode::GetNext(), impala::UnionNode::Open(), impala::AggregationNode::Open(), impala::PartitionedAggregationNode::Open(), impala::UnionNode::OpenCurrentChild(), impala::ExecNode::Prepare(), impala::PartitionedAggregationNode::ProcessStream(), impala::ExecNode::Reset(), and impala::ExecNode::SetDebugOptions().
|
private |
Block manager client used by input_stream_. Not owned.
Definition at line 312 of file analytic-eval-node.h.
Referenced by Open().
|
protectedinherited |
Definition at line 212 of file exec-node.h.
Referenced by impala::ExecNode::Close(), impala::HashJoinNode::CodegenProcessProbeBatch(), impala::PartitionedHashJoinNode::CodegenProcessProbeBatch(), impala::ExecNode::conjunct_ctxs(), impala::SelectNode::CopyRows(), impala::UnionNode::EvalAndMaterializeExprs(), impala::HBaseScanNode::GetNext(), impala::HashJoinNode::GetNext(), impala::AggregationNode::GetNext(), impala::PartitionedAggregationNode::GetNext(), GetNextOutputBatch(), impala::TopNNode::Init(), impala::ExecNode::Init(), impala::PartitionedHashJoinNode::Init(), impala::ExecNode::Open(), impala::PartitionedHashJoinNode::OutputUnmatchedBuild(), impala::ExecNode::Prepare(), impala::CrossJoinNode::ProcessLeftChildBatch(), impala::HashJoinNode::ProcessProbeBatch(), and impala::PartitionedHashJoinNode::ProcessProbeBatch().
|
private |
Definition at line 309 of file analytic-eval-node.h.
Referenced by Close(), GetNext(), Open(), ProcessChildBatch(), and ProcessChildBatches().
|
private |
Index of the row in input_stream_ at which the current partition started.
Definition at line 297 of file analytic-eval-node.h.
Referenced by AddRow(), DebugStateString(), InitNextPartition(), ProcessChildBatch(), TryAddResultTupleForCurrRow(), and TryRemoveRowsBeforeWindow().
|
private |
The tuple described by intermediate_tuple_desc_ storing intermediate state for the evaluators_. When enough input rows have been consumed to produce the analytic function results, a result tuple (described by result_tuple_desc_) is created and the agg fn results are written to that tuple by calling Finalize()/GetValue() on the evaluators with curr_tuple_ as the source tuple.
Definition at line 289 of file analytic-eval-node.h.
Referenced by AddResultTuple(), AddRow(), Close(), InitNextPartition(), Open(), TryAddRemainingResults(), and TryRemoveRowsBeforeWindow().
|
private |
Pools used to allocate result tuples (added to result_tuples_ and later returned) and window tuples (added to window_tuples_ to buffer the current window). Resources are transferred from curr_tuple_pool_ to prev_tuple_pool_ once it is at least MAX_TUPLE_POOL_SIZE bytes. Resources from prev_tuple_pool_ are transferred to an output row batch when all result tuples it contains have been returned and all window tuples it contains are no longer needed.
Definition at line 269 of file analytic-eval-node.h.
Referenced by AddResultTuple(), AddRow(), Close(), GetNextOutputBatch(), Prepare(), and ProcessChildBatch().
|
protectedinherited |
Definition at line 220 of file exec-node.h.
Referenced by impala::ExecNode::ExecDebugAction(), and impala::ExecNode::SetDebugOptions().
|
protectedinherited |
debug-only: if debug_action_ is not INVALID, node will perform action in debug_phase_
Definition at line 219 of file exec-node.h.
Referenced by impala::ExecNode::ExecDebugAction(), and impala::ExecNode::SetDebugOptions().
|
private |
A tuple described by result_tuple_desc_ used when calling Finalize() on the evaluators_ to release resources between partitions; the value is never used. TODO: Remove when agg fns implement a separate Close() method to release resources.
Definition at line 294 of file analytic-eval-node.h.
Referenced by Close(), InitNextPartition(), and Open().
|
private |
Time spent processing the child rows.
Definition at line 331 of file analytic-eval-node.h.
Referenced by GetNextOutputBatch(), Prepare(), and ProcessChildBatch().
|
private |
Analytic function evaluators.
Definition at line 223 of file analytic-eval-node.h.
Referenced by AddResultTuple(), AddRow(), Close(), DebugString(), Init(), InitNextPartition(), Open(), Prepare(), QueryMaintenance(), TryAddRemainingResults(), and TryRemoveRowsBeforeWindow().
|
protectedinherited |
Execution options that are determined at runtime. This is added to the runtime profile at Close(). An example of an option logged here is "Codegen Enabled".
Definition at line 238 of file exec-node.h.
Referenced by impala::ExecNode::AddRuntimeExecOption().
|
protectedinherited |
MemTracker that should be used for ExprContexts.
Definition at line 233 of file exec-node.h.
Referenced by impala::ExecNode::expr_mem_tracker(), and impala::ExecNode::Prepare().
|
private |
Definition at line 232 of file analytic-eval-node.h.
Referenced by InitNextPartition(), and Open().
|
private |
FunctionContext for each analytic function. String data returned by the analytic functions is allocated via these contexts.
Definition at line 236 of file analytic-eval-node.h.
Referenced by AddResultTuple(), AddRow(), Close(), InitNextPartition(), Open(), Prepare(), TryAddRemainingResults(), and TryRemoveRowsBeforeWindow().
|
private |
The scope over which analytic functions are evaluated. TODO: Consider adding additional state to capture whether different kinds of window bounds need to be maintained, e.g. (fn_scope_ == ROWS && window_.__isset.end_bound).
Definition at line 214 of file analytic-eval-node.h.
Referenced by AddRow(), AnalyticEvalNode(), DebugStateString(), DebugWindowString(), Init(), InitNextPartition(), TryAddRemainingResults(), TryAddResultTupleForCurrRow(), TryAddResultTupleForPrevRow(), and TryRemoveRowsBeforeWindow().
|
private |
If true, evaluating FIRST_VALUE requires special null handling when initializing new partitions determined by the offset. Set in Open() by inspecting the agg fns.
Definition at line 231 of file analytic-eval-node.h.
Referenced by InitNextPartition(), and Open().
|
protectedinherited |
Definition at line 209 of file exec-node.h.
Referenced by impala::PartitionedAggregationNode::CreateHashPartitions(), impala::ExecNode::id(), impala::ExecNode::InitRuntimeProfile(), impala::PartitionedAggregationNode::NextPartition(), impala::ExchangeNode::Prepare(), impala::PartitionedHashJoinNode::PrepareNextPartition(), impala::PartitionedHashJoinNode::ProcessBuildInput(), and impala::ExecNode::SetDebugOptions().
|
private |
True when there are no more input rows to consume from our child.
Definition at line 328 of file analytic-eval-node.h.
Referenced by ProcessChildBatch(), and ProcessChildBatches().
|
private |
Buffers input rows added in ProcessChildBatch() until enough rows are able to be returned by GetNextOutputBatch(), in which case row batches are returned from the front of the stream and the underlying buffered blocks are deleted once read. The number of rows that must be buffered may vary from an entire partition (e.g. no order by clause) to a single row (e.g. ROWS windows). When the amount of buffered data exceeds the available memory in the underlying BufferedBlockMgr, input_stream_ is unpinned (i.e., possibly spilled to disk if necessary). TODO: Consider re-pinning unpinned streams when possible.
Definition at line 322 of file analytic-eval-node.h.
Referenced by AddRow(), Close(), DebugStateString(), GetNextOutputBatch(), InitNextPartition(), NumOutputRowsReady(), Open(), and ProcessChildBatch().
|
private |
Tuple descriptor for storing intermediate values of analytic fn evaluation.
Definition at line 188 of file analytic-eval-node.h.
Referenced by InitNextPartition(), Open(), and Prepare().
|
private |
Indicates if each evaluator is the lead() fn. Used by ResetLeadFnSlots() to determine which slots need to be reset.
Definition at line 227 of file analytic-eval-node.h.
Referenced by Init(), and TryAddRemainingResults().
|
private |
Index in input_stream_ of the most recently added result tuple.
Definition at line 253 of file analytic-eval-node.h.
Referenced by AddResultTuple(), DebugStateString(), InitNextPartition(), NumOutputRowsReady(), ProcessChildBatch(), and TryAddRemainingResults().
|
protectedinherited |
Definition at line 222 of file exec-node.h.
Referenced by impala::SortNode::GetNext(), impala::HdfsScanNode::GetNextInternal(), impala::ExchangeNode::GetNextMerging(), impala::TopNNode::InsertTupleRow(), impala::HdfsScanNode::limit(), impala::ExecNode::limit(), impala::TopNNode::Open(), and impala::ExecNode::ReachedLimit().
|
private |
Pool used for O(1) allocations that live until close.
Definition at line 325 of file analytic-eval-node.h.
|
protectedinherited |
Account for peak memory used by this node.
Definition at line 230 of file exec-node.h.
Referenced by impala::ExecNode::mem_tracker(), and impala::ExecNode::Prepare().
|
protectedinherited |
Definition at line 223 of file exec-node.h.
Referenced by impala::ExecNode::Close(), impala::SelectNode::CopyRows(), impala::UnionNode::EvalAndMaterializeExprs(), impala::SortNode::GetNext(), impala::HBaseScanNode::GetNext(), impala::TopNNode::GetNext(), impala::ExchangeNode::GetNext(), impala::CrossJoinNode::GetNext(), impala::HashJoinNode::GetNext(), impala::AggregationNode::GetNext(), impala::PartitionedHashJoinNode::GetNext(), GetNext(), impala::PartitionedAggregationNode::GetNext(), impala::HdfsScanNode::GetNextInternal(), impala::ExchangeNode::GetNextMerging(), GetNextOutputBatch(), impala::HashJoinNode::LeftJoinGetNext(), impala::PartitionedHashJoinNode::OutputUnmatchedBuild(), impala::ExecNode::ReachedLimit(), and impala::ExecNode::rows_returned().
|
private |
Expr context for a predicate that checks if child tuple '<' buffered tuple for order by exprs.
Definition at line 209 of file analytic-eval-node.h.
Referenced by Close(), DebugString(), Init(), Open(), Prepare(), ProcessChildBatch(), and TryAddResultTupleForPrevRow().
|
private |
Expr context for a predicate that checks if child tuple '<' buffered tuple for partitioning exprs.
Definition at line 205 of file analytic-eval-node.h.
Referenced by Close(), DebugString(), Init(), Open(), Prepare(), and ProcessChildBatch().
|
protectedinherited |
Definition at line 211 of file exec-node.h.
Referenced by impala::SortNode::Init(), impala::UnionNode::Init(), impala::TopNNode::Init(), impala::ExchangeNode::Init(), impala::HashJoinNode::Init(), impala::AggregationNode::Init(), impala::ExecNode::Init(), impala::PartitionedHashJoinNode::Init(), Init(), impala::PartitionedAggregationNode::Init(), impala::ExecNode::InitRuntimeProfile(), impala::HdfsScanNode::Open(), impala::PartitionedHashJoinNode::Prepare(), and impala::PartitionedHashJoinNode::ProcessBuildInput().
|
private |
Current and previous input row batches from the child. RowBatches are allocated once and reused. Previous input row batch owns prev_input_row_ between calls to ProcessChildBatch(). The prev batch is Reset() after calling ProcessChildBatch() and then swapped with the curr batch so the RowBatch owning prev_input_row_ is stored in prev_child_batch_ for the next call to ProcessChildBatch().
Definition at line 308 of file analytic-eval-node.h.
Referenced by Close(), Open(), and ProcessChildBatches().
|
private |
Previous input row used to compare partition boundaries and to determine when the order-by expressions change.
Definition at line 301 of file analytic-eval-node.h.
Referenced by ProcessChildBatch().
|
private |
The index of the last row from input_stream_ associated with output rows containing resources in prev_tuple_pool_. -1 when the pool is empty. Resources from prev_tuple_pool_ can only be transferred to an output batch once all rows containing these tuples have been returned.
Definition at line 276 of file analytic-eval-node.h.
Referenced by GetNext(), and ProcessChildBatch().
|
private |
The index of the last row from input_stream_ associated with window tuples containing resources in prev_tuple_pool_. -1 when the pool is empty. Resources from prev_tuple_pool_ can only be transferred to an output batch once all rows containing these tuples are no longer needed (removed from window_tuples_).
Definition at line 282 of file analytic-eval-node.h.
Referenced by GetNext(), and ProcessChildBatch().
|
private |
Definition at line 270 of file analytic-eval-node.h.
Referenced by Close(), GetNext(), Prepare(), and ProcessChildBatch().
|
private |
Tuple descriptor for storing results of analytic fn evaluation.
Definition at line 191 of file analytic-eval-node.h.
Referenced by AddResultTuple(), Open(), and Prepare().
|
private |
Queue of tuples which are ready to be set in output rows, with the index into input_stream_ of the last TupleRow that gets the Tuple, i.e. this is a sparse structure. For example, if result_tuples_ contains tuples with indexes x1 and x2 where x1 < x2, output rows with indexes in [0, x1] get the first result tuple and output rows with indexes in (x1, x2] get the second result tuple. Pairs are pushed onto the queue in ProcessChildBatch() and dequeued in order in GetNextOutputBatch(). The size of result_tuples_ is limited by 2 times the row batch size because we only process input batches if there are not enough result tuples to produce a single batch of output rows. In the worst case there may be a single result tuple per output row and result_tuples_.size() may be one less than the row batch size, in which case we will process another input row batch (inserting one result tuple per input row) before returning a row batch.
Definition at line 250 of file analytic-eval-node.h.
Referenced by AddResultTuple(), DebugStateString(), GetNextOutputBatch(), InitNextPartition(), and NumOutputRowsReady().
|
protectedinherited |
Definition at line 215 of file exec-node.h.
Referenced by impala::SortNode::Open(), impala::SortNode::Prepare(), impala::TopNNode::Prepare(), impala::ExchangeNode::Prepare(), and impala::ExecNode::row_desc().
|
staticinherited |
Names of counters shared by all exec nodes.
Definition at line 169 of file exec-node.h.
Referenced by impala::ExecNode::Prepare().
|
private |
Definition at line 220 of file analytic-eval-node.h.
Referenced by AnalyticEvalNode(), InitNextPartition(), TryAddResultTupleForCurrRow(), and TryRemoveRowsBeforeWindow().
|
protectedinherited |
Definition at line 226 of file exec-node.h.
Referenced by impala::ExecNode::Close(), impala::SelectNode::CopyRows(), impala::UnionNode::EvalAndMaterializeExprs(), impala::SortNode::GetNext(), impala::HBaseScanNode::GetNext(), impala::TopNNode::GetNext(), impala::ExchangeNode::GetNext(), impala::CrossJoinNode::GetNext(), impala::HashJoinNode::GetNext(), impala::AggregationNode::GetNext(), GetNext(), impala::PartitionedHashJoinNode::GetNext(), impala::PartitionedAggregationNode::GetNext(), impala::HdfsScanNode::GetNextInternal(), impala::ExchangeNode::GetNextMerging(), impala::HashJoinNode::LeftJoinGetNext(), impala::PartitionedHashJoinNode::OutputUnmatchedBuild(), and impala::ExecNode::Prepare().
|
protectedinherited |
Definition at line 227 of file exec-node.h.
Referenced by impala::ExecNode::Prepare().
|
private |
Offset from the current row for ROWS windows with start or end bounds specified with offsets. Is positive if the offset is FOLLOWING, negative if PRECEDING, and 0 if type is CURRENT ROW or UNBOUNDED PRECEDING/FOLLOWING.
Definition at line 219 of file analytic-eval-node.h.
Referenced by AddRow(), AnalyticEvalNode(), TryAddRemainingResults(), and TryRemoveRowsBeforeWindow().
|
protectedinherited |
Definition at line 239 of file exec-node.h.
Referenced by impala::ExecNode::AddRuntimeExecOption().
|
protectedinherited |
Definition at line 225 of file exec-node.h.
Referenced by impala::HBaseScanNode::Close(), impala::SelectNode::GetNext(), impala::UnionNode::GetNext(), impala::SortNode::GetNext(), impala::HBaseScanNode::GetNext(), impala::TopNNode::GetNext(), impala::ExchangeNode::GetNext(), impala::CrossJoinNode::GetNext(), impala::HashJoinNode::GetNext(), impala::AggregationNode::GetNext(), GetNext(), impala::PartitionedHashJoinNode::GetNext(), impala::PartitionedAggregationNode::GetNext(), impala::HdfsScanNode::GetNext(), impala::ExecNode::InitRuntimeProfile(), impala::SelectNode::Open(), impala::HBaseScanNode::Open(), impala::UnionNode::Open(), impala::SortNode::Open(), impala::TopNNode::Open(), impala::ExchangeNode::Open(), impala::DataSourceScanNode::Open(), impala::BlockingJoinNode::Open(), impala::AggregationNode::Open(), Open(), impala::PartitionedAggregationNode::Open(), impala::SelectNode::Prepare(), impala::UnionNode::Prepare(), impala::SortNode::Prepare(), impala::TopNNode::Prepare(), impala::BlockingJoinNode::Prepare(), impala::HashJoinNode::Prepare(), impala::AggregationNode::Prepare(), Prepare(), impala::PartitionedHashJoinNode::Prepare(), impala::ExecNode::Prepare(), impala::ScanNode::Prepare(), impala::PartitionedAggregationNode::Prepare(), impala::HdfsScanNode::Prepare(), impala::ExecNode::runtime_profile(), and impala::HdfsScanNode::StopAndFinalizeCounters().
|
protectedinherited |
Definition at line 210 of file exec-node.h.
Referenced by impala::ExecNode::CollectNodes(), and impala::ExecNode::type().
|
private |
Window over which the analytic functions are evaluated. Only used if fn_scope_ is ROWS or RANGE. TODO: fn_scope_ and window_ are candidates to be removed during codegen.
Definition at line 185 of file analytic-eval-node.h.
Referenced by AddRow(), AnalyticEvalNode(), DebugStateString(), DebugWindowString(), Init(), InitNextPartition(), TryAddRemainingResults(), TryAddResultTupleForCurrRow(), TryAddResultTupleForPrevRow(), and TryRemoveRowsBeforeWindow().
|
private |
Child tuples (described by child_tuple_desc_) that are currently within the window and the index into input_stream_ of the row they're associated with. Only used when window start bound is PRECEDING or FOLLOWING. Tuples in this list are deep copied and allocated from curr_tuple_pool_. TODO: Remove and use BufferedTupleStream (needs support for multiple readers).
Definition at line 260 of file analytic-eval-node.h.
Referenced by AddRow(), DebugStateString(), GetNext(), InitNextPartition(), ProcessChildBatch(), TryAddRemainingResults(), and TryRemoveRowsBeforeWindow().