Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
impala::BlockingJoinNode Class Reference abstract

#include <blocking-join-node.h>

Inheritance diagram for impala::BlockingJoinNode:
Collaboration diagram for impala::BlockingJoinNode:

Public Member Functions

 BlockingJoinNode (const std::string &node_name, const TJoinOp::type join_op, ObjectPool *pool, const TPlanNode &tnode, const DescriptorTbl &descs)
 
virtual ~BlockingJoinNode ()
 
virtual Status Init (const TPlanNode &tnode)
 
virtual Status Prepare (RuntimeState *state)
 
virtual Status Open (RuntimeState *state)
 
virtual Status Reset (RuntimeState *state)
 
virtual void Close (RuntimeState *state)
 
virtual Status GetNext (RuntimeState *state, RowBatch *row_batch, bool *eos)=0
 
void CollectNodes (TPlanNodeType::type node_type, std::vector< ExecNode * > *nodes)
 
void CollectScanNodes (std::vector< ExecNode * > *nodes)
 Collect all scan node types. More...
 
std::string DebugString () const
 Returns a string representation in DFS order of the plan rooted at this. More...
 
const std::vector< ExprContext * > & conjunct_ctxs () const
 
int id () const
 
TPlanNodeType::type type () const
 
const RowDescriptor & row_desc () const
 
int64_t rows_returned () const
 
int64_t limit () const
 
bool ReachedLimit ()
 
RuntimeProfile * runtime_profile ()
 
MemTracker * mem_tracker ()
 
MemTracker * expr_mem_tracker ()
 

Static Public Member Functions

static Status CreateTree (ObjectPool *pool, const TPlan &plan, const DescriptorTbl &descs, ExecNode **root)
 
static void SetDebugOptions (int node_id, TExecNodePhase::type phase, TDebugAction::type action, ExecNode *tree)
 Set debug action for node with given id in 'tree'. More...
 
static bool EvalConjuncts (ExprContext *const *ctxs, int num_ctxs, TupleRow *row)
 
static llvm::Function * CodegenEvalConjuncts (RuntimeState *state, const std::vector< ExprContext * > &conjunct_ctxs, const char *name="EvalConjuncts")
 
static int GetNodeIdFromProfile (RuntimeProfile *p)
 Extract node id from p->name(). More...
 

Static Public Attributes

static const char * LLVM_CLASS_NAME = "class.impala::BlockingJoinNode"
 
static const std::string ROW_THROUGHPUT_COUNTER = "RowsReturnedRate"
 Names of counters shared by all exec nodes. More...
 

Protected Member Functions

virtual Status InitGetNext (TupleRow *first_left_child_row)=0
 
virtual Status ConstructBuildSide (RuntimeState *state)=0
 
virtual void AddToDebugString (int indentation_level, std::stringstream *out) const
 
virtual void DebugString (int indentation_level, std::stringstream *out) const
 Subclasses should not override, use AddToDebugString() to add to the result. More...
 
std::string GetLeftChildRowString (TupleRow *row)
 
void CreateOutputRow (TupleRow *out_row, TupleRow *probe_row, TupleRow *build_row)
 
ExecNode * child (int i)
 
bool is_closed ()
 
virtual bool IsScanNode () const
 
void InitRuntimeProfile (const std::string &name)
 
Status ExecDebugAction (TExecNodePhase::type phase, RuntimeState *state)
 
void AddRuntimeExecOption (const std::string &option)
 Appends option to 'runtime_exec_options_'. More...
 
virtual Status QueryMaintenance (RuntimeState *state)
 
void AddExprCtxToFree (ExprContext *ctx)
 
void AddExprCtxsToFree (const std::vector< ExprContext * > &ctxs)
 
void AddExprCtxsToFree (const SortExecExprs &sort_exec_exprs)
 

Static Protected Member Functions

static Status CreateNode (ObjectPool *pool, const TPlanNode &tnode, const DescriptorTbl &descs, ExecNode **node)
 Create a single exec node derived from thrift node; place exec node in 'pool'. More...
 
static Status CreateTreeHelper (ObjectPool *pool, const std::vector< TPlanNode > &tnodes, const DescriptorTbl &descs, ExecNode *parent, int *node_idx, ExecNode **root)
 

Protected Attributes

const std::string node_name_
 
TJoinOp::type join_op_
 
bool eos_
 
boost::scoped_ptr< MemPool > build_pool_
 
boost::scoped_ptr< RowBatch > probe_batch_
 
bool probe_side_eos_
 
int probe_batch_pos_
 
TupleRow * current_probe_row_
 
bool matched_probe_
 
int probe_tuple_row_size_
 
int build_tuple_row_size_
 
TupleRow * semi_join_staging_row_
 
bool can_add_probe_filters_
 
RuntimeProfile::Counter * build_timer_
 
RuntimeProfile::Counter * probe_timer_
 
RuntimeProfile::Counter * build_row_counter_
 
RuntimeProfile::Counter * probe_row_counter_
 
int id_
 
TPlanNodeType::type type_
 
ObjectPool * pool_
 
std::vector< ExprContext * > conjunct_ctxs_
 
std::vector< ExecNode * > children_
 
RowDescriptor row_descriptor_
 
TExecNodePhase::type debug_phase_
 
TDebugAction::type debug_action_
 
int64_t limit_
 
int64_t num_rows_returned_
 
boost::scoped_ptr< RuntimeProfile > runtime_profile_
 
RuntimeProfile::Counter * rows_returned_counter_
 
RuntimeProfile::Counter * rows_returned_rate_
 
boost::scoped_ptr< MemTracker > mem_tracker_
 Account for peak memory used by this node. More...
 
boost::scoped_ptr< MemTracker > expr_mem_tracker_
 MemTracker that should be used for ExprContexts. More...
 
boost::mutex exec_options_lock_
 
std::string runtime_exec_options_
 

Private Member Functions

void BuildSideThread (RuntimeState *state, Promise< Status > *status)
 

Detailed Description

Abstract base class for join nodes that block while consuming all rows from their right child in Open().

Definition at line 36 of file blocking-join-node.h.

Constructor & Destructor Documentation

BlockingJoinNode::BlockingJoinNode ( const std::string &  node_name,
const TJoinOp::type  join_op,
ObjectPool *  pool,
const TPlanNode &  tnode,
const DescriptorTbl &  descs 
)

Definition at line 34 of file blocking-join-node.cc.

BlockingJoinNode::~BlockingJoinNode ( )
virtual

Definition at line 50 of file blocking-join-node.cc.

References probe_batch_.

Member Function Documentation

void impala::ExecNode::AddExprCtxsToFree ( const SortExecExprs &  sort_exec_exprs)
protected inherited
void impala::ExecNode::AddExprCtxToFree ( ExprContext *  ctx)
inline protected inherited

Add an ExprContext to have its local allocations freed by QueryMaintenance(). Exprs that are evaluated in the main execution thread should be added. Exprs evaluated in a separate thread are generally not safe to add, since a local allocation may be freed while it's being used. Rather than using this mechanism, threads should call FreeLocalAllocations() on local ExprContexts periodically.

Definition at line 276 of file exec-node.h.

References impala::ExecNode::expr_ctxs_to_free_.

Referenced by impala::AnalyticEvalNode::Prepare().

virtual void impala::BlockingJoinNode::AddToDebugString ( int  indentation_level,
std::stringstream *  out 
) const
inlineprotectedvirtual

Gives subclasses an opportunity to add debug output to the debug string printed by DebugString().

Reimplemented in impala::PartitionedHashJoinNode, and impala::HashJoinNode.

Definition at line 117 of file blocking-join-node.h.

Referenced by DebugString().

void BlockingJoinNode::BuildSideThread ( RuntimeState *  state,
Promise< Status > *  status 
)
private
ExecNode* impala::ExecNode::child ( int  i)
inline protected inherited
void BlockingJoinNode::Close ( RuntimeState *  state)
virtual
void impala::ExecNode::CollectNodes ( TPlanNodeType::type  node_type,
std::vector< ExecNode * > *  nodes 
)
inherited

Collect all nodes of given 'node_type' that are part of this subtree, and return in 'nodes'.

Definition at line 359 of file exec-node.cc.

References impala::ExecNode::children_, and impala::ExecNode::type_.

Referenced by impala::ExecNode::CollectScanNodes(), and impala::PlanFragmentExecutor::Prepare().

void impala::ExecNode::CollectScanNodes ( std::vector< ExecNode * > *  nodes)
inherited

Collect all scan node types.

Definition at line 366 of file exec-node.cc.

References impala::ExecNode::CollectNodes().

Referenced by impala::PlanFragmentExecutor::Prepare().

virtual Status impala::BlockingJoinNode::ConstructBuildSide ( RuntimeState *  state)
protected pure virtual

We parallelize building the build-side with Open'ing the left child. If, for example, the left child is another join node, it can start to build its own build-side at the same time.

Implemented in impala::PartitionedHashJoinNode, impala::HashJoinNode, and impala::CrossJoinNode.

Referenced by BuildSideThread(), and Open().

Status impala::ExecNode::CreateNode ( ObjectPool *  pool,
const TPlanNode &  tnode,
const DescriptorTbl &  descs,
ExecNode **  node 
)
staticprotectedinherited

Create a single exec node derived from thrift node; place exec node in 'pool'.

Definition at line 260 of file exec-node.cc.

References impala::ObjectPool::Add(), impala::Status::OK, and RETURN_IF_ERROR.

Referenced by impala::ExecNode::CreateTreeHelper().

void BlockingJoinNode::CreateOutputRow ( TupleRow *  out_row,
TupleRow *  probe_row,
TupleRow *  build_row 
)
protected
Status impala::ExecNode::CreateTree ( ObjectPool *  pool,
const TPlan &  plan,
const DescriptorTbl &  descs,
ExecNode **  root 
)
staticinherited

Creates exec node tree from list of nodes contained in plan via depth-first traversal. All nodes are placed in pool. Returns error if 'plan' is corrupted, otherwise success.

Definition at line 199 of file exec-node.cc.

References impala::ExecNode::CreateTreeHelper(), impala::Status::OK, and impala::Status::ok().

Referenced by impala::PlanFragmentExecutor::Prepare().

Status impala::ExecNode::CreateTreeHelper ( ObjectPool *  pool,
const std::vector< TPlanNode > &  tnodes,
const DescriptorTbl &  descs,
ExecNode *  parent,
int *  node_idx,
ExecNode **  root 
)
staticprotectedinherited
void BlockingJoinNode::DebugString ( int  indentation_level,
std::stringstream *  out 
) const
protectedvirtual

Subclasses should not override, use AddToDebugString() to add to the result.

Reimplemented from impala::ExecNode.

Definition at line 212 of file blocking-join-node.cc.

References AddToDebugString(), impala::ExecNode::DebugString(), eos_, node_name_, and probe_batch_pos_.

string impala::ExecNode::DebugString ( ) const
inherited
string BlockingJoinNode::GetLeftChildRowString ( TupleRow *  row)
protected

Returns a debug string for the left child's 'row'. Left child rows have tuple ptrs that are uninitialized; the left child only populates the tuple ptrs it is responsible for. This function outputs just the row values and leaves the build side values as NULL. This is only used for debugging and outputting the left child rows before doing the join.

Definition at line 222 of file blocking-join-node.cc.

References impala::ExecNode::child(), impala::TupleRow::GetTuple(), impala::PrintTuple(), impala::ExecNode::row_desc(), and impala::RowDescriptor::tuple_descriptors().

Referenced by impala::HashJoinNode::GetNext().

virtual Status impala::ExecNode::GetNext ( RuntimeState *  state,
RowBatch *  row_batch,
bool *  eos 
)
pure virtualinherited

Retrieves rows and returns them via row_batch. Sets eos to true if subsequent calls will not retrieve any more rows. Data referenced by any tuples returned in row_batch must not be overwritten by the callee until Close() is called. The memory holding that data can be returned via row_batch's tuple_data_pool (in which case it may be deleted by the caller) or held on to by the callee. The row_batch, including its tuple_data_pool, will be destroyed by the caller at some point prior to the final Close() call. In other words, if the memory holding the tuple data will be referenced by the callee in subsequent GetNext() calls, it must not be attached to the row_batch's tuple_data_pool. Caller must not be holding any io buffers. This will cause deadlock. TODO: AggregationNode and HashJoinNode cannot be "re-opened" yet.

Implemented in impala::HdfsScanNode, impala::PartitionedAggregationNode, impala::AnalyticEvalNode, impala::PartitionedHashJoinNode, impala::AggregationNode, impala::HashJoinNode, impala::DataSourceScanNode, impala::CrossJoinNode, impala::ExchangeNode, impala::TopNNode, impala::HBaseScanNode, impala::SortNode, impala::UnionNode, impala::SelectNode, and impala::EmptySetNode.

Referenced by impala::PlanFragmentExecutor::GetNextInternal(), and Open().

int impala::ExecNode::GetNodeIdFromProfile ( RuntimeProfile *  p)
static inherited

Extract node id from p->name().

Definition at line 62 of file exec-node.cc.

References impala::RuntimeProfile::metadata().

Status BlockingJoinNode::Init ( const TPlanNode &  tnode)
virtual

Subclasses should call BlockingJoinNode::Init() and then perform any other Init() work, e.g. creating expr trees.

Reimplemented from impala::ExecNode.

Reimplemented in impala::PartitionedHashJoinNode, and impala::HashJoinNode.

Definition at line 45 of file blocking-join-node.cc.

References impala::ExecNode::Init(), impala::Status::OK, and RETURN_IF_ERROR.

Referenced by impala::HashJoinNode::Init(), and impala::PartitionedHashJoinNode::Init().

virtual Status impala::BlockingJoinNode::InitGetNext ( TupleRow *  first_left_child_row)
protected pure virtual

Init the build-side state for a new left child row (e.g. hash table iterator or list iterator) given the first row. Used in Open() to prepare for GetNext(). A NULL ptr for first_left_child_row indicates the left child eos.

Implemented in impala::PartitionedHashJoinNode, impala::HashJoinNode, and impala::CrossJoinNode.

Referenced by Open().

void impala::ExecNode::InitRuntimeProfile ( const std::string &  name)
protectedinherited
virtual bool impala::ExecNode::IsScanNode ( ) const
inlineprotectedvirtualinherited

Reimplemented in impala::ScanNode.

Definition at line 251 of file exec-node.h.

int64_t impala::ExecNode::limit ( ) const
inlineinherited
Status impala::ExecNode::QueryMaintenance ( RuntimeState *  state)
protected virtual inherited
Status BlockingJoinNode::Reset ( RuntimeState *  state)
virtual

Subclasses should reset any state modified in Open() and GetNext() and then call BlockingJoinNode::Reset().

Reimplemented from impala::ExecNode.

Reimplemented in impala::PartitionedHashJoinNode, impala::HashJoinNode, and impala::CrossJoinNode.

Definition at line 120 of file blocking-join-node.cc.

References build_pool_, eos_, probe_batch_, probe_side_eos_, and impala::ExecNode::Reset().

Referenced by impala::CrossJoinNode::Reset().

void impala::ExecNode::SetDebugOptions ( int  node_id,
TExecNodePhase::type  phase,
TDebugAction::type  action,
ExecNode *  tree 
)
staticinherited

Set debug action for node with given id in 'tree'.

Definition at line 332 of file exec-node.cc.

References impala::ExecNode::children_, impala::ExecNode::debug_action_, impala::ExecNode::debug_phase_, and impala::ExecNode::id_.

Referenced by impala::PlanFragmentExecutor::Prepare().

TPlanNodeType::type impala::ExecNode::type ( ) const
inlineinherited

Member Data Documentation

boost::scoped_ptr<MemPool> impala::BlockingJoinNode::build_pool_
protected
RuntimeProfile::Counter* impala::BlockingJoinNode::build_timer_
protected
int impala::BlockingJoinNode::build_tuple_row_size_
protected
bool impala::BlockingJoinNode::can_add_probe_filters_
protected
TDebugAction::type impala::ExecNode::debug_action_
protectedinherited
TExecNodePhase::type impala::ExecNode::debug_phase_
protectedinherited

debug-only: if debug_action_ is not INVALID, node will perform action in debug_phase_

Definition at line 219 of file exec-node.h.

Referenced by impala::ExecNode::ExecDebugAction(), and impala::ExecNode::SetDebugOptions().

bool impala::BlockingJoinNode::eos_
protected
boost::mutex impala::ExecNode::exec_options_lock_
protectedinherited

Execution options that are determined at runtime. This is added to the runtime profile at Close(). Examples for options logged here would be "Codegen Enabled".

Definition at line 238 of file exec-node.h.

Referenced by impala::ExecNode::AddRuntimeExecOption().

boost::scoped_ptr<MemTracker> impala::ExecNode::expr_mem_tracker_
protectedinherited

MemTracker that should be used for ExprContexts.

Definition at line 233 of file exec-node.h.

Referenced by impala::ExecNode::expr_mem_tracker(), and impala::ExecNode::Prepare().

const char * BlockingJoinNode::LLVM_CLASS_NAME = "class.impala::BlockingJoinNode"
static
boost::scoped_ptr<MemTracker> impala::ExecNode::mem_tracker_
protectedinherited

Account for peak memory used by this node.

Definition at line 230 of file exec-node.h.

Referenced by impala::ExecNode::mem_tracker(), and impala::ExecNode::Prepare().

const std::string impala::BlockingJoinNode::node_name_
protected

Definition at line 67 of file blocking-join-node.h.

Referenced by DebugString(), and Open().

int impala::BlockingJoinNode::probe_tuple_row_size_
protected

Size of the TupleRow (just the Tuple ptrs) from the build (right) and probe (left) sides. Set to zero if the build/probe tuples are not returned, e.g., for semi joins. Cached because it is used in the hot path.

Definition at line 88 of file blocking-join-node.h.

Referenced by impala::HashJoinNode::CodegenCreateOutputRow(), impala::PartitionedHashJoinNode::CodegenCreateOutputRow(), CreateOutputRow(), and Prepare().

RowDescriptor impala::ExecNode::row_descriptor_
protectedinherited
const string impala::ExecNode::ROW_THROUGHPUT_COUNTER = "RowsReturnedRate"
staticinherited

Names of counters shared by all exec nodes.

Definition at line 169 of file exec-node.h.

Referenced by impala::ExecNode::Prepare().

RuntimeProfile::Counter* impala::ExecNode::rows_returned_rate_
protectedinherited

Definition at line 227 of file exec-node.h.

Referenced by impala::ExecNode::Prepare().

std::string impala::ExecNode::runtime_exec_options_
protectedinherited

Definition at line 239 of file exec-node.h.

Referenced by impala::ExecNode::AddRuntimeExecOption().

TupleRow* impala::BlockingJoinNode::semi_join_staging_row_
protected

Row assembled from all lhs and rhs tuples used for evaluating the non-equi-join conjuncts for semi joins. Semi joins only return the lhs or rhs output tuples, so this tuple is temporarily assembled for evaluating the conjuncts.

Definition at line 94 of file blocking-join-node.h.

Referenced by Close(), impala::PartitionedHashJoinNode::EvaluateNullProbe(), impala::PartitionedHashJoinNode::OutputNullAwareProbeRows(), Prepare(), impala::HashJoinNode::ProcessProbeBatch(), and impala::PartitionedHashJoinNode::ProcessProbeBatch().

TPlanNodeType::type impala::ExecNode::type_
protectedinherited

Definition at line 210 of file exec-node.h.

Referenced by impala::ExecNode::CollectNodes(), and impala::ExecNode::type().


The documentation for this class was generated from the following files: