Impala
Impala is the open source, native analytic database for Apache Hadoop.
|
#include <hdfs-scan-node.h>
Public Types | |
typedef boost::unordered_map < int32_t, std::pair< int, int64_t > > | PerVolumnStats |
map from volume id to <number of splits, per volume split lengths> More... | |
Public Member Functions | |
HdfsScanNode (ObjectPool *pool, const TPlanNode &tnode, const DescriptorTbl &descs) | |
~HdfsScanNode () | |
virtual Status | Prepare (RuntimeState *state) |
ExecNode methods. More... | |
virtual Status | Open (RuntimeState *state) |
virtual Status | GetNext (RuntimeState *state, RowBatch *row_batch, bool *eos) |
virtual Status | Reset (RuntimeState *state) |
virtual void | Close (RuntimeState *state) |
int | limit () const |
const std::vector < SlotDescriptor * > & | materialized_slots () const |
int | tuple_idx () const |
int | num_partition_keys () const |
Returns number of partition keys in the table, including non-materialized slots. More... | |
int | num_materialized_partition_keys () const |
Returns number of materialized partition key slots. More... | |
const TupleDescriptor * | tuple_desc () |
const HdfsTableDescriptor * | hdfs_table () |
RuntimeState * | runtime_state () |
DiskIoMgr::RequestContext * | reader_context () |
RuntimeProfile::HighWaterMarkCounter * | max_compressed_text_file_length () |
Status | GetConjunctCtxs (std::vector< ExprContext * > *ctxs) |
int | GetMaterializedSlotIdx (const std::vector< int > &path) const |
const bool * | is_materialized_col () |
void * | GetCodegenFn (THdfsFileFormat::type) |
void | IncNumScannersCodegenEnabled () |
void | IncNumScannersCodegenDisabled () |
void | AddMaterializedRowBatch (RowBatch *row_batch) |
DiskIoMgr::ScanRange * | AllocateScanRange (hdfsFS fs, const char *file, int64_t len, int64_t offset, int64_t partition_id, int disk_id, bool try_cache, bool expected_local, int64_t mtime) |
Status | AddDiskIoRanges (const std::vector< DiskIoMgr::ScanRange * > &ranges) |
Adds ranges to the io mgr queue and starts up new scanner threads if possible. More... | |
Status | AddDiskIoRanges (const HdfsFileDesc *file_desc) |
Adds all splits for file_desc to the io mgr queue. More... | |
void | MarkFileDescIssued (const HdfsFileDesc *file_desc) |
Tuple * | InitTemplateTuple (RuntimeState *state, const std::vector< ExprContext * > &value_ctxs) |
Tuple * | InitEmptyTemplateTuple () |
void | TransferToScanNodePool (MemPool *pool) |
Acquires all allocations from pool into scan_node_pool_. Thread-safe. More... | |
HdfsFileDesc * | GetFileDesc (const std::string &filename) |
Returns the file desc for 'filename'. Returns NULL if filename is invalid. More... | |
void * | GetFileMetadata (const std::string &filename) |
void | SetFileMetadata (const std::string &filename, void *metadata) |
void | RangeComplete (const THdfsFileFormat::type &file_type, const THdfsCompression::type &compression_type) |
void | RangeComplete (const THdfsFileFormat::type &file_type, const std::vector< THdfsCompression::type > &compression_type) |
void | ComputeSlotMaterializationOrder (std::vector< int > *order) const |
void | SetScanRanges (const std::vector< TScanRangeParams > &scan_range_params) |
virtual bool | IsScanNode () const |
RuntimeProfile::Counter * | bytes_read_counter () const |
RuntimeProfile::Counter * | rows_read_counter () const |
RuntimeProfile::Counter * | read_timer () const |
RuntimeProfile::Counter * | total_throughput_counter () const |
RuntimeProfile::Counter * | per_read_thread_throughput_counter () const |
RuntimeProfile::Counter * | materialize_tuple_timer () const |
RuntimeProfile::Counter * | scan_ranges_complete_counter () const |
RuntimeProfile::ThreadCounters * | scanner_thread_counters () const |
RuntimeProfile::Counter & | active_scanner_thread_counter () |
RuntimeProfile::Counter * | average_scanner_thread_concurrency () const |
virtual Status | Init (const TPlanNode &tnode) |
void | CollectNodes (TPlanNodeType::type node_type, std::vector< ExecNode * > *nodes) |
void | CollectScanNodes (std::vector< ExecNode * > *nodes) |
Collect all scan node types. More... | |
std::string | DebugString () const |
Returns a string representation in DFS order of the plan rooted at this. More... | |
virtual void | DebugString (int indentation_level, std::stringstream *out) const |
const std::vector< ExprContext * > & | conjunct_ctxs () const |
int | id () const |
TPlanNodeType::type | type () const |
const RowDescriptor & | row_desc () const |
int64_t | rows_returned () const |
bool | ReachedLimit () |
RuntimeProfile * | runtime_profile () |
MemTracker * | mem_tracker () |
MemTracker * | expr_mem_tracker () |
Static Public Member Functions | |
static void | UpdateHdfsSplitStats (const std::vector< TScanRangeParams > &scan_range_params_list, PerVolumnStats *per_volume_stats) |
Update the per volume stats with the given scan range params list. More... | |
static void | PrintHdfsSplitStats (const PerVolumnStats &per_volume_stats, std::stringstream *ss) |
static Status | CreateTree (ObjectPool *pool, const TPlan &plan, const DescriptorTbl &descs, ExecNode **root) |
static void | SetDebugOptions (int node_id, TExecNodePhase::type phase, TDebugAction::type action, ExecNode *tree) |
Set debug action for node with given id in 'tree'. More... | |
static bool | EvalConjuncts (ExprContext *const *ctxs, int num_ctxs, TupleRow *row) |
static llvm::Function * | CodegenEvalConjuncts (RuntimeState *state, const std::vector< ExprContext * > &conjunct_ctxs, const char *name="EvalConjuncts") |
static int | GetNodeIdFromProfile (RuntimeProfile *p) |
Extract node id from p->name(). More... | |
Static Public Attributes | |
static const int | SKIP_COLUMN = -1 |
static const std::string | HDFS_SPLIT_STATS_DESC |
Description string for the per volume stats output. More... | |
static const std::string | BYTES_READ_COUNTER = "BytesRead" |
names of ScanNode common counters More... | |
static const std::string | ROWS_READ_COUNTER = "RowsRead" |
static const std::string | TOTAL_HDFS_READ_TIMER = "TotalRawHdfsReadTime(*)" |
static const std::string | TOTAL_HBASE_READ_TIMER = "TotalRawHBaseReadTime(*)" |
static const std::string | TOTAL_THROUGHPUT_COUNTER = "TotalReadThroughput" |
static const std::string | PER_READ_THREAD_THROUGHPUT_COUNTER |
static const std::string | NUM_DISKS_ACCESSED_COUNTER = "NumDisksAccessed" |
static const std::string | MATERIALIZE_TUPLE_TIMER = "MaterializeTupleTime(*)" |
static const std::string | SCAN_RANGES_COMPLETE_COUNTER = "ScanRangesComplete" |
static const std::string | SCANNER_THREAD_COUNTERS_PREFIX = "ScannerThreads" |
static const std::string | SCANNER_THREAD_TOTAL_WALLCLOCK_TIME |
static const std::string | AVERAGE_SCANNER_THREAD_CONCURRENCY |
static const std::string | AVERAGE_HDFS_READ_THREAD_CONCURRENCY |
static const std::string | NUM_SCANNER_THREADS_STARTED |
static const std::string | ROW_THROUGHPUT_COUNTER = "RowsReturnedRate" |
Names of counters shared by all exec nodes. More... | |
Protected Member Functions | |
ExecNode * | child (int i) |
bool | is_closed () |
void | InitRuntimeProfile (const std::string &name) |
Status | ExecDebugAction (TExecNodePhase::type phase, RuntimeState *state) |
void | AddRuntimeExecOption (const std::string &option) |
Appends option to 'runtime_exec_options_'. More... | |
virtual Status | QueryMaintenance (RuntimeState *state) |
void | AddExprCtxToFree (ExprContext *ctx) |
void | AddExprCtxsToFree (const std::vector< ExprContext * > &ctxs) |
void | AddExprCtxsToFree (const SortExecExprs &sort_exec_exprs) |
Static Protected Member Functions | |
static Status | CreateNode (ObjectPool *pool, const TPlanNode &tnode, const DescriptorTbl &descs, ExecNode **node) |
Create a single exec node derived from thrift node; place exec node in 'pool'. More... | |
static Status | CreateTreeHelper (ObjectPool *pool, const std::vector< TPlanNode > &tnodes, const DescriptorTbl &descs, ExecNode *parent, int *node_idx, ExecNode **root) |
Private Types | |
typedef std::map< std::string, HdfsFileDesc * > | FileDescMap |
File path => file descriptor (which includes the file's splits) More... | |
typedef std::map < THdfsFileFormat::type, std::vector< HdfsFileDesc * > > | FileFormatsMap |
File format => file descriptors. More... | |
typedef std::map < THdfsFileFormat::type, HdfsScanner * > | ScannerMap |
typedef std::map < THdfsFileFormat::type, void * > | CodegendFnMap |
Per scanner type codegen'd fn. More... | |
typedef boost::unordered_map < std::vector< int >, int > | PathToSlotIdxMap |
Maps from a slot's path to its index into materialized_slots_. More... | |
typedef std::map< std::pair < THdfsFileFormat::type, THdfsCompression::type >, int > | FileTypeCountsMap |
Private Member Functions | |
void | ThreadTokenAvailableCb (ThreadResourceMgr::ResourcePool *pool) |
HdfsScanner * | CreateAndPrepareScanner (HdfsPartitionDescriptor *partition_desc, ScannerContext *context, Status *status) |
void | ScannerThread () |
bool | EnoughMemoryForScannerThread (bool new_thread) |
Status | GetNextInternal (RuntimeState *state, RowBatch *row_batch, bool *eos) |
Checks for eos conditions and returns batches from materialized_row_batches_. More... | |
void | SetDone () |
void | StopAndFinalizeCounters () |
Friends | |
class | ScannerContext |
A ScanNode implementation that is used for all tables read directly from HDFS-serialised data. An HdfsScanNode spawns multiple scanner threads to process the bytes in parallel. There is a handshake between the scan node and the scanners to get all the splits queued and bytes processed.
Definition at line 104 of file hdfs-scan-node.h.
|
private |
Per scanner type codegen'd fn.
Definition at line 323 of file hdfs-scan-node.h.
|
private |
File path => file descriptor (which includes the file's splits)
Definition at line 297 of file hdfs-scan-node.h.
|
private |
File format => file descriptors.
Definition at line 301 of file hdfs-scan-node.h.
|
private |
Definition at line 433 of file hdfs-scan-node.h.
|
private |
Maps from a slot's path to its index into materialized_slots_.
Definition at line 331 of file hdfs-scan-node.h.
typedef boost::unordered_map<int32_t, std::pair<int, int64_t> > impala::HdfsScanNode::PerVolumnStats |
map from volume id to <number of splits, per volume split lengths>
Definition at line 252 of file hdfs-scan-node.h.
|
private |
Map from file type to HdfsScanner object. Only one scanner object will be created for each file type. Objects stored in runtime_state's pool.
Definition at line 319 of file hdfs-scan-node.h.
HdfsScanNode::HdfsScanNode | ( | ObjectPool * | pool, |
const TPlanNode & | tnode, | ||
const DescriptorTbl & | descs | ||
) |
Definition at line 81 of file hdfs-scan-node.cc.
References materialized_row_batches_, max_materialized_row_batches_, impala::DiskInfo::num_disks(), and impala::DiskIoMgr::REMOTE_NUM_DISKS.
HdfsScanNode::~HdfsScanNode | ( | ) |
Definition at line 108 of file hdfs-scan-node.cc.
|
inline inherited |
Definition at line 113 of file scan-node.h.
References impala::ScanNode::active_scanner_thread_counter_.
Status impala::HdfsScanNode::AddDiskIoRanges | ( | const std::vector< DiskIoMgr::ScanRange * > & | ranges | ) |
Adds ranges to the io mgr queue and starts up new scanner threads if possible.
Referenced by impala::BaseSequenceScanner::IssueInitialRanges(), impala::HdfsTextScanner::IssueInitialRanges(), impala::HdfsParquetScanner::IssueInitialRanges(), and impala::BaseSequenceScanner::ProcessSplit().
Status HdfsScanNode::AddDiskIoRanges | ( | const HdfsFileDesc * | file_desc | ) |
Adds all splits for file_desc to the io mgr queue.
Definition at line 674 of file hdfs-scan-node.cc.
References impala::DiskIoMgr::AddScanRanges(), impala::RuntimeState::io_mgr(), MarkFileDescIssued(), impala::Status::OK, reader_context_, impala::RuntimeState::resource_pool(), RETURN_IF_ERROR, runtime_state_, impala::HdfsFileDesc::splits, and ThreadTokenAvailableCb().
|
protected inherited |
Referenced by impala::ExecNode::AddExprCtxsToFree(), impala::UnionNode::Prepare(), impala::SortNode::Prepare(), impala::TopNNode::Prepare(), impala::ExchangeNode::Prepare(), impala::PartitionedHashJoinNode::Prepare(), impala::ExecNode::Prepare(), impala::PartitionedAggregationNode::Prepare(), and Prepare().
|
protected inherited |
Definition at line 410 of file exec-node.cc.
References impala::ExecNode::AddExprCtxsToFree(), impala::SortExecExprs::lhs_ordering_expr_ctxs(), impala::SortExecExprs::rhs_ordering_expr_ctxs(), and impala::SortExecExprs::sort_tuple_slot_expr_ctxs().
|
inline protected inherited |
Add an ExprContext to have its local allocations freed by QueryMaintenance(). Exprs that are evaluated in the main execution thread should be added. Exprs evaluated in a separate thread are generally not safe to add, since a local allocation may be freed while it's being used. Rather than using this mechanism, threads should call FreeLocalAllocations() on local ExprContexts periodically.
Definition at line 276 of file exec-node.h.
References impala::ExecNode::expr_ctxs_to_free_.
Referenced by impala::AnalyticEvalNode::Prepare().
void HdfsScanNode::AddMaterializedRowBatch | ( | RowBatch * | row_batch | ) |
Adds a materialized row batch for the scan node. This is called from scanner threads. This function will block if materialized_row_batches_ is full.
Definition at line 688 of file hdfs-scan-node.cc.
References materialized_row_batches_.
Referenced by impala::HdfsScanner::AddFinalRowBatch(), and impala::HdfsScanner::CommitRows().
|
protected inherited |
Appends option to 'runtime_exec_options_'.
Definition at line 188 of file exec-node.cc.
References impala::RuntimeProfile::AddInfoString(), impala::ExecNode::exec_options_lock_, impala::ExecNode::runtime_exec_options_, and impala::ExecNode::runtime_profile().
Referenced by impala::PartitionedHashJoinNode::AttachProbeFilters(), impala::HashJoinNode::ConstructBuildSide(), impala::BlockingJoinNode::Open(), impala::HashJoinNode::Prepare(), impala::AggregationNode::Prepare(), impala::PartitionedHashJoinNode::Prepare(), impala::PartitionedAggregationNode::Prepare(), Prepare(), and StopAndFinalizeCounters().
DiskIoMgr::ScanRange * HdfsScanNode::AllocateScanRange | ( | hdfsFS | fs, |
const char * | file, | ||
int64_t | len, | ||
int64_t | offset, | ||
int64_t | partition_id, | ||
int | disk_id, | ||
bool | try_cache, | ||
bool | expected_local, | ||
int64_t | mtime | ||
) |
Allocate a new scan range object, stored in the runtime state's object pool. For scan ranges that correspond to the original hdfs splits, the partition id must be set to the range's partition id. For other ranges (e.g. columns in parquet, read past buffers), the partition_id is unused. expected_local should be true if this scan range is not expected to require a remote read. The range must fall within the file bounds. That is, the offset must be >= 0, and offset + len <= file_length. This is thread safe.
Definition at line 183 of file hdfs-scan-node.cc.
References impala::ObjectPool::Add(), impala::DiskIoMgr::AssignQueue(), GetFileDesc(), impala::RuntimeState::io_mgr(), impala::RuntimeState::obj_pool(), impala::DiskIoMgr::ScanRange::Reset(), and runtime_state_.
Referenced by impala::HdfsParquetScanner::InitColumns(), impala::BaseSequenceScanner::IssueInitialRanges(), impala::HdfsTextScanner::IssueInitialRanges(), impala::HdfsParquetScanner::IssueInitialRanges(), Prepare(), and impala::HdfsParquetScanner::ProcessFooter().
|
inline inherited |
Definition at line 116 of file scan-node.h.
References impala::ScanNode::average_scanner_thread_concurrency_.
|
inline inherited |
Definition at line 95 of file scan-node.h.
References impala::ScanNode::bytes_read_counter_.
Referenced by impala::HBaseTableScanner::GetFamily(), impala::HBaseTableScanner::GetQualifier(), impala::HBaseTableScanner::GetRowKey(), impala::HBaseTableScanner::GetValue(), Open(), and StopAndFinalizeCounters().
|
inline protected inherited |
Definition at line 241 of file exec-node.h.
References impala::ExecNode::children_.
Referenced by impala::CrossJoinNode::BuildListDebugString(), impala::BlockingJoinNode::BuildSideThread(), impala::HashJoinNode::CodegenCreateOutputRow(), impala::PartitionedHashJoinNode::CodegenCreateOutputRow(), impala::CrossJoinNode::ConstructBuildSide(), impala::HashJoinNode::ConstructBuildSide(), impala::PartitionedHashJoinNode::ConstructBuildSide(), impala::BlockingJoinNode::GetLeftChildRowString(), impala::SelectNode::GetNext(), impala::UnionNode::GetNext(), impala::CrossJoinNode::GetNext(), impala::HashJoinNode::GetNext(), impala::AnalyticEvalNode::GetNextOutputBatch(), impala::PartitionedAggregationNode::Partition::InitStreams(), impala::HashJoinNode::LeftJoinGetNext(), impala::PartitionedHashJoinNode::NextProbeRowBatch(), impala::SelectNode::Open(), impala::SortNode::Open(), impala::TopNNode::Open(), impala::BlockingJoinNode::Open(), impala::AggregationNode::Open(), impala::AnalyticEvalNode::Open(), impala::PartitionedAggregationNode::Open(), impala::UnionNode::OpenCurrentChild(), impala::SelectNode::Prepare(), impala::SortNode::Prepare(), impala::UnionNode::Prepare(), impala::TopNNode::Prepare(), impala::BlockingJoinNode::Prepare(), impala::HashJoinNode::Prepare(), impala::AggregationNode::Prepare(), impala::AnalyticEvalNode::Prepare(), impala::PartitionedHashJoinNode::Prepare(), impala::PartitionedAggregationNode::Prepare(), impala::PartitionedHashJoinNode::ProcessBuildInput(), impala::AnalyticEvalNode::ProcessChildBatches(), and impala::SortNode::SortInput().
|
virtual |
Close() will get called for every exec node, regardless of what else is called and the status of these calls (i.e. Prepare() may never have been called, or Prepare()/Open()/GetNext() returned with an error). Close() releases all resources that were allocated in Open()/GetNext(), even if the latter ended with an error. Close() can be called if the node has been prepared or the node is closed. The default implementation updates runtime profile counters and calls Close() on the children. Subclasses should check if the node has already been closed (is_closed()), then close themselves, then call the base Close(). Nodes that are using tuples returned by a child may call Close() on their children before their own Close() if the child node has returned eos. It is only safe to call Close() on the child node while the parent node is still returning rows if the parent node fully materializes the child's input.
Reimplemented from impala::ExecNode.
Definition at line 622 of file hdfs-scan-node.cc.
References impala::ScanNode::active_hdfs_read_thread_counter_, impala::ScanNode::active_scanner_thread_counter_, impala::DiskIoMgr::CancelContext(), impala::ExecNode::Close(), impala::Expr::Close(), impala::HdfsPartitionDescriptor::CloseExprs(), conjunct_ctxs_, impala::HdfsTableDescriptor::GetPartition(), hdfs_table_, impala::RuntimeState::io_mgr(), impala::ExecNode::is_closed(), impala::ThreadGroup::JoinAll(), materialized_row_batches_, num_owned_io_buffers_, partition_ids_, impala::RuntimeState::query_resource_mgr(), reader_context_, impala::RuntimeState::reader_contexts(), impala::QueryResourceMgr::RemoveVcoreAvailableCb(), impala::RuntimeState::resource_pool(), rm_callback_id_, scan_node_pool_, scanner_threads_, SetDone(), impala::ThreadResourceMgr::ResourcePool::SetThreadAvailableCb(), StopAndFinalizeCounters(), and impala::RuntimeProfile::Counter::value().
|
static inherited |
Returns a codegen'd version of EvalConjuncts(), or NULL if the function couldn't be codegen'd. The codegen'd version uses inlined, codegen'd GetBooleanVal() functions.
Definition at line 452 of file exec-node.cc.
References impala::LlvmCodeGen::FnPrototype::AddArgument(), impala::LlvmCodeGen::context(), impala::CodegenAnyVal::CreateCallWrapped(), impala::LlvmCodeGen::false_value(), impala::LlvmCodeGen::FinalizeFunction(), impala::RuntimeState::GetCodegen(), impala::Status::GetDetail(), impala::CodegenAnyVal::GetIsNull(), impala::LlvmCodeGen::GetType(), impala::CodegenAnyVal::GetVal(), impala::TupleRow::LLVM_CLASS_NAME, impala::ExprContext::LLVM_CLASS_NAME, impala::Status::ok(), impala::LlvmCodeGen::true_value(), impala::ExecNode::type(), impala::TYPE_BOOLEAN, impala::TYPE_INT, and VLOG_QUERY.
Referenced by impala::HdfsAvroScanner::CodegenDecodeAvroData(), impala::HashJoinNode::CodegenProcessProbeBatch(), and impala::PartitionedHashJoinNode::CodegenProcessProbeBatch().
|
inherited |
Collect all nodes of given 'node_type' that are part of this subtree, and return in 'nodes'.
Definition at line 359 of file exec-node.cc.
References impala::ExecNode::children_, and impala::ExecNode::type_.
Referenced by impala::ExecNode::CollectScanNodes(), and impala::PlanFragmentExecutor::Prepare().
|
inherited |
Collect all scan node types.
Definition at line 366 of file exec-node.cc.
References impala::ExecNode::CollectNodes().
Referenced by impala::PlanFragmentExecutor::Prepare().
void HdfsScanNode::ComputeSlotMaterializationOrder | ( | std::vector< int > * | order | ) | const |
Utility function to compute the order in which to materialize slots to allow for computing conjuncts as slots get materialized (on partial tuples). 'order' will contain for each slot, the first conjunct it is associated with. e.g. order[2] = 1 indicates materialized_slots[2] must be materialized before evaluating conjuncts[1]. Slots that are not referenced by any conjuncts will have order set to conjuncts.size().
Definition at line 956 of file hdfs-scan-node.cc.
References impala::SlotDescriptor::col_path(), impala::ExecNode::conjunct_ctxs(), impala::RuntimeState::desc_tbl(), GetMaterializedSlotIdx(), impala::DescriptorTbl::GetSlotDescriptor(), materialized_slots(), and runtime_state_.
Referenced by impala::HdfsScanner::CodegenWriteCompleteTuple().
|
inline inherited |
Definition at line 152 of file exec-node.h.
References impala::ExecNode::conjunct_ctxs_.
Referenced by ComputeSlotMaterializationOrder(), impala::SelectNode::CopyRows(), impala::UnionNode::EvalAndMaterializeExprs(), impala::HashJoinNode::GetNext(), impala::PartitionedHashJoinNode::OutputUnmatchedBuild(), impala::HashJoinNode::ProcessProbeBatch(), and impala::PartitionedHashJoinNode::ProcessProbeBatch().
|
private |
Create and prepare new scanner for this partition type. If the scanner cannot be created, return NULL.
Definition at line 230 of file hdfs-scan-node.cc.
References impala::ObjectPool::Add(), impala::HdfsFileDesc::file_compression, impala::ScannerContext::Stream::file_desc(), impala::HdfsPartitionDescriptor::file_format(), impala::HdfsLzoTextScanner::GetHdfsLzoTextScanner(), impala::ScannerContext::GetStream(), impala::RuntimeState::obj_pool(), impala::HdfsScanner::Prepare(), and runtime_state_.
Referenced by ScannerThread().
|
static protected inherited |
Create a single exec node derived from thrift node; place exec node in 'pool'.
Definition at line 260 of file exec-node.cc.
References impala::ObjectPool::Add(), impala::Status::OK, and RETURN_IF_ERROR.
Referenced by impala::ExecNode::CreateTreeHelper().
|
static inherited |
Creates exec node tree from list of nodes contained in plan via depth-first traversal. All nodes are placed in pool. Returns error if 'plan' is corrupted, otherwise success.
Definition at line 199 of file exec-node.cc.
References impala::ExecNode::CreateTreeHelper(), impala::Status::OK, and impala::Status::ok().
Referenced by impala::PlanFragmentExecutor::Prepare().
|
static protected inherited |
Definition at line 218 of file exec-node.cc.
References impala::RuntimeProfile::AddChild(), impala::ExecNode::children_, impala::ExecNode::CreateNode(), impala::Status::OK, RETURN_IF_ERROR, and impala::ExecNode::runtime_profile().
Referenced by impala::ExecNode::CreateTree().
|
inherited |
Returns a string representation in DFS order of the plan rooted at this.
Definition at line 345 of file exec-node.cc.
Referenced by impala::SortNode::DebugString(), impala::TopNNode::DebugString(), impala::ExchangeNode::DebugString(), impala::AggregationNode::DebugString(), impala::AnalyticEvalNode::DebugString(), impala::PartitionedAggregationNode::DebugString(), impala::BlockingJoinNode::DebugString(), and impala::PlanFragmentExecutor::Prepare().
|
virtual inherited |
Recursive helper method for generating a string for DebugString(). Implementations should call DebugString(int, std::stringstream) on their children. Input parameters: indentation_level: Current level in plan tree. Output parameters: out: Stream to accumulate debug string.
Reimplemented in impala::BlockingJoinNode, impala::PartitionedAggregationNode, impala::AnalyticEvalNode, impala::AggregationNode, impala::DataSourceScanNode, impala::ExchangeNode, impala::HBaseScanNode, impala::TopNNode, and impala::SortNode.
|
private |
Returns true if there is enough memory (against the mem tracker limits) to have a scanner thread. If new_thread is true, the calculation is for starting a new scanner thread. If false, it determines whether there's adequate memory for the existing set of scanner threads. lock_ must be taken before calling this.
Definition at line 706 of file hdfs-scan-node.cc.
References impala::ScanNode::active_scanner_thread_counter_, impala::MemTracker::consumption(), impala::ExecNode::mem_tracker(), scanner_thread_bytes_required_, impala::MemTracker::SpareCapacity(), and impala::RuntimeProfile::Counter::value().
Referenced by ScannerThread(), and ThreadTokenAvailableCb().
|
static inherited |
Evaluate ExprContexts over row. Returns true if all exprs return true. TODO: This doesn't use the vector<Expr*> signature because I haven't figured out how to deal with declaring a templated std::vector type in IR.
Definition at line 393 of file exec-node.cc.
References impala::ExprContext::GetBooleanVal(), impala_udf::AnyVal::is_null, and impala_udf::BooleanVal::val.
Referenced by impala::SelectNode::CopyRows(), impala::UnionNode::EvalAndMaterializeExprs(), impala::HdfsScanner::EvalConjuncts(), EvalOtherJoinConjuncts(), EvalOtherJoinConjuncts2(), impala::PartitionedHashJoinNode::EvaluateNullProbe(), impala::HBaseScanNode::GetNext(), impala::HashJoinNode::GetNext(), impala::AggregationNode::GetNext(), impala::PartitionedAggregationNode::GetNext(), impala::AnalyticEvalNode::GetNextOutputBatch(), impala::PartitionedHashJoinNode::OutputNullAwareProbeRows(), impala::PartitionedHashJoinNode::OutputUnmatchedBuild(), impala::CrossJoinNode::ProcessLeftChildBatch(), impala::HashJoinNode::ProcessProbeBatch(), and impala::PartitionedHashJoinNode::ProcessProbeBatch().
|
protected inherited |
Executes debug_action_ if phase matches debug_phase_. 'phase' must not be INVALID.
Definition at line 378 of file exec-node.cc.
References impala::Status::CANCELLED, impala::ExecNode::debug_action_, impala::ExecNode::debug_phase_, impala::RuntimeState::is_cancelled(), and impala::Status::OK.
Referenced by impala::SelectNode::GetNext(), impala::SortNode::GetNext(), impala::UnionNode::GetNext(), impala::HBaseScanNode::GetNext(), impala::TopNNode::GetNext(), impala::ExchangeNode::GetNext(), impala::CrossJoinNode::GetNext(), impala::HashJoinNode::GetNext(), impala::AggregationNode::GetNext(), impala::AnalyticEvalNode::GetNext(), impala::PartitionedHashJoinNode::GetNext(), impala::PartitionedAggregationNode::GetNext(), GetNextInternal(), impala::ExecNode::Open(), and impala::ExecNode::Prepare().
|
inline inherited |
Definition at line 163 of file exec-node.h.
References impala::ExecNode::expr_mem_tracker_.
Referenced by impala::PartitionedAggregationNode::Partition::InitStreams(), impala::SortNode::Prepare(), impala::UnionNode::Prepare(), impala::TopNNode::Prepare(), impala::ExchangeNode::Prepare(), impala::HashJoinNode::Prepare(), impala::AggregationNode::Prepare(), impala::PartitionedHashJoinNode::Prepare(), impala::AnalyticEvalNode::Prepare(), impala::ExecNode::Prepare(), impala::PartitionedAggregationNode::Prepare(), and Prepare().
void * HdfsScanNode::GetCodegenFn | ( | THdfsFileFormat::type | type | ) |
Returns the per format codegen'd function. Scanners call this to get the codegen'd function to use. Returns NULL if codegen should not be used.
Definition at line 224 of file hdfs-scan-node.cc.
References codegend_fn_map_.
Referenced by impala::HdfsScanner::InitializeWriteTuplesFn(), and impala::HdfsAvroScanner::InitNewRange().
Status HdfsScanNode::GetConjunctCtxs | ( | std::vector< ExprContext * > * | ctxs | ) |
Creates a clone of conjunct_ctxs_. 'ctxs' should be non-NULL and empty. The returned contexts must be closed by the caller.
Definition at line 692 of file hdfs-scan-node.cc.
References impala::Expr::Clone(), conjunct_ctxs_, and runtime_state_.
Referenced by impala::HdfsScanner::Prepare().
HdfsFileDesc * HdfsScanNode::GetFileDesc | ( | const std::string & | filename | ) |
Returns the file desc for 'filename'. Returns NULL if filename is invalid.
Definition at line 206 of file hdfs-scan-node.cc.
References file_descs_.
Referenced by impala::ScannerContext::AddStream(), AllocateScanRange(), impala::BaseSequenceScanner::CloseFileRanges(), impala::HdfsTextScanner::FillByteBufferCompressedFile(), impala::HdfsParquetScanner::InitColumns(), impala::HdfsParquetScanner::ProcessFooter(), and impala::BaseSequenceScanner::ProcessSplit().
void * HdfsScanNode::GetFileMetadata | ( | const std::string & | filename | ) |
Gets scanner specific metadata for 'filename'. Scanners can use this to store file header information. Returns NULL if there is no metadata. This is thread safe.
Definition at line 217 of file hdfs-scan-node.cc.
References metadata_lock_, and per_file_metadata_.
Referenced by impala::BaseSequenceScanner::ProcessSplit().
|
inline |
Returns index into materialized_slots with 'path'. Returns SKIP_COLUMN if that path is not materialized.
Definition at line 152 of file hdfs-scan-node.h.
References path_to_materialized_slot_idx_, and SKIP_COLUMN.
Referenced by impala::HdfsAvroScanner::CodegenMaterializeTuple(), ComputeSlotMaterializationOrder(), impala::HdfsRCFileScanner::InitNewRange(), Prepare(), and impala::HdfsAvroScanner::ResolveSchemas().
|
virtual |
Retrieves rows and returns them via row_batch. Sets eos to true if subsequent calls will not retrieve any more rows. Data referenced by any tuples returned in row_batch must not be overwritten by the callee until Close() is called. The memory holding that data can be returned via row_batch's tuple_data_pool (in which case it may be deleted by the caller) or held on to by the callee. The row_batch, including its tuple_data_pool, will be destroyed by the caller at some point prior to the final Close() call. In other words, if the memory holding the tuple data will be referenced by the callee in subsequent GetNext() calls, it must not be attached to the row_batch's tuple_data_pool. Caller must not be holding any io buffers. This will cause deadlock. TODO: AggregationNode and HashJoinNode cannot be "re-opened" yet.
Implements impala::ExecNode.
Definition at line 111 of file hdfs-scan-node.cc.
References impala::ProgressUpdater::done(), GetNextInternal(), initial_ranges_issued_, impala::Status::IsMemLimitExceeded(), impala::BaseSequenceScanner::IssueInitialRanges(), impala::HdfsTextScanner::IssueInitialRanges(), impala::HdfsParquetScanner::IssueInitialRanges(), impala::Status::ok(), per_type_files_, progress_, RETURN_IF_ERROR, impala::ExecNode::runtime_profile_, SCOPED_TIMER, SetDone(), impala::RuntimeState::SetMemLimitExceeded(), and StopAndFinalizeCounters().
|
private |
Checks for eos conditions and returns batches from materialized_row_batches_.
Definition at line 139 of file hdfs-scan-node.cc.
References impala::RowBatch::AcquireState(), COUNTER_SET, impala::ExecNode::ExecDebugAction(), impala::ExecNode::limit_, lock_, materialized_row_batches_, impala::RowBatch::num_io_buffers(), num_owned_io_buffers_, impala::RowBatch::num_rows(), impala::ExecNode::num_rows_returned_, impala::Status::OK, impala::ExecNode::QueryMaintenance(), impala::ExecNode::ReachedLimit(), RETURN_IF_CANCELLED, RETURN_IF_ERROR, impala::ExecNode::rows_returned_counter_, impala::RowBatch::set_num_rows(), SetDone(), and status_.
Referenced by GetNext().
|
staticinherited |
Extract node id from p->name().
Definition at line 62 of file exec-node.cc.
References impala::RuntimeProfile::metadata().
|
inline |
Definition at line 134 of file hdfs-scan-node.h.
References hdfs_table_.
Referenced by impala::HdfsAvroScanner::CodegenMaterializeTuple(), impala::HdfsScanner::CodegenWriteCompleteTuple(), impala::HdfsTextScanner::InitNewRange(), impala::HdfsSequenceScanner::InitNewRange(), impala::HdfsRCFileScanner::InitNewRange(), impala::HdfsAvroScanner::ParseMetadata(), impala::HdfsRCFileScanner::Prepare(), impala::HdfsAvroScanner::ResolveSchemas(), and impala::HdfsAvroScanner::VerifyTypesMatch().
|
inlineinherited |
Definition at line 154 of file exec-node.h.
References impala::ExecNode::id_.
Referenced by impala::AnalyticEvalNode::AddResultTuple(), impala::AnalyticEvalNode::AddRow(), impala::AnalyticEvalNode::AnalyticEvalNode(), impala::AnalyticEvalNode::GetNext(), impala::AnalyticEvalNode::GetNextOutputBatch(), impala::HdfsScanner::InitializeWriteTuplesFn(), impala::HdfsAvroScanner::InitNewRange(), impala::AnalyticEvalNode::InitNextPartition(), impala::PartitionedAggregationNode::MoveHashPartitions(), impala::PartitionedHashJoinNode::NodeDebugString(), impala::AnalyticEvalNode::Open(), Open(), impala::PlanFragmentExecutor::Prepare(), impala::PartitionedHashJoinNode::ProcessBuildInput(), impala::AnalyticEvalNode::ProcessChildBatch(), ScannerThread(), impala::AnalyticEvalNode::TryAddRemainingResults(), impala::AnalyticEvalNode::TryAddResultTupleForCurrRow(), impala::AnalyticEvalNode::TryAddResultTupleForPrevRow(), and impala::AnalyticEvalNode::TryRemoveRowsBeforeWindow().
|
inline |
Definition at line 172 of file hdfs-scan-node.h.
References num_scanners_codegen_disabled_.
Referenced by impala::HdfsScanner::InitializeWriteTuplesFn(), impala::HdfsAvroScanner::InitNewRange(), impala::HdfsParquetScanner::Prepare(), and impala::HdfsRCFileScanner::Prepare().
|
inline |
Definition at line 168 of file hdfs-scan-node.h.
References num_scanners_codegen_enabled_.
Referenced by impala::HdfsScanner::InitializeWriteTuplesFn(), and impala::HdfsAvroScanner::InitNewRange().
|
virtualinherited |
Initializes this object from the thrift tnode desc. The subclass should do any initialization that can fail in Init() rather than the ctor. If overridden in subclass, must first call superclass's Init().
Reimplemented in impala::PartitionedAggregationNode, impala::AnalyticEvalNode, impala::PartitionedHashJoinNode, impala::AggregationNode, impala::HashJoinNode, impala::BlockingJoinNode, impala::ExchangeNode, impala::TopNNode, impala::SortNode, and impala::UnionNode.
Definition at line 124 of file exec-node.cc.
References impala::ExecNode::conjunct_ctxs_, impala::Expr::CreateExprTrees(), impala::Status::OK, impala::ExecNode::pool_, and RETURN_IF_ERROR.
Referenced by impala::SortNode::Init(), impala::UnionNode::Init(), impala::TopNNode::Init(), impala::ExchangeNode::Init(), impala::BlockingJoinNode::Init(), impala::AggregationNode::Init(), impala::AnalyticEvalNode::Init(), and impala::PartitionedAggregationNode::Init().
Tuple * HdfsScanNode::InitEmptyTemplateTuple | ( | ) |
Allocates and returns an empty template tuple (i.e. with no values filled in). Scanners can use this method to initialize a template tuple even if there are no materialized partition keys (e.g. to hold Avro default values).
Definition at line 289 of file hdfs-scan-node.cc.
References impala::TupleDescriptor::byte_size(), impala::Tuple::Create(), lock_, scan_node_pool_, and tuple_desc_.
Referenced by impala::HdfsParquetScanner::CreateColumnReaders(), InitTemplateTuple(), and impala::HdfsAvroScanner::ResolveSchemas().
|
protectedinherited |
Definition at line 371 of file exec-node.cc.
References impala::ExecNode::id_, impala::ExecNode::pool_, and impala::ExecNode::runtime_profile_.
Referenced by impala::ExecNode::ExecNode().
Tuple * HdfsScanNode::InitTemplateTuple | ( | RuntimeState * | state, |
const std::vector< ExprContext * > & | value_ctxs | ||
) |
Allocates and initialises template_tuple_ with any values from the partition columns for the current scan range. Returns NULL if there are no materialized partition keys. TODO: cache the tuple template in the partition object.
Definition at line 270 of file hdfs-scan-node.cc.
References impala::SlotDescriptor::col_pos(), InitEmptyTemplateTuple(), lock_, partition_key_slots_, and impala::RawValue::Write().
Referenced by impala::HdfsScanner::Prepare().
|
inlineprotectedinherited |
Definition at line 242 of file exec-node.h.
References impala::ExecNode::is_closed_.
Referenced by impala::SelectNode::Close(), impala::SortNode::Close(), impala::UnionNode::Close(), impala::TopNNode::Close(), impala::ExchangeNode::Close(), impala::HBaseScanNode::Close(), impala::CrossJoinNode::Close(), impala::HashJoinNode::Close(), impala::AggregationNode::Close(), impala::BlockingJoinNode::Close(), impala::AnalyticEvalNode::Close(), impala::PartitionedHashJoinNode::Close(), impala::PartitionedAggregationNode::Close(), Close(), impala::PartitionedAggregationNode::Partition::Close(), impala::PartitionedHashJoinNode::Partition::Close(), impala::PartitionedHashJoinNode::ReserveTupleStreamBlocks(), impala::PartitionedHashJoinNode::SpillPartition(), impala::PartitionedAggregationNode::SpillPartition(), and impala::PartitionedHashJoinNode::Partition::~Partition().
|
inline |
The result array is of length num_cols(). The i-th element is true iff column i should be materialized.
Definition at line 160 of file hdfs-scan-node.h.
References is_materialized_col_.
Referenced by impala::HdfsTextScanner::InitNewRange(), and impala::HdfsSequenceScanner::InitNewRange().
|
inlinevirtualinherited |
Reimplemented from impala::ExecNode.
Definition at line 93 of file scan-node.h.
|
inline |
Definition at line 117 of file hdfs-scan-node.h.
References impala::ExecNode::limit_.
Referenced by impala::HdfsSequenceScanner::ProcessDecompressedBlock(), and impala::HdfsTextScanner::WriteFields().
void HdfsScanNode::MarkFileDescIssued | ( | const HdfsFileDesc * | file_desc | ) |
Indicates that this file_desc's scan ranges have all been issued to the IoMgr. For each file, the scanner must call MarkFileDescIssued() or AddDiskIoRanges(). Issuing ranges happens asynchronously. For many of the file formats we synchronously issue the file header/footer in Open() but the rest of the splits for the file are issued asynchronously.
Definition at line 683 of file hdfs-scan-node.cc.
References num_unqueued_files_.
Referenced by AddDiskIoRanges(), and impala::HdfsParquetScanner::ProcessFooter().
|
inlineinherited |
Definition at line 104 of file scan-node.h.
References impala::ScanNode::materialize_tuple_timer_.
Referenced by impala::HBaseScanNode::GetNext(), impala::HdfsSequenceScanner::ProcessDecompressedBlock(), impala::HdfsTextScanner::ProcessRange(), impala::HdfsAvroScanner::ProcessRange(), impala::HdfsSequenceScanner::ProcessRange(), impala::HdfsRCFileScanner::ProcessRange(), and impala::HdfsTextScanner::WriteFields().
|
inline |
Definition at line 119 of file hdfs-scan-node.h.
References materialized_slots_.
Referenced by impala::HdfsAvroScanner::CodegenMaterializeTuple(), impala::HdfsScanner::CodegenWriteCompleteTuple(), ComputeSlotMaterializationOrder(), impala::HdfsParquetScanner::CreateColumnReaders(), impala::HdfsTextScanner::FinishScanRange(), impala::HdfsParquetScanner::InitColumns(), impala::HdfsTextScanner::InitNewRange(), impala::HdfsTextScanner::Prepare(), impala::HdfsSequenceScanner::Prepare(), impala::HdfsSequenceScanner::ProcessDecompressedBlock(), impala::HdfsParquetScanner::ProcessFooter(), impala::HdfsTextScanner::ProcessRange(), impala::HdfsAvroScanner::ProcessRange(), impala::HdfsSequenceScanner::ProcessRange(), impala::HdfsRCFileScanner::ProcessRange(), impala::HdfsScanner::ReportTupleParseError(), impala::HdfsAvroScanner::ResolveSchemas(), impala::HdfsScanner::WriteCompleteTuple(), impala::HdfsTextScanner::WriteFields(), and impala::HdfsTextScanner::WritePartialTuple().
|
inline |
Definition at line 140 of file hdfs-scan-node.h.
References max_compressed_text_file_length_.
Referenced by impala::HdfsTextScanner::IssueInitialRanges().
|
inlineinherited |
Definition at line 162 of file exec-node.h.
References impala::ExecNode::mem_tracker_.
Referenced by impala::ExecNode::Close(), impala::CrossJoinNode::ConstructBuildSide(), impala::HashJoinNode::ConstructBuildSide(), EnoughMemoryForScannerThread(), impala::AnalyticEvalNode::GetNextOutputBatch(), impala::SortNode::Open(), impala::TopNNode::Open(), impala::AggregationNode::Open(), impala::AnalyticEvalNode::Open(), impala::PartitionedAggregationNode::Open(), Open(), impala::UnionNode::OpenCurrentChild(), impala::SelectNode::Prepare(), impala::HBaseScanNode::Prepare(), impala::TopNNode::Prepare(), impala::BlockingJoinNode::Prepare(), impala::HashJoinNode::Prepare(), impala::AggregationNode::Prepare(), impala::PartitionedHashJoinNode::Prepare(), impala::AnalyticEvalNode::Prepare(), impala::PartitionedAggregationNode::Prepare(), Prepare(), impala::PartitionedHashJoinNode::PrepareNextPartition(), impala::PartitionedHashJoinNode::ProcessBuildInput(), impala::PartitionedAggregationNode::ProcessStream(), impala::HdfsRCFileScanner::ReadRowGroup(), impala::HdfsAvroScanner::ResolveSchemas(), impala::SortNode::SortInput(), and impala::HdfsScanner::StartNewRowBatch().
|
inline |
Returns number of materialized partition key slots.
Definition at line 130 of file hdfs-scan-node.h.
References partition_key_slots_.
Referenced by impala::HdfsScanner::CodegenWriteCompleteTuple(), and impala::HdfsTextScanner::FinishScanRange().
|
inline |
Returns number of partition keys in the table, including non-materialized slots.
Definition at line 127 of file hdfs-scan-node.h.
References hdfs_table_, and impala::TableDescriptor::num_clustering_cols().
Referenced by impala::HdfsAvroScanner::CodegenMaterializeTuple(), impala::HdfsParquetScanner::CreateColumnReaders(), impala::HdfsTextScanner::InitNewRange(), impala::HdfsSequenceScanner::InitNewRange(), impala::HdfsRCFileScanner::InitNewRange(), impala::HdfsRCFileScanner::ProcessRange(), impala::HdfsParquetScanner::BaseColumnReader::ReadDataPage(), impala::HdfsScanner::ReportColumnParseError(), and impala::HdfsAvroScanner::ResolveSchemas().
|
virtual |
Performs any preparatory work prior to calling GetNext(). Caller must not be holding any io buffers; doing so will cause deadlock. If overridden in subclass, must first call superclass's Open(). If a parent exec node adds slot filters (see RuntimeState::AddBitmapFilter()), they need to be added before calling Open() on the child that will consume them.
Reimplemented from impala::ExecNode.
Definition at line 511 of file hdfs-scan-node.cc.
References impala::ScanNode::active_hdfs_read_thread_counter_, impala::ScanNode::active_scanner_thread_counter_, impala::ObjectPool::Add(), ADD_COUNTER, ADD_TIMER, impala::RuntimeProfile::AddDerivedCounter(), impala::RuntimeProfile::AddHighWaterMarkCounter(), impala::RuntimeProfile::AddSamplingCounter(), impala::QueryResourceMgr::AddVcoreAvailableCb(), impala::ScanNode::AVERAGE_HDFS_READ_THREAD_CONCURRENCY, impala::ScanNode::average_hdfs_read_thread_concurrency_, impala::ScanNode::AVERAGE_SCANNER_THREAD_CONCURRENCY, impala::ScanNode::average_scanner_thread_concurrency_, impala::ScanNode::bytes_read_counter(), impala::ScanNode::bytes_read_counter_, bytes_read_dn_cache_, bytes_read_local_, bytes_read_short_circuit_, conjunct_ctxs_, counters_running_, disks_accessed_bitmap_, file_descs_, impala::HdfsTableDescriptor::GetPartition(), impala::ScanNode::hdfs_read_thread_concurrency_bucket_, hdfs_table_, impala::ExecNode::id(), impala::RuntimeState::io_mgr(), max_compressed_text_file_length_, impala::ExecNode::mem_tracker(), impala::DiskInfo::num_disks(), impala::ScanNode::NUM_DISKS_ACCESSED_COUNTER, impala::ScanNode::num_disks_accessed_counter_, num_remote_ranges_, impala::ScanNode::NUM_SCANNER_THREADS_STARTED, impala::ScanNode::num_scanner_threads_started_counter_, impala::DiskIoMgr::num_total_disks(), impala::Status::OK, impala::ExecNode::Open(), impala::Expr::Open(), impala::HdfsPartitionDescriptor::OpenExprs(), partition_ids_, impala::ScanNode::PER_READ_THREAD_THROUGHPUT_COUNTER, impala::ScanNode::per_read_thread_throughput_counter_, impala::ExecNode::pool_, progress_, impala::RuntimeState::query_options(), impala::RuntimeState::query_resource_mgr(), impala::ScanNode::read_timer(), impala::ScanNode::read_timer_, reader_context_, impala::RuntimeProfile::RegisterBucketingCounters(), impala::DiskIoMgr::RegisterContext(), impala::ThreadResourceMgr::ResourcePool::ReserveOptionalTokens(), impala::RuntimeState::resource_pool(), RETURN_IF_ERROR, rm_callback_id_, 
impala::ExecNode::runtime_profile(), runtime_state_, impala::ScanNode::SCAN_RANGES_COMPLETE_COUNTER, impala::ScanNode::scan_ranges_complete_counter_, impala::DiskIoMgr::set_active_read_thread_counter(), impala::DiskIoMgr::set_bytes_read_counter(), impala::DiskIoMgr::set_disks_access_bitmap(), impala::ThreadResourceMgr::ResourcePool::set_max_quota(), impala::DiskIoMgr::set_read_timer(), SetDone(), impala::ThreadResourceMgr::ResourcePool::SetThreadAvailableCb(), ThreadTokenAvailableCb(), impala::ScanNode::TOTAL_HDFS_READ_TIMER, unexpected_remote_bytes_, and impala::RuntimeProfile::UnitsPerSecond().
|
inlineinherited |
Definition at line 101 of file scan-node.h.
References impala::ScanNode::per_read_thread_throughput_counter_.
|
virtual |
ExecNode methods.
Reimplemented from impala::ScanNode.
Definition at line 304 of file hdfs-scan-node.cc.
References impala::ExecNode::AddExprCtxsToFree(), impala::LlvmCodeGen::AddFunctionToJit(), impala::RuntimeProfile::AddInfoString(), impala::ExecNode::AddRuntimeExecOption(), AllocateScanRange(), impala::RuntimeState::cgroup(), impala::ExecEnv::cgroups_mgr(), impala::HdfsTextScanner::Codegen(), impala::HdfsAvroScanner::Codegen(), impala::HdfsSequenceScanner::Codegen(), codegend_fn_map_, impala::SlotDescriptor::ColPathLessThan(), COMPRESSED_TEXT_COMPRESSION_RATIO, conjunct_ctxs_, impala::Expr::CreateExprTrees(), impala::RuntimeState::desc_tbl(), impala::RuntimeState::exec_env(), impala::ExecNode::expr_mem_tracker(), impala::HdfsFileDesc::file_compression, file_descs_, impala::HdfsPartitionDescriptor::file_format(), impala::HdfsFileDesc::file_length, impala::HdfsFileDesc::filename, impala::HdfsFileDesc::fs, GetMaterializedSlotIdx(), impala::DescriptorTbl::GetTupleDescriptor(), HDFS_SPLIT_STATS_DESC, hdfs_table_, impala::HdfsFsCache::instance(), is_materialized_col_, impala::HdfsPartitionDescriptor::location(), impala::RuntimeState::LogError(), materialized_slots_, impala::ExecNode::mem_tracker(), impala::HdfsFileDesc::mtime, impala::ImpaladMetrics::NUM_RANGES_MISSING_VOLUME_ID, impala::ImpaladMetrics::NUM_RANGES_PROCESSED, num_unqueued_files_, impala::Status::OK, partition_ids_, partition_key_slots_, path(), path_to_materialized_slot_idx_, per_type_files_, impala::ScanNode::Prepare(), impala::Expr::Prepare(), impala::HdfsPartitionDescriptor::PrepareExprs(), PrintHdfsSplitStats(), RETURN_IF_ERROR, impala::ExecNode::row_desc(), impala::ExecNode::runtime_profile(), impala::ExecNode::runtime_profile_, runtime_state(), runtime_state_, scan_node_pool_, impala::ScanNode::scan_range_params_, scanner_thread_bytes_required_, SCANNER_THREAD_MEM_USAGE, scanner_threads_, SCOPED_TIMER, impala::ThreadGroup::SetCgroup(), impala::ThreadGroup::SetCgroupsMgr(), SKIP_COLUMN, impala::TupleDescriptor::slots(), impala::HdfsFileDesc::splits, impala::TupleDescriptor::table_desc(), 
thrift_plan_node_, tuple_desc_, tuple_id_, unknown_disk_id_warned_, and UpdateHdfsSplitStats().
|
static |
Output the per_volume_stats to stringstream. The output format is a list of: <volume id>:<# splits>/<per volume split lengths>
Definition at line 1073 of file hdfs-scan-node.cc.
References impala::PrettyPrinter::Print().
Referenced by Prepare(), and impala::PlanFragmentExecutor::PrintVolumeIds().
|
protectedvirtualinherited |
Frees any local allocations made by expr_ctxs_to_free_ and returns the result of state->CheckQueryState(). Nodes should call this periodically, e.g. once per input row batch. This should not be called outside the main execution thread. Nodes may override this to add extra periodic cleanup, e.g. freeing other local allocations. ExecNodes overriding this function should return ExecNode::QueryMaintenance().
Reimplemented in impala::PartitionedAggregationNode, and impala::AnalyticEvalNode.
Definition at line 401 of file exec-node.cc.
References impala::RuntimeState::CheckQueryState(), impala::ExecNode::expr_ctxs_to_free_, and impala::ExprContext::FreeLocalAllocations().
Referenced by impala::CrossJoinNode::ConstructBuildSide(), impala::HashJoinNode::ConstructBuildSide(), impala::SelectNode::GetNext(), impala::SortNode::GetNext(), impala::UnionNode::GetNext(), impala::HBaseScanNode::GetNext(), impala::TopNNode::GetNext(), impala::ExchangeNode::GetNext(), impala::CrossJoinNode::GetNext(), impala::HashJoinNode::GetNext(), impala::AggregationNode::GetNext(), impala::PartitionedHashJoinNode::GetNext(), GetNextInternal(), impala::HBaseScanNode::Open(), impala::SortNode::Open(), impala::TopNNode::Open(), impala::BlockingJoinNode::Open(), impala::AggregationNode::Open(), impala::PartitionedHashJoinNode::ProcessBuildInput(), impala::AnalyticEvalNode::QueryMaintenance(), impala::PartitionedAggregationNode::QueryMaintenance(), and impala::SortNode::SortInput().
void HdfsScanNode::RangeComplete | ( | const THdfsFileFormat::type & | file_type, |
const THdfsCompression::type & | compression_type | ||
) |
Called by the scanner when a range is complete. Used to trigger done_ and to log progress. This must only be called after the scanner has completely finished the scan range (i.e. context->Flush()).
Definition at line 924 of file hdfs-scan-node.cc.
Referenced by impala::HdfsTextScanner::Close(), impala::BaseSequenceScanner::Close(), impala::HdfsParquetScanner::Close(), impala::BaseSequenceScanner::CloseFileRanges(), impala::HdfsTextScanner::IssueInitialRanges(), and impala::HdfsParquetScanner::IssueInitialRanges().
void impala::HdfsScanNode::RangeComplete | ( | const THdfsFileFormat::type & | file_type, |
const std::vector< THdfsCompression::type > & | compression_type | ||
) |
Same as above except for when multiple compression codecs were used in the file. The metrics are incremented for each compression_type.
|
inlineinherited |
Definition at line 159 of file exec-node.h.
References impala::ExecNode::limit_, and impala::ExecNode::num_rows_returned_.
Referenced by impala::HdfsParquetScanner::AssembleRows(), impala::SelectNode::CopyRows(), impala::UnionNode::EvalAndMaterializeExprs(), impala::HdfsTextScanner::FinishScanRange(), impala::SelectNode::GetNext(), impala::UnionNode::GetNext(), impala::SortNode::GetNext(), impala::HBaseScanNode::GetNext(), impala::ExchangeNode::GetNext(), impala::CrossJoinNode::GetNext(), impala::HashJoinNode::GetNext(), impala::AggregationNode::GetNext(), impala::PartitionedHashJoinNode::GetNext(), impala::AnalyticEvalNode::GetNext(), impala::PartitionedAggregationNode::GetNext(), GetNextInternal(), impala::ExchangeNode::GetNextMerging(), impala::AnalyticEvalNode::GetNextOutputBatch(), impala::HashJoinNode::LeftJoinGetNext(), impala::HdfsSequenceScanner::ProcessBlockCompressedScanRange(), impala::HdfsTextScanner::ProcessRange(), impala::HdfsAvroScanner::ProcessRange(), impala::HdfsSequenceScanner::ProcessRange(), impala::HdfsRCFileScanner::ProcessRange(), and impala::PlanFragmentExecutor::ReachedLimit().
|
inlineinherited |
Definition at line 97 of file scan-node.h.
References impala::ScanNode::read_timer_.
Referenced by impala::HBaseTableScanner::Next(), and Open().
|
inline |
Definition at line 138 of file hdfs-scan-node.h.
References reader_context_.
Referenced by impala::HdfsParquetScanner::InitColumns(), and impala::HdfsParquetScanner::ProcessFooter().
|
virtual |
Resets all data-specific state, returning this node to the state it was in after calling Prepare() and before calling Open(). Prepare() must have already been called before calling Reset(). Open() and GetNext() may have optionally been called. Close() must not have been called. If overridden in a subclass, must call superclass's Reset() at the end. The default implementation calls Reset() on children. Note that this function may be called many times, so should be fast. For example, accumulated memory does not need to be freed on every call if it's expensive.
Reimplemented from impala::ExecNode.
Definition at line 617 of file hdfs-scan-node.cc.
|
inlineinherited |
Definition at line 156 of file exec-node.h.
References impala::ExecNode::row_descriptor_.
Referenced by impala::CrossJoinNode::BuildListDebugString(), impala::HashJoinNode::CodegenCreateOutputRow(), impala::PartitionedHashJoinNode::CodegenCreateOutputRow(), impala::CrossJoinNode::ConstructBuildSide(), impala::HashJoinNode::ConstructBuildSide(), impala::BlockingJoinNode::GetLeftChildRowString(), impala::HashJoinNode::GetNext(), impala::AggregationNode::GetNext(), impala::AnalyticEvalNode::GetNextOutputBatch(), impala::PartitionedAggregationNode::Partition::InitStreams(), impala::TopNNode::Open(), impala::AggregationNode::Open(), impala::AnalyticEvalNode::Open(), impala::PartitionedAggregationNode::Open(), impala::UnionNode::OpenCurrentChild(), impala::SelectNode::Prepare(), impala::SortNode::Prepare(), impala::UnionNode::Prepare(), impala::TopNNode::Prepare(), impala::BlockingJoinNode::Prepare(), impala::HashJoinNode::Prepare(), impala::AggregationNode::Prepare(), impala::AnalyticEvalNode::Prepare(), impala::PartitionedHashJoinNode::Prepare(), impala::ExecNode::Prepare(), impala::PlanFragmentExecutor::Prepare(), impala::PartitionedAggregationNode::Prepare(), Prepare(), impala::PartitionedHashJoinNode::ProcessBuildInput(), impala::PartitionedAggregationNode::ProcessStream(), impala::PlanFragmentExecutor::row_desc(), impala::SortNode::SortInput(), and impala::HdfsScanner::StartNewRowBatch().
|
inlineinherited |
Definition at line 96 of file scan-node.h.
References impala::ScanNode::rows_read_counter_.
Referenced by impala::HdfsParquetScanner::AssembleRows(), impala::HdfsTextScanner::FinishScanRange(), impala::HdfsSequenceScanner::ProcessDecompressedBlock(), impala::HdfsParquetScanner::ProcessFooter(), impala::HdfsTextScanner::ProcessRange(), impala::HdfsAvroScanner::ProcessRange(), impala::HdfsSequenceScanner::ProcessRange(), and impala::HdfsRCFileScanner::ProcessRange().
|
inlineinherited |
Definition at line 157 of file exec-node.h.
References impala::ExecNode::num_rows_returned_.
Referenced by impala::CrossJoinNode::GetNext(), impala::AnalyticEvalNode::GetNext(), impala::HashJoinNode::LeftJoinGetNext(), impala::PartitionedAggregationNode::Open(), impala::HdfsSequenceScanner::ProcessDecompressedBlock(), impala::CrossJoinNode::ProcessLeftChildBatch(), impala::HashJoinNode::ProcessProbeBatch(), and impala::HdfsTextScanner::WriteFields().
|
inlineinherited |
Definition at line 161 of file exec-node.h.
References impala::ExecNode::runtime_profile_.
Referenced by impala::ExecNode::AddRuntimeExecOption(), impala::BlockingJoinNode::BuildSideThread(), impala::ExecNode::CreateTreeHelper(), impala::PartitionedAggregationNode::Partition::InitStreams(), impala::SortNode::Open(), impala::AnalyticEvalNode::Open(), impala::PartitionedAggregationNode::Open(), Open(), impala::HdfsTextScanner::Prepare(), impala::HBaseScanNode::Prepare(), impala::BaseSequenceScanner::Prepare(), impala::ExchangeNode::Prepare(), impala::HdfsParquetScanner::Prepare(), impala::BlockingJoinNode::Prepare(), impala::HashJoinNode::Prepare(), impala::AggregationNode::Prepare(), impala::AnalyticEvalNode::Prepare(), impala::PartitionedHashJoinNode::Prepare(), impala::ExecNode::Prepare(), impala::ScanNode::Prepare(), impala::PlanFragmentExecutor::Prepare(), impala::PartitionedAggregationNode::Prepare(), impala::HdfsScanner::Prepare(), Prepare(), and impala::PartitionedHashJoinNode::ProcessBuildInput().
|
inline |
Definition at line 136 of file hdfs-scan-node.h.
References runtime_state_.
Referenced by impala::HdfsParquetScanner::AssembleRows(), impala::HdfsParquetScanner::BaseColumnReader::BaseColumnReader(), impala::HdfsTextScanner::Codegen(), impala::HdfsAvroScanner::Codegen(), impala::HdfsSequenceScanner::Codegen(), impala::HdfsAvroScanner::CodegenMaterializeTuple(), impala::HdfsScanner::CodegenWriteCompleteTuple(), impala::HdfsParquetScanner::CreateReader(), impala::HdfsParquetScanner::InitColumns(), impala::HdfsTextScanner::IssueInitialRanges(), impala::HdfsParquetScanner::IssueInitialRanges(), Prepare(), impala::HdfsParquetScanner::ProcessFooter(), and impala::HdfsParquetScanner::BaseColumnReader::ReadDataPage().
|
inlineinherited |
Definition at line 107 of file scan-node.h.
References impala::ScanNode::scan_ranges_complete_counter_.
|
inlineinherited |
Definition at line 110 of file scan-node.h.
References impala::ScanNode::scanner_thread_counters_.
Referenced by impala::HBaseScanNode::GetNext(), and ScannerThread().
|
private |
Main function for scanner thread. This thread pulls the next range to be processed from the IoMgr and then processes the entire range end to end. This thread terminates when all scan ranges are complete or an error occurred.
Definition at line 797 of file hdfs-scan-node.cc.
References impala::ScanNode::active_scanner_thread_counter_, impala::ObjectPool::Add(), all_ranges_started_, impala::HdfsScanner::Close(), COUNTER_ADD, CreateAndPrepareScanner(), impala::ProgressUpdater::done(), done_, EnoughMemoryForScannerThread(), impala::RuntimeState::error_log(), impala::RuntimeState::ErrorLog(), impala::DiskIoMgr::RequestRange::file(), impala::HdfsPartitionDescriptor::file_format(), impala::DiskIoMgr::GetNextRange(), impala::HdfsTableDescriptor::GetPartition(), impala::ScannerContext::GetStream(), hdfs_table_, impala::ExecNode::id(), impala::RuntimeState::io_mgr(), impala::Status::IsCancelled(), impala::Status::IsMemLimitExceeded(), impala::DiskIoMgr::RequestRange::len(), lock_, impala::AtomicUtil::MemoryBarrier(), impala::DiskIoMgr::ScanRange::meta_data(), impala::QueryResourceMgr::NotifyThreadUsageChange(), num_unqueued_files_, impala::RuntimeState::obj_pool(), impala::DiskIoMgr::RequestRange::offset(), impala::Status::ok(), impala::ThreadResourceMgr::ResourcePool::optional_exceeded(), impala::ScanRangeMetadata::partition_id, impala::HdfsScanner::ProcessSplit(), progress_, impala::RuntimeState::query_resource_mgr(), reader_context_, impala::ThreadResourceMgr::ResourcePool::ReleaseThreadToken(), impala::RuntimeState::resource_pool(), runtime_state_, impala::ScanNode::scanner_thread_counters(), ScannerContext, SCOPED_THREAD_COUNTER_MEASUREMENT, SCOPED_TIMER, SetDone(), impala::RuntimeState::SetMemLimitExceeded(), status_, impala::ScannerContext::Stream::total_bytes_returned(), impala::RuntimeState::total_cpu_timer(), impala::RuntimeProfile::Counter::value(), VLOG_QUERY, and VLOG_QUERY_IS_ON.
Referenced by ThreadTokenAvailableCb().
|
staticinherited |
Set debug action for node with given id in 'tree'.
Definition at line 332 of file exec-node.cc.
References impala::ExecNode::children_, impala::ExecNode::debug_action_, impala::ExecNode::debug_phase_, and impala::ExecNode::id_.
Referenced by impala::PlanFragmentExecutor::Prepare().
|
private |
Sets done_ to true and triggers threads to clean up. Cannot be called with any locks taken. Can be called repeatedly; subsequent calls are ignored.
Definition at line 944 of file hdfs-scan-node.cc.
References impala::DiskIoMgr::CancelContext(), done_, impala::RuntimeState::io_mgr(), lock_, materialized_row_batches_, reader_context_, and runtime_state_.
Referenced by Close(), GetNext(), GetNextInternal(), Open(), and ScannerThread().
void HdfsScanNode::SetFileMetadata | ( | const std::string & | filename, |
void * | metadata | ||
) |
Sets the scanner specific metadata for 'filename'. This is thread safe.
Definition at line 211 of file hdfs-scan-node.cc.
References metadata_lock_, and per_file_metadata_.
Referenced by impala::BaseSequenceScanner::ProcessSplit().
|
inlineinherited |
This should be called before Prepare(), and the argument must not be destroyed until after Prepare().
Definition at line 89 of file scan-node.h.
References impala::ScanNode::scan_range_params_.
Referenced by impala::PlanFragmentExecutor::Prepare().
|
private |
Stops periodic counters and aggregates counter values for the entire scan node. This should be called as soon as the scan node is complete to get the most accurate counter values. This can be called multiple times; subsequent calls will be ignored. This must be called on Close() to unregister counters.
Definition at line 982 of file hdfs-scan-node.cc.
References impala::ExecNode::AddRuntimeExecOption(), impala::ScanNode::average_hdfs_read_thread_concurrency_, impala::ScanNode::average_scanner_thread_concurrency_, impala::ScanNode::bytes_read_counter(), impala::DiskIoMgr::bytes_read_dn_cache(), bytes_read_dn_cache_, impala::DiskIoMgr::bytes_read_local(), bytes_read_local_, impala::DiskIoMgr::bytes_read_short_circuit(), bytes_read_short_circuit_, impala::ScanNode::bytes_read_timeseries_counter_, counters_running_, impala::TableDescriptor::database(), disks_accessed_bitmap_, file_type_counts_, file_type_counts_lock_, impala::ScanNode::hdfs_read_thread_concurrency_bucket_, hdfs_table_, impala::RuntimeState::io_mgr(), impala::ImpaladMetrics::IO_MGR_BYTES_READ, impala::ImpaladMetrics::IO_MGR_CACHED_BYTES_READ, impala::ImpaladMetrics::IO_MGR_LOCAL_BYTES_READ, impala::ImpaladMetrics::IO_MGR_SHORT_CIRCUIT_BYTES_READ, lock_, impala::RuntimeState::LogError(), impala::TableDescriptor::name(), impala::ScanNode::num_disks_accessed_counter_, impala::DiskIoMgr::num_remote_ranges(), num_remote_ranges_, num_scanners_codegen_disabled_, num_scanners_codegen_enabled_, impala::BitUtil::Popcount(), impala::PrettyPrinter::Print(), reader_context_, impala::ExecNode::runtime_profile_, runtime_state_, impala::RuntimeProfile::Counter::Set(), impala::PeriodicCounterUpdater::StopBucketingCounters(), impala::PeriodicCounterUpdater::StopRateCounter(), impala::PeriodicCounterUpdater::StopSamplingCounter(), impala::PeriodicCounterUpdater::StopTimeSeriesCounter(), impala::ScanNode::total_throughput_counter(), impala::DiskIoMgr::unexpected_remote_bytes(), unexpected_remote_bytes_, UNEXPECTED_REMOTE_BYTES_WARN_THRESHOLD, and impala::RuntimeProfile::Counter::value().
|
private |
Called when scanner threads are available for this scan node. This will try to spin up as many scanner threads as the quota allows. This is also called whenever a new range is added to the IoMgr to 'pull' thread tokens if they are available.
Definition at line 729 of file hdfs-scan-node.cc.
References impala::ScanNode::active_scanner_thread_counter_, impala::ThreadGroup::AddThread(), all_ranges_started_, COUNTER_ADD, done_, EnoughMemoryForScannerThread(), initial_ranges_issued_, impala::QueryResourceMgr::IsVcoreOverSubscribed(), lock_, materialized_row_batches_, max_materialized_row_batches_, impala::QueryResourceMgr::NotifyThreadUsageChange(), impala::ScanNode::num_scanner_threads_started_counter_, num_skipped_tokens_, progress_, impala::RuntimeState::query_resource_mgr(), impala::ProgressUpdater::remaining(), runtime_state_, scanner_threads_, ScannerThread(), impala::ThreadResourceMgr::ResourcePool::TryAcquireThreadToken(), and impala::RuntimeProfile::Counter::value().
Referenced by AddDiskIoRanges(), and Open().
|
inlineinherited |
Definition at line 98 of file scan-node.h.
References impala::ScanNode::total_throughput_counter_.
Referenced by impala::HBaseScanNode::Close(), and StopAndFinalizeCounters().
void HdfsScanNode::TransferToScanNodePool | ( | MemPool * | pool | ) |
Acquires all allocations from pool into scan_node_pool_. Thread-safe.
Definition at line 299 of file hdfs-scan-node.cc.
References lock_, and scan_node_pool_.
Referenced by impala::HdfsAvroScanner::ResolveSchemas().
|
inline |
Definition at line 132 of file hdfs-scan-node.h.
References tuple_desc_.
Referenced by impala::ScannerContext::AddStream(), impala::HdfsAvroScanner::CodegenMaterializeTuple(), impala::HdfsScanner::CodegenWriteCompleteTuple(), impala::HdfsScanner::CommitRows(), impala::HdfsScanner::InitializeWriteTuplesFn(), impala::HdfsRCFileScanner::InitNewRange(), impala::HdfsTextScanner::ResetScanner(), impala::HdfsScanner::UpdateDecompressor(), and impala::HdfsTextScanner::WriteFields().
|
inline |
Returns the tuple idx into the row for this scan node to output to. Currently this is always 0.
Definition at line 124 of file hdfs-scan-node.h.
Referenced by impala::HdfsParquetScanner::AssembleRows(), impala::HdfsScanner::CodegenWriteCompleteTuple(), impala::HdfsRCFileScanner::DebugString(), impala::HdfsAvroScanner::DecodeAvroData(), impala::HdfsRCFileScanner::ProcessRange(), impala::HdfsScanner::WriteCompleteTuple(), impala::HdfsScanner::WriteEmptyTuples(), and impala::HdfsTextScanner::WriteFields().
|
inlineinherited |
Definition at line 155 of file exec-node.h.
References impala::ExecNode::type_.
Referenced by impala::ExecNode::CodegenEvalConjuncts(), impala::PartitionedAggregationNode::CodegenUpdateTuple(), and impala::PlanFragmentExecutor::Prepare().
|
static |
Update the per volume stats with the given scan range params list.
Definition at line 1058 of file hdfs-scan-node.cc.
References impala::FindOrInsert().
Referenced by Prepare(), and impala::PlanFragmentExecutor::PrintVolumeIds().
|
friend |
Definition at line 268 of file hdfs-scan-node.h.
Referenced by ScannerThread().
|
protectedinherited |
The number of active hdfs reading threads for this node.
Definition at line 164 of file scan-node.h.
|
protectedinherited |
The number of active scanner threads that are not blocked by IO.
Definition at line 157 of file scan-node.h.
Referenced by impala::ScanNode::active_scanner_thread_counter(), Close(), EnoughMemoryForScannerThread(), Open(), ScannerThread(), and ThreadTokenAvailableCb().
|
private |
Set to true if all ranges have started. Some of the ranges may still be in flight being processed by scanner threads, but no new ScannerThreads should be started.
Definition at line 417 of file hdfs-scan-node.h.
Referenced by ScannerThread(), and ThreadTokenAvailableCb().
|
staticinherited |
Definition at line 133 of file scan-node.h.
Referenced by Open().
|
protectedinherited |
Average number of active hdfs reading threads. This should be created in Open and stopped when all the scanner threads are done.
Definition at line 168 of file scan-node.h.
Referenced by Open(), and StopAndFinalizeCounters().
|
staticinherited |
Definition at line 132 of file scan-node.h.
Referenced by Open().
|
protectedinherited |
Average number of active scanner threads. This should be created in Open and stopped when all the scanner threads are done.
Definition at line 161 of file scan-node.h.
Referenced by impala::ScanNode::average_scanner_thread_concurrency(), Open(), and StopAndFinalizeCounters().
|
staticinherited |
names of ScanNode common counters
Definition at line 121 of file scan-node.h.
Referenced by impala::ScanNode::Prepare().
|
protectedinherited |
Definition at line 140 of file scan-node.h.
Referenced by impala::ScanNode::bytes_read_counter(), Open(), and impala::ScanNode::Prepare().
|
private |
Total number of bytes read from data node cache.
Definition at line 396 of file hdfs-scan-node.h.
Referenced by Open(), and StopAndFinalizeCounters().
|
private |
Total number of bytes read locally.
Definition at line 390 of file hdfs-scan-node.h.
Referenced by Open(), and StopAndFinalizeCounters().
|
private |
Total number of bytes read via short circuit read.
Definition at line 393 of file hdfs-scan-node.h.
Referenced by Open(), and StopAndFinalizeCounters().
|
protectedinherited |
Time series of the bytes_read_counter_.
Definition at line 142 of file scan-node.h.
Referenced by impala::HBaseScanNode::Close(), impala::ScanNode::Prepare(), and StopAndFinalizeCounters().
|
protectedinherited |
Definition at line 214 of file exec-node.h.
Referenced by impala::ExecNode::child(), impala::ExecNode::Close(), impala::ExecNode::CollectNodes(), impala::ExecNode::CreateTreeHelper(), impala::HBaseScanNode::DebugString(), impala::UnionNode::GetNext(), impala::UnionNode::Open(), impala::AggregationNode::Open(), impala::PartitionedAggregationNode::Open(), impala::UnionNode::OpenCurrentChild(), impala::ExecNode::Prepare(), impala::PartitionedAggregationNode::ProcessStream(), impala::ExecNode::Reset(), and impala::ExecNode::SetDebugOptions().
|
private |
Definition at line 324 of file hdfs-scan-node.h.
Referenced by GetCodegenFn(), and Prepare().
|
private |
Contexts for each conjunct. These are cloned by the scanners so conjuncts can be safely evaluated in parallel.
Definition at line 328 of file hdfs-scan-node.h.
Referenced by Close(), GetConjunctCtxs(), Open(), and Prepare().
|
private |
If true, counters are actively running and need to be reported in the runtime profile.
Definition at line 438 of file hdfs-scan-node.h.
Referenced by Open(), and StopAndFinalizeCounters().
|
protectedinherited |
Definition at line 220 of file exec-node.h.
Referenced by impala::ExecNode::ExecDebugAction(), and impala::ExecNode::SetDebugOptions().
|
protectedinherited |
debug-only: if debug_action_ is not INVALID, node will perform action in debug_phase_
Definition at line 219 of file exec-node.h.
Referenced by impala::ExecNode::ExecDebugAction(), and impala::ExecNode::SetDebugOptions().
|
private |
Disk accessed bitmap.
Definition at line 387 of file hdfs-scan-node.h.
Referenced by Open(), and StopAndFinalizeCounters().
|
private |
Flag signaling that all scanner threads are done. This could be because they are finished, an error/cancellation occurred, or the limit was reached. Setting this to true triggers the scanner threads to clean up. This should not be explicitly set. Instead, call SetDone().
Definition at line 413 of file hdfs-scan-node.h.
Referenced by impala::ScannerContext::cancelled(), ScannerThread(), SetDone(), and ThreadTokenAvailableCb().
|
protectedinherited |
Execution options that are determined at runtime. This is added to the runtime profile at Close(). Examples for options logged here would be "Codegen Enabled".
Definition at line 238 of file exec-node.h.
Referenced by impala::ExecNode::AddRuntimeExecOption().
|
protectedinherited |
MemTracker that should be used for ExprContexts.
Definition at line 233 of file exec-node.h.
Referenced by impala::ExecNode::expr_mem_tracker(), and impala::ExecNode::Prepare().
|
private |
Definition at line 298 of file hdfs-scan-node.h.
Referenced by GetFileDesc(), Open(), and Prepare().
|
private |
Definition at line 434 of file hdfs-scan-node.h.
Referenced by StopAndFinalizeCounters().
|
private |
Mapping of file formats (file type, compression type) to the number of splits of that type and the lock protecting it. This lock cannot be taken together with any other lock except lock_.
Definition at line 431 of file hdfs-scan-node.h.
Referenced by StopAndFinalizeCounters().
|
protectedinherited |
HDFS read thread concurrency bucket: bucket[i] refers to the number of samples taken where there are i concurrent hdfs read threads running.
Definition at line 174 of file scan-node.h.
Referenced by Open(), and StopAndFinalizeCounters().
|
static |
Description string for the per volume stats output.
Definition at line 265 of file hdfs-scan-node.h.
Referenced by Prepare(), and impala::PlanFragmentExecutor::PrintVolumeIds().
|
private |
Descriptor for the hdfs table, including partition and format metadata. Set in Prepare, owned by RuntimeState.
Definition at line 287 of file hdfs-scan-node.h.
Referenced by Close(), hdfs_table(), num_partition_keys(), Open(), Prepare(), ScannerThread(), and StopAndFinalizeCounters().
|
protectedinherited |
Definition at line 209 of file exec-node.h.
Referenced by impala::PartitionedAggregationNode::CreateHashPartitions(), impala::ExecNode::id(), impala::ExecNode::InitRuntimeProfile(), impala::PartitionedAggregationNode::NextPartition(), impala::ExchangeNode::Prepare(), impala::PartitionedHashJoinNode::PrepareNextPartition(), impala::PartitionedHashJoinNode::ProcessBuildInput(), and impala::ExecNode::SetDebugOptions().
|
private |
Set to true when the initial scan ranges are issued to the IoMgr. This happens on the first call to GetNext(). The token manager, in a different thread, will read this variable.
Definition at line 307 of file hdfs-scan-node.h.
Referenced by GetNext(), and ThreadTokenAvailableCb().
|
private |
is_materialized_col_[i] = <true if the i-th column should be materialized, false otherwise> for 0 <= i < total # columns. This should be a vector<bool>, but bool vectors are special-cased and not stored internally as arrays, so instead we store as chars and cast to bools as needed.
Definition at line 339 of file hdfs-scan-node.h.
Referenced by is_materialized_col(), and Prepare().
|
protectedinherited |
Definition at line 222 of file exec-node.h.
Referenced by impala::SortNode::GetNext(), GetNextInternal(), impala::ExchangeNode::GetNextMerging(), impala::TopNNode::InsertTupleRow(), limit(), impala::ExecNode::limit(), impala::TopNNode::Open(), and impala::ExecNode::ReachedLimit().
|
private |
Lock protects access between scanner thread and main query thread (the one calling GetNext()) for all fields below. If this lock and any other locks need to be taken together, this lock must be taken first.
Definition at line 407 of file hdfs-scan-node.h.
Referenced by GetNextInternal(), InitEmptyTemplateTuple(), InitTemplateTuple(), ScannerThread(), SetDone(), StopAndFinalizeCounters(), ThreadTokenAvailableCb(), and TransferToScanNodePool().
|
staticinherited |
Definition at line 128 of file scan-node.h.
Referenced by impala::ScanNode::Prepare().
|
protectedinherited |
Definition at line 151 of file scan-node.h.
Referenced by impala::ScanNode::materialize_tuple_timer(), and impala::ScanNode::Prepare().
|
private |
Outgoing row batches queue. Row batches are produced asynchronously by the scanner threads and consumed by the main thread.
Definition at line 363 of file hdfs-scan-node.h.
Referenced by AddMaterializedRowBatch(), Close(), GetNextInternal(), HdfsScanNode(), SetDone(), and ThreadTokenAvailableCb().
|
private |
Vector containing slot descriptors for all materialized non-partition key slots. These descriptors are sorted in order of increasing col_pos. TODO: Put this (with associated fields and logic) on ScanNode or ExecNode.
Definition at line 344 of file hdfs-scan-node.h.
Referenced by materialized_slots(), and Prepare().
|
private |
The size of the largest compressed text file to be scanned. This is used to estimate scanner thread memory usage.
Definition at line 384 of file hdfs-scan-node.h.
Referenced by max_compressed_text_file_length(), and Open().
|
private |
Maximum size of materialized_row_batches_.
Definition at line 366 of file hdfs-scan-node.h.
Referenced by HdfsScanNode(), and ThreadTokenAvailableCb().
|
protectedinherited |
Account for peak memory used by this node.
Definition at line 230 of file exec-node.h.
Referenced by impala::ExecNode::mem_tracker(), and impala::ExecNode::Prepare().
|
private |
Scanner specific per file metadata (e.g. header information) and associated lock. This lock cannot be taken together with any other locks except lock_.
Definition at line 355 of file hdfs-scan-node.h.
Referenced by GetFileMetadata(), and SetFileMetadata().
|
staticinherited |
Definition at line 127 of file scan-node.h.
Referenced by Open().
|
protectedinherited |
Definition at line 150 of file scan-node.h.
Referenced by Open(), and StopAndFinalizeCounters().
|
private |
This is the number of io buffers that are owned by the scan node and the scanners. This is used just to help debug leaked io buffers to determine if the leak is happening in the scanners vs other parts of the execution.
Definition at line 371 of file hdfs-scan-node.h.
Referenced by Close(), and GetNextInternal().
|
private |
Total number of remote scan ranges.
Definition at line 399 of file hdfs-scan-node.h.
Referenced by Open(), and StopAndFinalizeCounters().
|
protectedinherited |
Definition at line 223 of file exec-node.h.
Referenced by impala::ExecNode::Close(), impala::SelectNode::CopyRows(), impala::UnionNode::EvalAndMaterializeExprs(), impala::SortNode::GetNext(), impala::HBaseScanNode::GetNext(), impala::TopNNode::GetNext(), impala::ExchangeNode::GetNext(), impala::CrossJoinNode::GetNext(), impala::HashJoinNode::GetNext(), impala::AggregationNode::GetNext(), impala::PartitionedHashJoinNode::GetNext(), impala::AnalyticEvalNode::GetNext(), impala::PartitionedAggregationNode::GetNext(), GetNextInternal(), impala::ExchangeNode::GetNextMerging(), impala::AnalyticEvalNode::GetNextOutputBatch(), impala::HashJoinNode::LeftJoinGetNext(), impala::PartitionedHashJoinNode::OutputUnmatchedBuild(), impala::ExecNode::ReachedLimit(), and impala::ExecNode::rows_returned().
|
staticinherited |
Definition at line 134 of file scan-node.h.
Referenced by Open().
|
protectedinherited |
Definition at line 170 of file scan-node.h.
Referenced by Open(), and ThreadTokenAvailableCb().
|
private |
Definition at line 380 of file hdfs-scan-node.h.
Referenced by IncNumScannersCodegenDisabled(), and StopAndFinalizeCounters().
|
private |
Counters which track the number of scanners that have codegen enabled for the materialize and conjuncts evaluation code paths.
Definition at line 379 of file hdfs-scan-node.h.
Referenced by IncNumScannersCodegenEnabled(), and StopAndFinalizeCounters().
|
private |
The number of times a token was offered but no scanner threads started. This is used for diagnostics only.
Definition at line 375 of file hdfs-scan-node.h.
Referenced by ThreadTokenAvailableCb().
|
private |
Number of files that have not been issued from the scanners.
Definition at line 315 of file hdfs-scan-node.h.
Referenced by MarkFileDescIssued(), Prepare(), and ScannerThread().
|
private |
Partitions scanned by this scan node.
Definition at line 294 of file hdfs-scan-node.h.
|
private |
Vector containing slot descriptors for all materialized partition key slots. These descriptors are sorted in order of increasing col_pos.
Definition at line 348 of file hdfs-scan-node.h.
Referenced by InitTemplateTuple(), num_materialized_partition_keys(), and Prepare().
|
private |
Definition at line 332 of file hdfs-scan-node.h.
Referenced by GetMaterializedSlotIdx(), and Prepare().
|
private |
Definition at line 356 of file hdfs-scan-node.h.
Referenced by GetFileMetadata(), and SetFileMetadata().
|
staticinherited |
Definition at line 126 of file scan-node.h.
Referenced by Open().
|
protectedinherited |
Per thread read throughput [bytes/sec].
Definition at line 149 of file scan-node.h.
Referenced by Open(), and impala::ScanNode::per_read_thread_throughput_counter().
|
private |
Definition at line 302 of file hdfs-scan-node.h.
|
protectedinherited |
Definition at line 211 of file exec-node.h.
Referenced by impala::SortNode::Init(), impala::UnionNode::Init(), impala::TopNNode::Init(), impala::ExchangeNode::Init(), impala::HashJoinNode::Init(), impala::AggregationNode::Init(), impala::ExecNode::Init(), impala::PartitionedHashJoinNode::Init(), impala::AnalyticEvalNode::Init(), impala::PartitionedAggregationNode::Init(), impala::ExecNode::InitRuntimeProfile(), Open(), impala::PartitionedHashJoinNode::Prepare(), and impala::PartitionedHashJoinNode::ProcessBuildInput().
|
private |
Keeps track of total splits and the number finished.
Definition at line 351 of file hdfs-scan-node.h.
Referenced by GetNext(), Open(), ScannerThread(), and ThreadTokenAvailableCb().
|
protectedinherited |
Definition at line 145 of file scan-node.h.
Referenced by Open(), impala::HBaseScanNode::Prepare(), and impala::ScanNode::read_timer().
|
private |
RequestContext object to use with the disk-io-mgr for reads.
Definition at line 280 of file hdfs-scan-node.h.
Referenced by AddDiskIoRanges(), Close(), Open(), reader_context(), ScannerThread(), SetDone(), and StopAndFinalizeCounters().
|
private |
The id of the callback added to the query resource manager when RM is enabled. Used to remove the callback before this scan node is destroyed.
Definition at line 442 of file hdfs-scan-node.h.
|
protectedinherited |
Definition at line 215 of file exec-node.h.
Referenced by impala::SortNode::Open(), impala::SortNode::Prepare(), impala::TopNNode::Prepare(), impala::ExchangeNode::Prepare(), and impala::ExecNode::row_desc().
|
staticinherited |
Names of counters shared by all exec nodes.
Definition at line 169 of file exec-node.h.
Referenced by impala::ExecNode::Prepare().
|
staticinherited |
Definition at line 122 of file scan-node.h.
Referenced by impala::ScanNode::Prepare().
|
protectedinherited |
Definition at line 144 of file scan-node.h.
Referenced by impala::HBaseScanNode::GetNext(), impala::ScanNode::Prepare(), and impala::ScanNode::rows_read_counter().
|
protectedinherited |
Definition at line 226 of file exec-node.h.
Referenced by impala::ExecNode::Close(), impala::SelectNode::CopyRows(), impala::UnionNode::EvalAndMaterializeExprs(), impala::SortNode::GetNext(), impala::HBaseScanNode::GetNext(), impala::TopNNode::GetNext(), impala::ExchangeNode::GetNext(), impala::CrossJoinNode::GetNext(), impala::HashJoinNode::GetNext(), impala::AggregationNode::GetNext(), impala::AnalyticEvalNode::GetNext(), impala::PartitionedHashJoinNode::GetNext(), impala::PartitionedAggregationNode::GetNext(), GetNextInternal(), impala::ExchangeNode::GetNextMerging(), impala::HashJoinNode::LeftJoinGetNext(), impala::PartitionedHashJoinNode::OutputUnmatchedBuild(), and impala::ExecNode::Prepare().
|
protectedinherited |
Definition at line 227 of file exec-node.h.
Referenced by impala::ExecNode::Prepare().
|
protectedinherited |
Definition at line 239 of file exec-node.h.
Referenced by impala::ExecNode::AddRuntimeExecOption().
|
protectedinherited |
Definition at line 225 of file exec-node.h.
Referenced by impala::HBaseScanNode::Close(), impala::SelectNode::GetNext(), impala::SortNode::GetNext(), impala::UnionNode::GetNext(), impala::HBaseScanNode::GetNext(), impala::TopNNode::GetNext(), impala::ExchangeNode::GetNext(), impala::CrossJoinNode::GetNext(), impala::HashJoinNode::GetNext(), impala::AggregationNode::GetNext(), impala::AnalyticEvalNode::GetNext(), impala::PartitionedHashJoinNode::GetNext(), impala::PartitionedAggregationNode::GetNext(), GetNext(), impala::ExecNode::InitRuntimeProfile(), impala::SelectNode::Open(), impala::HBaseScanNode::Open(), impala::UnionNode::Open(), impala::SortNode::Open(), impala::TopNNode::Open(), impala::ExchangeNode::Open(), impala::DataSourceScanNode::Open(), impala::BlockingJoinNode::Open(), impala::AggregationNode::Open(), impala::AnalyticEvalNode::Open(), impala::PartitionedAggregationNode::Open(), impala::SelectNode::Prepare(), impala::SortNode::Prepare(), impala::UnionNode::Prepare(), impala::TopNNode::Prepare(), impala::BlockingJoinNode::Prepare(), impala::HashJoinNode::Prepare(), impala::AggregationNode::Prepare(), impala::PartitionedHashJoinNode::Prepare(), impala::AnalyticEvalNode::Prepare(), impala::ExecNode::Prepare(), impala::ScanNode::Prepare(), impala::PartitionedAggregationNode::Prepare(), Prepare(), impala::ExecNode::runtime_profile(), and StopAndFinalizeCounters().
|
private |
Definition at line 274 of file hdfs-scan-node.h.
Referenced by AddDiskIoRanges(), AllocateScanRange(), ComputeSlotMaterializationOrder(), CreateAndPrepareScanner(), GetConjunctCtxs(), Open(), Prepare(), runtime_state(), ScannerThread(), SetDone(), StopAndFinalizeCounters(), and ThreadTokenAvailableCb().
|
private |
Pool for allocating some amounts of memory that is shared between scanners, e.g. partition key tuples and their string buffers.
Definition at line 421 of file hdfs-scan-node.h.
Referenced by Close(), InitEmptyTemplateTuple(), Prepare(), and TransferToScanNodePool().
|
protectedinherited |
The scan ranges this scan node is responsible for. Not owned.
Definition at line 138 of file scan-node.h.
Referenced by impala::HBaseScanNode::Prepare(), Prepare(), and impala::ScanNode::SetScanRanges().
|
staticinherited |
Definition at line 129 of file scan-node.h.
Referenced by Open().
|
protectedinherited |
Definition at line 152 of file scan-node.h.
Referenced by Open(), and impala::ScanNode::scan_ranges_complete_counter().
|
private |
Definition at line 320 of file hdfs-scan-node.h.
|
private |
The estimated memory required to start up a new scanner thread. If the memory left (due to limits) is less than this value, we won't start up optional scanner threads.
Definition at line 312 of file hdfs-scan-node.h.
Referenced by EnoughMemoryForScannerThread(), and Prepare().
|
protectedinherited |
Aggregated scanner thread counters.
Definition at line 154 of file scan-node.h.
Referenced by impala::ScanNode::Prepare(), and impala::ScanNode::scanner_thread_counters().
|
staticinherited |
Definition at line 130 of file scan-node.h.
Referenced by impala::ScanNode::Prepare().
|
staticinherited |
Definition at line 131 of file scan-node.h.
Referenced by impala::HdfsTextScanner::Prepare(), and impala::ScanNode::Prepare().
|
private |
Thread group for all scanner worker threads.
Definition at line 359 of file hdfs-scan-node.h.
Referenced by Close(), Prepare(), and ThreadTokenAvailableCb().
|
static |
Definition at line 144 of file hdfs-scan-node.h.
Referenced by impala::HdfsAvroScanner::CodegenMaterializeTuple(), GetMaterializedSlotIdx(), impala::HdfsRCFileScanner::InitNewRange(), Prepare(), and impala::HdfsAvroScanner::ResolveSchemas().
|
private |
Status of failed operations. This is set in the ScannerThreads. Returned in GetNext() if an error occurred. A non-ok status triggers cleanup of scanner threads.
Definition at line 426 of file hdfs-scan-node.h.
Referenced by GetNextInternal(), and ScannerThread().
|
private |
Cache of the plan node. This is needed to be able to create a copy of the conjuncts per scanner since our Exprs are not thread safe.
Definition at line 272 of file hdfs-scan-node.h.
Referenced by Prepare().
|
staticinherited |
Definition at line 124 of file scan-node.h.
Referenced by impala::HBaseScanNode::Prepare().
|
staticinherited |
Definition at line 123 of file scan-node.h.
Referenced by Open().
|
staticinherited |
Definition at line 125 of file scan-node.h.
Referenced by impala::ScanNode::Prepare().
|
protectedinherited |
Wall based aggregate read throughput [bytes/sec].
Definition at line 147 of file scan-node.h.
Referenced by impala::ScanNode::Prepare(), and impala::ScanNode::total_throughput_counter().
|
private |
Descriptor for tuples this scan node constructs.
Definition at line 283 of file hdfs-scan-node.h.
Referenced by InitEmptyTemplateTuple(), Prepare(), and tuple_desc().
|
private |
Tuple id resolved in Prepare() to set tuple_desc_.
Definition at line 277 of file hdfs-scan-node.h.
Referenced by Prepare().
|
protectedinherited |
Definition at line 210 of file exec-node.h.
Referenced by impala::ExecNode::CollectNodes(), and impala::ExecNode::type().
|
private |
Total number of bytes read remotely that were expected to be local.
Definition at line 402 of file hdfs-scan-node.h.
Referenced by Open(), and StopAndFinalizeCounters().
|
private |
If true, the warning that some disk ids are unknown was logged. Only log this once per scan node since it can be noisy.
Definition at line 291 of file hdfs-scan-node.h.
Referenced by Prepare().