Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
impala::HdfsScanNode Class Reference

#include <hdfs-scan-node.h>

Inheritance diagram for impala::HdfsScanNode:
Collaboration diagram for impala::HdfsScanNode:

Public Types

typedef boost::unordered_map
< int32_t, std::pair< int,
int64_t > > 
PerVolumnStats
 map from volume id to <number of splits, per volume split lengths> More...
 

Public Member Functions

 HdfsScanNode (ObjectPool *pool, const TPlanNode &tnode, const DescriptorTbl &descs)
 
 ~HdfsScanNode ()
 
virtual Status Prepare (RuntimeState *state)
 ExecNode methods. More...
 
virtual Status Open (RuntimeState *state)
 
virtual Status GetNext (RuntimeState *state, RowBatch *row_batch, bool *eos)
 
virtual Status Reset (RuntimeState *state)
 
virtual void Close (RuntimeState *state)
 
int limit () const
 
const std::vector
< SlotDescriptor * > & 
materialized_slots () const
 
int tuple_idx () const
 
int num_partition_keys () const
 Returns number of partition keys in the table, including non-materialized slots. More...
 
int num_materialized_partition_keys () const
 Returns number of materialized partition key slots. More...
 
const TupleDescriptortuple_desc ()
 
const HdfsTableDescriptorhdfs_table ()
 
RuntimeStateruntime_state ()
 
DiskIoMgr::RequestContextreader_context ()
 
RuntimeProfile::HighWaterMarkCounter * max_compressed_text_file_length ()
 
Status GetConjunctCtxs (std::vector< ExprContext * > *ctxs)
 
int GetMaterializedSlotIdx (const std::vector< int > &path) const
 
const bool * is_materialized_col ()
 
void * GetCodegenFn (THdfsFileFormat::type)
 
void IncNumScannersCodegenEnabled ()
 
void IncNumScannersCodegenDisabled ()
 
void AddMaterializedRowBatch (RowBatch *row_batch)
 
DiskIoMgr::ScanRange * AllocateScanRange (hdfsFS fs, const char *file, int64_t len, int64_t offset, int64_t partition_id, int disk_id, bool try_cache, bool expected_local, int64_t mtime)
 
Status AddDiskIoRanges (const std::vector< DiskIoMgr::ScanRange * > &ranges)
 Adds ranges to the io mgr queue and starts up new scanner threads if possible. More...
 
Status AddDiskIoRanges (const HdfsFileDesc *file_desc)
 Adds all splits for file_desc to the io mgr queue. More...
 
void MarkFileDescIssued (const HdfsFileDesc *file_desc)
 
Tuple * InitTemplateTuple (RuntimeState *state, const std::vector< ExprContext * > &value_ctxs)
 
Tuple * InitEmptyTemplateTuple ()
 
void TransferToScanNodePool (MemPool *pool)
 Acquires all allocations from pool into scan_node_pool_. Thread-safe. More...
 
HdfsFileDescGetFileDesc (const std::string &filename)
 Returns the file desc for 'filename'. Returns NULL if filename is invalid. More...
 
void * GetFileMetadata (const std::string &filename)
 
void SetFileMetadata (const std::string &filename, void *metadata)
 
void RangeComplete (const THdfsFileFormat::type &file_type, const THdfsCompression::type &compression_type)
 
void RangeComplete (const THdfsFileFormat::type &file_type, const std::vector< THdfsCompression::type > &compression_type)
 
void ComputeSlotMaterializationOrder (std::vector< int > *order) const
 
void SetScanRanges (const std::vector< TScanRangeParams > &scan_range_params)
 
virtual bool IsScanNode () const
 
RuntimeProfile::Counterbytes_read_counter () const
 
RuntimeProfile::Counterrows_read_counter () const
 
RuntimeProfile::Counterread_timer () const
 
RuntimeProfile::Countertotal_throughput_counter () const
 
RuntimeProfile::Counterper_read_thread_throughput_counter () const
 
RuntimeProfile::Countermaterialize_tuple_timer () const
 
RuntimeProfile::Counterscan_ranges_complete_counter () const
 
RuntimeProfile::ThreadCountersscanner_thread_counters () const
 
RuntimeProfile::Counter & active_scanner_thread_counter ()
 
RuntimeProfile::Counter * average_scanner_thread_concurrency () const
 
virtual Status Init (const TPlanNode &tnode)
 
void CollectNodes (TPlanNodeType::type node_type, std::vector< ExecNode * > *nodes)
 
void CollectScanNodes (std::vector< ExecNode * > *nodes)
 Collect all scan node types. More...
 
std::string DebugString () const
 Returns a string representation in DFS order of the plan rooted at this. More...
 
virtual void DebugString (int indentation_level, std::stringstream *out) const
 
const std::vector< ExprContext * > & conjunct_ctxs () const
 
int id () const
 
TPlanNodeType::type type () const
 
const RowDescriptorrow_desc () const
 
int64_t rows_returned () const
 
bool ReachedLimit ()
 
RuntimeProfileruntime_profile ()
 
MemTrackermem_tracker ()
 
MemTrackerexpr_mem_tracker ()
 

Static Public Member Functions

static void UpdateHdfsSplitStats (const std::vector< TScanRangeParams > &scan_range_params_list, PerVolumnStats *per_volume_stats)
 Update the per volume stats with the given scan range params list. More...
 
static void PrintHdfsSplitStats (const PerVolumnStats &per_volume_stats, std::stringstream *ss)
 
static Status CreateTree (ObjectPool *pool, const TPlan &plan, const DescriptorTbl &descs, ExecNode **root)
 
static void SetDebugOptions (int node_id, TExecNodePhase::type phase, TDebugAction::type action, ExecNode *tree)
 Set debug action for node with given id in 'tree'. More...
 
static bool EvalConjuncts (ExprContext *const *ctxs, int num_ctxs, TupleRow *row)
 
static llvm::Function * CodegenEvalConjuncts (RuntimeState *state, const std::vector< ExprContext * > &conjunct_ctxs, const char *name="EvalConjuncts")
 
static int GetNodeIdFromProfile (RuntimeProfile *p)
 Extract node id from p->name(). More...
 

Static Public Attributes

static const int SKIP_COLUMN = -1
 
static const std::string HDFS_SPLIT_STATS_DESC
 Description string for the per volume stats output. More...
 
static const std::string BYTES_READ_COUNTER = "BytesRead"
 names of ScanNode common counters More...
 
static const std::string ROWS_READ_COUNTER = "RowsRead"
 
static const std::string TOTAL_HDFS_READ_TIMER = "TotalRawHdfsReadTime(*)"
 
static const std::string TOTAL_HBASE_READ_TIMER = "TotalRawHBaseReadTime(*)"
 
static const std::string TOTAL_THROUGHPUT_COUNTER = "TotalReadThroughput"
 
static const std::string PER_READ_THREAD_THROUGHPUT_COUNTER
 
static const std::string NUM_DISKS_ACCESSED_COUNTER = "NumDisksAccessed"
 
static const std::string MATERIALIZE_TUPLE_TIMER = "MaterializeTupleTime(*)"
 
static const std::string SCAN_RANGES_COMPLETE_COUNTER = "ScanRangesComplete"
 
static const std::string SCANNER_THREAD_COUNTERS_PREFIX = "ScannerThreads"
 
static const std::string SCANNER_THREAD_TOTAL_WALLCLOCK_TIME
 
static const std::string AVERAGE_SCANNER_THREAD_CONCURRENCY
 
static const std::string AVERAGE_HDFS_READ_THREAD_CONCURRENCY
 
static const std::string NUM_SCANNER_THREADS_STARTED
 
static const std::string ROW_THROUGHPUT_COUNTER = "RowsReturnedRate"
 Names of counters shared by all exec nodes. More...
 

Protected Member Functions

ExecNode * child (int i)
 
bool is_closed ()
 
void InitRuntimeProfile (const std::string &name)
 
Status ExecDebugAction (TExecNodePhase::type phase, RuntimeState *state)
 
void AddRuntimeExecOption (const std::string &option)
 Appends option to 'runtime_exec_options_'. More...
 
virtual Status QueryMaintenance (RuntimeState *state)
 
void AddExprCtxToFree (ExprContext *ctx)
 
void AddExprCtxsToFree (const std::vector< ExprContext * > &ctxs)
 
void AddExprCtxsToFree (const SortExecExprs &sort_exec_exprs)
 

Static Protected Member Functions

static Status CreateNode (ObjectPool *pool, const TPlanNode &tnode, const DescriptorTbl &descs, ExecNode **node)
 Create a single exec node derived from thrift node; place exec node in 'pool'. More...
 
static Status CreateTreeHelper (ObjectPool *pool, const std::vector< TPlanNode > &tnodes, const DescriptorTbl &descs, ExecNode *parent, int *node_idx, ExecNode **root)
 

Protected Attributes

const std::vector
< TScanRangeParams > * 
scan_range_params_
 The scan ranges this scan node is responsible for. Not owned. More...
 
RuntimeProfile::Counterbytes_read_counter_
 
RuntimeProfile::TimeSeriesCounterbytes_read_timeseries_counter_
 Time series of the bytes_read_counter_. More...
 
RuntimeProfile::Counterrows_read_counter_
 

rows/tuples read from the scanner (including those discarded by EvalConjuncts())

More...
 
RuntimeProfile::Counterread_timer_
 
RuntimeProfile::Countertotal_throughput_counter_
 Wall based aggregate read throughput [bytes/sec]. More...
 
RuntimeProfile::Counterper_read_thread_throughput_counter_
 Per thread read throughput [bytes/sec]. More...
 
RuntimeProfile::Counternum_disks_accessed_counter_
 
RuntimeProfile::Countermaterialize_tuple_timer_
 
RuntimeProfile::Counterscan_ranges_complete_counter_
 
RuntimeProfile::ThreadCountersscanner_thread_counters_
 Aggregated scanner thread counters. More...
 
RuntimeProfile::Counter active_scanner_thread_counter_
 The number of active scanner threads that are not blocked by IO. More...
 
RuntimeProfile::Counteraverage_scanner_thread_concurrency_
 
RuntimeProfile::Counter active_hdfs_read_thread_counter_
 The number of active hdfs reading threads reading for this node. More...
 
RuntimeProfile::Counteraverage_hdfs_read_thread_concurrency_
 
RuntimeProfile::Counternum_scanner_threads_started_counter_
 
std::vector
< RuntimeProfile::Counter * > 
hdfs_read_thread_concurrency_bucket_
 
int id_
 
TPlanNodeType::type type_
 
ObjectPoolpool_
 
std::vector< ExecNode * > children_
 
RowDescriptor row_descriptor_
 
TExecNodePhase::type debug_phase_
 
TDebugAction::type debug_action_
 
int64_t limit_
 
int64_t num_rows_returned_
 
boost::scoped_ptr< RuntimeProfile > runtime_profile_
 
RuntimeProfile::Counterrows_returned_counter_
 
RuntimeProfile::Counterrows_returned_rate_
 
boost::scoped_ptr< MemTracker > mem_tracker_
 Account for peak memory used by this node. More...
 
boost::scoped_ptr< MemTracker > expr_mem_tracker_
 MemTracker that should be used for ExprContexts. More...
 
boost::mutex exec_options_lock_
 
std::string runtime_exec_options_
 

Private Types

typedef std::map< std::string,
HdfsFileDesc * > 
FileDescMap
 File path => file descriptor (which includes the file's splits) More...
 
typedef std::map
< THdfsFileFormat::type,
std::vector< HdfsFileDesc * > > 
FileFormatsMap
 File format => file descriptors. More...
 
typedef std::map
< THdfsFileFormat::type,
HdfsScanner * > 
ScannerMap
 
typedef std::map
< THdfsFileFormat::type, void * > 
CodegendFnMap
 Per scanner type codegen'd fn. More...
 
typedef boost::unordered_map
< std::vector< int >, int > 
PathToSlotIdxMap
 Maps from a slot's path to its index into materialized_slots_. More...
 
typedef std::map< std::pair
< THdfsFileFormat::type,
THdfsCompression::type >, int > 
FileTypeCountsMap
 

Private Member Functions

void ThreadTokenAvailableCb (ThreadResourceMgr::ResourcePool *pool)
 
HdfsScanner * CreateAndPrepareScanner (HdfsPartitionDescriptor *partition_desc, ScannerContext *context, Status *status)
 
void ScannerThread ()
 
bool EnoughMemoryForScannerThread (bool new_thread)
 
Status GetNextInternal (RuntimeState *state, RowBatch *row_batch, bool *eos)
 Checks for eos conditions and returns batches from materialized_row_batches_. More...
 
void SetDone ()
 
void StopAndFinalizeCounters ()
 

Private Attributes

boost::scoped_ptr< TPlanNode > thrift_plan_node_
 
RuntimeStateruntime_state_
 
const int tuple_id_
 Tuple id resolved in Prepare() to set tuple_desc_. More...
 
DiskIoMgr::RequestContextreader_context_
 RequestContext object to use with the disk-io-mgr for reads. More...
 
const TupleDescriptortuple_desc_
 Descriptor for tuples this scan node constructs. More...
 
const HdfsTableDescriptorhdfs_table_
 
bool unknown_disk_id_warned_
 
boost::unordered_set< int64_t > partition_ids_
 Partitions scanned by this scan node. More...
 
FileDescMap file_descs_
 
FileFormatsMap per_type_files_
 
bool initial_ranges_issued_
 
int64_t scanner_thread_bytes_required_
 
AtomicInt< int > num_unqueued_files_
 Number of files that have not been issued from the scanners. More...
 
ScannerMap scanner_map_
 
CodegendFnMap codegend_fn_map_
 
std::vector< ExprContext * > conjunct_ctxs_
 
PathToSlotIdxMap path_to_materialized_slot_idx_
 
std::vector< char > is_materialized_col_
 
std::vector< SlotDescriptor * > materialized_slots_
 
std::vector< SlotDescriptor * > partition_key_slots_
 
ProgressUpdater progress_
 Keeps track of total splits and the number finished. More...
 
boost::mutex metadata_lock_
 
std::map< std::string, void * > per_file_metadata_
 
ThreadGroup scanner_threads_
 Thread group for all scanner worker threads. More...
 
boost::scoped_ptr< RowBatchQueue > materialized_row_batches_
 
int max_materialized_row_batches_
 Maximum size of materialized_row_batches_. More...
 
AtomicInt< int > num_owned_io_buffers_
 
AtomicInt< int > num_skipped_tokens_
 
AtomicInt< int > num_scanners_codegen_enabled_
 
AtomicInt< int > num_scanners_codegen_disabled_
 
RuntimeProfile::HighWaterMarkCountermax_compressed_text_file_length_
 
RuntimeProfile::Counter disks_accessed_bitmap_
 Disk accessed bitmap. More...
 
RuntimeProfile::Counterbytes_read_local_
 Total number of bytes read locally. More...
 
RuntimeProfile::Counterbytes_read_short_circuit_
 Total number of bytes read via short circuit read. More...
 
RuntimeProfile::Counterbytes_read_dn_cache_
 Total number of bytes read from data node cache. More...
 
RuntimeProfile::Counternum_remote_ranges_
 Total number of remote scan ranges. More...
 
RuntimeProfile::Counterunexpected_remote_bytes_
 Total number of bytes read remotely that were expected to be local. More...
 
boost::mutex lock_
 
bool done_
 
bool all_ranges_started_
 
boost::scoped_ptr< MemPool > scan_node_pool_
 
Status status_
 
SpinLock file_type_counts_lock_
 
FileTypeCountsMap file_type_counts_
 
bool counters_running_
 
int32_t rm_callback_id_
 

Friends

class ScannerContext
 

Detailed Description

A ScanNode implementation that is used for all tables read directly from HDFS-serialised data. A HdfsScanNode spawns multiple scanner threads to process the bytes in parallel. There is a handshake between the scan node and the scanners to get all the splits queued and bytes processed.

  1. The scan node initially calls the Scanner with a list of files and splits for that scanner/file format.
  2. The scanner issues the initial byte ranges for each of those files. For text this is simply the entire range but for rc files, this would just be the header byte range. The scan node doesn't care either way.
  3. The scan node spins up a number of scanner threads. Each of those threads pulls the next scan range to work on from the IoMgr and then processes the range end to end.
  4. The scanner processes the buffers, issuing more scan ranges if necessary.
  5. The scanner finishes the scan range and informs the scan node so it can track end of stream. TODO: this class allocates a bunch of small utility objects that should be recycled.

Definition at line 104 of file hdfs-scan-node.h.

Member Typedef Documentation

typedef std::map<THdfsFileFormat::type, void*> impala::HdfsScanNode::CodegendFnMap
private

Per scanner type codegen'd fn.

Definition at line 323 of file hdfs-scan-node.h.

typedef std::map<std::string, HdfsFileDesc*> impala::HdfsScanNode::FileDescMap
private

File path => file descriptor (which includes the file's splits)

Definition at line 297 of file hdfs-scan-node.h.

typedef std::map<THdfsFileFormat::type, std::vector<HdfsFileDesc*> > impala::HdfsScanNode::FileFormatsMap
private

File format => file descriptors.

Definition at line 301 of file hdfs-scan-node.h.

typedef std::map< std::pair<THdfsFileFormat::type, THdfsCompression::type>, int> impala::HdfsScanNode::FileTypeCountsMap
private

Definition at line 433 of file hdfs-scan-node.h.

typedef boost::unordered_map<std::vector<int>, int> impala::HdfsScanNode::PathToSlotIdxMap
private

Maps from a slot's path to its index into materialized_slots_.

Definition at line 331 of file hdfs-scan-node.h.

typedef boost::unordered_map<int32_t, std::pair<int, int64_t> > impala::HdfsScanNode::PerVolumnStats

map from volume id to <number of splits, per volume split lengths>

Definition at line 252 of file hdfs-scan-node.h.

typedef std::map<THdfsFileFormat::type, HdfsScanner*> impala::HdfsScanNode::ScannerMap
private

Map of file types to HdfsScanner objects. Only one scanner object will be created for each file type. Objects stored in runtime_state's pool.

Definition at line 319 of file hdfs-scan-node.h.

Constructor & Destructor Documentation

HdfsScanNode::HdfsScanNode ( ObjectPool *  pool,
const TPlanNode &  tnode,
const DescriptorTbl &  descs 
)
HdfsScanNode::~HdfsScanNode ( )

Definition at line 108 of file hdfs-scan-node.cc.

Member Function Documentation

RuntimeProfile::Counter& impala::ScanNode::active_scanner_thread_counter ( )
inlineinherited

Definition at line 113 of file scan-node.h.

References impala::ScanNode::active_scanner_thread_counter_.

Status impala::HdfsScanNode::AddDiskIoRanges ( const std::vector< DiskIoMgr::ScanRange * > &  ranges)
void impala::ExecNode::AddExprCtxsToFree ( const SortExecExprs &  sort_exec_exprs)
protectedinherited
void impala::ExecNode::AddExprCtxToFree ( ExprContext *  ctx)
inlineprotectedinherited

Add an ExprContext to have its local allocations freed by QueryMaintenance(). Exprs that are evaluated in the main execution thread should be added. Exprs evaluated in a separate thread are generally not safe to add, since a local allocation may be freed while it's being used. Rather than using this mechanism, threads should call FreeLocalAllocations() on local ExprContexts periodically.

Definition at line 276 of file exec-node.h.

References impala::ExecNode::expr_ctxs_to_free_.

Referenced by impala::AnalyticEvalNode::Prepare().

void HdfsScanNode::AddMaterializedRowBatch ( RowBatch *  row_batch)

Adds a materialized row batch for the scan node. This is called from scanner threads. This function will block if materialized_row_batches_ is full.

Definition at line 688 of file hdfs-scan-node.cc.

References materialized_row_batches_.

Referenced by impala::HdfsScanner::AddFinalRowBatch(), and impala::HdfsScanner::CommitRows().

DiskIoMgr::ScanRange * HdfsScanNode::AllocateScanRange ( hdfsFS  fs,
const char *  file,
int64_t  len,
int64_t  offset,
int64_t  partition_id,
int  disk_id,
bool  try_cache,
bool  expected_local,
int64_t  mtime 
)

Allocate a new scan range object, stored in the runtime state's object pool. For scan ranges that correspond to the original hdfs splits, the partition id must be set to the range's partition id. For other ranges (e.g. columns in parquet, read past buffers), the partition_id is unused. expected_local should be true if this scan range is not expected to require a remote read. The range must fall within the file bounds. That is, the offset must be >= 0, and offset + len <= file_length. This is thread safe.

Definition at line 183 of file hdfs-scan-node.cc.

References impala::ObjectPool::Add(), impala::DiskIoMgr::AssignQueue(), GetFileDesc(), impala::RuntimeState::io_mgr(), impala::RuntimeState::obj_pool(), impala::DiskIoMgr::ScanRange::Reset(), and runtime_state_.

Referenced by impala::HdfsParquetScanner::InitColumns(), impala::BaseSequenceScanner::IssueInitialRanges(), impala::HdfsTextScanner::IssueInitialRanges(), impala::HdfsParquetScanner::IssueInitialRanges(), Prepare(), and impala::HdfsParquetScanner::ProcessFooter().

RuntimeProfile::Counter* impala::ScanNode::average_scanner_thread_concurrency ( ) const
inlineinherited
ExecNode* impala::ExecNode::child ( int  i)
inlineprotectedinherited

Definition at line 241 of file exec-node.h.

References impala::ExecNode::children_.

Referenced by impala::CrossJoinNode::BuildListDebugString(), impala::BlockingJoinNode::BuildSideThread(), impala::HashJoinNode::CodegenCreateOutputRow(), impala::PartitionedHashJoinNode::CodegenCreateOutputRow(), impala::CrossJoinNode::ConstructBuildSide(), impala::HashJoinNode::ConstructBuildSide(), impala::PartitionedHashJoinNode::ConstructBuildSide(), impala::BlockingJoinNode::GetLeftChildRowString(), impala::SelectNode::GetNext(), impala::UnionNode::GetNext(), impala::CrossJoinNode::GetNext(), impala::HashJoinNode::GetNext(), impala::AnalyticEvalNode::GetNextOutputBatch(), impala::PartitionedAggregationNode::Partition::InitStreams(), impala::HashJoinNode::LeftJoinGetNext(), impala::PartitionedHashJoinNode::NextProbeRowBatch(), impala::SelectNode::Open(), impala::SortNode::Open(), impala::TopNNode::Open(), impala::BlockingJoinNode::Open(), impala::AggregationNode::Open(), impala::AnalyticEvalNode::Open(), impala::PartitionedAggregationNode::Open(), impala::UnionNode::OpenCurrentChild(), impala::SelectNode::Prepare(), impala::SortNode::Prepare(), impala::UnionNode::Prepare(), impala::TopNNode::Prepare(), impala::BlockingJoinNode::Prepare(), impala::HashJoinNode::Prepare(), impala::AggregationNode::Prepare(), impala::AnalyticEvalNode::Prepare(), impala::PartitionedHashJoinNode::Prepare(), impala::PartitionedAggregationNode::Prepare(), impala::PartitionedHashJoinNode::ProcessBuildInput(), impala::AnalyticEvalNode::ProcessChildBatches(), and impala::SortNode::SortInput().

void HdfsScanNode::Close ( RuntimeState *  state)
virtual

Close() will get called for every exec node, regardless of what else is called and the status of these calls (i.e. Prepare() may never have been called, or Prepare()/Open()/GetNext() returned with an error). Close() releases all resources that were allocated in Open()/GetNext(), even if the latter ended with an error. Close() can be called if the node has been prepared or the node is closed. The default implementation updates runtime profile counters and calls Close() on the children. Subclasses should check if the node has already been closed (is_closed()), then close themselves, then call the base Close(). Nodes that are using tuples returned by a child may call Close() on their children before their own Close() if the child node has returned eos. It is only safe to call Close() on the child node while the parent node is still returning rows if the parent node fully materializes the child's input.

Reimplemented from impala::ExecNode.

Definition at line 622 of file hdfs-scan-node.cc.

References impala::ScanNode::active_hdfs_read_thread_counter_, impala::ScanNode::active_scanner_thread_counter_, impala::DiskIoMgr::CancelContext(), impala::ExecNode::Close(), impala::Expr::Close(), impala::HdfsPartitionDescriptor::CloseExprs(), conjunct_ctxs_, impala::HdfsTableDescriptor::GetPartition(), hdfs_table_, impala::RuntimeState::io_mgr(), impala::ExecNode::is_closed(), impala::ThreadGroup::JoinAll(), materialized_row_batches_, num_owned_io_buffers_, partition_ids_, impala::RuntimeState::query_resource_mgr(), reader_context_, impala::RuntimeState::reader_contexts(), impala::QueryResourceMgr::RemoveVcoreAvailableCb(), impala::RuntimeState::resource_pool(), rm_callback_id_, scan_node_pool_, scanner_threads_, SetDone(), impala::ThreadResourceMgr::ResourcePool::SetThreadAvailableCb(), StopAndFinalizeCounters(), and impala::RuntimeProfile::Counter::value().

void impala::ExecNode::CollectNodes ( TPlanNodeType::type  node_type,
std::vector< ExecNode * > *  nodes 
)
inherited

Collect all nodes of given 'node_type' that are part of this subtree, and return in 'nodes'.

Definition at line 359 of file exec-node.cc.

References impala::ExecNode::children_, and impala::ExecNode::type_.

Referenced by impala::ExecNode::CollectScanNodes(), and impala::PlanFragmentExecutor::Prepare().

void impala::ExecNode::CollectScanNodes ( std::vector< ExecNode * > *  nodes)
inherited

Collect all scan node types.

Definition at line 366 of file exec-node.cc.

References impala::ExecNode::CollectNodes().

Referenced by impala::PlanFragmentExecutor::Prepare().

void HdfsScanNode::ComputeSlotMaterializationOrder ( std::vector< int > *  order) const

Utility function to compute the order in which to materialize slots to allow for computing conjuncts as slots get materialized (on partial tuples). 'order' will contain for each slot, the first conjunct it is associated with. e.g. order[2] = 1 indicates materialized_slots[2] must be materialized before evaluating conjuncts[1]. Slots that are not referenced by any conjuncts will have order set to conjuncts.size().

Definition at line 956 of file hdfs-scan-node.cc.

References impala::SlotDescriptor::col_path(), impala::ExecNode::conjunct_ctxs(), impala::RuntimeState::desc_tbl(), GetMaterializedSlotIdx(), impala::DescriptorTbl::GetSlotDescriptor(), materialized_slots(), and runtime_state_.

Referenced by impala::HdfsScanner::CodegenWriteCompleteTuple().

HdfsScanner * HdfsScanNode::CreateAndPrepareScanner ( HdfsPartitionDescriptor *  partition_desc,
ScannerContext *  context,
Status *  status 
)
private
Status impala::ExecNode::CreateNode ( ObjectPool *  pool,
const TPlanNode &  tnode,
const DescriptorTbl &  descs,
ExecNode **  node 
)
staticprotectedinherited

Create a single exec node derived from thrift node; place exec node in 'pool'.

Definition at line 260 of file exec-node.cc.

References impala::ObjectPool::Add(), impala::Status::OK, and RETURN_IF_ERROR.

Referenced by impala::ExecNode::CreateTreeHelper().

Status impala::ExecNode::CreateTree ( ObjectPool *  pool,
const TPlan &  plan,
const DescriptorTbl &  descs,
ExecNode **  root 
)
staticinherited

Creates exec node tree from list of nodes contained in plan via depth-first traversal. All nodes are placed in pool. Returns error if 'plan' is corrupted, otherwise success.

Definition at line 199 of file exec-node.cc.

References impala::ExecNode::CreateTreeHelper(), impala::Status::OK, and impala::Status::ok().

Referenced by impala::PlanFragmentExecutor::Prepare().

Status impala::ExecNode::CreateTreeHelper ( ObjectPool *  pool,
const std::vector< TPlanNode > &  tnodes,
const DescriptorTbl &  descs,
ExecNode *  parent,
int *  node_idx,
ExecNode **  root 
)
staticprotectedinherited
virtual void impala::ExecNode::DebugString ( int  indentation_level,
std::stringstream *  out 
) const
virtualinherited

Recursive helper method for generating a string for DebugString(). Implementations should call DebugString(int, std::stringstream) on their children. Input parameters: indentation_level: Current level in plan tree. Output parameters: out: Stream to accumulate debug string.

Reimplemented in impala::BlockingJoinNode, impala::PartitionedAggregationNode, impala::AnalyticEvalNode, impala::AggregationNode, impala::DataSourceScanNode, impala::ExchangeNode, impala::HBaseScanNode, impala::TopNNode, and impala::SortNode.

bool HdfsScanNode::EnoughMemoryForScannerThread ( bool  new_thread)
private

Returns true if there is enough memory (against the mem tracker limits) to have a scanner thread. If new_thread is true, the calculation is for starting a new scanner thread. If false, it determines whether there's adequate memory for the existing set of scanner threads. lock_ must be taken before calling this.

Definition at line 706 of file hdfs-scan-node.cc.

References impala::ScanNode::active_scanner_thread_counter_, impala::MemTracker::consumption(), impala::ExecNode::mem_tracker(), scanner_thread_bytes_required_, impala::MemTracker::SpareCapacity(), and impala::RuntimeProfile::Counter::value().

Referenced by ScannerThread(), and ThreadTokenAvailableCb().

void * HdfsScanNode::GetCodegenFn ( THdfsFileFormat::type  type)

Returns the per format codegen'd function. Scanners call this to get the codegen'd function to use. Returns NULL if codegen should not be used.

Definition at line 224 of file hdfs-scan-node.cc.

References codegend_fn_map_.

Referenced by impala::HdfsScanner::InitializeWriteTuplesFn(), and impala::HdfsAvroScanner::InitNewRange().

Status HdfsScanNode::GetConjunctCtxs ( std::vector< ExprContext * > *  ctxs)

Creates a clone of conjunct_ctxs_. 'ctxs' should be non-NULL and empty. The returned contexts must be closed by the caller.

Definition at line 692 of file hdfs-scan-node.cc.

References impala::Expr::Clone(), conjunct_ctxs_, and runtime_state_.

Referenced by impala::HdfsScanner::Prepare().

void * HdfsScanNode::GetFileMetadata ( const std::string &  filename)

Gets scanner specific metadata for 'filename'. Scanners can use this to store file header information. Returns NULL if there is no metadata. This is thread safe.

Definition at line 217 of file hdfs-scan-node.cc.

References metadata_lock_, and per_file_metadata_.

Referenced by impala::BaseSequenceScanner::ProcessSplit().

int impala::HdfsScanNode::GetMaterializedSlotIdx ( const std::vector< int > &  path) const
inline

Returns index into materialized_slots with 'path'. Returns SKIP_COLUMN if that path is not materialized.

Definition at line 152 of file hdfs-scan-node.h.

References path_to_materialized_slot_idx_, and SKIP_COLUMN.

Referenced by impala::HdfsAvroScanner::CodegenMaterializeTuple(), ComputeSlotMaterializationOrder(), impala::HdfsRCFileScanner::InitNewRange(), Prepare(), and impala::HdfsAvroScanner::ResolveSchemas().

Status HdfsScanNode::GetNext ( RuntimeState *  state,
RowBatch *  row_batch,
bool *  eos 
)
virtual

Retrieves rows and returns them via row_batch. Sets eos to true if subsequent calls will not retrieve any more rows. Data referenced by any tuples returned in row_batch must not be overwritten by the callee until Close() is called. The memory holding that data can be returned via row_batch's tuple_data_pool (in which case it may be deleted by the caller) or held on to by the callee. The row_batch, including its tuple_data_pool, will be destroyed by the caller at some point prior to the final Close() call. In other words, if the memory holding the tuple data will be referenced by the callee in subsequent GetNext() calls, it must not be attached to the row_batch's tuple_data_pool. Caller must not be holding any io buffers. This will cause deadlock. TODO: AggregationNode and HashJoinNode cannot be "re-opened" yet.

Implements impala::ExecNode.

Definition at line 111 of file hdfs-scan-node.cc.

References impala::ProgressUpdater::done(), GetNextInternal(), initial_ranges_issued_, impala::Status::IsMemLimitExceeded(), impala::BaseSequenceScanner::IssueInitialRanges(), impala::HdfsTextScanner::IssueInitialRanges(), impala::HdfsParquetScanner::IssueInitialRanges(), impala::Status::ok(), per_type_files_, progress_, RETURN_IF_ERROR, impala::ExecNode::runtime_profile_, SCOPED_TIMER, SetDone(), impala::RuntimeState::SetMemLimitExceeded(), and StopAndFinalizeCounters().

int impala::ExecNode::GetNodeIdFromProfile ( RuntimeProfile *  p)
staticinherited

Extract node id from p->name().

Definition at line 62 of file exec-node.cc.

References impala::RuntimeProfile::metadata().

void impala::HdfsScanNode::IncNumScannersCodegenDisabled ( )
inline
void impala::HdfsScanNode::IncNumScannersCodegenEnabled ( )
inline
Tuple * HdfsScanNode::InitEmptyTemplateTuple ( )

Allocates and returns an empty template tuple (i.e. with no values filled in). Scanners can use this method to initialize a template tuple even if there are no materialized partition keys (e.g. to hold Avro default values).

Definition at line 289 of file hdfs-scan-node.cc.

References impala::TupleDescriptor::byte_size(), impala::Tuple::Create(), lock_, scan_node_pool_, and tuple_desc_.

Referenced by impala::HdfsParquetScanner::CreateColumnReaders(), InitTemplateTuple(), and impala::HdfsAvroScanner::ResolveSchemas().

void impala::ExecNode::InitRuntimeProfile ( const std::string &  name)
protectedinherited
Tuple * HdfsScanNode::InitTemplateTuple ( RuntimeState *  state,
const std::vector< ExprContext * > &  value_ctxs 
)

Allocates and initialises template_tuple_ with any values from the partition columns for the current scan range. Returns NULL if there are no materialized partition keys. TODO: cache the tuple template in the partition object.

Definition at line 270 of file hdfs-scan-node.cc.

References impala::SlotDescriptor::col_pos(), InitEmptyTemplateTuple(), lock_, partition_key_slots_, and impala::RawValue::Write().

Referenced by impala::HdfsScanner::Prepare().

const bool* impala::HdfsScanNode::is_materialized_col ( )
inline

The result array is of length num_cols(). The i-th element is true iff column i should be materialized.

Definition at line 160 of file hdfs-scan-node.h.

References is_materialized_col_.

Referenced by impala::HdfsTextScanner::InitNewRange(), and impala::HdfsSequenceScanner::InitNewRange().

virtual bool impala::ScanNode::IsScanNode ( ) const
inlinevirtualinherited

Reimplemented from impala::ExecNode.

Definition at line 93 of file scan-node.h.

int impala::HdfsScanNode::limit ( ) const
inline
void HdfsScanNode::MarkFileDescIssued ( const HdfsFileDesc file_desc)

Indicates that this file_desc's scan ranges have all been issued to the IoMgr. For each file, the scanner must call MarkFileDescIssued() or AddDiskIoRanges(). Issuing ranges happens asynchronously. For many of the file formats we synchronously issue the file header/footer in Open() but the rest of the splits for the file are issued asynchronously.

Definition at line 683 of file hdfs-scan-node.cc.

References num_unqueued_files_.

Referenced by AddDiskIoRanges(), and impala::HdfsParquetScanner::ProcessFooter().

RuntimeProfile::HighWaterMarkCounter* impala::HdfsScanNode::max_compressed_text_file_length ( )
inline
int impala::HdfsScanNode::num_materialized_partition_keys ( ) const
inline

Returns number of materialized partition key slots.

Definition at line 130 of file hdfs-scan-node.h.

References partition_key_slots_.

Referenced by impala::HdfsScanner::CodegenWriteCompleteTuple(), and impala::HdfsTextScanner::FinishScanRange().

Status HdfsScanNode::Open ( RuntimeState state)
virtual

Performs any preparatory work prior to calling GetNext(). Caller must not be holding any io buffers. This will cause deadlock. If overridden in subclass, must first call superclass's Open(). If a parent exec node adds slot filters (see RuntimeState::AddBitmapFilter()), they need to be added before calling Open() on the child that will consume them.

Reimplemented from impala::ExecNode.

Definition at line 511 of file hdfs-scan-node.cc.

References impala::ScanNode::active_hdfs_read_thread_counter_, impala::ScanNode::active_scanner_thread_counter_, impala::ObjectPool::Add(), ADD_COUNTER, ADD_TIMER, impala::RuntimeProfile::AddDerivedCounter(), impala::RuntimeProfile::AddHighWaterMarkCounter(), impala::RuntimeProfile::AddSamplingCounter(), impala::QueryResourceMgr::AddVcoreAvailableCb(), impala::ScanNode::AVERAGE_HDFS_READ_THREAD_CONCURRENCY, impala::ScanNode::average_hdfs_read_thread_concurrency_, impala::ScanNode::AVERAGE_SCANNER_THREAD_CONCURRENCY, impala::ScanNode::average_scanner_thread_concurrency_, impala::ScanNode::bytes_read_counter(), impala::ScanNode::bytes_read_counter_, bytes_read_dn_cache_, bytes_read_local_, bytes_read_short_circuit_, conjunct_ctxs_, counters_running_, disks_accessed_bitmap_, file_descs_, impala::HdfsTableDescriptor::GetPartition(), impala::ScanNode::hdfs_read_thread_concurrency_bucket_, hdfs_table_, impala::ExecNode::id(), impala::RuntimeState::io_mgr(), max_compressed_text_file_length_, impala::ExecNode::mem_tracker(), impala::DiskInfo::num_disks(), impala::ScanNode::NUM_DISKS_ACCESSED_COUNTER, impala::ScanNode::num_disks_accessed_counter_, num_remote_ranges_, impala::ScanNode::NUM_SCANNER_THREADS_STARTED, impala::ScanNode::num_scanner_threads_started_counter_, impala::DiskIoMgr::num_total_disks(), impala::Status::OK, impala::ExecNode::Open(), impala::Expr::Open(), impala::HdfsPartitionDescriptor::OpenExprs(), partition_ids_, impala::ScanNode::PER_READ_THREAD_THROUGHPUT_COUNTER, impala::ScanNode::per_read_thread_throughput_counter_, impala::ExecNode::pool_, progress_, impala::RuntimeState::query_options(), impala::RuntimeState::query_resource_mgr(), impala::ScanNode::read_timer(), impala::ScanNode::read_timer_, reader_context_, impala::RuntimeProfile::RegisterBucketingCounters(), impala::DiskIoMgr::RegisterContext(), impala::ThreadResourceMgr::ResourcePool::ReserveOptionalTokens(), impala::RuntimeState::resource_pool(), RETURN_IF_ERROR, rm_callback_id_, 
impala::ExecNode::runtime_profile(), runtime_state_, impala::ScanNode::SCAN_RANGES_COMPLETE_COUNTER, impala::ScanNode::scan_ranges_complete_counter_, impala::DiskIoMgr::set_active_read_thread_counter(), impala::DiskIoMgr::set_bytes_read_counter(), impala::DiskIoMgr::set_disks_access_bitmap(), impala::ThreadResourceMgr::ResourcePool::set_max_quota(), impala::DiskIoMgr::set_read_timer(), SetDone(), impala::ThreadResourceMgr::ResourcePool::SetThreadAvailableCb(), ThreadTokenAvailableCb(), impala::ScanNode::TOTAL_HDFS_READ_TIMER, unexpected_remote_bytes_, and impala::RuntimeProfile::UnitsPerSecond().

RuntimeProfile::Counter* impala::ScanNode::per_read_thread_throughput_counter ( ) const
inlineinherited
Status HdfsScanNode::Prepare ( RuntimeState state)
virtual

ExecNode methods.

Reimplemented from impala::ScanNode.

Definition at line 304 of file hdfs-scan-node.cc.

References impala::ExecNode::AddExprCtxsToFree(), impala::LlvmCodeGen::AddFunctionToJit(), impala::RuntimeProfile::AddInfoString(), impala::ExecNode::AddRuntimeExecOption(), AllocateScanRange(), impala::RuntimeState::cgroup(), impala::ExecEnv::cgroups_mgr(), impala::HdfsTextScanner::Codegen(), impala::HdfsAvroScanner::Codegen(), impala::HdfsSequenceScanner::Codegen(), codegend_fn_map_, impala::SlotDescriptor::ColPathLessThan(), COMPRESSED_TEXT_COMPRESSION_RATIO, conjunct_ctxs_, impala::Expr::CreateExprTrees(), impala::RuntimeState::desc_tbl(), impala::RuntimeState::exec_env(), impala::ExecNode::expr_mem_tracker(), impala::HdfsFileDesc::file_compression, file_descs_, impala::HdfsPartitionDescriptor::file_format(), impala::HdfsFileDesc::file_length, impala::HdfsFileDesc::filename, impala::HdfsFileDesc::fs, GetMaterializedSlotIdx(), impala::DescriptorTbl::GetTupleDescriptor(), HDFS_SPLIT_STATS_DESC, hdfs_table_, impala::HdfsFsCache::instance(), is_materialized_col_, impala::HdfsPartitionDescriptor::location(), impala::RuntimeState::LogError(), materialized_slots_, impala::ExecNode::mem_tracker(), impala::HdfsFileDesc::mtime, impala::ImpaladMetrics::NUM_RANGES_MISSING_VOLUME_ID, impala::ImpaladMetrics::NUM_RANGES_PROCESSED, num_unqueued_files_, impala::Status::OK, partition_ids_, partition_key_slots_, path(), path_to_materialized_slot_idx_, per_type_files_, impala::ScanNode::Prepare(), impala::Expr::Prepare(), impala::HdfsPartitionDescriptor::PrepareExprs(), PrintHdfsSplitStats(), RETURN_IF_ERROR, impala::ExecNode::row_desc(), impala::ExecNode::runtime_profile(), impala::ExecNode::runtime_profile_, runtime_state(), runtime_state_, scan_node_pool_, impala::ScanNode::scan_range_params_, scanner_thread_bytes_required_, SCANNER_THREAD_MEM_USAGE, scanner_threads_, SCOPED_TIMER, impala::ThreadGroup::SetCgroup(), impala::ThreadGroup::SetCgroupsMgr(), SKIP_COLUMN, impala::TupleDescriptor::slots(), impala::HdfsFileDesc::splits, impala::TupleDescriptor::table_desc(), 
thrift_plan_node_, tuple_desc_, tuple_id_, unknown_disk_id_warned_, and UpdateHdfsSplitStats().

void HdfsScanNode::PrintHdfsSplitStats ( const PerVolumnStats per_volume_stats,
std::stringstream *  ss 
)
static

Output the per_volume_stats to stringstream. The output format is a list of: <volume id>:<# splits>/<per volume split lengths>

Definition at line 1073 of file hdfs-scan-node.cc.

References impala::PrettyPrinter::Print().

Referenced by Prepare(), and impala::PlanFragmentExecutor::PrintVolumeIds().

Status impala::ExecNode::QueryMaintenance ( RuntimeState state)
protectedvirtualinherited
void HdfsScanNode::RangeComplete ( const THdfsFileFormat::type &  file_type,
const THdfsCompression::type &  compression_type 
)

Called by the scanner when a range is complete. Used to trigger done_ and to log progress. This must only be called after the scanner has completely finished the scan range (i.e. context->Flush()).

Definition at line 924 of file hdfs-scan-node.cc.

Referenced by impala::HdfsTextScanner::Close(), impala::BaseSequenceScanner::Close(), impala::HdfsParquetScanner::Close(), impala::BaseSequenceScanner::CloseFileRanges(), impala::HdfsTextScanner::IssueInitialRanges(), and impala::HdfsParquetScanner::IssueInitialRanges().

void impala::HdfsScanNode::RangeComplete ( const THdfsFileFormat::type &  file_type,
const std::vector< THdfsCompression::type > &  compression_type 
)

Same as above except for when multiple compression codecs were used in the file. The metrics are incremented for each compression_type.

RuntimeProfile::Counter* impala::ScanNode::read_timer ( ) const
inlineinherited

Definition at line 97 of file scan-node.h.

References impala::ScanNode::read_timer_.

Referenced by impala::HBaseTableScanner::Next(), and Open().

DiskIoMgr::RequestContext* impala::HdfsScanNode::reader_context ( )
inline
Status HdfsScanNode::Reset ( RuntimeState state)
virtual

Resets all data-specific state, returning this node to the state it was in after calling Prepare() and before calling Open(). Prepare() must have already been called before calling Reset(). Open() and GetNext() may have optionally been called. Close() must not have been called. If overridden in a subclass, must call superclass's Reset() at the end. The default implementation calls Reset() on children. Note that this function may be called many times, so should be fast. For example, accumulated memory does not need to be freed on every call if it's expensive.

Reimplemented from impala::ExecNode.

Definition at line 617 of file hdfs-scan-node.cc.

RuntimeProfile::Counter* impala::ScanNode::scan_ranges_complete_counter ( ) const
inlineinherited

Definition at line 107 of file scan-node.h.

References impala::ScanNode::scan_ranges_complete_counter_.

RuntimeProfile::ThreadCounters* impala::ScanNode::scanner_thread_counters ( ) const
inlineinherited
void HdfsScanNode::ScannerThread ( )
private

Main function for scanner thread. This thread pulls the next range to be processed from the IoMgr and then processes the entire range end to end. This thread terminates when all scan ranges are complete or an error occurred.

Definition at line 797 of file hdfs-scan-node.cc.

References impala::ScanNode::active_scanner_thread_counter_, impala::ObjectPool::Add(), all_ranges_started_, impala::HdfsScanner::Close(), COUNTER_ADD, CreateAndPrepareScanner(), impala::ProgressUpdater::done(), done_, EnoughMemoryForScannerThread(), impala::RuntimeState::error_log(), impala::RuntimeState::ErrorLog(), impala::DiskIoMgr::RequestRange::file(), impala::HdfsPartitionDescriptor::file_format(), impala::DiskIoMgr::GetNextRange(), impala::HdfsTableDescriptor::GetPartition(), impala::ScannerContext::GetStream(), hdfs_table_, impala::ExecNode::id(), impala::RuntimeState::io_mgr(), impala::Status::IsCancelled(), impala::Status::IsMemLimitExceeded(), impala::DiskIoMgr::RequestRange::len(), lock_, impala::AtomicUtil::MemoryBarrier(), impala::DiskIoMgr::ScanRange::meta_data(), impala::QueryResourceMgr::NotifyThreadUsageChange(), num_unqueued_files_, impala::RuntimeState::obj_pool(), impala::DiskIoMgr::RequestRange::offset(), impala::Status::ok(), impala::ThreadResourceMgr::ResourcePool::optional_exceeded(), impala::ScanRangeMetadata::partition_id, impala::HdfsScanner::ProcessSplit(), progress_, impala::RuntimeState::query_resource_mgr(), reader_context_, impala::ThreadResourceMgr::ResourcePool::ReleaseThreadToken(), impala::RuntimeState::resource_pool(), runtime_state_, impala::ScanNode::scanner_thread_counters(), ScannerContext, SCOPED_THREAD_COUNTER_MEASUREMENT, SCOPED_TIMER, SetDone(), impala::RuntimeState::SetMemLimitExceeded(), status_, impala::ScannerContext::Stream::total_bytes_returned(), impala::RuntimeState::total_cpu_timer(), impala::RuntimeProfile::Counter::value(), VLOG_QUERY, and VLOG_QUERY_IS_ON.

Referenced by ThreadTokenAvailableCb().

void impala::ExecNode::SetDebugOptions ( int  node_id,
TExecNodePhase::type  phase,
TDebugAction::type  action,
ExecNode tree 
)
staticinherited

Set debug action for node with given id in 'tree'.

Definition at line 332 of file exec-node.cc.

References impala::ExecNode::children_, impala::ExecNode::debug_action_, impala::ExecNode::debug_phase_, and impala::ExecNode::id_.

Referenced by impala::PlanFragmentExecutor::Prepare().

void HdfsScanNode::SetDone ( )
private

Sets done_ to true and triggers threads to clean up. Cannot be called with any locks taken. Calling it repeatedly ignores subsequent calls.

Definition at line 944 of file hdfs-scan-node.cc.

References impala::DiskIoMgr::CancelContext(), done_, impala::RuntimeState::io_mgr(), lock_, materialized_row_batches_, reader_context_, and runtime_state_.

Referenced by Close(), GetNext(), GetNextInternal(), Open(), and ScannerThread().

void HdfsScanNode::SetFileMetadata ( const std::string &  filename,
void *  metadata 
)

Sets the scanner specific metadata for 'filename'. This is thread safe.

Definition at line 211 of file hdfs-scan-node.cc.

References metadata_lock_, and per_file_metadata_.

Referenced by impala::BaseSequenceScanner::ProcessSplit().

void impala::ScanNode::SetScanRanges ( const std::vector< TScanRangeParams > &  scan_range_params)
inlineinherited

This should be called before Prepare(), and the argument must not be destroyed until after Prepare().

Definition at line 89 of file scan-node.h.

References impala::ScanNode::scan_range_params_.

Referenced by impala::PlanFragmentExecutor::Prepare().

void HdfsScanNode::StopAndFinalizeCounters ( )
private

Stops periodic counters and aggregates counter values for the entire scan node. This should be called as soon as the scan node is complete to get the most accurate counter values. This can be called multiple times, subsequent calls will be ignored. This must be called on Close() to unregister counters.

Definition at line 982 of file hdfs-scan-node.cc.

References impala::ExecNode::AddRuntimeExecOption(), impala::ScanNode::average_hdfs_read_thread_concurrency_, impala::ScanNode::average_scanner_thread_concurrency_, impala::ScanNode::bytes_read_counter(), impala::DiskIoMgr::bytes_read_dn_cache(), bytes_read_dn_cache_, impala::DiskIoMgr::bytes_read_local(), bytes_read_local_, impala::DiskIoMgr::bytes_read_short_circuit(), bytes_read_short_circuit_, impala::ScanNode::bytes_read_timeseries_counter_, counters_running_, impala::TableDescriptor::database(), disks_accessed_bitmap_, file_type_counts_, file_type_counts_lock_, impala::ScanNode::hdfs_read_thread_concurrency_bucket_, hdfs_table_, impala::RuntimeState::io_mgr(), impala::ImpaladMetrics::IO_MGR_BYTES_READ, impala::ImpaladMetrics::IO_MGR_CACHED_BYTES_READ, impala::ImpaladMetrics::IO_MGR_LOCAL_BYTES_READ, impala::ImpaladMetrics::IO_MGR_SHORT_CIRCUIT_BYTES_READ, lock_, impala::RuntimeState::LogError(), impala::TableDescriptor::name(), impala::ScanNode::num_disks_accessed_counter_, impala::DiskIoMgr::num_remote_ranges(), num_remote_ranges_, num_scanners_codegen_disabled_, num_scanners_codegen_enabled_, impala::BitUtil::Popcount(), impala::PrettyPrinter::Print(), reader_context_, impala::ExecNode::runtime_profile_, runtime_state_, impala::RuntimeProfile::Counter::Set(), impala::PeriodicCounterUpdater::StopBucketingCounters(), impala::PeriodicCounterUpdater::StopRateCounter(), impala::PeriodicCounterUpdater::StopSamplingCounter(), impala::PeriodicCounterUpdater::StopTimeSeriesCounter(), impala::ScanNode::total_throughput_counter(), impala::DiskIoMgr::unexpected_remote_bytes(), unexpected_remote_bytes_, UNEXPECTED_REMOTE_BYTES_WARN_THRESHOLD, and impala::RuntimeProfile::Counter::value().

Referenced by Close(), and GetNext().

RuntimeProfile::Counter* impala::ScanNode::total_throughput_counter ( ) const
inlineinherited
void HdfsScanNode::TransferToScanNodePool ( MemPool pool)

Acquires all allocations from pool into scan_node_pool_. Thread-safe.

Definition at line 299 of file hdfs-scan-node.cc.

References lock_, and scan_node_pool_.

Referenced by impala::HdfsAvroScanner::ResolveSchemas().

TPlanNodeType::type impala::ExecNode::type ( ) const
inlineinherited
void HdfsScanNode::UpdateHdfsSplitStats ( const std::vector< TScanRangeParams > &  scan_range_params_list,
PerVolumnStats per_volume_stats 
)
static

Update the per volume stats with the given scan range params list.

Definition at line 1058 of file hdfs-scan-node.cc.

References impala::FindOrInsert().

Referenced by Prepare(), and impala::PlanFragmentExecutor::PrintVolumeIds().

Friends And Related Function Documentation

friend class ScannerContext
friend

Definition at line 268 of file hdfs-scan-node.h.

Referenced by ScannerThread().

Member Data Documentation

RuntimeProfile::Counter impala::ScanNode::active_hdfs_read_thread_counter_
protectedinherited

The number of active hdfs reading threads reading for this node.

Definition at line 164 of file scan-node.h.

Referenced by Close(), and Open().

RuntimeProfile::Counter impala::ScanNode::active_scanner_thread_counter_
protectedinherited

The number of active scanner threads that are not blocked by IO.

Definition at line 157 of file scan-node.h.

Referenced by impala::ScanNode::active_scanner_thread_counter(), Close(), EnoughMemoryForScannerThread(), Open(), ScannerThread(), and ThreadTokenAvailableCb().

bool impala::HdfsScanNode::all_ranges_started_
private

Set to true if all ranges have started. Some of the ranges may still be in flight being processed by scanner threads, but no new ScannerThreads should be started.

Definition at line 417 of file hdfs-scan-node.h.

Referenced by ScannerThread(), and ThreadTokenAvailableCb().

const string impala::ScanNode::AVERAGE_HDFS_READ_THREAD_CONCURRENCY
staticinherited
Initial value:
=
"AverageHdfsReadThreadConcurrency"

Definition at line 133 of file scan-node.h.

Referenced by Open().

RuntimeProfile::Counter* impala::ScanNode::average_hdfs_read_thread_concurrency_
protectedinherited

Average number of active hdfs reading threads. This should be created in Open and stopped when all the scanner threads are done.

Definition at line 168 of file scan-node.h.

Referenced by Open(), and StopAndFinalizeCounters().

const string impala::ScanNode::AVERAGE_SCANNER_THREAD_CONCURRENCY
staticinherited
Initial value:
=
"AverageScannerThreadConcurrency"

Definition at line 132 of file scan-node.h.

Referenced by Open().

RuntimeProfile::Counter* impala::ScanNode::average_scanner_thread_concurrency_
protectedinherited

Average number of active scanner threads. This should be created in Open and stopped when all the scanner threads are done.

Definition at line 161 of file scan-node.h.

Referenced by impala::ScanNode::average_scanner_thread_concurrency(), Open(), and StopAndFinalizeCounters().

const string impala::ScanNode::BYTES_READ_COUNTER = "BytesRead"
staticinherited

names of ScanNode common counters

Definition at line 121 of file scan-node.h.

Referenced by impala::ScanNode::Prepare().

RuntimeProfile::Counter* impala::ScanNode::bytes_read_counter_
protectedinherited
RuntimeProfile::Counter* impala::HdfsScanNode::bytes_read_dn_cache_
private

Total number of bytes read from data node cache.

Definition at line 396 of file hdfs-scan-node.h.

Referenced by Open(), and StopAndFinalizeCounters().

RuntimeProfile::Counter* impala::HdfsScanNode::bytes_read_local_
private

Total number of bytes read locally.

Definition at line 390 of file hdfs-scan-node.h.

Referenced by Open(), and StopAndFinalizeCounters().

RuntimeProfile::Counter* impala::HdfsScanNode::bytes_read_short_circuit_
private

Total number of bytes read via short circuit read.

Definition at line 393 of file hdfs-scan-node.h.

Referenced by Open(), and StopAndFinalizeCounters().

RuntimeProfile::TimeSeriesCounter* impala::ScanNode::bytes_read_timeseries_counter_
protectedinherited

Time series of the bytes_read_counter_.

Definition at line 142 of file scan-node.h.

Referenced by impala::HBaseScanNode::Close(), impala::ScanNode::Prepare(), and StopAndFinalizeCounters().

CodegendFnMap impala::HdfsScanNode::codegend_fn_map_
private

Definition at line 324 of file hdfs-scan-node.h.

Referenced by GetCodegenFn(), and Prepare().

std::vector<ExprContext*> impala::HdfsScanNode::conjunct_ctxs_
private

Contexts for each conjunct. These are cloned by the scanners so conjuncts can be safely evaluated in parallel.

Definition at line 328 of file hdfs-scan-node.h.

Referenced by Close(), GetConjunctCtxs(), Open(), and Prepare().

bool impala::HdfsScanNode::counters_running_
private

If true, counters are actively running and need to be reported in the runtime profile.

Definition at line 438 of file hdfs-scan-node.h.

Referenced by Open(), and StopAndFinalizeCounters().

TDebugAction::type impala::ExecNode::debug_action_
protectedinherited
TExecNodePhase::type impala::ExecNode::debug_phase_
protectedinherited

debug-only: if debug_action_ is not INVALID, node will perform action in debug_phase_

Definition at line 219 of file exec-node.h.

Referenced by impala::ExecNode::ExecDebugAction(), and impala::ExecNode::SetDebugOptions().

RuntimeProfile::Counter impala::HdfsScanNode::disks_accessed_bitmap_
private

Disk accessed bitmap.

Definition at line 387 of file hdfs-scan-node.h.

Referenced by Open(), and StopAndFinalizeCounters().

bool impala::HdfsScanNode::done_
private

Flag signaling that all scanner threads are done. This could be because they are finished, an error/cancellation occurred, or the limit was reached. Setting this to true triggers the scanner threads to clean up. This should not be explicitly set. Instead, call SetDone().

Definition at line 413 of file hdfs-scan-node.h.

Referenced by impala::ScannerContext::cancelled(), ScannerThread(), SetDone(), and ThreadTokenAvailableCb().

boost::mutex impala::ExecNode::exec_options_lock_
protectedinherited

Execution options that are determined at runtime. This is added to the runtime profile at Close(). Examples for options logged here would be "Codegen Enabled"

Definition at line 238 of file exec-node.h.

Referenced by impala::ExecNode::AddRuntimeExecOption().

boost::scoped_ptr<MemTracker> impala::ExecNode::expr_mem_tracker_
protectedinherited

MemTracker that should be used for ExprContexts.

Definition at line 233 of file exec-node.h.

Referenced by impala::ExecNode::expr_mem_tracker(), and impala::ExecNode::Prepare().

FileDescMap impala::HdfsScanNode::file_descs_
private

Definition at line 298 of file hdfs-scan-node.h.

Referenced by GetFileDesc(), Open(), and Prepare().

FileTypeCountsMap impala::HdfsScanNode::file_type_counts_
private

Definition at line 434 of file hdfs-scan-node.h.

Referenced by StopAndFinalizeCounters().

SpinLock impala::HdfsScanNode::file_type_counts_lock_
private

Mapping of file formats (file type, compression type) to the number of splits of that type and the lock protecting it. This lock cannot be taken together with any other lock except lock_.

Definition at line 431 of file hdfs-scan-node.h.

Referenced by StopAndFinalizeCounters().

std::vector<RuntimeProfile::Counter*> impala::ScanNode::hdfs_read_thread_concurrency_bucket_
protectedinherited

HDFS read thread concurrency bucket: bucket[i] refers to the number of samples taken where there are i concurrent hdfs read threads running.

Definition at line 174 of file scan-node.h.

Referenced by Open(), and StopAndFinalizeCounters().

const string HdfsScanNode::HDFS_SPLIT_STATS_DESC
static
Initial value:
=
"Hdfs split stats (<volume id>:<# splits>/<split lengths>)"

Description string for the per volume stats output.

Definition at line 265 of file hdfs-scan-node.h.

Referenced by Prepare(), and impala::PlanFragmentExecutor::PrintVolumeIds().

const HdfsTableDescriptor* impala::HdfsScanNode::hdfs_table_
private

Descriptor for the hdfs table, including partition and format metadata. Set in Prepare, owned by RuntimeState

Definition at line 287 of file hdfs-scan-node.h.

Referenced by Close(), hdfs_table(), num_partition_keys(), Open(), Prepare(), ScannerThread(), and StopAndFinalizeCounters().

bool impala::HdfsScanNode::initial_ranges_issued_
private

Set to true when the initial scan ranges are issued to the IoMgr. This happens on the first call to GetNext(). The token manager, in a different thread, will read this variable.

Definition at line 307 of file hdfs-scan-node.h.

Referenced by GetNext(), and ThreadTokenAvailableCb().

std::vector<char> impala::HdfsScanNode::is_materialized_col_
private

is_materialized_col_[i] = <true if the i-th column should be materialized, false otherwise> for 0 <= i < total # columns. This should be a vector<bool>, but bool vectors are special-cased and not stored internally as arrays, so instead we store as chars and cast to bools as needed.

Definition at line 339 of file hdfs-scan-node.h.

Referenced by is_materialized_col(), and Prepare().

boost::mutex impala::HdfsScanNode::lock_
private

Lock protects access between scanner thread and main query thread (the one calling GetNext()) for all fields below. If this lock and any other locks need to be taken together, this lock must be taken first.

Definition at line 407 of file hdfs-scan-node.h.

Referenced by GetNextInternal(), InitEmptyTemplateTuple(), InitTemplateTuple(), ScannerThread(), SetDone(), StopAndFinalizeCounters(), ThreadTokenAvailableCb(), and TransferToScanNodePool().

const string impala::ScanNode::MATERIALIZE_TUPLE_TIMER = "MaterializeTupleTime(*)"
staticinherited

Definition at line 128 of file scan-node.h.

Referenced by impala::ScanNode::Prepare().

RuntimeProfile::Counter* impala::ScanNode::materialize_tuple_timer_
protectedinherited
boost::scoped_ptr<RowBatchQueue> impala::HdfsScanNode::materialized_row_batches_
private

Outgoing row batches queue. Row batches are produced asynchronously by the scanner threads and consumed by the main thread.

Definition at line 363 of file hdfs-scan-node.h.

Referenced by AddMaterializedRowBatch(), Close(), GetNextInternal(), HdfsScanNode(), SetDone(), and ThreadTokenAvailableCb().

std::vector<SlotDescriptor*> impala::HdfsScanNode::materialized_slots_
private

Vector containing slot descriptors for all materialized non-partition key slots. These descriptors are sorted in order of increasing col_pos. TODO: Put this (with associated fields and logic) on ScanNode or ExecNode.

Definition at line 344 of file hdfs-scan-node.h.

Referenced by materialized_slots(), and Prepare().

RuntimeProfile::HighWaterMarkCounter* impala::HdfsScanNode::max_compressed_text_file_length_
private

The size of the largest compressed text file to be scanned. This is used to estimate scanner thread memory usage.

Definition at line 384 of file hdfs-scan-node.h.

Referenced by max_compressed_text_file_length(), and Open().

int impala::HdfsScanNode::max_materialized_row_batches_
private

Maximum size of materialized_row_batches_.

Definition at line 366 of file hdfs-scan-node.h.

Referenced by HdfsScanNode(), and ThreadTokenAvailableCb().

boost::scoped_ptr<MemTracker> impala::ExecNode::mem_tracker_
protectedinherited

Account for peak memory used by this node.

Definition at line 230 of file exec-node.h.

Referenced by impala::ExecNode::mem_tracker(), and impala::ExecNode::Prepare().

boost::mutex impala::HdfsScanNode::metadata_lock_
private

Scanner specific per file metadata (e.g. header information) and associated lock. This lock cannot be taken together with any other locks except lock_.

Definition at line 355 of file hdfs-scan-node.h.

Referenced by GetFileMetadata(), and SetFileMetadata().

const string impala::ScanNode::NUM_DISKS_ACCESSED_COUNTER = "NumDisksAccessed"
staticinherited

Definition at line 127 of file scan-node.h.

Referenced by Open().

RuntimeProfile::Counter* impala::ScanNode::num_disks_accessed_counter_
protectedinherited

Definition at line 150 of file scan-node.h.

Referenced by Open(), and StopAndFinalizeCounters().

AtomicInt<int> impala::HdfsScanNode::num_owned_io_buffers_
private

This is the number of io buffers that are owned by the scan node and the scanners. This is used just to help debug leaked io buffers to determine if the leak is happening in the scanners vs other parts of the execution.

Definition at line 371 of file hdfs-scan-node.h.

Referenced by Close(), and GetNextInternal().

RuntimeProfile::Counter* impala::HdfsScanNode::num_remote_ranges_
private

Total number of remote scan ranges.

Definition at line 399 of file hdfs-scan-node.h.

Referenced by Open(), and StopAndFinalizeCounters().

const string impala::ScanNode::NUM_SCANNER_THREADS_STARTED
staticinherited
Initial value:
=
"NumScannerThreadsStarted"

Definition at line 134 of file scan-node.h.

Referenced by Open().

RuntimeProfile::Counter* impala::ScanNode::num_scanner_threads_started_counter_
protectedinherited

Definition at line 170 of file scan-node.h.

Referenced by Open(), and ThreadTokenAvailableCb().

AtomicInt<int> impala::HdfsScanNode::num_scanners_codegen_disabled_
private

Definition at line 380 of file hdfs-scan-node.h.

Referenced by IncNumScannersCodegenDisabled(), and StopAndFinalizeCounters().

AtomicInt<int> impala::HdfsScanNode::num_scanners_codegen_enabled_
private

Counters which track the number of scanners that have codegen enabled for the materialize and conjuncts evaluation code paths.

Definition at line 379 of file hdfs-scan-node.h.

Referenced by IncNumScannersCodegenEnabled(), and StopAndFinalizeCounters().

AtomicInt<int> impala::HdfsScanNode::num_skipped_tokens_
private

The number of times a token was offered but no scanner threads started. This is used for diagnostics only.

Definition at line 375 of file hdfs-scan-node.h.

Referenced by ThreadTokenAvailableCb().

AtomicInt<int> impala::HdfsScanNode::num_unqueued_files_
private

Number of files that have not been issued from the scanners.

Definition at line 315 of file hdfs-scan-node.h.

Referenced by MarkFileDescIssued(), Prepare(), and ScannerThread().

boost::unordered_set<int64_t> impala::HdfsScanNode::partition_ids_
private

Partitions scanned by this scan node.

Definition at line 294 of file hdfs-scan-node.h.

Referenced by Close(), Open(), and Prepare().

std::vector<SlotDescriptor*> impala::HdfsScanNode::partition_key_slots_
private

Vector containing slot descriptors for all materialized partition key slots. These descriptors are sorted in order of increasing col_pos.

Definition at line 348 of file hdfs-scan-node.h.

Referenced by InitTemplateTuple(), num_materialized_partition_keys(), and Prepare().

PathToSlotIdxMap impala::HdfsScanNode::path_to_materialized_slot_idx_
private

Definition at line 332 of file hdfs-scan-node.h.

Referenced by GetMaterializedSlotIdx(), and Prepare().

std::map<std::string, void*> impala::HdfsScanNode::per_file_metadata_
private

Definition at line 356 of file hdfs-scan-node.h.

Referenced by GetFileMetadata(), and SetFileMetadata().

const string impala::ScanNode::PER_READ_THREAD_THROUGHPUT_COUNTER
staticinherited
Initial value:
=
"PerReadThreadRawHdfsThroughput"

Definition at line 126 of file scan-node.h.

Referenced by Open().

RuntimeProfile::Counter* impala::ScanNode::per_read_thread_throughput_counter_
protectedinherited

Per thread read throughput [bytes/sec].

Definition at line 149 of file scan-node.h.

Referenced by Open(), and impala::ScanNode::per_read_thread_throughput_counter().

FileFormatsMap impala::HdfsScanNode::per_type_files_
private

Definition at line 302 of file hdfs-scan-node.h.

Referenced by GetNext(), and Prepare().

ProgressUpdater impala::HdfsScanNode::progress_
private

Keeps track of total splits and the number finished.

Definition at line 351 of file hdfs-scan-node.h.

Referenced by GetNext(), Open(), ScannerThread(), and ThreadTokenAvailableCb().

RuntimeProfile::Counter* impala::ScanNode::read_timer_
protectedinherited

Definition at line 145 of file scan-node.h.

Referenced by Open(), impala::HBaseScanNode::Prepare(), and impala::ScanNode::read_timer().

DiskIoMgr::RequestContext* impala::HdfsScanNode::reader_context_
private

RequestContext object to use with the disk-io-mgr for reads.

Definition at line 280 of file hdfs-scan-node.h.

Referenced by AddDiskIoRanges(), Close(), Open(), reader_context(), ScannerThread(), SetDone(), and StopAndFinalizeCounters().

int32_t impala::HdfsScanNode::rm_callback_id_
private

The id of the callback added to the query resource manager when RM is enabled. Used to remove the callback before this scan node is destroyed.

Definition at line 442 of file hdfs-scan-node.h.

Referenced by Close(), and Open().

RowDescriptor impala::ExecNode::row_descriptor_
protectedinherited
const string impala::ExecNode::ROW_THROUGHPUT_COUNTER = "RowsReturnedRate"
static inherited

Names of counters shared by all exec nodes.

Definition at line 169 of file exec-node.h.

Referenced by impala::ExecNode::Prepare().

const string impala::ScanNode::ROWS_READ_COUNTER = "RowsRead"
static inherited

Definition at line 122 of file scan-node.h.

Referenced by impala::ScanNode::Prepare().

RuntimeProfile::Counter* impala::ScanNode::rows_read_counter_
protected inherited

Rows/tuples read from the scanner (including those discarded by EvalConjuncts()).

Definition at line 144 of file scan-node.h.

Referenced by impala::HBaseScanNode::GetNext(), impala::ScanNode::Prepare(), and impala::ScanNode::rows_read_counter().

RuntimeProfile::Counter* impala::ExecNode::rows_returned_rate_
protected inherited

Definition at line 227 of file exec-node.h.

Referenced by impala::ExecNode::Prepare().

std::string impala::ExecNode::runtime_exec_options_
protected inherited

Definition at line 239 of file exec-node.h.

Referenced by impala::ExecNode::AddRuntimeExecOption().

boost::scoped_ptr<MemPool> impala::HdfsScanNode::scan_node_pool_
private

Pool for allocating memory that is shared between scanners, e.g. partition key tuples and their string buffers.

Definition at line 421 of file hdfs-scan-node.h.

Referenced by Close(), InitEmptyTemplateTuple(), Prepare(), and TransferToScanNodePool().

const std::vector<TScanRangeParams>* impala::ScanNode::scan_range_params_
protected inherited

The scan ranges this scan node is responsible for. Not owned.

Definition at line 138 of file scan-node.h.

Referenced by impala::HBaseScanNode::Prepare(), Prepare(), and impala::ScanNode::SetScanRanges().

const string impala::ScanNode::SCAN_RANGES_COMPLETE_COUNTER = "ScanRangesComplete"
static inherited

Definition at line 129 of file scan-node.h.

Referenced by Open().

RuntimeProfile::Counter* impala::ScanNode::scan_ranges_complete_counter_
protected inherited

Definition at line 152 of file scan-node.h.

Referenced by Open(), and impala::ScanNode::scan_ranges_complete_counter().

ScannerMap impala::HdfsScanNode::scanner_map_
private

Definition at line 320 of file hdfs-scan-node.h.

int64_t impala::HdfsScanNode::scanner_thread_bytes_required_
private

The estimated memory required to start up a new scanner thread. If the memory left (due to limits) is less than this value, we won't start up optional scanner threads.

Definition at line 312 of file hdfs-scan-node.h.

Referenced by EnoughMemoryForScannerThread(), and Prepare().

RuntimeProfile::ThreadCounters* impala::ScanNode::scanner_thread_counters_
protected inherited

Aggregated scanner thread counters.

Definition at line 154 of file scan-node.h.

Referenced by impala::ScanNode::Prepare(), and impala::ScanNode::scanner_thread_counters().

const string impala::ScanNode::SCANNER_THREAD_COUNTERS_PREFIX = "ScannerThreads"
static inherited

Definition at line 130 of file scan-node.h.

Referenced by impala::ScanNode::Prepare().

const string impala::ScanNode::SCANNER_THREAD_TOTAL_WALLCLOCK_TIME
static inherited
Initial value: = "ScannerThreadsTotalWallClockTime"

Definition at line 131 of file scan-node.h.

Referenced by impala::HdfsTextScanner::Prepare(), and impala::ScanNode::Prepare().

ThreadGroup impala::HdfsScanNode::scanner_threads_
private

Thread group for all scanner worker threads.

Definition at line 359 of file hdfs-scan-node.h.

Referenced by Close(), Prepare(), and ThreadTokenAvailableCb().

Status impala::HdfsScanNode::status_
private

Status of failed operations. This is set in the ScannerThreads and returned in GetNext() if an error occurred. A non-ok status triggers cleanup of the scanner threads.

Definition at line 426 of file hdfs-scan-node.h.

Referenced by GetNextInternal(), and ScannerThread().

boost::scoped_ptr<TPlanNode> impala::HdfsScanNode::thrift_plan_node_
private

Cache of the plan node. This is needed to be able to create a copy of the conjuncts per scanner since our Exprs are not thread safe.

Definition at line 272 of file hdfs-scan-node.h.

Referenced by Prepare().

const string impala::ScanNode::TOTAL_HBASE_READ_TIMER = "TotalRawHBaseReadTime(*)"
static inherited

Definition at line 124 of file scan-node.h.

Referenced by impala::HBaseScanNode::Prepare().

const string impala::ScanNode::TOTAL_HDFS_READ_TIMER = "TotalRawHdfsReadTime(*)"
static inherited

Definition at line 123 of file scan-node.h.

Referenced by Open().

const string impala::ScanNode::TOTAL_THROUGHPUT_COUNTER = "TotalReadThroughput"
static inherited

Definition at line 125 of file scan-node.h.

Referenced by impala::ScanNode::Prepare().

RuntimeProfile::Counter* impala::ScanNode::total_throughput_counter_
protected inherited

Wall-clock based aggregate read throughput [bytes/sec].

Definition at line 147 of file scan-node.h.

Referenced by impala::ScanNode::Prepare(), and impala::ScanNode::total_throughput_counter().

const TupleDescriptor* impala::HdfsScanNode::tuple_desc_
private

Descriptor for tuples this scan node constructs.

Definition at line 283 of file hdfs-scan-node.h.

Referenced by InitEmptyTemplateTuple(), Prepare(), and tuple_desc().

const int impala::HdfsScanNode::tuple_id_
private

Tuple id resolved in Prepare() to set tuple_desc_.

Definition at line 277 of file hdfs-scan-node.h.

Referenced by Prepare().

TPlanNodeType::type impala::ExecNode::type_
protected inherited

Definition at line 210 of file exec-node.h.

Referenced by impala::ExecNode::CollectNodes(), and impala::ExecNode::type().

RuntimeProfile::Counter* impala::HdfsScanNode::unexpected_remote_bytes_
private

Total number of bytes read remotely that were expected to be local.

Definition at line 402 of file hdfs-scan-node.h.

Referenced by Open(), and StopAndFinalizeCounters().

bool impala::HdfsScanNode::unknown_disk_id_warned_
private

If true, the warning that some disk ids are unknown was logged. Only log this once per scan node since it can be noisy.

Definition at line 291 of file hdfs-scan-node.h.

Referenced by Prepare().


The documentation for this class was generated from the following files: