Impala
Impala is the open source, native analytic database for Apache Hadoop.
hdfs-scan-node.cc
// Copyright 2012 Cloudera Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "exec/hdfs-scan-node.h"
#include "exec/base-sequence-scanner.h"
#include "exec/hdfs-text-scanner.h"
#include "exec/hdfs-lzo-text-scanner.h"
#include "exec/hdfs-sequence-scanner.h"
#include "exec/hdfs-rcfile-scanner.h"
#include "exec/hdfs-avro-scanner.h"
#include "exec/hdfs-parquet-scanner.h"

#include <sstream>
#include <boost/algorithm/string.hpp>
#include <boost/foreach.hpp>
#include <boost/filesystem.hpp>
#include <gutil/strings/substitute.h>

#include <hdfs.h>

#include "codegen/llvm-codegen.h"
#include "common/logging.h"
#include "common/object-pool.h"
#include "exprs/expr-context.h"
#include "runtime/descriptors.h"
#include "runtime/hdfs-fs-cache.h"
#include "runtime/runtime-state.h"
#include "runtime/mem-pool.h"
#include "runtime/raw-value.h"
#include "runtime/row-batch.h"
#include "util/bit-util.h"
#include "util/container-util.h"
#include "util/debug-util.h"
#include "util/disk-info.h"
#include "util/error-util.h"
#include "util/hdfs-util.h"
#include "util/impalad-metrics.h"
#include "util/periodic-counter-updater.h"
#include "util/runtime-profile.h"

#include "gen-cpp/PlanNodes_types.h"

#include "common/names.h"

DEFINE_int32(max_row_batches, 0, "the maximum size of materialized_row_batches_");
DECLARE_string(cgroup_hierarchy_path);
DECLARE_bool(enable_rm);

namespace filesystem = boost::filesystem;
using namespace impala;
using namespace llvm;
using namespace strings;

const string HdfsScanNode::HDFS_SPLIT_STATS_DESC =
    "Hdfs split stats (<volume id>:<# splits>/<split lengths>)";

// Amount of memory that we approximate a scanner thread will use, not including
// IoBuffers. The memory used does not vary considerably between file formats (just a
// couple of MBs). This value is conservative and was taken from running against the
// tpch lineitem table.
// TODO: revisit how we do this.
const int SCANNER_THREAD_MEM_USAGE = 32 * 1024 * 1024;

// Estimated upper bound on the compression ratio of compressed text files. Used to
// estimate scanner thread memory usage.
const int COMPRESSED_TEXT_COMPRESSION_RATIO = 11;
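
// E.g. a 64 MB gzipped text file is budgeted 64 MB * COMPRESSED_TEXT_COMPRESSION_RATIO
// of scanner thread memory in Prepare() below, since the whole file may need to be
// decompressed at once.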

// Determines how many unexpected remote bytes trigger an error in the runtime state
const int UNEXPECTED_REMOTE_BYTES_WARN_THRESHOLD = 64 * 1024 * 1024;

HdfsScanNode::HdfsScanNode(ObjectPool* pool, const TPlanNode& tnode,
                           const DescriptorTbl& descs)
    : ScanNode(pool, tnode, descs),
      thrift_plan_node_(new TPlanNode(tnode)),
      runtime_state_(NULL),
      tuple_id_(tnode.hdfs_scan_node.tuple_id),
      reader_context_(NULL),
      tuple_desc_(NULL),
      unknown_disk_id_warned_(false),
      initial_ranges_issued_(false),
      scanner_thread_bytes_required_(0),
      disks_accessed_bitmap_(TUnit::UNIT, 0),
      done_(false),
      all_ranges_started_(false),
      counters_running_(false),
      rm_callback_id_(-1) {
  max_materialized_row_batches_ = FLAGS_max_row_batches;
  if (max_materialized_row_batches_ <= 0) {
    // TODO: This parameter has a U-shaped effect on performance: increasing the value
    // first improves performance, but increasing it further degrades performance.
    // Investigate and tune this.
    max_materialized_row_batches_ =
        10 * (DiskInfo::num_disks() + DiskIoMgr::REMOTE_NUM_DISKS);
  }
  materialized_row_batches_.reset(new RowBatchQueue(max_materialized_row_batches_));
}

HdfsScanNode::~HdfsScanNode() {
}

Status HdfsScanNode::GetNext(RuntimeState* state, RowBatch* row_batch, bool* eos) {
  SCOPED_TIMER(runtime_profile_->total_time_counter());

  if (!initial_ranges_issued_) {
    // We do this in GetNext() to ensure that all execution time predicates have
    // been generated (e.g. probe side bitmap filters).
    // TODO: we could do dynamic partition pruning here as well.
    initial_ranges_issued_ = true;
    // Issue initial ranges for all file types.
    RETURN_IF_ERROR(HdfsTextScanner::IssueInitialRanges(this,
        per_type_files_[THdfsFileFormat::TEXT]));
    RETURN_IF_ERROR(BaseSequenceScanner::IssueInitialRanges(this,
        per_type_files_[THdfsFileFormat::SEQUENCE_FILE]));
    RETURN_IF_ERROR(BaseSequenceScanner::IssueInitialRanges(this,
        per_type_files_[THdfsFileFormat::RC_FILE]));
    RETURN_IF_ERROR(BaseSequenceScanner::IssueInitialRanges(this,
        per_type_files_[THdfsFileFormat::AVRO]));
    RETURN_IF_ERROR(HdfsParquetScanner::IssueInitialRanges(this,
        per_type_files_[THdfsFileFormat::PARQUET]));
    if (progress_.done()) SetDone();
  }

  Status status = GetNextInternal(state, row_batch, eos);
  if (status.IsMemLimitExceeded()) state->SetMemLimitExceeded();
  if (!status.ok() || *eos) StopAndFinalizeCounters();
  return status;
}

Status HdfsScanNode::GetNextInternal(
    RuntimeState* state, RowBatch* row_batch, bool* eos) {
  RETURN_IF_ERROR(ExecDebugAction(TExecNodePhase::GETNEXT, state));
  RETURN_IF_CANCELLED(state);
  RETURN_IF_ERROR(QueryMaintenance(state));

  if (ReachedLimit()) {
    // LIMIT 0 case. Other limit values handled below.
    DCHECK_EQ(limit_, 0);
    *eos = true;
    return Status::OK;
  }
  *eos = false;
  RowBatch* materialized_batch = materialized_row_batches_->GetBatch();
  if (materialized_batch != NULL) {
    num_owned_io_buffers_ -= materialized_batch->num_io_buffers();
    row_batch->AcquireState(materialized_batch);
    // Update the number of materialized rows now instead of when they are materialized.
    // This means that scanners might process and queue up more rows than are necessary
    // for the limit case, but we want to avoid the synchronized writes to
    // num_rows_returned_.
    num_rows_returned_ += row_batch->num_rows();
    COUNTER_SET(rows_returned_counter_, num_rows_returned_);

    if (ReachedLimit()) {
      int num_rows_over = num_rows_returned_ - limit_;
      row_batch->set_num_rows(row_batch->num_rows() - num_rows_over);
      num_rows_returned_ -= num_rows_over;
      COUNTER_SET(rows_returned_counter_, num_rows_returned_);

      *eos = true;
      SetDone();
    }
    DCHECK_EQ(materialized_batch->num_io_buffers(), 0);
    delete materialized_batch;
    return Status::OK;
  }
  // The RowBatchQueue was shut down either because all scan ranges are complete or a
  // scanner thread encountered an error. Check status_ to distinguish those cases.
  *eos = true;
  unique_lock<mutex> l(lock_);
  return status_;
}

DiskIoMgr::ScanRange* HdfsScanNode::AllocateScanRange(
    hdfsFS fs, const char* file, int64_t len, int64_t offset, int64_t partition_id,
    int disk_id, bool try_cache, bool expected_local, int64_t mtime) {
  DCHECK_GE(disk_id, -1);
  // Require that the scan range is within [0, file_length). While this cannot be used
  // to guarantee safety (file_length metadata may be stale), it avoids different
  // behavior between Hadoop FileSystems (e.g. s3n hdfsSeek() returns an error when
  // seeking beyond the end of the file).
  DCHECK_GE(offset, 0);
  DCHECK_GE(len, 0);
  DCHECK_LE(offset + len, GetFileDesc(file)->file_length)
      << "Scan range beyond end of file (offset=" << offset << ", len=" << len << ")";
  disk_id = runtime_state_->io_mgr()->AssignQueue(file, disk_id, expected_local);

  ScanRangeMetadata* metadata =
      runtime_state_->obj_pool()->Add(new ScanRangeMetadata(partition_id));
  DiskIoMgr::ScanRange* range =
      runtime_state_->obj_pool()->Add(new DiskIoMgr::ScanRange());
  range->Reset(fs, file, len, offset, disk_id, try_cache, expected_local,
      mtime, metadata);
  return range;
}
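
// A disk_id of -1 tells the io mgr that the block's volume is unknown; AssignQueue()
// above then chooses an I/O queue for the range itself instead of trusting the
// caller's hint, and ranges that are not expected to be local go to the remote disk
// queues.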

HdfsFileDesc* HdfsScanNode::GetFileDesc(const string& filename) {
  DCHECK(file_descs_.find(filename) != file_descs_.end());
  return file_descs_[filename];
}

void HdfsScanNode::SetFileMetadata(const string& filename, void* metadata) {
  unique_lock<mutex> l(metadata_lock_);
  DCHECK(per_file_metadata_.find(filename) == per_file_metadata_.end());
  per_file_metadata_[filename] = metadata;
}

void* HdfsScanNode::GetFileMetadata(const string& filename) {
  unique_lock<mutex> l(metadata_lock_);
  map<string, void*>::iterator it = per_file_metadata_.find(filename);
  if (it == per_file_metadata_.end()) return NULL;
  return it->second;
}

void* HdfsScanNode::GetCodegenFn(THdfsFileFormat::type type) {
  CodegendFnMap::iterator it = codegend_fn_map_.find(type);
  if (it == codegend_fn_map_.end()) return NULL;
  return it->second;
}
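
// The per-format entries are filled in by Prepare() below via AddFunctionToJit();
// a NULL result here simply means the scanner falls back to the interpreted path.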

HdfsScanner* HdfsScanNode::CreateAndPrepareScanner(HdfsPartitionDescriptor* partition,
    ScannerContext* context, Status* status) {
  DCHECK(context != NULL);
  HdfsScanner* scanner = NULL;
  THdfsCompression::type compression =
      context->GetStream()->file_desc()->file_compression;

  // Create a new scanner for this file format and compression.
  switch (partition->file_format()) {
    case THdfsFileFormat::TEXT:
      // Lzo-compressed text files are scanned by a scanner that is implemented as a
      // dynamic library, so that Impala does not include GPL code.
      if (compression == THdfsCompression::LZO) {
        scanner = HdfsLzoTextScanner::GetHdfsLzoTextScanner(this, runtime_state_);
      } else {
        scanner = new HdfsTextScanner(this, runtime_state_);
      }
      break;
    case THdfsFileFormat::SEQUENCE_FILE:
      scanner = new HdfsSequenceScanner(this, runtime_state_);
      break;
    case THdfsFileFormat::RC_FILE:
      scanner = new HdfsRCFileScanner(this, runtime_state_);
      break;
    case THdfsFileFormat::AVRO:
      scanner = new HdfsAvroScanner(this, runtime_state_);
      break;
    case THdfsFileFormat::PARQUET:
      scanner = new HdfsParquetScanner(this, runtime_state_);
      break;
    default:
      DCHECK(false) << "Unknown Hdfs file format type:" << partition->file_format();
      return NULL;
  }
  DCHECK(scanner != NULL);
  runtime_state_->obj_pool()->Add(scanner);
  *status = scanner->Prepare(context);
  return scanner;
}
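
// Call pattern (see ScannerThread() below): one scanner is created per scan range,
// prepared against its ScannerContext, processes the range end to end in the calling
// thread via ProcessSplit(), and is then Close()'d.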

Tuple* HdfsScanNode::InitTemplateTuple(RuntimeState* state,
    const vector<ExprContext*>& value_ctxs) {
  if (partition_key_slots_.empty()) return NULL;

  // Lock to protect access to partition_key_pool_ and value_ctxs
  // TODO: we can push the lock to the mempool and exprs_values should not
  // use internal memory.
  Tuple* template_tuple = InitEmptyTemplateTuple();

  unique_lock<mutex> l(lock_);
  for (int i = 0; i < partition_key_slots_.size(); ++i) {
    const SlotDescriptor* slot_desc = partition_key_slots_[i];
    // Exprs are guaranteed to be literals, so they can safely be evaluated without a
    // row context.
    void* value = value_ctxs[slot_desc->col_pos()]->GetValue(NULL);
    RawValue::Write(value, template_tuple, slot_desc, NULL);
  }
  return template_tuple;
}
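
// Example: for a table partitioned by (year, month), the template tuple for partition
// year=2015/month=3 has those two slots written exactly once here; scanners then copy
// the template into each output row instead of re-evaluating partition exprs per row.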

Tuple* HdfsScanNode::InitEmptyTemplateTuple() {
  Tuple* template_tuple = NULL;
  {
    unique_lock<mutex> l(lock_);
    template_tuple = Tuple::Create(tuple_desc_->byte_size(), scan_node_pool_.get());
  }
  memset(template_tuple, 0, tuple_desc_->byte_size());
  return template_tuple;
}

void HdfsScanNode::TransferToScanNodePool(MemPool* pool) {
  unique_lock<mutex> l(lock_);
  scan_node_pool_->AcquireData(pool, false);
}

Status HdfsScanNode::Prepare(RuntimeState* state) {
  SCOPED_TIMER(runtime_profile_->total_time_counter());
  runtime_state_ = state;
  RETURN_IF_ERROR(ScanNode::Prepare(state));

  tuple_desc_ = state->desc_tbl().GetTupleDescriptor(tuple_id_);
  DCHECK(tuple_desc_ != NULL);

  if (!state->cgroup().empty()) {
    scanner_threads_.SetCgroupsMgr(state->exec_env()->cgroups_mgr());
    scanner_threads_.SetCgroup(state->cgroup());
  }

  // One-time initialisation of state that is constant across scan ranges
  DCHECK(tuple_desc_->table_desc() != NULL);
  hdfs_table_ = static_cast<const HdfsTableDescriptor*>(tuple_desc_->table_desc());
  scan_node_pool_.reset(new MemPool(mem_tracker()));

  // Gather materialized partition-key slots and non-partition slots.
  const vector<SlotDescriptor*>& slots = tuple_desc_->slots();
  for (size_t i = 0; i < slots.size(); ++i) {
    if (!slots[i]->is_materialized()) continue;
    if (hdfs_table_->IsClusteringCol(slots[i])) {
      partition_key_slots_.push_back(slots[i]);
    } else {
      materialized_slots_.push_back(slots[i]);
    }
  }

  // Order the materialized slots such that for schemaless file formats (e.g. text) the
  // order corresponds to the physical order in files. For formats where the file schema
  // is independent of the table schema (e.g. Avro, Parquet), this step is not necessary.
  sort(materialized_slots_.begin(), materialized_slots_.end(),
      SlotDescriptor::ColPathLessThan);

  // Populate the mapping from slot path to index into materialized_slots_.
  for (int i = 0; i < materialized_slots_.size(); ++i) {
    path_to_materialized_slot_idx_[materialized_slots_[i]->col_path()] = i;
  }

  // Initialize is_materialized_col_
  is_materialized_col_.resize(hdfs_table_->num_cols());
  for (int i = 0; i < hdfs_table_->num_cols(); ++i) {
    is_materialized_col_[i] = GetMaterializedSlotIdx(vector<int>(1, i)) != SKIP_COLUMN;
  }

  HdfsFsCache::HdfsFsMap fs_cache;
  // Convert the TScanRangeParams into per-file DiskIO::ScanRange objects and populate
  // partition_ids_, file_descs_, and per_type_files_.
  DCHECK(scan_range_params_ != NULL)
      << "Must call SetScanRanges() before calling Prepare()";
  int num_ranges_missing_volume_id = 0;
  for (int i = 0; i < scan_range_params_->size(); ++i) {
    DCHECK((*scan_range_params_)[i].scan_range.__isset.hdfs_file_split);
    const THdfsFileSplit& split = (*scan_range_params_)[i].scan_range.hdfs_file_split;
    partition_ids_.insert(split.partition_id);
    HdfsPartitionDescriptor* partition_desc =
        hdfs_table_->GetPartition(split.partition_id);
    filesystem::path file_path(partition_desc->location());
    file_path.append(split.file_name, filesystem::path::codecvt());
    const string& native_file_path = file_path.native();

    HdfsFileDesc* file_desc = NULL;
    FileDescMap::iterator file_desc_it = file_descs_.find(native_file_path);
    if (file_desc_it == file_descs_.end()) {
      // Add a new file_desc to file_descs_ and per_type_files_
      file_desc = runtime_state_->obj_pool()->Add(new HdfsFileDesc(native_file_path));
      file_descs_[native_file_path] = file_desc;
      file_desc->file_length = split.file_length;
      file_desc->mtime = split.mtime;
      file_desc->file_compression = split.file_compression;
      RETURN_IF_ERROR(HdfsFsCache::instance()->GetConnection(
          native_file_path, &file_desc->fs, &fs_cache));

      if (partition_desc == NULL) {
        stringstream ss;
        ss << "Could not find partition with id: " << split.partition_id;
        return Status(ss.str());
      }
      ++num_unqueued_files_;
      per_type_files_[partition_desc->file_format()].push_back(file_desc);
    } else {
      // File already processed
      file_desc = file_desc_it->second;
    }

    bool expected_local = (*scan_range_params_)[i].__isset.is_remote &&
        !(*scan_range_params_)[i].is_remote;
    if (expected_local && (*scan_range_params_)[i].volume_id == -1) {
      if (!unknown_disk_id_warned_) {
        AddRuntimeExecOption("Missing Volume Id");
        runtime_state()->LogError(ErrorMsg(TErrorCode::HDFS_SCAN_NODE_UNKNOWN_DISK));
        unknown_disk_id_warned_ = true;
      }
      ++num_ranges_missing_volume_id;
    }

    bool try_cache = (*scan_range_params_)[i].is_cached;
    if (runtime_state_->query_options().disable_cached_reads) {
      DCHECK(!try_cache) << "Params should not have had this set.";
    }
    file_desc->splits.push_back(
        AllocateScanRange(file_desc->fs, file_desc->filename.c_str(), split.length,
            split.offset, split.partition_id, (*scan_range_params_)[i].volume_id,
            try_cache, expected_local, file_desc->mtime));
  }

  // Compute the minimum bytes required to start a new thread. This is based on the
  // file format.
  // The higher the estimate, the less likely it is the query will fail, but the more
  // likely the query will be throttled when it does not need to be.
  // TODO: how many buffers should we estimate per range? The IoMgr will throttle down
  // to one, but if there are already buffers queued before memory pressure was hit,
  // we can't reclaim that memory.
  if (per_type_files_[THdfsFileFormat::PARQUET].size() > 0) {
    // Parquet files require buffers per column
    scanner_thread_bytes_required_ =
        materialized_slots_.size() * 3 * runtime_state_->io_mgr()->max_read_buffer_size();
  } else {
    scanner_thread_bytes_required_ =
        3 * runtime_state_->io_mgr()->max_read_buffer_size();
  }
  // scanner_thread_bytes_required_ now contains the IoBuffer requirement.
  // Next we add in the other memory the scanner thread will use,
  // e.g. decompression buffers, tuple buffers, etc.
  // For compressed text, we estimate this based on the file size (since the whole file
  // will need to be decompressed at once). For all other formats, we use a constant.
  // TODO: can we do something better?
  int64_t scanner_thread_mem_usage = SCANNER_THREAD_MEM_USAGE;
  BOOST_FOREACH(HdfsFileDesc* file, per_type_files_[THdfsFileFormat::TEXT]) {
    if (file->file_compression != THdfsCompression::NONE) {
      int64_t bytes_required = file->file_length * COMPRESSED_TEXT_COMPRESSION_RATIO;
      scanner_thread_mem_usage = ::max(bytes_required, scanner_thread_mem_usage);
    }
  }
  scanner_thread_bytes_required_ += scanner_thread_mem_usage;
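
  // Example, assuming the default 8 MB max read buffer: a Parquet scan materializing
  // 20 slots starts from a 20 * 3 * 8 MB = 480 MB IoBuffer estimate, other formats
  // from 3 * 8 MB = 24 MB; a 100 MB gzipped text file then raises the non-IoBuffer
  // part from the 32 MB constant to 100 MB * COMPRESSED_TEXT_COMPRESSION_RATIO.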

  // Prepare all the partitions scanned by the scan node
  BOOST_FOREACH(const int64_t& partition_id, partition_ids_) {
    HdfsPartitionDescriptor* partition_desc = hdfs_table_->GetPartition(partition_id);
    DCHECK(partition_desc != NULL);
    RETURN_IF_ERROR(partition_desc->PrepareExprs(state));
  }

  // Update server-wide metrics for the number of scan ranges and the ranges that have
  // incomplete metadata.
  ImpaladMetrics::NUM_RANGES_PROCESSED->Increment(scan_range_params_->size());
  ImpaladMetrics::NUM_RANGES_MISSING_VOLUME_ID->Increment(num_ranges_missing_volume_id);

  // Add per volume stats to the runtime profile
  PerVolumnStats per_volume_stats;
  stringstream str;
  UpdateHdfsSplitStats(*scan_range_params_, &per_volume_stats);
  PrintHdfsSplitStats(per_volume_stats, &str);
  runtime_profile()->AddInfoString(HDFS_SPLIT_STATS_DESC, str.str());

  // Initialize conjunct exprs
  RETURN_IF_ERROR(Expr::CreateExprTrees(
      runtime_state_->obj_pool(), thrift_plan_node_->conjuncts, &conjunct_ctxs_));
  RETURN_IF_ERROR(
      Expr::Prepare(conjunct_ctxs_, runtime_state_, row_desc(), expr_mem_tracker()));
  AddExprCtxsToFree(conjunct_ctxs_);

  for (int format = THdfsFileFormat::TEXT;
       format <= THdfsFileFormat::PARQUET; ++format) {
    vector<HdfsFileDesc*>& file_descs =
        per_type_files_[static_cast<THdfsFileFormat::type>(format)];

    if (file_descs.empty()) continue;

    // Randomize the order in which this node processes its files. We do this to avoid
    // issuing remote reads to the same DN from different impalads. In file formats such
    // as avro/seq/rc (i.e. splittable with a header), every node first reads the header.
    // If every node went through the files in the same order, all the remote reads
    // would be for the same file, meaning a few DNs would serve a lot of remote reads
    // at the same time.
    random_shuffle(file_descs.begin(), file_descs.end());

    // Create reusable codegen'd functions for each file type needed
    Function* fn;
    switch (format) {
      case THdfsFileFormat::TEXT:
        fn = HdfsTextScanner::Codegen(this, conjunct_ctxs_);
        break;
      case THdfsFileFormat::SEQUENCE_FILE:
        fn = HdfsSequenceScanner::Codegen(this, conjunct_ctxs_);
        break;
      case THdfsFileFormat::AVRO:
        fn = HdfsAvroScanner::Codegen(this, conjunct_ctxs_);
        break;
      default:
        // No codegen for this format
        fn = NULL;
    }
    if (fn != NULL) {
      LlvmCodeGen* codegen;
      RETURN_IF_ERROR(runtime_state_->GetCodegen(&codegen));
      codegen->AddFunctionToJit(
          fn, &codegend_fn_map_[static_cast<THdfsFileFormat::type>(format)]);
    }
  }

  return Status::OK;
}

// This function initiates the connection to hdfs and starts up the initial scanner
// threads. The scanner subclasses are passed the initial splits. Scanners are expected
// to queue up a non-zero number of those splits to the io mgr (via the ScanNode).
Status HdfsScanNode::Open(RuntimeState* state) {
  RETURN_IF_ERROR(ExecNode::Open(state));

  // We need at least one scanner thread to make progress. We need to make this
  // reservation before any ranges are issued.
  state->resource_pool()->ReserveOptionalTokens(1);
  if (runtime_state_->query_options().num_scanner_threads > 0) {
    state->resource_pool()->set_max_quota(
        runtime_state_->query_options().num_scanner_threads);
  }

  state->resource_pool()->SetThreadAvailableCb(
      bind<void>(mem_fn(&HdfsScanNode::ThreadTokenAvailableCb), this, _1));

  if (runtime_state_->query_resource_mgr() != NULL) {
    rm_callback_id_ = runtime_state_->query_resource_mgr()->AddVcoreAvailableCb(
        bind<void>(mem_fn(&HdfsScanNode::ThreadTokenAvailableCb), this,
            state->resource_pool()));
  }

  if (file_descs_.empty()) {
    SetDone();
    return Status::OK;
  }

  // Open all the partition exprs used by the scan node
  BOOST_FOREACH(const int64_t& partition_id, partition_ids_) {
    HdfsPartitionDescriptor* partition_desc = hdfs_table_->GetPartition(partition_id);
    DCHECK(partition_desc != NULL);
    RETURN_IF_ERROR(partition_desc->OpenExprs(state));
  }

  // Open all conjuncts
  RETURN_IF_ERROR(Expr::Open(conjunct_ctxs_, state));

  RETURN_IF_ERROR(runtime_state_->io_mgr()->RegisterContext(
      &reader_context_, mem_tracker()));

  // Initialize HdfsScanNode specific counters
  read_timer_ = ADD_TIMER(runtime_profile(), TOTAL_HDFS_READ_TIMER);
  per_read_thread_throughput_counter_ = runtime_profile()->AddDerivedCounter(
      PER_READ_THREAD_THROUGHPUT_COUNTER, TUnit::BYTES_PER_SECOND,
      bind<int64_t>(&RuntimeProfile::UnitsPerSecond, bytes_read_counter_, read_timer_));
  scan_ranges_complete_counter_ =
      ADD_COUNTER(runtime_profile(), SCAN_RANGES_COMPLETE_COUNTER, TUnit::UNIT);
  if (DiskInfo::num_disks() < 64) {
    num_disks_accessed_counter_ =
        ADD_COUNTER(runtime_profile(), NUM_DISKS_ACCESSED_COUNTER, TUnit::UNIT);
  } else {
    num_disks_accessed_counter_ = NULL;
  }
  num_scanner_threads_started_counter_ =
      ADD_COUNTER(runtime_profile(), NUM_SCANNER_THREADS_STARTED, TUnit::UNIT);

  runtime_state_->io_mgr()->set_bytes_read_counter(reader_context_, bytes_read_counter());
  runtime_state_->io_mgr()->set_read_timer(reader_context_, read_timer());
  runtime_state_->io_mgr()->set_active_read_thread_counter(reader_context_,
      &active_hdfs_read_thread_counter_);
  runtime_state_->io_mgr()->set_disks_access_bitmap(reader_context_,
      &disks_accessed_bitmap_);

  average_scanner_thread_concurrency_ = runtime_profile()->AddSamplingCounter(
      AVERAGE_SCANNER_THREAD_CONCURRENCY, &active_scanner_thread_counter_);
  average_hdfs_read_thread_concurrency_ = runtime_profile()->AddSamplingCounter(
      AVERAGE_HDFS_READ_THREAD_CONCURRENCY, &active_hdfs_read_thread_counter_);

  bytes_read_local_ = ADD_COUNTER(runtime_profile(), "BytesReadLocal",
      TUnit::BYTES);
  bytes_read_short_circuit_ = ADD_COUNTER(runtime_profile(), "BytesReadShortCircuit",
      TUnit::BYTES);
  bytes_read_dn_cache_ = ADD_COUNTER(runtime_profile(), "BytesReadDataNodeCache",
      TUnit::BYTES);
  num_remote_ranges_ = ADD_COUNTER(runtime_profile(), "RemoteScanRanges",
      TUnit::UNIT);
  unexpected_remote_bytes_ = ADD_COUNTER(runtime_profile(), "BytesReadRemoteUnexpected",
      TUnit::BYTES);

  max_compressed_text_file_length_ = runtime_profile()->AddHighWaterMarkCounter(
      "MaxCompressedTextFileLength", TUnit::BYTES);

  for (int i = 0; i < state->io_mgr()->num_total_disks() + 1; ++i) {
    hdfs_read_thread_concurrency_bucket_.push_back(
        pool_->Add(new RuntimeProfile::Counter(TUnit::DOUBLE_VALUE, 0)));
  }
  runtime_profile()->RegisterBucketingCounters(&active_hdfs_read_thread_counter_,
      &hdfs_read_thread_concurrency_bucket_);

  counters_running_ = true;

  int total_splits = 0;
  for (FileDescMap::iterator it = file_descs_.begin(); it != file_descs_.end(); ++it) {
    total_splits += it->second->splits.size();
  }

  if (total_splits == 0) {
    SetDone();
    return Status::OK;
  }

  stringstream ss;
  ss << "Splits complete (node=" << id() << "):";
  progress_ = ProgressUpdater(ss.str(), total_splits);

  return Status::OK;
}

Status HdfsScanNode::Reset(RuntimeState* state) {
  DCHECK(false) << "NYI";
  return Status("NYI");
}

void HdfsScanNode::Close(RuntimeState* state) {
  if (is_closed()) return;
  SetDone();

  state->resource_pool()->SetThreadAvailableCb(NULL);
  if (state->query_resource_mgr() != NULL && rm_callback_id_ != -1) {
    state->query_resource_mgr()->RemoveVcoreAvailableCb(rm_callback_id_);
  }

  scanner_threads_.JoinAll();

  num_owned_io_buffers_ -= materialized_row_batches_->Cleanup();
  DCHECK_EQ(num_owned_io_buffers_, 0) << "ScanNode has leaked io buffers";

  if (reader_context_ != NULL) {
    // There may still be io buffers used by parent nodes so we can't unregister the
    // reader context yet. The runtime state keeps a list of all the reader contexts and
    // they are unregistered when the fragment is closed.
    state->reader_contexts()->push_back(reader_context_);
    // Need to wait for all the active scanner threads to finish to ensure there is no
    // more memory tracked by this scan node's mem tracker.
    state->io_mgr()->CancelContext(reader_context_, true);
  }

  StopAndFinalizeCounters();

  // There should be no active scanner threads and hdfs read threads.
  DCHECK_EQ(active_scanner_thread_counter_.value(), 0);
  DCHECK_EQ(active_hdfs_read_thread_counter_.value(), 0);

  if (scan_node_pool_.get() != NULL) scan_node_pool_->FreeAll();

  // Close all conjuncts
  Expr::Close(conjunct_ctxs_, state);

  // Close all the partitions scanned by the scan node
  BOOST_FOREACH(const int64_t& partition_id, partition_ids_) {
    HdfsPartitionDescriptor* partition_desc = hdfs_table_->GetPartition(partition_id);
    DCHECK(partition_desc != NULL);
    partition_desc->CloseExprs(state);
  }

  ScanNode::Close(state);
}

Status HdfsScanNode::AddDiskIoRanges(const vector<DiskIoMgr::ScanRange*>& ranges) {
  RETURN_IF_ERROR(
      runtime_state_->io_mgr()->AddScanRanges(reader_context_, ranges));
  ThreadTokenAvailableCb(runtime_state_->resource_pool());
  return Status::OK;
}

Status HdfsScanNode::AddDiskIoRanges(const HdfsFileDesc* desc) {
  const vector<DiskIoMgr::ScanRange*>& ranges = desc->splits;
  RETURN_IF_ERROR(
      runtime_state_->io_mgr()->AddScanRanges(reader_context_, ranges));
  MarkFileDescIssued(desc);
  ThreadTokenAvailableCb(runtime_state_->resource_pool());
  return Status::OK;
}

void HdfsScanNode::MarkFileDescIssued(const HdfsFileDesc* file_desc) {
  DCHECK_GT(num_unqueued_files_, 0);
  --num_unqueued_files_;
}

void HdfsScanNode::AddMaterializedRowBatch(RowBatch* row_batch) {
  materialized_row_batches_->AddBatch(row_batch);
}

Status HdfsScanNode::GetConjunctCtxs(vector<ExprContext*>* ctxs) {
  return Expr::Clone(conjunct_ctxs_, runtime_state_, ctxs);
}

// For controlling the amount of memory used for scanners, we approximate the
// scanner mem usage based on scanner_thread_bytes_required_, rather than the
// consumption in the scan node's mem tracker. The problem with the scan node
// trackers is that they do not account for the memory the scanner will use.
// For example, if there is 110 MB of memory left (based on the mem tracker)
// and we estimate that a scanner will use 100 MB, we want to make sure to only
// start up one additional thread. However, after starting the first thread, the
// mem tracker value will not change immediately (it takes some time before the
// scanner is running and starts using memory). Therefore we just use the estimate
// based on the number of running scanner threads.
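// Concretely: with 2 running scanner threads and scanner_thread_bytes_required_ =
// 100 MB, committed memory is 200 MB. If the mem tracker only shows 150 MB consumed
// so far, the 50 MB difference is memory the running scanners are still expected to
// grow into, so a third thread is started only if 50 MB + 100 MB fits within the
// tracker's SpareCapacity().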
bool HdfsScanNode::EnoughMemoryForScannerThread(bool new_thread) {
  int64_t committed_scanner_mem =
      active_scanner_thread_counter_.value() * scanner_thread_bytes_required_;
  int64_t tracker_consumption = mem_tracker()->consumption();
  int64_t est_additional_scanner_mem = committed_scanner_mem - tracker_consumption;
  if (est_additional_scanner_mem < 0) {
    // This is the case where our estimate was too low. Expand the estimate based
    // on the usage.
    int64_t avg_consumption =
        tracker_consumption / active_scanner_thread_counter_.value();
    // Take the average and expand it by 50%. Some scanners will not have hit their
    // steady state mem usage yet.
    // TODO: how can we scale down if we've overestimated?
    // TODO: better heuristic?
    scanner_thread_bytes_required_ = static_cast<int64_t>(avg_consumption * 1.5);
    est_additional_scanner_mem = 0;
  }

  // If we are starting a new thread, take that into account now.
  if (new_thread) est_additional_scanner_mem += scanner_thread_bytes_required_;
  return est_additional_scanner_mem < mem_tracker()->SpareCapacity();
}

void HdfsScanNode::ThreadTokenAvailableCb(ThreadResourceMgr::ResourcePool* pool) {
  // This is called to start up new scanner threads. It's not a big deal if we
  // spin up more than strictly necessary since they will go through and terminate
  // promptly. However, we want to minimize that by checking a number of conditions:
  // 1. Don't start up if the ScanNode is done.
  // 2. Don't start up if all the ranges have been taken by another thread.
  // 3. Don't start up if the number of ranges left is less than the number of
  //    active scanner threads.
  // 4. Don't start up if no initial ranges have been issued (see IMPALA-1722).
  // 5. Don't start up a ScannerThread if materialized_row_batches_ is full since
  //    we are not scanner bound.
  // 6. Don't start up a thread if there isn't enough memory left to run it.
  // 7. Don't start up if there are no thread tokens.
  // 8. Don't start up if we are running too many threads for our vcore allocation
  //    (unless the thread is reserved, in which case it has to run).

  // Case 4. We have not issued the initial ranges so don't start a scanner thread.
  // Issuing ranges will call this function and we'll start the scanner threads then.
  // TODO: It would be good to have a test case for that.
  if (!initial_ranges_issued_) return;

  bool started_scanner = false;
  while (true) {
    // The lock must be given up between loops in order to give writers to done_,
    // all_ranges_started_ etc. a chance to grab the lock.
    // TODO: This still leans heavily on starvation-free locks; come up with a more
    // correct way to communicate between this method and ScannerThreadHelper.
    unique_lock<mutex> lock(lock_);
    // Cases 1, 2, 3.
    if (done_ || all_ranges_started_ ||
        active_scanner_thread_counter_.value() >= progress_.remaining()) {
      break;
    }

    // Cases 5 and 6.
    if (materialized_row_batches_->GetSize() >= max_materialized_row_batches_ ||
        !EnoughMemoryForScannerThread(true)) {
      break;
    }

    // Case 7.
    bool is_reserved = false;
    if (!pool->TryAcquireThreadToken(&is_reserved)) break;

    // Case 8.
    if (!is_reserved) {
      if (runtime_state_->query_resource_mgr() != NULL &&
          runtime_state_->query_resource_mgr()->IsVcoreOverSubscribed()) {
        break;
      }
    }

    COUNTER_ADD(&active_scanner_thread_counter_, 1);
    COUNTER_ADD(num_scanner_threads_started_counter_, 1);
    stringstream ss;
    ss << "scanner-thread(" << num_scanner_threads_started_counter_->value() << ")";
    scanner_threads_.AddThread(
        new Thread("hdfs-scan-node", ss.str(), &HdfsScanNode::ScannerThread, this));
    started_scanner = true;

    if (runtime_state_->query_resource_mgr() != NULL) {
      runtime_state_->query_resource_mgr()->NotifyThreadUsageChange(1);
    }
  }
  if (!started_scanner) ++num_skipped_tokens_;
}

void HdfsScanNode::ScannerThread() {
  SCOPED_THREAD_COUNTER_MEASUREMENT(scanner_thread_counters());
  SCOPED_TIMER(runtime_state_->total_cpu_timer());

  while (!done_) {
    {
      // Check if we have enough resources (thread token and memory) to keep using
      // this thread.
      unique_lock<mutex> l(lock_);
      if (active_scanner_thread_counter_.value() > 1) {
        if (runtime_state_->resource_pool()->optional_exceeded() ||
            !EnoughMemoryForScannerThread(false)) {
          // We can't break here. We need to update the counter with the lock held or
          // else all threads might see active_scanner_thread_counter_.value > 1.
          COUNTER_ADD(&active_scanner_thread_counter_, -1);
          // Unlock before releasing the thread token to avoid deadlock in
          // ThreadTokenAvailableCb().
          l.unlock();
          if (runtime_state_->query_resource_mgr() != NULL) {
            runtime_state_->query_resource_mgr()->NotifyThreadUsageChange(-1);
          }
          runtime_state_->resource_pool()->ReleaseThreadToken(false);
          return;
        }
      } else {
        // If this is the only scanner thread, it should keep running regardless
        // of resource constraints.
      }
    }

    DiskIoMgr::ScanRange* scan_range;
    // Take a snapshot of num_unqueued_files_ before calling GetNextRange().
    // We don't want num_unqueued_files_ to go to zero between the return from
    // GetNextRange() and the check for when all ranges are complete.
    int num_unqueued_files = num_unqueued_files_;
    AtomicUtil::MemoryBarrier();
    Status status = runtime_state_->io_mgr()->GetNextRange(reader_context_, &scan_range);

    if (status.ok() && scan_range != NULL) {
      // Got a scan range. Create a new scanner object and process the range
      // end to end (in this thread).
      ScanRangeMetadata* metadata =
          reinterpret_cast<ScanRangeMetadata*>(scan_range->meta_data());
      int64_t partition_id = metadata->partition_id;
      HdfsPartitionDescriptor* partition = hdfs_table_->GetPartition(partition_id);
      DCHECK_NOTNULL(partition);

      ScannerContext* context = runtime_state_->obj_pool()->Add(
          new ScannerContext(runtime_state_, this, partition, scan_range));
      Status scanner_status;
      HdfsScanner* scanner = CreateAndPrepareScanner(partition, context, &scanner_status);
      if (VLOG_QUERY_IS_ON && (!scanner_status.ok() || scanner == NULL)) {
        stringstream ss;
        ss << "Error preparing scanner for scan range " << scan_range->file() <<
            "(" << scan_range->offset() << ":" << scan_range->len() << ").";
        ss << endl << runtime_state_->ErrorLog();
        VLOG_QUERY << ss.str();
      }

      status = scanner->ProcessSplit();
      if (VLOG_QUERY_IS_ON && !status.ok() && !runtime_state_->error_log().empty()) {
        // This thread hit an error, record it and bail.
        // TODO: better way to report errors? Maybe via the thrift interface?
        stringstream ss;
        ss << "Scan node (id=" << id() << ") ran into a parse error for scan range "
           << scan_range->file() << "(" << scan_range->offset() << ":"
           << scan_range->len() << ").";
        if (partition->file_format() != THdfsFileFormat::PARQUET) {
          // Parquet doesn't read the range end to end so the current offset isn't
          // useful.
          // TODO: make sure the parquet reader is outputting as much diagnostic
          // information as possible.
          ScannerContext::Stream* stream = context->GetStream();
          ss << " Processed " << stream->total_bytes_returned() << " bytes.";
        }
        ss << endl << runtime_state_->ErrorLog();
        VLOG_QUERY << ss.str();
      }
      scanner->Close();
    }

    if (!status.ok()) {
      {
        unique_lock<mutex> l(lock_);
        // If there was already an error, the main thread will do the cleanup
        if (!status_.ok()) break;

        if (status.IsCancelled()) {
          // The scan node should be the only thing that initiated scanner threads to
          // see cancelled (i.e. limit reached). No need to do anything here.
          DCHECK(done_);
          break;
        }
        // Set status_ before calling SetDone() (which shuts down the RowBatchQueue),
        // to ensure that GetNextInternal() notices the error status.
        status_ = status;
      }

      if (status.IsMemLimitExceeded()) runtime_state_->SetMemLimitExceeded();
      SetDone();
      break;
    }

    // Done with range and it completed successfully
    if (progress_.done()) {
      // All ranges are finished. Indicate we are done.
      SetDone();
      break;
    }

    if (scan_range == NULL && num_unqueued_files == 0) {
      // TODO: Based on the usage pattern of all_ranges_started_, it looks like it is
      // not needed to acquire the lock in x86.
      unique_lock<mutex> l(lock_);
      // All ranges have been queued and GetNextRange() returned NULL. This means that
      // every range is either done or being processed by another thread.
      all_ranges_started_ = true;
      break;
    }
  }

  COUNTER_ADD(&active_scanner_thread_counter_, -1);
  if (runtime_state_->query_resource_mgr() != NULL) {
    runtime_state_->query_resource_mgr()->NotifyThreadUsageChange(-1);
  }
  runtime_state_->resource_pool()->ReleaseThreadToken(false);
}

void HdfsScanNode::RangeComplete(const THdfsFileFormat::type& file_type,
    const THdfsCompression::type& compression_type) {
  vector<THdfsCompression::type> types;
  types.push_back(compression_type);
  RangeComplete(file_type, types);
}

void HdfsScanNode::RangeComplete(const THdfsFileFormat::type& file_type,
    const vector<THdfsCompression::type>& compression_types) {
  scan_ranges_complete_counter()->Add(1);
  progress_.Update(1);

  {
    lock_guard<SpinLock> l(file_type_counts_lock_);
    for (int i = 0; i < compression_types.size(); ++i) {
      ++file_type_counts_[make_pair(file_type, compression_types[i])];
    }
  }
}

void HdfsScanNode::SetDone() {
  {
    unique_lock<mutex> l(lock_);
    if (done_) return;
    done_ = true;
  }
  if (reader_context_ != NULL) {
    runtime_state_->io_mgr()->CancelContext(reader_context_);
  }
  materialized_row_batches_->Shutdown();
}

void HdfsScanNode::ComputeSlotMaterializationOrder(vector<int>* order) const {
  const vector<ExprContext*>& conjuncts = ExecNode::conjunct_ctxs();
  // Initialize all orders to conjuncts.size() (i.e. after the last conjunct).
  order->insert(order->begin(), materialized_slots().size(), conjuncts.size());

  const DescriptorTbl& desc_tbl = runtime_state_->desc_tbl();

  vector<SlotId> slot_ids;
  for (int conjunct_idx = 0; conjunct_idx < conjuncts.size(); ++conjunct_idx) {
    slot_ids.clear();
    int num_slots = conjuncts[conjunct_idx]->root()->GetSlotIds(&slot_ids);
    for (int j = 0; j < num_slots; ++j) {
      SlotDescriptor* slot_desc = desc_tbl.GetSlotDescriptor(slot_ids[j]);
      int slot_idx = GetMaterializedSlotIdx(slot_desc->col_path());
      // slot_idx == -1 means this was a partition key slot which is always
      // materialized before any slots.
      if (slot_idx == -1) continue;
      // If this slot hasn't been assigned an order, assign it to be materialized
      // before evaluating conjuncts[conjunct_idx].
      if ((*order)[slot_idx] == conjuncts.size()) {
        (*order)[slot_idx] = conjunct_idx;
      }
    }
  }
}
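
// Example: with materialized slots [s0, s1, s2] and conjuncts c0 referencing s2 and
// c1 referencing s0 and s1, the resulting order is [1, 1, 0]: s2 must be materialized
// before evaluating c0, s0 and s1 before c1. Slots referenced by no conjunct keep the
// sentinel value conjuncts.size().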

void HdfsScanNode::StopAndFinalizeCounters() {
  unique_lock<mutex> l(lock_);
  if (!counters_running_) return;
  counters_running_ = false;

  PeriodicCounterUpdater::StopTimeSeriesCounter(bytes_read_timeseries_counter_);
  PeriodicCounterUpdater::StopRateCounter(total_throughput_counter());
  PeriodicCounterUpdater::StopSamplingCounter(average_scanner_thread_concurrency_);
  PeriodicCounterUpdater::StopSamplingCounter(average_hdfs_read_thread_concurrency_);
  PeriodicCounterUpdater::StopBucketingCounters(&hdfs_read_thread_concurrency_bucket_,
      true);

  // Output hdfs read thread concurrency into info string
  stringstream ss;
  for (int i = 0; i < hdfs_read_thread_concurrency_bucket_.size(); ++i) {
    ss << i << ":" << setprecision(4)
       << hdfs_read_thread_concurrency_bucket_[i]->double_value() << "% ";
  }
  runtime_profile_->AddInfoString("Hdfs Read Thread Concurrency Bucket", ss.str());

  // Convert the disk access bitmap to the number of disks accessed
  uint64_t num_disk_bitmap = disks_accessed_bitmap_.value();
  int64_t num_disk_accessed = BitUtil::Popcount(num_disk_bitmap);
  if (num_disks_accessed_counter_ != NULL) {
    num_disks_accessed_counter_->Set(num_disk_accessed);
  }

  // Output completed file types and counts to info string
  if (!file_type_counts_.empty()) {
    stringstream ss;
    {
      lock_guard<SpinLock> l2(file_type_counts_lock_);
      for (FileTypeCountsMap::const_iterator it = file_type_counts_.begin();
          it != file_type_counts_.end(); ++it) {
        ss << it->first.first << "/" << it->first.second << ":" << it->second << " ";
      }
    }
    runtime_profile_->AddInfoString("File Formats", ss.str());
  }

  // Output the fraction of scanners with codegen enabled
  ss.str(std::string());
  ss << "Codegen enabled: " << num_scanners_codegen_enabled_ << " out of "
     << (num_scanners_codegen_enabled_ + num_scanners_codegen_disabled_);
  AddRuntimeExecOption(ss.str());

  if (reader_context_ != NULL) {
    bytes_read_local_->Set(runtime_state_->io_mgr()->bytes_read_local(reader_context_));
    bytes_read_short_circuit_->Set(
        runtime_state_->io_mgr()->bytes_read_short_circuit(reader_context_));
    bytes_read_dn_cache_->Set(
        runtime_state_->io_mgr()->bytes_read_dn_cache(reader_context_));
    num_remote_ranges_->Set(static_cast<int64_t>(
        runtime_state_->io_mgr()->num_remote_ranges(reader_context_)));
    unexpected_remote_bytes_->Set(
        runtime_state_->io_mgr()->unexpected_remote_bytes(reader_context_));

    if (unexpected_remote_bytes_->value() >= UNEXPECTED_REMOTE_BYTES_WARN_THRESHOLD) {
      runtime_state_->LogError(ErrorMsg(TErrorCode::GENERAL, Substitute(
          "Read $0 of data across network that was expected to be local. "
          "Block locality metadata for table '$1.$2' may be stale. Consider running "
          "\"INVALIDATE METADATA `$1`.`$2`\".",
          PrettyPrinter::Print(unexpected_remote_bytes_->value(), TUnit::BYTES),
          hdfs_table_->database(), hdfs_table_->name())));
    }

    ImpaladMetrics::IO_MGR_BYTES_READ->Increment(bytes_read_counter()->value());
    ImpaladMetrics::IO_MGR_LOCAL_BYTES_READ->Increment(
        bytes_read_local_->value());
    ImpaladMetrics::IO_MGR_SHORT_CIRCUIT_BYTES_READ->Increment(
        bytes_read_short_circuit_->value());
    ImpaladMetrics::IO_MGR_CACHED_BYTES_READ->Increment(
        bytes_read_dn_cache_->value());
  }
}

void HdfsScanNode::UpdateHdfsSplitStats(
    const vector<TScanRangeParams>& scan_range_params_list,
    PerVolumnStats* per_volume_stats) {
  pair<int, int64_t> init_value(0, 0);
  BOOST_FOREACH(const TScanRangeParams& scan_range_params, scan_range_params_list) {
    const TScanRange& scan_range = scan_range_params.scan_range;
    if (!scan_range.__isset.hdfs_file_split) continue;
    const THdfsFileSplit& split = scan_range.hdfs_file_split;
    pair<int, int64_t>* stats =
        FindOrInsert(per_volume_stats, scan_range_params.volume_id, init_value);
    ++(stats->first);
    stats->second += split.length;
  }
}

void HdfsScanNode::PrintHdfsSplitStats(const PerVolumnStats& per_volume_stats,
    stringstream* ss) {
  for (PerVolumnStats::const_iterator i = per_volume_stats.begin();
      i != per_volume_stats.end(); ++i) {
    (*ss) << i->first << ":" << i->second.first << "/"
          << PrettyPrinter::Print(i->second.second, TUnit::BYTES) << " ";
  }
}
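
// Example output for two volumes: "0:5/2.50 GB 1:3/1.20 GB ", i.e. volume 0 holds 5
// splits totalling 2.50 GB and volume 1 holds 3 splits totalling 1.20 GB (cf.
// HDFS_SPLIT_STATS_DESC above).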