doc/html/plan-fragment-executor_8cc_source.html

 // Copyright 2012 Cloudera Inc.

 //

 // Licensed under the Apache License, Version 2.0 (the "License");

 // you may not use this file except in compliance with the License.

 // You may obtain a copy of the License at

 //

 // http://www.apache.org/licenses/LICENSE-2.0

 //

 // Unless required by applicable law or agreed to in writing, software

 // distributed under the License is distributed on an "AS IS" BASIS,

 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

 // See the License for the specific language governing permissions and

 // limitations under the License.


 #include "runtime/plan-fragment-executor.h"


 #include <thrift/protocol/TDebugProtocol.h>

 #include <boost/date_time/posix_time/posix_time_types.hpp>

 #include <boost/unordered_map.hpp>

 #include <boost/foreach.hpp>

 #include <gutil/strings/substitute.h>


 #include "codegen/llvm-codegen.h"

 #include "common/logging.h"

 #include "common/object-pool.h"

 #include "exec/data-sink.h"

 #include "exec/exec-node.h"

 #include "exec/exchange-node.h"

 #include "exec/scan-node.h"

 #include "exec/hdfs-scan-node.h"

 #include "exec/hbase-table-scanner.h"

 #include "exprs/expr.h"

 #include "runtime/descriptors.h"

 #include "runtime/data-stream-mgr.h"

 #include "runtime/row-batch.h"

 #include "runtime/mem-tracker.h"

 #include "util/cgroups-mgr.h"

 #include "util/cpu-info.h"

 #include "util/debug-util.h"

 #include "util/container-util.h"

 #include "util/parse-util.h"

 #include "util/mem-info.h"

 #include "util/periodic-counter-updater.h"

 #include "util/llama-util.h"

 #include "util/pretty-printer.h"


 DEFINE_bool(serialize_batch, false, "serialize and deserialize each returned row batch");

 DEFINE_int32(status_report_interval, 5, "interval between profile reports; in seconds");

 DECLARE_bool(enable_rm);


 #include "common/names.h"


 namespace posix_time = boost::posix_time;

 using boost::get_system_time;

 using boost::system_time;

 using namespace apache::thrift;

 using namespace strings;


 namespace impala {


 const string PlanFragmentExecutor::PER_HOST_PEAK_MEM_COUNTER = "PerHostPeakMemUsage";


 PlanFragmentExecutor::PlanFragmentExecutor(ExecEnv* exec_env,

     const ReportStatusCallback& report_status_cb) :

     exec_env_(exec_env), plan_(NULL), report_status_cb_(report_status_cb),

     report_thread_active_(false), done_(false), prepared_(false), closed_(false),

     has_thread_token_(false), average_thread_tokens_(NULL),

     mem_usage_sampled_counter_(NULL), thread_usage_sampled_counter_(NULL) {

 }


 PlanFragmentExecutor::~PlanFragmentExecutor() {

   Close();

   if (runtime_state_->query_resource_mgr() != NULL) {

     exec_env_->resource_broker()->UnregisterQueryResourceMgr(query_id_);

   }

   // at this point, the report thread should have been stopped

   DCHECK(!report_thread_active_);

 }


 Status PlanFragmentExecutor::Prepare(const TExecPlanFragmentParams& request) {

   fragment_sw_.Start();

   const TPlanFragmentExecParams& params = request.params;

   query_id_ = request.fragment_instance_ctx.query_ctx.query_id;


   VLOG_QUERY << "Prepare(): query_id=" << PrintId(query_id_) << " instance_id="

              << PrintId(request.fragment_instance_ctx.fragment_instance_id);

   VLOG(2) << "params:\n" << ThriftDebugString(params);


   if (request.__isset.reserved_resource) {

     VLOG_QUERY << "Executing fragment in reserved resource:\n"

                << request.reserved_resource;

   }


   string cgroup = "";

   if (FLAGS_enable_rm && request.__isset.reserved_resource) {

     cgroup = exec_env_->cgroups_mgr()->UniqueIdToCgroup(PrintId(query_id_, "_"));

   }


   runtime_state_.reset(

       new RuntimeState(request.fragment_instance_ctx, cgroup, exec_env_));


   // total_time_counter() is in the runtime_state_ so start it up now.

   SCOPED_TIMER(profile()->total_time_counter());


   // Register after setting runtime_state_ to ensure proper cleanup.

   if (FLAGS_enable_rm && !cgroup.empty() && request.__isset.reserved_resource) {

     bool is_first;

     RETURN_IF_ERROR(exec_env_->cgroups_mgr()->RegisterFragment(

         request.fragment_instance_ctx.fragment_instance_id, cgroup, &is_first));

     // The first fragment using cgroup sets the cgroup's CPU shares based on the reserved

     // resource.

     if (is_first) {

       DCHECK(request.__isset.reserved_resource);

       int32_t cpu_shares = exec_env_->cgroups_mgr()->VirtualCoresToCpuShares(

           request.reserved_resource.v_cpu_cores);

       RETURN_IF_ERROR(exec_env_->cgroups_mgr()->SetCpuShares(cgroup, cpu_shares));

     }

   }


   // TODO: Find the reservation id when the resource request is not set

   if (FLAGS_enable_rm && request.__isset.reserved_resource) {

     TUniqueId reservation_id;

     reservation_id << request.reserved_resource.reservation_id;


     // TODO: Combine this with RegisterFragment() etc.

     QueryResourceMgr* res_mgr;

     bool is_first = exec_env_->resource_broker()->GetQueryResourceMgr(query_id_,

         reservation_id, request.local_resource_address, &res_mgr);

     DCHECK(res_mgr != NULL);

     runtime_state_->SetQueryResourceMgr(res_mgr);

     if (is_first) {

       runtime_state_->query_resource_mgr()->InitVcoreAcquisition(

           request.reserved_resource.v_cpu_cores);

     }

   }


   // reservation or a query option.

   int64_t bytes_limit = -1;

   if (runtime_state_->query_options().__isset.mem_limit &&

       runtime_state_->query_options().mem_limit > 0) {

     bytes_limit = runtime_state_->query_options().mem_limit;

     VLOG_QUERY << "Using query memory limit from query options: "

                << PrettyPrinter::Print(bytes_limit, TUnit::BYTES);

   }


   int64_t rm_reservation_size_bytes = -1;

   if (request.__isset.reserved_resource && request.reserved_resource.memory_mb > 0) {

     rm_reservation_size_bytes =

         static_cast<int64_t>(request.reserved_resource.memory_mb) * 1024L * 1024L;

     // Queries that use more than the hard limit will be killed, so it's not useful to

     // have a reservation larger than the hard limit. Clamp reservation bytes limit to the

     // hard limit (if it exists).

     if (rm_reservation_size_bytes > bytes_limit && bytes_limit != -1) {

       runtime_state_->LogError(ErrorMsg(TErrorCode::FRAGMENT_EXECUTOR,

           PrettyPrinter::PrintBytes(rm_reservation_size_bytes),

           PrettyPrinter::PrintBytes(bytes_limit)));

       rm_reservation_size_bytes = bytes_limit;

     }

     VLOG_QUERY << "Using RM reservation memory limit from resource reservation: "

                << PrettyPrinter::Print(rm_reservation_size_bytes, TUnit::BYTES);

   }


   DCHECK(!params.request_pool.empty());

   runtime_state_->InitMemTrackers(query_id_, &params.request_pool,

       bytes_limit, rm_reservation_size_bytes);

   RETURN_IF_ERROR(runtime_state_->CreateBlockMgr());


   // Reserve one main thread from the pool

   runtime_state_->resource_pool()->AcquireThreadToken();

   if (runtime_state_->query_resource_mgr() != NULL) {

     runtime_state_->query_resource_mgr()->NotifyThreadUsageChange(1);

   }

   has_thread_token_ = true;


   average_thread_tokens_ = profile()->AddSamplingCounter("AverageThreadTokens",

       bind<int64_t>(mem_fn(&ThreadResourceMgr::ResourcePool::num_threads),

           runtime_state_->resource_pool()));

   mem_usage_sampled_counter_ = profile()->AddTimeSeriesCounter("MemoryUsage",

       TUnit::BYTES,

       bind<int64_t>(mem_fn(&MemTracker::consumption),

           runtime_state_->instance_mem_tracker()));

   thread_usage_sampled_counter_ = profile()->AddTimeSeriesCounter("ThreadUsage",

       TUnit::UNIT,

       bind<int64_t>(mem_fn(&ThreadResourceMgr::ResourcePool::num_threads),

           runtime_state_->resource_pool()));


   // set up desc tbl

   DescriptorTbl* desc_tbl = NULL;

   DCHECK(request.__isset.desc_tbl);

   RETURN_IF_ERROR(

       DescriptorTbl::Create(obj_pool(), request.desc_tbl, &desc_tbl));

   runtime_state_->set_desc_tbl(desc_tbl);

   VLOG_QUERY << "descriptor table for fragment="

              << request.fragment_instance_ctx.fragment_instance_id

              << "\n" << desc_tbl->DebugString();


   // set up plan

   DCHECK(request.__isset.fragment);

   RETURN_IF_ERROR(

       ExecNode::CreateTree(obj_pool(), request.fragment.plan, *desc_tbl, &plan_));

   runtime_state_->set_fragment_root_id(plan_->id());


   if (request.params.__isset.debug_node_id) {

     DCHECK(request.params.__isset.debug_action);

     DCHECK(request.params.__isset.debug_phase);

     ExecNode::SetDebugOptions(request.params.debug_node_id,

         request.params.debug_phase, request.params.debug_action, plan_);

   }


   // set #senders of exchange nodes before calling Prepare()

   vector<ExecNode*> exch_nodes;

   plan_->CollectNodes(TPlanNodeType::EXCHANGE_NODE, &exch_nodes);

   BOOST_FOREACH(ExecNode* exch_node, exch_nodes)

   {

     DCHECK_EQ(exch_node->type(), TPlanNodeType::EXCHANGE_NODE);

     int num_senders = FindWithDefault(params.per_exch_num_senders,

         exch_node->id(), 0);

     DCHECK_GT(num_senders, 0);

     static_cast<ExchangeNode*>(exch_node)->set_num_senders(num_senders);

   }


   // set scan ranges

   vector<ExecNode*> scan_nodes;

   vector<TScanRangeParams> no_scan_ranges;

   plan_->CollectScanNodes(&scan_nodes);

   for (int i = 0; i < scan_nodes.size(); ++i) {

     ScanNode* scan_node = static_cast<ScanNode*>(scan_nodes[i]);

     const vector<TScanRangeParams>& scan_ranges = FindWithDefault(

         params.per_node_scan_ranges, scan_node->id(), no_scan_ranges);

     scan_node->SetScanRanges(scan_ranges);

   }


   RuntimeProfile::Counter* prepare_timer = ADD_TIMER(profile(), "PrepareTime");

   {

     SCOPED_TIMER(prepare_timer);

     RETURN_IF_ERROR(plan_->Prepare(runtime_state_.get()));

   }


   PrintVolumeIds(params.per_node_scan_ranges);


   // set up sink, if required

   if (request.fragment.__isset.output_sink) {

     RETURN_IF_ERROR(DataSink::CreateDataSink(

         obj_pool(), request.fragment.output_sink, request.fragment.output_exprs,

         params, row_desc(), &sink_));

     RETURN_IF_ERROR(sink_->Prepare(runtime_state()));


     RuntimeProfile* sink_profile = sink_->profile();

     if (sink_profile != NULL) {

       profile()->AddChild(sink_profile);

     }

   } else {

     sink_.reset(NULL);

   }


   // set up profile counters

   profile()->AddChild(plan_->runtime_profile());

   rows_produced_counter_ =

       ADD_COUNTER(profile(), "RowsProduced", TUnit::UNIT);

   per_host_mem_usage_ =

       ADD_COUNTER(profile(), PER_HOST_PEAK_MEM_COUNTER, TUnit::BYTES);


   row_batch_.reset(new RowBatch(plan_->row_desc(), runtime_state_->batch_size(),

         runtime_state_->instance_mem_tracker()));

   VLOG(2) << "plan_root=\n" << plan_->DebugString();

   prepared_ = true;

   return Status::OK;

 }


 void PlanFragmentExecutor::OptimizeLlvmModule() {

   if (!runtime_state_->codegen_created()) return;

   LlvmCodeGen* codegen;

   Status status = runtime_state_->GetCodegen(&codegen, /* initalize */ false);

   DCHECK(status.ok());

   DCHECK_NOTNULL(codegen);

   status = codegen->FinalizeModule();

   if (!status.ok()) {

     stringstream ss;

     ss << "Error with codegen for this query: " << status.GetDetail();

     runtime_state_->LogError(ErrorMsg(TErrorCode::GENERAL, ss.str()));

   }

 }


 void PlanFragmentExecutor::PrintVolumeIds(

     const PerNodeScanRanges& per_node_scan_ranges) {

   if (per_node_scan_ranges.empty()) return;


   HdfsScanNode::PerVolumnStats per_volume_stats;

   BOOST_FOREACH(const PerNodeScanRanges::value_type& entry, per_node_scan_ranges) {

     HdfsScanNode::UpdateHdfsSplitStats(entry.second, &per_volume_stats);

   }


   stringstream str;


   HdfsScanNode::PrintHdfsSplitStats(per_volume_stats, &str);

   profile()->AddInfoString(HdfsScanNode::HDFS_SPLIT_STATS_DESC, str.str());

   VLOG_FILE

       << "Hdfs split stats (<volume id>:<# splits>/<split lengths>) for query="

       << query_id_ << ":\n" << str.str();

 }


 Status PlanFragmentExecutor::Open() {

   VLOG_QUERY << "Open(): instance_id="

       << runtime_state_->fragment_instance_id();

   // we need to start the profile-reporting thread before calling Open(), since it

   // may block

   if (!report_status_cb_.empty() && FLAGS_status_report_interval > 0) {

     unique_lock<mutex> l(report_thread_lock_);

     report_thread_.reset(

         new Thread("plan-fragment-executor", "report-profile",

             &PlanFragmentExecutor::ReportProfile, this));

     // make sure the thread started up, otherwise ReportProfile() might get into a race

     // with StopReportThread()

     report_thread_started_cv_.wait(l);

     report_thread_active_ = true;

   }


   OptimizeLlvmModule();


   Status status = OpenInternal();

   if (!status.ok() && !status.IsCancelled() && !status.IsMemLimitExceeded()) {

     // Log error message in addition to returning in Status. Queries that do not

     // fetch results (e.g. insert) may not receive the message directly and can

     // only retrieve the log.

     runtime_state_->LogError(status.msg());

   }

   UpdateStatus(status);

   return status;

 }


 Status PlanFragmentExecutor::OpenInternal() {

   {

     SCOPED_TIMER(profile()->total_time_counter());

     RETURN_IF_ERROR(plan_->Open(runtime_state_.get()));

   }


   if (sink_.get() == NULL) return Status::OK;


   RETURN_IF_ERROR(sink_->Open(runtime_state_.get()));


   // If there is a sink, do all the work of driving it here, so that

   // when this returns the query has actually finished

   while (!done_) {

     RowBatch* batch;

     RETURN_IF_ERROR(GetNextInternal(&batch));

     if (batch == NULL) break;

     if (VLOG_ROW_IS_ON) {

       VLOG_ROW << "OpenInternal: #rows=" << batch->num_rows();

       for (int i = 0; i < batch->num_rows(); ++i) {

         VLOG_ROW << PrintRow(batch->GetRow(i), row_desc());

       }

     }


     SCOPED_TIMER(profile()->total_time_counter());

     RETURN_IF_ERROR(sink_->Send(runtime_state(), batch, done_));

   }


   // Close the sink *before* stopping the report thread. Close may

   // need to add some important information to the last report that

   // gets sent. (e.g. table sinks record the files they have written

   // to in this method)

   // The coordinator report channel waits until all backends are

   // either in error or have returned a status report with done =

   // true, so tearing down any data stream state (a separate

   // channel) in Close is safe.

   SCOPED_TIMER(profile()->total_time_counter());

   sink_->Close(runtime_state());

   done_ = true;


   FragmentComplete();

   return Status::OK;

 }


 void PlanFragmentExecutor::ReportProfile() {

   VLOG_FILE << "ReportProfile(): instance_id="

       << runtime_state_->fragment_instance_id();

   DCHECK(!report_status_cb_.empty());

   unique_lock<mutex> l(report_thread_lock_);

   // tell Open() that we started

   report_thread_started_cv_.notify_one();


   // Jitter the reporting time of remote fragments by a random amount between

   // 0 and the report_interval.  This way, the coordinator doesn't get all the

   // updates at once so its better for contention as well as smoother progress

   // reporting.

   int report_fragment_offset = rand() % FLAGS_status_report_interval;

   system_time timeout = get_system_time()

       + posix_time::seconds(report_fragment_offset);

   // We don't want to wait longer than it takes to run the entire fragment.

   stop_report_thread_cv_.timed_wait(l, timeout);


   while (report_thread_active_) {

     system_time timeout = get_system_time()

         + posix_time::seconds(FLAGS_status_report_interval);


     // timed_wait can return because the timeout occurred or the condition variable

     // was signaled.  We can't rely on its return value to distinguish between the

     // two cases (e.g. there is a race here where the wait timed out but before grabbing

     // the lock, the condition variable was signaled).  Instead, we will use an external

     // flag, report_thread_active_, to coordinate this.

     stop_report_thread_cv_.timed_wait(l, timeout);


     if (VLOG_FILE_IS_ON) {

       VLOG_FILE << "Reporting " << (!report_thread_active_ ? "final " : " ")

           << "profile for instance " << runtime_state_->fragment_instance_id();

       stringstream ss;

       profile()->PrettyPrint(&ss);

       VLOG_FILE << ss.str();

     }


     if (!report_thread_active_) break;


     if (completed_report_sent_.Read() == 0) {

       // No complete fragment report has been sent.

       SendReport(false);

     }

   }


   VLOG_FILE << "exiting reporting thread: instance_id="

       << runtime_state_->fragment_instance_id();

 }


 void PlanFragmentExecutor::SendReport(bool done) {

   if (report_status_cb_.empty()) return;


   Status status;

   {

     lock_guard<mutex> l(status_lock_);

     status = status_;

   }


   // Update the counter for the peak per host mem usage.

   per_host_mem_usage_->Set(runtime_state()->query_mem_tracker()->peak_consumption());


   // This will send a report even if we are cancelled.  If the query completed correctly

   // but fragments still need to be cancelled (e.g. limit reached), the coordinator will

   // be waiting for a final report and profile.

   report_status_cb_(status, profile(), done || !status.ok());

 }


 void PlanFragmentExecutor::StopReportThread() {

   if (!report_thread_active_) return;

   {

     lock_guard<mutex> l(report_thread_lock_);

     report_thread_active_ = false;

   }

   stop_report_thread_cv_.notify_one();

   report_thread_->Join();

 }


 // TODO: why can't we just put the total_time_counter() at the

 // beginning of Open() and GetNext(). This seems to really mess

 // the timer here, presumably because the data stream sender is

 // multithreaded and the timer we use gets confused.

 Status PlanFragmentExecutor::GetNext(RowBatch** batch) {

   VLOG_FILE << "GetNext(): instance_id="

       << runtime_state_->fragment_instance_id();

   Status status = GetNextInternal(batch);

   UpdateStatus(status);

   if (done_) {

     VLOG_QUERY << "Finished executing fragment query_id=" << PrintId(query_id_)

         << " instance_id=" << PrintId(runtime_state_->fragment_instance_id());

     FragmentComplete();

     // GetNext() uses *batch = NULL to signal the end.

     if (*batch != NULL && (*batch)->num_rows() == 0) *batch = NULL;

   }


   return status;

 }


 Status PlanFragmentExecutor::GetNextInternal(RowBatch** batch) {

   if (done_) {

     *batch = NULL;

     return Status::OK;

   }


   while (!done_) {

     row_batch_->Reset();

     SCOPED_TIMER(profile()->total_time_counter());

     RETURN_IF_ERROR(

         plan_->GetNext(runtime_state_.get(), row_batch_.get(), &done_));

     *batch = row_batch_.get();

     if (row_batch_->num_rows() > 0) {

       COUNTER_ADD(rows_produced_counter_, row_batch_->num_rows());

       break;

     }

   }


   return Status::OK;

 }


 void PlanFragmentExecutor::FragmentComplete() {

   // Check the atomic flag. If it is set, then a fragment complete report has already

   // been sent.

   bool send_report = completed_report_sent_.CompareAndSwap(0,1);


   fragment_sw_.Stop();

   int64_t cpu_and_wait_time = fragment_sw_.ElapsedTime();

   fragment_sw_ = MonotonicStopWatch();

   int64_t cpu_time = cpu_and_wait_time

       - runtime_state_->total_storage_wait_timer()->value()

       - runtime_state_->total_network_send_timer()->value()

       - runtime_state_->total_network_receive_timer()->value();

   // Timing is not perfect.

   if (cpu_time < 0)

     cpu_time = 0;

   runtime_state_->total_cpu_timer()->Add(cpu_time);


   ReleaseThreadToken();

   StopReportThread();

   if (send_report) SendReport(true);

 }


 void PlanFragmentExecutor::UpdateStatus(const Status& status) {

   if (status.ok()) return;


   bool send_report = completed_report_sent_.CompareAndSwap(0,1);


   {

     lock_guard<mutex> l(status_lock_);

     if (status_.ok()) {

       if (status.IsMemLimitExceeded()) runtime_state_->SetMemLimitExceeded();

       status_ = status;

     }

   }


   StopReportThread();

   if (send_report) SendReport(true);

 }


 void PlanFragmentExecutor::Cancel() {

   VLOG_QUERY << "Cancel(): instance_id="

       << runtime_state_->fragment_instance_id();

   DCHECK(prepared_);

   runtime_state_->set_is_cancelled(true);

   runtime_state_->stream_mgr()->Cancel(runtime_state_->fragment_instance_id());

 }


 const RowDescriptor& PlanFragmentExecutor::row_desc() {

   return plan_->row_desc();

 }


 RuntimeProfile* PlanFragmentExecutor::profile() {

   return runtime_state_->runtime_profile();

 }


 bool PlanFragmentExecutor::ReachedLimit() {

   return plan_->ReachedLimit();

 }


 void PlanFragmentExecutor::ReleaseThreadToken() {

   if (has_thread_token_) {

     has_thread_token_ = false;

     runtime_state_->resource_pool()->ReleaseThreadToken(true);

     if (runtime_state_->query_resource_mgr() != NULL) {

       runtime_state_->query_resource_mgr()->NotifyThreadUsageChange(-1);

     }

     PeriodicCounterUpdater::StopSamplingCounter(average_thread_tokens_);

     PeriodicCounterUpdater::StopTimeSeriesCounter(

         thread_usage_sampled_counter_);

   }

 }


 void PlanFragmentExecutor::Close() {

   if (closed_) return;

   row_batch_.reset();

   // Prepare may not have been called, which sets runtime_state_

   if (runtime_state_.get() != NULL) {

     if (runtime_state_->query_resource_mgr() != NULL) {

       exec_env_->cgroups_mgr()->UnregisterFragment(

           runtime_state_->fragment_instance_id(), runtime_state_->cgroup());

     }

     if (plan_ != NULL) plan_->Close(runtime_state_.get());

     if (sink_.get() != NULL) sink_->Close(runtime_state());

     BOOST_FOREACH(DiskIoMgr::RequestContext* context,

         *runtime_state_->reader_contexts()) {

       runtime_state_->io_mgr()->UnregisterContext(context);

     }

     exec_env_->thread_mgr()->UnregisterPool(runtime_state_->resource_pool());

   }

   if (mem_usage_sampled_counter_ != NULL) {

     PeriodicCounterUpdater::StopTimeSeriesCounter(mem_usage_sampled_counter_);

     mem_usage_sampled_counter_ = NULL;

   }

   closed_ = true;

 }


 }

row-batch.h

impala::DescriptorTbl
Definition: descriptors.h:338

impala::ExecNode::id
int id() const
Definition: exec-node.h:154

impala::PlanFragmentExecutor::UpdateStatus
void UpdateStatus(const Status &status)
Definition: plan-fragment-executor.cc:514

impala::PlanFragmentExecutor::row_batch_
boost::scoped_ptr< RowBatch > row_batch_
Definition: plan-fragment-executor.h:182

impala::ExecNode::CollectNodes
void CollectNodes(TPlanNodeType::type node_type, std::vector< ExecNode * > *nodes)
Definition: exec-node.cc:359

impala::MemTracker::consumption
int64_t consumption() const
Returns the memory consumed in bytes.
Definition: mem-tracker.h:298

impala::ExecNode::CollectScanNodes
void CollectScanNodes(std::vector< ExecNode * > *nodes)
Collect all scan node types.
Definition: exec-node.cc:366

impala::RowBatch::num_rows
int num_rows() const
Definition: row-batch.h:215

impala::Status::GetDetail
const std::string GetDetail() const
Definition: status.cc:184

impala::MonotonicStopWatch::Stop
void Stop()
Definition: stopwatch.h:88

cgroups-mgr.h

impala::CgroupsMgr::VirtualCoresToCpuShares
int32_t VirtualCoresToCpuShares(int16_t v_cpu_cores)
Definition: cgroups-mgr.cc:59

impala::RuntimeProfile::AddSamplingCounter
Counter * AddSamplingCounter(const std::string &name, Counter *src_counter)

hdfs-scan-node.h

impala::RuntimeProfile::AddInfoString
void AddInfoString(const std::string &key, const std::string &value)
Definition: runtime-profile.cc:406

impala::MonotonicStopWatch
Definition: stopwatch.h:74

impala::PlanFragmentExecutor::GetNextInternal
Status GetNextInternal(RowBatch **batch)
Definition: plan-fragment-executor.cc:471

impala::PlanFragmentExecutor::Close
void Close()
Definition: plan-fragment-executor.cc:564

impala::PlanFragmentExecutor::thread_usage_sampled_counter_
RuntimeProfile::TimeSeriesCounter * thread_usage_sampled_counter_
Sampled thread usage (tokens) at even time intervals.
Definition: plan-fragment-executor.h:216

impala::CgroupsMgr::RegisterFragment
Status RegisterFragment(const TUniqueId &fragment_instance_id, const std::string &cgroup, bool *is_first)
Definition: cgroups-mgr.cc:198

impala::Thread
TODO: Consider allowing fragment IDs as category parameters.
Definition: thread.h:45

impala::PlanFragmentExecutor::FragmentComplete
void FragmentComplete()
Called when the fragment execution is complete to finalize counters.
Definition: plan-fragment-executor.cc:492

impala::PlanFragmentExecutor::per_host_mem_usage_
RuntimeProfile::Counter * per_host_mem_usage_
Definition: plan-fragment-executor.h:189

impala::ThreadResourceMgr::ResourcePool::num_threads
int64_t num_threads() const
Definition: thread-resource-mgr.h:134

impala::PlanFragmentExecutor::row_desc
const RowDescriptor & row_desc()
Definition: plan-fragment-executor.cc:539

impala::HdfsScanNode::HDFS_SPLIT_STATS_DESC
static const std::string HDFS_SPLIT_STATS_DESC
Description string for the per volume stats output.
Definition: hdfs-scan-node.h:265

impala::PlanFragmentExecutor::fragment_sw_
MonotonicStopWatch fragment_sw_
Stopwatch for this entire fragment. Started in Prepare(), stopped in Close().
Definition: plan-fragment-executor.h:204

impala::CgroupsMgr::UniqueIdToCgroup
std::string UniqueIdToCgroup(const std::string &unique_id) const
Definition: cgroups-mgr.cc:54

mem-tracker.h

periodic-counter-updater.h

RETURN_IF_ERROR
#define RETURN_IF_ERROR(stmt)
some generally useful macros
Definition: status.h:242

impala::RowBatch::GetRow
TupleRow * GetRow(int row_idx)
Definition: row-batch.h:140

impala::PlanFragmentExecutor::average_thread_tokens_
RuntimeProfile::Counter * average_thread_tokens_
Definition: plan-fragment-executor.h:201

ADD_TIMER
#define ADD_TIMER(profile, name)
Definition: runtime-profile.h:50

data-stream-mgr.h

plan-fragment-executor.h

impala::PlanFragmentExecutor::report_thread_active_
bool report_thread_active_
Definition: plan-fragment-executor.h:153

mem-info.h

impala::ExecNode::CreateTree
static Status CreateTree(ObjectPool *pool, const TPlan &plan, const DescriptorTbl &descs, ExecNode **root)
Definition: exec-node.cc:199

impala::RowDescriptor
Definition: descriptors.h:373

impala::PlanFragmentExecutor::desc_tbl
const DescriptorTbl & desc_tbl()
Definition: plan-fragment-executor.h:269

DEFINE_int32
DEFINE_int32(status_report_interval, 5,"interval between profile reports; in seconds")

impala::CgroupsMgr::SetCpuShares
Status SetCpuShares(const std::string &cgroup, int32_t num_shares)
Definition: cgroups-mgr.cc:101

impala::ResourceBroker::GetQueryResourceMgr
bool GetQueryResourceMgr(const TUniqueId &query_id, const TUniqueId &reservation_id, const TNetworkAddress &local_resource_address, QueryResourceMgr **res_mgr)
Definition: resource-broker.cc:779

impala::PlanFragmentExecutor::stop_report_thread_cv_
boost::condition_variable stop_report_thread_cv_
Definition: plan-fragment-executor.h:148

impala::ExecNode::row_desc
const RowDescriptor & row_desc() const
Definition: exec-node.h:156

llvm-codegen.h

impala::PrintId
string PrintId(const TUniqueId &id, const string &separator)
Definition: debug-util.cc:97

logging.h

COUNTER_ADD
#define COUNTER_ADD(c, v)
Definition: runtime-profile.h:55

impala::PlanFragmentExecutor::OptimizeLlvmModule
void OptimizeLlvmModule()
Definition: plan-fragment-executor.cc:270

impala::ExecNode::ReachedLimit
bool ReachedLimit()
Definition: exec-node.h:159

impala::PlanFragmentExecutor::Cancel
void Cancel()
Initiate cancellation. Must not be called until after Prepare() returned.
Definition: plan-fragment-executor.cc:531

impala::RuntimeProfile::Counter::Set
virtual void Set(int64_t value)
Definition: runtime-profile.h:102

impala::RuntimeProfile::Counter
Definition: runtime-profile.h:85

SCOPED_TIMER
#define SCOPED_TIMER(c)
Definition: runtime-profile.h:53

impala::PlanFragmentExecutor::report_thread_started_cv_
boost::condition_variable report_thread_started_cv_
Definition: plan-fragment-executor.h:152

impala::ExecEnv::resource_broker
ResourceBroker * resource_broker()
Definition: exec-env.h:95

data-sink.h

impala::PrettyPrinter::Print
static std::string Print(bool value, TUnit::type ignored, bool verbose=false)
Definition: pretty-printer.h:33

impala::AtomicInt::Read
T Read()
Safe read of the value.
Definition: atomic.h:100

pretty-printer.h

impala::ThreadResourceMgr::UnregisterPool
void UnregisterPool(ResourcePool *pool)
Definition: thread-resource-mgr.cc:81

impala::PlanFragmentExecutor::report_status_cb_
ReportStatusCallback report_status_cb_
profile reporting-related
Definition: plan-fragment-executor.h:142

impala::PlanFragmentExecutor::StopReportThread
void StopReportThread()
Definition: plan-fragment-executor.cc:441

impala::PlanFragmentExecutor::Prepare
Status Prepare(const TExecPlanFragmentParams &request)
Definition: plan-fragment-executor.cc:80

object-pool.h

impala::PlanFragmentExecutor::profile
RuntimeProfile * profile()
Profile information for plan and output sink.
Definition: plan-fragment-executor.cc:543

impala::PlanFragmentExecutor::ReleaseThreadToken
void ReleaseThreadToken()
Releases the thread token for this fragment executor.
Definition: plan-fragment-executor.cc:551

impala::ExecNode::type
TPlanNodeType::type type() const
Definition: exec-node.h:155

VLOG_FILE_IS_ON
#define VLOG_FILE_IS_ON
Definition: logging.h:65

impala::LlvmCodeGen
LLVM code generator. This is the top level object to generate jitted code.
Definition: llvm-codegen.h:107

impala::Status
Definition: status.h:81

impala::PlanFragmentExecutor::~PlanFragmentExecutor
~PlanFragmentExecutor()
Definition: plan-fragment-executor.cc:71

impala::PlanFragmentExecutor::closed_
bool closed_
true if Close() has been called
Definition: plan-fragment-executor.h:162

impala::ExecEnv::cgroups_mgr
CgroupsMgr * cgroups_mgr()
Definition: exec-env.h:88

impala::ExecNode::DebugString
std::string DebugString() const
Returns a string representation in DFS order of the plan rooted at this.
Definition: exec-node.cc:345

impala::AtomicInt::CompareAndSwap
bool CompareAndSwap(T old_val, T new_val)
Returns true if the atomic compare-and-swap was successful.
Definition: atomic.h:131

VLOG_QUERY
#define VLOG_QUERY
Definition: logging.h:57

scan-node.h

impala::DiskIoMgr::RequestContext
Definition: disk-io-mgr-internal.h:122

impala::PlanFragmentExecutor::sink_
boost::scoped_ptr< DataSink > sink_
Definition: plan-fragment-executor.h:180

impala::PlanFragmentExecutor::plan_
ExecNode * plan_
Definition: plan-fragment-executor.h:138

impala::PlanFragmentExecutor::PrintVolumeIds
void PrintVolumeIds(const TPlanExecParams &params)
Print stats about scan ranges for each volumeId in params to info log.

impala::RuntimeState
Definition: runtime-state.h:69

impala::Status::IsCancelled
bool IsCancelled() const
Definition: status.h:174

hbase-table-scanner.h

debug-util.h

impala::ResourceBroker::UnregisterQueryResourceMgr
void UnregisterQueryResourceMgr(const TUniqueId &query_id)
Definition: resource-broker.cc:793

impala::ExecNode::Prepare
virtual Status Prepare(RuntimeState *state)
Definition: exec-node.cc:130

ADD_COUNTER
#define ADD_COUNTER(profile, name, unit)
Definition: runtime-profile.h:47

impala::DescriptorTbl::DebugString
std::string DebugString() const
Definition: descriptors.cc:608

exec-node.h

impala::MonotonicStopWatch::Start
void Start()
Definition: stopwatch.h:81

impala::PlanFragmentExecutor::report_thread_lock_
boost::mutex report_thread_lock_
Definition: plan-fragment-executor.h:144

VLOG_ROW
#define VLOG_ROW
Definition: logging.h:59

impala::ScanNode::SetScanRanges
void SetScanRanges(const std::vector< TScanRangeParams > &scan_range_params)
Definition: scan-node.h:89

impala::ScanNode
Abstract base class of all scan nodes; introduces SetScanRange().
Definition: scan-node.h:77

impala::MonotonicStopWatch::ElapsedTime
uint64_t ElapsedTime() const
Returns time in nanosecond.
Definition: stopwatch.h:105

impala::PlanFragmentExecutor::runtime_state_
boost::scoped_ptr< RuntimeState > runtime_state_
Definition: plan-fragment-executor.h:181

parse-util.h

impala::HdfsScanNode::PrintHdfsSplitStats
static void PrintHdfsSplitStats(const PerVolumnStats &per_volume_stats, std::stringstream *ss)
Definition: hdfs-scan-node.cc:1073

impala::RowBatch
Definition: row-batch.h:66

impala::RuntimeProfile::AddTimeSeriesCounter
TimeSeriesCounter * AddTimeSeriesCounter(const std::string &name, TUnit::type unit, DerivedCounterFunction sample_fn)

impala::PlanFragmentExecutor::completed_report_sent_
AtomicInt< int > completed_report_sent_
Definition: plan-fragment-executor.h:210

impala::PlanFragmentExecutor::rows_produced_counter_
RuntimeProfile::Counter * rows_produced_counter_
Number of rows returned by this fragment.
Definition: plan-fragment-executor.h:192

exchange-node.h

impala::PeriodicCounterUpdater::StopSamplingCounter
static void StopSamplingCounter(RuntimeProfile::Counter *counter)
Stops updating the value of 'counter'.
Definition: periodic-counter-updater.cc:80

impala::PlanFragmentExecutor::SendReport
void SendReport(bool done)
Definition: plan-fragment-executor.cc:423

impala::RuntimeProfile
Definition: runtime-profile.h:83

impala::ErrorMsg
Definition: error-util.h:47

impala::HdfsScanNode::UpdateHdfsSplitStats
static void UpdateHdfsSplitStats(const std::vector< TScanRangeParams > &scan_range_params_list, PerVolumnStats *per_volume_stats)
Update the per volume stats with the given scan range params list.
Definition: hdfs-scan-node.cc:1058

impala::PlanFragmentExecutor::query_id_
TUniqueId query_id_
Definition: plan-fragment-executor.h:139

container-util.h

llama-util.h

impala::PlanFragmentExecutor::runtime_state
RuntimeState * runtime_state()
call these only after Prepare()
Definition: plan-fragment-executor.h:127

impala::PlanFragmentExecutor::ReportProfile
void ReportProfile()
Definition: plan-fragment-executor.cc:374

impala::LlvmCodeGen::FinalizeModule
Status FinalizeModule()
Definition: llvm-codegen.cc:607

impala::ExchangeNode
Definition: exchange-node.h:39

impala::Status::OK
static const Status OK
Definition: status.h:87

impala::RuntimeProfile::PrettyPrint
void PrettyPrint(std::ostream *s, const std::string &prefix="") const
Definition: runtime-profile.cc:507

impala::PlanFragmentExecutor::PerNodeScanRanges
std::map< TPlanNodeId, std::vector< TScanRangeParams > > PerNodeScanRanges
typedef for TPlanFragmentExecParams.per_node_scan_ranges
Definition: plan-fragment-executor.h:221

impala::PlanFragmentExecutor::ReportStatusCallback
boost::function< void(const Status &status, RuntimeProfile *profile, bool done)> ReportStatusCallback
Definition: plan-fragment-executor.h:74

expr.h

impala::PlanFragmentExecutor::status_lock_
boost::mutex status_lock_
Definition: plan-fragment-executor.h:175

impala::DescriptorTbl::Create
static Status Create(ObjectPool *pool, const TDescriptorTable &thrift_tbl, DescriptorTbl **tbl)
Definition: descriptors.cc:378

impala::exec_env_
ExecEnv * exec_env_
Definition: coordinator.h:193

impala::PlanFragmentExecutor::ReachedLimit
bool ReachedLimit()
Returns true if this query has a limit and it has been reached.
Definition: plan-fragment-executor.cc:547

impala::FindWithDefault
const V & FindWithDefault(const std::map< K, V > &m, const K &key, const V &default_val)
Definition: container-util.h:73

names.h

impala::ExecNode
Definition: exec-node.h:46

impala::PlanFragmentExecutor::Open
Status Open()
Definition: plan-fragment-executor.cc:302

DEFINE_bool
DEFINE_bool(serialize_batch, false,"serialize and deserialize each returned row batch")

impala::Status::msg
const ErrorMsg & msg() const
Returns the error message associated with a non-successful status.
Definition: status.h:189

impala::PlanFragmentExecutor::mem_usage_sampled_counter_
RuntimeProfile::TimeSeriesCounter * mem_usage_sampled_counter_
Sampled memory usage at even time intervals.
Definition: plan-fragment-executor.h:213

impala::PlanFragmentExecutor::obj_pool
ObjectPool * obj_pool()
Definition: plan-fragment-executor.h:218

VLOG_FILE
#define VLOG_FILE
Definition: logging.h:58

impala::PlanFragmentExecutor::done_
bool done_
true if plan_->GetNext() indicated that it's done
Definition: plan-fragment-executor.h:156

VLOG_ROW_IS_ON
#define VLOG_ROW_IS_ON
Definition: logging.h:66

impala::RuntimeProfile::AddChild
void AddChild(RuntimeProfile *child, bool indent=true, RuntimeProfile *location=NULL)
Definition: runtime-profile.cc:368

impala::ExecEnv
Definition: exec-env.h:53

impala::ExecNode::Open
virtual Status Open(RuntimeState *state)
Definition: exec-node.cc:154

impala::QueryResourceMgr
Only CPU-heavy threads need be managed using this class.
Definition: query-resource-mgr.h:94

impala::PrintRow
string PrintRow(TupleRow *row, const RowDescriptor &d)
Definition: debug-util.cc:192

impala::Status::ok
bool ok() const
Definition: status.h:172

impala::PlanFragmentExecutor::PER_HOST_PEAK_MEM_COUNTER
static const std::string PER_HOST_PEAK_MEM_COUNTER
Name of the counter that is tracking per query, per host peak mem usage.
Definition: plan-fragment-executor.h:134

impala::PlanFragmentExecutor::exec_env_
ExecEnv * exec_env_
Definition: plan-fragment-executor.h:137

impala::PlanFragmentExecutor::prepared_
bool prepared_
true if Prepare() returned OK
Definition: plan-fragment-executor.h:159

impala::query_mem_tracker
MemTracker * query_mem_tracker()

impala::ExecEnv::thread_mgr
ThreadResourceMgr * thread_mgr()
Definition: exec-env.h:87

impala::ExecNode::Close
virtual void Close(RuntimeState *state)
Definition: exec-node.cc:166

impala::PlanFragmentExecutor::OpenInternal
Status OpenInternal()
Definition: plan-fragment-executor.cc:331

impala::ExecNode::SetDebugOptions
static void SetDebugOptions(int node_id, TExecNodePhase::type phase, TDebugAction::type action, ExecNode *tree)
Set debug action for node with given id in 'tree'.
Definition: exec-node.cc:332

descriptors.h

impala::HdfsScanNode::PerVolumnStats
boost::unordered_map< int32_t, std::pair< int, int64_t > > PerVolumnStats
map from volume id to <number of split, per volume split lengths>
Definition: hdfs-scan-node.h:252

impala::CgroupsMgr::UnregisterFragment
Status UnregisterFragment(const TUniqueId &fragment_instance_id, const std::string &cgroup)
Definition: cgroups-mgr.cc:215

impala::ExecNode::GetNext
virtual Status GetNext(RuntimeState *state, RowBatch *row_batch, bool *eos)=0

impala::PlanFragmentExecutor::GetNext
Status GetNext(RowBatch **batch)
Definition: plan-fragment-executor.cc:455

DECLARE_bool
DECLARE_bool(enable_rm)

impala::PlanFragmentExecutor::report_thread_
boost::scoped_ptr< Thread > report_thread_
Definition: plan-fragment-executor.h:143

impala::Status::IsMemLimitExceeded
bool IsMemLimitExceeded() const
Definition: status.h:178

cpu-info.h

impala::PlanFragmentExecutor::status_
Status status_
Definition: plan-fragment-executor.h:169

impala::PeriodicCounterUpdater::StopTimeSeriesCounter
static void StopTimeSeriesCounter(RuntimeProfile::TimeSeriesCounter *counter)
Stops 'counter' from receiving any more samples.
Definition: periodic-counter-updater.cc:122

impala::ExecNode::runtime_profile
RuntimeProfile * runtime_profile()
Definition: exec-node.h:161

impala::PlanFragmentExecutor::has_thread_token_
bool has_thread_token_
true if this fragment has not returned the thread token to the thread resource mgr ...
Definition: plan-fragment-executor.h:165

impala::DataSink::CreateDataSink
static Status CreateDataSink(ObjectPool *pool, const TDataSink &thrift_sink, const std::vector< TExpr > &output_exprs, const TPlanFragmentExecParams &params, const RowDescriptor &row_desc, boost::scoped_ptr< DataSink > *sink)
Definition: data-sink.cc:34