exchange-node.cc
// Copyright 2012 Cloudera Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "exec/exchange-node.h"

#include <boost/scoped_ptr.hpp>

#include "runtime/data-stream-mgr.h"
#include "runtime/data-stream-recvr.h"
#include "runtime/runtime-state.h"
#include "runtime/row-batch.h"
#include "util/debug-util.h"
#include "util/runtime-profile.h"
#include "gen-cpp/PlanNodes_types.h"

#include "common/names.h"

using namespace impala;

DEFINE_int32(exchg_node_buffer_size_bytes, 1024 * 1024 * 10,
    "(Advanced) Maximum size of per-query receive-side buffer");

ExchangeNode::ExchangeNode(
    ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs)
  : ExecNode(pool, tnode, descs),
    num_senders_(0),
    stream_recvr_(),
    input_row_desc_(descs, tnode.exchange_node.input_row_tuples,
        vector<bool>(
            tnode.nullable_tuples.begin(),
            tnode.nullable_tuples.begin() + tnode.exchange_node.input_row_tuples.size())),
    next_row_idx_(0),
    is_merging_(tnode.exchange_node.__isset.sort_info),
    offset_(tnode.exchange_node.__isset.offset ? tnode.exchange_node.offset : 0),
    num_rows_skipped_(0) {
  DCHECK_GE(offset_, 0);
  DCHECK(is_merging_ || (offset_ == 0));
}

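// For a merging exchange, initializes the ordering expressions from the plan's sort
// info; a non-merging exchange has nothing to set up beyond ExecNode::Init().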
Status ExchangeNode::Init(const TPlanNode& tnode) {
  RETURN_IF_ERROR(ExecNode::Init(tnode));
  if (!is_merging_) return Status::OK;

  RETURN_IF_ERROR(sort_exec_exprs_.Init(tnode.exchange_node.sort_info, pool_));
  is_asc_order_ = tnode.exchange_node.sort_info.is_asc_order;
  nulls_first_ = tnode.exchange_node.sort_info.nulls_first;
  return Status::OK;
}

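// Creates the data stream receiver for this node's senders and, if merging, prepares
// the ordering expressions used by the receiver-side merger.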
Status ExchangeNode::Prepare(RuntimeState* state) {
  RETURN_IF_ERROR(ExecNode::Prepare(state));
  convert_row_batch_timer_ = ADD_TIMER(runtime_profile(), "ConvertRowBatchTime");
  // TODO: figure out appropriate buffer size
  DCHECK_GT(num_senders_, 0);
  stream_recvr_ = ExecEnv::GetInstance()->stream_mgr()->CreateRecvr(state,
      input_row_desc_, state->fragment_instance_id(), id_, num_senders_,
      FLAGS_exchg_node_buffer_size_bytes, runtime_profile(), is_merging_);
  if (is_merging_) {
    RETURN_IF_ERROR(sort_exec_exprs_.Prepare(
        state, row_descriptor_, row_descriptor_, expr_mem_tracker()));
    AddExprCtxsToFree(sort_exec_exprs_);
  }
  return Status::OK;
}

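// Blocks until the first batch is available for consumption via GetNext(): a merging
// exchange populates the receiver-side merger, a non-merging exchange fetches its
// first input batch.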
Status ExchangeNode::Open(RuntimeState* state) {
  SCOPED_TIMER(runtime_profile_->total_time_counter());
  RETURN_IF_ERROR(ExecNode::Open(state));
  if (is_merging_) {
    RETURN_IF_ERROR(sort_exec_exprs_.Open(state));
    TupleRowComparator less_than(sort_exec_exprs_.lhs_ordering_expr_ctxs(),
        sort_exec_exprs_.rhs_ordering_expr_ctxs(), is_asc_order_, nulls_first_);
    // CreateMerger() will populate its merging heap with batches from the stream_recvr_,
    // so it is not necessary to call FillInputRowBatch().
    stream_recvr_->CreateMerger(less_than);
  } else {
    RETURN_IF_ERROR(FillInputRowBatch(state));
  }
  return Status::OK;
}

Status ExchangeNode::Reset(RuntimeState* state) {
  DCHECK(false) << "NYI";
  return Status("NYI");
}

void ExchangeNode::Close(RuntimeState* state) {
  if (is_closed()) return;
  if (is_merging_) sort_exec_exprs_.Close(state);
  if (stream_recvr_ != NULL) stream_recvr_->Close();
  stream_recvr_.reset();
  ExecNode::Close(state);
}

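// Fetches the next batch from the receiver into input_batch_ (non-merging exchanges
// only); input_batch_ is NULL once the stream is exhausted.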
Status ExchangeNode::FillInputRowBatch(RuntimeState* state) {
  DCHECK(!is_merging_);
  Status ret_status;
  {
    SCOPED_TIMER(state->total_network_receive_timer());
    ret_status = stream_recvr_->GetBatch(&input_batch_);
  }
  VLOG_FILE << "exch: has batch=" << (input_batch_ == NULL ? "false" : "true")
      << " #rows=" << (input_batch_ != NULL ? input_batch_->num_rows() : 0)
      << " is_cancelled=" << (ret_status.IsCancelled() ? "true" : "false")
      << " instance_id=" << state->fragment_instance_id();
  return ret_status;
}

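// Non-merging path: copies rows from input_batch_ into output_batch until the output
// batch is at capacity, the limit is reached, or the senders' stream is exhausted.
// Merging exchanges delegate to GetNextMerging().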
Status ExchangeNode::GetNext(RuntimeState* state, RowBatch* output_batch, bool* eos) {
  RETURN_IF_ERROR(ExecDebugAction(TExecNodePhase::GETNEXT, state));
  SCOPED_TIMER(runtime_profile_->total_time_counter());
  if (ReachedLimit()) {
    stream_recvr_->TransferAllResources(output_batch);
    *eos = true;
    return Status::OK;
  } else {
    *eos = false;
  }

  if (is_merging_) return GetNextMerging(state, output_batch, eos);

  while (true) {
    {
      SCOPED_TIMER(convert_row_batch_timer_);
      RETURN_IF_CANCELLED(state);
      RETURN_IF_ERROR(QueryMaintenance(state));
      // copy rows until we hit the limit/capacity or until we exhaust input_batch_
      while (!ReachedLimit() && !output_batch->AtCapacity()
          && input_batch_ != NULL && next_row_idx_ < input_batch_->capacity()) {
        TupleRow* src = input_batch_->GetRow(next_row_idx_);
        ++next_row_idx_;
        int j = output_batch->AddRow();
        TupleRow* dest = output_batch->GetRow(j);
        // if the input row is shorter than the output row, make sure not to leave
        // uninitialized Tuple* around
        output_batch->ClearRow(dest);
        // this works as expected if rows from input_batch form a prefix of
        // rows in output_batch
        input_batch_->CopyRow(src, dest);
        output_batch->CommitLastRow();
        ++num_rows_returned_;
      }
      COUNTER_SET(rows_returned_counter_, num_rows_returned_);

      if (ReachedLimit()) {
        stream_recvr_->TransferAllResources(output_batch);
        *eos = true;
        return Status::OK;
      }
      if (output_batch->AtCapacity()) return Status::OK;
    }

    // we need more rows
    stream_recvr_->TransferAllResources(output_batch);
    RETURN_IF_ERROR(FillInputRowBatch(state));
    *eos = (input_batch_ == NULL);
    if (*eos) return Status::OK;
    next_row_idx_ = 0;
    DCHECK(input_batch_->row_desc().IsPrefixOf(output_batch->row_desc()));
  }
}

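// Merging path: the receiver's merger returns rows in sorted order; rows are discarded
// until offset_ rows have been skipped, then the limit is enforced on what remains.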
Status ExchangeNode::GetNextMerging(RuntimeState* state, RowBatch* output_batch,
    bool* eos) {
  DCHECK_EQ(output_batch->num_rows(), 0);
  RETURN_IF_ERROR(stream_recvr_->GetNext(output_batch, eos));

  while ((num_rows_skipped_ < offset_)) {
    num_rows_skipped_ += output_batch->num_rows();
    // Throw away rows in the output batch until the offset is skipped.
    int rows_to_keep = num_rows_skipped_ - offset_;
    if (rows_to_keep > 0) {
      output_batch->CopyRows(0, output_batch->num_rows() - rows_to_keep, rows_to_keep);
      output_batch->set_num_rows(rows_to_keep);
    } else {
      output_batch->set_num_rows(0);
    }
    if (rows_to_keep > 0 || *eos || output_batch->AtCapacity()) break;
    RETURN_IF_ERROR(stream_recvr_->GetNext(output_batch, eos));
  }

  num_rows_returned_ += output_batch->num_rows();
  if (ReachedLimit()) {
    output_batch->set_num_rows(output_batch->num_rows() - (num_rows_returned_ - limit_));
    *eos = true;
  }

  // On eos, transfer all remaining resources from the input batches maintained
  // by the merger to the output batch.
  if (*eos) stream_recvr_->TransferAllResources(output_batch);

  COUNTER_SET(rows_returned_counter_, num_rows_returned_);
  return Status::OK;
}

void ExchangeNode::DebugString(int indentation_level, stringstream* out) const {
  *out << string(indentation_level * 2, ' ');
  *out << "ExchangeNode(#senders=" << num_senders_;
  ExecNode::DebugString(indentation_level, out);
  *out << ")";
}