Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
topn-node.cc
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "exec/topn-node.h"
16 
17 #include <sstream>
18 
19 #include "exprs/expr.h"
20 #include "runtime/descriptors.h"
21 #include "runtime/mem-pool.h"
22 #include "runtime/raw-value.h"
23 #include "runtime/row-batch.h"
24 #include "runtime/runtime-state.h"
25 #include "runtime/tuple.h"
26 #include "runtime/tuple-row.h"
27 #include "util/debug-util.h"
28 #include "util/runtime-profile.h"
29 
30 #include "gen-cpp/Exprs_types.h"
31 #include "gen-cpp/PlanNodes_types.h"
32 
33 #include "common/names.h"
34 
35 using std::priority_queue;
36 using namespace impala;
37 
38 TopNNode::TopNNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs)
39  : ExecNode(pool, tnode, descs),
40  offset_(tnode.sort_node.__isset.offset ? tnode.sort_node.offset : 0),
41  num_rows_skipped_(0) {
42 }
43 
// Initializes the node from the Thrift plan: sets up the sort/materialization
// expressions and copies the per-key ordering flags (ascending / nulls-first).
// TopN never evaluates conjuncts itself, hence the DCHECK below.
// NOTE(review): original source line 45 is missing from this extraction (the
// embedded numbering jumps 44 -> 46); presumably it delegated to the base
// class Init — confirm against the upstream file.
44 Status TopNNode::Init(const TPlanNode& tnode) {
46  RETURN_IF_ERROR(sort_exec_exprs_.Init(tnode.sort_node.sort_info, pool_));
47  is_asc_order_ = tnode.sort_node.sort_info.is_asc_order;
48  nulls_first_ = tnode.sort_node.sort_info.nulls_first;
49 
50  DCHECK_EQ(conjunct_ctxs_.size(), 0)
51  << "TopNNode should never have predicates to evaluate.";
52 
53  return Status::OK;
54 }
55 
// Body of TopNNode::Prepare() (per the cross-reference listing it is defined
// at original line 56, which is missing here along with lines 58 and 60-63).
// Creates the MemPool that backs all tuples referenced by priority_queue_ and
// allocates tmp_tuple_, a scratch tuple used by InsertTupleRow() to compare a
// candidate row against the current queue head without allocating per row.
// NOTE(review): the allocation call on line 65 is truncated by the extraction
// (its argument list, original line 66, was dropped) — confirm upstream.
57  SCOPED_TIMER(runtime_profile_->total_time_counter());
59  tuple_pool_.reset(new MemPool(mem_tracker()));
64  // Allocate memory for a temporary tuple.
65  tmp_tuple_ = reinterpret_cast<Tuple*>(
67  return Status::OK;
68 }
69 
// Body of TopNNode::Open() (the signature at original line 70 was dropped by
// the extraction, as were lines 72, 74-79, 82, 97 and 101). Builds the
// priority queue ordered by TupleRowComparator, opens the child, then drains
// it batch by batch, offering every row to InsertTupleRow(). The queue is
// capped at limit_ + offset_ rows (DCHECK below), so memory stays bounded
// regardless of child cardinality. The child is closed eagerly once all its
// rows have been consumed.
// NOTE(review): the priority_queue constructor argument (original line 82)
// is missing here — presumably the comparator instance; confirm upstream.
71  SCOPED_TIMER(runtime_profile_->total_time_counter());
73  RETURN_IF_CANCELLED(state);
76 
80  priority_queue_.reset(
81  new priority_queue<Tuple*, vector<Tuple*>, TupleRowComparator>(
83 
84  RETURN_IF_ERROR(child(0)->Open(state));
85 
86  // Limit of 0, no need to fetch anything from children.
87  if (limit_ != 0) {
88  RowBatch batch(child(0)->row_desc(), state->batch_size(), mem_tracker());
89  bool eos;
90  do {
91  batch.Reset();
92  RETURN_IF_ERROR(child(0)->GetNext(state, &batch, &eos));
93  for (int i = 0; i < batch.num_rows(); ++i) {
94  InsertTupleRow(batch.GetRow(i));
95  }
96  RETURN_IF_CANCELLED(state);
98  } while (!eos);
99  }
100  DCHECK_LE(priority_queue_->size(), limit_ + offset_);
102  child(0)->Close(state);
103  return Status::OK;
104 }
105 
// Streams the already-sorted results (sorted_top_n_, filled by
// PrepareForOutput()) into the output batch via get_next_iter_. Rows before
// offset_ are skipped; each emitted tuple is wrapped as a single-tuple
// TupleRow (the reinterpret_cast of &src_tuple relies on a TupleRow being
// laid out as an array of Tuple pointers). eos is set once the iterator
// reaches the end of sorted_top_n_.
// NOTE(review): original lines 110, 113-114 and 124-125 are missing from
// this extraction. Line 114 presumably incremented num_rows_skipped_ inside
// the skip branch below — without it the offset test would never advance, so
// the gap is almost certainly an extraction artifact; confirm upstream.
106 Status TopNNode::GetNext(RuntimeState* state, RowBatch* row_batch, bool* eos) {
107  SCOPED_TIMER(runtime_profile_->total_time_counter());
108  RETURN_IF_ERROR(ExecDebugAction(TExecNodePhase::GETNEXT, state));
109  RETURN_IF_CANCELLED(state);
111  while (!row_batch->AtCapacity() && (get_next_iter_ != sorted_top_n_.end())) {
112  if (num_rows_skipped_ < offset_) {
113  ++get_next_iter_;
115  continue;
116  }
117  int row_idx = row_batch->AddRow();
118  TupleRow* dst_row = row_batch->GetRow(row_idx);
119  Tuple* src_tuple = *get_next_iter_;
120  TupleRow* src_row = reinterpret_cast<TupleRow*>(&src_tuple);
121  row_batch->CopyRow(src_row, dst_row);
122  ++get_next_iter_;
123  row_batch->CommitLastRow();
126  }
127  *eos = get_next_iter_ == sorted_top_n_.end();
128  return Status::OK;
129 }
130 
// Body of TopNNode::Reset() (signature at original line 131 dropped by the
// extraction). Deliberately unimplemented: fires a DCHECK in debug builds and
// returns a non-OK "NYI" status in release builds.
132  DCHECK(false) << "NYI";
133  return Status("NYI");
134 }
135 
137  if (is_closed()) return;
138  if (tuple_pool_.get() != NULL) tuple_pool_->FreeAll();
139  sort_exec_exprs_.Close(state);
140  ExecNode::Close(state);
141 }
142 
// Insert if either not at the limit or it's a new TopN tuple_row
// Body of TopNNode::InsertTupleRow() (signature at original line 144 dropped
// by the extraction, along with argument-list lines 149, 151 and 155-156).
// Two cases:
//  - Queue not yet full (size < limit_ + offset_): materialize the input row
//    into a freshly allocated tuple and push it.
//  - Queue full: the candidate (materialized into tmp_tuple_ — presumably on
//    the missing lines 155-156; confirm upstream) is compared against the
//    current worst element (top()). If the candidate orders before it, the
//    candidate's contents are deep-copied over the evicted top tuple so its
//    storage is reused, and that tuple is re-pushed.
145  Tuple* insert_tuple = NULL;
146 
147  if (priority_queue_->size() < limit_ + offset_) {
148  insert_tuple = reinterpret_cast<Tuple*>(
150  insert_tuple->MaterializeExprs<false>(input_row, *materialized_tuple_desc_,
152  } else {
153  DCHECK(!priority_queue_->empty());
154  Tuple* top_tuple = priority_queue_->top();
157  if ((*tuple_row_less_than_)(tmp_tuple_, top_tuple)) {
158  // TODO: DeepCopy() will allocate new buffers for the string data. This needs
159  // to be fixed to use a freelist
160  tmp_tuple_->DeepCopy(top_tuple, *materialized_tuple_desc_, tuple_pool_.get());
161  insert_tuple = top_tuple;
162  priority_queue_->pop();
163  }
164  }
165 
166  if (insert_tuple != NULL) priority_queue_->push(insert_tuple);
167 }
168 
// Reverse the order of the tuples in the priority queue
// Body of TopNNode::PrepareForOutput() (signature at original line 170
// dropped by the extraction). Pops the heap — which yields elements
// worst-first — and writes them into sorted_top_n_ back to front, producing
// the final sorted order, then positions get_next_iter_ at the start for
// GetNext() to stream from.
171  sorted_top_n_.resize(priority_queue_->size());
172  int index = sorted_top_n_.size() - 1;
173 
174  while (priority_queue_->size() > 0) {
175  Tuple* tuple = priority_queue_->top();
176  priority_queue_->pop();
177  sorted_top_n_[index] = tuple;
178  --index;
179  }
180 
181  get_next_iter_ = sorted_top_n_.begin();
182 }
183 
// Writes a human-readable description of this node into 'out', indented two
// spaces per indentation level: "TopNNode(" followed by each sort key's
// asc/desc and nulls first/last flags, the base-class description, and a
// closing ")".
// NOTE(review): original line 187 is missing from this extraction (numbering
// jumps 186 -> 188); presumably it streamed the ordering expressions —
// confirm upstream.
184 void TopNNode::DebugString(int indentation_level, stringstream* out) const {
185  *out << string(indentation_level * 2, ' ');
186  *out << "TopNNode("
188  for (int i = 0; i < is_asc_order_.size(); ++i) {
189  *out << (i > 0 ? " " : "")
190  << (is_asc_order_[i] ? "asc" : "desc")
191  << " nulls " << (nulls_first_[i] ? "first" : "last");
192  }
193 
194  ExecNode::DebugString(indentation_level, out);
195  *out << ")";
196 }
std::vector< bool > is_asc_order_
Definition: topn-node.h:70
int64_t num_rows_returned_
Definition: exec-node.h:223
MemTracker * mem_tracker()
Definition: exec-node.h:162
void InsertTupleRow(TupleRow *tuple_row)
Definition: topn-node.cc:144
boost::scoped_ptr< RuntimeProfile > runtime_profile_
Definition: exec-node.h:225
virtual Status Reset(RuntimeState *state)
Definition: topn-node.cc:131
A tuple with 0 materialised slots is represented as NULL.
Definition: tuple.h:48
#define RETURN_IF_ERROR(stmt)
some generally useful macros
Definition: status.h:242
virtual Status Init(const TPlanNode &tnode)
Definition: exec-node.cc:124
TupleRow * GetRow(int row_idx)
Definition: row-batch.h:140
RowDescriptor row_descriptor_
Definition: exec-node.h:215
bool AtCapacity()
Definition: row-batch.h:120
int byte_size() const
Definition: descriptors.h:300
const RowDescriptor & row_desc() const
Definition: exec-node.h:156
Status ExecDebugAction(TExecNodePhase::type phase, RuntimeState *state)
Definition: exec-node.cc:378
Status Open(RuntimeState *state)
Open all expressions used for sorting and tuple materialization.
#define SCOPED_TIMER(c)
int64_t limit_
Definition: exec-node.h:222
Tuple * DeepCopy(const TupleDescriptor &desc, MemPool *pool, bool convert_ptrs=false)
Definition: tuple.cc:34
boost::scoped_ptr< TupleRowComparator > tuple_row_less_than_
Definition: topn-node.h:75
std::vector< bool > nulls_first_
Definition: topn-node.h:71
boost::scoped_ptr< MemPool > tuple_pool_
Stores everything referenced in priority_queue_.
Definition: topn-node.h:90
virtual Status Prepare(RuntimeState *state)
Definition: topn-node.cc:56
std::string DebugString() const
Returns a string representation in DFS order of the plan rooted at this.
Definition: exec-node.cc:345
MemTracker * expr_mem_tracker()
Definition: exec-node.h:163
std::vector< Tuple * >::iterator get_next_iter_
Definition: topn-node.h:87
virtual Status Init(const TPlanNode &tnode)
Definition: topn-node.cc:44
void Reset()
Resets the row batch, returning all resources it has accumulated.
Definition: row-batch.cc:224
virtual void Close(RuntimeState *state)
Definition: topn-node.cc:136
#define RETURN_IF_CANCELLED(state)
ObjectPool pool
Status Init(const TSortInfo &sort_info, ObjectPool *pool)
Initialize the expressions from a TSortInfo using the specified pool.
virtual Status GetNext(RuntimeState *state, RowBatch *row_batch, bool *eos)
Definition: topn-node.cc:106
TopNNode(ObjectPool *pool, const TPlanNode &tnode, const DescriptorTbl &descs)
Definition: topn-node.cc:38
int64_t offset_
Number of rows to skip.
Definition: topn-node.h:64
virtual Status Prepare(RuntimeState *state)
Definition: exec-node.cc:130
std::vector< Tuple * > sorted_top_n_
After computing the TopN in the priority_queue, pop them and put them in this vector.
Definition: topn-node.h:86
const std::vector< ExprContext * > & lhs_ordering_expr_ctxs() const
Can only be used after calling Prepare()
void AddExprCtxsToFree(const std::vector< ExprContext * > &ctxs)
Tuple * tmp_tuple_
Definition: topn-node.h:94
virtual Status QueryMaintenance(RuntimeState *state)
Definition: exec-node.cc:401
bool is_closed()
Definition: exec-node.h:242
void CommitLastRow()
Definition: row-batch.h:109
#define COUNTER_SET(c, v)
int batch_size() const
Definition: runtime-state.h:98
RuntimeProfile::Counter * rows_returned_counter_
Definition: exec-node.h:226
ExecNode * child(int i)
Definition: exec-node.h:241
const std::vector< TupleDescriptor * > & tuple_descriptors() const
Return descriptors for all tuples in this row, in order of appearance.
Definition: descriptors.h:412
TupleDescriptor * materialized_tuple_desc_
Cached descriptor for the materialized tuple. Assigned in Prepare().
Definition: topn-node.h:73
void CopyRow(TupleRow *src, TupleRow *dest)
Definition: row-batch.h:173
static const Status OK
Definition: status.h:87
ObjectPool * pool_
Definition: exec-node.h:211
uint8_t offset[7 *64-sizeof(uint64_t)]
Status Prepare(RuntimeState *state, const RowDescriptor &child_row_desc, const RowDescriptor &output_row_desc, MemTracker *expr_mem_tracker)
Prepare all expressions used for sorting and tuple materialization.
void MaterializeExprs(TupleRow *row, const TupleDescriptor &desc, const std::vector< ExprContext * > &materialize_expr_ctxs, MemPool *pool, std::vector< StringValue * > *non_null_var_len_values=NULL, int *total_var_len=NULL)
SortExecExprs sort_exec_exprs_
Definition: topn-node.h:69
virtual Status Open(RuntimeState *state)
Definition: topn-node.cc:70
virtual Status Open(RuntimeState *state)
Definition: exec-node.cc:154
void PrepareForOutput()
Flatten and reverse the priority queue.
Definition: topn-node.cc:170
boost::scoped_ptr< std::priority_queue< Tuple *, std::vector< Tuple * >, TupleRowComparator > > priority_queue_
Definition: topn-node.h:83
std::vector< ExprContext * > conjunct_ctxs_
Definition: exec-node.h:212
int64_t num_rows_skipped_
Definition: topn-node.h:65
virtual void Close(RuntimeState *state)
Definition: exec-node.cc:166
virtual std::string DebugString() const
Definition: expr.cc:385
void Close(RuntimeState *state)
Close all expressions used for sorting and tuple materialization.
const std::vector< ExprContext * > & sort_tuple_slot_expr_ctxs() const
const std::vector< ExprContext * > & rhs_ordering_expr_ctxs() const
Can only be used after calling Open()