Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
sorter.h
Go to the documentation of this file.
1 // Copyright 2013 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef IMPALA_RUNTIME_SORTER_H_
16 #define IMPALA_RUNTIME_SORTER_H_
17 
19 #include "util/tuple-row-compare.h"
20 
21 namespace impala {
22 
23 class SortedRunMerger;
24 class RuntimeProfile;
25 class RowBatch;
26 
31 //
38 //
42 //
45 //
57 //
67 //
71 //
73 //
78 //
// Sorter: class exposing Init(), AddBatch(), InputDone() and GetNext() to its callers.
// Note that Init() must be called right after the constructor.
//
// NOTE(review): this is a documentation-page listing. The leading numbers are the
// line numbers of the original header, and the gaps in that numbering are lines the
// listing does not show (doc comments and some member declarations such as state_,
// compare_less_than_, block_mgr_, unsorted_run_, obj_pool_ and the profile
// counters). Consult the real header before relying on the member list below.
84 class Sorter {
85  public:
    // Constructs a sorter from a less-than comparator, the sort-tuple slot
    // expression contexts, the output row descriptor, a memory tracker, a
    // runtime profile and the runtime state.
    // NOTE(review): presumably mem_tracker and state are borrowed, not owned
    // (the member index describes mem_tracker_ and state_ as "Not owned") -- confirm.
90  Sorter(const TupleRowComparator& compare_less_than,
91  const std::vector<ExprContext*>& sort_tuple_slot_expr_ctxs,
92  RowDescriptor* output_row_desc, MemTracker* mem_tracker,
93  RuntimeProfile* profile, RuntimeState* state);
94 
95  ~Sorter();
96 
    // Must be called right after the constructor. Returns a Status.
99  Status Init();
100 
    // Adds a batch of input rows to the current unsorted run.
102  Status AddBatch(RowBatch* batch);
103 
    // NOTE(review): presumably called once after the last AddBatch() to signal
    // end of input -- TODO confirm against sorter.cc.
106  Status InputDone();
107 
    // Get the next batch of sorted output rows from the sorter.
    // NOTE(review): *eos is presumably set once all rows have been returned -- confirm.
109  Status GetNext(RowBatch* batch, bool* eos);
110 
    // Static helper returning a uint64_t computed from the number of available
    // blocks, a row descriptor and a merge batch size.
    // NOTE(review): by its name this estimates memory needed for a merge; the
    // exact units and formula are not visible here -- confirm in sorter.cc.
119  static uint64_t EstimateMergeMem(uint64_t available_blocks, RowDescriptor* row_desc,
120  int merge_batch_size);
121 
122  private:
    // Nested helper types; only forward-declared in this header.
123  class Run;
124  class TupleSorter;
125 
    // NOTE(review): presumably sets up merger_ over num_runs runs -- confirm.
130  Status CreateMerger(int num_runs);
131 
139 
    // NOTE(review): presumably sorts the current unsorted run -- confirm.
142  Status SortRun();
143 
146 
    // In memory sorter.
149  boost::scoped_ptr<TupleSorter> in_mem_tuple_sorter_;
150 
153 
156 
159 
165 
    // List of Run pointers.
    // NOTE(review): presumably the Run objects live in obj_pool_ ("Pool of owned
    // Run objects") rather than being owned by this list -- confirm.
169  std::list<Run*> sorted_runs_;
170 
174 
    // Member counterpart of the constructor's sort_tuple_slot_expr_ctxs argument
    // (held by value as a vector of pointers).
177  std::vector<ExprContext*> sort_tuple_slot_expr_ctxs_;
178 
181 
    // Scoped pointer to a SortedRunMerger.
185  boost::scoped_ptr<SortedRunMerger> merger_;
186 
    // List of Run pointers.
    // NOTE(review): presumably the runs currently being merged -- confirm.
189  std::list<Run*> merging_runs_;
190 
193 
200 };
201 
202 } // namespace impala
203 
204 #endif
std::vector< ExprContext * > sort_tuple_slot_expr_ctxs_
Definition: sorter.h:177
RuntimeProfile::Counter * num_merges_counter_
Definition: sorter.h:197
Status SortRun()
Definition: sorter.cc:1006
static uint64_t EstimateMergeMem(uint64_t available_blocks, RowDescriptor *row_desc, int merge_batch_size)
Definition: sorter.cc:1038
Status Init()
Definition: sorter.cc:896
Status CreateMerger(int num_runs)
Definition: sorter.cc:1123
RuntimeProfile * profile_
Runtime profile and counters for this sorter instance.
Definition: sorter.h:195
RuntimeState *const state_
Runtime state instance used to check for cancellation. Not owned.
Definition: sorter.h:145
TupleRowComparator compare_less_than_
In memory sorter and less-than comparator.
Definition: sorter.h:148
RuntimeProfile::Counter * in_mem_sort_timer_
Definition: sorter.h:198
boost::scoped_ptr< TupleSorter > in_mem_tuple_sorter_
Definition: sorter.h:149
RuntimeProfile::Counter * sorted_data_size_
Definition: sorter.h:199
Status InputDone()
Definition: sorter.cc:949
boost::scoped_ptr< SortedRunMerger > merger_
Definition: sorter.h:185
MemTracker * mem_tracker_
Mem tracker for batches created during merge. Not owned by Sorter.
Definition: sorter.h:180
Status AddBatch(RowBatch *batch)
Adds a batch of input rows to the current unsorted run.
Definition: sorter.cc:923
RuntimeProfile::Counter * initial_runs_counter_
Definition: sorter.h:196
RowDescriptor * output_row_desc_
Definition: sorter.h:173
This class is thread-safe.
Definition: mem-tracker.h:61
const RowDescriptor & row_desc() const
Status GetNext(RowBatch *batch, bool *eos)
Get the next batch of sorted output rows from the sorter.
Definition: sorter.cc:993
std::list< Run * > sorted_runs_
Definition: sorter.h:169
Note that Init() must be called right after the constructor.
Definition: sorter.h:84
BufferedBlockMgr::Client * block_mgr_client_
Handle to block mgr to make allocations from.
Definition: sorter.h:155
Sorter(const TupleRowComparator &compare_less_than, const std::vector< ExprContext * > &sort_tuple_slot_expr_ctxs, RowDescriptor *output_row_desc, MemTracker *mem_tracker, RuntimeProfile *profile, RuntimeState *state)
Definition: sorter.cc:870
std::list< Run * > merging_runs_
Definition: sorter.h:189
Status MergeIntermediateRuns()
Definition: sorter.cc:1054
ObjectPool obj_pool_
Pool of owned Run objects.
Definition: sorter.h:192
Run * unsorted_run_
Definition: sorter.h:164
bool has_var_len_slots_
True if the tuples to be sorted have var-length slots.
Definition: sorter.h:158
BufferedBlockMgr * block_mgr_
Block manager object used to allocate, pin and release runs. Not owned by Sorter. ...
Definition: sorter.h:152