Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
summary-util.cc
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "util/summary-util.h"
16 
17 #include <vector>
18 #include <boost/lexical_cast.hpp>
19 
20 #include "common/logging.h"
21 #include "util/pretty-printer.h"
22 #include "util/redactor.h"
23 #include "util/table-printer.h"
24 
25 #include "common/names.h"
26 
27 using namespace impala;
28 
29 // Helper function for PrintExecSummary() that walks the exec summary recursively.
30 // Output for this node is appended to *result. Each value in *result should contain
31 // the statistics for a single exec summary node.
32 // node_idx is an in/out parameter. It is called with the idx (into exec_summary_.nodes)
33 // for the current node and on return, will contain the id of the next node.
34 void PrintExecSummary(const TExecSummary& exec_summary, int indent_level,
35  int new_indent_level, int* node_idx,
36  vector<vector<string> >* result) {
37  DCHECK_LT(*node_idx, exec_summary.nodes.size());
38  const TPlanNodeExecSummary& node = exec_summary.nodes[*node_idx];
39  const TExecStats& est_stats = node.estimated_stats;
40 
41  TExecStats agg_stats;
42  TExecStats max_stats;
43 
44 #define COMPUTE_MAX_SUM_STATS(NAME)\
45  agg_stats.NAME += node.exec_stats[i].NAME;\
46  max_stats.NAME = std::max(max_stats.NAME, node.exec_stats[i].NAME)
47 
48  // Compute avg and max of each used stat (cpu_time_ns is unused in the summary output).
49  for (int i = 0; i < node.exec_stats.size(); ++i) {
50  COMPUTE_MAX_SUM_STATS(latency_ns);
51  COMPUTE_MAX_SUM_STATS(cardinality);
52  COMPUTE_MAX_SUM_STATS(memory_used);
53  }
54 #undef COMPUTE_MAX_SUM_STATS
55 
56  int64_t avg_time = node.exec_stats.size() == 0 ? 0 :
57  agg_stats.latency_ns / node.exec_stats.size();
58 
59  // Print the level to indicate nesting with "|--"
60  stringstream label_ss;
61  if (indent_level != 0) {
62  label_ss << "|";
63  for (int i = 0; i < indent_level - 1; ++i) {
64  label_ss << " |";
65  }
66  label_ss << (new_indent_level ? "--" : " ");
67  }
68 
69  label_ss << node.label;
70 
71  vector<string> row;
72  row.push_back(label_ss.str());
73  row.push_back(lexical_cast<string>(node.exec_stats.size())); // Num instances
74  row.push_back(PrettyPrinter::Print(avg_time, TUnit::TIME_NS));
75  row.push_back(PrettyPrinter::Print(max_stats.latency_ns, TUnit::TIME_NS));
76  row.push_back(PrettyPrinter::Print(
77  node.is_broadcast ? max_stats.cardinality : agg_stats.cardinality,
78  TUnit::UNIT));
79  row.push_back(PrettyPrinter::Print(est_stats.cardinality, TUnit::UNIT));
80  row.push_back(PrettyPrinter::Print(max_stats.memory_used, TUnit::BYTES));
81  row.push_back(PrettyPrinter::Print(est_stats.memory_used, TUnit::BYTES));
82  // Node "details" may contain exprs which should be redacted.
83  row.push_back(RedactCopy(node.label_detail));
84  result->push_back(row);
85 
86  map<int, int>::const_iterator child_fragment_idx_it =
87  exec_summary.exch_to_sender_map.find(*node_idx);
88  if (child_fragment_idx_it != exec_summary.exch_to_sender_map.end()) {
89  DCHECK_EQ(node.num_children, 0);
90  int child_fragment_id = child_fragment_idx_it->second;
91  PrintExecSummary(exec_summary, indent_level, false, &child_fragment_id, result);
92  }
93  ++*node_idx;
94  if (node.num_children == 0) return;
95 
96  // Print the non-left children to the stream first.
97  vector<vector<string> > child0_result;
98  PrintExecSummary(exec_summary, indent_level, false, node_idx, &child0_result);
99 
100  for (int i = 1; i < node.num_children; ++i) {
101  PrintExecSummary(exec_summary, indent_level + 1, true, node_idx, result);
102  }
103  for (int i = 0; i < child0_result.size(); ++i) {
104  result->push_back(child0_result[i]);
105  }
106 }
107 
108 string impala::PrintExecSummary(const TExecSummary& exec_summary) {
109  // Bail if Coordinator::InitExecProfile() has not been called.
110  if (!exec_summary.__isset.nodes) return "";
111 
112  TablePrinter printer;
113  printer.set_max_output_width(30);
114  printer.AddColumn("Operator", true);
115  printer.AddColumn("#Hosts", false);
116  printer.AddColumn("Avg Time", false);
117  printer.AddColumn("Max Time", false);
118  printer.AddColumn("#Rows", false);
119  printer.AddColumn("Est. #Rows", false);
120  printer.AddColumn("Peak Mem", false);
121  printer.AddColumn("Est. Peak Mem", false);
122  printer.AddColumn("Detail", true);
123 
124  vector<vector<string> > rows;
125  int node_idx = 0;
126  ::PrintExecSummary(exec_summary, 0, false, &node_idx, &rows);
127  for (int i = 0; i < rows.size(); ++i) {
128  printer.AddRow(rows[i]);
129  }
130  return printer.ToString("\n");
131 }
std::string RedactCopy(const std::string &original)
Utility function to redact a string without modifying the original.
Definition: redactor.h:63
#define COMPUTE_MAX_SUM_STATS(NAME)
std::string ToString(const std::string &prefix="") const
Print to a table with prefix coming before the output.
static std::string Print(bool value, TUnit::type ignored, bool verbose=false)
void AddColumn(const std::string &label, bool left_align)
std::string PrintExecSummary(const TExecSummary &exec_summary)
Print the exec summary as a formatted table.
const TExecSummary & exec_summary() const
Definition: coordinator.h:173
void AddRow(const std::vector< std::string > &row)
Add a row to the table. This must have the same width as labels.
void set_max_output_width(int width)