Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
SelectStmt.java
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 package com.cloudera.impala.analysis;
16 
17 import java.util.ArrayList;
18 import java.util.Collections;
19 import java.util.List;
20 import java.util.Set;
21 
22 import org.slf4j.Logger;
23 import org.slf4j.LoggerFactory;
24 
34 import com.cloudera.impala.common.TreeNode;
35 import com.google.common.base.Preconditions;
36 import com.google.common.base.Predicates;
37 import com.google.common.collect.Iterables;
38 import com.google.common.collect.Lists;
39 import com.google.common.collect.Sets;
40 
45 public class SelectStmt extends QueryStmt {
46  private final static Logger LOG = LoggerFactory.getLogger(SelectStmt.class);
47 
49  protected final ArrayList<String> colLabels_; // lower case column labels
50  protected final List<TableRef> tableRefs_;
51  protected Expr whereClause_;
52  protected ArrayList<Expr> groupingExprs_;
53  protected final Expr havingClause_; // original having clause
54 
55  // havingClause with aliases and agg output resolved
56  private Expr havingPred_;
57 
58  // set if we have any kind of aggregation operation, including SELECT DISTINCT
60 
61  // set if we have AnalyticExprs in the select list/order by clause
63 
64  // SQL string of this SelectStmt before inline-view expression substitution.
65  // Set in analyze().
66  protected String sqlString_;
67 
68  // substitutes all exprs in this select block to reference base tables
69  // directly
71 
72  SelectStmt(SelectList selectList,
73  List<TableRef> tableRefList,
74  Expr wherePredicate, ArrayList<Expr> groupingExprs,
75  Expr havingPredicate, ArrayList<OrderByElement> orderByElements,
76  LimitElement limitElement) {
77  super(orderByElements, limitElement);
78  this.selectList_ = selectList;
79  if (tableRefList == null) {
80  this.tableRefs_ = Lists.newArrayList();
81  } else {
82  this.tableRefs_ = tableRefList;
83  }
84  this.whereClause_ = wherePredicate;
85  this.groupingExprs_ = groupingExprs;
86  this.havingClause_ = havingPredicate;
87  this.colLabels_ = Lists.newArrayList();
88  this.havingPred_ = null;
89  this.aggInfo_ = null;
90  this.sortInfo_ = null;
91  // Set left table refs to ensure correct toSql() before analysis.
92  for (int i = 1; i < tableRefs_.size(); ++i) {
93  tableRefs_.get(i).setLeftTblRef(tableRefs_.get(i - 1));
94  }
95  }
96 
100  public SelectList getSelectList() { return selectList_; }
101 
105  public Expr getHavingPred() { return havingPred_; }
106 
107  public List<TableRef> getTableRefs() { return tableRefs_; }
108  public boolean hasWhereClause() { return whereClause_ != null; }
109  public boolean hasGroupByClause() { return groupingExprs_ != null; }
110  public Expr getWhereClause() { return whereClause_; }
111  public void setWhereClause(Expr whereClause) { whereClause_ = whereClause; }
112  public AggregateInfo getAggInfo() { return aggInfo_; }
113  public boolean hasAggInfo() { return aggInfo_ != null; }
115  public boolean hasAnalyticInfo() { return analyticInfo_ != null; }
116  @Override
117  public ArrayList<String> getColLabels() { return colLabels_; }
119 
120  // Column alias generator used during query rewriting.
123  if (columnAliasGenerator_ == null) {
125  }
126  return columnAliasGenerator_;
127  }
128 
129  // Table alias generator used during query rewriting.
132  if (tableAliasGenerator_ == null) {
134  }
135  return tableAliasGenerator_;
136  }
137 
141  @Override
142  public void analyze(Analyzer analyzer) throws AnalysisException {
143  super.analyze(analyzer);
144 
145  // Start out with table refs to establish aliases.
146  TableRef leftTblRef = null; // the one to the left of tblRef
147  for (int i = 0; i < tableRefs_.size(); ++i) {
148  // Resolve and replace non-InlineViewRef table refs with a BaseTableRef or ViewRef.
149  TableRef tblRef = tableRefs_.get(i);
150  tblRef = analyzer.resolveTableRef(tblRef);
151  Preconditions.checkNotNull(tblRef);
152  tableRefs_.set(i, tblRef);
153  tblRef.setLeftTblRef(leftTblRef);
154  try {
155  tblRef.analyze(analyzer);
156  } catch (AnalysisException e) {
157  // Only re-throw the exception if no tables are missing.
158  if (analyzer.getMissingTbls().isEmpty()) throw e;
159  }
160  leftTblRef = tblRef;
161  }
162 
163  // All tableRefs have been analyzed, but at least one table was found missing.
164  // There is no reason to proceed with analysis past this point.
165  if (!analyzer.getMissingTbls().isEmpty()) {
166  throw new AnalysisException("Found missing tables. Aborting analysis.");
167  }
168 
169  // analyze plan hints from select list
170  selectList_.analyzePlanHints(analyzer);
171 
172  // populate resultExprs_, aliasSmap_, and colLabels_
173  for (int i = 0; i < selectList_.getItems().size(); ++i) {
174  SelectListItem item = selectList_.getItems().get(i);
175  if (item.isStar()) {
176  if (item.getRawPath() != null) {
177  Path resolvedPath = analyzeStarPath(item.getRawPath(), analyzer);
178  expandStar(resolvedPath, analyzer);
179  } else {
180  expandStar(analyzer);
181  }
182  } else {
183  // Analyze the resultExpr before generating a label to ensure enforcement
184  // of expr child and depth limits (toColumn() label may call toSql()).
185  item.getExpr().analyze(analyzer);
186  if (item.getExpr().contains(Predicates.instanceOf(Subquery.class))) {
187  throw new AnalysisException(
188  "Subqueries are not supported in the select list.");
189  }
190  resultExprs_.add(item.getExpr());
191  String label = item.toColumnLabel(i, analyzer.useHiveColLabels());
192  SlotRef aliasRef = new SlotRef(label);
193  Expr existingAliasExpr = aliasSmap_.get(aliasRef);
194  if (existingAliasExpr != null && !existingAliasExpr.equals(item.getExpr())) {
195  // If we have already seen this alias, it refers to more than one column and
196  // therefore is ambiguous.
197  ambiguousAliasList_.add(aliasRef);
198  }
199  aliasSmap_.put(aliasRef, item.getExpr().clone());
200  colLabels_.add(label);
201  }
202  }
203  // Complex types are currently not supported in the select list because we'd need
204  // to serialize them in a meaningful way.
205  for (Expr expr: resultExprs_) {
206  if (expr.getType().isComplexType()) {
207  throw new AnalysisException(String.format(
208  "Expr '%s' in select list returns a complex type '%s'.\n" +
209  "Only scalar types are allowed in the select list.",
210  expr.toSql(), expr.getType().toSql()));
211  }
212  }
213 
214  if (TreeNode.contains(resultExprs_, AnalyticExpr.class)) {
215  if (tableRefs_.isEmpty()) {
216  throw new AnalysisException("Analytic expressions require FROM clause.");
217  }
218 
219  // do this here, not after analyzeAggregation(), otherwise the AnalyticExprs
220  // will get substituted away
221  if (selectList_.isDistinct()) {
222  throw new AnalysisException(
223  "cannot combine SELECT DISTINCT with analytic functions");
224  }
225  }
226 
227  if (whereClause_ != null) {
228  whereClause_.analyze(analyzer);
229  if (whereClause_.contains(Expr.isAggregatePredicate())) {
230  throw new AnalysisException(
231  "aggregate function not allowed in WHERE clause");
232  }
233  whereClause_.checkReturnsBool("WHERE clause", false);
234  Expr e = whereClause_.findFirstOf(AnalyticExpr.class);
235  if (e != null) {
236  throw new AnalysisException(
237  "WHERE clause must not contain analytic expressions: " + e.toSql());
238  }
239  analyzer.registerConjuncts(whereClause_, false);
240  }
241 
242  createSortInfo(analyzer);
243  analyzeAggregation(analyzer);
244  analyzeAnalytics(analyzer);
245  if (evaluateOrderBy_) createSortTupleInfo(analyzer);
246 
247  // Remember the SQL string before inline-view expression substitution.
248  sqlString_ = toSql();
249  resolveInlineViewRefs(analyzer);
250 
251  // If this block's select-project-join portion returns an empty result set and the
252  // block has no aggregation, then mark this block as returning an empty result set.
253  if (analyzer.hasEmptySpjResultSet() && aggInfo_ == null) {
254  analyzer.setHasEmptyResultSet();
255  }
256 
257  ColumnLineageGraph graph = analyzer.getColumnLineageGraph();
258  if (aggInfo_ != null && !aggInfo_.getAggregateExprs().isEmpty()) {
259  graph.addDependencyPredicates(aggInfo_.getGroupingExprs());
260  }
261  if (sortInfo_ != null && hasLimit()) {
262  // When there is a LIMIT clause in conjunction with an ORDER BY, the ordering exprs
263  // must be added in the column lineage graph.
264  graph.addDependencyPredicates(sortInfo_.getOrderingExprs());
265  }
266 
267  if (aggInfo_ != null) LOG.debug("post-analysis " + aggInfo_.debugString());
268  }
269 
273  @Override
274  public void materializeRequiredSlots(Analyzer analyzer) {
275  // Mark unassigned join predicates. Some predicates that must be evaluated by a join
276  // can also be safely evaluated below the join (picked up by getBoundPredicates()).
277  // Such predicates will be marked twice and that is ok.
278  List<Expr> unassigned =
279  analyzer.getUnassignedConjuncts(getTableRefIds(), true);
280  List<Expr> unassignedJoinConjuncts = Lists.newArrayList();
281  for (Expr e: unassigned) {
282  if (analyzer.evalByJoin(e)) unassignedJoinConjuncts.add(e);
283  }
284  List<Expr> baseTblJoinConjuncts =
285  Expr.substituteList(unassignedJoinConjuncts, baseTblSmap_, analyzer, false);
286  materializeSlots(analyzer, baseTblJoinConjuncts);
287 
288  if (evaluateOrderBy_) {
289  // mark ordering exprs before marking agg/analytic exprs because they could contain
290  // agg/analytic exprs that are not referenced anywhere but the ORDER BY clause
291  sortInfo_.materializeRequiredSlots(analyzer, baseTblSmap_);
292  }
293 
294  if (hasAnalyticInfo()) {
295  // Mark analytic exprs before marking agg exprs because they could contain agg
296  // exprs that are not referenced anywhere but the analytic expr.
297  // Gather unassigned predicates and mark their slots. It is not desirable
298  // to account for propagated predicates because if an analytic expr is only
299  // referenced by a propagated predicate, then it's better to not materialize the
300  // analytic expr at all.
301  ArrayList<TupleId> tids = Lists.newArrayList();
302  getMaterializedTupleIds(tids); // includes the analytic tuple
303  List<Expr> conjuncts = analyzer.getUnassignedConjuncts(tids, false);
304  materializeSlots(analyzer, conjuncts);
305  analyticInfo_.materializeRequiredSlots(analyzer, baseTblSmap_);
306  }
307 
308  if (aggInfo_ != null) {
309  // mark all agg exprs needed for HAVING pred and binding predicates as materialized
310  // before calling AggregateInfo.materializeRequiredSlots(), otherwise they won't
311  // show up in AggregateInfo.getMaterializedAggregateExprs()
312  ArrayList<Expr> havingConjuncts = Lists.newArrayList();
313  if (havingPred_ != null) havingConjuncts.add(havingPred_);
314  // Ignore predicates bound to a group-by slot because those
315  // are already evaluated below this agg node (e.g., in a scan).
316  Set<SlotId> groupBySlots = Sets.newHashSet();
317  for (int i = 0; i < aggInfo_.getGroupingExprs().size(); ++i) {
318  groupBySlots.add(aggInfo_.getOutputTupleDesc().getSlots().get(i).getId());
319  }
320  // Binding predicates are assigned to the final output tuple of the aggregation,
321  // which is the tuple of the 2nd phase agg for distinct aggs.
322  ArrayList<Expr> bindingPredicates =
323  analyzer.getBoundPredicates(aggInfo_.getResultTupleId(), groupBySlots, false);
324  havingConjuncts.addAll(bindingPredicates);
325  havingConjuncts.addAll(
326  analyzer.getUnassignedConjuncts(aggInfo_.getResultTupleId().asList(), false));
327  materializeSlots(analyzer, havingConjuncts);
328  aggInfo_.materializeRequiredSlots(analyzer, baseTblSmap_);
329  }
330  }
331 
336  protected void resolveInlineViewRefs(Analyzer analyzer)
337  throws AnalysisException {
338  // Gather the inline view substitution maps from the enclosed inline views
339  for (TableRef tblRef: tableRefs_) {
340  if (tblRef instanceof InlineViewRef) {
341  InlineViewRef inlineViewRef = (InlineViewRef) tblRef;
342  baseTblSmap_ =
343  ExprSubstitutionMap.combine(baseTblSmap_, inlineViewRef.getBaseTblSmap());
344  }
345  }
347  Expr.trySubstituteList(resultExprs_, baseTblSmap_, analyzer, false);
348  LOG.trace("baseTblSmap_: " + baseTblSmap_.debugString());
349  LOG.trace("resultExprs: " + Expr.debugString(resultExprs_));
350  LOG.trace("baseTblResultExprs: " + Expr.debugString(baseTblResultExprs_));
351  }
352 
353  public List<TupleId> getTableRefIds() {
354  List<TupleId> result = Lists.newArrayList();
355  for (TableRef ref: tableRefs_) {
356  result.add(ref.getId());
357  }
358  return result;
359  }
360 
366  private Path analyzeStarPath(List<String> rawPath, Analyzer analyzer)
367  throws AnalysisException {
368  Path resolvedPath = null;
369  try {
370  resolvedPath = analyzer.resolvePath(rawPath, PathType.STAR);
371  } catch (TableLoadingException e) {
372  // Should never happen because we only check registered table aliases.
373  Preconditions.checkState(false);
374  }
375  Preconditions.checkNotNull(resolvedPath);
376  return resolvedPath;
377  }
378 
382  private void expandStar(Analyzer analyzer) throws AnalysisException {
383  if (tableRefs_.isEmpty()) {
384  throw new AnalysisException("'*' expression in select list requires FROM clause.");
385  }
386  // expand in From clause order
387  for (TableRef tableRef: tableRefs_) {
388  if (analyzer.isSemiJoined(tableRef.getId())) continue;
389  Path resolvedPath = new Path(tableRef.getDesc(), Collections.<String>emptyList());
390  Preconditions.checkState(resolvedPath.resolve());
391  expandStar(resolvedPath, analyzer);
392  }
393  }
394 
398  private void expandStar(Path resolvedPath, Analyzer analyzer)
399  throws AnalysisException {
400  Preconditions.checkState(resolvedPath.isResolved());
401  if (resolvedPath.destTupleDesc() != null &&
402  resolvedPath.destTupleDesc().getTable() != null &&
403  resolvedPath.destTupleDesc().getPath().getMatchedTypes().isEmpty()) {
404  // The resolved path targets a registered tuple descriptor of a catalog
405  // table. Expand the '*' based on the Hive-column order.
406  TupleDescriptor tupleDesc = resolvedPath.destTupleDesc();
407  Table table = tupleDesc.getTable();
408  for (Column c: table.getColumnsInHiveOrder()) {
409  addStarResultExpr(resolvedPath, analyzer, c.getName());
410  }
411  } else {
412  // The resolved path does not target the descriptor of a catalog table.
413  // Expand '*' based on the destination type of the resolved path.
414  Preconditions.checkState(resolvedPath.destType().isStructType());
415  StructType structType = (StructType) resolvedPath.destType();
416  Preconditions.checkNotNull(structType);
417 
418  // Star expansion for references to nested collections.
419  // Collection Type Star Expansion
420  // array<int> --> item
421  // array<struct<f1,f2,...,fn>> --> f1, f2, ..., fn
422  // map<int,int> --> key, value
423  // map<int,struct<f1,f2,...,fn>> --> key, f1, f2, ..., fn
424  if (structType instanceof CollectionStructType) {
425  CollectionStructType cst = (CollectionStructType) structType;
426  if (cst.isMapStruct()) {
427  addStarResultExpr(resolvedPath, analyzer, Path.MAP_KEY_FIELD_NAME);
428  }
429  if (cst.getOptionalField().getType().isStructType()) {
430  structType = (StructType) cst.getOptionalField().getType();
431  for (StructField f: structType.getFields()) {
433  resolvedPath, analyzer, cst.getOptionalField().getName(), f.getName());
434  }
435  } else if (cst.isMapStruct()) {
436  addStarResultExpr(resolvedPath, analyzer, Path.MAP_VALUE_FIELD_NAME);
437  } else {
438  addStarResultExpr(resolvedPath, analyzer, Path.ARRAY_ITEM_FIELD_NAME);
439  }
440  } else {
441  // Default star expansion.
442  for (StructField f: structType.getFields()) {
443  addStarResultExpr(resolvedPath, analyzer, f.getName());
444  }
445  }
446  }
447  }
448 
453  private void addStarResultExpr(Path resolvedPath, Analyzer analyzer,
454  String... relRawPath) throws AnalysisException {
455  Path p = Path.createRelPath(resolvedPath, relRawPath);
456  Preconditions.checkState(p.resolve());
457  SlotDescriptor slotDesc = analyzer.registerSlotRef(p);
458  SlotRef slotRef = new SlotRef(slotDesc);
459  slotRef.analyze(analyzer);
460  resultExprs_.add(slotRef);
461  colLabels_.add(relRawPath[relRawPath.length - 1]);
462  }
463 
470  private void analyzeAggregation(Analyzer analyzer)
471  throws AnalysisException {
472  if (groupingExprs_ == null && !selectList_.isDistinct()
473  && !TreeNode.contains(resultExprs_, Expr.isAggregatePredicate())
474  && (sortInfo_ == null
475  || !TreeNode.contains(sortInfo_.getOrderingExprs(),
477  // we're not computing aggregates
478  return;
479  }
480 
481  // If we're computing an aggregate, we must have a FROM clause.
482  if (tableRefs_.size() == 0) {
483  throw new AnalysisException(
484  "aggregation without a FROM clause is not allowed");
485  }
486 
487  if ((groupingExprs_ != null ||
488  TreeNode.contains(resultExprs_, Expr.isAggregatePredicate()))
489  && selectList_.isDistinct()) {
490  throw new AnalysisException(
491  "cannot combine SELECT DISTINCT with aggregate functions or GROUP BY");
492  }
493 
494  // disallow '*' and explicit GROUP BY (we can't group by '*', and if you need to
495  // name all star-expanded cols in the group by clause you might as well do it
496  // in the select list)
497  if (groupingExprs_ != null) {
498  for (SelectListItem item : selectList_.getItems()) {
499  if (item.isStar()) {
500  throw new AnalysisException(
501  "cannot combine '*' in select list with GROUP BY: " + item.toSql());
502  }
503  }
504  }
505 
506  // disallow subqueries in the GROUP BY clause
507  if (groupingExprs_ != null) {
508  for (Expr expr: groupingExprs_) {
509  if (expr.contains(Predicates.instanceOf(Subquery.class))) {
510  throw new AnalysisException(
511  "Subqueries are not supported in the GROUP BY clause.");
512  }
513  }
514  }
515 
516  // analyze grouping exprs
517  ArrayList<Expr> groupingExprsCopy = Lists.newArrayList();
518  if (groupingExprs_ != null) {
519  // make a deep copy here, we don't want to modify the original
520  // exprs during analysis (in case we need to print them later)
521  groupingExprsCopy = Expr.cloneList(groupingExprs_);
522  substituteOrdinals(groupingExprsCopy, "GROUP BY", analyzer);
523  Expr ambiguousAlias = getFirstAmbiguousAlias(groupingExprsCopy);
524  if (ambiguousAlias != null) {
525  throw new AnalysisException("Column '" + ambiguousAlias.toSql() +
526  "' in GROUP BY clause is ambiguous");
527  }
528  groupingExprsCopy =
529  Expr.trySubstituteList(groupingExprsCopy, aliasSmap_, analyzer, false);
530  for (int i = 0; i < groupingExprsCopy.size(); ++i) {
531  groupingExprsCopy.get(i).analyze(analyzer);
532  if (groupingExprsCopy.get(i).contains(Expr.isAggregatePredicate())) {
533  // reference the original expr in the error msg
534  throw new AnalysisException(
535  "GROUP BY expression must not contain aggregate functions: "
536  + groupingExprs_.get(i).toSql());
537  }
538  if (groupingExprsCopy.get(i).contains(AnalyticExpr.class)) {
539  // reference the original expr in the error msg
540  throw new AnalysisException(
541  "GROUP BY expression must not contain analytic expressions: "
542  + groupingExprsCopy.get(i).toSql());
543  }
544  }
545  }
546 
547  // analyze having clause
548  if (havingClause_ != null) {
549  if (havingClause_.contains(Predicates.instanceOf(Subquery.class))) {
550  throw new AnalysisException("Subqueries are not supported in the HAVING clause.");
551  }
552  // substitute aliases in place (ordinals not allowed in having clause)
553  havingPred_ = havingClause_.substitute(aliasSmap_, analyzer, false);
554  havingPred_.checkReturnsBool("HAVING clause", true);
555  // can't contain analytic exprs
556  Expr analyticExpr = havingPred_.findFirstOf(AnalyticExpr.class);
557  if (analyticExpr != null) {
558  throw new AnalysisException(
559  "HAVING clause must not contain analytic expressions: "
560  + analyticExpr.toSql());
561  }
562  }
563 
564  // Collect the aggregate expressions from the SELECT, HAVING and ORDER BY clauses
565  // of this statement.
566  ArrayList<FunctionCallExpr> aggExprs = Lists.newArrayList();
567  TreeNode.collect(resultExprs_, Expr.isAggregatePredicate(), aggExprs);
568  if (havingPred_ != null) {
569  havingPred_.collect(Expr.isAggregatePredicate(), aggExprs);
570  }
571  if (sortInfo_ != null) {
572  // TODO: Avoid evaluating aggs in ignored order-bys
573  TreeNode.collect(sortInfo_.getOrderingExprs(), Expr.isAggregatePredicate(),
574  aggExprs);
575  }
576 
577  // Optionally rewrite all count(distinct <expr>) into equivalent NDV() calls.
578  ExprSubstitutionMap ndvSmap = null;
579  if (analyzer.getQueryCtx().getRequest().query_options.appx_count_distinct) {
580  ndvSmap = new ExprSubstitutionMap();
581  for (FunctionCallExpr aggExpr: aggExprs) {
582  if (!aggExpr.isDistinct()
583  || !aggExpr.getFnName().getFunction().equals("count")
584  || aggExpr.getParams().size() != 1) {
585  continue;
586  }
587  FunctionCallExpr ndvFnCall =
588  new FunctionCallExpr("ndv", aggExpr.getParams().exprs());
589  ndvFnCall.analyzeNoThrow(analyzer);
590  Preconditions.checkState(ndvFnCall.getType().equals(aggExpr.getType()));
591  ndvSmap.put(aggExpr, ndvFnCall);
592  }
593  // Replace all count(distinct <expr>) with NDV(<expr>).
594  List<Expr> substAggExprs = Expr.substituteList(aggExprs, ndvSmap, analyzer, false);
595  aggExprs.clear();
596  for (Expr aggExpr: substAggExprs) {
597  Preconditions.checkState(aggExpr instanceof FunctionCallExpr);
598  aggExprs.add((FunctionCallExpr) aggExpr);
599  }
600  }
601 
602  // When DISTINCT aggregates are present, non-distinct (i.e. ALL) aggregates are
603  // evaluated in two phases (see AggregateInfo for more details). In particular,
604  // COUNT(c) in "SELECT COUNT(c), AGG(DISTINCT d) from R" is transformed to
605  // "SELECT SUM(cnt) FROM (SELECT COUNT(c) as cnt from R group by d ) S".
606  // Since a group-by expression is added to the inner query it returns no rows if
607  // R is empty, in which case the SUM of COUNTs will return NULL.
608  // However the original COUNT(c) should have returned 0 instead of NULL in this case.
609  // Therefore, COUNT([ALL]) is transformed into zeroifnull(COUNT([ALL]) if
610  // i) There is no GROUP-BY clause, and
611  // ii) Other DISTINCT aggregates are present.
612  ExprSubstitutionMap countAllMap = createCountAllMap(aggExprs, analyzer);
613  countAllMap = ExprSubstitutionMap.compose(ndvSmap, countAllMap, analyzer);
614  List<Expr> substitutedAggs =
615  Expr.substituteList(aggExprs, countAllMap, analyzer, false);
616  aggExprs.clear();
617  TreeNode.collect(substitutedAggs, Expr.isAggregatePredicate(), aggExprs);
618  createAggInfo(groupingExprsCopy, aggExprs, analyzer);
619 
620  // combine avg smap with the one that produces the final agg output
621  AggregateInfo finalAggInfo =
622  aggInfo_.getSecondPhaseDistinctAggInfo() != null
624  : aggInfo_;
625 
626  ExprSubstitutionMap combinedSmap =
627  ExprSubstitutionMap.compose(countAllMap, finalAggInfo.getOutputSmap(), analyzer);
628  LOG.trace("combined smap: " + combinedSmap.debugString());
629 
630  // change select list, having and ordering exprs to point to agg output. We need
631  // to reanalyze the exprs at this point.
632  LOG.trace("desctbl: " + analyzer.getDescTbl().debugString());
633  LOG.trace("resultexprs: " + Expr.debugString(resultExprs_));
634  resultExprs_ = Expr.substituteList(resultExprs_, combinedSmap, analyzer, false);
635  LOG.trace("post-agg selectListExprs: " + Expr.debugString(resultExprs_));
636  if (havingPred_ != null) {
637  // Make sure the predicate in the HAVING clause does not contain a
638  // subquery.
639  Preconditions.checkState(!havingPred_.contains(
640  Predicates.instanceOf(Subquery.class)));
641  havingPred_ = havingPred_.substitute(combinedSmap, analyzer, false);
642  analyzer.registerConjuncts(havingPred_, true);
643  LOG.debug("post-agg havingPred: " + havingPred_.debugString());
644  }
645  if (sortInfo_ != null) {
646  sortInfo_.substituteOrderingExprs(combinedSmap, analyzer);
647  LOG.debug("post-agg orderingExprs: " +
648  Expr.debugString(sortInfo_.getOrderingExprs()));
649  }
650 
651  // check that all post-agg exprs point to agg output
652  for (int i = 0; i < selectList_.getItems().size(); ++i) {
653  if (!resultExprs_.get(i).isBound(finalAggInfo.getOutputTupleId())) {
654  throw new AnalysisException(
655  "select list expression not produced by aggregation output "
656  + "(missing from GROUP BY clause?): "
657  + selectList_.getItems().get(i).getExpr().toSql());
658  }
659  }
660  if (orderByElements_ != null) {
661  for (int i = 0; i < orderByElements_.size(); ++i) {
662  if (!sortInfo_.getOrderingExprs().get(i).isBound(
663  finalAggInfo.getOutputTupleId())) {
664  throw new AnalysisException(
665  "ORDER BY expression not produced by aggregation output "
666  + "(missing from GROUP BY clause?): "
667  + orderByElements_.get(i).getExpr().toSql());
668  }
669  }
670  }
671  if (havingPred_ != null) {
672  if (!havingPred_.isBound(finalAggInfo.getOutputTupleId())) {
673  throw new AnalysisException(
674  "HAVING clause not produced by aggregation output "
675  + "(missing from GROUP BY clause?): "
676  + havingClause_.toSql());
677  }
678  }
679  }
680 
681 
690  List<FunctionCallExpr> aggExprs, Analyzer analyzer)
691  throws AnalysisException {
692  ExprSubstitutionMap scalarCountAllMap = new ExprSubstitutionMap();
693 
694  if (groupingExprs_ != null && !groupingExprs_.isEmpty()) {
695  // There are grouping expressions, so no substitution needs to be done.
696  return scalarCountAllMap;
697  }
698 
699  com.google.common.base.Predicate<FunctionCallExpr> isNotDistinctPred =
700  new com.google.common.base.Predicate<FunctionCallExpr>() {
701  public boolean apply(FunctionCallExpr expr) {
702  return !expr.isDistinct();
703  }
704  };
705  if (Iterables.all(aggExprs, isNotDistinctPred)) {
706  // Only [ALL] aggs, so no substitution needs to be done.
707  return scalarCountAllMap;
708  }
709 
710  com.google.common.base.Predicate<FunctionCallExpr> isCountPred =
711  new com.google.common.base.Predicate<FunctionCallExpr>() {
712  public boolean apply(FunctionCallExpr expr) {
713  return expr.getFnName().getFunction().equals("count");
714  }
715  };
716 
717  Iterable<FunctionCallExpr> countAllAggs =
718  Iterables.filter(aggExprs, Predicates.and(isCountPred, isNotDistinctPred));
719  for (FunctionCallExpr countAllAgg: countAllAggs) {
720  // Replace COUNT(ALL) with zeroifnull(COUNT(ALL))
721  ArrayList<Expr> zeroIfNullParam = Lists.newArrayList(countAllAgg.clone());
722  FunctionCallExpr zeroIfNull =
723  new FunctionCallExpr("zeroifnull", zeroIfNullParam);
724  zeroIfNull.analyze(analyzer);
725  scalarCountAllMap.put(countAllAgg, zeroIfNull);
726  }
727 
728  return scalarCountAllMap;
729  }
730 
734  private void createAggInfo(ArrayList<Expr> groupingExprs,
735  ArrayList<FunctionCallExpr> aggExprs, Analyzer analyzer)
736  throws AnalysisException {
737  if (selectList_.isDistinct()) {
738  // Create aggInfo for SELECT DISTINCT ... stmt:
739  // - all select list items turn into grouping exprs
740  // - there are no aggregate exprs
741  Preconditions.checkState(groupingExprs.isEmpty());
742  Preconditions.checkState(aggExprs.isEmpty());
743  ArrayList<Expr> distinctGroupingExprs = Expr.cloneList(resultExprs_);
744  aggInfo_ =
745  AggregateInfo.create(distinctGroupingExprs, null, null, analyzer);
746  } else {
747  aggInfo_ = AggregateInfo.create(groupingExprs, aggExprs, null, analyzer);
748  }
749  }
750 
755  private void analyzeAnalytics(Analyzer analyzer)
756  throws AnalysisException {
757  // collect AnalyticExprs from the SELECT and ORDER BY clauses
758  ArrayList<Expr> analyticExprs = Lists.newArrayList();
759  TreeNode.collect(resultExprs_, AnalyticExpr.class, analyticExprs);
760  if (sortInfo_ != null) {
761  TreeNode.collect(sortInfo_.getOrderingExprs(), AnalyticExpr.class,
762  analyticExprs);
763  }
764  if (analyticExprs.isEmpty()) return;
765  analyticInfo_ = AnalyticInfo.create(analyticExprs, analyzer);
766 
767  // change select list and ordering exprs to point to analytic output. We need
768  // to reanalyze the exprs at this point.
769  resultExprs_ = Expr.substituteList(resultExprs_, analyticInfo_.getSmap(), analyzer,
770  false);
771  LOG.trace("post-analytic selectListExprs: " + Expr.debugString(resultExprs_));
772  if (sortInfo_ != null) {
773  sortInfo_.substituteOrderingExprs(analyticInfo_.getSmap(), analyzer);
774  LOG.trace("post-analytic orderingExprs: " +
775  Expr.debugString(sortInfo_.getOrderingExprs()));
776  }
777  }
778 
782  @Override
783  public String toSql() {
784  // Return the SQL string before inline-view expression substitution.
785  if (sqlString_ != null) return sqlString_;
786 
787  StringBuilder strBuilder = new StringBuilder();
788  if (withClause_ != null) {
789  strBuilder.append(withClause_.toSql());
790  strBuilder.append(" ");
791  }
792 
793  // Select list
794  strBuilder.append("SELECT ");
795  if (selectList_.isDistinct()) {
796  strBuilder.append("DISTINCT ");
797  }
798  if (selectList_.hasPlanHints()) {
799  strBuilder.append(ToSqlUtils.getPlanHintsSql(selectList_.getPlanHints()) + " ");
800  }
801  for (int i = 0; i < selectList_.getItems().size(); ++i) {
802  strBuilder.append(selectList_.getItems().get(i).toSql());
803  strBuilder.append((i+1 != selectList_.getItems().size()) ? ", " : "");
804  }
805  // From clause
806  if (!tableRefs_.isEmpty()) {
807  strBuilder.append(" FROM ");
808  for (int i = 0; i < tableRefs_.size(); ++i) {
809  strBuilder.append(tableRefs_.get(i).toSql());
810  }
811  }
812  // Where clause
813  if (whereClause_ != null) {
814  strBuilder.append(" WHERE ");
815  strBuilder.append(whereClause_.toSql());
816  }
817  // Group By clause
818  if (groupingExprs_ != null) {
819  strBuilder.append(" GROUP BY ");
820  for (int i = 0; i < groupingExprs_.size(); ++i) {
821  strBuilder.append(groupingExprs_.get(i).toSql());
822  strBuilder.append((i+1 != groupingExprs_.size()) ? ", " : "");
823  }
824  }
825  // Having clause
826  if (havingClause_ != null) {
827  strBuilder.append(" HAVING ");
828  strBuilder.append(havingClause_.toSql());
829  }
830  // Order By clause
831  if (orderByElements_ != null) {
832  strBuilder.append(" ORDER BY ");
833  for (int i = 0; i < orderByElements_.size(); ++i) {
834  strBuilder.append(orderByElements_.get(i).toSql());
835  strBuilder.append((i+1 != orderByElements_.size()) ? ", " : "");
836  }
837  }
838  // Limit clause.
839  strBuilder.append(limitElement_.toSql());
840  return strBuilder.toString();
841  }
842 
850  @Override
851  public void getMaterializedTupleIds(ArrayList<TupleId> tupleIdList) {
852  if (evaluateOrderBy_) {
853  tupleIdList.add(sortInfo_.getSortTupleDescriptor().getId());
854  } else if (aggInfo_ != null) {
855  // Return the tuple id produced in the final aggregation step.
856  tupleIdList.add(aggInfo_.getResultTupleId());
857  } else {
858  for (TableRef tblRef: tableRefs_) {
859  // Don't include materialized tuple ids from semi-joined table
860  // refs (see IMPALA-1526)
861  if (tblRef.getJoinOp().isLeftSemiJoin()) continue;
862  // Remove the materialized tuple ids of all the table refs that
863  // are semi-joined by the right semi/anti join.
864  if (tblRef.getJoinOp().isRightSemiJoin()) tupleIdList.clear();
865  tupleIdList.addAll(tblRef.getMaterializedTupleIds());
866  }
867  }
868  // We materialize the agg tuple or the table refs together with the analytic tuple.
869  if (hasAnalyticInfo() && !evaluateOrderBy_) {
870  tupleIdList.add(analyticInfo_.getOutputTupleId());
871  }
872  }
873 
874  private ArrayList<TableRef> cloneTableRefs() {
875  ArrayList<TableRef> clone = Lists.newArrayList();
876  for (TableRef tblRef: tableRefs_) {
877  clone.add(tblRef.clone());
878  }
879  return clone;
880  }
881 
882  @Override
883  public QueryStmt clone() {
884  SelectStmt selectClone = new SelectStmt(selectList_.clone(), cloneTableRefs(),
885  (whereClause_ != null) ? whereClause_.clone().reset() : null,
886  (groupingExprs_ != null) ? Expr.resetList(Expr.cloneList(groupingExprs_)) : null,
887  (havingClause_ != null) ? havingClause_.clone().reset() : null,
889  (limitElement_ != null) ? limitElement_.clone() : null);
890  selectClone.setWithClause(cloneWithClause());
891  return selectClone;
892  }
893 
904  public boolean returnsSingleRow() {
905  // limit 1 clause
906  if (limitElement_ != null && limitElement_.getLimit() == 1) return true;
907  // No from clause (base tables or inline views)
908  if (tableRefs_.isEmpty()) return true;
909  // Aggregation with no group by and no DISTINCT
910  if (hasAggInfo() && !hasGroupByClause() && !selectList_.isDistinct()) return true;
911  // In all other cases, return false.
912  return false;
913  }
914 }
void substituteOrdinals(List< Expr > exprs, String errorPrefix, Analyzer analyzer)
Definition: QueryStmt.java:230
void resolveInlineViewRefs(Analyzer analyzer)
void getMaterializedTupleIds(ArrayList< TupleId > tupleIdList)
ColumnAliasGenerator columnAliasGenerator_
void setWhereClause(Expr whereClause)
static com.google.common.base.Predicate< Expr > isAggregatePredicate()
Definition: Expr.java:523
ArrayList< OrderByElement > cloneOrderByElements()
Definition: QueryStmt.java:311
ArrayList< TableRef > cloneTableRefs()
void analyzeAggregation(Analyzer analyzer)
SelectStmt(SelectList selectList, List< TableRef > tableRefList, Expr wherePredicate, ArrayList< Expr > groupingExprs, Expr havingPredicate, ArrayList< OrderByElement > orderByElements, LimitElement limitElement)
Definition: SelectStmt.java:72
TableAliasGenerator getTableAliasGenerator()
void expandStar(Analyzer analyzer)
void createSortTupleInfo(Analyzer analyzer)
Definition: QueryStmt.java:185
void materializeRequiredSlots(Analyzer analyzer)
ColumnAliasGenerator getColumnAliasGenerator()
ArrayList< FunctionCallExpr > getAggregateExprs()
void expandStar(Path resolvedPath, Analyzer analyzer)
final ExprSubstitutionMap aliasSmap_
Definition: QueryStmt.java:62
Expr getFirstAmbiguousAlias(List< Expr > exprs)
Definition: QueryStmt.java:219
final ArrayList< String > colLabels_
Definition: SelectStmt.java:49
Path analyzeStarPath(List< String > rawPath, Analyzer analyzer)
void materializeSlots(Analyzer analyzer, List< Expr > exprs)
Definition: QueryStmt.java:303
void addStarResultExpr(Path resolvedPath, Analyzer analyzer, String...relRawPath)
ExprSubstitutionMap createCountAllMap(List< FunctionCallExpr > aggExprs, Analyzer analyzer)
ArrayList< OrderByElement > orderByElements_
Definition: QueryStmt.java:44
static final String MAP_VALUE_FIELD_NAME
Definition: Path.java:79
boolean equals(Object obj)
Definition: Expr.java:566
void createAggInfo(ArrayList< Expr > groupingExprs, ArrayList< FunctionCallExpr > aggExprs, Analyzer analyzer)
void analyzeAnalytics(Analyzer analyzer)
void createSortInfo(Analyzer analyzer)
Definition: QueryStmt.java:115
static final String ARRAY_ITEM_FIELD_NAME
Definition: Path.java:76
List< SelectListItem > getItems()
Definition: SelectList.java:66
boolean isBound(TupleId tid)
Definition: Expr.java:845
static final String MAP_KEY_FIELD_NAME
Definition: Path.java:78