Impala
Impalaistheopensource,nativeanalyticdatabaseforApacheHadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
StmtRewriter.java
Go to the documentation of this file.
1 // Copyright 2014 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 package com.cloudera.impala.analysis;
16 
17 import java.util.ArrayList;
18 import java.util.List;
19 
20 import org.slf4j.Logger;
21 import org.slf4j.LoggerFactory;
22 
26 import com.google.common.base.Preconditions;
27 import com.google.common.base.Predicates;
28 import com.google.common.collect.Lists;
29 
34 public class StmtRewriter {
35  private final static Logger LOG = LoggerFactory.getLogger(StmtRewriter.class);
36 
41  public static StatementBase rewrite(AnalysisResult analysisResult)
42  throws AnalysisException {
43  StatementBase rewrittenStmt = null;
44  if (analysisResult.getStmt() instanceof QueryStmt) {
45  QueryStmt analyzedStmt = (QueryStmt)analysisResult.getStmt();
46  rewriteQueryStatement(analyzedStmt, analysisResult.getAnalyzer());
47  rewrittenStmt = analyzedStmt.clone();
48  } else if (analysisResult.getStmt() instanceof InsertStmt) {
49  // For an InsertStmt, rewrites are performed during its analysis.
50  // Clone the insert stmt to reset its analysis state.
51  rewrittenStmt = ((InsertStmt)analysisResult.getStmt()).clone();
52  } else if (analysisResult.getStmt() instanceof CreateTableAsSelectStmt) {
53  // For a CTAS, rewrites are performed during its analysis.
54  CreateTableAsSelectStmt ctasStmt =
55  (CreateTableAsSelectStmt)analysisResult.getStmt();
56  // Create a new CTAS from the original create statement and the
57  // rewritten insert statement.
58  Preconditions.checkNotNull(analysisResult.getTmpCreateTableStmt());
59  rewrittenStmt = new CreateTableAsSelectStmt(analysisResult.getTmpCreateTableStmt(),
60  ctasStmt.getQueryStmt().clone());
61  } else {
62  throw new AnalysisException("Unsupported statement containing subqueries: " +
63  analysisResult.getStmt().toSql());
64  }
65  return rewrittenStmt;
66  }
67 
72  public static void rewriteQueryStatement(QueryStmt stmt, Analyzer analyzer)
73  throws AnalysisException {
74  Preconditions.checkNotNull(stmt);
75  if (stmt instanceof SelectStmt) {
76  rewriteSelectStatement((SelectStmt)stmt, analyzer);
77  } else if (stmt instanceof UnionStmt) {
78  rewriteUnionStatement((UnionStmt)stmt, analyzer);
79  } else {
80  throw new AnalysisException("Subqueries not supported for " +
81  stmt.getClass().getSimpleName() + " statements");
82  }
83  }
84 
90  private static void rewriteSelectStatement(SelectStmt stmt, Analyzer analyzer)
91  throws AnalysisException {
92  // Rewrite all the subqueries in the FROM clause.
93  for (TableRef tblRef: stmt.tableRefs_) {
94  if (!(tblRef instanceof InlineViewRef)) continue;
95  InlineViewRef inlineViewRef = (InlineViewRef)tblRef;
96  rewriteQueryStatement(inlineViewRef.getViewStmt(), inlineViewRef.getAnalyzer());
97  // Reset the state of the underlying stmt since it was rewritten
98  inlineViewRef.setRewrittenViewStmt(inlineViewRef.getViewStmt().clone());
99  }
100  // Rewrite all the subqueries in the WHERE clause.
101  if (stmt.hasWhereClause()) {
102  // Push negation to leaf operands.
103  stmt.whereClause_ = Expr.pushNegationToOperands(stmt.whereClause_);
104  // Check if we can rewrite the subqueries in the WHERE clause. OR predicates with
105  // subqueries are not supported.
106  if (hasSubqueryInDisjunction(stmt.whereClause_)) {
107  throw new AnalysisException("Subqueries in OR predicates are not supported: " +
108  stmt.whereClause_.toSql());
109  }
110  rewriteWhereClauseSubqueries(stmt, analyzer);
111  }
112  stmt.sqlString_ = null;
113  LOG.trace("rewritten stmt: " + stmt.toSql());
114  }
115 
120  private static void rewriteUnionStatement(UnionStmt stmt, Analyzer analyzer)
121  throws AnalysisException {
122  for (UnionOperand operand: stmt.getOperands()) {
123  Preconditions.checkState(operand.getQueryStmt() instanceof SelectStmt);
124  StmtRewriter.rewriteSelectStatement(
125  (SelectStmt)operand.getQueryStmt(), operand.getAnalyzer());
126  }
127  }
128 
133  private static boolean hasSubqueryInDisjunction(Expr expr) {
134  if (!(expr instanceof CompoundPredicate)) return false;
135  if (Expr.IS_OR_PREDICATE.apply(expr)) {
136  return expr.contains(Subquery.class);
137  }
138  for (Expr child: expr.getChildren()) {
139  if (hasSubqueryInDisjunction(child)) return true;
140  }
141  return false;
142  }
143 
198  private static void rewriteWhereClauseSubqueries(SelectStmt stmt, Analyzer analyzer)
199  throws AnalysisException {
200  int numTableRefs = stmt.tableRefs_.size();
201  ArrayList<Expr> exprsWithSubqueries = Lists.newArrayList();
203  // Replace all BetweenPredicates that contain subqueries with their
204  // equivalent compound predicates.
205  stmt.whereClause_ = replaceBetweenPredicates(stmt.whereClause_);
206  // Check if all the conjuncts in the WHERE clause that contain subqueries
207  // can currently be rewritten as a join.
208  for (Expr conjunct: stmt.whereClause_.getConjuncts()) {
209  List<Subquery> subqueries = Lists.newArrayList();
210  conjunct.collectAll(Predicates.instanceOf(Subquery.class), subqueries);
211  if (subqueries.size() == 0) continue;
212  if (subqueries.size() > 1) {
213  throw new AnalysisException("Multiple subqueries are not supported in " +
214  "expression: " + conjunct.toSql());
215  }
216  if (!(conjunct instanceof InPredicate) && !(conjunct instanceof ExistsPredicate) &&
217  !(conjunct instanceof BinaryPredicate) &&
218  !conjunct.contains(Expr.IS_SCALAR_SUBQUERY)) {
219  throw new AnalysisException("Non-scalar subquery is not supported in " +
220  "expression: " + conjunct.toSql());
221  }
222 
223  if (conjunct instanceof ExistsPredicate) {
224  // Check if we can determine the result of an ExistsPredicate during analysis.
225  // If so, replace the predicate with a BoolLiteral predicate and remove it from
226  // the list of predicates to be rewritten.
227  BoolLiteral boolLiteral = replaceExistsPredicate((ExistsPredicate) conjunct);
228  if (boolLiteral != null) {
229  boolLiteral.analyze(analyzer);
230  smap.put(conjunct, boolLiteral);
231  continue;
232  }
233  }
234 
235  // Replace all the supported exprs with subqueries with true BoolLiterals
236  // using an smap.
237  BoolLiteral boolLiteral = new BoolLiteral(true);
238  boolLiteral.analyze(analyzer);
239  smap.put(conjunct, boolLiteral);
240  exprsWithSubqueries.add(conjunct);
241  }
242  stmt.whereClause_ = stmt.whereClause_.substitute(smap, analyzer, false);
243 
244  boolean hasNewVisibleTuple = false;
245  // Recursively rewrite all the exprs that contain subqueries and merge them
246  // with 'stmt'.
247  for (Expr expr: exprsWithSubqueries) {
248  if (mergeExpr(stmt, rewriteExpr(expr, analyzer), analyzer)) {
249  hasNewVisibleTuple = true;
250  }
251  }
252  if (canEliminate(stmt.whereClause_)) stmt.whereClause_ = null;
253  if (hasNewVisibleTuple) replaceUnqualifiedStarItems(stmt, numTableRefs);
254  }
255 
262  Subquery subquery = predicate.getSubquery();
263  Preconditions.checkNotNull(subquery);
264  SelectStmt subqueryStmt = (SelectStmt) subquery.getStatement();
265  BoolLiteral boolLiteral = null;
266  if (subqueryStmt.getAnalyzer().hasEmptyResultSet()) {
267  boolLiteral = new BoolLiteral(predicate.isNotExists());
268  } else if (subqueryStmt.hasAggInfo() && subqueryStmt.getAggInfo().hasAggregateExprs()
269  && !subqueryStmt.hasAnalyticInfo() && subqueryStmt.getHavingPred() == null) {
270  boolLiteral = new BoolLiteral(!predicate.isNotExists());
271  }
272  return boolLiteral;
273  }
274 
280  private static Expr replaceBetweenPredicates(Expr expr) {
281  if (expr instanceof BetweenPredicate && expr.contains(Subquery.class)) {
282  return ((BetweenPredicate)expr).getRewrittenPredicate();
283  }
284  for (int i = 0; i < expr.getChildren().size(); ++i) {
285  expr.setChild(i, replaceBetweenPredicates(expr.getChild(i)));
286  }
287  return expr;
288  }
289 
294  private static Expr rewriteExpr(Expr expr, Analyzer analyzer)
295  throws AnalysisException {
296  // Extract the subquery and rewrite it.
297  Subquery subquery = expr.getSubquery();
298  Preconditions.checkNotNull(subquery);
299  rewriteSelectStatement((SelectStmt) subquery.getStatement(), subquery.getAnalyzer());
300  // Create a new Subquery with the rewritten stmt and use a substitution map
301  // to replace the original subquery from the expr.
302  Subquery newSubquery = new Subquery(subquery.getStatement().clone());
303  newSubquery.analyze(analyzer);
305  smap.put(subquery, newSubquery);
306  return expr.substitute(smap, analyzer, false);
307  }
308 
332  private static boolean mergeExpr(SelectStmt stmt, Expr expr,
333  Analyzer analyzer) throws AnalysisException {
334  Preconditions.checkNotNull(expr);
335  Preconditions.checkNotNull(analyzer);
336  boolean updateSelectList = false;
337 
338  SelectStmt subqueryStmt = (SelectStmt)expr.getSubquery().getStatement();
339  // Create a new inline view from the subquery stmt. The inline view will be added
340  // to the stmt's table refs later. Explicitly set the inline view's column labels
341  // to eliminate any chance that column aliases from the parent query could reference
342  // select items from the inline view after the rewrite.
343  List<String> colLabels = Lists.newArrayList();
344  for (int i = 0; i < subqueryStmt.getColLabels().size(); ++i) {
345  colLabels.add(subqueryStmt.getColumnAliasGenerator().getNextAlias());
346  }
347  InlineViewRef inlineView = new InlineViewRef(
348  stmt.getTableAliasGenerator().getNextAlias(), subqueryStmt, colLabels);
349 
350  // Extract all correlated predicates from the subquery.
351  List<Expr> onClauseConjuncts = extractCorrelatedPredicates(subqueryStmt);
352  if (!onClauseConjuncts.isEmpty()) {
354  // For correlated subqueries that are eligible for rewrite by transforming
355  // into a join, a LIMIT clause has no effect on the results, so we can
356  // safely remove it.
357  subqueryStmt.limitElement_ = null;
358  }
359 
360  if (expr instanceof ExistsPredicate) {
361  // For uncorrelated subqueries, we limit the number of rows returned by the
362  // subquery.
363  if (onClauseConjuncts.isEmpty()) subqueryStmt.setLimit(1);
364  }
365 
366  // Update the subquery's select list and/or its GROUP BY clause by adding
367  // exprs from the extracted correlated predicates.
368  boolean updateGroupBy = expr.getSubquery().isScalarSubquery() ||
369  (expr instanceof ExistsPredicate &&
370  subqueryStmt.hasAggInfo() && !subqueryStmt.getSelectList().isDistinct());
371  List<Expr> lhsExprs = Lists.newArrayList();
372  List<Expr> rhsExprs = Lists.newArrayList();
373  for (Expr conjunct: onClauseConjuncts) {
374  updateInlineView(inlineView, conjunct, stmt.getTableRefIds(),
375  lhsExprs, rhsExprs, updateGroupBy);
376  }
377 
378  // Analyzing the inline view trigger reanalysis of the subquery's select statement.
379  // However the statement is already analyzed and since statement analysis is not
380  // idempotent, the analysis needs to be reset (by a call to clone()).
381  inlineView = (InlineViewRef)inlineView.clone();
382  inlineView.analyze(analyzer);
383  inlineView.setLeftTblRef(stmt.tableRefs_.get(stmt.tableRefs_.size() - 1));
384  stmt.tableRefs_.add(inlineView);
386 
387  // Create a join conjunct from the expr that contains a subquery.
388  Expr joinConjunct = createJoinConjunct(expr, inlineView, analyzer,
389  !onClauseConjuncts.isEmpty());
390  if (joinConjunct != null) {
391  SelectListItem firstItem =
392  ((SelectStmt) inlineView.getViewStmt()).getSelectList().getItems().get(0);
393  if (!onClauseConjuncts.isEmpty() &&
394  firstItem.getExpr().contains(Expr.NON_NULL_EMPTY_AGG)) {
395  // Correlated subqueries with an aggregate function that returns non-null on
396  // an empty input are rewritten using a LEFT OUTER JOIN because we
397  // need to ensure that there is one agg value for every tuple of 'stmt'
398  // (parent select block), even for those tuples of 'stmt' that get rejected
399  // by the subquery due to some predicate. The new join conjunct is added to
400  // stmt's WHERE clause because it needs to be applied to the result of the
401  // LEFT OUTER JOIN (both matched and unmatched tuples).
402  //
403  // TODO Handle other aggregate functions and UDAs that return a non-NULL value
404  // on an empty set.
405  // TODO Handle count aggregate functions in an expression in subqueries
406  // select list.
407  stmt.whereClause_ =
408  CompoundPredicate.createConjunction(joinConjunct, stmt.whereClause_);
409  joinConjunct = null;
411  updateSelectList = true;
412  }
413 
414  if (joinConjunct != null) onClauseConjuncts.add(joinConjunct);
415  }
416 
417  // Create the ON clause from the extracted correlated predicates.
418  Expr onClausePredicate =
419  CompoundPredicate.createConjunctivePredicate(onClauseConjuncts);
420 
421  if (onClausePredicate == null) {
422  Preconditions.checkState(expr instanceof ExistsPredicate);
423  // TODO: Remove this when we support independent subquery evaluation.
424  if (((ExistsPredicate)expr).isNotExists()) {
425  throw new AnalysisException("Unsupported uncorrelated NOT EXISTS subquery: " +
426  subqueryStmt.toSql());
427  }
428  // We don't have an ON clause predicate to create an equi-join. Rewrite the
429  // subquery using a CROSS JOIN.
430  // TODO This is very expensive. Remove it when we implement independent
431  // subquery evaluation.
432  inlineView.setJoinOp(JoinOperator.CROSS_JOIN);
433  LOG.warn("uncorrelated subquery rewritten using a cross join");
434  // Indicate that new visible tuples may be added in stmt's select list.
435  return true;
436  }
437 
438  // Create an smap from the original select-list exprs of the select list to
439  // the corresponding inline-view columns.
441  Preconditions.checkState(lhsExprs.size() == rhsExprs.size());
442  for (int i = 0; i < lhsExprs.size(); ++i) {
443  Expr lhsExpr = lhsExprs.get(i);
444  Expr rhsExpr = rhsExprs.get(i);
445  rhsExpr.analyze(analyzer);
446  smap.put(lhsExpr, rhsExpr);
447  }
448  onClausePredicate = onClausePredicate.substitute(smap, analyzer, false);
449 
450  // Check for references to ancestor query blocks (cycles in the dependency
451  // graph of query blocks are not supported).
452  if (!onClausePredicate.isBoundByTupleIds(stmt.getTableRefIds())) {
453  throw new AnalysisException("Unsupported correlated subquery: " +
454  subqueryStmt.toSql());
455  }
456 
457  // Check if we have a valid ON clause for an equi-join.
458  boolean hasEqJoinPred = false;
459  for (Expr conjunct: onClausePredicate.getConjuncts()) {
460  if (!(conjunct instanceof BinaryPredicate) ||
461  ((BinaryPredicate)conjunct).getOp() != BinaryPredicate.Operator.EQ) {
462  continue;
463  }
464  List<TupleId> lhsTupleIds = Lists.newArrayList();
465  conjunct.getChild(0).getIds(lhsTupleIds, null);
466  if (lhsTupleIds.isEmpty()) continue;
467  List<TupleId> rhsTupleIds = Lists.newArrayList();
468  conjunct.getChild(1).getIds(rhsTupleIds, null);
469  if (rhsTupleIds.isEmpty()) continue;
470  // Check if columns from the outer query block (stmt) appear in both sides
471  // of the binary predicate.
472  if ((lhsTupleIds.contains(inlineView.getDesc().getId()) && lhsTupleIds.size() > 1)
473  || (rhsTupleIds.contains(inlineView.getDesc().getId())
474  && rhsTupleIds.size() > 1)) {
475  continue;
476  }
477  hasEqJoinPred = true;
478  break;
479  }
480 
481  if (!hasEqJoinPred) {
482  // TODO: Remove this when independent subquery evaluation is implemented.
483  // TODO: Requires support for non-equi joins.
484  boolean hasGroupBy = ((SelectStmt) inlineView.getViewStmt()).hasGroupByClause();
485  if (!expr.getSubquery().isScalarSubquery() ||
486  (!(hasGroupBy && stmt.selectList_.isDistinct()) && hasGroupBy)) {
487  throw new AnalysisException("Unsupported predicate with subquery: " +
488  expr.toSql());
489  }
490 
491  // We can rewrite the aggregate subquery using a cross join. All conjuncts
492  // that were extracted from the subquery are added to stmt's WHERE clause.
493  stmt.whereClause_ =
494  CompoundPredicate.createConjunction(onClausePredicate, stmt.whereClause_);
495  inlineView.setJoinOp(JoinOperator.CROSS_JOIN);
496  // Indicate that the CROSS JOIN may add a new visible tuple to stmt's
497  // select list (if the latter contains an unqualified star item '*')
498  return true;
499  }
500 
501  // We have a valid equi-join conjunct.
502  if (expr instanceof InPredicate && ((InPredicate)expr).isNotIn() ||
503  expr instanceof ExistsPredicate && ((ExistsPredicate)expr).isNotExists()) {
504  // For the case of a NOT IN with an eq join conjunct, replace the join
505  // conjunct with a conjunct that uses the null-matching eq operator.
506  if (expr instanceof InPredicate) {
508  List<TupleId> tIds = Lists.newArrayList();
509  joinConjunct.getIds(tIds, null);
510  if (tIds.size() <= 1 || !tIds.contains(inlineView.getDesc().getId())) {
511  throw new AnalysisException("Unsupported NOT IN predicate with subquery: " +
512  expr.toSql());
513  }
514  // Replace the EQ operator in the generated join conjunct with a
515  // null-matching EQ operator.
516  for (Expr conjunct: onClausePredicate.getConjuncts()) {
517  if (conjunct.equals(joinConjunct)) {
518  Preconditions.checkState(conjunct instanceof BinaryPredicate);
519  Preconditions.checkState(((BinaryPredicate)conjunct).getOp() ==
520  BinaryPredicate.Operator.EQ);
521  ((BinaryPredicate)conjunct).setOp(BinaryPredicate.Operator.NULL_MATCHING_EQ);
522  break;
523  }
524  }
525  } else {
527  }
528  }
529  inlineView.setJoinOp(joinOp);
530  inlineView.setOnClause(onClausePredicate);
531  return updateSelectList;
532  }
533 
540  private static void replaceUnqualifiedStarItems(SelectStmt stmt, int tableIdx) {
541  Preconditions.checkState(tableIdx < stmt.tableRefs_.size());
542  ArrayList<SelectListItem> newItems = Lists.newArrayList();
543  for (int i = 0; i < stmt.selectList_.getItems().size(); ++i) {
544  SelectListItem item = stmt.selectList_.getItems().get(i);
545  if (!item.isStar() || item.getRawPath() != null) {
546  newItems.add(item);
547  continue;
548  }
549  // '*' needs to be replaced by tbl1.*,...,tbln.*, where
550  // tbl1,...,tbln are the visible tableRefs in stmt.
551  for (int j = 0; j < tableIdx; ++j) {
552  TableRef tableRef = stmt.tableRefs_.get(j);
553  if (tableRef.getJoinOp() == JoinOperator.LEFT_SEMI_JOIN ||
554  tableRef.getJoinOp() == JoinOperator.LEFT_ANTI_JOIN) {
555  continue;
556  }
557  newItems.add(SelectListItem.createStarItem(
558  Lists.newArrayList(tableRef.getUniqueAlias())));
559  }
560  }
561  Preconditions.checkState(!newItems.isEmpty());
562  boolean isDistinct = stmt.selectList_.isDistinct();
563  stmt.selectList_ =
564  new SelectList(newItems, isDistinct, stmt.selectList_.getPlanHints());
565  }
566 
571  private static boolean canEliminate(Expr expr) {
572  for (Expr conjunct: expr.getConjuncts()) {
573  if (!Expr.IS_TRUE_LITERAL.apply(conjunct)) return false;
574  }
575  return true;
576  }
577 
583  private static ArrayList<Expr> extractCorrelatedPredicates(SelectStmt subqueryStmt)
584  throws AnalysisException {
585  List<TupleId> subqueryTupleIds = subqueryStmt.getTableRefIds();
586  ArrayList<Expr> correlatedPredicates = Lists.newArrayList();
587 
588  if (subqueryStmt.hasWhereClause()) {
589  if (!canExtractCorrelatedPredicates(subqueryStmt.getWhereClause(),
590  subqueryTupleIds)) {
591  throw new AnalysisException("Disjunctions with correlated predicates " +
592  "are not supported: " + subqueryStmt.getWhereClause().toSql());
593  }
594  // Extract the correlated predicates from the subquery's WHERE clause and
595  // replace them with true BoolLiterals.
596  Expr newWhereClause = extractCorrelatedPredicates(subqueryStmt.getWhereClause(),
597  subqueryTupleIds, correlatedPredicates);
598  if (canEliminate(newWhereClause)) newWhereClause = null;
599  subqueryStmt.setWhereClause(newWhereClause);
600  }
601 
602  // Process all correlated predicates from subquery's ON clauses.
603  for (TableRef tableRef: subqueryStmt.getTableRefs()) {
604  if (tableRef.getOnClause() == null) continue;
605 
606  ArrayList<Expr> onClauseCorrelatedPreds = Lists.newArrayList();
607  Expr newOnClause = extractCorrelatedPredicates(tableRef.getOnClause(),
608  subqueryTupleIds, onClauseCorrelatedPreds);
609  if (onClauseCorrelatedPreds.isEmpty()) continue;
610 
611  correlatedPredicates.addAll(onClauseCorrelatedPreds);
612  if (canEliminate(newOnClause)) {
613  // After the extraction of correlated predicates from an ON clause,
614  // the latter may only contain conjunctions of True BoolLiterals. In
615  // this case, we can eliminate the ON clause and set the join type to
616  // CROSS JOIN.
617  tableRef.setJoinOp(JoinOperator.CROSS_JOIN);
618  tableRef.setOnClause(null);
619  } else {
620  tableRef.setOnClause(newOnClause);
621  }
622  }
623  return correlatedPredicates;
624  }
625 
631  private static Expr extractCorrelatedPredicates(Expr root, List<TupleId> tupleIds,
632  ArrayList<Expr> matches) {
633  if (isCorrelatedPredicate(root, tupleIds)) {
634  matches.add(root);
635  return new BoolLiteral(true);
636  }
637  for (int i = 0; i < root.getChildren().size(); ++i) {
638  root.getChildren().set(i, extractCorrelatedPredicates(root.getChild(i), tupleIds,
639  matches));
640  }
641  return root;
642  }
643 
650  private static void canRewriteCorrelatedSubquery(Expr expr)
651  throws AnalysisException {
652  Preconditions.checkNotNull(expr);
653  Preconditions.checkState(expr.contains(Subquery.class));
654  SelectStmt stmt = (SelectStmt) expr.getSubquery().getStatement();
655  Preconditions.checkNotNull(stmt);
656  // Grouping and/or aggregation (including analytic functions) is only
657  // allowed on correlated EXISTS subqueries
658  if ((expr instanceof BinaryPredicate
659  && (stmt.hasGroupByClause() || stmt.hasAnalyticInfo()))
660  || (expr instanceof InPredicate
661  && (stmt.hasAggInfo() || stmt.hasAnalyticInfo()))) {
662  throw new AnalysisException("Unsupported correlated subquery with grouping " +
663  "and/or aggregation: " + stmt.toSql());
664  }
665  // The following correlated subqueries with a limit clause are supported:
666  // 1. EXISTS subqueries
667  // 2. Scalar subqueries with aggregation
668  if (stmt.hasLimit() &&
669  (!(expr instanceof BinaryPredicate) || !stmt.hasAggInfo() ||
670  stmt.selectList_.isDistinct()) &&
671  !(expr instanceof ExistsPredicate)) {
672  throw new AnalysisException("Unsupported correlated subquery with a " +
673  "LIMIT clause: " + stmt.toSql());
674  }
675  }
676 
686  private static void updateInlineView(InlineViewRef inlineView, Expr expr,
687  List<TupleId> parentQueryTids, List<Expr> lhsExprs, List<Expr> rhsExprs,
688  boolean updateGroupBy) throws AnalysisException {
689  SelectStmt stmt = (SelectStmt)inlineView.getViewStmt();
690  List<TupleId> subqueryTblIds = stmt.getTableRefIds();
691  ArrayList<Expr> groupByExprs = null;
692  if (updateGroupBy) groupByExprs = Lists.newArrayList();
693 
694  List<SelectListItem> items = stmt.selectList_.getItems();
695  // Collect all the SlotRefs from 'expr' and identify those that are bound by
696  // subquery tuple ids.
697  ArrayList<Expr> slotRefs = Lists.newArrayList();
698  expr.collectAll(Predicates.instanceOf(SlotRef.class), slotRefs);
699  List<Expr> exprsBoundBySubqueryTids = Lists.newArrayList();
700  for (Expr slotRef: slotRefs) {
701  if (slotRef.isBoundByTupleIds(subqueryTblIds)) {
702  exprsBoundBySubqueryTids.add(slotRef);
703  }
704  }
705  // The correlated predicate only references slots from a parent block,
706  // no need to update the subquery's select or group by list.
707  if (exprsBoundBySubqueryTids.isEmpty()) return;
708  if (updateGroupBy) {
709  Preconditions.checkState(expr instanceof BinaryPredicate);
710  Expr exprBoundBySubqueryTids = null;
711  if (exprsBoundBySubqueryTids.size() > 1) {
712  // If the predicate contains multiple SlotRefs bound by subquery tuple
713  // ids, they must all be on the same side of that predicate.
714  if (expr.getChild(0).isBoundByTupleIds(subqueryTblIds) &&
715  expr.getChild(1).isBoundByTupleIds(parentQueryTids)) {
716  exprBoundBySubqueryTids = expr.getChild(0);
717  } else if (expr.getChild(0).isBoundByTupleIds(parentQueryTids) &&
718  expr.getChild(1).isBoundByTupleIds(subqueryTblIds)) {
719  exprBoundBySubqueryTids = expr.getChild(1);
720  } else {
721  throw new AnalysisException("All subquery columns " +
722  "that participate in a predicate must be on the same side of " +
723  "that predicate: " + expr.toSql());
724  }
725  } else {
726  Preconditions.checkState(exprsBoundBySubqueryTids.size() == 1);
727  exprBoundBySubqueryTids = exprsBoundBySubqueryTids.get(0);
728  }
729  exprsBoundBySubqueryTids.clear();
730  exprsBoundBySubqueryTids.add(exprBoundBySubqueryTids);
731  }
732 
733  // Add the exprs bound by subquery tuple ids to the select list and
734  // register it for substitution. We use a temporary substitution map
735  // because we cannot at this point analyze the new select list expr. Once
736  // the new inline view is analyzed, the entries from this map will be
737  // added to an ExprSubstitutionMap.
738  for (Expr boundExpr: exprsBoundBySubqueryTids) {
739  String colAlias = stmt.getColumnAliasGenerator().getNextAlias();
740  items.add(new SelectListItem(boundExpr, null));
741  inlineView.getExplicitColLabels().add(colAlias);
742  lhsExprs.add(boundExpr);
743  rhsExprs.add(new SlotRef(Lists.newArrayList(inlineView.getUniqueAlias(), colAlias)));
744  if (groupByExprs != null) groupByExprs.add(boundExpr);
745  }
746 
747  // Update the subquery's select list.
748  boolean isDistinct = stmt.selectList_.isDistinct();
749  stmt.selectList_ = new SelectList(
750  items, isDistinct, stmt.selectList_.getPlanHints());
751  // Update subquery's GROUP BY clause
752  if (groupByExprs != null && !groupByExprs.isEmpty()) {
753  if (stmt.hasGroupByClause()) {
754  stmt.groupingExprs_.addAll(groupByExprs);
755  } else {
756  stmt.groupingExprs_ = groupByExprs;
757  }
758  }
759  }
760 
765  private static boolean canExtractCorrelatedPredicates(Expr expr,
766  List<TupleId> subqueryTupleIds) {
767  if (!(expr instanceof CompoundPredicate)) return true;
768  if (Expr.IS_OR_PREDICATE.apply(expr)) {
769  return !containsCorrelatedPredicate(expr, subqueryTupleIds);
770  }
771  for (Expr child: expr.getChildren()) {
772  if (!canExtractCorrelatedPredicates(child, subqueryTupleIds)) {
773  return false;
774  }
775  }
776  return true;
777  }
778 
783  private static boolean containsCorrelatedPredicate(Expr root, List<TupleId> tupleIds) {
784  if (isCorrelatedPredicate(root, tupleIds)) return true;
785  for (Expr child: root.getChildren()) {
786  if (containsCorrelatedPredicate(child, tupleIds)) return true;
787  }
788  return false;
789  }
790 
796  private static boolean isCorrelatedPredicate(Expr expr, List<TupleId> tupleIds) {
797  return (expr instanceof BinaryPredicate || expr instanceof SlotRef)
798  && !expr.isBoundByTupleIds(tupleIds);
799  }
800 
809  private static Expr createJoinConjunct(Expr exprWithSubquery, InlineViewRef inlineView,
810  Analyzer analyzer, boolean isCorrelated) throws AnalysisException {
811  Preconditions.checkNotNull(exprWithSubquery);
812  Preconditions.checkNotNull(inlineView);
813  Preconditions.checkState(exprWithSubquery.contains(Subquery.class));
814  if (exprWithSubquery instanceof ExistsPredicate) return null;
815  // Create a SlotRef from the first item of inlineView's select list
816  SlotRef slotRef = new SlotRef(Lists.newArrayList(
817  inlineView.getUniqueAlias(), inlineView.getColLabels().get(0)));
818  slotRef.analyze(analyzer);
819  Expr subquerySubstitute = slotRef;
820  if (exprWithSubquery instanceof InPredicate) {
821  BinaryPredicate pred = new BinaryPredicate(BinaryPredicate.Operator.EQ,
822  exprWithSubquery.getChild(0), slotRef);
823  pred.analyze(analyzer);
824  return pred;
825  }
826  // Only scalar subqueries are supported
827  Subquery subquery = exprWithSubquery.getSubquery();
828  if (!subquery.isScalarSubquery()) {
829  throw new AnalysisException("Unsupported predicate with a non-scalar subquery: "
830  + subquery.toSql());
831  }
833  SelectListItem item =
834  ((SelectStmt) inlineView.getViewStmt()).getSelectList().getItems().get(0);
835  if (isCorrelated && !item.getExpr().contains(Expr.IS_BUILTIN_AGG_FN)) {
836  throw new AnalysisException("UDAs are not supported in the select list of " +
837  "correlated subqueries: " + subquery.toSql());
838  }
839  if (isCorrelated && item.getExpr().contains(Expr.NON_NULL_EMPTY_AGG)) {
840  // TODO: Add support for multiple agg functions that return non-null on an
841  // empty input, by wrapping them with zeroifnull functions before the inline
842  // view is analyzed.
843  if (!Expr.NON_NULL_EMPTY_AGG.apply(item.getExpr()) &&
844  (!(item.getExpr() instanceof CastExpr) ||
845  !Expr.NON_NULL_EMPTY_AGG.apply(item.getExpr().getChild(0)))) {
846  throw new AnalysisException("Aggregate function that returns non-null on " +
847  "an empty input cannot be used in an expression in a " +
848  "correlated subquery's select list: " + subquery.toSql());
849  }
850 
851  List<Expr> aggFns = Lists.newArrayList();
852  item.getExpr().collectAll(Expr.NON_NULL_EMPTY_AGG, aggFns);
853  // TODO Generalize this by making the aggregate functions aware of the
854  // literal expr that they return on empty input, e.g. max returns a
855  // NullLiteral whereas count returns a NumericLiteral.
856  if (((FunctionCallExpr)aggFns.get(0)).getReturnType().isNumericType()) {
857  FunctionCallExpr zeroIfNull = new FunctionCallExpr("zeroifnull",
858  Lists.newArrayList((Expr) slotRef));
859  zeroIfNull.analyze(analyzer);
860  subquerySubstitute = zeroIfNull;
861  } else if (((FunctionCallExpr)aggFns.get(0)).getReturnType().isStringType()) {
862  List<Expr> params = Lists.newArrayList();
863  params.add(slotRef);
864  params.add(new StringLiteral(""));
865  FunctionCallExpr ifnull = new FunctionCallExpr("ifnull", params);
866  ifnull.analyze(analyzer);
867  subquerySubstitute = ifnull;
868  } else {
869  throw new AnalysisException("Unsupported aggregate function used in " +
870  "a correlated subquery's select list: " + subquery.toSql());
871  }
872  }
873  smap.put(subquery, subquerySubstitute);
874  return exprWithSubquery.substitute(smap, analyzer, false);
875  }
876 }
static StatementBase rewrite(AnalysisResult analysisResult)
static final com.google.common.base.Predicate< Expr > NON_NULL_EMPTY_AGG
Definition: Expr.java:106
static Expr createJoinConjunct(Expr exprWithSubquery, InlineViewRef inlineView, Analyzer analyzer, boolean isCorrelated)
static Expr extractCorrelatedPredicates(Expr root, List< TupleId > tupleIds, ArrayList< Expr > matches)
static void updateInlineView(InlineViewRef inlineView, Expr expr, List< TupleId > parentQueryTids, List< Expr > lhsExprs, List< Expr > rhsExprs, boolean updateGroupBy)
static void rewriteUnionStatement(UnionStmt stmt, Analyzer analyzer)
static boolean mergeExpr(SelectStmt stmt, Expr expr, Analyzer analyzer)
static boolean hasSubqueryInDisjunction(Expr expr)
static BoolLiteral replaceExistsPredicate(ExistsPredicate predicate)
static final com.google.common.base.Predicate< Expr > IS_TRUE_LITERAL
Definition: Expr.java:124
static void replaceUnqualifiedStarItems(SelectStmt stmt, int tableIdx)
static boolean canExtractCorrelatedPredicates(Expr expr, List< TupleId > subqueryTupleIds)
static void rewriteQueryStatement(QueryStmt stmt, Analyzer analyzer)
static Expr replaceBetweenPredicates(Expr expr)
static Expr rewriteExpr(Expr expr, Analyzer analyzer)
static boolean containsCorrelatedPredicate(Expr root, List< TupleId > tupleIds)
static void rewriteWhereClauseSubqueries(SelectStmt stmt, Analyzer analyzer)
static void rewriteSelectStatement(SelectStmt stmt, Analyzer analyzer)
static void canRewriteCorrelatedSubquery(Expr expr)
static final com.google.common.base.Predicate< Expr > IS_OR_PREDICATE
Definition: Expr.java:85
static ArrayList< Expr > extractCorrelatedPredicates(SelectStmt subqueryStmt)
static final com.google.common.base.Predicate< Expr > IS_SCALAR_SUBQUERY
Definition: Expr.java:95
static boolean canEliminate(Expr expr)
boolean isBoundByTupleIds(List< TupleId > tids)
Definition: Expr.java:852
static boolean isCorrelatedPredicate(Expr expr, List< TupleId > tupleIds)
static final com.google.common.base.Predicate< Expr > IS_BUILTIN_AGG_FN
Definition: Expr.java:115