Impala
Impalaistheopensource,nativeanalyticdatabaseforApacheHadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
UnionStmt.java
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 package com.cloudera.impala.analysis;
16 
17 import java.util.ArrayList;
18 import java.util.List;
19 
20 import org.slf4j.Logger;
21 import org.slf4j.LoggerFactory;
22 
26 import com.google.common.base.Preconditions;
27 import com.google.common.collect.Lists;
28 
37 public class UnionStmt extends QueryStmt {
38  private final static Logger LOG = LoggerFactory.getLogger(UnionStmt.class);
39 
40  public static enum Qualifier {
41  ALL,
42  DISTINCT
43  }
44 
50  public static class UnionOperand {
51  private final QueryStmt queryStmt_;
52  // Null for the first operand.
54 
55  // Analyzer used for this operand. Set in analyze().
56  // We must preserve the conjuncts registered in the analyzer for partition pruning.
58 
59  // map from UnionStmts result slots to our resultExprs; useful during plan generation
61 
62  // set if this operand is guaranteed to return an empty result set;
63  // used in planning when assigning conjuncts
64  private boolean isDropped_ = false;
65 
66  public UnionOperand(QueryStmt queryStmt, Qualifier qualifier) {
67  this.queryStmt_ = queryStmt;
68  this.qualifier_ = qualifier;
69  }
70 
71  public void analyze(Analyzer parent) throws AnalysisException {
72  analyzer_ = new Analyzer(parent);
73  queryStmt_.analyze(analyzer_);
74  }
75 
76  public QueryStmt getQueryStmt() { return queryStmt_; }
77  public Qualifier getQualifier() { return qualifier_; }
78  // Used for propagating DISTINCT.
79  public void setQualifier(Qualifier qualifier) { this.qualifier_ = qualifier; }
80  public Analyzer getAnalyzer() { return analyzer_; }
81  public ExprSubstitutionMap getSmap() { return smap_; }
82  public void drop() { isDropped_ = true; }
83  public boolean isDropped() { return isDropped_; }
84 
85  @Override
86  public UnionOperand clone() {
87  return new UnionOperand(queryStmt_.clone(), qualifier_);
88  }
89 
90  public boolean hasAnalyticExprs() {
91  if (queryStmt_ instanceof SelectStmt) {
92  return ((SelectStmt) queryStmt_).hasAnalyticInfo();
93  } else {
94  Preconditions.checkState(queryStmt_ instanceof UnionStmt);
96  }
97  }
98  }
99 
100  // before analysis, this contains the list of union operands derived verbatim
101  // from the query;
102  // after analysis, this contains all of distinctOperands followed by allOperands
103  protected final List<UnionOperand> operands_;
104 
105  // filled during analyze(); contains all operands that need to go through
106  // distinct aggregation
107  protected final List<UnionOperand> distinctOperands_ = Lists.newArrayList();
108 
109  // filled during analyze(); contains all operands that can be aggregated with
110  // a simple merge without duplicate elimination (also needs to merge the output
111  // of the DISTINCT operands)
112  protected final List<UnionOperand> allOperands_ = Lists.newArrayList();
113 
114  protected AggregateInfo distinctAggInfo_; // only set if we have DISTINCT ops
115 
116  // Single tuple materialized by the union. Set in analyze().
117  protected TupleId tupleId_;
118 
119  // set prior to unnesting
120  protected String toSqlString_ = null;
121 
122  // true if any of the operands_ references an AnalyticExpr
123  private boolean hasAnalyticExprs_ = false;
124 
125  public UnionStmt(List<UnionOperand> operands,
126  ArrayList<OrderByElement> orderByElements, LimitElement limitElement) {
127  super(orderByElements, limitElement);
128  this.operands_ = operands;
129  }
130 
131  public List<UnionOperand> getOperands() { return operands_; }
132  public List<UnionOperand> getDistinctOperands() { return distinctOperands_; }
133  public boolean hasDistinctOps() { return !distinctOperands_.isEmpty(); }
134  public List<UnionOperand> getAllOperands() { return allOperands_; }
135  public boolean hasAllOps() { return !allOperands_.isEmpty(); }
137  public boolean hasAnalyticExprs() { return hasAnalyticExprs_; }
138 
139  public void removeAllOperands() {
140  operands_.removeAll(allOperands_);
141  allOperands_.clear();
142  }
143 
148  @Override
149  public void analyze(Analyzer analyzer) throws AnalysisException {
150  try {
151  super.analyze(analyzer);
152  } catch (AnalysisException e) {
153  if (analyzer.getMissingTbls().isEmpty()) throw e;
154  }
155  Preconditions.checkState(operands_.size() > 0);
156 
157  // Propagates DISTINCT from right to left
159 
160  // Make sure all operands return an equal number of exprs.
161  QueryStmt firstQuery = operands_.get(0).getQueryStmt();
162 
163  try {
164  operands_.get(0).analyze(analyzer);
165  } catch (AnalysisException e) {
166  if (analyzer.getMissingTbls().isEmpty()) throw e;
167  }
168 
169  List<List<Expr>> resultExprLists = Lists.newArrayList();
170  List<Expr> firstQueryExprs = firstQuery.getBaseTblResultExprs();
171  resultExprLists.add(firstQueryExprs);
172  for (int i = 1; i < operands_.size(); ++i) {
173  QueryStmt query = operands_.get(i).getQueryStmt();
174  try {
175  operands_.get(i).analyze(analyzer);
176  List<Expr> exprs = query.getBaseTblResultExprs();
177  if (firstQueryExprs.size() != exprs.size()) {
178  throw new AnalysisException("Operands have unequal number of columns:\n" +
179  "'" + queryStmtToSql(firstQuery) + "' has " +
180  firstQueryExprs.size() + " column(s)\n" +
181  "'" + queryStmtToSql(query) + "' has " + exprs.size() + " column(s)");
182  }
183  resultExprLists.add(exprs);
184  } catch (AnalysisException e) {
185  if (analyzer.getMissingTbls().isEmpty()) throw e;
186  }
187  }
188 
189  if (!analyzer.getMissingTbls().isEmpty()) {
190  throw new AnalysisException("Found missing tables. Aborting analysis.");
191  }
192 
193  // compute hasAnalyticExprs_
194  hasAnalyticExprs_ = false;
195  for (UnionOperand op: operands_) {
196  if (op.hasAnalyticExprs()) {
197  hasAnalyticExprs_ = true;
198  break;
199  }
200  }
201 
202  analyzer.castToUnionCompatibleTypes(resultExprLists);
203 
204  // Create tuple descriptor materialized by this UnionStmt,
205  // its resultExprs, and its sortInfo if necessary.
206  createMetadata(analyzer);
207  createSortInfo(analyzer);
208  toSqlString_ = toSql();
209 
210  unnestOperands(analyzer);
211  if (evaluateOrderBy_) createSortTupleInfo(analyzer);
213  }
214 
220  @Override
221  public void materializeRequiredSlots(Analyzer analyzer) throws InternalException {
222  TupleDescriptor tupleDesc = analyzer.getDescTbl().getTupleDesc(tupleId_);
223  if (!distinctOperands_.isEmpty()) {
224  // to keep things simple we materialize all grouping exprs = output slots,
225  // regardless of what's being referenced externally
226  for (SlotDescriptor slotDesc: tupleDesc.getSlots()) {
227  slotDesc.setIsMaterialized(true);
228  }
229  }
230 
231  if (evaluateOrderBy_) {
232  sortInfo_.materializeRequiredSlots(analyzer, null);
233  }
234 
235  // collect operands' result exprs
236  List<SlotDescriptor> outputSlots = tupleDesc.getSlots();
237  List<Expr> exprs = Lists.newArrayList();
238  for (int i = 0; i < outputSlots.size(); ++i) {
239  SlotDescriptor slotDesc = outputSlots.get(i);
240  if (!slotDesc.isMaterialized()) continue;
241  for (UnionOperand op: operands_) {
242  exprs.add(op.getQueryStmt().getBaseTblResultExprs().get(i));
243  }
244  if (distinctAggInfo_ != null) {
245  // also mark the corresponding slot in the distinct agg tuple as being
246  // materialized
247  distinctAggInfo_.getOutputTupleDesc().getSlots().get(i).setIsMaterialized(true);
248  }
249  }
250  materializeSlots(analyzer, exprs);
251 
252  for (UnionOperand op: operands_) {
253  op.getQueryStmt().materializeRequiredSlots(analyzer);
254  }
255  }
256 
261  private void unnestOperands(Analyzer analyzer) throws AnalysisException {
262  if (operands_.size() == 1) {
263  // ValuesStmt for a single row.
264  allOperands_.add(operands_.get(0));
265  setOperandSmap(operands_.get(0), analyzer);
266  return;
267  }
268 
269  // find index of first ALL operand
270  int firstUnionAllIdx = operands_.size();
271  for (int i = 1; i < operands_.size(); ++i) {
272  UnionOperand operand = operands_.get(i);
273  if (operand.getQualifier() == Qualifier.ALL) {
274  firstUnionAllIdx = (i == 1 ? 0 : i);
275  break;
276  }
277  }
278  // operands[0] is always implicitly ALL, so operands[1] can't be the
279  // first one
280  Preconditions.checkState(firstUnionAllIdx != 1);
281 
282  // unnest DISTINCT operands
283  Preconditions.checkState(distinctOperands_.isEmpty());
284  for (int i = 0; i < firstUnionAllIdx; ++i) {
286  }
287 
288  // unnest ALL operands
289  Preconditions.checkState(allOperands_.isEmpty());
290  for (int i = firstUnionAllIdx; i < operands_.size(); ++i) {
292  }
293 
294  operands_.clear();
295  operands_.addAll(distinctOperands_);
296  operands_.addAll(allOperands_);
297 
298  // create unnested operands' smaps
299  for (UnionOperand operand: operands_) {
300  setOperandSmap(operand, analyzer);
301  }
302 
303  // create distinctAggInfo, if necessary
304  if (!distinctOperands_.isEmpty()) {
305  // Aggregate produces exactly the same tuple as the original union stmt.
306  ArrayList<Expr> groupingExprs = Expr.cloneList(resultExprs_);
307  try {
309  AggregateInfo.create(groupingExprs, null,
310  analyzer.getDescTbl().getTupleDesc(tupleId_), analyzer);
311  } catch (AnalysisException e) {
312  // this should never happen
313  throw new AnalysisException("error creating agg info in UnionStmt.analyze()");
314  }
315  }
316  }
317 
322  private void setOperandSmap(UnionOperand operand, Analyzer analyzer) {
323  TupleDescriptor tupleDesc = analyzer.getDescTbl().getTupleDesc(tupleId_);
324  // operands' smaps were already set in the operands' analyze()
325  operand.getSmap().clear();
326  for (int i = 0; i < tupleDesc.getSlots().size(); ++i) {
327  SlotDescriptor outputSlot = tupleDesc.getSlots().get(i);
328  operand.getSmap().put(
329  new SlotRef(outputSlot),
330  // TODO: baseTblResultExprs?
331  operand.getQueryStmt().getResultExprs().get(i).clone());
332  }
333  }
334 
339  private void unnestOperand(
340  List<UnionOperand> target, Qualifier targetQualifier, UnionOperand operand) {
341  QueryStmt queryStmt = operand.getQueryStmt();
342  if (queryStmt instanceof SelectStmt) {
343  target.add(operand);
344  return;
345  }
346 
347  Preconditions.checkState(queryStmt instanceof UnionStmt);
348  UnionStmt unionStmt = (UnionStmt) queryStmt;
349  if (unionStmt.hasLimit() || unionStmt.hasOffset()) {
350  // we must preserve the nested Union
351  target.add(operand);
352  } else if (targetQualifier == Qualifier.DISTINCT || !unionStmt.hasDistinctOps()) {
353  // there is no limit in the nested Union and we can absorb all of its
354  // operands as-is
355  target.addAll(unionStmt.getDistinctOperands());
356  target.addAll(unionStmt.getAllOperands());
357  } else {
358  // the nested Union contains some Distinct ops and we're accumulating
359  // into our All ops; unnest only the All ops and leave the rest in place
360  target.addAll(unionStmt.getAllOperands());
361  unionStmt.removeAllOperands();
362  target.add(operand);
363  }
364  }
365 
370  protected String queryStmtToSql(QueryStmt queryStmt) {
371  return queryStmt.toSql();
372  }
373 
380  private void propagateDistinct() {
381  int lastDistinctPos = -1;
382  for (int i = operands_.size() - 1; i > 0; --i) {
383  UnionOperand operand = operands_.get(i);
384  if (lastDistinctPos != -1) {
385  // There is a DISTINCT somewhere to the right.
386  operand.setQualifier(Qualifier.DISTINCT);
387  } else if (operand.getQualifier() == Qualifier.DISTINCT) {
388  lastDistinctPos = i;
389  }
390  }
391  }
392 
399  private void createMetadata(Analyzer analyzer) throws AnalysisException {
400  // Create tuple descriptor for materialized tuple created by the union.
401  TupleDescriptor tupleDesc = analyzer.getDescTbl().createTupleDescriptor("union");
402  tupleDesc.setIsMaterialized(true);
403  tupleId_ = tupleDesc.getId();
404  LOG.trace("UnionStmt.createMetadata: tupleId=" + tupleId_.toString());
405 
406  // One slot per expr in the select blocks. Use first select block as representative.
407  List<Expr> firstSelectExprs = operands_.get(0).getQueryStmt().getBaseTblResultExprs();
408 
409  // Compute column stats for the materialized slots from the source exprs.
410  List<ColumnStats> columnStats = Lists.newArrayList();
411  for (int i = 0; i < operands_.size(); ++i) {
412  List<Expr> selectExprs = operands_.get(i).getQueryStmt().getBaseTblResultExprs();
413  for (int j = 0; j < selectExprs.size(); ++j) {
414  ColumnStats statsToAdd = ColumnStats.fromExpr(selectExprs.get(j));
415  if (i == 0) {
416  columnStats.add(statsToAdd);
417  } else {
418  columnStats.get(j).add(statsToAdd);
419  }
420  }
421  }
422 
423  // Create tuple descriptor and slots.
424  for (int i = 0; i < firstSelectExprs.size(); ++i) {
425  Expr expr = firstSelectExprs.get(i);
426  SlotDescriptor slotDesc = analyzer.addSlotDescriptor(tupleDesc);
427  slotDesc.setLabel(getColLabels().get(i));
428  slotDesc.setType(expr.getType());
429  slotDesc.setStats(columnStats.get(i));
430  SlotRef outputSlotRef = new SlotRef(slotDesc);
431  resultExprs_.add(outputSlotRef);
432 
433  // Add to aliasSMap so that column refs in "order by" can be resolved.
434  if (orderByElements_ != null) {
435  SlotRef aliasRef = new SlotRef(getColLabels().get(i));
436  if (aliasSmap_.containsMappingFor(aliasRef)) {
437  ambiguousAliasList_.add(aliasRef);
438  } else {
439  aliasSmap_.put(aliasRef, outputSlotRef);
440  }
441  }
442 
443  // register single-directional value transfers from output slot
444  // to operands' result exprs (if those happen to be slotrefs);
445  // don't do that if the operand computes analytic exprs
446  // (see Planner.createInlineViewPlan() for the reasoning)
447  for (UnionOperand op: operands_) {
448  Expr resultExpr = op.getQueryStmt().getBaseTblResultExprs().get(i);
449  slotDesc.addSourceExpr(resultExpr);
450  if (op.hasAnalyticExprs()) continue;
451  SlotRef slotRef = resultExpr.unwrapSlotRef(true);
452  if (slotRef == null) continue;
453  analyzer.registerValueTransfer(outputSlotRef.getSlotId(), slotRef.getSlotId());
454  }
455  }
457  }
458 
459  public TupleId getTupleId() { return tupleId_; }
460 
461  @Override
462  public void getMaterializedTupleIds(ArrayList<TupleId> tupleIdList) {
463  // Return the sort tuple if there is an evaluated order by.
464  if (evaluateOrderBy_) {
465  tupleIdList.add(sortInfo_.getSortTupleDescriptor().getId());
466  } else {
467  tupleIdList.add(tupleId_);
468  }
469  }
470 
471  @Override
472  public String toSql() {
473  if (toSqlString_ != null) return toSqlString_;
474  StringBuilder strBuilder = new StringBuilder();
475  Preconditions.checkState(operands_.size() > 0);
476 
477  if (withClause_ != null) {
478  strBuilder.append(withClause_.toSql());
479  strBuilder.append(" ");
480  }
481 
482  strBuilder.append(operands_.get(0).getQueryStmt().toSql());
483  for (int i = 1; i < operands_.size() - 1; ++i) {
484  strBuilder.append(" UNION " +
485  ((operands_.get(i).getQualifier() == Qualifier.ALL) ? "ALL " : ""));
486  if (operands_.get(i).getQueryStmt() instanceof UnionStmt) {
487  strBuilder.append("(");
488  }
489  strBuilder.append(operands_.get(i).getQueryStmt().toSql());
490  if (operands_.get(i).getQueryStmt() instanceof UnionStmt) {
491  strBuilder.append(")");
492  }
493  }
494  // Determine whether we need parenthesis around the last union operand.
495  UnionOperand lastOperand = operands_.get(operands_.size() - 1);
496  QueryStmt lastQueryStmt = lastOperand.getQueryStmt();
497  strBuilder.append(" UNION " +
498  ((lastOperand.getQualifier() == Qualifier.ALL) ? "ALL " : ""));
499  if (lastQueryStmt instanceof UnionStmt ||
500  ((hasOrderByClause() || hasLimit() || hasOffset()) &&
501  !lastQueryStmt.hasLimit() && !lastQueryStmt.hasOffset() &&
502  !lastQueryStmt.hasOrderByClause())) {
503  strBuilder.append("(");
504  strBuilder.append(lastQueryStmt.toSql());
505  strBuilder.append(")");
506  } else {
507  strBuilder.append(lastQueryStmt.toSql());
508  }
509  // Order By clause
510  if (hasOrderByClause()) {
511  strBuilder.append(" ORDER BY ");
512  for (int i = 0; i < orderByElements_.size(); ++i) {
513  strBuilder.append(orderByElements_.get(i).toSql());
514  strBuilder.append((i+1 != orderByElements_.size()) ? ", " : "");
515  }
516  }
517  // Limit clause.
518  strBuilder.append(limitElement_.toSql());
519  return strBuilder.toString();
520  }
521 
522  @Override
523  public ArrayList<String> getColLabels() {
524  Preconditions.checkState(operands_.size() > 0);
525  return operands_.get(0).getQueryStmt().getColLabels();
526  }
527 
528  @Override
529  public QueryStmt clone() {
530  List<UnionOperand> operandClones = Lists.newArrayList();
531  for (UnionOperand operand: operands_) {
532  operandClones.add(operand.clone());
533  }
534  UnionStmt unionClone = new UnionStmt(operandClones, cloneOrderByElements(),
535  limitElement_ == null ? null : limitElement_.clone());
536  unionClone.setWithClause(cloneWithClause());
537  unionClone.hasAnalyticExprs_ = hasAnalyticExprs_;
538  return unionClone;
539  }
540 }
void unnestOperands(Analyzer analyzer)
Definition: UnionStmt.java:261
void setOperandSmap(UnionOperand operand, Analyzer analyzer)
Definition: UnionStmt.java:322
ArrayList< OrderByElement > cloneOrderByElements()
Definition: QueryStmt.java:311
void getMaterializedTupleIds(ArrayList< TupleId > tupleIdList)
Definition: UnionStmt.java:462
List< UnionOperand > getAllOperands()
Definition: UnionStmt.java:134
void materializeRequiredSlots(Analyzer analyzer)
Definition: UnionStmt.java:221
final List< UnionOperand > operands_
Definition: UnionStmt.java:103
void createSortTupleInfo(Analyzer analyzer)
Definition: QueryStmt.java:185
void analyze(Analyzer analyzer)
Definition: UnionStmt.java:149
void createMetadata(Analyzer analyzer)
Definition: UnionStmt.java:399
List< UnionOperand > getDistinctOperands()
Definition: UnionStmt.java:132
UnionStmt(List< UnionOperand > operands, ArrayList< OrderByElement > orderByElements, LimitElement limitElement)
Definition: UnionStmt.java:125
UnionOperand(QueryStmt queryStmt, Qualifier qualifier)
Definition: UnionStmt.java:66
void unnestOperand(List< UnionOperand > target, Qualifier targetQualifier, UnionOperand operand)
Definition: UnionStmt.java:339
final ExprSubstitutionMap aliasSmap_
Definition: QueryStmt.java:62
final List< UnionOperand > allOperands_
Definition: UnionStmt.java:112
void materializeSlots(Analyzer analyzer, List< Expr > exprs)
Definition: QueryStmt.java:303
String queryStmtToSql(QueryStmt queryStmt)
Definition: UnionStmt.java:370
ArrayList< Expr > getBaseTblResultExprs()
Definition: QueryStmt.java:280
ArrayList< OrderByElement > orderByElements_
Definition: QueryStmt.java:44
final List< UnionOperand > distinctOperands_
Definition: UnionStmt.java:107
void createSortInfo(Analyzer analyzer)
Definition: QueryStmt.java:115
ArrayList< String > getColLabels()
Definition: UnionStmt.java:523
List< UnionOperand > getOperands()
Definition: UnionStmt.java:131