Impala
Impalaistheopensource,nativeanalyticdatabaseforApacheHadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
InPredicate.java
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 package com.cloudera.impala.analysis;
16 
17 import java.util.ArrayList;
18 import java.util.List;
19 
26 import com.cloudera.impala.common.Reference;
27 import com.cloudera.impala.thrift.TExprNode;
28 import com.cloudera.impala.thrift.TExprNodeType;
29 import com.google.common.base.Preconditions;
30 import com.google.common.collect.Lists;
31 
37 public class InPredicate extends Predicate {
38  private static final String IN_SET_LOOKUP = "in_set_lookup";
39  private static final String NOT_IN_SET_LOOKUP = "not_in_set_lookup";
40  private static final String IN_ITERATE= "in_iterate";
41  private static final String NOT_IN_ITERATE = "not_in_iterate";
42  private final boolean isNotIn_;
43 
44  public boolean isNotIn() { return isNotIn_; }
45 
46  public static void initBuiltins(Db db) {
47  for (Type t: Type.getSupportedTypes()) {
48  if (t.isNull()) continue;
49  // TODO we do not support codegen for CHAR and the In predicate must be codegened
50  // because it has variable number of arguments. This will force CHARs to be
51  // cast up to strings; meaning that "in" comparisons will not have CHAR comparison
52  // semantics.
53  if (t.getPrimitiveType() == PrimitiveType.CHAR) continue;
54 
55  String typeString = t.getPrimitiveType().toString().toLowerCase();
56  if (t.isScalarType(PrimitiveType.VARCHAR)) typeString = "string";
57 
58  db.addBuiltin(ScalarFunction.createBuiltin(IN_ITERATE,
59  Lists.newArrayList(t, t), true, Type.BOOLEAN,
60  "impala::InPredicate::InIterate", null, null, false));
61  db.addBuiltin(ScalarFunction.createBuiltin(NOT_IN_ITERATE,
62  Lists.newArrayList(t, t), true, Type.BOOLEAN,
63  "impala::InPredicate::NotInIterate", null, null, false));
64 
65  String prepareFn = "impala::InPredicate::SetLookupPrepare_" + typeString;
66  String closeFn = "impala::InPredicate::SetLookupClose_" + typeString;
67 
68  db.addBuiltin(ScalarFunction.createBuiltin(IN_SET_LOOKUP,
69  Lists.newArrayList(t, t), true, Type.BOOLEAN,
70  "impala::InPredicate::InSetLookup", prepareFn, closeFn, false));
71  db.addBuiltin(ScalarFunction.createBuiltin(NOT_IN_SET_LOOKUP,
72  Lists.newArrayList(t, t), true, Type.BOOLEAN,
73  "impala::InPredicate::NotInSetLookup", prepareFn, closeFn, false));
74 
75  }
76  }
77 
78  // First child is the comparison expr for which we
79  // should check membership in the inList (the remaining children).
80  public InPredicate(Expr compareExpr, List<Expr> inList, boolean isNotIn) {
81  children_.add(compareExpr);
82  children_.addAll(inList);
83  isNotIn_ = isNotIn;
84  }
85 
86  // C'tor for initializing an [NOT] IN predicate with a subquery child.
87  public InPredicate(Expr compareExpr, Expr subquery, boolean isNotIn) {
88  Preconditions.checkNotNull(compareExpr);
89  Preconditions.checkNotNull(subquery);
90  children_.add(compareExpr);
91  children_.add(subquery);
92  isNotIn_ = isNotIn;
93  }
94 
98  protected InPredicate(InPredicate other) {
99  super(other);
100  isNotIn_ = other.isNotIn_;
101  }
102 
103  @Override
104  public void analyze(Analyzer analyzer) throws AnalysisException {
105  if (isAnalyzed_) return;
106  super.analyze(analyzer);
107 
108  if (contains(Subquery.class)) {
109  // An [NOT] IN predicate with a subquery must contain two children, the second of
110  // which is a Subquery.
111  if (children_.size() != 2 || !(getChild(1) instanceof Subquery)) {
112  throw new AnalysisException("Unsupported IN predicate with a subquery: " +
113  toSqlImpl());
114  }
115  Subquery subquery = (Subquery)getChild(1);
116  if (!subquery.returnsScalarColumn()) {
117  throw new AnalysisException("Subquery must return a single column: " +
118  subquery.toSql());
119  }
120 
121  // Ensure that the column in the lhs of the IN predicate and the result of
122  // the subquery are type compatible. No need to perform any
123  // casting at this point. Any casting needed will be performed when the
124  // subquery is unnested.
125  ArrayList<Expr> subqueryExprs = subquery.getStatement().getResultExprs();
126  Expr compareExpr = children_.get(0);
127  Expr subqueryExpr = subqueryExprs.get(0);
128  analyzer.getCompatibleType(compareExpr.getType(), compareExpr, subqueryExpr);
129  } else {
130  Preconditions.checkState(getChildren().size() >= 2);
131  analyzer.castAllToCompatibleType(children_);
132  Type childType = children_.get(0).getType();
133 
134  if (childType.isNull()) {
135  // Make sure the BE never sees TYPE_NULL by picking an arbitrary type
136  for (int i = 0; i < children_.size(); ++i) {
138  }
139  }
140 
141  // Choose SetLookup or Iterate strategy. SetLookup can be used if all the exprs in
142  // the IN list are constant, and is faster than iterating if the IN list is big
143  // enough.
144  boolean allConstant = true;
145  for (int i = 1; i < children_.size(); ++i) {
146  if (!children_.get(i).isConstant()) {
147  allConstant = false;
148  break;
149  }
150  }
151  boolean useSetLookup = allConstant;
152  // Threshold based on InPredicateBenchmark results
153  int setLookupThreshold = children_.get(0).getType().isStringType() ? 6 : 2;
154  if (children_.size() - 1 < setLookupThreshold) useSetLookup = false;
155 
156  // Only lookup fn_ if all subqueries have been rewritten. If the second child is a
157  // subquery, it will have type ArrayType, which cannot be resolved to a builtin
158  // function and will fail analysis.
159  Type[] argTypes = {getChild(0).type_, getChild(1).type_};
160  if (useSetLookup) {
162  argTypes, CompareMode.IS_SUPERTYPE_OF);
163  } else {
165  argTypes, CompareMode.IS_SUPERTYPE_OF);
166  }
167  Preconditions.checkNotNull(fn_);
168  Preconditions.checkState(fn_.getReturnType().isBoolean());
169  castForFunctionCall(false);
170  }
171 
172  // TODO: Fix selectivity_ for nested predicate
173  Reference<SlotRef> slotRefRef = new Reference<SlotRef>();
174  Reference<Integer> idxRef = new Reference<Integer>();
175  if (isSingleColumnPredicate(slotRefRef, idxRef)
176  && idxRef.getRef() == 0
177  && slotRefRef.getRef().getNumDistinctValues() > 0) {
178  selectivity_ = (double) (getChildren().size() - 1)
179  / (double) slotRefRef.getRef().getNumDistinctValues();
180  selectivity_ = Math.max(0.0, Math.min(1.0, selectivity_));
181  } else {
183  }
184  }
185 
186  @Override
187  protected void toThrift(TExprNode msg) {
188  // Can't serialize a predicate with a subquery
189  Preconditions.checkState(!contains(Subquery.class));
190  msg.node_type = TExprNodeType.FUNCTION_CALL;
191  }
192 
193  @Override
194  public String toSqlImpl() {
195  StringBuilder strBuilder = new StringBuilder();
196  String notStr = (isNotIn_) ? "NOT " : "";
197  strBuilder.append(getChild(0).toSql() + " " + notStr + "IN ");
198  boolean hasSubquery = contains(Subquery.class);
199  if (!hasSubquery) strBuilder.append("(");
200  for (int i = 1; i < children_.size(); ++i) {
201  strBuilder.append(getChild(i).toSql());
202  strBuilder.append((i+1 != children_.size()) ? ", " : "");
203  }
204  if (!hasSubquery) strBuilder.append(")");
205  return strBuilder.toString();
206  }
207 
208  /*
209  * If predicate is of the form "<SlotRef> [NOT] IN", returns the
210  * SlotRef.
211  */
212  @Override
214  return getChild(0).unwrapSlotRef(true);
215  }
216 
220  @Override
221  public Expr negate() {
222  return new InPredicate(getChild(0), children_.subList(1, children_.size()),
223  !isNotIn_);
224  }
225 
226  @Override
227  public Expr clone() { return new InPredicate(this); }
228 }
InPredicate(Expr compareExpr, List< Expr > inList, boolean isNotIn)
void uncheckedCastChild(Type targetType, int childIndex)
Definition: Expr.java:1024
void castForFunctionCall(boolean ignoreWildcardDecimals)
Definition: Expr.java:312
static final ScalarType BOOLEAN
Definition: Type.java:46
InPredicate(Expr compareExpr, Expr subquery, boolean isNotIn)
PrimitiveType
Definition: types.h:27
Function getBuiltinFunction(Analyzer analyzer, String name, Type[] argTypes, CompareMode mode)
Definition: Expr.java:290
boolean isSingleColumnPredicate(Reference< SlotRef > slotRefRef, Reference< Integer > idxRef)
Definition: Predicate.java:64
static ArrayList< ScalarType > getSupportedTypes()
Definition: Type.java:109
static double DEFAULT_SELECTIVITY
Definition: Expr.java:63