Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
Function.java
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 package com.cloudera.impala.catalog;
16 
17 import java.util.List;
18 
24 import com.cloudera.impala.thrift.TAggregateFunction;
25 import com.cloudera.impala.thrift.TCatalogObjectType;
26 import com.cloudera.impala.thrift.TColumnType;
27 import com.cloudera.impala.thrift.TFunction;
28 import com.cloudera.impala.thrift.TFunctionBinaryType;
29 import com.cloudera.impala.thrift.TScalarFunction;
30 import com.cloudera.impala.thrift.TSymbolLookupParams;
31 import com.cloudera.impala.thrift.TSymbolLookupResult;
32 import com.cloudera.impala.thrift.TSymbolType;
33 import com.google.common.base.Joiner;
34 import com.google.common.base.Preconditions;
35 import com.google.common.collect.Lists;
36 
37 
41 public class Function implements CatalogObject {
42  // Enum for how to compare function signatures.
43  // For decimal types, the type in the function can be a wildcard, i.e. decimal(*,*).
44  // The wildcard can *only* exist as function type, the caller will always be a
45  // fully specified decimal.
46  // For the purposes of function type resolution, decimal(*,*) will match exactly
47  // with any fully specified decimal (i.e. fn(decimal(*,*)) matches identically for
48  // the call to fn(decimal(1,0)).
49  public enum CompareMode {
50  // Two signatures are identical if the number of arguments and their types match
51  // exactly and either both signatures are varargs or neither.
53 
54  // Two signatures are indistinguishable if there is no way to tell them apart
55  // when matching a particular instantiation. That is, their fixed arguments
56  // match exactly and the remaining varargs have the same type.
57  // e.g. fn(int, int, int) and fn(int...)
58  // Argument types that are NULL are ignored when doing this comparison.
59  // e.g. fn(NULL, int) is indistinguishable from fn(int, int)
61 
62  // X is a supertype of Y if Y.arg[i] can be implicitly cast to X.arg[i]. If X has
63  // vargs, the remaining arguments of Y must be implicitly castable to the var arg
64  // type. The key property this provides is that X can be used in place of Y.
65  // e.g.
66  // fn(int, double, string...) is a supertype of fn(tinyint, float, string, string)
68  }
69 
70  // User specified function name e.g. "Add"
72 
73  private final Type retType_;
74  // Array of parameter types. empty array if this function does not have parameters.
75  private Type[] argTypes_;
76 
77  // If true, this function has variable arguments.
78  // TODO: we don't currently support varargs with no fixed types. i.e. fn(...)
79  private boolean hasVarArgs_;
80 
81  // If true (default), this function is called directly by the user. For operators,
82  // this is false. If false, it also means the function is not visible from
83  // 'show functions'.
84  private boolean userVisible_;
85 
86  // Absolute path in HDFS for the binary that contains this function.
87  // e.g. /udfs/udfs.jar
88  private HdfsUri location_;
89  private TFunctionBinaryType binaryType_;
91 
// Creates a function from an array of argument types. A null 'argTypes' is treated
// as "no parameters" and replaced by an empty array. New functions start out
// user-visible.
public Function(FunctionName name, Type[] argTypes,
    Type retType, boolean varArgs) {
  name_ = name;
  retType_ = retType;
  hasVarArgs_ = varArgs;
  argTypes_ = (argTypes != null) ? argTypes : new Type[0];
  userVisible_ = true;
}
104 
// Creates a function from a list of argument types. A null or empty 'args' list
// means the function has no parameters, mirroring how the array-based constructor
// treats a null array.
public Function(FunctionName name, List<Type> args,
    Type retType, boolean varArgs) {
  // The delegated constructor already installs an empty argument array, so only a
  // non-empty list needs to be copied over it.
  this(name, (Type[]) null, retType, varArgs);
  if (args != null && !args.isEmpty()) {
    argTypes_ = args.toArray(new Type[args.size()]);
  }
}
114 
115  public FunctionName getFunctionName() { return name_; }
116  public String functionName() { return name_.getFunction(); }
117  public String dbName() { return name_.getDb(); }
118  public Type getReturnType() { return retType_; }
119  public Type[] getArgs() { return argTypes_; }
120  // Returns the number of arguments to this function.
121  public int getNumArgs() { return argTypes_.length; }
122  public HdfsUri getLocation() { return location_; }
123  public TFunctionBinaryType getBinaryType() { return binaryType_; }
124  public boolean hasVarArgs() { return hasVarArgs_; }
125  public boolean userVisible() { return userVisible_; }
126  public Type getVarArgsType() {
127  if (!hasVarArgs_) return Type.INVALID;
128  Preconditions.checkState(argTypes_.length > 0);
129  return argTypes_[argTypes_.length - 1];
130  }
131 
132  public void setName(FunctionName name) { name_ = name; }
133  public void setLocation(HdfsUri loc) { location_ = loc; }
134  public void setBinaryType(TFunctionBinaryType type) { binaryType_ = type; }
135  public void setHasVarArgs(boolean v) { hasVarArgs_ = v; }
136  public void setUserVisible(boolean b) { userVisible_ = b; }
137 
138  // Returns a string with the signature in human readable format:
139  // FnName(argtype1, argtyp2). e.g. Add(int, int)
140  public String signatureString() {
141  StringBuilder sb = new StringBuilder();
142  sb.append(name_.getFunction())
143  .append("(")
144  .append(Joiner.on(", ").join(argTypes_));
145  if (hasVarArgs_) sb.append("...");
146  sb.append(")");
147  return sb.toString();
148  }
149 
150  @Override
151  public boolean equals(Object o) {
152  if (!(o instanceof Function)) return false;
153  return compare((Function)o, CompareMode.IS_IDENTICAL);
154  }
155 
156  // Compares this to 'other' for mode.
157  public boolean compare(Function other, CompareMode mode) {
158  switch (mode) {
159  case IS_IDENTICAL: return isIdentical(other);
160  case IS_INDISTINGUISHABLE: return isIndistinguishable(other);
161  case IS_SUPERTYPE_OF: return isSuperTypeOf(other);
162  default:
163  Preconditions.checkState(false);
164  return false;
165  }
166  }
173  private boolean isSuperTypeOf(Function other) {
174  if (!other.name_.equals(name_)) return false;
175  if (!this.hasVarArgs_ && other.argTypes_.length != this.argTypes_.length) {
176  return false;
177  }
178  if (this.hasVarArgs_ && other.argTypes_.length < this.argTypes_.length) return false;
179  for (int i = 0; i < this.argTypes_.length; ++i) {
180  if (!Type.isImplicitlyCastable(other.argTypes_[i], this.argTypes_[i])) {
181  return false;
182  }
183  }
184  // Check trailing varargs.
185  if (this.hasVarArgs_) {
186  for (int i = this.argTypes_.length; i < other.argTypes_.length; ++i) {
187  if (other.argTypes_[i].matchesType(this.getVarArgsType())) continue;
188  if (!Type.isImplicitlyCastable(other.argTypes_[i],
189  this.getVarArgsType())) {
190  return false;
191  }
192  }
193  }
194  return true;
195  }
196 
201  Type[] promoted = argTypes_.clone();
202  for (int i = 0; i < promoted.length; ++i) {
203  if (promoted[i].isScalarType(PrimitiveType.CHAR)) promoted[i] = ScalarType.STRING;
204  }
205  return new Function(name_, promoted, retType_, hasVarArgs_);
206  }
207 
212  public Function selectClosestSuperType(List<Function> candidates) {
213  Preconditions.checkArgument(candidates.size() > 0);
214  if (candidates.size() == 1) return candidates.get(0);
215 
216  // Always promote CHAR to STRING before attempting any other promotions.
217  Function withStrs = promoteCharsToStrings();
218  for (Function f: candidates) {
219  if (withStrs.isIndistinguishable(f)) return f;
220  }
221  // Otherwise, we use the previous rules of resolution which are to take the first
222  // one in the list.
223  return candidates.get(0);
224  }
225 
226  private boolean isIdentical(Function o) {
227  if (!o.name_.equals(name_)) return false;
228  if (o.argTypes_.length != this.argTypes_.length) return false;
229  if (o.hasVarArgs_ != this.hasVarArgs_) return false;
230  for (int i = 0; i < this.argTypes_.length; ++i) {
231  if (!o.argTypes_[i].matchesType(this.argTypes_[i])) return false;
232  }
233  return true;
234  }
235 
236  private boolean isIndistinguishable(Function o) {
237  if (!o.name_.equals(name_)) return false;
238  int minArgs = Math.min(o.argTypes_.length, this.argTypes_.length);
239  // The first fully specified args must be identical.
240  for (int i = 0; i < minArgs; ++i) {
241  if (o.argTypes_[i].isNull() || this.argTypes_[i].isNull()) continue;
242  if (!o.argTypes_[i].matchesType(this.argTypes_[i])) return false;
243  }
244  if (o.argTypes_.length == this.argTypes_.length) return true;
245 
246  if (o.hasVarArgs_ && this.hasVarArgs_) {
247  if (!o.getVarArgsType().matchesType(this.getVarArgsType())) return false;
248  if (this.getNumArgs() > o.getNumArgs()) {
249  for (int i = minArgs; i < this.getNumArgs(); ++i) {
250  if (this.argTypes_[i].isNull()) continue;
251  if (!this.argTypes_[i].matchesType(o.getVarArgsType())) return false;
252  }
253  } else {
254  for (int i = minArgs; i < o.getNumArgs(); ++i) {
255  if (o.argTypes_[i].isNull()) continue;
256  if (!o.argTypes_[i].matchesType(this.getVarArgsType())) return false;
257  }
258  }
259  return true;
260  } else if (o.hasVarArgs_) {
261  // o has var args so check the remaining arguments from this
262  if (o.getNumArgs() > minArgs) return false;
263  for (int i = minArgs; i < this.getNumArgs(); ++i) {
264  if (this.argTypes_[i].isNull()) continue;
265  if (!this.argTypes_[i].matchesType(o.getVarArgsType())) return false;
266  }
267  return true;
268  } else if (this.hasVarArgs_) {
269  // this has var args so check the remaining arguments from s
270  if (this.getNumArgs() > minArgs) return false;
271  for (int i = minArgs; i < o.getNumArgs(); ++i) {
272  if (o.argTypes_[i].isNull()) continue;
273  if (!o.argTypes_[i].matchesType(this.getVarArgsType())) return false;
274  }
275  return true;
276  } else {
277  // Neither has var args and the lengths don't match
278  return false;
279  }
280  }
281 
282  @Override
283  public TCatalogObjectType getCatalogObjectType() { return TCatalogObjectType.FUNCTION; }
284 
285  @Override
286  public long getCatalogVersion() { return catalogVersion_; }
287 
288  @Override
289  public void setCatalogVersion(long newVersion) { catalogVersion_ = newVersion; }
290 
291  @Override
292  public String getName() { return getFunctionName().toString(); }
293 
294  public TFunction toThrift() {
295  TFunction fn = new TFunction();
296  fn.setSignature(signatureString());
297  fn.setName(name_.toThrift());
298  fn.setBinary_type(binaryType_);
299  if (location_ != null) fn.setHdfs_location(location_.toString());
300  fn.setArg_types(Type.toThrift(argTypes_));
301  fn.setRet_type(getReturnType().toThrift());
302  fn.setHas_var_args(hasVarArgs_);
303  // TODO: Comment field is missing?
304  // fn.setComment(comment_)
305  return fn;
306  }
307 
308  public static Function fromThrift(TFunction fn) {
309  List<Type> argTypes = Lists.newArrayList();
310  for (TColumnType t: fn.getArg_types()) {
311  argTypes.add(Type.fromThrift(t));
312  }
313 
314  Function function = null;
315  if (fn.isSetScalar_fn()) {
316  TScalarFunction scalarFn = fn.getScalar_fn();
317  function = new ScalarFunction(FunctionName.fromThrift(fn.getName()), argTypes,
318  Type.fromThrift(fn.getRet_type()), new HdfsUri(fn.getHdfs_location()),
319  scalarFn.getSymbol(), scalarFn.getPrepare_fn_symbol(),
320  scalarFn.getClose_fn_symbol());
321  } else if (fn.isSetAggregate_fn()) {
322  TAggregateFunction aggFn = fn.getAggregate_fn();
323  function = new AggregateFunction(FunctionName.fromThrift(fn.getName()), argTypes,
324  Type.fromThrift(fn.getRet_type()),
325  Type.fromThrift(aggFn.getIntermediate_type()),
326  new HdfsUri(fn.getHdfs_location()), aggFn.getUpdate_fn_symbol(),
327  aggFn.getInit_fn_symbol(), aggFn.getSerialize_fn_symbol(),
328  aggFn.getMerge_fn_symbol(), aggFn.getGet_value_fn_symbol(),
329  null, aggFn.getFinalize_fn_symbol());
330  } else {
331  // In the case where we are trying to look up the object, we only have the
332  // signature.
333  function = new Function(FunctionName.fromThrift(fn.getName()),
334  argTypes, Type.fromThrift(fn.getRet_type()), fn.isHas_var_args());
335  }
336  function.setBinaryType(fn.getBinary_type());
337  function.setHasVarArgs(fn.isHas_var_args());
338  return function;
339  }
340 
341  @Override
342  public boolean isLoaded() { return true; }
343 
344  // Returns the resolved symbol in the binary. The BE will do a lookup of 'symbol'
345  // in the binary and try to resolve unmangled names.
346  // If this function is expecting a return argument, retArgType is that type. It should
347  // be null if this function isn't expecting a return argument.
348  public String lookupSymbol(String symbol, TSymbolType symbolType, Type retArgType,
349  boolean hasVarArgs, Type... argTypes) throws AnalysisException {
350  if (symbol.length() == 0) {
351  if (binaryType_ == TFunctionBinaryType.BUILTIN) {
352  // We allow empty builtin symbols in order to stage work in the FE before its
353  // implemented in the BE
354  return symbol;
355  }
356  throw new AnalysisException("Could not find symbol ''");
357  }
358 
359  TSymbolLookupParams lookup = new TSymbolLookupParams();
360  // Builtin functions do not have an external library, they are loaded directly from
361  // the running process
362  lookup.location = binaryType_ != TFunctionBinaryType.BUILTIN ?
363  location_.toString() : "";
364  lookup.symbol = symbol;
365  lookup.symbol_type = symbolType;
366  lookup.fn_binary_type = binaryType_;
367  lookup.arg_types = Type.toThrift(argTypes);
368  lookup.has_var_args = hasVarArgs;
369  if (retArgType != null) lookup.setRet_arg_type(retArgType.toThrift());
370 
371  try {
372  TSymbolLookupResult result = FeSupport.LookupSymbol(lookup);
373  switch (result.result_code) {
374  case SYMBOL_FOUND:
375  return result.symbol;
376  case BINARY_NOT_FOUND:
377  Preconditions.checkState(binaryType_ != TFunctionBinaryType.BUILTIN);
378  throw new AnalysisException(
379  "Could not load binary: " + location_.getLocation() + "\n" +
380  result.error_msg);
381  case SYMBOL_NOT_FOUND:
382  throw new AnalysisException(result.error_msg);
383  default:
384  // Should never get here.
385  throw new AnalysisException("Internal Error");
386  }
387  } catch (InternalException e) {
388  // Should never get here.
389  e.printStackTrace();
390  throw new AnalysisException("Could not find symbol: " + symbol, e);
391  }
392  }
393 
394  public String lookupSymbol(String symbol, TSymbolType symbolType)
395  throws AnalysisException {
396  Preconditions.checkState(
397  symbolType == TSymbolType.UDF_PREPARE || symbolType == TSymbolType.UDF_CLOSE);
398  return lookupSymbol(symbol, symbolType, null, false);
399  }
400 
401  public static String getUdfType(Type t) {
402  switch (t.getPrimitiveType()) {
403  case BOOLEAN:
404  return "BooleanVal";
405  case TINYINT:
406  return "TinyIntVal";
407  case SMALLINT:
408  return "SmallIntVal";
409  case INT:
410  return "IntVal";
411  case BIGINT:
412  return "BigIntVal";
413  case FLOAT:
414  return "FloatVal";
415  case DOUBLE:
416  return "DoubleVal";
417  case STRING:
418  case VARCHAR:
419  case CHAR:
420  return "StringVal";
421  case TIMESTAMP:
422  return "TimestampVal";
423  case DECIMAL:
424  return "DecimalVal";
425  default:
426  Preconditions.checkState(false, t.toString());
427  return "";
428  }
429  }
430 }
static String getUdfType(Type t)
Definition: Function.java:401
TFunctionBinaryType getBinaryType()
Definition: Function.java:123
static final ScalarType STRING
Definition: Type.java:53
String lookupSymbol(String symbol, TSymbolType symbolType, Type retArgType, boolean hasVarArgs, Type...argTypes)
Definition: Function.java:348
PrimitiveType getPrimitiveType()
Definition: Type.java:188
static Type fromThrift(TColumnType thrift)
Definition: Type.java:293
boolean matchesType(Type t)
Definition: Type.java:218
static final long INITIAL_CATALOG_VERSION
Definition: Catalog.java:57
Function selectClosestSuperType(List< Function > candidates)
Definition: Function.java:212
void setCatalogVersion(long newVersion)
Definition: Function.java:289
static boolean isImplicitlyCastable(Type t1, Type t2)
Definition: Type.java:259
String lookupSymbol(String symbol, TSymbolType symbolType)
Definition: Function.java:394
void setBinaryType(TFunctionBinaryType type)
Definition: Function.java:134
static FunctionName fromThrift(TFunctionName fnName)
boolean isIndistinguishable(Function o)
Definition: Function.java:236
void setName(FunctionName name)
Definition: Function.java:132
TFunctionBinaryType binaryType_
Definition: Function.java:89
Function(FunctionName name, Type[] argTypes, Type retType, boolean varArgs)
Definition: Function.java:92
Function(FunctionName name, List< Type > args, Type retType, boolean varArgs)
Definition: Function.java:105
static Function fromThrift(TFunction fn)
Definition: Function.java:308
TCatalogObjectType getCatalogObjectType()
Definition: Function.java:283
boolean compare(Function other, CompareMode mode)
Definition: Function.java:157
string name
Definition: cpu-info.cc:50
static final ScalarType INVALID
Definition: Type.java:44
boolean isSuperTypeOf(Function other)
Definition: Function.java:173