Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
CreateUdaStmt.java
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 package com.cloudera.impala.analysis;
16 
17 import java.util.ArrayList;
18 import java.util.HashMap;
19 
25 import com.cloudera.impala.thrift.TFunctionBinaryType;
26 import com.cloudera.impala.thrift.TSymbolType;
27 import com.google.common.base.Preconditions;
28 
32 public class CreateUdaStmt extends CreateFunctionStmtBase {
34 
46  public CreateUdaStmt(FunctionName fnSymbol, FunctionArgs args,
47  TypeDef retTypeDef, TypeDef intermediateTypeDef,
48  HdfsUri location, boolean ifNotExists,
49  HashMap<CreateFunctionStmtBase.OptArg, String> optArgs) {
50  super(fnSymbol, args, retTypeDef, location, ifNotExists, optArgs);
51  intermediateTypeDef_ = intermediateTypeDef;
52  }
53 
54  private void reportCouldNotInferSymbol(String function) throws AnalysisException {
55  throw new AnalysisException("Could not infer symbol for "
56  + function + "() function.");
57  }
58 
59  // Gets the symbol for 'arg'. If the user set it from the dll, return that. Otherwise
60  // try to infer the Symbol from the Update function. To infer the Symbol, the update
61  // function must contain "update" or "Update" and we switch that out with 'defaultSymbol'.
62  // Returns null if no symbol was found.
63  private String getSymbolSymbol(OptArg arg, String defaultSymbol) {
64  // First lookup if the user explicitly set it.
65  if (optArgs_.get(arg) != null) return optArgs_.get(arg);
66  // Try to match it from Update
67  String updateFn = optArgs_.get(OptArg.UPDATE_FN);
68  // Mangled strings start with _Z. We can't get substitute Symbols for mangled
69  // strings.
70  // TODO: this is doable in the BE with more symbol parsing.
71  if (updateFn.startsWith("_Z")) return null;
72 
73  if (updateFn.contains("update")) return updateFn.replace("update", defaultSymbol);
74  if (updateFn.contains("Update")) {
75  char[] array = defaultSymbol.toCharArray();
76  array[0] = Character.toUpperCase(array[0]);
77  String s = new String(array);
78  return updateFn.replace("Update", s);
79  }
80  return null;
81  }
82 
83  @Override
84  public void analyze(Analyzer analyzer) throws AnalysisException {
85  super.analyze(analyzer);
86  Preconditions.checkNotNull(fn_);
87  Preconditions.checkState(fn_ instanceof AggregateFunction);
89 
90  if (uda.getNumArgs() == 0) {
91  throw new AnalysisException("UDAs must take at least one argument.");
92  }
93 
94  if (uda.getBinaryType() == TFunctionBinaryType.HIVE) {
95  throw new AnalysisException("Java UDAs are not supported.");
96  }
97 
98  // TODO: these are temporarily restrictions since the BE cannot yet
99  // execute them.
100  if (uda.getBinaryType() == TFunctionBinaryType.IR) {
101  throw new AnalysisException("IR UDAs are not yet supported.");
102  }
103  if (fn_.hasVarArgs()) {
104  throw new AnalysisException("UDAs with varargs are not yet supported.");
105  }
106  if (fn_.getNumArgs() > 8) {
107  throw new AnalysisException(
108  "UDAs with more than 8 arguments are not yet supported.");
109  }
110 
111  if (uda.getReturnType().getPrimitiveType() == PrimitiveType.CHAR) {
112  throw new AnalysisException("UDAs with CHAR return type are not yet supported.");
113  }
114  if (uda.getReturnType().getPrimitiveType() == PrimitiveType.VARCHAR) {
115  throw new AnalysisException("UDAs with VARCHAR return type are not yet supported.");
116  }
117  for (int i = 0; i < uda.getNumArgs(); ++i) {
118  if (uda.getArgs()[i].getPrimitiveType() == PrimitiveType.CHAR) {
119  throw new AnalysisException("UDAs with CHAR arguments are not yet supported.");
120  }
121  if (uda.getArgs()[i].getPrimitiveType() == PrimitiveType.VARCHAR) {
122  throw new AnalysisException("UDAs with VARCHAR arguments are not yet supported.");
123  }
124  }
125 
126  Type intermediateType = null;
127  if (intermediateTypeDef_ == null) {
128  intermediateType = uda.getReturnType();
129  } else {
130  intermediateTypeDef_.analyze(analyzer);
131  intermediateType = intermediateTypeDef_.getType();
132  }
133  uda.setIntermediateType(intermediateType);
134 
135  // TODO: this is a temporary restriction. Remove when we can support
136  // different intermediate types.
137  if (!intermediateType.equals(fn_.getReturnType())) {
138  StringBuilder error = new StringBuilder();
139  error.append("UDAs with an intermediate type, ")
140  .append(intermediateType.toSql())
141  .append(", that is different from the return type, ")
142  .append(fn_.getReturnType().toSql())
143  .append(", are currently not supported.");
144  throw new AnalysisException(error.toString());
145  }
146 
147  // Check arguments that are only valid in UDFs are not set.
151 
152  // The user must provide the symbol for Update.
153  uda.setUpdateFnSymbol(uda.lookupSymbol(
154  checkAndGetOptArg(OptArg.UPDATE_FN), TSymbolType.UDF_EVALUATE, intermediateType,
155  uda.hasVarArgs(), uda.getArgs()));
156 
157  // If the ddl did not specify the init/serialize/merge/finalize function
158  // Symbols, guess them based on the update fn Symbol.
159  Preconditions.checkNotNull(uda.getUpdateFnSymbol());
160  uda.setInitFnSymbol(getSymbolSymbol(OptArg.INIT_FN, "init"));
161  uda.setSerializeFnSymbol(getSymbolSymbol(OptArg.SERIALIZE_FN, "serialize"));
162  uda.setMergeFnSymbol(getSymbolSymbol(OptArg.MERGE_FN, "merge"));
163  uda.setFinalizeFnSymbol(getSymbolSymbol(OptArg.FINALIZE_FN, "finalize"));
164 
165  // Init and merge are required.
166  if (uda.getInitFnSymbol() == null) reportCouldNotInferSymbol("init");
167  if (uda.getMergeFnSymbol() == null) reportCouldNotInferSymbol("merge");
168 
169  // Validate that all set symbols exist.
170  uda.setInitFnSymbol(uda.lookupSymbol(uda.getInitFnSymbol(),
171  TSymbolType.UDF_EVALUATE, intermediateType, false));
172  uda.setMergeFnSymbol(uda.lookupSymbol(uda.getMergeFnSymbol(),
173  TSymbolType.UDF_EVALUATE, intermediateType, false, intermediateType));
174  if (uda.getSerializeFnSymbol() != null) {
175  try {
176  uda.setSerializeFnSymbol(uda.lookupSymbol(uda.getSerializeFnSymbol(),
177  TSymbolType.UDF_EVALUATE, null, false, intermediateType));
178  } catch (AnalysisException e) {
179  if (optArgs_.get(OptArg.SERIALIZE_FN) != null) {
180  throw e;
181  } else {
182  // Ignore, these symbols are optional.
183  uda.setSerializeFnSymbol(null);
184  }
185  }
186  }
187  if (uda.getFinalizeFnSymbol() != null) {
188  try {
189  uda.setFinalizeFnSymbol(uda.lookupSymbol(
190  uda.getFinalizeFnSymbol(), TSymbolType.UDF_EVALUATE, null, false,
191  intermediateType));
192  } catch (AnalysisException e) {
193  if (optArgs_.get(OptArg.FINALIZE_FN) != null) {
194  throw e;
195  } else {
196  // Ignore, these symbols are optional.
197  uda.setFinalizeFnSymbol(null);
198  }
199  }
200  }
201 
202  // If the intermediate type is not the return type, then finalize is
203  // required.
204  if (!intermediateType.equals(fn_.getReturnType()) &&
205  uda.getFinalizeFnSymbol() == null) {
206  throw new AnalysisException("Finalize() is required for this UDA.");
207  }
208 
209  StringBuilder sb = new StringBuilder("CREATE ");
210  sb.append("AGGREGATE FUNCTION ");
211  if (ifNotExists_) sb.append("IF NOT EXISTS ");
212  sb.append(uda.signatureString())
213  .append(" RETURNS ").append(uda.getReturnType())
214  .append(" INTERMEDIATE ").append(uda.getIntermediateType())
215  .append(" LOCATION ").append(uda.getLocation())
216  .append(" UPDATE_FN=").append(uda.getUpdateFnSymbol())
217  .append(" INIT_FN=").append(uda.getInitFnSymbol())
218  .append(" MERGE_FN=").append(uda.getMergeFnSymbol());
219  if (uda.getSerializeFnSymbol() != null) {
220  sb.append(" SERIALIZE_FN=").append(uda.getSerializeFnSymbol());
221  }
222  if (uda.getFinalizeFnSymbol() != null) {
223  sb.append(" FINALIZE_FN=").append(uda.getFinalizeFnSymbol());
224  }
225  if (getComment() != null) sb.append(" COMMENT = '" + getComment() + "'");
226  sqlString_ = sb.toString();
227  }
228 
229  @Override
230  protected Function createFunction(FunctionName fnName, ArrayList<Type> argTypes,
231  Type retType, boolean hasVarArgs) {
232  return new AggregateFunction(fnName_, args_.getArgTypes(), retTypeDef_.getType(),
233  args_.hasVarArgs());
234  }
235 }
CreateUdaStmt(FunctionName fnSymbol, FunctionArgs args, TypeDef retTypeDef, TypeDef intermediateTypeDef, HdfsUri location, boolean ifNotExists, HashMap< CreateFunctionStmtBase.OptArg, String > optArgs)
String getSymbolSymbol(OptArg arg, String defaultSymbol)
final HashMap< CreateFunctionStmtBase.OptArg, String > optArgs_
PrimitiveType
Definition: types.h:27
Function createFunction(FunctionName fnName, ArrayList< Type > argTypes, Type retType, boolean hasVarArgs)