Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
scalar-fn-call.cc
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "exprs/scalar-fn-call.h"
16 
17 #include <vector>
18 #include <gutil/strings/substitute.h>
19 #include <llvm/IR/Attributes.h>
20 #include <llvm/ExecutionEngine/ExecutionEngine.h>
21 
22 #include "codegen/codegen-anyval.h"
23 #include "codegen/llvm-codegen.h"
24 #include "exprs/anyval-util.h"
25 #include "exprs/expr-context.h"
26 #include "runtime/hdfs-fs-cache.h"
27 #include "runtime/lib-cache.h"
28 #include "runtime/runtime-state.h"
29 #include "runtime/types.h"
30 #include "udf/udf-internal.h"
31 #include "util/debug-util.h"
32 #include "util/dynamic-util.h"
33 #include "util/symbols-util.h"
34 
35 #include "common/names.h"
36 
37 using namespace impala;
38 using namespace impala_udf;
39 using namespace strings;
40 
41 ScalarFnCall::ScalarFnCall(const TExprNode& node)
42  : Expr(node),
43  vararg_start_idx_(node.__isset.vararg_start_idx ?
44  node.vararg_start_idx : -1),
45  scalar_fn_wrapper_(NULL),
46  prepare_fn_(NULL),
47  close_fn_(NULL),
48  scalar_fn_(NULL) {
49  DCHECK_NE(fn_.binary_type, TFunctionBinaryType::HIVE);
50 }
51 
// Body of what is presumably ScalarFnCall::Prepare() (the first line of the signature
// is absent from this listing). Runs Expr::Prepare(), registers a FunctionContext for
// this call, then resolves the function either to a directly callable pointer
// (scalar_fn_, interpreted path) or to a JIT'd wrapper (scalar_fn_wrapper_), and
// finally looks up the optional prepare/close functions. Returns a non-OK Status as
// soon as any of these steps fails.
53  ExprContext* context) {
54  RETURN_IF_ERROR(Expr::Prepare(state, desc, context));
55 
56  if (fn_.scalar_fn.symbol.empty()) {
57  // This path is intended to only be used during development to test FE
58  // code before the BE has implemented the function.
59  // Having the failure in the BE (rather than during analysis) allows for
60  // better FE testing.
61  DCHECK_EQ(fn_.binary_type, TFunctionBinaryType::BUILTIN);
62  stringstream ss;
63  ss << "Function " << fn_.name.function_name << " is not implemented.";
64  return Status(ss.str());
65  }
66 
   // Collect the UDF-facing type descriptor of every child and remember whether any
   // child is of type CHAR (CHAR arguments force the interpreted path below).
68  vector<FunctionContext::TypeDesc> arg_types;
69  bool char_arg = false;
70  for (int i = 0; i < children_.size(); ++i) {
71  arg_types.push_back(AnyValUtil::ColumnTypeToTypeDesc(children_[i]->type_));
72  char_arg = char_arg || (children_[i]->type_.type == TYPE_CHAR);
73  }
74 
75  // Compute buffer size for varargs
76  int varargs_buffer_size = 0;
77  if (vararg_start_idx_ != -1) {
78  DCHECK_GT(GetNumChildren(), vararg_start_idx_);
79  for (int i = vararg_start_idx_; i < GetNumChildren(); ++i) {
80  varargs_buffer_size += AnyValUtil::AnyValSize(children_[i]->type());
81  }
82  }
83 
   // NOTE(review): 'return_type' is not declared in the visible lines; its declaration
   // is presumably on a line that was dropped from this listing -- confirm against the
   // real source file.
84  context_index_ = context->Register(state, return_type, arg_types, varargs_buffer_size);
85 
86  // If the codegen object hasn't been created yet and we're calling a builtin or native
87  // UDF with <= 8 non-variadic arguments, we can use the interpreted path and call the
88  // builtin without codegen. This saves us the overhead of creating the codegen object
89  // when it's not necessary (i.e., in plan fragments with no codegen-enabled operators).
90  // In addition, we can never codegen char arguments.
91  // TODO: codegen for char arguments
92  if (char_arg || (!state->codegen_created() && NumFixedArgs() <= 8 &&
93  (fn_.binary_type == TFunctionBinaryType::BUILTIN ||
94  fn_.binary_type == TFunctionBinaryType::NATIVE))) {
95  // Builtins with char arguments must still have <= 8 arguments.
96  // TODO: delete when we have codegen for char arguments
97  if (char_arg) {
98  DCHECK(NumFixedArgs() <= 8 && fn_.binary_type == TFunctionBinaryType::BUILTIN);
99  }
   // NOTE(review): the start of this statement (the declaration of 'status' and the
   // callee name) is on a line dropped from this listing; only the argument list is
   // visible. The call fills in scalar_fn_ and cache_entry_.
101  fn_.hdfs_location, fn_.scalar_fn.symbol, &scalar_fn_, &cache_entry_);
102  if (!status.ok()) {
103  if (fn_.binary_type == TFunctionBinaryType::BUILTIN) {
104  // Builtins symbols should exist unless there is a version mismatch.
105  status.SetErrorMsg(ErrorMsg(TErrorCode::MISSING_BUILTIN,
106  fn_.name.function_name, fn_.scalar_fn.symbol));
107  return status;
108  } else {
109  DCHECK_EQ(fn_.binary_type, TFunctionBinaryType::NATIVE);
110  return Status(Substitute("Problem loading UDF '$0':\n$1",
111  fn_.name.function_name, status.GetDetail()));
   // NOTE(review): the statement below is unreachable -- the return directly above
   // always leaves this branch. It can be removed without changing behavior.
112  return status;
113  }
114  }
115  } else {
116  // If we got here, either codegen is enabled or we need codegen to run this function.
117  LlvmCodeGen* codegen;
118  RETURN_IF_ERROR(state->GetCodegen(&codegen));
119 
120  if (fn_.binary_type == TFunctionBinaryType::IR) {
121  string local_path;
122  RETURN_IF_ERROR(LibCache::instance()->GetLocalLibPath(
123  fn_.hdfs_location, LibCache::TYPE_IR, &local_path));
124  // Link the UDF module into this query's main module (essentially copy the UDF
125  // module into the main module) so the UDF's functions are available in the main
126  // module.
127  RETURN_IF_ERROR(codegen->LinkModule(local_path));
128  }
129 
130  llvm::Function* ir_udf_wrapper;
131  RETURN_IF_ERROR(GetCodegendComputeFn(state, &ir_udf_wrapper));
132  // TODO: don't do this for child exprs
133  codegen->AddFunctionToJit(ir_udf_wrapper, &scalar_fn_wrapper_);
134  }
135 
   // The prepare and close functions are optional; resolve each one only when the
   // corresponding symbol is set on the function descriptor.
136  if (fn_.scalar_fn.__isset.prepare_fn_symbol) {
137  RETURN_IF_ERROR(GetFunction(state, fn_.scalar_fn.prepare_fn_symbol,
138  reinterpret_cast<void**>(&prepare_fn_)));
139  }
140  if (fn_.scalar_fn.__isset.close_fn_symbol) {
141  RETURN_IF_ERROR(GetFunction(state, fn_.scalar_fn.close_fn_symbol,
142  reinterpret_cast<void**>(&close_fn_)));
143  }
144 
145  return Status::OK;
146 }
147 
// Body of what is presumably ScalarFnCall::Open() (the signature line is absent from
// this listing). Opens the children via Expr::Open(), allocates the staging AnyVals
// used by the interpreted path, records the constant arguments when opened with
// FRAGMENT_LOCAL scope, runs prepare_fn_ when one was resolved, and captures
// output_scale_ for a DOUBLE-returning round() whose second argument is constant.
// Returns a non-OK Status if Expr::Open() fails or prepare_fn_ sets an error.
150  // Opens and inits children
151  RETURN_IF_ERROR(Expr::Open(state, ctx, scope));
152  FunctionContext* fn_ctx = ctx->fn_context(context_index_);
153 
154  if (scalar_fn_ != NULL) {
155  // We're in the interpreted path (i.e. no JIT). Populate our FunctionContext's
156  // staging_input_vals, which will be reused across calls to scalar_fn_.
157  DCHECK(scalar_fn_wrapper_ == NULL);
158  ObjectPool* obj_pool = state->obj_pool();
159  vector<AnyVal*>* input_vals = fn_ctx->impl()->staging_input_vals();
   // Only the fixed (non-vararg) arguments get a staging AnyVal here.
160  for (int i = 0; i < NumFixedArgs(); ++i) {
161  input_vals->push_back(CreateAnyVal(obj_pool, children_[i]->type()));
162  }
163  }
164 
165  // Only evaluate constant arguments once per fragment
166  if (scope == FunctionContext::FRAGMENT_LOCAL) {
167  vector<AnyVal*> constant_args;
168  for (int i = 0; i < children_.size(); ++i) {
169  constant_args.push_back(children_[i]->GetConstVal(ctx));
170  }
171  fn_ctx->impl()->SetConstantArgs(constant_args);
172  }
173 
   // prepare_fn_ is invoked for FRAGMENT_LOCAL first (only when opening at that scope)
   // and then always for THREAD_LOCAL; an error set by either call aborts Open().
174  if (prepare_fn_ != NULL) {
175  if (scope == FunctionContext::FRAGMENT_LOCAL) {
176  prepare_fn_(fn_ctx, FunctionContext::FRAGMENT_LOCAL);
177  if (fn_ctx->has_error()) return Status(fn_ctx->error_msg());
178  }
179  prepare_fn_(fn_ctx, FunctionContext::THREAD_LOCAL);
180  if (fn_ctx->has_error()) return Status(fn_ctx->error_msg());
181  }
182 
183  // If we're calling MathFunctions::RoundUpTo(), we need to set output_scale_, which
184  // determines how many decimal places are printed.
185  // TODO: revisit this. We should be able to do this if the scale argument is
186  // non-constant.
187  if (fn_.name.function_name == "round" && type_.type == TYPE_DOUBLE) {
188  DCHECK_EQ(children_.size(), 2);
189  if (children_[1]->IsConstant()) {
190  IntVal scale_arg = children_[1]->GetIntVal(ctx, NULL);
191  output_scale_ = scale_arg.val;
192  }
193  }
194 
195  return Status::OK;
196 }
197 
// Body of what is presumably ScalarFnCall::Close() (the signature line is absent from
// this listing). When a context index was assigned and a close function was resolved,
// calls close_fn_ for THREAD_LOCAL scope and then, if closing at FRAGMENT_LOCAL scope,
// for FRAGMENT_LOCAL as well. Expr::Close() is always called afterwards.
200  if (context_index_ != -1 && close_fn_ != NULL) {
201  FunctionContext* fn_ctx = context->fn_context(context_index_);
202 
203  close_fn_(fn_ctx, FunctionContext::THREAD_LOCAL);
204  if (scope == FunctionContext::FRAGMENT_LOCAL) {
205  close_fn_(fn_ctx, FunctionContext::FRAGMENT_LOCAL);
206  }
207  }
208  Expr::Close(state, context, scope);
209 }
210 
// Body of what is presumably ScalarFnCall::IsConstant() (the signature line is absent
// from this listing). A function named "rand" is never reported as constant; every
// other function defers to Expr::IsConstant().
212  if (fn_.name.function_name == "rand") return false;
213  return Expr::IsConstant();
214 }
215 
216 // Dynamically loads the pre-compiled UDF and codegens a function that calls each child's
217 // codegen'd function, then passes those values to the UDF and returns the result.
218 // Example generated IR for a UDF with signature
219 // create function Udf(double, int...) returns double
220 // select Udf(1.0, 2, 3, 4, 5)
221 // define { i8, double } @UdfWrapper(i8* %context, %"class.impala::TupleRow"* %row) {
222 // entry:
223 // %arg_val = call { i8, double }
224 // @ExprWrapper(i8* %context, %"class.impala::TupleRow"* %row)
225 // %arg_ptr = alloca { i8, double }
226 // store { i8, double } %arg_val, { i8, double }* %arg_ptr
227 // %arg_val1 = call i64 @ExprWrapper1(i8* %context, %"class.impala::TupleRow"* %row)
228 // store i64 %arg_val1, i64* inttoptr (i64 89111072 to i64*)
229 // %arg_val2 = call i64 @ExprWrapper2(i8* %context, %"class.impala::TupleRow"* %row)
230 // store i64 %arg_val2, i64* inttoptr (i64 89111080 to i64*)
231 // %arg_val3 = call i64 @ExprWrapper3(i8* %context, %"class.impala::TupleRow"* %row)
232 // store i64 %arg_val3, i64* inttoptr (i64 89111088 to i64*)
233 // %arg_val4 = call i64 @ExprWrapper4(i8* %context, %"class.impala::TupleRow"* %row)
234 // store i64 %arg_val4, i64* inttoptr (i64 89111096 to i64*)
235 // %result = call { i8, double }
236 // @_Z14VarSumMultiplyPN10impala_udf15FunctionContextERKNS_9DoubleValEiPKNS_6IntValE(
237 // %"class.impala_udf::FunctionContext"* inttoptr
238 // (i64 37522464 to %"class.impala_udf::FunctionContext"*),
239 // {i8, double }* %arg_ptr,
240 // i32 4,
241 // i64* inttoptr (i64 89111072 to i64*))
242 // ret { i8, double } %result
// NOTE(review): the signature line of this function is absent from this listing.
// The body below returns the cached ir_compute_fn_ when one exists, fails with an
// error Status (and sets *fn to NULL) if any child is of type CHAR, and otherwise
// builds the wrapper, caches it in ir_compute_fn_ and returns it through *fn.
244  if (ir_compute_fn_ != NULL) {
245  *fn = ir_compute_fn_;
246  return Status::OK;
247  }
248  for (int i = 0; i < GetNumChildren(); ++i) {
249  if (children_[i]->type().type == TYPE_CHAR) {
250  *fn = NULL;
251  return Status("ScalarFnCall Codegen not supported for CHAR");
252  }
253  }
254 
255  LlvmCodeGen* codegen;
256  RETURN_IF_ERROR(state->GetCodegen(&codegen));
257  llvm::Function* udf;
258  RETURN_IF_ERROR(GetUdf(state, &udf));
259 
260  // Create wrapper that computes args and calls UDF
261  stringstream fn_name;
262  fn_name << SymbolsUtil::DemangleNameOnly(udf->getName().str()) << "Wrapper";
263 
   // args[0] is the expr context and args[1] is the row, as used below.
264  llvm::Value* args[2];
265  *fn = CreateIrFunctionPrototype(codegen, fn_name.str(), &args);
266  llvm::Value* expr_ctx = args[0];
267  llvm::Value* row = args[1];
268  llvm::BasicBlock* block = llvm::BasicBlock::Create(codegen->context(), "entry", *fn);
269  LlvmCodeGen::LlvmBuilder builder(block);
270 
271  // Populate UDF arguments
272  vector<llvm::Value*> udf_args;
273 
274  // First argument is always FunctionContext*.
275  // Index into our registered offset in the ExprContext.
276  llvm::Value* expr_ctx_gep = builder.CreateStructGEP(expr_ctx, 1, "expr_ctx_gep");
277  llvm::Value* fn_ctxs_base = builder.CreateLoad(expr_ctx_gep, "fn_ctxs_base");
278  // Use GEP to add our index to the base pointer
279  llvm::Value* fn_ctx_ptr =
280  builder.CreateConstGEP1_32(fn_ctxs_base, context_index_, "fn_ctx_ptr");
281  llvm::Value* fn_ctx = builder.CreateLoad(fn_ctx_ptr, "fn_ctx");
282  udf_args.push_back(fn_ctx);
283 
284  // Get IR i8* pointer to varargs buffer from FunctionContext* argument
285  // (if there are varargs)
286  llvm::Value* varargs_buffer = NULL;
287  if (vararg_start_idx_ != -1) {
288  // FunctionContextImpl is first field of FunctionContext
289  // fn_ctx_impl_ptr has type FunctionContextImpl**
290  llvm::Value* fn_ctx_impl_ptr = builder.CreateStructGEP(fn_ctx, 0, "fn_ctx_impl_ptr");
291  llvm::Value* fn_ctx_impl = builder.CreateLoad(fn_ctx_impl_ptr, "fn_ctx_impl");
292  // varargs_buffer is first field of FunctionContextImpl
293  // varargs_buffer_ptr has type i8**
294  llvm::Value* varargs_buffer_ptr =
295  builder.CreateStructGEP(fn_ctx_impl, 0, "varargs_buffer");
296  varargs_buffer = builder.CreateLoad(varargs_buffer_ptr);
297  }
298  // Tracks where to write the next vararg to
299  int varargs_buffer_offset = 0;
300 
301  // Call children to populate remaining arguments
302  for (int i = 0; i < GetNumChildren(); ++i) {
303  llvm::Function* child_fn = NULL;
304  vector<llvm::Value*> child_fn_args;
305  if (state->codegen_enabled()) {
306  // Set 'child_fn' to the codegen'd function, sets child_fn = NULL if codegen fails
   // The returned Status is deliberately not checked: a failure is detected through
   // child_fn remaining NULL and handled by the fallback below.
307  children_[i]->GetCodegendComputeFn(state, &child_fn);
308  }
309  if (child_fn == NULL) {
310  // Set 'child_fn' to the interpreted function
311  child_fn = GetStaticGetValWrapper(children_[i]->type(), codegen);
312  // First argument to interpreted function is children_[i]
313  llvm::Type* expr_ptr_type = codegen->GetPtrType(Expr::LLVM_CLASS_NAME);
314  child_fn_args.push_back(codegen->CastPtrToLlvmPtr(expr_ptr_type, children_[i]));
315  }
316  child_fn_args.push_back(expr_ctx);
317  child_fn_args.push_back(row);
318 
319  // Call 'child_fn', adding the result to either 'udf_args' or 'varargs_buffer'
320  DCHECK(child_fn != NULL);
321  llvm::Type* arg_type = CodegenAnyVal::GetUnloweredType(codegen, children_[i]->type());
322  llvm::Value* arg_val_ptr;
323  if (vararg_start_idx_ == -1 || i < vararg_start_idx_) {
324  // Either no varargs or arguments before varargs begin. Allocate space to store
325  // 'child_fn's result so we can pass the pointer to the UDF.
326  arg_val_ptr = codegen->CreateEntryBlockAlloca(builder, arg_type, "arg_val_ptr");
327 
328  if (children_[i]->type().type == TYPE_DECIMAL) {
329  // UDFs may manipulate DecimalVal arguments via SIMD instructions such as 'movaps'
330  // that require 16-byte memory alignment. LLVM uses 8-byte alignment by default,
331  // so explicitly set the alignment for DecimalVals.
332  llvm::cast<llvm::AllocaInst>(arg_val_ptr)->setAlignment(16);
333  }
334  udf_args.push_back(arg_val_ptr);
335  } else {
336  // Store the result of 'child_fn' in varargs_buffer + varargs_buffer_offset
337  arg_val_ptr =
338  builder.CreateConstGEP1_32(varargs_buffer, varargs_buffer_offset, "arg_val_ptr");
339  varargs_buffer_offset += AnyValUtil::AnyValSize(children_[i]->type());
340  // Cast arg_val_ptr from i8* to AnyVal pointer type
341  arg_val_ptr =
342  builder.CreateBitCast(arg_val_ptr, arg_type->getPointerTo(), "arg_val_ptr");
343  }
344  DCHECK_EQ(arg_val_ptr->getType(), arg_type->getPointerTo());
345  // The result of the call must be stored in a lowered AnyVal
346  llvm::Value* lowered_arg_val_ptr = builder.CreateBitCast(
347  arg_val_ptr, CodegenAnyVal::GetLoweredPtrType(codegen, children_[i]->type()),
348  "lowered_arg_val_ptr");
   // NOTE(review): the callee name that starts this statement is on a line dropped
   // from this listing; only its argument list is visible. The call invokes 'child_fn'
   // and passes lowered_arg_val_ptr as the place to store the result.
350  codegen, &builder, child_fn, child_fn_args, "arg_val", lowered_arg_val_ptr);
351  }
352 
353  if (vararg_start_idx_ != -1) {
354  // We've added the FunctionContext argument plus any non-variadic arguments
355  DCHECK_EQ(udf_args.size(), vararg_start_idx_ + 1);
356  DCHECK_GE(GetNumChildren(), 1);
357  // Add the number of varargs
358  udf_args.push_back(codegen->GetIntConstant(
359  TYPE_INT, GetNumChildren() - vararg_start_idx_));
360  // Add all the accumulated vararg inputs as one input argument.
361  llvm::PointerType* vararg_type = codegen->GetPtrType(
362  CodegenAnyVal::GetUnloweredType(codegen, children_.back()->type()));
363  udf_args.push_back(builder.CreateBitCast(varargs_buffer, vararg_type, "varargs"));
364  }
365 
366  // Call UDF
367  llvm::Value* result_val =
368  CodegenAnyVal::CreateCall(codegen, &builder, udf, udf_args, "result");
369  builder.CreateRet(result_val);
370 
   // Finalize the wrapper and cache it so later calls take the early return at the top.
371  *fn = codegen->FinalizeFunction(*fn);
372  DCHECK(*fn != NULL);
373  ir_compute_fn_ = *fn;
374  return Status::OK;
375 }
376 
// Sets '*udf' to the llvm::Function that should be called for this expr.
// NATIVE functions, and BUILTINs when codegen is disabled or the builtin is one of the
// known non-JIT-able ones, are resolved to a pre-compiled function pointer and exposed
// as an external declaration mapped to that address. Other BUILTINs and IR UDFs are
// looked up by symbol in the codegen module. On success the function is marked
// AlwaysInline. Returns a non-OK Status if the codegen object is unavailable or the
// symbol cannot be found.
377 Status ScalarFnCall::GetUdf(RuntimeState* state, llvm::Function** udf) {
378  LlvmCodeGen* codegen;
379  RETURN_IF_ERROR(state->GetCodegen(&codegen));
380 
381  // from_utc_timestamp and to_utc_timestamp have inline ASM that cannot be JIT'd.
382  // TimestampFunctions::DateAddSub() contains a try/catch which doesn't work in JIT'd
383  // code. Always use the statically compiled versions of these functions so the
384  // xcompiled versions are not included in the final module to be JIT'd.
385  // TODO: fix this
386  bool broken_builtin = fn_.name.function_name == "from_utc_timestamp" ||
387  fn_.name.function_name == "to_utc_timestamp" ||
388  fn_.scalar_fn.symbol.find("DateAddSub") != string::npos;
389  if (fn_.binary_type == TFunctionBinaryType::NATIVE ||
390  (fn_.binary_type == TFunctionBinaryType::BUILTIN &&
391  (!state->codegen_enabled() || broken_builtin))) {
392  // In this path, we are calling code that has been statically compiled to assembly.
393  // This can either be a UDF implemented in a .so or a builtin using the UDF
394  // interface with the code in impalad.
395  void* fn_ptr;
   // NOTE(review): the start of this statement (the declaration of 'status' and the
   // callee name) is on a line dropped from this listing; only the argument list is
   // visible. The call fills in fn_ptr and cache_entry_.
397  fn_.hdfs_location, fn_.scalar_fn.symbol, &fn_ptr, &cache_entry_);
398  if (!status.ok() && fn_.binary_type == TFunctionBinaryType::BUILTIN) {
399  // Builtins symbols should exist unless there is a version mismatch.
400  status.AddDetail(ErrorMsg(TErrorCode::MISSING_BUILTIN,
401  fn_.name.function_name, fn_.scalar_fn.symbol).msg());
402  }
403  RETURN_IF_ERROR(status);
404  DCHECK(fn_ptr != NULL);
405 
406  // Convert UDF function pointer to llvm::Function*
407  // First generate the llvm::FunctionType* corresponding to the UDF.
408  llvm::Type* return_type = CodegenAnyVal::GetLoweredType(codegen, type());
409  vector<llvm::Type*> arg_types;
410 
411  if (type().type == TYPE_DECIMAL) {
412  // Per the x64 ABI, DecimalVals are returned via a DecimalVal* output argument
413  return_type = codegen->void_type();
414  arg_types.push_back(
415  codegen->GetPtrType(CodegenAnyVal::GetUnloweredType(codegen, type())));
416  }
417 
418  arg_types.push_back(codegen->GetPtrType("class.impala_udf::FunctionContext"));
419  for (int i = 0; i < NumFixedArgs(); ++i) {
   // NOTE(review): the argument of GetPtrType() is on a line dropped from this
   // listing, so the statement below appears unterminated here.
420  llvm::Type* arg_type = codegen->GetPtrType(
422  arg_types.push_back(arg_type);
423  }
424 
   // Varargs are passed as a count followed by a pointer typed after the first
   // vararg child.
425  if (vararg_start_idx_ >= 0) {
426  llvm::Type* vararg_type = CodegenAnyVal::GetUnloweredPtrType(
427  codegen, children_[vararg_start_idx_]->type());
428  arg_types.push_back(codegen->GetType(TYPE_INT));
429  arg_types.push_back(vararg_type);
430  }
431  llvm::FunctionType* udf_type = llvm::FunctionType::get(return_type, arg_types, false);
432 
433  // Create a llvm::Function* with the generated type. This is only a function
434  // declaration, not a definition, since we do not create any basic blocks or
435  // instructions in it.
436  *udf = llvm::Function::Create(
437  udf_type, llvm::GlobalValue::ExternalLinkage,
438  fn_.scalar_fn.symbol, codegen->module());
439 
440  // Associate the dynamically loaded function pointer with the Function* we
441  // defined. This tells LLVM where the compiled function definition is located in
442  // memory.
443  codegen->execution_engine()->addGlobalMapping(*udf, fn_ptr);
444  } else if (fn_.binary_type == TFunctionBinaryType::BUILTIN) {
445  // In this path, we're running a builtin with the UDF interface. The IR is
446  // in the llvm module.
447  DCHECK(state->codegen_enabled());
448  *udf = codegen->module()->getFunction(fn_.scalar_fn.symbol);
449  if (*udf == NULL) {
450  // Builtins symbols should exist unless there is a version mismatch.
451  stringstream ss;
452  ss << "Builtin '" << fn_.name.function_name << "' with symbol '"
453  << fn_.scalar_fn.symbol << "' does not exist. "
454  << "Verify that all your impalads are the same version.";
455  return Status(ss.str());
456  }
457  } else {
458  // We're running a IR UDF.
459  DCHECK_EQ(fn_.binary_type, TFunctionBinaryType::IR);
460  *udf = codegen->module()->getFunction(fn_.scalar_fn.symbol);
461  if (*udf == NULL) {
462  stringstream ss;
463  ss << "Unable to locate function " << fn_.scalar_fn.symbol
464  << " from LLVM module " << fn_.hdfs_location;
465  return Status(ss.str());
466  }
467  }
468  (*udf)->addFnAttr(llvm::Attribute::AlwaysInline);
469  return Status::OK;
470 }
471 
472 Status ScalarFnCall::GetFunction(RuntimeState* state, const string& symbol, void** fn) {
473  if (fn_.binary_type == TFunctionBinaryType::NATIVE ||
474  fn_.binary_type == TFunctionBinaryType::BUILTIN) {
475  return LibCache::instance()->GetSoFunctionPtr(fn_.hdfs_location, symbol, fn,
476  &cache_entry_);
477  } else {
478  DCHECK_EQ(fn_.binary_type, TFunctionBinaryType::IR);
479  LlvmCodeGen* codegen;
480  RETURN_IF_ERROR(state->GetCodegen(&codegen));
481  llvm::Function* ir_fn = codegen->module()->getFunction(symbol);
482  if (ir_fn == NULL) {
483  stringstream ss;
484  ss << "Unable to locate function " << symbol
485  << " from LLVM module " << fn_.hdfs_location;
486  return Status(ss.str());
487  }
488  codegen->AddFunctionToJit(ir_fn, fn);
489  return Status::OK;
490  }
491 }
492 
// Body of what is presumably ScalarFnCall::EvaluateChildren() (the first line of the
// signature is absent from this listing). Evaluates every child against 'row' and
// converts each result into an AnyVal: fixed arguments go into (*input_vals)[i], and
// varargs are written one after another into the FunctionContext's varargs buffer.
494  vector<AnyVal*>* input_vals) {
495  DCHECK_EQ(input_vals->size(), NumFixedArgs());
496  FunctionContext* fn_ctx = context->fn_context(context_index_);
497  uint8_t* varargs_buffer = fn_ctx->impl()->varargs_buffer();
498  for (int i = 0; i < children_.size(); ++i) {
499  void* src_slot = context->GetValue(children_[i], row);
500  AnyVal* dst_val;
501  if (vararg_start_idx_ == -1 || i < vararg_start_idx_) {
502  dst_val = (*input_vals)[i];
503  } else {
   // Vararg: use the current buffer position, then advance it by this AnyVal's size.
504  dst_val = reinterpret_cast<AnyVal*>(varargs_buffer);
505  varargs_buffer += AnyValUtil::AnyValSize(children_[i]->type());
506  }
507  AnyValUtil::SetAnyVal(src_slot, children_[i]->type(), dst_val);
508  }
509 }
510 
// Interpreted (non-JIT) evaluation: evaluates the children into the staging AnyVals
// and calls scalar_fn_ directly. scalar_fn_ is an untyped pointer, so it is cast to a
// function pointer type matching the number of fixed arguments (0 through 8), with a
// trailing (int num_varargs, const AnyVal* varargs) pair when the function is
// variadic. Any other argument count hits DCHECK(false) and falls through to return
// RETURN_TYPE::null().
511 template<typename RETURN_TYPE>
512 RETURN_TYPE ScalarFnCall::InterpretEval(ExprContext* context, TupleRow* row) {
513  DCHECK(scalar_fn_ != NULL);
514  FunctionContext* fn_ctx = context->fn_context(context_index_);
515  vector<AnyVal*>* input_vals = fn_ctx->impl()->staging_input_vals();
516  EvaluateChildren(context, row, input_vals);
517 
   // Non-variadic call: every child is a fixed argument.
518  if (vararg_start_idx_ == -1) {
519  switch (children_.size()) {
520  case 0:
521  typedef RETURN_TYPE (*ScalarFn0)(FunctionContext*);
522  return reinterpret_cast<ScalarFn0>(scalar_fn_)(fn_ctx);
523  case 1:
524  typedef RETURN_TYPE (*ScalarFn1)(FunctionContext*, const AnyVal& a1);
525  return reinterpret_cast<ScalarFn1>(scalar_fn_)(fn_ctx,
526  *(*input_vals)[0]);
527  case 2:
528  typedef RETURN_TYPE (*ScalarFn2)(FunctionContext*, const AnyVal& a1,
529  const AnyVal& a2);
530  return reinterpret_cast<ScalarFn2>(scalar_fn_)(fn_ctx,
531  *(*input_vals)[0], *(*input_vals)[1]);
532  case 3:
533  typedef RETURN_TYPE (*ScalarFn3)(FunctionContext*, const AnyVal& a1,
534  const AnyVal& a2, const AnyVal& a3);
535  return reinterpret_cast<ScalarFn3>(scalar_fn_)(fn_ctx,
536  *(*input_vals)[0], *(*input_vals)[1], *(*input_vals)[2]);
537  case 4:
538  typedef RETURN_TYPE (*ScalarFn4)(FunctionContext*, const AnyVal& a1,
539  const AnyVal& a2, const AnyVal& a3, const AnyVal& a4);
540  return reinterpret_cast<ScalarFn4>(scalar_fn_)(fn_ctx,
541  *(*input_vals)[0], *(*input_vals)[1], *(*input_vals)[2], *(*input_vals)[3]);
542  case 5:
543  typedef RETURN_TYPE (*ScalarFn5)(FunctionContext*, const AnyVal& a1,
544  const AnyVal& a2, const AnyVal& a3, const AnyVal& a4, const AnyVal& a5);
545  return reinterpret_cast<ScalarFn5>(scalar_fn_)(fn_ctx,
546  *(*input_vals)[0], *(*input_vals)[1], *(*input_vals)[2], *(*input_vals)[3],
547  *(*input_vals)[4]);
548  case 6:
549  typedef RETURN_TYPE (*ScalarFn6)(FunctionContext*, const AnyVal& a1,
550  const AnyVal& a2, const AnyVal& a3, const AnyVal& a4, const AnyVal& a5,
551  const AnyVal& a6);
552  return reinterpret_cast<ScalarFn6>(scalar_fn_)(fn_ctx,
553  *(*input_vals)[0], *(*input_vals)[1], *(*input_vals)[2], *(*input_vals)[3],
554  *(*input_vals)[4], *(*input_vals)[5]);
555  case 7:
556  typedef RETURN_TYPE (*ScalarFn7)(FunctionContext*, const AnyVal& a1,
557  const AnyVal& a2, const AnyVal& a3, const AnyVal& a4, const AnyVal& a5,
558  const AnyVal& a6, const AnyVal& a7);
559  return reinterpret_cast<ScalarFn7>(scalar_fn_)(fn_ctx,
560  *(*input_vals)[0], *(*input_vals)[1], *(*input_vals)[2], *(*input_vals)[3],
561  *(*input_vals)[4], *(*input_vals)[5], *(*input_vals)[6]);
562  case 8:
563  typedef RETURN_TYPE (*ScalarFn8)(FunctionContext*, const AnyVal& a1,
564  const AnyVal& a2, const AnyVal& a3, const AnyVal& a4, const AnyVal& a5,
565  const AnyVal& a6, const AnyVal& a7, const AnyVal& a8);
566  return reinterpret_cast<ScalarFn8>(scalar_fn_)(fn_ctx,
567  *(*input_vals)[0], *(*input_vals)[1], *(*input_vals)[2], *(*input_vals)[3],
568  *(*input_vals)[4], *(*input_vals)[5], *(*input_vals)[6], *(*input_vals)[7]);
569  default:
570  DCHECK(false) << "Interpreted path not implemented. We should have "
571  << "codegen'd the wrapper";
572  }
573  } else {
   // Variadic call: fixed arguments come from input_vals; the remaining children were
   // written into the varargs buffer by EvaluateChildren().
574  int num_varargs = children_.size() - NumFixedArgs();
575  const AnyVal* varargs = reinterpret_cast<AnyVal*>(fn_ctx->impl()->varargs_buffer());
576  switch (NumFixedArgs()) {
577  case 0:
578  typedef RETURN_TYPE (*VarargFn0)(FunctionContext*, int num_varargs,
579  const AnyVal* varargs);
580  return reinterpret_cast<VarargFn0>(scalar_fn_)(fn_ctx, num_varargs, varargs);
581  case 1:
582  typedef RETURN_TYPE (*VarargFn1)(FunctionContext*, const AnyVal& a1,
583  int num_varargs, const AnyVal* varargs);
584  return reinterpret_cast<VarargFn1>(scalar_fn_)(fn_ctx, *(*input_vals)[0],
585  num_varargs, varargs);
586  case 2:
587  typedef RETURN_TYPE (*VarargFn2)(FunctionContext*, const AnyVal& a1,
588  const AnyVal& a2, int num_varargs, const AnyVal* varargs);
589  return reinterpret_cast<VarargFn2>(scalar_fn_)(fn_ctx, *(*input_vals)[0],
590  *(*input_vals)[1], num_varargs, varargs);
591  case 3:
592  typedef RETURN_TYPE (*VarargFn3)(FunctionContext*, const AnyVal& a1,
593  const AnyVal& a2, const AnyVal& a3, int num_varargs, const AnyVal* varargs);
594  return reinterpret_cast<VarargFn3>(scalar_fn_)(fn_ctx, *(*input_vals)[0],
595  *(*input_vals)[1], *(*input_vals)[2], num_varargs, varargs);
596  case 4:
597  typedef RETURN_TYPE (*VarargFn4)(FunctionContext*, const AnyVal& a1,
598  const AnyVal& a2, const AnyVal& a3, const AnyVal& a4, int num_varargs,
599  const AnyVal* varargs);
600  return reinterpret_cast<VarargFn4>(scalar_fn_)(fn_ctx,
601  *(*input_vals)[0], *(*input_vals)[1], *(*input_vals)[2], *(*input_vals)[3],
602  num_varargs, varargs);
603  case 5:
604  typedef RETURN_TYPE (*VarargFn5)(FunctionContext*, const AnyVal& a1,
605  const AnyVal& a2, const AnyVal& a3, const AnyVal& a4, const AnyVal& a5,
606  int num_varargs, const AnyVal* varargs);
607  return reinterpret_cast<VarargFn5>(scalar_fn_)(fn_ctx,
608  *(*input_vals)[0], *(*input_vals)[1], *(*input_vals)[2], *(*input_vals)[3],
609  *(*input_vals)[4], num_varargs, varargs);
610  case 6:
611  typedef RETURN_TYPE (*VarargFn6)(FunctionContext*, const AnyVal& a1,
612  const AnyVal& a2, const AnyVal& a3, const AnyVal& a4, const AnyVal& a5,
613  const AnyVal& a6, int num_varargs, const AnyVal* varargs);
614  return reinterpret_cast<VarargFn6>(scalar_fn_)(fn_ctx,
615  *(*input_vals)[0], *(*input_vals)[1], *(*input_vals)[2], *(*input_vals)[3],
616  *(*input_vals)[4], *(*input_vals)[5], num_varargs, varargs);
617  case 7:
618  typedef RETURN_TYPE (*VarargFn7)(FunctionContext*, const AnyVal& a1,
619  const AnyVal& a2, const AnyVal& a3, const AnyVal& a4, const AnyVal& a5,
620  const AnyVal& a6, const AnyVal& a7, int num_varargs, const AnyVal* varargs);
621  return reinterpret_cast<VarargFn7>(scalar_fn_)(fn_ctx,
622  *(*input_vals)[0], *(*input_vals)[1], *(*input_vals)[2], *(*input_vals)[3],
623  *(*input_vals)[4], *(*input_vals)[5], *(*input_vals)[6], num_varargs, varargs);
624  case 8:
625  typedef RETURN_TYPE (*VarargFn8)(FunctionContext*, const AnyVal& a1,
626  const AnyVal& a2, const AnyVal& a3, const AnyVal& a4, const AnyVal& a5,
627  const AnyVal& a6, const AnyVal& a7, const AnyVal& a8, int num_varargs,
628  const AnyVal* varargs);
629  return reinterpret_cast<VarargFn8>(scalar_fn_)(fn_ctx,
630  *(*input_vals)[0], *(*input_vals)[1], *(*input_vals)[2], *(*input_vals)[3],
631  *(*input_vals)[4], *(*input_vals)[5], *(*input_vals)[6], *(*input_vals)[7],
632  num_varargs, varargs);
633  default:
634  DCHECK(false) << "Interpreted path not implemented. We should have "
635  << "codegen'd the wrapper";
636  }
637  }
   // Only reached from the default cases above (unsupported argument count).
638  return RETURN_TYPE::null();
639 }
640 
651 
652 // TODO: macroify this?
// Body of what is presumably ScalarFnCall::GetBooleanVal() (the signature line is
// absent from this listing). Uses the interpreted path when no JIT'd wrapper exists;
// otherwise calls the wrapper through a BooleanWrapper function pointer.
654  DCHECK_EQ(type_.type, TYPE_BOOLEAN);
655  DCHECK(context != NULL);
656  if (scalar_fn_wrapper_ == NULL) return InterpretEval<BooleanVal>(context, row);
657  BooleanWrapper fn = reinterpret_cast<BooleanWrapper>(scalar_fn_wrapper_);
658  return fn(context, row);
659 }
660 
// Body of what is presumably ScalarFnCall::GetTinyIntVal() (the signature line is
// absent from this listing). Uses the interpreted path when no JIT'd wrapper exists;
// otherwise calls the wrapper through a TinyIntWrapper function pointer.
662  DCHECK_EQ(type_.type, TYPE_TINYINT);
663  DCHECK(context != NULL);
664  if (scalar_fn_wrapper_ == NULL) return InterpretEval<TinyIntVal>(context, row);
665  TinyIntWrapper fn = reinterpret_cast<TinyIntWrapper>(scalar_fn_wrapper_);
666  return fn(context, row);
667 }
668 
// Body of what is presumably ScalarFnCall::GetSmallIntVal() (the signature line is
// absent from this listing). Uses the interpreted path when no JIT'd wrapper exists;
// otherwise calls the wrapper through a SmallIntWrapper function pointer.
670  DCHECK_EQ(type_.type, TYPE_SMALLINT);
671  DCHECK(context != NULL);
672  if (scalar_fn_wrapper_ == NULL) return InterpretEval<SmallIntVal>(context, row);
673  SmallIntWrapper fn = reinterpret_cast<SmallIntWrapper>(scalar_fn_wrapper_);
674  return fn(context, row);
675 }
676 
// Body of what is presumably ScalarFnCall::GetIntVal() (the signature line is absent
// from this listing). Uses the interpreted path when no JIT'd wrapper exists;
// otherwise calls the wrapper through an IntWrapper function pointer.
678  DCHECK_EQ(type_.type, TYPE_INT);
679  DCHECK(context != NULL);
680  if (scalar_fn_wrapper_ == NULL) return InterpretEval<IntVal>(context, row);
681  IntWrapper fn = reinterpret_cast<IntWrapper>(scalar_fn_wrapper_);
682  return fn(context, row);
683 }
684 
// Body of what is presumably ScalarFnCall::GetBigIntVal() (the signature line is
// absent from this listing). Uses the interpreted path when no JIT'd wrapper exists;
// otherwise calls the wrapper through a BigIntWrapper function pointer.
686  DCHECK_EQ(type_.type, TYPE_BIGINT);
687  DCHECK(context != NULL);
688  if (scalar_fn_wrapper_ == NULL) return InterpretEval<BigIntVal>(context, row);
689  BigIntWrapper fn = reinterpret_cast<BigIntWrapper>(scalar_fn_wrapper_);
690  return fn(context, row);
691 }
692 
// Body of what is presumably ScalarFnCall::GetFloatVal() (the signature line is absent
// from this listing). Uses the interpreted path when no JIT'd wrapper exists;
// otherwise calls the wrapper through a FloatWrapper function pointer.
694  DCHECK_EQ(type_.type, TYPE_FLOAT);
695  DCHECK(context != NULL);
696  if (scalar_fn_wrapper_ == NULL) return InterpretEval<FloatVal>(context, row);
697  FloatWrapper fn = reinterpret_cast<FloatWrapper>(scalar_fn_wrapper_);
698  return fn(context, row);
699 }
700 
// Body of what is presumably ScalarFnCall::GetDoubleVal() (the signature line is
// absent from this listing). Uses the interpreted path when no JIT'd wrapper exists;
// otherwise calls the wrapper through a DoubleWrapper function pointer.
702  DCHECK_EQ(type_.type, TYPE_DOUBLE);
703  DCHECK(context != NULL);
704  if (scalar_fn_wrapper_ == NULL) return InterpretEval<DoubleVal>(context, row);
705  DoubleWrapper fn = reinterpret_cast<DoubleWrapper>(scalar_fn_wrapper_);
706  return fn(context, row);
707 }
708 
// Body of what is presumably ScalarFnCall::GetStringVal() (the signature line is
// absent from this listing). Accepts any type for which IsStringType() holds. Uses the
// interpreted path when no JIT'd wrapper exists; otherwise calls the wrapper through a
// StringWrapper function pointer.
710  DCHECK(type_.IsStringType());
711  DCHECK(context != NULL);
712  if (scalar_fn_wrapper_ == NULL) return InterpretEval<StringVal>(context, row);
713  StringWrapper fn = reinterpret_cast<StringWrapper>(scalar_fn_wrapper_);
714  return fn(context, row);
715 }
716 
// Body of what is presumably ScalarFnCall::GetTimestampVal() (the signature line is
// absent from this listing). Uses the interpreted path when no JIT'd wrapper exists;
// otherwise calls the wrapper through a TimestampWrapper function pointer.
718  DCHECK_EQ(type_.type, TYPE_TIMESTAMP);
719  DCHECK(context != NULL);
720  if (scalar_fn_wrapper_ == NULL) return InterpretEval<TimestampVal>(context, row);
721  TimestampWrapper fn = reinterpret_cast<TimestampWrapper>(scalar_fn_wrapper_);
722  return fn(context, row);
723 }
724 
// Body of what is presumably ScalarFnCall::GetDecimalVal() (the signature line is
// absent from this listing). Uses the interpreted path when no JIT'd wrapper exists;
// otherwise calls the wrapper through a DecimalWrapper function pointer.
726  DCHECK_EQ(type_.type, TYPE_DECIMAL);
727  DCHECK(context != NULL);
728  if (scalar_fn_wrapper_ == NULL) return InterpretEval<DecimalVal>(context, row);
729  DecimalWrapper fn = reinterpret_cast<DecimalWrapper>(scalar_fn_wrapper_);
730  return fn(context, row);
731 }
732 
// Body of what is presumably ScalarFnCall::DebugString() (the signature line is absent
// from this listing). Formats the function's binary type, library location and symbol
// name, followed by Expr::DebugString(), into a single string.
734  stringstream out;
735  out << "ScalarFnCall(udf_type=" << fn_.binary_type
736  << " location=" << fn_.hdfs_location
737  << " symbol_name=" << fn_.scalar_fn.symbol << Expr::DebugString() << ")";
738  return out.str();
739 }
const std::string & msg() const
Returns the formatted error string.
Definition: error-util.h:118
virtual DecimalVal GetDecimalVal(ExprContext *context, TupleRow *)
AnyVal * CreateAnyVal(ObjectPool *pool, const ColumnType &type)
Creates the corresponding AnyVal subclass for type. The object is added to the pool.
Definition: anyval-util.cc:26
static llvm::Type * GetLoweredPtrType(LlvmCodeGen *cg, const ColumnType &type)
llvm::Function * ir_compute_fn_
Cached codegened compute function. Exprs should set this in GetCodegendComputeFn().
Definition: expr.h:299
RETURN_TYPE InterpretEval(ExprContext *context, TupleRow *row)
Function to call scalar_fn_. Used in the interpreted path.
virtual TinyIntVal GetTinyIntVal(ExprContext *context, TupleRow *)
static FunctionContext::TypeDesc ColumnTypeToTypeDesc(const ColumnType &type)
Definition: anyval-util.cc:52
const std::string GetDetail() const
Definition: status.cc:184
virtual SmallIntVal GetSmallIntVal(ExprContext *context, TupleRow *)
llvm::PointerType * GetPtrType(llvm::Type *type)
Return a pointer type to 'type'.
impala::FunctionContextImpl * impl()
TODO: Add mechanism for UDAs to update stats similar to runtime profile counters. ...
Definition: udf.h:202
BooleanVal(* BooleanWrapper)(ExprContext *, TupleRow *)
bool codegen_created() const
int NumFixedArgs() const
Returns the number of non-vararg arguments.
static Status Open(const std::vector< ExprContext * > &ctxs, RuntimeState *state)
Convenience function for opening multiple expr trees.
TinyIntVal(* TinyIntWrapper)(ExprContext *, TupleRow *)
static llvm::Type * GetUnloweredType(LlvmCodeGen *cg, const ColumnType &type)
void SetConstantArgs(const std::vector< impala_udf::AnyVal * > &constant_args)
Sets constant_args_. The AnyVal* values are owned by the caller.
Definition: udf.cc:414
#define RETURN_IF_ERROR(stmt)
some generally useful macros
Definition: status.h:242
int32_t val
Definition: udf.h:421
virtual StringVal GetStringVal(ExprContext *context, TupleRow *)
static llvm::Value * CreateCall(LlvmCodeGen *cg, LlvmCodeGen::LlvmBuilder *builder, llvm::Function *fn, llvm::ArrayRef< llvm::Value * > args, const char *name="", llvm::Value *result_ptr=NULL)
'name' optionally specifies the name of the returned value.
FloatVal(* FloatWrapper)(ExprContext *, TupleRow *)
void * GetValue(TupleRow *row)
bool has_error() const
Returns true if there's been an error set.
Definition: udf.cc:253
ScalarFnCall(const TExprNode &node)
int context_index_
Definition: expr.h:296
virtual bool IsConstant() const
Definition: expr.cc:411
void AddDetail(const std::string &msg)
Add a detail string. Calling this method is only defined on a non-OK message.
Definition: status.cc:166
This object has a compatible storage format with boost::ptime.
Definition: udf.h:495
void EvaluateChildren(ExprContext *context, TupleRow *row, std::vector< impala_udf::AnyVal * > *input_vals)
static llvm::Type * GetUnloweredPtrType(LlvmCodeGen *cg, const ColumnType &type)
BigIntVal(* BigIntWrapper)(ExprContext *, TupleRow *)
DecimalVal(* DecimalWrapper)(ExprContext *, TupleRow *)
static void Close(const std::vector< ExprContext * > &ctxs, RuntimeState *state)
Convenience function for closing multiple expr trees.
ObjectPool * obj_pool()
Returns a local object pool.
Definition: coordinator.h:263
LLVM code generator. This is the top level object to generate jitted code.
Definition: llvm-codegen.h:107
llvm::Function * CreateIrFunctionPrototype(LlvmCodeGen *codegen, const std::string &name, llvm::Value *(*args)[2])
Definition: expr.cc:456
bool IsStringType() const
Definition: types.h:168
virtual bool IsConstant() const
virtual void Close(RuntimeState *state, ExprContext *context, FunctionContext::FunctionStateScope scope=FunctionContext::FRAGMENT_LOCAL)
Subclasses overriding this function should call Expr::Close().
virtual Status GetCodegendComputeFn(RuntimeState *state, llvm::Function **fn)
PrimitiveType type
Definition: types.h:60
DoubleVal(* DoubleWrapper)(ExprContext *, TupleRow *)
llvm::Value * CastPtrToLlvmPtr(llvm::Type *type, const void *ptr)
Status GetUdf(RuntimeState *state, llvm::Function **udf)
Loads the native or IR function from HDFS and puts the result in *udf.
virtual BooleanVal GetBooleanVal(ExprContext *context, TupleRow *)
llvm::ExecutionEngine * execution_engine()
Returns execution engine interface.
Definition: llvm-codegen.h:217
static const char * LLVM_CLASS_NAME
Definition: expr.h:232
ObjectPool * obj_pool() const
Definition: runtime-state.h:92
virtual AnyVal * GetConstVal(ExprContext *context)
Definition: expr.cc:491
Status LinkModule(const std::string &file)
virtual TimestampVal GetTimestampVal(ExprContext *context, TupleRow *)
static llvm::Type * GetLoweredType(LlvmCodeGen *cg, const ColumnType &type)
IntVal(* IntWrapper)(ExprContext *, TupleRow *)
LibCache::LibCacheEntry * cache_entry_
Cache entry for the library implementing this function.
Definition: expr.h:281
Status GetFunction(RuntimeState *state, const std::string &symbol, void **fn)
Status GetSoFunctionPtr(const std::string &hdfs_lib_file, const std::string &symbol, void **fn_ptr, LibCacheEntry **entry, bool quiet=false)
If 'quiet' is true, returned error statuses will not be logged.
Definition: lib-cache.cc:130
void AddFunctionToJit(llvm::Function *fn, void **fn_ptr)
This is the superclass of all expr evaluation nodes.
Definition: expr.h:116
static LibCache * instance()
Definition: lib-cache.h:63
virtual BigIntVal GetBigIntVal(ExprContext *context, TupleRow *)
virtual std::string DebugString() const
const char * error_msg() const
Returns the current error message. Returns NULL if there is no error.
Definition: udf.cc:257
virtual Status Open(RuntimeState *state, ExprContext *context, FunctionContext::FunctionStateScope scope=FunctionContext::FRAGMENT_LOCAL)
TimestampVal(* TimestampWrapper)(ExprContext *, TupleRow *)
virtual FloatVal GetFloatVal(ExprContext *context, TupleRow *)
TFunction fn_
Function description.
Definition: expr.h:284
const ColumnType & type() const
Definition: expr.h:145
StringVal(* StringWrapper)(ExprContext *, TupleRow *)
void SetErrorMsg(const ErrorMsg &m)
Definition: status.h:197
int output_scale_
Definition: expr.h:291
bool codegen_enabled() const
Returns true if codegen is enabled for this query.
static const Status OK
Definition: status.h:87
llvm::Type * GetType(const ColumnType &type)
Returns llvm type for the column type.
Status GetCodegen(LlvmCodeGen **codegen, bool initialize=true)
SmallIntVal(* SmallIntWrapper)(ExprContext *, TupleRow *)
int Register(RuntimeState *state, const FunctionContext::TypeDesc &return_type, const std::vector< FunctionContext::TypeDesc > &arg_types, int varargs_buffer_size=0)
Definition: expr-context.cc:81
const ColumnType type_
analysis is done, types are fixed at this point
Definition: expr.h:289
FunctionContext * fn_context(int i)
Definition: expr-context.h:100
virtual Status Prepare(RuntimeState *state, const RowDescriptor &desc, ExprContext *context)
llvm::Value * GetIntConstant(PrimitiveType type, int64_t val)
Returns the constant 'val' of 'type'.
llvm::Function * FinalizeFunction(llvm::Function *function)
static Status Prepare(const std::vector< ExprContext * > &ctxs, RuntimeState *state, const RowDescriptor &row_desc, MemTracker *tracker)
virtual DoubleVal GetDoubleVal(ExprContext *context, TupleRow *)
bool ok() const
Definition: status.h:172
std::vector< Expr * > children_
Definition: expr.h:290
llvm::Type * void_type()
Definition: llvm-codegen.h:394
virtual IntVal GetIntVal(ExprContext *context, TupleRow *)
llvm::Function * GetStaticGetValWrapper(ColumnType type, LlvmCodeGen *codegen)
Definition: expr.cc:426
static int AnyValSize(const ColumnType &t)
Returns the byte size of *Val for type t.
Definition: anyval-util.h:158
llvm::LLVMContext & context()
Definition: llvm-codegen.h:214
virtual std::string DebugString() const
Definition: expr.cc:385
static void SetAnyVal(const void *slot, const ColumnType &type, AnyVal *dst)
Utility to put val into an AnyVal struct.
Definition: anyval-util.h:205
llvm::AllocaInst * CreateEntryBlockAlloca(llvm::Function *f, const NamedVariable &var)
int GetNumChildren() const
Definition: expr.h:143
std::vector< impala_udf::AnyVal * > * staging_input_vals()
Definition: udf-internal.h:81
llvm::Module * module()
Returns the underlying llvm module.
Definition: llvm-codegen.h:220
static std::string DemangleNameOnly(const std::string &symbol)
Definition: symbols-util.cc:66