18 #include <gutil/strings/substitute.h>
19 #include <llvm/IR/Attributes.h>
20 #include <llvm/ExecutionEngine/ExecutionEngine.h>
37 using namespace impala;
38 using namespace impala_udf;
39 using namespace strings;
43 vararg_start_idx_(node.__isset.vararg_start_idx ?
44 node.vararg_start_idx : -1),
45 scalar_fn_wrapper_(NULL),
49 DCHECK_NE(
fn_.binary_type, TFunctionBinaryType::HIVE);
56 if (
fn_.scalar_fn.symbol.empty()) {
61 DCHECK_EQ(
fn_.binary_type, TFunctionBinaryType::BUILTIN);
63 ss <<
"Function " <<
fn_.name.function_name <<
" is not implemented.";
68 vector<FunctionContext::TypeDesc> arg_types;
69 bool char_arg =
false;
70 for (
int i = 0; i <
children_.size(); ++i) {
76 int varargs_buffer_size = 0;
93 (
fn_.binary_type == TFunctionBinaryType::BUILTIN ||
94 fn_.binary_type == TFunctionBinaryType::NATIVE))) {
98 DCHECK(
NumFixedArgs() <= 8 &&
fn_.binary_type == TFunctionBinaryType::BUILTIN);
103 if (
fn_.binary_type == TFunctionBinaryType::BUILTIN) {
106 fn_.name.function_name,
fn_.scalar_fn.symbol));
109 DCHECK_EQ(
fn_.binary_type, TFunctionBinaryType::NATIVE);
110 return Status(Substitute(
"Problem loading UDF '$0':\n$1",
120 if (
fn_.binary_type == TFunctionBinaryType::IR) {
130 llvm::Function* ir_udf_wrapper;
136 if (
fn_.scalar_fn.__isset.prepare_fn_symbol) {
140 if (
fn_.scalar_fn.__isset.close_fn_symbol) {
166 if (scope == FunctionContext::FRAGMENT_LOCAL) {
167 vector<AnyVal*> constant_args;
168 for (
int i = 0; i <
children_.size(); ++i) {
175 if (scope == FunctionContext::FRAGMENT_LOCAL) {
176 prepare_fn_(fn_ctx, FunctionContext::FRAGMENT_LOCAL);
179 prepare_fn_(fn_ctx, FunctionContext::THREAD_LOCAL);
203 close_fn_(fn_ctx, FunctionContext::THREAD_LOCAL);
204 if (scope == FunctionContext::FRAGMENT_LOCAL) {
205 close_fn_(fn_ctx, FunctionContext::FRAGMENT_LOCAL);
212 if (
fn_.name.function_name ==
"rand")
return false;
251 return Status(
"ScalarFnCall Codegen not supported for CHAR");
264 llvm::Value* args[2];
266 llvm::Value* expr_ctx = args[0];
267 llvm::Value* row = args[1];
268 llvm::BasicBlock* block = llvm::BasicBlock::Create(codegen->
context(),
"entry", *fn);
272 vector<llvm::Value*> udf_args;
276 llvm::Value* expr_ctx_gep = builder.CreateStructGEP(expr_ctx, 1,
"expr_ctx_gep");
277 llvm::Value* fn_ctxs_base = builder.CreateLoad(expr_ctx_gep,
"fn_ctxs_base");
279 llvm::Value* fn_ctx_ptr =
280 builder.CreateConstGEP1_32(fn_ctxs_base,
context_index_,
"fn_ctx_ptr");
281 llvm::Value* fn_ctx = builder.CreateLoad(fn_ctx_ptr,
"fn_ctx");
282 udf_args.push_back(fn_ctx);
286 llvm::Value* varargs_buffer = NULL;
290 llvm::Value* fn_ctx_impl_ptr = builder.CreateStructGEP(fn_ctx, 0,
"fn_ctx_impl_ptr");
291 llvm::Value* fn_ctx_impl = builder.CreateLoad(fn_ctx_impl_ptr,
"fn_ctx_impl");
294 llvm::Value* varargs_buffer_ptr =
295 builder.CreateStructGEP(fn_ctx_impl, 0,
"varargs_buffer");
296 varargs_buffer = builder.CreateLoad(varargs_buffer_ptr);
299 int varargs_buffer_offset = 0;
303 llvm::Function* child_fn = NULL;
304 vector<llvm::Value*> child_fn_args;
307 children_[i]->GetCodegendComputeFn(state, &child_fn);
309 if (child_fn == NULL) {
316 child_fn_args.push_back(expr_ctx);
317 child_fn_args.push_back(row);
320 DCHECK(child_fn != NULL);
322 llvm::Value* arg_val_ptr;
332 llvm::cast<llvm::AllocaInst>(arg_val_ptr)->setAlignment(16);
334 udf_args.push_back(arg_val_ptr);
338 builder.CreateConstGEP1_32(varargs_buffer, varargs_buffer_offset,
"arg_val_ptr");
342 builder.CreateBitCast(arg_val_ptr, arg_type->getPointerTo(),
"arg_val_ptr");
344 DCHECK_EQ(arg_val_ptr->getType(), arg_type->getPointerTo());
346 llvm::Value* lowered_arg_val_ptr = builder.CreateBitCast(
348 "lowered_arg_val_ptr");
350 codegen, &builder, child_fn, child_fn_args,
"arg_val", lowered_arg_val_ptr);
361 llvm::PointerType* vararg_type = codegen->
GetPtrType(
363 udf_args.push_back(builder.CreateBitCast(varargs_buffer, vararg_type,
"varargs"));
367 llvm::Value* result_val =
369 builder.CreateRet(result_val);
386 bool broken_builtin =
fn_.name.function_name ==
"from_utc_timestamp" ||
387 fn_.name.function_name ==
"to_utc_timestamp" ||
388 fn_.scalar_fn.symbol.find(
"DateAddSub") != string::npos;
389 if (
fn_.binary_type == TFunctionBinaryType::NATIVE ||
390 (
fn_.binary_type == TFunctionBinaryType::BUILTIN &&
398 if (!status.
ok() &&
fn_.binary_type == TFunctionBinaryType::BUILTIN) {
401 fn_.name.function_name,
fn_.scalar_fn.symbol).
msg());
404 DCHECK(fn_ptr != NULL);
409 vector<llvm::Type*> arg_types;
418 arg_types.push_back(codegen->
GetPtrType(
"class.impala_udf::FunctionContext"));
422 arg_types.push_back(arg_type);
429 arg_types.push_back(vararg_type);
431 llvm::FunctionType* udf_type = llvm::FunctionType::get(return_type, arg_types,
false);
436 *udf = llvm::Function::Create(
437 udf_type, llvm::GlobalValue::ExternalLinkage,
438 fn_.scalar_fn.symbol, codegen->
module());
444 }
else if (
fn_.binary_type == TFunctionBinaryType::BUILTIN) {
448 *udf = codegen->
module()->getFunction(
fn_.scalar_fn.symbol);
452 ss <<
"Builtin '" <<
fn_.name.function_name <<
"' with symbol '"
453 <<
fn_.scalar_fn.symbol <<
"' does not exist. "
454 <<
"Verify that all your impalads are the same version.";
459 DCHECK_EQ(
fn_.binary_type, TFunctionBinaryType::IR);
460 *udf = codegen->
module()->getFunction(
fn_.scalar_fn.symbol);
463 ss <<
"Unable to locate function " <<
fn_.scalar_fn.symbol
464 <<
" from LLVM module " <<
fn_.hdfs_location;
468 (*udf)->addFnAttr(llvm::Attribute::AlwaysInline);
473 if (
fn_.binary_type == TFunctionBinaryType::NATIVE ||
474 fn_.binary_type == TFunctionBinaryType::BUILTIN) {
478 DCHECK_EQ(
fn_.binary_type, TFunctionBinaryType::IR);
481 llvm::Function* ir_fn = codegen->
module()->getFunction(symbol);
484 ss <<
"Unable to locate function " << symbol
485 <<
" from LLVM module " <<
fn_.hdfs_location;
494 vector<AnyVal*>* input_vals) {
498 for (
int i = 0; i <
children_.size(); ++i) {
502 dst_val = (*input_vals)[i];
504 dst_val =
reinterpret_cast<AnyVal*
>(varargs_buffer);
511 template<
typename RETURN_TYPE>
522 return reinterpret_cast<ScalarFn0
>(
scalar_fn_)(fn_ctx);
525 return reinterpret_cast<ScalarFn1
>(
scalar_fn_)(fn_ctx,
528 typedef RETURN_TYPE (*ScalarFn2)(FunctionContext*,
const AnyVal& a1,
530 return reinterpret_cast<ScalarFn2
>(
scalar_fn_)(fn_ctx,
531 *(*input_vals)[0], *(*input_vals)[1]);
533 typedef RETURN_TYPE (*ScalarFn3)(FunctionContext*,
const AnyVal& a1,
535 return reinterpret_cast<ScalarFn3
>(
scalar_fn_)(fn_ctx,
536 *(*input_vals)[0], *(*input_vals)[1], *(*input_vals)[2]);
538 typedef RETURN_TYPE (*ScalarFn4)(FunctionContext*,
const AnyVal& a1,
540 return reinterpret_cast<ScalarFn4
>(
scalar_fn_)(fn_ctx,
541 *(*input_vals)[0], *(*input_vals)[1], *(*input_vals)[2], *(*input_vals)[3]);
543 typedef RETURN_TYPE (*ScalarFn5)(FunctionContext*,
const AnyVal& a1,
545 return reinterpret_cast<ScalarFn5
>(
scalar_fn_)(fn_ctx,
546 *(*input_vals)[0], *(*input_vals)[1], *(*input_vals)[2], *(*input_vals)[3],
549 typedef RETURN_TYPE (*ScalarFn6)(FunctionContext*,
const AnyVal& a1,
552 return reinterpret_cast<ScalarFn6
>(
scalar_fn_)(fn_ctx,
553 *(*input_vals)[0], *(*input_vals)[1], *(*input_vals)[2], *(*input_vals)[3],
554 *(*input_vals)[4], *(*input_vals)[5]);
556 typedef RETURN_TYPE (*ScalarFn7)(FunctionContext*,
const AnyVal& a1,
559 return reinterpret_cast<ScalarFn7
>(
scalar_fn_)(fn_ctx,
560 *(*input_vals)[0], *(*input_vals)[1], *(*input_vals)[2], *(*input_vals)[3],
561 *(*input_vals)[4], *(*input_vals)[5], *(*input_vals)[6]);
563 typedef RETURN_TYPE (*ScalarFn8)(FunctionContext*,
const AnyVal& a1,
566 return reinterpret_cast<ScalarFn8
>(
scalar_fn_)(fn_ctx,
567 *(*input_vals)[0], *(*input_vals)[1], *(*input_vals)[2], *(*input_vals)[3],
568 *(*input_vals)[4], *(*input_vals)[5], *(*input_vals)[6], *(*input_vals)[7]);
570 DCHECK(
false) <<
"Interpreted path not implemented. We should have "
571 <<
"codegen'd the wrapper";
580 return reinterpret_cast<VarargFn0
>(
scalar_fn_)(fn_ctx, num_varargs, varargs);
582 typedef RETURN_TYPE (*VarargFn1)(FunctionContext*,
const AnyVal& a1,
583 int num_varargs,
const AnyVal* varargs);
584 return reinterpret_cast<VarargFn1
>(
scalar_fn_)(fn_ctx, *(*input_vals)[0],
585 num_varargs, varargs);
587 typedef RETURN_TYPE (*VarargFn2)(FunctionContext*,
const AnyVal& a1,
588 const AnyVal& a2,
int num_varargs,
const AnyVal* varargs);
589 return reinterpret_cast<VarargFn2
>(
scalar_fn_)(fn_ctx, *(*input_vals)[0],
590 *(*input_vals)[1], num_varargs, varargs);
592 typedef RETURN_TYPE (*VarargFn3)(FunctionContext*,
const AnyVal& a1,
594 return reinterpret_cast<VarargFn3
>(
scalar_fn_)(fn_ctx, *(*input_vals)[0],
595 *(*input_vals)[1], *(*input_vals)[2], num_varargs, varargs);
597 typedef RETURN_TYPE (*VarargFn4)(FunctionContext*,
const AnyVal& a1,
600 return reinterpret_cast<VarargFn4
>(
scalar_fn_)(fn_ctx,
601 *(*input_vals)[0], *(*input_vals)[1], *(*input_vals)[2], *(*input_vals)[3],
602 num_varargs, varargs);
604 typedef RETURN_TYPE (*VarargFn5)(FunctionContext*,
const AnyVal& a1,
606 int num_varargs,
const AnyVal* varargs);
607 return reinterpret_cast<VarargFn5
>(
scalar_fn_)(fn_ctx,
608 *(*input_vals)[0], *(*input_vals)[1], *(*input_vals)[2], *(*input_vals)[3],
609 *(*input_vals)[4], num_varargs, varargs);
611 typedef RETURN_TYPE (*VarargFn6)(FunctionContext*,
const AnyVal& a1,
613 const AnyVal& a6,
int num_varargs,
const AnyVal* varargs);
614 return reinterpret_cast<VarargFn6
>(
scalar_fn_)(fn_ctx,
615 *(*input_vals)[0], *(*input_vals)[1], *(*input_vals)[2], *(*input_vals)[3],
616 *(*input_vals)[4], *(*input_vals)[5], num_varargs, varargs);
618 typedef RETURN_TYPE (*VarargFn7)(FunctionContext*,
const AnyVal& a1,
621 return reinterpret_cast<VarargFn7
>(
scalar_fn_)(fn_ctx,
622 *(*input_vals)[0], *(*input_vals)[1], *(*input_vals)[2], *(*input_vals)[3],
623 *(*input_vals)[4], *(*input_vals)[5], *(*input_vals)[6], num_varargs, varargs);
625 typedef RETURN_TYPE (*VarargFn8)(FunctionContext*,
const AnyVal& a1,
629 return reinterpret_cast<VarargFn8
>(
scalar_fn_)(fn_ctx,
630 *(*input_vals)[0], *(*input_vals)[1], *(*input_vals)[2], *(*input_vals)[3],
631 *(*input_vals)[4], *(*input_vals)[5], *(*input_vals)[6], *(*input_vals)[7],
632 num_varargs, varargs);
634 DCHECK(
false) <<
"Interpreted path not implemented. We should have "
635 <<
"codegen'd the wrapper";
638 return RETURN_TYPE::null();
655 DCHECK(context != NULL);
658 return fn(context, row);
663 DCHECK(context != NULL);
666 return fn(context, row);
671 DCHECK(context != NULL);
674 return fn(context, row);
679 DCHECK(context != NULL);
682 return fn(context, row);
687 DCHECK(context != NULL);
690 return fn(context, row);
695 DCHECK(context != NULL);
698 return fn(context, row);
703 DCHECK(context != NULL);
706 return fn(context, row);
711 DCHECK(context != NULL);
714 return fn(context, row);
719 DCHECK(context != NULL);
722 return fn(context, row);
727 DCHECK(context != NULL);
730 return fn(context, row);
735 out <<
"ScalarFnCall(udf_type=" <<
fn_.binary_type
736 <<
" location=" <<
fn_.hdfs_location
const std::string & msg() const
Returns the formatted error string.
virtual DecimalVal GetDecimalVal(ExprContext *context, TupleRow *)
AnyVal * CreateAnyVal(ObjectPool *pool, const ColumnType &type)
Creates the corresponding AnyVal subclass for type. The object is added to the pool.
static llvm::Type * GetLoweredPtrType(LlvmCodeGen *cg, const ColumnType &type)
llvm::Function * ir_compute_fn_
Cached codegened compute function. Exprs should set this in GetCodegendComputeFn().
RETURN_TYPE InterpretEval(ExprContext *context, TupleRow *row)
Function to call scalar_fn_. Used in the interpreted path.
virtual TinyIntVal GetTinyIntVal(ExprContext *context, TupleRow *)
static FunctionContext::TypeDesc ColumnTypeToTypeDesc(const ColumnType &type)
uint8_t * varargs_buffer()
const std::string GetDetail() const
virtual SmallIntVal GetSmallIntVal(ExprContext *context, TupleRow *)
llvm::PointerType * GetPtrType(llvm::Type *type)
Return a pointer type to 'type'.
impala::FunctionContextImpl * impl()
TODO: Add mechanism for UDAs to update stats similar to runtime profile counters. ...
BooleanVal(* BooleanWrapper)(ExprContext *, TupleRow *)
bool codegen_created() const
int NumFixedArgs() const
Returns the number of non-vararg arguments.
static Status Open(const std::vector< ExprContext * > &ctxs, RuntimeState *state)
Convenience function for opening multiple expr trees.
TinyIntVal(* TinyIntWrapper)(ExprContext *, TupleRow *)
static llvm::Type * GetUnloweredType(LlvmCodeGen *cg, const ColumnType &type)
void SetConstantArgs(const std::vector< impala_udf::AnyVal * > &constant_args)
Sets constant_args_. The AnyVal* values are owned by the caller.
#define RETURN_IF_ERROR(stmt)
some generally useful macros
virtual StringVal GetStringVal(ExprContext *context, TupleRow *)
static llvm::Value * CreateCall(LlvmCodeGen *cg, LlvmCodeGen::LlvmBuilder *builder, llvm::Function *fn, llvm::ArrayRef< llvm::Value * > args, const char *name="", llvm::Value *result_ptr=NULL)
'name' optionally specifies the name of the returned value.
FloatVal(* FloatWrapper)(ExprContext *, TupleRow *)
void * GetValue(TupleRow *row)
bool has_error() const
Returns true if there's been an error set.
ScalarFnCall(const TExprNode &node)
virtual bool IsConstant() const
void AddDetail(const std::string &msg)
Add a detail string. Calling this method is only defined on a non-OK message.
This object has a compatible storage format with boost::ptime.
void EvaluateChildren(ExprContext *context, TupleRow *row, std::vector< impala_udf::AnyVal * > *input_vals)
static llvm::Type * GetUnloweredPtrType(LlvmCodeGen *cg, const ColumnType &type)
BigIntVal(* BigIntWrapper)(ExprContext *, TupleRow *)
DecimalVal(* DecimalWrapper)(ExprContext *, TupleRow *)
static void Close(const std::vector< ExprContext * > &ctxs, RuntimeState *state)
Convenience function for closing multiple expr trees.
ObjectPool * obj_pool()
Returns a local object pool.
LLVM code generator. This is the top level object to generate jitted code.
llvm::Function * CreateIrFunctionPrototype(LlvmCodeGen *codegen, const std::string &name, llvm::Value *(*args)[2])
bool IsStringType() const
virtual bool IsConstant() const
virtual void Close(RuntimeState *state, ExprContext *context, FunctionContext::FunctionStateScope scope=FunctionContext::FRAGMENT_LOCAL)
Subclasses overriding this function should call Expr::Close().
virtual Status GetCodegendComputeFn(RuntimeState *state, llvm::Function **fn)
DoubleVal(* DoubleWrapper)(ExprContext *, TupleRow *)
llvm::Value * CastPtrToLlvmPtr(llvm::Type *type, const void *ptr)
Status GetUdf(RuntimeState *state, llvm::Function **udf)
Loads the native or IR function from HDFS and puts the result in *udf.
virtual BooleanVal GetBooleanVal(ExprContext *context, TupleRow *)
llvm::ExecutionEngine * execution_engine()
Returns execution engine interface.
static const char * LLVM_CLASS_NAME
ObjectPool * obj_pool() const
virtual AnyVal * GetConstVal(ExprContext *context)
Status LinkModule(const std::string &file)
virtual TimestampVal GetTimestampVal(ExprContext *context, TupleRow *)
static llvm::Type * GetLoweredType(LlvmCodeGen *cg, const ColumnType &type)
IntVal(* IntWrapper)(ExprContext *, TupleRow *)
LibCache::LibCacheEntry * cache_entry_
Cache entry for the library implementing this function.
Status GetFunction(RuntimeState *state, const std::string &symbol, void **fn)
Status GetSoFunctionPtr(const std::string &hdfs_lib_file, const std::string &symbol, void **fn_ptr, LibCacheEntry **entry, bool quiet=false)
If 'quiet' is true, returned error statuses will not be logged.
void AddFunctionToJit(llvm::Function *fn, void **fn_ptr)
This is the superclass of all expr evaluation nodes.
static LibCache * instance()
virtual BigIntVal GetBigIntVal(ExprContext *context, TupleRow *)
virtual std::string DebugString() const
const char * error_msg() const
Returns the current error message. Returns NULL if there is no error.
virtual Status Open(RuntimeState *state, ExprContext *context, FunctionContext::FunctionStateScope scope=FunctionContext::FRAGMENT_LOCAL)
TimestampVal(* TimestampWrapper)(ExprContext *, TupleRow *)
virtual FloatVal GetFloatVal(ExprContext *context, TupleRow *)
TFunction fn_
Function description.
void * scalar_fn_wrapper_
const ColumnType & type() const
StringVal(* StringWrapper)(ExprContext *, TupleRow *)
void SetErrorMsg(const ErrorMsg &m)
bool codegen_enabled() const
Returns true if codegen is enabled for this query.
llvm::Type * GetType(const ColumnType &type)
Returns llvm type for the column type.
Status GetCodegen(LlvmCodeGen **codegen, bool initialize=true)
SmallIntVal(* SmallIntWrapper)(ExprContext *, TupleRow *)
int Register(RuntimeState *state, const FunctionContext::TypeDesc &return_type, const std::vector< FunctionContext::TypeDesc > &arg_types, int varargs_buffer_size=0)
const ColumnType type_
analysis is done, types are fixed at this point
FunctionContext * fn_context(int i)
virtual Status Prepare(RuntimeState *state, const RowDescriptor &desc, ExprContext *context)
llvm::Value * GetIntConstant(PrimitiveType type, int64_t val)
Returns the constant 'val' of 'type'.
llvm::Function * FinalizeFunction(llvm::Function *function)
static Status Prepare(const std::vector< ExprContext * > &ctxs, RuntimeState *state, const RowDescriptor &row_desc, MemTracker *tracker)
virtual DoubleVal GetDoubleVal(ExprContext *context, TupleRow *)
std::vector< Expr * > children_
virtual IntVal GetIntVal(ExprContext *context, TupleRow *)
llvm::Function * GetStaticGetValWrapper(ColumnType type, LlvmCodeGen *codegen)
static int AnyValSize(const ColumnType &t)
Returns the byte size of *Val for type t.
llvm::LLVMContext & context()
virtual std::string DebugString() const
static void SetAnyVal(const void *slot, const ColumnType &type, AnyVal *dst)
Utility to put val into an AnyVal struct.
llvm::AllocaInst * CreateEntryBlockAlloca(llvm::Function *f, const NamedVariable &var)
int GetNumChildren() const
std::vector< impala_udf::AnyVal * > * staging_input_vals()
llvm::Module * module()
Returns the underlying llvm module.
static std::string DemangleNameOnly(const std::string &symbol)