Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
udf.h
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 
16 #ifndef IMPALA_UDF_UDF_H
17 #define IMPALA_UDF_UDF_H
18 
19 #include <assert.h>
20 #include <boost/cstdint.hpp>
21 #include <string.h>
22 
26 namespace impala {
27  class FunctionContextImpl;
28 }
29 
30 namespace impala_udf {
31 
35 struct AnyVal;
36 struct BooleanVal;
37 struct TinyIntVal;
38 struct SmallIntVal;
39 struct IntVal;
40 struct BigIntVal;
41 struct StringVal;
42 struct TimestampVal;
43 
48  public:
52  };
53 
54  enum Type {
69  };
70 
71  struct TypeDesc {
73 
75  int precision;
76  int scale;
77 
79  int len;
80  };
81 
82  struct UniqueId {
83  int64_t hi;
84  int64_t lo;
85  };
86 
99 
107  };
108 
/// Returns the version of Impala that's currently running.
110  ImpalaVersion version() const;
111 
114  const char* user() const;
115 
/// Returns the query_id for the current query.
117  UniqueId query_id() const;
118 
121  void SetError(const char* error_msg);
122 
127  bool AddWarning(const char* warning_msg);
128 
/// Returns true if there's been an error set.
130  bool has_error() const;
131 
/// Returns the current error message. Returns NULL if there is no error.
133  const char* error_msg() const;
134 
/// NOTE(review): presumably returns a buffer of 'byte_size' bytes; ownership and
/// failure behavior are not visible in this header -- confirm in udf.cc.
139  uint8_t* Allocate(int byte_size);
140 
146  uint8_t* Reallocate(uint8_t* ptr, int byte_size);
147 
/// Frees a buffer returned from Allocate() or Reallocate()
149  void Free(uint8_t* buffer);
150 
/// NOTE(review): the byte-count overload of Free() below looks like the counterpart
/// of TrackAllocation() -- confirm against the definitions.
155  void TrackAllocation(int64_t byte_size);
156  void Free(int64_t byte_size);
157 
/// Stores / retrieves an opaque pointer keyed by 'scope'.
/// NOTE(review): who owns 'ptr' is not visible in this header -- confirm.
163  void SetFunctionState(FunctionStateScope scope, void* ptr);
164  void* GetFunctionState(FunctionStateScope scope) const;
165 
168  const TypeDesc& GetReturnType() const;
169 
172  const TypeDesc& GetIntermediateType() const;
173 
176  int GetNumArgs() const;
177 
/// NOTE(review): the result for an out-of-range 'arg_idx' is not visible here; the
/// pointer return type suggests NULL is possible -- confirm.
180  const TypeDesc* GetArgType(int arg_idx) const;
181 
184  bool IsArgConstant(int arg_idx) const;
185 
190  AnyVal* GetConstantArg(int arg_idx) const;
191 
195 
197 
199 
203 
205 
206  private:
208  FunctionContext();
209 
211  FunctionContext(const FunctionContext& other);
213 
214  impala::FunctionContextImpl* impl_; // Owned by this object.
215 };
216 
217 //----------------------------------------------------------------------------
218 //------------------------------- UDFs ---------------------------------------
219 //----------------------------------------------------------------------------
223 //
229 //
236 //
243 //
248 //
256 //
262 //
271 //
275 typedef void (*UdfPrepare)(FunctionContext* context,
277 
284 //
288 typedef void (*UdfClose)(FunctionContext* context,
290 
291 //----------------------------------------------------------------------------
292 //------------------------------- UDAs ---------------------------------------
293 //----------------------------------------------------------------------------
302 //
310 //
318 //
326 
// Signature of a UDA init callback: receives the function context and a pointer to
// the intermediate value. NOTE(review): presumably expected to put 'result' into its
// starting state -- confirm.
329 typedef void (*UdaInit)(FunctionContext* context, IntermediateType* result);
330 
// Signature of a UDA update callback taking one input value; the intermediate value
// is passed by non-const pointer in 'result'.
334 typedef void (*UdaUpdate)(FunctionContext* context, const InputType& input,
335  IntermediateType* result);
// Same as UdaUpdate, for callbacks that take two input values.
336 typedef void (*UdaUpdate2)(FunctionContext* context, const InputType& input,
337  const InputType2& input2, IntermediateType* result);
338 
// Merge an intermediate result 'src' into 'dst'.
340 typedef void (*UdaMerge)(FunctionContext* context, const IntermediateType& src,
341  IntermediateType* dst);
342 
348  const IntermediateType& type);
349 
354 
355 //----------------------------------------------------------------------------
356 //-------------Implementation of the *Val structs ----------------------------
357 //----------------------------------------------------------------------------
/// Common base of the *Val value structs; it carries nothing but the null flag.
struct AnyVal {
  /// True when the value is null.
  bool is_null;

  /// Builds a value whose null flag is 'is_null' (false when omitted).
  AnyVal(bool is_null = false) {
    this->is_null = is_null;
  }
};
362 
363 struct BooleanVal : public AnyVal {
364  bool val;
365 
366  BooleanVal(bool val = false) : val(val) {}
367 
368  static BooleanVal null() {
369  BooleanVal result;
370  result.is_null = true;
371  return result;
372  }
373 
374  bool operator==(const BooleanVal& other) const {
375  if (is_null && other.is_null) return true;
376  if (is_null || other.is_null) return false;
377  return val == other.val;
378  }
379  bool operator!=(const BooleanVal& other) const { return !(*this == other); }
380 };
381 
382 struct TinyIntVal : public AnyVal {
383  int8_t val;
384 
385  TinyIntVal(int8_t val = 0) : val(val) { }
386 
387  static TinyIntVal null() {
388  TinyIntVal result;
389  result.is_null = true;
390  return result;
391  }
392 
393  bool operator==(const TinyIntVal& other) const {
394  if (is_null && other.is_null) return true;
395  if (is_null || other.is_null) return false;
396  return val == other.val;
397  }
398  bool operator!=(const TinyIntVal& other) const { return !(*this == other); }
399 };
400 
401 struct SmallIntVal : public AnyVal {
402  int16_t val;
403 
404  SmallIntVal(int16_t val = 0) : val(val) { }
405 
406  static SmallIntVal null() {
407  SmallIntVal result;
408  result.is_null = true;
409  return result;
410  }
411 
412  bool operator==(const SmallIntVal& other) const {
413  if (is_null && other.is_null) return true;
414  if (is_null || other.is_null) return false;
415  return val == other.val;
416  }
417  bool operator!=(const SmallIntVal& other) const { return !(*this == other); }
418 };
419 
420 struct IntVal : public AnyVal {
421  int32_t val;
422 
423  IntVal(int32_t val = 0) : val(val) { }
424 
425  static IntVal null() {
426  IntVal result;
427  result.is_null = true;
428  return result;
429  }
430 
431  bool operator==(const IntVal& other) const {
432  if (is_null && other.is_null) return true;
433  if (is_null || other.is_null) return false;
434  return val == other.val;
435  }
436  bool operator!=(const IntVal& other) const { return !(*this == other); }
437 };
438 
439 struct BigIntVal : public AnyVal {
440  int64_t val;
441 
442  BigIntVal(int64_t val = 0) : val(val) { }
443 
444  static BigIntVal null() {
445  BigIntVal result;
446  result.is_null = true;
447  return result;
448  }
449 
450  bool operator==(const BigIntVal& other) const {
451  if (is_null && other.is_null) return true;
452  if (is_null || other.is_null) return false;
453  return val == other.val;
454  }
455  bool operator!=(const BigIntVal& other) const { return !(*this == other); }
456 };
457 
458 struct FloatVal : public AnyVal {
459  float val;
460 
461  FloatVal(float val = 0) : val(val) { }
462 
463  static FloatVal null() {
464  FloatVal result;
465  result.is_null = true;
466  return result;
467  }
468 
469  bool operator==(const FloatVal& other) const {
470  return is_null == other.is_null && val == other.val;
471  }
472  bool operator!=(const FloatVal& other) const { return !(*this == other); }
473 };
474 
475 struct DoubleVal : public AnyVal {
476  double val;
477 
478  DoubleVal(double val = 0) : val(val) { }
479 
480  static DoubleVal null() {
481  DoubleVal result;
482  result.is_null = true;
483  return result;
484  }
485 
486  bool operator==(const DoubleVal& other) const {
487  if (is_null && other.is_null) return true;
488  if (is_null || other.is_null) return false;
489  return val == other.val;
490  }
491  bool operator!=(const DoubleVal& other) const { return !(*this == other); }
492 };
493 
495 struct TimestampVal : public AnyVal {
497  int32_t date;
499  int64_t time_of_day;
500 
501  TimestampVal(int32_t date = 0, int64_t time_of_day = 0) :
503  }
504 
505  static TimestampVal null() {
506  TimestampVal result;
507  result.is_null = true;
508  return result;
509  }
510 
511  bool operator==(const TimestampVal& other) const {
512  if (is_null && other.is_null) return true;
513  if (is_null || other.is_null) return false;
514  return date == other.date && time_of_day == other.time_of_day;
515  }
516  bool operator!=(const TimestampVal& other) const { return !(*this == other); }
517 };
518 
521 struct StringVal : public AnyVal {
522  int len;
523  uint8_t* ptr;
524 
527  StringVal(uint8_t* ptr = NULL, int len = 0) : len(len), ptr(ptr) {
528  assert(len >= 0);
529  };
530 
531 
534  StringVal(const char* ptr) : len(strlen(ptr)), ptr((uint8_t*)ptr) {}
535 
536  static StringVal null() {
537  StringVal sv;
538  sv.is_null = true;
539  return sv;
540  }
541 
545  StringVal(FunctionContext* context, int len);
546 
547  bool operator==(const StringVal& other) const {
548  if (is_null != other.is_null) return false;
549  if (is_null) return true;
550  if (len != other.len) return false;
551  return ptr == other.ptr || memcmp(ptr, other.ptr, len) == 0;
552  }
553  bool operator!=(const StringVal& other) const { return !(*this == other); }
554 };
555 
561  //
565  //
569  union {
570  int32_t val4;
571  int64_t val8;
572  __int128_t val16;
573  };
574 
575  DecimalVal() : val16(0) {}
576  DecimalVal(int32_t v) : val16(v) {}
577  DecimalVal(int64_t v) : val16(v) {}
578  DecimalVal(__int128_t v) : val16(v) {}
579 
580  static DecimalVal null() {
581  DecimalVal result;
582  result.is_null = true;
583  return result;
584  }
585 
586  DecimalVal& operator=(const DecimalVal& other) {
587  // Depending on the compiler, the default assignment operator may require 16-byte
588  // alignment of 'this' and 'other'. Cast to void* so the compiler doesn't change back
589  // to an assignment.
590  memcpy(reinterpret_cast<void*>(this), reinterpret_cast<const void*>(&other),
591  sizeof(DecimalVal));
592  return *this;
593  }
594 
595  DecimalVal(const DecimalVal& other) {
596  *this = other;
597  }
598 };
599 
600 typedef uint8_t* BufferVal;
601 
602 }
603 
604 #endif
bool operator==(const IntVal &other) const
Definition: udf.h:431
int precision
Only valid if type == TYPE_DECIMAL.
Definition: udf.h:75
bool operator!=(const FloatVal &other) const
Definition: udf.h:472
void(* UdfClose)(FunctionContext *context, FunctionContext::FunctionStateScope scope)
Definition: udf.h:288
static BigIntVal null()
Definition: udf.h:444
bool operator!=(const TimestampVal &other) const
Definition: udf.h:516
bool operator==(const SmallIntVal &other) const
Definition: udf.h:412
int64_t time_of_day
Nanoseconds in current day.
Definition: udf.h:499
bool operator==(const TimestampVal &other) const
Definition: udf.h:511
bool operator!=(const BooleanVal &other) const
Definition: udf.h:379
FloatVal(float val=0)
Definition: udf.h:461
impala::FunctionContextImpl * impl()
TODO: Add mechanism for UDAs to update stats similar to runtime profile counters. ...
Definition: udf.h:202
bool operator!=(const IntVal &other) const
Definition: udf.h:436
void(* UdaUpdate)(FunctionContext *context, const InputType &input, IntermediateType *result)
Definition: udf.h:334
const TypeDesc & GetReturnType() const
Definition: udf-ir.cc:34
BooleanVal(bool val=false)
Definition: udf.h:366
AnyVal(bool is_null=false)
Definition: udf.h:360
static IntVal null()
Definition: udf.h:425
void(* UdfPrepare)(FunctionContext *context, FunctionContext::FunctionStateScope scope)
Definition: udf.h:275
static FloatVal null()
Definition: udf.h:463
__int128_t val16
Definition: udf.h:572
int32_t val
Definition: udf.h:421
bool operator!=(const DoubleVal &other) const
Definition: udf.h:491
DoubleVal(double val=0)
Definition: udf.h:478
AnyVal IntermediateType
Definition: udf.h:325
void(* UdaUpdate2)(FunctionContext *context, const InputType &input, const InputType2 &input2, IntermediateType *result)
Definition: udf.h:336
static TinyIntVal null()
Definition: udf.h:387
bool has_error() const
Returns true if there's been an error set.
Definition: udf.cc:253
AnyVal InputType2
Definition: udf.h:323
const IntermediateType(* UdaSerialize)(FunctionContext *context, const IntermediateType &type)
Definition: udf.h:347
int32_t date
Gregorian date. This has the same binary format as boost::gregorian::date.
Definition: udf.h:497
TimestampVal(int32_t date=0, int64_t time_of_day=0)
Definition: udf.h:501
void(* UdaInit)(FunctionContext *context, IntermediateType *result)
Definition: udf.h:329
This object has a compatible storage format with boost::ptime.
Definition: udf.h:495
bool operator==(const StringVal &other) const
Definition: udf.h:547
uint8_t * BufferVal
Definition: udf.h:600
bool operator==(const BooleanVal &other) const
Definition: udf.h:374
uint8_t * ptr
Definition: udf.h:523
static SmallIntVal null()
Definition: udf.h:406
ImpalaVersion version() const
Returns the version of Impala that's currently running.
Definition: udf.cc:233
bool AddWarning(const char *warning_msg)
Definition: udf.cc:345
bool is_null
Definition: udf.h:359
AnyVal ResultType
Definition: udf.h:324
DecimalVal(int32_t v)
Definition: udf.h:576
DecimalVal(const DecimalVal &other)
Definition: udf.h:595
DecimalVal(__int128_t v)
Definition: udf.h:578
StringVal(uint8_t *ptr=NULL, int len=0)
Definition: udf.h:527
static TimestampVal null()
Definition: udf.h:505
const TypeDesc * GetArgType(int arg_idx) const
Definition: udf.cc:425
FunctionContext & operator=(const FunctionContext &other)
bool operator==(const FloatVal &other) const
Definition: udf.h:469
bool operator==(const DoubleVal &other) const
Definition: udf.h:486
void * GetFunctionState(FunctionStateScope scope) const
Definition: udf-ir.cc:38
bool operator!=(const TinyIntVal &other) const
Definition: udf.h:398
void Free(uint8_t *buffer)
Frees a buffer returned from Allocate() or Reallocate()
Definition: udf.cc:291
static BooleanVal null()
Definition: udf.h:368
IntVal(int32_t val=0)
Definition: udf.h:423
bool IsArgConstant(int arg_idx) const
Definition: udf-ir.cc:20
void SetFunctionState(FunctionStateScope scope, void *ptr)
Definition: udf.cc:370
impala::FunctionContextImpl * impl_
Definition: udf.h:214
const char * error_msg() const
Returns the current error message. Returns NULL if there is no error.
Definition: udf.cc:257
StringVal(const char *ptr)
Definition: udf.h:534
AnyVal InputType
Definition: udf.h:322
bool operator!=(const BigIntVal &other) const
Definition: udf.h:455
const TypeDesc & GetIntermediateType() const
static DecimalVal null()
Definition: udf.h:580
int GetNumArgs() const
Definition: udf-ir.cc:30
SmallIntVal(int16_t val=0)
Definition: udf.h:404
uint8_t * Reallocate(uint8_t *ptr, int byte_size)
Definition: udf.cc:276
BigIntVal(int64_t val=0)
Definition: udf.h:442
uint8_t * Allocate(int byte_size)
Definition: udf.cc:262
void TrackAllocation(int64_t byte_size)
Definition: udf.cc:312
static StringVal null()
Definition: udf.h:536
bool operator==(const BigIntVal &other) const
Definition: udf.h:450
ResultType(* UdaFinalize)(FunctionContext *context, const IntermediateType &v)
Definition: udf.h:353
const char * user() const
Definition: udf.cc:237
DecimalVal(int64_t v)
Definition: udf.h:577
int len
Only valid if type == TYPE_FIXED_BUFFER || type == TYPE_VARCHAR.
Definition: udf.h:79
static DoubleVal null()
Definition: udf.h:480
UniqueId query_id() const
Returns the query_id for the current query.
Definition: udf.cc:242
void SetError(const char *error_msg)
Definition: udf.cc:332
AnyVal * GetConstantArg(int arg_idx) const
Definition: udf-ir.cc:25
void(* UdaMerge)(FunctionContext *context, const IntermediateType &src, IntermediateType *dst)
Merge an intermediate result 'src' into 'dst'.
Definition: udf.h:340
bool operator==(const TinyIntVal &other) const
Definition: udf.h:393
DecimalVal & operator=(const DecimalVal &other)
Definition: udf.h:586
bool operator!=(const SmallIntVal &other) const
Definition: udf.h:417
TinyIntVal(int8_t val=0)
Definition: udf.h:385
bool operator!=(const StringVal &other) const
Definition: udf.h:553