18 #include <gutil/strings/substitute.h>
24 #include "gen-cpp/Exprs_types.h"
30 using boost::gregorian::date;
31 using boost::gregorian::date_duration;
32 using boost::posix_time::ptime;
33 using boost::posix_time::time_duration;
34 using namespace impala;
35 using namespace strings;
49 for (
int i = 0; i < v.
len; ++i) {
50 result.
ptr[i] = tolower(v.
ptr[i]);
56 return IntVal(numeric_limits<int32_t>::max());
60 return TinyIntVal(numeric_limits<int8_t>::max());
68 return BigIntVal(numeric_limits<int64_t>::max());
72 return IntVal(numeric_limits<int32_t>::min());
76 return TinyIntVal(numeric_limits<int8_t>::min());
84 return BigIntVal(numeric_limits<int64_t>::min());
117 if ((unit ==
"syyyy") || (unit ==
"yyyy") || (unit ==
"year") || (unit ==
"syear") ||
118 (unit ==
"yyy") || (unit ==
"yy") || (unit ==
"y")) {
120 }
else if (unit ==
"q") {
122 }
else if ((unit ==
"month") || (unit ==
"mon") || (unit ==
"mm") || (unit ==
"rm")) {
124 }
else if (unit ==
"ww") {
126 }
else if (unit ==
"w") {
128 }
else if ((unit ==
"ddd") || (unit ==
"dd") || (unit ==
"j")) {
130 }
else if ((unit ==
"day") || (unit ==
"dy") || (unit ==
"d")) {
132 }
else if ((unit ==
"hh") || (unit ==
"hh12") || (unit ==
"hh24")) {
134 }
else if (unit ==
"mi") {
144 int current_week_day = orig_date.day_of_week();
145 int diff = current_week_day - week_day;
146 if (diff == 0)
return orig_date;
149 return orig_date - date_duration(diff);
153 return orig_date - date_duration(7 + diff);
158 date new_date(orig_date.year(), 1, 1);
159 time_duration new_time(0, 0, 0, 0);
166 date new_date(orig_date.year(), first_month_of_quarter, 1);
167 time_duration new_time(0, 0, 0, 0);
173 date new_date(orig_date.year(), orig_date.month(), 1);
174 time_duration new_time(0, 0, 0, 0);
181 int target_week_day = first_day_of_year.day_of_week();
183 time_duration new_time(0, 0, 0, 0);
190 const date& new_date =
GoBackToWeekday(orig_date, first_day_of_mon.day_of_week());
191 time_duration new_time(0, 0, 0, 0);
197 time_duration new_time(0, 0, 0, 0);
204 time_duration new_time(0, 0, 0, 0);
210 time_duration new_time(orig_time.hours(), 0, 0, 0);
216 time_duration new_time(orig_time.hours(), orig_time.minutes(), 0, 0);
222 if (tv.
is_null)
return TimestampVal::null();
224 const date& orig_date = ts.
date();
225 const time_duration& orig_time = ts.
time();
236 string string_unit(reinterpret_cast<char*>(unit_str.
ptr), unit_str.
len);
237 context->
SetError(Substitute(
"Invalid Truncate Unit: $0", string_unit).c_str());
238 return TimestampVal::null();
246 switch (trunc_unit) {
254 if (orig_date.is_special())
return TimestampVal::null();
258 if (orig_time.is_special())
return TimestampVal::null();
294 context->
SetError(Substitute(
"truncate unit $0 not supported", trunc_unit).c_str());
295 return TimestampVal::null();
309 string string_unit(reinterpret_cast<char*>(unit_str->
ptr), unit_str->
len);
310 ctx->
SetError(Substitute(
"Invalid Truncate Unit: $0", string_unit).c_str());
324 ctx->
Free(reinterpret_cast<uint8_t*>(state));
334 if (unit ==
"month")
return TExtractField::MONTH;
335 if (unit ==
"day")
return TExtractField::DAY;
336 if (unit ==
"hour")
return TExtractField::HOUR;
337 if (unit ==
"minute")
return TExtractField::MINUTE;
338 if (unit ==
"second")
return TExtractField::SECOND;
339 if (unit ==
"millisecond")
return TExtractField::MILLISECOND;
341 return TExtractField::INVALID_FIELD;
348 if (tv.
is_null)
return IntVal::null();
350 TExtractField::type field;
353 field = *
reinterpret_cast<TExtractField::type*
>(state);
356 if (field == TExtractField::INVALID_FIELD) {
357 string string_unit(reinterpret_cast<char*>(unit_str.
ptr), unit_str.
len);
358 context->
SetError(Substitute(
"invalid extract field: $0", string_unit).c_str());
359 return IntVal::null();
363 const date& orig_date = *
reinterpret_cast<const date*
>(&tv.
date);
364 const time_duration& time = *
reinterpret_cast<const time_duration*
>(&tv.
time_of_day);
368 case TExtractField::MONTH:
369 case TExtractField::DAY:
370 if (orig_date.is_special())
return IntVal::null();
372 case TExtractField::HOUR:
373 case TExtractField::MINUTE:
374 case TExtractField::SECOND:
375 case TExtractField::MILLISECOND:
376 if (time.is_special())
return IntVal::null();
379 if (time.is_special() || orig_date.is_special())
return IntVal::null();
381 case TExtractField::INVALID_FIELD:
387 return IntVal(orig_date.year());
389 case TExtractField::MONTH: {
390 return IntVal(orig_date.month());
392 case TExtractField::DAY: {
393 return IntVal(orig_date.day());
395 case TExtractField::HOUR: {
396 return IntVal(time.hours());
398 case TExtractField::MINUTE: {
399 return IntVal(time.minutes());
401 case TExtractField::SECOND: {
402 return IntVal(time.seconds());
404 case TExtractField::MILLISECOND: {
405 return IntVal(time.total_milliseconds() - time.total_seconds() * 1000);
408 ptime epoch_date(date(1970, 1, 1), time_duration(0, 0, 0));
409 ptime cur_date(orig_date, time);
410 time_duration diff = cur_date - epoch_date;
411 return IntVal(diff.total_seconds());
414 DCHECK(
false) << field;
415 return IntVal::null();
422 return Extract(context, unit_str, tv);
431 if (field == TExtractField::INVALID_FIELD) {
432 string string_field(reinterpret_cast<char*>(unit_str->
ptr), unit_str->
len);
433 ctx->
SetError(Substitute(
"invalid extract field: $0", string_field).c_str());
435 TExtractField::type* state =
reinterpret_cast<TExtractField::type*
>(
436 ctx->
Allocate(
sizeof(TExtractField::type)));
445 ExtractPrepare(ctx, scope, 0);
450 ExtractPrepare(ctx, scope, 1);
457 ctx->
Free(reinterpret_cast<uint8_t*>(state));
463 if (arr.
ptr == NULL) {
464 context->
SetError(
"MADlib vector is null");
467 if (arr.
len % 8 != 0) {
468 context->
SetError(Substitute(
"MADlib vector of incorrect length $0,"
469 " expected multiple of 8", arr.
len).c_str());
476 StringVal s(context, n *
sizeof(
double));
477 double* darr =
reinterpret_cast<double*
>(s.
ptr);
478 for (
int i = 0; i < n; ++i) {
479 if (vals[i].is_null) {
480 context->
SetError(Substitute(
"madlib vector entry $0 is NULL", i).c_str());
481 return StringVal::null();
483 darr[i] = vals[i].
val;
490 double* darr =
reinterpret_cast<double*
>(arr.
ptr);
491 int len = arr.
len /
sizeof(double);
494 for (
int i = 0; i < len; ++i) {
495 if (i != 0) ss <<
", ";
499 const string& str = ss.str();
501 memcpy(result.ptr, str.c_str(), str.size());
508 if (index.
is_null)
return DoubleVal::null();
511 if (index.
val < 0 || len <= i)
return DoubleVal::null();
512 double* darr =
reinterpret_cast<double*
>(arr.
ptr);
517 for (
uint64_t i = 0; i < len; ++i) {
518 char* hex =
reinterpret_cast<char*
>(&arr[i]);
520 float float_val = arr[i];
521 uint32_t float_as_int = *
reinterpret_cast<int32_t*
>(&float_val);
522 for (
int k = 0; k < 8; ++k) {
526 hex[k] =
'a' + ((float_as_int >> (4*k)) & 0xF);
533 for (
uint64_t i = 0; i < len; i += 8) {
534 double* dub =
reinterpret_cast<double*
>(&arr[i]);
536 int32_t float_as_int = 0;
537 for (
int k = 7; k >= 0; --k) {
538 float_as_int = (float_as_int <<4) | ((arr[i+k] -
'a') & 0xF);
540 float* float_ptr =
reinterpret_cast<float*
>(&float_as_int);
546 if (arr.
is_null)
return StringVal::null();
547 double* darr =
reinterpret_cast<double*
>(arr.
ptr);
548 int len = arr.
len /
sizeof(double);
550 memcpy(result.
ptr, darr, arr.
len);
556 if (arr.
is_null)
return StringVal::null();
static void TruncPrepare(FunctionContext *context, FunctionContext::FunctionStateScope scope)
int64_t time_of_day
Nanoseconds in current day.
TimestampValue TruncWW(const date &orig_date)
static StringVal ToVector(FunctionContext *context, int n, const DoubleVal *values)
static void SwappedExtractPrepare(FunctionContext *context, FunctionContext::FunctionStateScope scope)
static TinyIntVal MinTinyInt(FunctionContext *context)
static BooleanVal IsNan(FunctionContext *context, const DoubleVal &val)
date GoBackToWeekday(const date &orig_date, int week_day)
static StringVal EncodeVector(FunctionContext *context, const StringVal &arr)
TimestampValue TruncHour(const date &orig_date, const time_duration &orig_time)
TExtractField::type StrToExtractField(FunctionContext *ctx, const StringVal &unit_str)
static StringVal DecodeVector(FunctionContext *context, const StringVal &arr)
Converts a printable ascii encoding of a vector to a double[] stored as a StringVal.
void InplaceDoubleDecode(char *arr, uint64_t len)
static TimestampVal Trunc(FunctionContext *context, const TimestampVal &date, const StringVal &unit_str)
static IntVal MinInt(FunctionContext *context)
TimestampValue TruncMonth(const date &orig_date)
static SmallIntVal MinSmallInt(FunctionContext *context)
const boost::posix_time::time_duration & time() const
int32_t date
Gregorian date. This has the same binary format as boost::gregorian::date.
TimestampValue TruncDay(const date &orig_date)
This object has a compatible storage format with boost::ptime.
static StringVal PrintVector(FunctionContext *context, const StringVal &arr)
Converts a double[] stored as a StringVal into a human readable string.
static BigIntVal MinBigInt(FunctionContext *context)
static DoubleVal VectorGet(FunctionContext *context, const BigIntVal &n, const StringVal &arr)
Returns the n-th (0-indexed) element of a double[] stored as a StringVal.
static SmallIntVal MaxSmallInt(FunctionContext *context)
bool ValidateMADlibVector(FunctionContext *context, const StringVal &arr)
void ToTimestampVal(impala_udf::TimestampVal *tv) const
static int RoundDown(int value, int factor)
Returns 'value' rounded down to the nearest multiple of 'factor'.
TimestampValue TruncQuarter(const date &orig_date)
TimestampValue TruncMinute(const date &orig_date, const time_duration &orig_time)
TimestampValue TruncYear(const date &orig_date)
static StringVal Lower(FunctionContext *context, const StringVal &str)
static DoubleVal Pi(FunctionContext *context)
void * GetFunctionState(FunctionStateScope scope) const
void Free(uint8_t *buffer)
Frees a buffer returned from Allocate() or Reallocate()
bool IsArgConstant(int arg_idx) const
void SetFunctionState(FunctionStateScope scope, void *ptr)
TruncUnit::Type StrToTruncUnit(FunctionContext *ctx, const StringVal &unit_str)
static TimestampValue FromTimestampVal(const impala_udf::TimestampVal &udf_value)
static DoubleVal Abs(FunctionContext *context, const DoubleVal &val)
uint8_t * Allocate(int byte_size)
static BooleanVal IsInf(FunctionContext *context, const DoubleVal &val)
static IntVal Extract(FunctionContext *context, const StringVal &field_str, const TimestampVal &date)
This is used by the DATE_PART function.
void InplaceDoubleEncode(double *arr, uint64_t len)
TimestampValue TruncDayOfWeek(const date &orig_date)
TimestampValue TruncW(const date &orig_date)
static void TruncClose(FunctionContext *context, FunctionContext::FunctionStateScope scope)
static void ExtractClose(FunctionContext *context, FunctionContext::FunctionStateScope scope)
This is used by both EXTRACT and DATE_PART.
void SetError(const char *error_msg)
const boost::gregorian::date & date() const
AnyVal * GetConstantArg(int arg_idx) const
static BigIntVal MaxBigInt(FunctionContext *context)
static IntVal MaxInt(FunctionContext *context)
static void ExtractPrepare(FunctionContext *context, FunctionContext::FunctionStateScope scope)
This is used by the DATE_PART function.
static TinyIntVal MaxTinyInt(FunctionContext *context)