21 #include <boost/random/ranlux.hpp>
22 #include <boost/random/uniform_int.hpp>
33 using boost::uniform_int;
34 using boost::ranlux64_3;
48 const string &str = ss.str();
49 StringVal string_val(context, str.size());
50 memcpy(string_val.ptr, str.c_str(), str.size());
109 DCHECK_GE(dst->
val, 0);
116 DCHECK_GE(dst->
val, 0);
138 template <
typename T>
140 if (src.is_null)
return;
141 DCHECK(dst->
ptr != NULL);
148 template <
typename T>
152 if (src.is_null)
return;
153 DCHECK(dst->
ptr != NULL);
158 DCHECK_GE(avg->
count, 0);
164 DCHECK(dst->
ptr != NULL);
167 dst_struct->
sum += src_struct->
sum;
173 if (val_struct->
count == 0)
return DoubleVal::null();
186 DCHECK(dst->
ptr != NULL);
197 DCHECK(dst->
ptr != NULL);
203 DCHECK_GE(avg->
count, 0);
209 if (val_struct->
count == 0)
return TimestampVal::null();
248 DCHECK(dst->
ptr != NULL);
252 DCHECK(arg_desc != NULL);
257 int m =
remove ? -1 : 1;
269 DCHECK(
false) <<
"Invalid byte size for type " << arg_type.
DebugString();
273 DCHECK_GE(avg->
count, 0);
283 DCHECK(dst->
ptr != NULL);
293 if (val_struct->
count == 0)
return DecimalVal::null();
304 bool overflow =
false;
306 output_desc.
scale, &is_nan, &overflow);
307 if (
UNLIKELY(is_nan))
return DecimalVal::null();
309 ctx->
AddWarning(
"Avg computation overflowed, returning NULL");
310 return DecimalVal::null();
322 template<
typename SRC_VAL,
typename DST_VAL>
330 if (dst->is_null) InitZero<DST_VAL>(ctx, dst);
334 template<
typename SRC_VAL,
typename DST_VAL>
340 *dst = DST_VAL::null();
343 if (src.is_null)
return;
344 if (dst->is_null) InitZero<DST_VAL>(ctx, dst);
356 *dst = DecimalVal::null();
365 if (dst->
is_null) InitZero<DecimalVal>(ctx, dst);
369 int m = subtract ? -1 : 1;
370 if (arg_desc->precision <= 9) {
372 }
else if (arg_desc->precision <= 19) {
382 if (dst->
is_null) InitZero<DecimalVal>(ctx, dst);
388 if (src.is_null)
return;
389 if (dst->is_null || src.val < dst->val) *dst = src;
394 if (src.is_null)
return;
395 if (dst->is_null || src.val > dst->val) *dst = src;
411 memcpy(copy, src.
ptr, src.
len);
423 memcpy(copy, src.
ptr, src.
len);
468 if (src_tv < dst_tv) *dst = src;
481 if (src_tv > dst_tv) *dst = src;
502 DCHECK(header_len ==
sizeof(sep->
len));
506 int new_len = result->
len + sep->
len + src.
len;
512 DCHECK(result->
len == new_len);
523 *reinterpret_cast<StringConcatHeader*>(src.
ptr);
526 int new_len = result->
len + src.
len - header_len;
528 memcpy(result->
ptr + result->
len, src.
ptr + header_len, src.
len - header_len);
529 result->
len += src.
len - header_len;
530 DCHECK(result->
len == new_len);
537 DCHECK(src.
len >= header_len);
539 DCHECK(src.
len >= header_len + sep_len);
542 memcpy(result.
ptr, src.
ptr + header_len + sep_len, result.
len);
585 memset(dst->
ptr, 0, str_len);
589 uint32_t row_index, uint32_t bit_index) {
593 uint32_t *int_bitmap =
reinterpret_cast<uint32_t*
>(bitmap);
594 int_bitmap[row_index] |= (1 << bit_index);
598 uint32_t row_index, uint32_t bit_index) {
599 uint32_t *int_bitmap =
reinterpret_cast<uint32_t*
>(bitmap);
600 return ((int_bitmap[row_index] & (1 << bit_index)) > 0);
605 if (input.is_null)
return;
612 int bit_index = __builtin_ctz(hash_value);
621 if (input.is_null)
return;
640 stringstream debugstr;
649 return debugstr.str();
662 *(dst->
ptr + i) |= *(src.
ptr + i);
665 VLOG_ROW <<
"UpdateMergeEstimateSlot Src Bit map:\n"
667 VLOG_ROW <<
"UpdateMergeEstimateSlot Dst Bit map:\n"
674 VLOG_ROW <<
"FinalizeEstimateSlot Bit map:\n"
680 bool is_empty =
true;
682 if (src.
ptr[i] != 0) {
687 if (is_empty)
return 0;
697 int row_bit_count = 0;
705 sum += row_bit_count;
707 double avg =
static_cast<double>(sum) / static_cast<double>(NUM_PC_BITMAPS);
708 double result = pow(static_cast<double>(2), avg) /
PC_THETA;
716 return static_cast<int64_t
>(estimate);
724 return static_cast<int64_t
>(estimate);
734 template <
typename T>
760 memcpy(&
val[0], string_val.
ptr, len);
766 memcpy(result.
ptr, &
val[0], len);
771 template <
typename T>
786 uniform_int<int64_t> dist(0, max);
791 template <
typename T>
797 memset(dst->
ptr, 0, str_len);
801 template <
typename T>
804 if (src.is_null)
return;
818 template <
typename T>
840 if (state->
samples[i].key >= 0)
continue;
847 template <
typename T>
849 return i.
val.val < j.
val.val;
855 int n = min(i.
len, j.
len);
856 int result = memcmp(&i.
val[0], &j.
val[0], n);
857 if (result == 0)
return i.
len < j.
len;
874 template <
typename T>
879 template <
typename T>
895 DCHECK_GE(src->
samples[src_idx].key, 0);
901 while (src_idx < src_max) {
902 DCHECK_GE(src->
samples[src_idx].key, 0);
913 template <
typename T>
918 *os << static_cast<int32_t>(v.
val.
val);
923 string s(reinterpret_cast<const char*>(&v.
val[0]), v.
len);
937 template <
typename T>
946 PrintSample<T>(src->
samples[i], &out);
949 const string& out_str = out.str();
950 StringVal result_str(ctx, out_str.size());
951 memcpy(result_str.ptr, out_str.c_str(), result_str.len);
956 template <
typename T>
968 for (
int bucket_idx = 0; bucket_idx < num_buckets; ++bucket_idx) {
969 int sample_idx = (bucket_idx + 1) * samples_per_bucket - 1;
970 PrintSample<T>(src->
samples[sample_idx], &out);
971 if (bucket_idx < (num_buckets - 1)) out <<
", ";
973 const string& out_str = out.str();
974 StringVal result_str(ctx, out_str.size());
975 memcpy(result_str.ptr, out_str.c_str(), result_str.len);
980 template <
typename T>
1003 memset(dst->
ptr, 0, str_len);
1006 template <
typename T>
1008 if (src.is_null)
return;
1013 if (hash_value != 0) {
1017 uint8_t first_one_bit = __builtin_ctzl(hash_value >>
HLL_PRECISION) + 1;
1018 dst->
ptr[
idx] = ::max(dst->
ptr[idx], first_one_bit);
1028 for (
int i = 0; i < src.
len; ++i) {
1029 dst->
ptr[i] = ::max(dst->
ptr[i], src.
ptr[i]);
1034 int32_t num_buckets) {
1035 DCHECK_NOTNULL(buckets);
1036 DCHECK_EQ(num_buckets,
HLL_LEN);
1047 alpha = 0.7213f / (1 + 1.079f /
HLL_LEN);
1050 float harmonic_mean = 0;
1051 int num_zero_registers = 0;
1053 for (
int i = 0; i < num_buckets; ++i) {
1054 harmonic_mean += powf(2.0f, -buckets[i]);
1055 if (buckets[i] == 0) ++num_zero_registers;
1057 harmonic_mean = 1.0f / harmonic_mean;
1060 if (num_zero_registers != 0) {
1063 estimate =
HLL_LEN * log(static_cast<float>(
HLL_LEN) / num_zero_registers);
1089 if (state.
count == 1)
return 0.0;
1090 if (pop)
return state.
m2 / state.
count;
1091 return state.
m2 / (state.
count - 1);
1097 memset(dst->
ptr, 0, dst->
len);
1100 template <
typename T>
1105 if (src.is_null)
return;
1107 double temp = 1 + state->
count;
1108 double delta = src.val - state->
mean;
1109 double r = delta / temp;
1111 state->
m2 += state->
count * delta * r;
1112 state->
count = temp;
1125 if (src_state->
count == 0)
return;
1126 double delta = dst_state->
mean - src_state->
mean;
1127 double sum_count = dst_state->
count + src_state->
count;
1128 dst_state->
mean = src_state->
mean + delta * (dst_state->
count / sum_count);
1129 dst_state->
m2 = (src_state->
m2) + dst_state->
m2 +
1130 (delta * delta) * (src_state->
count * dst_state->
count / sum_count);
1131 dst_state->
count = sum_count;
1137 if (state->
count == 0)
return DoubleVal::null();
1147 if (state->
count == 0)
return DoubleVal::null();
1156 if (state->
count == 0)
return DoubleVal::null();
1165 if (state->
count == 0)
return DoubleVal::null();
1197 DCHECK_GT(state->
count, 0);
1198 DCHECK_GT(state->
rank, 0);
1199 int64_t result = state->
rank;
1212 DCHECK_EQ(state->
count, 0);
1213 DCHECK_GT(state->
rank, 0);
1214 int64_t result = state->
rank;
1226 int64_t result = state->
rank;
1231 template <
typename T>
1241 *dst = StringVal::null();
1255 template <
typename T>
1265 *dst = StringVal::null();
1269 template <
typename T>
1287 *dst = StringVal::null();
1294 template <
typename T>
1297 LastValUpdate<T>(ctx, src, dst);
1300 template <
typename T>
1309 template <
typename T>
1311 const BigIntVal&,
const T& default_value, T* dst) {
1318 template void AggregateFunctions::AvgUpdate<BigIntVal>(
1320 template void AggregateFunctions::AvgUpdate<DoubleVal>(
1322 template void AggregateFunctions::AvgRemove<BigIntVal>(
1324 template void AggregateFunctions::AvgRemove<DoubleVal>(
1327 template void AggregateFunctions::SumUpdate<TinyIntVal, BigIntVal>(
1329 template void AggregateFunctions::SumUpdate<SmallIntVal, BigIntVal>(
1331 template void AggregateFunctions::SumUpdate<IntVal, BigIntVal>(
1333 template void AggregateFunctions::SumUpdate<BigIntVal, BigIntVal>(
1335 template void AggregateFunctions::SumUpdate<FloatVal, DoubleVal>(
1337 template void AggregateFunctions::SumUpdate<DoubleVal, DoubleVal>(
1340 template void AggregateFunctions::SumRemove<TinyIntVal, BigIntVal>(
1342 template void AggregateFunctions::SumRemove<SmallIntVal, BigIntVal>(
1344 template void AggregateFunctions::SumRemove<IntVal, BigIntVal>(
1346 template void AggregateFunctions::SumRemove<BigIntVal, BigIntVal>(
1348 template void AggregateFunctions::SumRemove<FloatVal, DoubleVal>(
1350 template void AggregateFunctions::SumRemove<DoubleVal, DoubleVal>(
1353 template void AggregateFunctions::Min<BooleanVal>(
1355 template void AggregateFunctions::Min<TinyIntVal>(
1357 template void AggregateFunctions::Min<SmallIntVal>(
1359 template void AggregateFunctions::Min<IntVal>(
1361 template void AggregateFunctions::Min<BigIntVal>(
1363 template void AggregateFunctions::Min<FloatVal>(
1365 template void AggregateFunctions::Min<DoubleVal>(
1367 template void AggregateFunctions::Min<StringVal>(
1369 template void AggregateFunctions::Min<DecimalVal>(
1372 template void AggregateFunctions::Max<BooleanVal>(
1374 template void AggregateFunctions::Max<TinyIntVal>(
1376 template void AggregateFunctions::Max<SmallIntVal>(
1378 template void AggregateFunctions::Max<IntVal>(
1380 template void AggregateFunctions::Max<BigIntVal>(
1382 template void AggregateFunctions::Max<FloatVal>(
1384 template void AggregateFunctions::Max<DoubleVal>(
1386 template void AggregateFunctions::Max<StringVal>(
1388 template void AggregateFunctions::Max<DecimalVal>(
1433 template void AggregateFunctions::ReservoirSampleInit<BooleanVal>(
1435 template void AggregateFunctions::ReservoirSampleInit<TinyIntVal>(
1437 template void AggregateFunctions::ReservoirSampleInit<SmallIntVal>(
1439 template void AggregateFunctions::ReservoirSampleInit<IntVal>(
1441 template void AggregateFunctions::ReservoirSampleInit<BigIntVal>(
1443 template void AggregateFunctions::ReservoirSampleInit<FloatVal>(
1445 template void AggregateFunctions::ReservoirSampleInit<DoubleVal>(
1447 template void AggregateFunctions::ReservoirSampleInit<StringVal>(
1449 template void AggregateFunctions::ReservoirSampleInit<TimestampVal>(
1451 template void AggregateFunctions::ReservoirSampleInit<DecimalVal>(
1475 template const StringVal AggregateFunctions::ReservoirSampleSerialize<BooleanVal>(
1477 template const StringVal AggregateFunctions::ReservoirSampleSerialize<TinyIntVal>(
1479 template const StringVal AggregateFunctions::ReservoirSampleSerialize<SmallIntVal>(
1481 template const StringVal AggregateFunctions::ReservoirSampleSerialize<IntVal>(
1483 template const StringVal AggregateFunctions::ReservoirSampleSerialize<BigIntVal>(
1485 template const StringVal AggregateFunctions::ReservoirSampleSerialize<FloatVal>(
1487 template const StringVal AggregateFunctions::ReservoirSampleSerialize<DoubleVal>(
1489 template const StringVal AggregateFunctions::ReservoirSampleSerialize<StringVal>(
1491 template const StringVal AggregateFunctions::ReservoirSampleSerialize<TimestampVal>(
1493 template const StringVal AggregateFunctions::ReservoirSampleSerialize<DecimalVal>(
1496 template void AggregateFunctions::ReservoirSampleMerge<BooleanVal>(
1498 template void AggregateFunctions::ReservoirSampleMerge<TinyIntVal>(
1500 template void AggregateFunctions::ReservoirSampleMerge<SmallIntVal>(
1502 template void AggregateFunctions::ReservoirSampleMerge<IntVal>(
1504 template void AggregateFunctions::ReservoirSampleMerge<BigIntVal>(
1506 template void AggregateFunctions::ReservoirSampleMerge<FloatVal>(
1508 template void AggregateFunctions::ReservoirSampleMerge<DoubleVal>(
1510 template void AggregateFunctions::ReservoirSampleMerge<StringVal>(
1512 template void AggregateFunctions::ReservoirSampleMerge<TimestampVal>(
1514 template void AggregateFunctions::ReservoirSampleMerge<DecimalVal>(
1517 template StringVal AggregateFunctions::ReservoirSampleFinalize<BooleanVal>(
1519 template StringVal AggregateFunctions::ReservoirSampleFinalize<TinyIntVal>(
1521 template StringVal AggregateFunctions::ReservoirSampleFinalize<SmallIntVal>(
1523 template StringVal AggregateFunctions::ReservoirSampleFinalize<IntVal>(
1525 template StringVal AggregateFunctions::ReservoirSampleFinalize<BigIntVal>(
1527 template StringVal AggregateFunctions::ReservoirSampleFinalize<FloatVal>(
1529 template StringVal AggregateFunctions::ReservoirSampleFinalize<DoubleVal>(
1531 template StringVal AggregateFunctions::ReservoirSampleFinalize<StringVal>(
1533 template StringVal AggregateFunctions::ReservoirSampleFinalize<TimestampVal>(
1535 template StringVal AggregateFunctions::ReservoirSampleFinalize<DecimalVal>(
1538 template StringVal AggregateFunctions::HistogramFinalize<BooleanVal>(
1540 template StringVal AggregateFunctions::HistogramFinalize<TinyIntVal>(
1542 template StringVal AggregateFunctions::HistogramFinalize<SmallIntVal>(
1544 template StringVal AggregateFunctions::HistogramFinalize<IntVal>(
1546 template StringVal AggregateFunctions::HistogramFinalize<BigIntVal>(
1548 template StringVal AggregateFunctions::HistogramFinalize<FloatVal>(
1550 template StringVal AggregateFunctions::HistogramFinalize<DoubleVal>(
1552 template StringVal AggregateFunctions::HistogramFinalize<StringVal>(
1554 template StringVal AggregateFunctions::HistogramFinalize<TimestampVal>(
1556 template StringVal AggregateFunctions::HistogramFinalize<DecimalVal>(
1559 template BooleanVal AggregateFunctions::AppxMedianFinalize<BooleanVal>(
1561 template TinyIntVal AggregateFunctions::AppxMedianFinalize<TinyIntVal>(
1563 template SmallIntVal AggregateFunctions::AppxMedianFinalize<SmallIntVal>(
1565 template IntVal AggregateFunctions::AppxMedianFinalize<IntVal>(
1567 template BigIntVal AggregateFunctions::AppxMedianFinalize<BigIntVal>(
1569 template FloatVal AggregateFunctions::AppxMedianFinalize<FloatVal>(
1571 template DoubleVal AggregateFunctions::AppxMedianFinalize<DoubleVal>(
1573 template StringVal AggregateFunctions::AppxMedianFinalize<StringVal>(
1575 template TimestampVal AggregateFunctions::AppxMedianFinalize<TimestampVal>(
1577 template DecimalVal AggregateFunctions::AppxMedianFinalize<DecimalVal>(
1614 template void AggregateFunctions::LastValUpdate<BooleanVal>(
1616 template void AggregateFunctions::LastValUpdate<TinyIntVal>(
1618 template void AggregateFunctions::LastValUpdate<SmallIntVal>(
1620 template void AggregateFunctions::LastValUpdate<IntVal>(
1622 template void AggregateFunctions::LastValUpdate<BigIntVal>(
1624 template void AggregateFunctions::LastValUpdate<FloatVal>(
1626 template void AggregateFunctions::LastValUpdate<DoubleVal>(
1628 template void AggregateFunctions::LastValUpdate<StringVal>(
1630 template void AggregateFunctions::LastValUpdate<TimestampVal>(
1632 template void AggregateFunctions::LastValUpdate<DecimalVal>(
1635 template void AggregateFunctions::LastValRemove<BooleanVal>(
1637 template void AggregateFunctions::LastValRemove<TinyIntVal>(
1639 template void AggregateFunctions::LastValRemove<SmallIntVal>(
1641 template void AggregateFunctions::LastValRemove<IntVal>(
1643 template void AggregateFunctions::LastValRemove<BigIntVal>(
1645 template void AggregateFunctions::LastValRemove<FloatVal>(
1647 template void AggregateFunctions::LastValRemove<DoubleVal>(
1649 template void AggregateFunctions::LastValRemove<StringVal>(
1651 template void AggregateFunctions::LastValRemove<TimestampVal>(
1653 template void AggregateFunctions::LastValRemove<DecimalVal>(
1656 template void AggregateFunctions::FirstValUpdate<BooleanVal>(
1658 template void AggregateFunctions::FirstValUpdate<TinyIntVal>(
1660 template void AggregateFunctions::FirstValUpdate<SmallIntVal>(
1662 template void AggregateFunctions::FirstValUpdate<IntVal>(
1664 template void AggregateFunctions::FirstValUpdate<BigIntVal>(
1666 template void AggregateFunctions::FirstValUpdate<FloatVal>(
1668 template void AggregateFunctions::FirstValUpdate<DoubleVal>(
1670 template void AggregateFunctions::FirstValUpdate<StringVal>(
1672 template void AggregateFunctions::FirstValUpdate<TimestampVal>(
1674 template void AggregateFunctions::FirstValUpdate<DecimalVal>(
1677 template void AggregateFunctions::FirstValRewriteUpdate<BooleanVal>(
1679 template void AggregateFunctions::FirstValRewriteUpdate<TinyIntVal>(
1681 template void AggregateFunctions::FirstValRewriteUpdate<SmallIntVal>(
1683 template void AggregateFunctions::FirstValRewriteUpdate<IntVal>(
1685 template void AggregateFunctions::FirstValRewriteUpdate<BigIntVal>(
1687 template void AggregateFunctions::FirstValRewriteUpdate<FloatVal>(
1689 template void AggregateFunctions::FirstValRewriteUpdate<DoubleVal>(
1691 template void AggregateFunctions::FirstValRewriteUpdate<StringVal>(
1693 template void AggregateFunctions::FirstValRewriteUpdate<TimestampVal>(
1695 template void AggregateFunctions::FirstValRewriteUpdate<DecimalVal>(
1698 template void AggregateFunctions::OffsetFnInit<BooleanVal>(
1700 template void AggregateFunctions::OffsetFnInit<TinyIntVal>(
1702 template void AggregateFunctions::OffsetFnInit<SmallIntVal>(
1704 template void AggregateFunctions::OffsetFnInit<IntVal>(
1706 template void AggregateFunctions::OffsetFnInit<BigIntVal>(
1708 template void AggregateFunctions::OffsetFnInit<FloatVal>(
1710 template void AggregateFunctions::OffsetFnInit<DoubleVal>(
1712 template void AggregateFunctions::OffsetFnInit<StringVal>(
1714 template void AggregateFunctions::OffsetFnInit<TimestampVal>(
1716 template void AggregateFunctions::OffsetFnInit<DecimalVal>(
1719 template void AggregateFunctions::OffsetFnUpdate<BooleanVal>(
1722 template void AggregateFunctions::OffsetFnUpdate<TinyIntVal>(
1725 template void AggregateFunctions::OffsetFnUpdate<SmallIntVal>(
1728 template void AggregateFunctions::OffsetFnUpdate<IntVal>(
1730 template void AggregateFunctions::OffsetFnUpdate<BigIntVal>(
1733 template void AggregateFunctions::OffsetFnUpdate<FloatVal>(
1736 template void AggregateFunctions::OffsetFnUpdate<DoubleVal>(
1739 template void AggregateFunctions::OffsetFnUpdate<StringVal>(
1742 template void AggregateFunctions::OffsetFnUpdate<TimestampVal>(
1745 template void AggregateFunctions::OffsetFnUpdate<DecimalVal>(
static void SumDecimalAddOrSubtract(FunctionContext *, const DecimalVal &src, DecimalVal *dst, bool subtract=false)
Adds or subtracts src from dst. Implements Update() and Remove().
ReservoirSample(const T &val)
static void ReservoirSampleUpdate(FunctionContext *, const T &src, StringVal *dst)
ReservoirSample(const StringVal &string_val)
int precision
Only valid if type == TYPE_DECIMAL.
static void OffsetFnUpdate(FunctionContext *, const T &src, const BigIntVal &, const T &, T *dst)
static void SumDecimalRemove(FunctionContext *, const DecimalVal &src, DecimalVal *dst)
static DecimalVal DecimalAvgGetValue(FunctionContext *ctx, const StringVal &val)
int64_t time_of_day
Nanoseconds in current day.
static void RankUpdate(FunctionContext *, StringVal *dst)
Update state for RANK.
static const int NUM_SAMPLES_PER_BUCKET
static void PcInit(FunctionContext *, StringVal *slot)
static const int NUM_BUCKETS
string DistinctEstimateBitMapToString(uint8_t *v)
static void CountStarRemove(FunctionContext *, BigIntVal *dst)
static void DecimalAvgUpdate(FunctionContext *ctx, const DecimalVal &src, StringVal *dst)
impala::FunctionContextImpl * impl()
TODO: Add mechanism for UDAs to update stats similar to runtime profile counters. ...
static uint32_t Hash(const BooleanVal &v, const FunctionContext::TypeDesc &, int seed)
static void InitZero(FunctionContext *, T *dst)
Initializes dst to 0.
const TypeDesc & GetReturnType() const
static void InitNull(FunctionContext *, AnyVal *dst)
Initializes dst to NULL.
static void FirstValUpdate(FunctionContext *, const T &src, T *dst)
Implements FIRST_VALUE.
static void DecimalAvgAddOrRemove(FunctionContext *ctx, const DecimalVal &src, StringVal *dst, bool remove=false)
static ColumnType TypeDescToColumnType(const FunctionContext::TypeDesc &type)
static void Max(FunctionContext *, const T &src, T *dst)
MaxUpdate/MaxMerge.
static void RankInit(FunctionContext *, StringVal *slot)
Initializes the state for RANK and DENSE_RANK.
static void CountStarUpdate(FunctionContext *, BigIntVal *dst)
static void HllInit(FunctionContext *, StringVal *slot)
static DoubleVal KnuthVarFinalize(FunctionContext *context, const StringVal &val)
static void ReservoirSampleMerge(FunctionContext *, const StringVal &src, StringVal *dst)
ReservoirSample< T > samples[NUM_SAMPLES]
static BigIntVal PcFinalize(FunctionContext *, const StringVal &src)
static void HllUpdate(FunctionContext *, const T &src, StringVal *dst)
static T AppxMedianFinalize(FunctionContext *, const StringVal &src)
Returns an approximate median using reservoir sampling.
StringVal ToStringVal(FunctionContext *context, T val)
int32_t date
Gregorian date. This has the same binary format as boost::gregorian::date.
static const int MAX_STRING_SAMPLE_LEN
static const StringVal DEFAULT_STRING_CONCAT_DELIM((uint8_t *)", ", 2)
StringVal GetValue(FunctionContext *ctx)
static const int PC_BITMAP_LENGTH
This object has a compatible storage format with boost::ptime.
static void LastValRemove(FunctionContext *, const T &src, T *dst)
int64_t num_updates() const
static void OffsetFnInit(FunctionContext *, T *dst)
static void CountUpdate(FunctionContext *, const AnyVal &src, BigIntVal *dst)
Implementation of Count and Count(*)
std::size_t hash_value(const Decimal4Value &v)
This function must be called 'hash_value' to be picked up by boost.
void PrintSample(const ReservoirSample< T > &v, ostream *os)
static void StringConcatMerge(FunctionContext *, const StringVal &src, StringVal *result)
void ToTimestampVal(impala_udf::TimestampVal *tv) const
uint8_t val[MAX_STRING_SAMPLE_LEN]
static void SumDecimalMerge(FunctionContext *, const DecimalVal &src, DecimalVal *dst)
static BigIntVal HllFinalize(FunctionContext *, const StringVal &src)
bool AddWarning(const char *warning_msg)
static StringVal ReservoirSampleFinalize(FunctionContext *, const StringVal &src)
Returns 20,000 unsorted samples as a list of comma-separated values.
static const StringVal ReservoirSampleSerialize(FunctionContext *, const StringVal &src)
static void AvgInit(FunctionContext *ctx, StringVal *dst)
std::string DebugString() const
static void SumRemove(FunctionContext *, const SRC_VAL &src, DST_VAL *dst)
static void AvgUpdate(FunctionContext *ctx, const T &src, StringVal *dst)
void IncrementNumUpdates(int64_t n=1)
const TypeDesc * GetArgType(int arg_idx) const
static void SumDecimalUpdate(FunctionContext *, const DecimalVal &src, DecimalVal *dst)
Sum for decimals.
static void SetDistinctEstimateBit(uint8_t *bitmap, uint32_t row_index, uint32_t bit_index)
static void HllMerge(FunctionContext *, const StringVal &src, StringVal *dst)
static void DenseRankUpdate(FunctionContext *, StringVal *dst)
Update state for DENSE_RANK.
static void AvgRemove(FunctionContext *ctx, const T &src, StringVal *dst)
static void KnuthVarUpdate(FunctionContext *context, const T &input, StringVal *val)
static DoubleVal KnuthVarPopFinalize(FunctionContext *context, const StringVal &val)
Calculates the biased variance, uses KnuthVar Init-Update-Merge functions.
static const uint64_t FNV64_SEED
static void TimestampAvgUpdate(FunctionContext *ctx, const TimestampVal &src, StringVal *dst)
Avg for timestamp. Uses AvgInit() and AvgMerge().
int GetByteSize() const
Returns the byte size of this type. Returns 0 for variable length types.
void Free(uint8_t *buffer)
Frees a buffer returned from Allocate() or Reallocate()
static void Min(FunctionContext *, const T &src, T *dst)
MinUpdate/MinMerge.
static void CountMerge(FunctionContext *, const BigIntVal &src, BigIntVal *dst)
static void KnuthVarInit(FunctionContext *context, StringVal *val)
static void PcMerge(FunctionContext *, const StringVal &src, StringVal *dst)
double ComputeKnuthVariance(const KnuthVarianceState &state, bool pop)
static bool GetDistinctEstimateBit(uint8_t *bitmap, uint32_t row_index, uint32_t bit_index)
static StringVal StringConcatFinalize(FunctionContext *, const StringVal &src)
static void DecimalAvgMerge(FunctionContext *ctx, const StringVal &src, StringVal *dst)
DecimalValue< RESULT_T > Divide(const ColumnType &this_type, const DecimalValue &other, const ColumnType &other_type, int result_scale, bool *is_nan, bool *overflow) const
is_nan is set to true if 'other' is 0. The value returned is undefined.
static void PcsaUpdate(FunctionContext *, const T &src, StringVal *dst)
static void DecimalAvgInit(FunctionContext *ctx, StringVal *dst)
Avg for decimals.
bool IsArgConstant(int arg_idx) const
int64_t num_removes() const
static uint64_t Hash64(const BooleanVal &v, const FunctionContext::TypeDesc &, int64_t seed)
static StringVal StringValSerializeOrFinalize(FunctionContext *ctx, const StringVal &src)
StringVal Serialize/Finalize function that copies and frees src.
static DoubleVal KnuthStddevPopFinalize(FunctionContext *context, const StringVal &val)
Calculates the biased STDDEV, uses KnuthVar Init-Update-Merge functions.
static void LastValUpdate(FunctionContext *, const T &src, T *dst)
Implements LAST_VALUE.
T GetValue(FunctionContext *ctx)
static TimestampValue FromTimestampVal(const impala_udf::TimestampVal &udf_value)
uint8_t * Reallocate(uint8_t *ptr, int byte_size)
static const int HLL_PRECISION
double ToSubsecondUnixTime() const
static void SumUpdate(FunctionContext *, const SRC_VAL &src, DST_VAL *dst)
SumUpdate, SumMerge.
uint8_t * Allocate(int byte_size)
static void FirstValRewriteUpdate(FunctionContext *, const T &src, const BigIntVal &, T *dst)
static TimestampVal TimestampAvgFinalize(FunctionContext *ctx, const StringVal &val)
static StringValue FromStringVal(const impala_udf::StringVal &sv)
static void ReservoirSampleInit(FunctionContext *, StringVal *slot)
static DoubleVal AvgGetValue(FunctionContext *ctx, const StringVal &val)
static BigIntVal PcsaFinalize(FunctionContext *, const StringVal &src)
static void InitNullString(FunctionContext *c, StringVal *dst)
Initializes dst to NULL and sets dst->ptr to NULL.
static BigIntVal RankGetValue(FunctionContext *, StringVal &src)
Returns the result for RANK and prepares the state for the next Update().
static DoubleVal AvgFinalize(FunctionContext *ctx, const StringVal &val)
bool SampleKeyGreater(const ReservoirSample< T > &i, const ReservoirSample< T > &j)
bool SampleValLess(const ReservoirSample< T > &i, const ReservoirSample< T > &j)
static void TimestampAvgRemove(FunctionContext *ctx, const TimestampVal &src, StringVal *dst)
static BigIntVal RankFinalize(FunctionContext *, StringVal &src)
Returns the result for RANK and DENSE_RANK and cleans up intermediate state in src.
static void PcUpdate(FunctionContext *, const T &src, StringVal *dst)
static TimestampVal TimestampAvgGetValue(FunctionContext *ctx, const StringVal &val)
static ColumnType CreateDecimalType(int precision, int scale)
static void KnuthVarMerge(FunctionContext *context, const StringVal &src, StringVal *dst)
static const int NUM_PC_BITMAPS
static BigIntVal DenseRankGetValue(FunctionContext *, StringVal &src)
AnyVal * GetConstantArg(int arg_idx) const
static StringVal HistogramFinalize(FunctionContext *, const StringVal &src)
static const int NUM_SAMPLES
static void CountRemove(FunctionContext *, const AnyVal &src, BigIntVal *dst)
static void DecimalAvgRemove(FunctionContext *ctx, const DecimalVal &src, StringVal *dst)
static StringVal StringValGetValue(FunctionContext *ctx, const StringVal &src)
StringVal GetValue() function that returns a copy of src.
static DoubleVal KnuthStddevFinalize(FunctionContext *context, const StringVal &val)
Calculates STDDEV, uses KnuthVar Init-Update-Merge functions.
void IncrementNumRemoves(int64_t n=1)
double DistinceEstimateFinalize(const StringVal &src)
static void AvgMerge(FunctionContext *ctx, const StringVal &src, StringVal *dst)
int64_t GetNext64(int64_t max)
static uint64_t HllFinalEstimate(const uint8_t *buckets, int32_t num_buckets)
static const float PC_THETA
static DecimalVal DecimalAvgFinalize(FunctionContext *ctx, const StringVal &val)
static void StringConcatUpdate(FunctionContext *, const StringVal &src, StringVal *result)
String concat.
std::string DebugString() const
__int128_t int128_t
We use the C++ int128_t type. It is stored using 16 bytes and is very performant.