25 using namespace impala_udf;
41 memset(dst->
ptr, 0, str_len);
48 const uint8_t* ptr =
reinterpret_cast<const uint8_t*
>(data);
66 if (hash_value != 0) {
69 int idx = hash_value % dst->
len;
70 uint8_t first_one_bit = __builtin_ctzl(hash_value >>
HLL_PRECISION) + 1;
71 dst->
ptr[
idx] = ::max(dst->
ptr[idx], first_one_bit);
81 for (
int i = 0; i < src.
len; ++i) {
82 dst->
ptr[i] = ::max(dst->
ptr[i], src.
ptr[i]);
101 if (num_streams == 16) {
103 }
else if (num_streams == 32) {
105 }
else if (num_streams == 64) {
108 alpha = 0.7213f / (1 + 1.079f / num_streams);
111 float harmonic_mean = 0;
112 int num_zero_registers = 0;
113 for (
int i = 0; i < src.
len; ++i) {
114 harmonic_mean += powf(2.0f, -src.
ptr[i]);
115 if (src.
ptr[i] == 0) ++num_zero_registers;
117 harmonic_mean = 1.0f / harmonic_mean;
118 int64_t estimate = alpha * num_streams * num_streams * harmonic_mean;
120 if (num_zero_registers != 0) {
123 estimate = num_streams * log(static_cast<float>(num_streams) / num_zero_registers);
129 string out_str = out.str();
130 StringVal result_str(ctx, out_str.size());
131 memcpy(result_str.ptr, out_str.c_str(), result_str.len);
void HllMerge(FunctionContext *ctx, const StringVal &src, StringVal *dst)
StringVal HllFinalize(FunctionContext *ctx, const StringVal &src)
const StringSearch UrlParser::hash_search & hash
std::size_t hash_value(const Decimal4Value &v)
This function must be called 'hash_value' to be picked up by boost.
static uint64_t FnvHash(const void *data, int32_t bytes, uint64_t hash)
void HllInit(FunctionContext *ctx, StringVal *dst)
void HllUpdate(FunctionContext *ctx, const IntVal &src, StringVal *dst)
void Free(uint8_t *buffer)
Frees a buffer returned from Allocate() or Reallocate()
static uint64_t Hash(const IntVal &v)
static const uint64_t FNV64_PRIME
uint8_t * Allocate(int byte_size)
StringVal HllSerialize(FunctionContext *ctx, const StringVal &src)
static const uint64_t FNV64_SEED