Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
anyval-util.h
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 
16 #ifndef IMPALA_EXPRS_ANYVAL_UTIL_H
17 #define IMPALA_EXPRS_ANYVAL_UTIL_H
18 
20 #include "udf/udf-internal.h"
21 #include "util/hash-util.h"
22 
23 #include "common/names.h"
24 
25 using namespace impala_udf;
26 
27 namespace impala {
28 
29 class ObjectPool;
30 
32 class AnyValUtil {
33  public:
34  static uint32_t Hash(const BooleanVal& v, const FunctionContext::TypeDesc&, int seed) {
35  return HashUtil::Hash(&v.val, 1, seed);
36  }
37 
38  static uint32_t Hash(const TinyIntVal& v, const FunctionContext::TypeDesc&, int seed) {
39  return HashUtil::Hash(&v.val, 1, seed);
40  }
41 
42  static uint32_t Hash(const SmallIntVal& v, const FunctionContext::TypeDesc&, int seed) {
43  return HashUtil::Hash(&v.val, 2, seed);
44  }
45 
46  static uint32_t Hash(const IntVal& v, const FunctionContext::TypeDesc&, int seed) {
47  return HashUtil::Hash(&v.val, 4, seed);
48  }
49 
50  static uint32_t Hash(const BigIntVal& v, const FunctionContext::TypeDesc&, int seed) {
51  return HashUtil::Hash(&v.val, 8, seed);
52  }
53 
54  static uint32_t Hash(const FloatVal& v, const FunctionContext::TypeDesc&, int seed) {
55  return HashUtil::Hash(&v.val, 4, seed);
56  }
57 
58  static uint32_t Hash(const DoubleVal& v, const FunctionContext::TypeDesc&, int seed) {
59  return HashUtil::Hash(&v.val, 8, seed);
60  }
61 
62  static uint32_t Hash(const StringVal& v, const FunctionContext::TypeDesc&, int seed) {
63  return HashUtil::Hash(v.ptr, v.len, seed);
64  }
65 
66  static uint32_t Hash(const TimestampVal& v, const FunctionContext::TypeDesc&,
67  int seed) {
68  TimestampValue tv = TimestampValue::FromTimestampVal(v);
69  return tv.Hash(seed);
70  }
71 
73  int64_t seed) {
74  DCHECK_GT(t.precision, 0);
75  switch (ColumnType::GetDecimalByteSize(t.precision)) {
76  case 4: return HashUtil::Hash(&v.val4, 4, seed);
77  case 8: return HashUtil::Hash(&v.val8, 8, seed);
78  case 16: return HashUtil::Hash(&v.val16, 16, seed);
79  default:
80  DCHECK(false);
81  return 0;
82  }
83  }
84 
86  int64_t seed) {
87  return HashUtil::FnvHash64(&v.val, 1, seed);
88  }
89 
91  int64_t seed) {
92  return HashUtil::FnvHash64(&v.val, 1, seed);
93  }
94 
96  int64_t seed) {
97  return HashUtil::FnvHash64(&v.val, 2, seed);
98  }
99 
101  int64_t seed) {
102  return HashUtil::FnvHash64(&v.val, 4, seed);
103  }
104 
106  int64_t seed) {
107  return HashUtil::FnvHash64(&v.val, 8, seed);
108  }
109 
111  int64_t seed) {
112  return HashUtil::FnvHash64(&v.val, 4, seed);
113  }
114 
116  int64_t seed) {
117  return HashUtil::FnvHash64(&v.val, 8, seed);
118  }
119 
121  int64_t seed) {
122  return HashUtil::FnvHash64(v.ptr, v.len, seed);
123  }
124 
126  int64_t seed) {
127  TimestampValue tv = TimestampValue::FromTimestampVal(v);
128  return HashUtil::FnvHash64(&tv, 12, seed);
129  }
130 
132  int64_t seed) {
133  switch (ColumnType::GetDecimalByteSize(t.precision)) {
134  case 4: return HashUtil::FnvHash64(&v.val4, 4, seed);
135  case 8: return HashUtil::FnvHash64(&v.val8, 8, seed);
136  case 16: return HashUtil::FnvHash64(&v.val16, 16, seed);
137  default:
138  DCHECK(false);
139  return 0;
140  }
141  }
142 
144  template<typename T>
145  static inline bool Equals(const FunctionContext::TypeDesc* type, const T& x, const T& y) {
146  DCHECK_NOTNULL(type);
147  return Equals(TypeDescToColumnType(*type), x, y);
148  }
149 
150  template<typename T>
151  static inline bool Equals(const ColumnType& type, const T& x, const T& y) {
152  DCHECK(!x.is_null);
153  DCHECK(!y.is_null);
154  return x.val == y.val;
155  }
156 
158  static int AnyValSize(const ColumnType& t) {
159  switch (t.type) {
160  case TYPE_BOOLEAN: return sizeof(BooleanVal);
161  case TYPE_TINYINT: return sizeof(TinyIntVal);
162  case TYPE_SMALLINT: return sizeof(SmallIntVal);
163  case TYPE_INT: return sizeof(IntVal);
164  case TYPE_BIGINT: return sizeof(BigIntVal);
165  case TYPE_FLOAT: return sizeof(FloatVal);
166  case TYPE_DOUBLE: return sizeof(DoubleVal);
167  case TYPE_STRING:
168  case TYPE_VARCHAR:
169  case TYPE_CHAR:
170  return sizeof(StringVal);
171  case TYPE_TIMESTAMP: return sizeof(TimestampVal);
172  case TYPE_DECIMAL: return sizeof(DecimalVal);
173  default:
174  DCHECK(false) << t;
175  return 0;
176  }
177  }
178 
179  static std::string ToString(const StringVal& v) {
180  return std::string(reinterpret_cast<char*>(v.ptr), v.len);
181  }
182 
183  static StringVal FromString(FunctionContext* ctx, const std::string& s) {
184  StringVal val = FromBuffer(ctx, s.c_str(), s.size());
185  return val;
186  }
187 
188  static void TruncateIfNecessary(const ColumnType& type, StringVal *val) {
189  if (type.type == TYPE_VARCHAR) {
190  DCHECK(type.len >= 0);
191  val->len = min(val->len, type.len);
192  }
193  }
194 
195  static StringVal FromBuffer(FunctionContext* ctx, const char* ptr, int len) {
196  StringVal result(ctx, len);
197  memcpy(result.ptr, ptr, len);
198  return result;
199  }
200 
201  static FunctionContext::TypeDesc ColumnTypeToTypeDesc(const ColumnType& type);
202  static ColumnType TypeDescToColumnType(const FunctionContext::TypeDesc& type);
203 
205  static void SetAnyVal(const void* slot, const ColumnType& type, AnyVal* dst) {
206  if (slot == NULL) {
207  dst->is_null = true;
208  return;
209  }
210 
211  dst->is_null = false;
212  switch (type.type) {
213  case TYPE_NULL: return;
214  case TYPE_BOOLEAN:
215  reinterpret_cast<BooleanVal*>(dst)->val = *reinterpret_cast<const bool*>(slot);
216  return;
217  case TYPE_TINYINT:
218  reinterpret_cast<TinyIntVal*>(dst)->val = *reinterpret_cast<const int8_t*>(slot);
219  return;
220  case TYPE_SMALLINT:
221  reinterpret_cast<SmallIntVal*>(dst)->val = *reinterpret_cast<const int16_t*>(slot);
222  return;
223  case TYPE_INT:
224  reinterpret_cast<IntVal*>(dst)->val = *reinterpret_cast<const int32_t*>(slot);
225  return;
226  case TYPE_BIGINT:
227  reinterpret_cast<BigIntVal*>(dst)->val = *reinterpret_cast<const int64_t*>(slot);
228  return;
229  case TYPE_FLOAT:
230  reinterpret_cast<FloatVal*>(dst)->val = *reinterpret_cast<const float*>(slot);
231  return;
232  case TYPE_DOUBLE:
233  reinterpret_cast<DoubleVal*>(dst)->val = *reinterpret_cast<const double*>(slot);
234  return;
235  case TYPE_STRING:
236  case TYPE_VARCHAR:
237  case TYPE_CHAR: {
238  if (type.IsVarLen()) {
239  reinterpret_cast<const StringValue*>(slot)->ToStringVal(
240  reinterpret_cast<StringVal*>(dst));
241  if (type.type == TYPE_VARCHAR) {
242  StringVal* sv = reinterpret_cast<StringVal*>(dst);
243  DCHECK(type.len >= 0);
244  DCHECK_LE(sv->len, type.len);
245  }
246  } else {
247  DCHECK_EQ(type.type, TYPE_CHAR);
248  StringVal* sv = reinterpret_cast<StringVal*>(dst);
249  sv->ptr = const_cast<uint8_t*>(reinterpret_cast<const uint8_t*>(slot));
250  sv->len = type.len;
251  }
252  return;
253  }
254  case TYPE_TIMESTAMP:
255  reinterpret_cast<const TimestampValue*>(slot)->ToTimestampVal(
256  reinterpret_cast<TimestampVal*>(dst));
257  return;
258  case TYPE_DECIMAL:
259  switch (type.GetByteSize()) {
260  case 4:
261  reinterpret_cast<DecimalVal*>(dst)->val4 =
262  *reinterpret_cast<const int32_t*>(slot);
263  return;
264  case 8:
265  reinterpret_cast<DecimalVal*>(dst)->val8 =
266  *reinterpret_cast<const int64_t*>(slot);
267  return;
268 #if __BYTE_ORDER == __LITTLE_ENDIAN
269  case 16:
270  memcpy(&reinterpret_cast<DecimalVal*>(dst)->val4, slot, type.GetByteSize());
271 #else
272  DCHECK(false) << "Not implemented.";
273 #endif
274  return;
275  default:
276  break;
277  }
278  default:
279  DCHECK(false) << "NYI: " << type;
280  }
281  }
282 };
283 
285 impala_udf::AnyVal* CreateAnyVal(ObjectPool* pool, const ColumnType& type);
286 
288 impala_udf::AnyVal* CreateAnyVal(const ColumnType& type);
289 
290 template<> inline bool AnyValUtil::Equals(
291  const ColumnType& type, const StringVal& x, const StringVal& y) {
292  DCHECK(!x.is_null);
293  DCHECK(!y.is_null);
294  StringValue x_sv = StringValue::FromStringVal(x);
295  StringValue y_sv = StringValue::FromStringVal(y);
296  return x_sv == y_sv;
297 }
298 
299 template<> inline bool AnyValUtil::Equals(
300  const ColumnType& type, const TimestampVal& x, const TimestampVal& y) {
301  DCHECK(!x.is_null);
302  DCHECK(!y.is_null);
303  TimestampValue x_tv = TimestampValue::FromTimestampVal(x);
304  TimestampValue y_tv = TimestampValue::FromTimestampVal(y);
305  return x_tv == y_tv;
306 }
307 
308 template<> inline bool AnyValUtil::Equals(
309  const ColumnType& type, const DecimalVal& x, const DecimalVal& y) {
310  DCHECK(!x.is_null);
311  DCHECK(!y.is_null);
312  if (type.precision <= ColumnType::MAX_DECIMAL4_PRECISION) {
313  return x.val4 == y.val4;
314  } else if (type.precision <= ColumnType::MAX_DECIMAL8_PRECISION) {
315  return x.val8 == y.val8;
316  } else {
317  return x.val16 == y.val16;
318  }
319 }
320 
321 }
322 
323 #endif
int precision
Only valid if type == TYPE_DECIMAL.
Definition: udf.h:75
bool IsVarLen() const
Definition: types.h:172
AnyVal * CreateAnyVal(ObjectPool *pool, const ColumnType &type)
Creates the corresponding AnyVal subclass for type. The object is added to the pool.
Definition: anyval-util.cc:26
static void TruncateIfNecessary(const ColumnType &type, StringVal *val)
Definition: anyval-util.h:188
static uint32_t Hash(const BooleanVal &v, const FunctionContext::TypeDesc &, int seed)
Definition: anyval-util.h:34
static uint32_t Hash(const StringVal &v, const FunctionContext::TypeDesc &, int seed)
Definition: anyval-util.h:62
static StringVal FromBuffer(FunctionContext *ctx, const char *ptr, int len)
Definition: anyval-util.h:195
__int128_t val16
Definition: udf.h:572
int precision
Only set if type == TYPE_DECIMAL.
Definition: types.h:68
static bool Equals(const FunctionContext::TypeDesc *type, const T &x, const T &y)
Templated equality functions. These assume the input values are not NULL.
Definition: anyval-util.h:145
int32_t val
Definition: udf.h:421
static uint64_t Hash64(const BigIntVal &v, const FunctionContext::TypeDesc &, int64_t seed)
Definition: anyval-util.h:105
StringVal ToStringVal(FunctionContext *context, T val)
static uint64_t Hash64(const SmallIntVal &v, const FunctionContext::TypeDesc &, int64_t seed)
Definition: anyval-util.h:95
This object has a compatible storage format with boost::ptime.
Definition: udf.h:495
uint8_t * ptr
Definition: udf.h:523
static uint64_t Hash64(const DecimalVal &v, const FunctionContext::TypeDesc &t, int64_t seed)
Definition: anyval-util.h:131
Utilities for AnyVals.
Definition: anyval-util.h:32
static uint32_t Hash(const IntVal &v, const FunctionContext::TypeDesc &, int seed)
Definition: anyval-util.h:46
bool is_null
Definition: udf.h:359
PrimitiveType type
Definition: types.h:60
static uint64_t Hash(const DecimalVal &v, const FunctionContext::TypeDesc &t, int64_t seed)
Definition: anyval-util.h:72
ObjectPool pool
uint32_t Hash(int seed=0) const
int GetByteSize() const
Returns the byte size of this type. Returns 0 for variable length types.
Definition: types.h:178
static uint32_t Hash(const TimestampVal &v, const FunctionContext::TypeDesc &, int seed)
Definition: anyval-util.h:66
static uint64_t Hash64(const FloatVal &v, const FunctionContext::TypeDesc &, int64_t seed)
Definition: anyval-util.h:110
int len
Only set if type == TYPE_CHAR or type == TYPE_VARCHAR.
Definition: types.h:62
static StringVal FromString(FunctionContext *ctx, const std::string &s)
Definition: anyval-util.h:183
static uint32_t Hash(const BigIntVal &v, const FunctionContext::TypeDesc &, int seed)
Definition: anyval-util.h:50
static uint64_t Hash(const IntVal &v)
static uint32_t Hash(const DoubleVal &v, const FunctionContext::TypeDesc &, int seed)
Definition: anyval-util.h:58
static uint64_t Hash64(const BooleanVal &v, const FunctionContext::TypeDesc &, int64_t seed)
Definition: anyval-util.h:85
static std::string ToString(const StringVal &v)
Definition: anyval-util.h:179
static bool Equals(const ColumnType &type, const T &x, const T &y)
Definition: anyval-util.h:151
static uint32_t Hash(const FloatVal &v, const FunctionContext::TypeDesc &, int seed)
Definition: anyval-util.h:54
static uint64_t Hash64(const IntVal &v, const FunctionContext::TypeDesc &, int64_t seed)
Definition: anyval-util.h:100
static uint64_t Hash64(const TimestampVal &v, const FunctionContext::TypeDesc &, int64_t seed)
Definition: anyval-util.h:125
static uint32_t Hash(const TinyIntVal &v, const FunctionContext::TypeDesc &, int seed)
Definition: anyval-util.h:38
static uint64_t Hash64(const StringVal &v, const FunctionContext::TypeDesc &, int64_t seed)
Definition: anyval-util.h:120
static uint64_t Hash64(const TinyIntVal &v, const FunctionContext::TypeDesc &, int64_t seed)
Definition: anyval-util.h:90
static uint32_t Hash(const SmallIntVal &v, const FunctionContext::TypeDesc &, int seed)
Definition: anyval-util.h:42
static int AnyValSize(const ColumnType &t)
Returns the byte size of *Val for type t.
Definition: anyval-util.h:158
static void SetAnyVal(const void *slot, const ColumnType &type, AnyVal *dst)
Utility to put val into an AnyVal struct.
Definition: anyval-util.h:205
static uint64_t Hash64(const DoubleVal &v, const FunctionContext::TypeDesc &, int64_t seed)
Definition: anyval-util.h:115