Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
raw-value.h
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 
16 #ifndef IMPALA_RUNTIME_RAW_VALUE_H
17 #define IMPALA_RUNTIME_RAW_VALUE_H
18 
19 #include <string>
20 
21 #include <boost/functional/hash.hpp>
22 #include <math.h>
23 
24 #include "common/logging.h"
27 #include "runtime/types.h"
28 #include "util/hash-util.h"
29 
30 namespace impala {
31 
32 class MemPool;
33 class SlotDescriptor;
34 class Tuple;
35 
37 class RawValue {
38  public:
40  static const int ASCII_PRECISION;
41 
47  static void PrintValue(const void* value, const ColumnType& type, int scale,
48  std::stringstream* stream);
49 
51  static void PrintValue(const void* value, const ColumnType& type, int scale,
52  std::string* str);
53 
55  static void PrintValueAsBytes(const void* value, const ColumnType& type,
56  std::stringstream* stream);
57 
60  static uint32_t GetHashValue(const void* v, const ColumnType& type, uint32_t seed = 0);
61 
67  static uint32_t GetHashValueFnv(const void* v, const ColumnType& type, uint32_t seed);
68 
71  static int Compare(const void* v1, const void* v2, const ColumnType& type);
72 
76  static void Write(const void* value, Tuple* tuple, const SlotDescriptor* slot_desc,
77  MemPool* pool);
78 
82  static void Write(const void* src, void* dst, const ColumnType& type, MemPool* pool);
83 
87  static void Write(const void* src, const ColumnType& type, void* dst, uint8_t** buf);
88 
92  static bool Eq(const void* v1, const void* v2, const ColumnType& type);
93 
94  private:
96  static const uint32_t HASH32_COMBINE_SEED = 0x9e3779b9;
97 
101  static inline uint32_t HashCombine32(uint32_t value, uint32_t seed) {
102  return seed ^ (HASH32_COMBINE_SEED + value + (seed << 6) + (seed >> 2));
103  }
104 };
105 
106 inline bool RawValue::Eq(const void* v1, const void* v2, const ColumnType& type) {
107  const StringValue* string_value1;
108  const StringValue* string_value2;
109  switch (type.type) {
110  case TYPE_BOOLEAN:
111  return *reinterpret_cast<const bool*>(v1)
112  == *reinterpret_cast<const bool*>(v2);
113  case TYPE_TINYINT:
114  return *reinterpret_cast<const int8_t*>(v1)
115  == *reinterpret_cast<const int8_t*>(v2);
116  case TYPE_SMALLINT:
117  return *reinterpret_cast<const int16_t*>(v1)
118  == *reinterpret_cast<const int16_t*>(v2);
119  case TYPE_INT:
120  return *reinterpret_cast<const int32_t*>(v1)
121  == *reinterpret_cast<const int32_t*>(v2);
122  case TYPE_BIGINT:
123  return *reinterpret_cast<const int64_t*>(v1)
124  == *reinterpret_cast<const int64_t*>(v2);
125  case TYPE_FLOAT:
126  return *reinterpret_cast<const float*>(v1)
127  == *reinterpret_cast<const float*>(v2);
128  case TYPE_DOUBLE:
129  return *reinterpret_cast<const double*>(v1)
130  == *reinterpret_cast<const double*>(v2);
131  case TYPE_STRING:
132  case TYPE_VARCHAR:
133  string_value1 = reinterpret_cast<const StringValue*>(v1);
134  string_value2 = reinterpret_cast<const StringValue*>(v2);
135  return string_value1->Eq(*string_value2);
136  case TYPE_TIMESTAMP:
137  return *reinterpret_cast<const TimestampValue*>(v1) ==
138  *reinterpret_cast<const TimestampValue*>(v2);
139  case TYPE_CHAR: {
140  const char* v1ptr = StringValue::CharSlotToPtr(v1, type);
141  const char* v2ptr = StringValue::CharSlotToPtr(v2, type);
142  int64_t l1 = StringValue::UnpaddedCharLength(v1ptr, type.len);
143  int64_t l2 = StringValue::UnpaddedCharLength(v2ptr, type.len);
144  return StringCompare(v1ptr, l1, v2ptr, l2, std::min(l1, l2)) == 0;
145  }
146  case TYPE_DECIMAL:
147  switch (type.GetByteSize()) {
148  case 4:
149  return reinterpret_cast<const Decimal4Value*>(v1)->value()
150  == reinterpret_cast<const Decimal4Value*>(v2)->value();
151  case 8:
152  return reinterpret_cast<const Decimal8Value*>(v1)->value()
153  == reinterpret_cast<const Decimal8Value*>(v2)->value();
154  case 16:
155  return reinterpret_cast<const Decimal16Value*>(v1)->value()
156  == reinterpret_cast<const Decimal16Value*>(v2)->value();
157  default:
158  break;
159  }
160  default:
161  DCHECK(false) << type;
162  return 0;
163  };
164 }
165 
168 inline uint32_t RawValue::GetHashValue(const void* v, const ColumnType& type,
169  uint32_t seed) {
170  // Hash_combine with v = 0
171  if (v == NULL) return HashCombine32(0, seed);
172 
173  switch (type.type) {
174  case TYPE_STRING:
175  case TYPE_VARCHAR: {
176  const StringValue* string_value = reinterpret_cast<const StringValue*>(v);
177  return HashUtil::Hash(string_value->ptr, string_value->len, seed);
178  }
179  case TYPE_BOOLEAN: return HashCombine32(*reinterpret_cast<const bool*>(v), seed);
180  case TYPE_TINYINT: return HashUtil::Hash(v, 1, seed);
181  case TYPE_SMALLINT: return HashUtil::Hash(v, 2, seed);
182  case TYPE_INT: return HashUtil::Hash(v, 4, seed);
183  case TYPE_BIGINT: return HashUtil::Hash(v, 8, seed);
184  case TYPE_FLOAT: return HashUtil::Hash(v, 4, seed);
185  case TYPE_DOUBLE: return HashUtil::Hash(v, 8, seed);
186  case TYPE_TIMESTAMP: return HashUtil::Hash(v, 12, seed);
188  type.len, seed);
189  case TYPE_DECIMAL: return HashUtil::Hash(v, type.GetByteSize(), seed);
190  default:
191  DCHECK(false);
192  return 0;
193  }
194 }
195 
196 inline uint32_t RawValue::GetHashValueFnv(const void* v, const ColumnType& type,
197  uint32_t seed) {
198  // Hash_combine with v = 0
199  if (v == NULL) return HashCombine32(0, seed);
200 
201  switch (type.type ) {
202  case TYPE_STRING:
203  case TYPE_VARCHAR: {
204  const StringValue* string_value = reinterpret_cast<const StringValue*>(v);
205  return HashUtil::FnvHash64to32(string_value->ptr, string_value->len, seed);
206  }
207  case TYPE_BOOLEAN: return HashCombine32(*reinterpret_cast<const bool*>(v), seed);
208  case TYPE_TINYINT: return HashUtil::FnvHash64to32(v, 1, seed);
209  case TYPE_SMALLINT: return HashUtil::FnvHash64to32(v, 2, seed);
210  case TYPE_INT: return HashUtil::FnvHash64to32(v, 4, seed);
211  case TYPE_BIGINT: return HashUtil::FnvHash64to32(v, 8, seed);
212  case TYPE_FLOAT: return HashUtil::FnvHash64to32(v, 4, seed);
213  case TYPE_DOUBLE: return HashUtil::FnvHash64to32(v, 8, seed);
214  case TYPE_TIMESTAMP: return HashUtil::FnvHash64to32(v, 12, seed);
216  type.len, seed);
217  case TYPE_DECIMAL: return HashUtil::FnvHash64to32(v, type.GetByteSize(), seed);
218  default:
219  DCHECK(false);
220  return 0;
221  }
222 }
223 
224 inline void RawValue::PrintValue(const void* value, const ColumnType& type, int scale,
225  std::stringstream* stream) {
226  if (value == NULL) {
227  *stream << "NULL";
228  return;
229  }
230 
231  int old_precision = stream->precision();
232  std::ios_base::fmtflags old_flags = stream->flags();
233  if (scale > -1) {
234  stream->precision(scale);
235  // Setting 'fixed' causes precision to set the number of digits printed after the
236  // decimal (by default it sets the maximum number of digits total).
237  *stream << std::fixed;
238  }
239 
240  const StringValue* string_val = NULL;
241  switch (type.type) {
242  case TYPE_BOOLEAN: {
243  bool val = *reinterpret_cast<const bool*>(value);
244  *stream << (val ? "true" : "false");
245  return;
246  }
247  case TYPE_TINYINT:
248  // Extra casting for chars since they should not be interpreted as ASCII.
249  *stream << static_cast<int>(*reinterpret_cast<const int8_t*>(value));
250  break;
251  case TYPE_SMALLINT:
252  *stream << *reinterpret_cast<const int16_t*>(value);
253  break;
254  case TYPE_INT:
255  *stream << *reinterpret_cast<const int32_t*>(value);
256  break;
257  case TYPE_BIGINT:
258  *stream << *reinterpret_cast<const int64_t*>(value);
259  break;
260  case TYPE_FLOAT:
261  {
262  float val = *reinterpret_cast<const float*>(value);
263  if (LIKELY(std::isfinite(val))) {
264  *stream << val;
265  } else if (isinf(val)) {
266  // 'Infinity' is Java's text representation of inf. By staying close to Java, we
267  // allow Hive to read text tables containing non-finite values produced by
268  // Impala. (The same logic applies to 'NaN', below).
269  *stream << (val < 0 ? "-Infinity" : "Infinity");
270  } else if (isnan(val)) {
271  *stream << "NaN";
272  }
273  }
274  break;
275  case TYPE_DOUBLE:
276  {
277  double val = *reinterpret_cast<const double*>(value);
278  if (LIKELY(std::isfinite(val))) {
279  *stream << val;
280  } else if (isinf(val)) {
281  // See TYPE_FLOAT for rationale.
282  *stream << (val < 0 ? "-Infinity" : "Infinity");
283  } else if (isnan(val)) {
284  *stream << "NaN";
285  }
286  }
287  break;
288  case TYPE_VARCHAR:
289  case TYPE_STRING:
290  string_val = reinterpret_cast<const StringValue*>(value);
291  if (type.type == TYPE_VARCHAR) DCHECK(string_val->len <= type.len);
292  stream->write(string_val->ptr, string_val->len);
293  break;
294  case TYPE_TIMESTAMP:
295  *stream << *reinterpret_cast<const TimestampValue*>(value);
296  break;
297  case TYPE_CHAR:
298  stream->write(StringValue::CharSlotToPtr(value, type), type.len);
299  break;
300  case TYPE_DECIMAL:
301  switch (type.GetByteSize()) {
302  case 4:
303  *stream << reinterpret_cast<const Decimal4Value*>(value)->ToString(type);
304  break;
305  case 8:
306  *stream << reinterpret_cast<const Decimal8Value*>(value)->ToString(type);
307  break;
308  case 16:
309  *stream << reinterpret_cast<const Decimal16Value*>(value)->ToString(type);
310  break;
311  default:
312  DCHECK(false) << type;
313  }
314  break;
315  default:
316  DCHECK(false);
317  }
318  stream->precision(old_precision);
319  // Undo setting stream to fixed
320  stream->flags(old_flags);
321 }
322 
323 }
324 
325 #endif
bool Eq(const StringValue &other) const
==
static void PrintValue(const void *value, const ColumnType &type, int scale, std::stringstream *stream)
Definition: raw-value.h:224
static void PrintValueAsBytes(const void *value, const ColumnType &type, std::stringstream *stream)
Writes the byte representation of a value to a stringstream character-by-character.
Definition: raw-value.cc:28
static bool Eq(const void *v1, const void *v2, const ColumnType &type)
Definition: raw-value.h:106
A tuple with 0 materialised slots is represented as NULL.
Definition: tuple.h:48
static const int ASCII_PRECISION
Ascii output precision for double/float.
Definition: raw-value.h:40
static const uint32_t HASH32_COMBINE_SEED
The magic number (used in hash_combine()) 0x9e3779b9 = 2^32 / (golden ratio).
Definition: raw-value.h:96
static uint32_t GetHashValueFnv(const void *v, const ColumnType &type, uint32_t seed)
Definition: raw-value.h:196
static int64_t UnpaddedCharLength(const char *cptr, int64_t len)
Returns number of characters in a char array (ignores trailing spaces)
PrimitiveType type
Definition: types.h:60
static uint32_t HashCombine32(uint32_t value, uint32_t seed)
Definition: raw-value.h:101
static uint32_t Hash(const void *data, int32_t bytes, uint32_t seed)
Definition: hash-util.h:135
ObjectPool pool
int GetByteSize() const
Returns the byte size of this type. Returns 0 for variable length types.
Definition: types.h:178
int len
Only set if type == TYPE_CHAR or type == TYPE_VARCHAR.
Definition: types.h:62
static void Write(const void *value, Tuple *tuple, const SlotDescriptor *slot_desc, MemPool *pool)
Definition: raw-value.cc:303
static int Compare(const void *v1, const void *v2, const ColumnType &type)
Definition: raw-value.cc:109
#define LIKELY(expr)
Definition: compiler-util.h:32
static char * CharSlotToPtr(void *slot, const ColumnType &type)
Useful utility functions for runtime values (which are passed around as void*).
Definition: raw-value.h:37
static uint32_t FnvHash64to32(const void *data, int32_t bytes, uint32_t hash)
Definition: hash-util.h:125
static int StringCompare(const char *s1, int n1, const char *s2, int n2, int len)
static uint32_t GetHashValue(const void *v, const ColumnType &type, uint32_t seed=0)
Definition: raw-value.h:168