Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
raw-value.cc
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include <sstream>
16 #include <boost/functional/hash.hpp>
17 
18 #include "runtime/raw-value.h"
20 #include "runtime/tuple.h"
21 
22 #include "common/names.h"
23 
24 namespace impala {
25 
26 const int RawValue::ASCII_PRECISION = 16; // stream precision (16 digits) applied when printing double/float values as text
27 
28 void RawValue::PrintValueAsBytes(const void* value, const ColumnType& type,
29  stringstream* stream) {
30  if (value == NULL) return;
31 
32  const char* chars = reinterpret_cast<const char*>(value);
33  const StringValue* string_val = NULL;
34  switch (type.type) {
35  case TYPE_BOOLEAN:
36  stream->write(chars, sizeof(bool));
37  return;
38  case TYPE_TINYINT:
39  stream->write(chars, sizeof(int8_t));
40  break;
41  case TYPE_SMALLINT:
42  stream->write(chars, sizeof(int16_t));
43  break;
44  case TYPE_INT:
45  stream->write(chars, sizeof(int32_t));
46  break;
47  case TYPE_BIGINT:
48  stream->write(chars, sizeof(int64_t));
49  break;
50  case TYPE_FLOAT:
51  stream->write(chars, sizeof(float));
52  break;
53  case TYPE_DOUBLE:
54  stream->write(chars, sizeof(double));
55  break;
56  case TYPE_STRING:
57  case TYPE_VARCHAR:
58  string_val = reinterpret_cast<const StringValue*>(value);
59  stream->write(static_cast<char*>(string_val->ptr), string_val->len);
60  break;
61  case TYPE_TIMESTAMP:
62  stream->write(chars, TimestampValue::Size());
63  break;
64  case TYPE_CHAR:
65  stream->write(StringValue::CharSlotToPtr(chars, type), type.len);
66  break;
67  case TYPE_DECIMAL:
68  stream->write(chars, type.GetByteSize());
69  break;
70  default:
71  DCHECK(false) << "bad RawValue::PrintValue() type: " << type.DebugString();
72  }
73 }
74 
75 void RawValue::PrintValue(const void* value, const ColumnType& type, int scale,
76  string* str) {
77  if (value == NULL) {
78  *str = "NULL";
79  return;
80  }
81 
82  stringstream out;
83  out.precision(ASCII_PRECISION);
84  const StringValue* string_val = NULL;
85  string tmp;
86  bool val;
87 
88  // Special case types that we can print more efficiently without using a stringstream
89  switch (type.type) {
90  case TYPE_BOOLEAN:
91  val = *reinterpret_cast<const bool*>(value);
92  *str = (val ? "true" : "false");
93  return;
94  case TYPE_STRING:
95  case TYPE_VARCHAR:
96  string_val = reinterpret_cast<const StringValue*>(value);
97  tmp.assign(static_cast<char*>(string_val->ptr), string_val->len);
98  str->swap(tmp);
99  return;
100  case TYPE_CHAR:
101  *str = string(StringValue::CharSlotToPtr(value, type), type.len);
102  return;
103  default:
104  PrintValue(value, type, scale, &out);
105  }
106  *str = out.str();
107 }
108 
109 int RawValue::Compare(const void* v1, const void* v2, const ColumnType& type) {
110  const StringValue* string_value1;
111  const StringValue* string_value2;
112  const TimestampValue* ts_value1;
113  const TimestampValue* ts_value2;
114  float f1, f2;
115  double d1, d2;
116  int32_t i1, i2;
117  int64_t b1, b2;
118  switch (type.type) {
119  case TYPE_NULL:
120  return 0;
121  case TYPE_BOOLEAN:
122  return *reinterpret_cast<const bool*>(v1) - *reinterpret_cast<const bool*>(v2);
123  case TYPE_TINYINT:
124  return *reinterpret_cast<const int8_t*>(v1) - *reinterpret_cast<const int8_t*>(v2);
125  case TYPE_SMALLINT:
126  return *reinterpret_cast<const int16_t*>(v1) -
127  *reinterpret_cast<const int16_t*>(v2);
128  case TYPE_INT:
129  i1 = *reinterpret_cast<const int32_t*>(v1);
130  i2 = *reinterpret_cast<const int32_t*>(v2);
131  return i1 > i2 ? 1 : (i1 < i2 ? -1 : 0);
132  case TYPE_BIGINT:
133  b1 = *reinterpret_cast<const int64_t*>(v1);
134  b2 = *reinterpret_cast<const int64_t*>(v2);
135  return b1 > b2 ? 1 : (b1 < b2 ? -1 : 0);
136  case TYPE_FLOAT:
137  // TODO: can this be faster? (just returning the difference has underflow problems)
138  f1 = *reinterpret_cast<const float*>(v1);
139  f2 = *reinterpret_cast<const float*>(v2);
140  if (isnan(f1) && isnan(f2)) return 0;
141  if (isnan(f1)) return -1;
142  if (isnan(f2)) return 1;
143  return f1 > f2 ? 1 : (f1 < f2 ? -1 : 0);
144  case TYPE_DOUBLE:
145  // TODO: can this be faster?
146  d1 = *reinterpret_cast<const double*>(v1);
147  d2 = *reinterpret_cast<const double*>(v2);
148  if (isnan(d1) && isnan(d2)) return 0;
149  if (isnan(d1)) return -1;
150  if (isnan(d2)) return 1;
151  return d1 > d2 ? 1 : (d1 < d2 ? -1 : 0);
152  case TYPE_STRING:
153  case TYPE_VARCHAR:
154  string_value1 = reinterpret_cast<const StringValue*>(v1);
155  string_value2 = reinterpret_cast<const StringValue*>(v2);
156  return string_value1->Compare(*string_value2);
157  case TYPE_TIMESTAMP:
158  ts_value1 = reinterpret_cast<const TimestampValue*>(v1);
159  ts_value2 = reinterpret_cast<const TimestampValue*>(v2);
160  return *ts_value1 > *ts_value2 ? 1 : (*ts_value1 < *ts_value2 ? -1 : 0);
161  case TYPE_CHAR: {
162  const char* v1ptr = StringValue::CharSlotToPtr(v1, type);
163  const char* v2ptr = StringValue::CharSlotToPtr(v2, type);
164  int64_t l1 = StringValue::UnpaddedCharLength(v1ptr, type.len);
165  int64_t l2 = StringValue::UnpaddedCharLength(v2ptr, type.len);
166  return StringCompare(v1ptr, l1, v2ptr, l2, std::min(l1, l2));
167  }
168  case TYPE_DECIMAL:
169  switch (type.GetByteSize()) {
170  case 4:
171  return reinterpret_cast<const Decimal4Value*>(v1)->Compare(
172  *reinterpret_cast<const Decimal4Value*>(v2));
173  case 8:
174  return reinterpret_cast<const Decimal8Value*>(v1)->Compare(
175  *reinterpret_cast<const Decimal8Value*>(v2));
176  case 16:
177  return reinterpret_cast<const Decimal16Value*>(v1)->Compare(
178  *reinterpret_cast<const Decimal16Value*>(v2));
179  default:
180  DCHECK(false) << type;
181  return 0;
182  }
183  default:
184  DCHECK(false) << "invalid type: " << type.DebugString();
185  return 0;
186  };
187 }
188 
189 void RawValue::Write(const void* value, void* dst, const ColumnType& type,
190  MemPool* pool) {
191  DCHECK(value != NULL);
192  switch (type.type) {
193  case TYPE_NULL:
194  break;
195  case TYPE_BOOLEAN:
196  *reinterpret_cast<bool*>(dst) = *reinterpret_cast<const bool*>(value);
197  break;
198  case TYPE_TINYINT:
199  *reinterpret_cast<int8_t*>(dst) = *reinterpret_cast<const int8_t*>(value);
200  break;
201  case TYPE_SMALLINT:
202  *reinterpret_cast<int16_t*>(dst) = *reinterpret_cast<const int16_t*>(value);
203  break;
204  case TYPE_INT:
205  *reinterpret_cast<int32_t*>(dst) = *reinterpret_cast<const int32_t*>(value);
206  break;
207  case TYPE_BIGINT:
208  *reinterpret_cast<int64_t*>(dst) = *reinterpret_cast<const int64_t*>(value);
209  break;
210  case TYPE_FLOAT:
211  *reinterpret_cast<float*>(dst) = *reinterpret_cast<const float*>(value);
212  break;
213  case TYPE_DOUBLE:
214  *reinterpret_cast<double*>(dst) = *reinterpret_cast<const double*>(value);
215  break;
216  case TYPE_TIMESTAMP:
217  *reinterpret_cast<TimestampValue*>(dst) =
218  *reinterpret_cast<const TimestampValue*>(value);
219  break;
220  case TYPE_STRING:
221  case TYPE_VARCHAR:
222  case TYPE_CHAR: {
223  if (!type.IsVarLen()) {
224  DCHECK_EQ(type.type, TYPE_CHAR);
225  memcpy(StringValue::CharSlotToPtr(dst, type), value, type.len);
226  break;
227  }
228  const StringValue* src = reinterpret_cast<const StringValue*>(value);
229  StringValue* dest = reinterpret_cast<StringValue*>(dst);
230  dest->len = src->len;
231  if (type.type == TYPE_VARCHAR) DCHECK_LE(dest->len, type.len);
232  if (pool != NULL) {
233  dest->ptr = reinterpret_cast<char*>(pool->Allocate(dest->len));
234  memcpy(dest->ptr, src->ptr, dest->len);
235  } else {
236  dest->ptr = src->ptr;
237  }
238  break;
239  }
240  case TYPE_DECIMAL:
241  memcpy(dst, value, type.GetByteSize());
242  break;
243  default:
244  DCHECK(false) << "RawValue::Write(): bad type: " << type.DebugString();
245  }
246 }
247 
248 // TODO: can we remove some of this code duplication? Templated allocator?
249 void RawValue::Write(const void* value, const ColumnType& type,
250  void* dst, uint8_t** buf) {
251  DCHECK(value != NULL);
252  switch (type.type) {
253  case TYPE_BOOLEAN:
254  *reinterpret_cast<bool*>(dst) = *reinterpret_cast<const bool*>(value);
255  break;
256  case TYPE_TINYINT:
257  *reinterpret_cast<int8_t*>(dst) = *reinterpret_cast<const int8_t*>(value);
258  break;
259  case TYPE_SMALLINT:
260  *reinterpret_cast<int16_t*>(dst) = *reinterpret_cast<const int16_t*>(value);
261  break;
262  case TYPE_INT:
263  *reinterpret_cast<int32_t*>(dst) = *reinterpret_cast<const int32_t*>(value);
264  break;
265  case TYPE_BIGINT:
266  *reinterpret_cast<int64_t*>(dst) = *reinterpret_cast<const int64_t*>(value);
267  break;
268  case TYPE_FLOAT:
269  *reinterpret_cast<float*>(dst) = *reinterpret_cast<const float*>(value);
270  break;
271  case TYPE_DOUBLE:
272  *reinterpret_cast<double*>(dst) = *reinterpret_cast<const double*>(value);
273  break;
274  case TYPE_TIMESTAMP:
275  *reinterpret_cast<TimestampValue*>(dst) =
276  *reinterpret_cast<const TimestampValue*>(value);
277  break;
278  case TYPE_STRING:
279  case TYPE_VARCHAR:
280  case TYPE_CHAR: {
281  DCHECK(buf != NULL);
282  if (!type.IsVarLen()) {
283  DCHECK_EQ(type.type, TYPE_CHAR);
284  memcpy(dst, value, type.len);
285  break;
286  }
287  const StringValue* src = reinterpret_cast<const StringValue*>(value);
288  StringValue* dest = reinterpret_cast<StringValue*>(dst);
289  dest->len = src->len;
290  dest->ptr = reinterpret_cast<char*>(*buf);
291  memcpy(dest->ptr, src->ptr, dest->len);
292  *buf += dest->len;
293  break;
294  }
295  case TYPE_DECIMAL:
296  memcpy(dst, value, type.GetByteSize());
297  break;
298  default:
299  DCHECK(false) << "RawValue::Write(): bad type: " << type.DebugString();
300  }
301 }
302 
303 void RawValue::Write(const void* value, Tuple* tuple, const SlotDescriptor* slot_desc,
304  MemPool* pool) {
305  if (value == NULL) {
306  tuple->SetNull(slot_desc->null_indicator_offset());
307  } else {
308  void* slot = tuple->GetSlot(slot_desc->tuple_offset());
309  RawValue::Write(value, slot, slot_desc->type(), pool);
310  }
311 }
312 
313 }
bool IsVarLen() const
Definition: types.h:172
static void PrintValue(const void *value, const ColumnType &type, int scale, std::stringstream *stream)
Definition: raw-value.h:224
void SetNull(const NullIndicatorOffset &offset)
Definition: tuple.h:101
static void PrintValueAsBytes(const void *value, const ColumnType &type, std::stringstream *stream)
Writes the byte representation of a value to a stringstream character-by-character.
Definition: raw-value.cc:28
A tuple with 0 materialised slots is represented as NULL.
Definition: tuple.h:48
static const int ASCII_PRECISION
Ascii output precision for double/float.
Definition: raw-value.h:40
void * GetSlot(int offset)
Definition: tuple.h:118
const NullIndicatorOffset & null_indicator_offset() const
Definition: descriptors.h:89
static int64_t UnpaddedCharLength(const char *cptr, int64_t len)
Returns number of characters in a char array (ignores trailing spaces)
std::string DebugString() const
Definition: types.cc:194
PrimitiveType type
Definition: types.h:60
const ColumnType & type() const
Definition: descriptors.h:78
int Compare(const StringValue &other) const
ObjectPool pool
int GetByteSize() const
Returns the byte size of this type. Returns 0 for variable length types.
Definition: types.h:178
int len
Only set if type == TYPE_CHAR or type == TYPE_VARCHAR.
Definition: types.h:62
static void Write(const void *value, Tuple *tuple, const SlotDescriptor *slot_desc, MemPool *pool)
Definition: raw-value.cc:303
static int Compare(const void *v1, const void *v2, const ColumnType &type)
Definition: raw-value.cc:109
static char * CharSlotToPtr(void *slot, const ColumnType &type)
int tuple_offset() const
Definition: descriptors.h:88
static int StringCompare(const char *s1, int n1, const char *s2, int n2, int len)
uint8_t * Allocate(int size)
Definition: mem-pool.h:92