Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
hs2-util.cc
Go to the documentation of this file.
1 // Copyright 2014 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "service/hs2-util.h"
16 
17 #include "common/logging.h"
18 #include "runtime/raw-value.h"
19 #include "runtime/types.h"
20 
21 #include <gutil/strings/substitute.h>
22 
23 #include "common/names.h"
24 
25 using namespace apache::hive::service::cli;
26 using namespace impala;
27 using namespace strings;
28 
29 // Set the null indicator bit for row 'row_idx', assuming this will be called for
30 // successive increasing values of row_idx. If 'is_null' is true, the row_idx'th bit will
31 // be set in 'nulls' (taking the LSB as bit 0). If 'is_null' is false, the row_idx'th bit
32 // will be unchanged. If 'nulls' does not contain 'row_idx' bits, it will be extended by
33 // one byte.
34 inline void SetNullBit(uint32_t row_idx, bool is_null, string* nulls) {
35  DCHECK_LE(row_idx / 8, nulls->size());
36  int16_t mod_8 = row_idx % 8;
37  if (mod_8 == 0) (*nulls) += '\0';
38  (*nulls)[row_idx / 8] |= (1 << mod_8) * is_null;
39 }
40 
41 inline bool GetNullBit(const string& nulls, uint32_t row_idx) {
42  DCHECK_LE(row_idx / 8, nulls.size());
43  return nulls[row_idx / 8] & (1 << row_idx % 8);
44 }
45 
46 void impala::StitchNulls(uint32_t num_rows_before, uint32_t num_rows_added,
47  uint32_t start_idx, const string& from, string* to) {
48  to->reserve((num_rows_before + num_rows_added + 7) / 8);
49 
50  // TODO: This is very inefficient, since we could conceivably go one byte at a time
51  // (although the operands should stay live in registers in the loop). However doing this
52  // more efficiently leads to very complex code: we have to deal with the fact that
53  // 'start_idx' and 'num_rows_before' might both lead to offsets into the null bitset
54  // that don't start on a byte boundary. We should revisit this, ideally with a good
55  // bitset implementation.
56  for (int i = 0; i < num_rows_added; ++i) {
57  SetNullBit(num_rows_before + i, GetNullBit(from, i + start_idx), to);
58  }
59 }
60 
61 // For V6 and above
62 void impala::TColumnValueToHS2TColumn(const TColumnValue& col_val,
63  const TColumnType& type, uint32_t row_idx, thrift::TColumn* column) {
64  string* nulls;
65  bool is_null;
66  switch (type.types[0].scalar_type.type) {
67  case TPrimitiveType::NULL_TYPE:
68  case TPrimitiveType::BOOLEAN:
69  is_null = !col_val.__isset.bool_val;
70  column->boolVal.values.push_back(col_val.bool_val);
71  nulls = &column->boolVal.nulls;
72  break;
73  case TPrimitiveType::TINYINT:
74  is_null = !col_val.__isset.byte_val;
75  column->byteVal.values.push_back(col_val.byte_val);
76  nulls = &column->byteVal.nulls;
77  break;
78  case TPrimitiveType::SMALLINT:
79  is_null = !col_val.__isset.short_val;
80  column->i16Val.values.push_back(col_val.short_val);
81  nulls = &column->i16Val.nulls;
82  break;
83  case TPrimitiveType::INT:
84  is_null = !col_val.__isset.int_val;
85  column->i32Val.values.push_back(col_val.int_val);
86  nulls = &column->i32Val.nulls;
87  break;
88  case TPrimitiveType::BIGINT:
89  is_null = !col_val.__isset.long_val;
90  column->i64Val.values.push_back(col_val.long_val);
91  nulls = &column->i64Val.nulls;
92  break;
93  case TPrimitiveType::FLOAT:
94  case TPrimitiveType::DOUBLE:
95  is_null = !col_val.__isset.double_val;
96  column->doubleVal.values.push_back(col_val.double_val);
97  nulls = &column->doubleVal.nulls;
98  break;
99  case TPrimitiveType::TIMESTAMP:
100  case TPrimitiveType::STRING:
101  case TPrimitiveType::CHAR:
102  case TPrimitiveType::VARCHAR:
103  case TPrimitiveType::DECIMAL:
104  is_null = !col_val.__isset.string_val;
105  column->stringVal.values.push_back(col_val.string_val);
106  nulls = &column->stringVal.nulls;
107  break;
108  default:
109  DCHECK(false) << "Unhandled type: "
110  << TypeToString(ThriftToType(type.types[0].scalar_type.type));
111  return;
112  }
113 
114  SetNullBit(row_idx, is_null, nulls);
115 }
116 
117 // For V6 and above
118 void impala::ExprValueToHS2TColumn(const void* value, const TColumnType& type,
119  uint32_t row_idx, thrift::TColumn* column) {
120  string* nulls;
121  switch (type.types[0].scalar_type.type) {
122  case TPrimitiveType::NULL_TYPE:
123  case TPrimitiveType::BOOLEAN:
124  column->boolVal.values.push_back(
125  value == NULL ? false : *reinterpret_cast<const bool*>(value));
126  nulls = &column->boolVal.nulls;
127  break;
128  case TPrimitiveType::TINYINT:
129  column->byteVal.values.push_back(
130  value == NULL ? 0 : *reinterpret_cast<const int8_t*>(value));
131  nulls = &column->byteVal.nulls;
132  break;
133  case TPrimitiveType::SMALLINT:
134  column->i16Val.values.push_back(
135  value == NULL ? 0 : *reinterpret_cast<const int16_t*>(value));
136  nulls = &column->i16Val.nulls;
137  break;
138  case TPrimitiveType::INT:
139  column->i32Val.values.push_back(
140  value == NULL ? 0 : *reinterpret_cast<const int32_t*>(value));
141  nulls = &column->i32Val.nulls;
142  break;
143  case TPrimitiveType::BIGINT:
144  column->i64Val.values.push_back(
145  value == NULL ? 0 : *reinterpret_cast<const int64_t*>(value));
146  nulls = &column->i64Val.nulls;
147  break;
148  case TPrimitiveType::FLOAT:
149  column->doubleVal.values.push_back(
150  value == NULL ? 0.f : *reinterpret_cast<const float*>(value));
151  nulls = &column->doubleVal.nulls;
152  break;
153  case TPrimitiveType::DOUBLE:
154  column->doubleVal.values.push_back(
155  value == NULL ? 0.0 : *reinterpret_cast<const double*>(value));
156  nulls = &column->doubleVal.nulls;
157  break;
158  case TPrimitiveType::TIMESTAMP:
159  column->stringVal.values.push_back("");
160  if (value != NULL) {
161  RawValue::PrintValue(value, TYPE_TIMESTAMP, -1,
162  &(column->stringVal.values.back()));
163  }
164  nulls = &column->stringVal.nulls;
165  break;
166  case TPrimitiveType::STRING:
167  case TPrimitiveType::VARCHAR:
168  column->stringVal.values.push_back("");
169  if (value != NULL) {
170  const StringValue* str_val = reinterpret_cast<const StringValue*>(value);
171  column->stringVal.values.back().assign(
172  static_cast<char*>(str_val->ptr), str_val->len);
173  }
174  nulls = &column->stringVal.nulls;
175  break;
176  case TPrimitiveType::CHAR:
177  column->stringVal.values.push_back("");
178  if (value != NULL) {
179  ColumnType char_type = ColumnType::CreateCharType(type.types[0].scalar_type.len);
180  column->stringVal.values.back().assign(
181  StringValue::CharSlotToPtr(value, char_type), char_type.len);
182  }
183  nulls = &column->stringVal.nulls;
184  break;
185  case TPrimitiveType::DECIMAL: {
186  // HiveServer2 requires decimal to be presented as string.
187  column->stringVal.values.push_back("");
188  ColumnType decimalType(type);
189  if (value != NULL) {
190  switch (decimalType.GetByteSize()) {
191  case 4:
192  column->stringVal.values.back() =
193  reinterpret_cast<const Decimal4Value*>(value)->ToString(type);
194  break;
195  case 8:
196  column->stringVal.values.back() =
197  reinterpret_cast<const Decimal8Value*>(value)->ToString(type);
198  break;
199  case 16:
200  column->stringVal.values.back() =
201  reinterpret_cast<const Decimal16Value*>(value)->ToString(type);
202  break;
203  default:
204  DCHECK(false) << "bad type: " << type;
205  }
206  }
207  nulls = &column->stringVal.nulls;
208  break;
209  }
210  default:
211  DCHECK(false) << "Unhandled type: "
212  << TypeToString(ThriftToType(type.types[0].scalar_type.type));
213  return;
214  }
215 
216  SetNullBit(row_idx, (value == NULL), nulls);
217 }
218 
219 // For V1 -> V5
220 void impala::TColumnValueToHS2TColumnValue(const TColumnValue& col_val,
221  const TColumnType& type, thrift::TColumnValue* hs2_col_val) {
222  // TODO: Handle complex types.
223  DCHECK_EQ(1, type.types.size());
224  DCHECK_EQ(TTypeNodeType::SCALAR, type.types[0].type);
225  DCHECK_EQ(true, type.types[0].__isset.scalar_type);
226  switch (type.types[0].scalar_type.type) {
227  case TPrimitiveType::BOOLEAN:
228  hs2_col_val->__isset.boolVal = true;
229  hs2_col_val->boolVal.value = col_val.bool_val;
230  hs2_col_val->boolVal.__isset.value = col_val.__isset.bool_val;
231  break;
232  case TPrimitiveType::TINYINT:
233  hs2_col_val->__isset.byteVal = true;
234  hs2_col_val->byteVal.value = col_val.byte_val;
235  hs2_col_val->byteVal.__isset.value = col_val.__isset.byte_val;
236  break;
237  case TPrimitiveType::SMALLINT:
238  hs2_col_val->__isset.i16Val = true;
239  hs2_col_val->i16Val.value = col_val.short_val;
240  hs2_col_val->i16Val.__isset.value = col_val.__isset.short_val;
241  break;
242  case TPrimitiveType::INT:
243  hs2_col_val->__isset.i32Val = true;
244  hs2_col_val->i32Val.value = col_val.int_val;
245  hs2_col_val->i32Val.__isset.value = col_val.__isset.int_val;
246  break;
247  case TPrimitiveType::BIGINT:
248  hs2_col_val->__isset.i64Val = true;
249  hs2_col_val->i64Val.value = col_val.long_val;
250  hs2_col_val->i64Val.__isset.value = col_val.__isset.long_val;
251  break;
252  case TPrimitiveType::FLOAT:
253  case TPrimitiveType::DOUBLE:
254  hs2_col_val->__isset.doubleVal = true;
255  hs2_col_val->doubleVal.value = col_val.double_val;
256  hs2_col_val->doubleVal.__isset.value = col_val.__isset.double_val;
257  break;
258  case TPrimitiveType::DECIMAL:
259  case TPrimitiveType::STRING:
260  case TPrimitiveType::TIMESTAMP:
261  case TPrimitiveType::VARCHAR:
262  case TPrimitiveType::CHAR:
263  // HiveServer2 requires timestamp to be presented as string. Note that the .thrift
264  // spec says it should be a BIGINT; AFAICT Hive ignores that and produces a string.
265  hs2_col_val->__isset.stringVal = true;
266  hs2_col_val->stringVal.__isset.value = col_val.__isset.string_val;
267  if (col_val.__isset.string_val) {
268  hs2_col_val->stringVal.value = col_val.string_val;
269  }
270  break;
271  default:
272  DCHECK(false) << "bad type: "
273  << TypeToString(ThriftToType(type.types[0].scalar_type.type));
274  break;
275  }
276 }
277 
278 // For V1 -> V5
279 void impala::ExprValueToHS2TColumnValue(const void* value, const TColumnType& type,
280  thrift::TColumnValue* hs2_col_val) {
281  bool not_null = (value != NULL);
282  // TODO: Handle complex types.
283  DCHECK_EQ(1, type.types.size());
284  DCHECK_EQ(TTypeNodeType::SCALAR, type.types[0].type);
285  DCHECK_EQ(1, type.types[0].__isset.scalar_type);
286  switch (type.types[0].scalar_type.type) {
287  case TPrimitiveType::NULL_TYPE:
288  // Set NULLs in the bool_val.
289  hs2_col_val->__isset.boolVal = true;
290  hs2_col_val->boolVal.__isset.value = false;
291  break;
292  case TPrimitiveType::BOOLEAN:
293  hs2_col_val->__isset.boolVal = true;
294  if (not_null) hs2_col_val->boolVal.value = *reinterpret_cast<const bool*>(value);
295  hs2_col_val->boolVal.__isset.value = not_null;
296  break;
297  case TPrimitiveType::TINYINT:
298  hs2_col_val->__isset.byteVal = true;
299  if (not_null) hs2_col_val->byteVal.value = *reinterpret_cast<const int8_t*>(value);
300  hs2_col_val->byteVal.__isset.value = not_null;
301  break;
302  case TPrimitiveType::SMALLINT:
303  hs2_col_val->__isset.i16Val = true;
304  if (not_null) hs2_col_val->i16Val.value = *reinterpret_cast<const int16_t*>(value);
305  hs2_col_val->i16Val.__isset.value = not_null;
306  break;
307  case TPrimitiveType::INT:
308  hs2_col_val->__isset.i32Val = true;
309  if (not_null) hs2_col_val->i32Val.value = *reinterpret_cast<const int32_t*>(value);
310  hs2_col_val->i32Val.__isset.value = not_null;
311  break;
312  case TPrimitiveType::BIGINT:
313  hs2_col_val->__isset.i64Val = true;
314  if (not_null) hs2_col_val->i64Val.value = *reinterpret_cast<const int64_t*>(value);
315  hs2_col_val->i64Val.__isset.value = not_null;
316  break;
317  case TPrimitiveType::FLOAT:
318  hs2_col_val->__isset.doubleVal = true;
319  if (not_null) hs2_col_val->doubleVal.value = *reinterpret_cast<const float*>(value);
320  hs2_col_val->doubleVal.__isset.value = not_null;
321  break;
322  case TPrimitiveType::DOUBLE:
323  hs2_col_val->__isset.doubleVal = true;
324  if (not_null) {
325  hs2_col_val->doubleVal.value = *reinterpret_cast<const double*>(value);
326  }
327  hs2_col_val->doubleVal.__isset.value = not_null;
328  break;
329  case TPrimitiveType::STRING:
330  case TPrimitiveType::VARCHAR:
331  hs2_col_val->__isset.stringVal = true;
332  hs2_col_val->stringVal.__isset.value = not_null;
333  if (not_null) {
334  const StringValue* string_val = reinterpret_cast<const StringValue*>(value);
335  hs2_col_val->stringVal.value.assign(static_cast<char*>(string_val->ptr),
336  string_val->len);
337  }
338  break;
339  case TPrimitiveType::CHAR:
340  hs2_col_val->__isset.stringVal = true;
341  hs2_col_val->stringVal.__isset.value = not_null;
342  if (not_null) {
343  ColumnType char_type = ColumnType::CreateCharType(type.types[0].scalar_type.len);
344  hs2_col_val->stringVal.value.assign(
345  StringValue::CharSlotToPtr(value, char_type), char_type.len);
346  }
347  break;
348  case TPrimitiveType::TIMESTAMP:
349  // HiveServer2 requires timestamp to be presented as string.
350  hs2_col_val->__isset.stringVal = true;
351  hs2_col_val->stringVal.__isset.value = not_null;
352  if (not_null) {
353  RawValue::PrintValue(value, TYPE_TIMESTAMP, -1, &(hs2_col_val->stringVal.value));
354  }
355  break;
356  case TPrimitiveType::DECIMAL: {
357  // HiveServer2 requires decimal to be presented as string.
358  hs2_col_val->__isset.stringVal = true;
359  hs2_col_val->stringVal.__isset.value = not_null;
360  ColumnType decimalType(type);
361  if (not_null) {
362  switch (decimalType.GetByteSize()) {
363  case 4:
364  hs2_col_val->stringVal.value =
365  reinterpret_cast<const Decimal4Value*>(value)->ToString(type);
366  break;
367  case 8:
368  hs2_col_val->stringVal.value =
369  reinterpret_cast<const Decimal8Value*>(value)->ToString(type);
370  break;
371  case 16:
372  hs2_col_val->stringVal.value =
373  reinterpret_cast<const Decimal16Value*>(value)->ToString(type);
374  break;
375  default:
376  DCHECK(false) << "bad type: " << type;
377  }
378  }
379  break;
380  }
381  default:
382  DCHECK(false) << "bad type: "
383  << TypeToString(ThriftToType(type.types[0].scalar_type.type));
384  break;
385  }
386 }
387 
// Streams a single payload value to 'ss' using its regular operator<<.
template<typename V>
inline void PrintValPayload(const V& payload, std::ostream* ss) {
  (*ss) << payload;
}

// Overload for 8-bit integers: iostreams treat int8_t as a character type, so the value
// is widened first to make it print as a number rather than as a raw character.
inline void PrintValPayload(const int8_t& payload, std::ostream* ss) {
  (*ss) << static_cast<int>(payload);
}

// Writes the payload of 'val' to 'ss', or the literal "NULL" when the payload is not
// set. T must expose a 'value' member and an '__isset.value' flag.
template<typename T>
void PrintVal(const T& val, std::ostream* ss) {
  if (val.__isset.value) {
    PrintValPayload(val.value, ss);
  } else {
    (*ss) << "NULL";
  }
}
396 
398  const apache::hive::service::cli::thrift::TColumnValue& colval, stringstream* out) {
399  if (colval.__isset.boolVal) {
400  if (colval.boolVal.__isset.value) {
401  (*out) << ((colval.boolVal.value) ? "true" : "false");
402  } else {
403  (*out) << "NULL";
404  }
405  } else if (colval.__isset.doubleVal) {
406  PrintVal(colval.doubleVal, out);
407  } else if (colval.__isset.byteVal) {
408  PrintVal(colval.byteVal, out);
409  } else if (colval.__isset.i32Val) {
410  PrintVal(colval.i32Val, out);
411  } else if (colval.__isset.i16Val) {
412  PrintVal(colval.i16Val, out);
413  } else if (colval.__isset.i64Val) {
414  PrintVal(colval.i64Val, out);
415  } else if (colval.__isset.stringVal) {
416  PrintVal(colval.stringVal, out);
417  } else {
418  (*out) << "NULL";
419  }
420 }
const T & value() const
void TColumnValueToHS2TColumnValue(const TColumnValue &col_val, const TColumnType &type, apache::hive::service::cli::thrift::TColumnValue *hs2_col_val)
For V1->V5.
void StitchNulls(uint32_t num_rows_before, uint32_t num_rows_added, uint32_t start_idx, const std::string &from, std::string *to)
PrimitiveType ThriftToType(TPrimitiveType::type ttype)
Definition: types.cc:27
void PrintVal(const T &val, ostream *ss)
Definition: hs2-util.cc:389
void PrintTColumnValue(const apache::hive::service::cli::thrift::TColumnValue &colval, std::stringstream *out)
void SetNullBit(uint32_t row_idx, bool is_null, string *nulls)
Definition: hs2-util.cc:34
string TypeToString(PrimitiveType t)
Definition: types.cc:73
void TColumnValueToHS2TColumn(const TColumnValue &col_val, const TColumnType &type, uint32_t row_idx, apache::hive::service::cli::thrift::TColumn *column)
For V6->
int len
Only set if type == TYPE_CHAR or type == TYPE_VARCHAR.
Definition: types.h:62
void ExprValueToHS2TColumn(const void *value, const TColumnType &type, uint32_t row_idx, apache::hive::service::cli::thrift::TColumn *column)
For V6->
bool GetNullBit(const string &nulls, uint32_t row_idx)
Definition: hs2-util.cc:41
void ExprValueToHS2TColumnValue(const void *value, const TColumnType &type, apache::hive::service::cli::thrift::TColumnValue *hs2_col_val)
For V1->V5.