Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
text-converter.inline.h
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 
16 #ifndef IMPALA_EXEC_TEXT_CONVERTER_INLINE_H
17 #define IMPALA_EXEC_TEXT_CONVERTER_INLINE_H
18 
19 #include "text-converter.h"
20 
21 #include <boost/algorithm/string.hpp>
22 
23 #include "runtime/runtime-state.h"
24 #include "runtime/descriptors.h"
25 #include "runtime/tuple.h"
26 #include "util/string-parser.h"
27 #include "runtime/string-value.h"
29 #include "runtime/mem-pool.h"
31 #include "exprs/string-functions.h"
32 
33 namespace impala {
34 
37 inline bool TextConverter::WriteSlot(const SlotDescriptor* slot_desc, Tuple* tuple,
38  const char* data, int len, bool copy_string, bool need_escape, MemPool* pool) {
39  if ((len == 0 && !slot_desc->type().IsStringType()) || data == NULL) {
40  tuple->SetNull(slot_desc->null_indicator_offset());
41  return true;
42  } else if (check_null_ && len == null_col_val_.size() &&
43  StringCompare(data, len, null_col_val_.data(), null_col_val_.size(), len) == 0) {
44  // We matched the special NULL indicator.
45  tuple->SetNull(slot_desc->null_indicator_offset());
46  return true;
47  }
48 
50  void* slot = tuple->GetSlot(slot_desc->tuple_offset());
51 
52  // Parse the raw-text data. Translate the text string to internal format.
53  const ColumnType& type = slot_desc->type();
54  switch (type.type) {
55  case TYPE_STRING:
56  case TYPE_VARCHAR:
57  case TYPE_CHAR: {
58  int buffer_len = len;
59  if (type.type == TYPE_VARCHAR || type.type == TYPE_CHAR) buffer_len = type.len;
60 
61  bool reuse_data = type.IsVarLen() && !(len != 0 && (copy_string || need_escape));
62  if (type.type == TYPE_CHAR) reuse_data &= (buffer_len <= len);
63 
64  StringValue str;
65  str.len = std::min(buffer_len, len);
66  if (reuse_data) {
67  str.ptr = const_cast<char*>(data);
68  } else {
69  str.ptr = type.IsVarLen() ? reinterpret_cast<char*>(pool->Allocate(buffer_len)) :
70  reinterpret_cast<char*>(slot);
71  if (need_escape) {
72  UnescapeString(data, str.ptr, &str.len, buffer_len);
73  } else {
74  memcpy(str.ptr, data, str.len);
75  }
76  }
77 
78  if (type.type == TYPE_CHAR) {
79  StringValue::PadWithSpaces(str.ptr, buffer_len, str.len);
80  str.len = type.len;
81  }
82  // write back to the slot, if !IsVarLen() we already wrote to the slot
83  if (type.IsVarLen()) {
84  StringValue* str_slot = reinterpret_cast<StringValue*>(slot);
85  *str_slot = str;
86  }
87  break;
88  }
89  case TYPE_BOOLEAN:
90  *reinterpret_cast<bool*>(slot) =
91  StringParser::StringToBool(data, len, &parse_result);
92  break;
93  case TYPE_TINYINT:
94  *reinterpret_cast<int8_t*>(slot) =
95  StringParser::StringToInt<int8_t>(data, len, &parse_result);
96  break;
97  case TYPE_SMALLINT:
98  *reinterpret_cast<int16_t*>(slot) =
99  StringParser::StringToInt<int16_t>(data, len, &parse_result);
100  break;
101  case TYPE_INT:
102  *reinterpret_cast<int32_t*>(slot) =
103  StringParser::StringToInt<int32_t>(data, len, &parse_result);
104  break;
105  case TYPE_BIGINT:
106  *reinterpret_cast<int64_t*>(slot) =
107  StringParser::StringToInt<int64_t>(data, len, &parse_result);
108  break;
109  case TYPE_FLOAT:
110  *reinterpret_cast<float*>(slot) =
111  StringParser::StringToFloat<float>(data, len, &parse_result);
112  break;
113  case TYPE_DOUBLE:
114  *reinterpret_cast<double*>(slot) =
115  StringParser::StringToFloat<double>(data, len, &parse_result);
116  break;
117  case TYPE_TIMESTAMP: {
118  TimestampValue* ts_slot = reinterpret_cast<TimestampValue*>(slot);
119  *ts_slot = TimestampValue(data, len);
120  if (!ts_slot->HasDateOrTime()) {
121  parse_result = StringParser::PARSE_FAILURE;
122  }
123  break;
124  }
125  case TYPE_DECIMAL: {
126  switch (slot_desc->slot_size()) {
127  case 4:
128  *reinterpret_cast<Decimal4Value*>(slot) =
129  StringParser::StringToDecimal<int32_t>(
130  data, len, slot_desc->type(), &parse_result);
131  break;
132  case 8:
133  *reinterpret_cast<Decimal8Value*>(slot) =
134  StringParser::StringToDecimal<int64_t>(
135  data, len, slot_desc->type(), &parse_result);
136  break;
137  case 12:
138  DCHECK(false) << "Planner should not generate this.";
139  break;
140  case 16:
141  *reinterpret_cast<Decimal16Value*>(slot) =
142  StringParser::StringToDecimal<int128_t>(
143  data, len, slot_desc->type(), &parse_result);
144  break;
145  default:
146  DCHECK(false) << "Decimal slots can't be this size.";
147  }
148  if (parse_result != StringParser::PARSE_SUCCESS) {
149  // Don't accept underflow and overflow for decimals.
150  parse_result = StringParser::PARSE_FAILURE;
151  }
152  break;
153  }
154  default:
155  DCHECK(false) << "bad slot type: " << slot_desc->type();
156  break;
157  }
158 
159  // TODO: add warning for overflow case
160  if (parse_result == StringParser::PARSE_FAILURE) {
161  tuple->SetNull(slot_desc->null_indicator_offset());
162  return false;
163  }
164 
165  return true;
166 }
167 
168 }
169 
170 #endif
bool IsVarLen() const
Definition: types.h:172
void SetNull(const NullIndicatorOffset &offset)
Definition: tuple.h:101
A tuple with 0 materialised slots is represented as NULL.
Definition: tuple.h:48
bool WriteSlot(const SlotDescriptor *slot_desc, Tuple *tuple, const char *data, int len, bool copy_string, bool need_escape, MemPool *pool)
void UnescapeString(const char *src, char *dest, int *len, int64_t maxlen=-1)
void * GetSlot(int offset)
Definition: tuple.h:118
bool HasDateOrTime() const
static void PadWithSpaces(char *cptr, int64_t cptr_len, int64_t num_chars)
const NullIndicatorOffset & null_indicator_offset() const
Definition: descriptors.h:89
bool IsStringType() const
Definition: types.h:168
PrimitiveType type
Definition: types.h:60
const ColumnType & type() const
Definition: descriptors.h:78
bool check_null_
Indicates whether we should check for null_col_val_ and set slots to NULL.
ObjectPool pool
int len
Only set if type == TYPE_CHAR or type == TYPE_VARCHAR.
Definition: types.h:62
static bool StringToBool(const char *s, int len, ParseResult *result)
Parses a string for 'true' or 'false', case insensitive.
Definition: string-parser.h:87
int slot_size() const
Definition: descriptors.h:94
std::string null_col_val_
Special string to indicate NULL column values.
int tuple_offset() const
Definition: descriptors.h:88
static int StringCompare(const char *s1, int n1, const char *s2, int n2, int len)
uint8_t * Allocate(int size)
Definition: mem-pool.h:92