Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
key-normalizer.inline.h
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef IMPALA_UTIL_KEY_NORMALIZER_INLINE_H_
16 #define IMPALA_UTIL_KEY_NORMALIZER_INLINE_H_
17 
18 #include "util/key-normalizer.h"
19 
20 #include <boost/date_time/gregorian/gregorian_types.hpp>
21 
22 #include "runtime/descriptors.h"
23 #include "runtime/string-value.h"
25 #include "util/bit-util.h"
26 
27 namespace impala {
28 
29 inline bool KeyNormalizer::WriteNullBit(uint8_t null_bit, uint8_t* value, uint8_t* dst,
30  int* bytes_left) {
31  // If there's not enough space for the null byte, return.
32  if (*bytes_left < 1) return true;
33  *dst = (value == NULL ? null_bit : !null_bit);
34  --*bytes_left;
35  return false;
36 }
37 
38 template <typename ValueType>
39 inline void KeyNormalizer::StoreFinalValue(ValueType value, void* dst, bool is_asc) {
40  if (sizeof(ValueType) > 1) value = BitUtil::ToBigEndian(value);
41  if (!is_asc) value = ~value;
42  memcpy(dst, &value, sizeof(ValueType));
43 }
44 
45 template <typename IntType>
46 inline void KeyNormalizer::NormalizeInt(void* src, void* dst, bool is_asc) {
47  const int num_bits = 8 * sizeof(IntType);
48  IntType sign_bit = (1LL << (num_bits - 1));
49 
50  IntType value = *(reinterpret_cast<IntType*>(src));
51  value = (sign_bit ^ value);
52  StoreFinalValue<IntType>(value, dst, is_asc);
53 }
54 
55 template <typename FloatType, typename ResultType>
56 inline void KeyNormalizer::NormalizeFloat(void* src, void* dst, bool is_asc) {
57  DCHECK_EQ(sizeof(FloatType), sizeof(ResultType));
58 
59  const int num_bits = 8 * sizeof(FloatType);
60  const ResultType sign_bit = (1LL << (num_bits - 1));
61 
62  ResultType value = *(reinterpret_cast<ResultType*>(src));
63  if (value & sign_bit) {
64  // If the sign is negative, we'll end up inverting the whole thing.
65  value = ~value;
66  } else {
67  // Otherwise, just invert the sign bit.
68  value = (sign_bit ^ value);
69  }
70  StoreFinalValue<ResultType>(value, dst, is_asc);
71 }
72 
73 inline void KeyNormalizer::NormalizeTimestamp(uint8_t* src, uint8_t* dst, bool is_asc) {
74  TimestampValue timestamp = *(reinterpret_cast<TimestampValue*>(src));
75 
76  // Need 5 bits for day and 4 bits for month. Rest given to year.
77  boost::gregorian::date::ymd_type ymd = timestamp.date().year_month_day();
78  uint32_t date = ymd.day | (ymd.month << 5) | (ymd.year << 9);
79  StoreFinalValue<uint32_t>(date, dst, is_asc);
80 
81  // Write time of day in nanoseconds in the next slot.
82  uint64_t time_ns = timestamp.time_of_day().total_nanoseconds();
83  StoreFinalValue<uint64_t>(time_ns, dst + sizeof(date), is_asc);
84 }
85 
86 inline bool KeyNormalizer::WriteNormalizedKey(const ColumnType& type, bool is_asc,
87  uint8_t* value, uint8_t* dst, int* bytes_left) {
88  // Expend bytes_left or fail if we don't have enough.
89  // Variable-length data types (i.e., strings) account for themselves.
90  int byte_size = type.GetByteSize();
91  if (byte_size != 0) {
92  if (*bytes_left >= byte_size) {
93  *bytes_left -= byte_size;
94  } else {
95  return true;
96  }
97  }
98 
99  switch(type.type) {
100  case TYPE_BIGINT:
101  NormalizeInt<int64_t>(value, dst, is_asc);
102  break;
103  case TYPE_INT:
104  NormalizeInt<int32_t>(value, dst, is_asc);
105  break;
106  case TYPE_SMALLINT:
107  NormalizeInt<int16_t>(value, dst, is_asc);
108  break;
109  case TYPE_TINYINT:
110  NormalizeInt<int8_t>(value, dst, is_asc);
111  break;
112 
113  case TYPE_DOUBLE:
114  NormalizeFloat<double, uint64_t>(value, dst, is_asc);
115  break;
116  case TYPE_FLOAT:
117  NormalizeFloat<float, uint32_t>(value, dst, is_asc);
118  break;
119 
120  case TYPE_TIMESTAMP:
121  NormalizeTimestamp(value, dst, is_asc);
122  break;
123 
124  case TYPE_STRING:
125  case TYPE_VARCHAR: {
126  StringValue* string_val = reinterpret_cast<StringValue*>(value);
127 
128  // Copy the string over, with an additional NULL at the end.
129  int size = std::min(string_val->len, *bytes_left);
130  for (int i = 0; i < size; ++i) {
131  StoreFinalValue<uint8_t>(string_val->ptr[i], dst + i, is_asc);
132  }
133  *bytes_left -= size;
134 
135  if (*bytes_left == 0) return true;
136 
137  StoreFinalValue<uint8_t>(0, dst + size, is_asc);
138  --*bytes_left;
139  return false;
140  }
141 
142  case TYPE_BOOLEAN:
143  StoreFinalValue<uint8_t>(*reinterpret_cast<uint8_t*>(value), dst, is_asc);
144  break;
145  case TYPE_NULL:
146  StoreFinalValue<uint8_t>(0, dst, is_asc);
147  break;
148  default:
149  DCHECK(false) << "Value type not supported for normalization";
150  }
151 
152  return false;
153 }
154 
155 inline bool KeyNormalizer::NormalizeKeyColumn(const ColumnType& type, uint8_t null_bit,
156  bool is_asc, uint8_t* value, uint8_t* dst, int* bytes_left) {
157  bool went_over = WriteNullBit(null_bit, value, dst, bytes_left);
158  if (went_over || value == NULL) return went_over;
159  return WriteNormalizedKey(type, is_asc, value, dst + 1, bytes_left);
160 }
161 
162 inline bool KeyNormalizer::NormalizeKey(TupleRow* row, uint8_t* dst,
163  int* key_idx_over_budget) {
164  int bytes_left = key_len_;
165  for (int i = 0; i < key_expr_ctxs_.size(); ++i) {
166  uint8_t* key = reinterpret_cast<uint8_t*>(key_expr_ctxs_[i]->GetValue(row));
167  int offset = key_len_ - bytes_left;
168  bool went_over = NormalizeKeyColumn(key_expr_ctxs_[i]->root()->type(),
169  !nulls_first_[i], is_asc_[i], key, dst + offset, &bytes_left);
170  if (went_over) {
171  if (key_idx_over_budget != NULL) *key_idx_over_budget = i;
172  return true;
173  }
174  }
175 
176  // Zero out any unused bytes of the sort key.
177  int offset = key_len_ - bytes_left;
178  bzero(dst + offset, bytes_left);
179 
180  return false;
181 }
182 
183 }
184 
185 #endif
bool NormalizeKey(TupleRow *tuple_row, uint8_t *dst, int *key_idx_over_budget=NULL)
static void NormalizeTimestamp(uint8_t *src, uint8_t *dst, bool is_asc)
static void NormalizeFloat(void *src, void *dst, bool is_asc)
static void NormalizeInt(void *src, void *dst, bool is_asc)
PrimitiveType type
Definition: types.h:60
static bool NormalizeKeyColumn(const ColumnType &type, uint8_t null_bit, bool is_asc, uint8_t *value, uint8_t *dst, int *bytes_left)
int GetByteSize() const
Returns the byte size of this type. Returns 0 for variable length types.
Definition: types.h:178
static int64_t ToBigEndian(int64_t value)
Definition: bit-util.h:199
static bool WriteNormalizedKey(const ColumnType &type, bool is_asc, uint8_t *value, uint8_t *dst, int *bytes_left)
std::vector< ExprContext * > key_expr_ctxs_
uint8_t offset[7 *64-sizeof(uint64_t)]
static void StoreFinalValue(ValueType value, void *dst, bool is_asc)
std::vector< bool > is_asc_
static bool WriteNullBit(uint8_t null_bit, uint8_t *value, uint8_t *dst, int *bytes_left)
Returns true if we went over the max key size while writing the null bit.
std::vector< bool > nulls_first_
const boost::gregorian::date & date() const