Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
read-write-util.h
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 
16 #ifndef IMPALA_EXEC_READ_WRITE_UTIL_H
17 #define IMPALA_EXEC_READ_WRITE_UTIL_H
18 
#include <boost/cstdint.hpp>
#include <cstring>
#include <sstream>
#include "common/logging.h"
#include "common/status.h"
#include "util/bit-util.h"
24 
25 namespace impala {
26 
// Makes the enclosing function return false when 'x' evaluates to false.
// The body is wrapped in do { } while (false) so the macro expands to a single
// statement: a bare 'if' would capture an 'else' that follows the macro call.
// Usage is unchanged: RETURN_IF_FALSE(expr);
#define RETURN_IF_FALSE(x) \
  do { \
    if (UNLIKELY(!(x))) return false; \
  } while (false)
28 
35  public:
37  static const int MAX_VINT_LEN = 9;
38 
40  const static int MAX_ZINT_LEN = 5;
41  const static int MAX_ZLONG_LEN = 10;
42 
44  static int PutZInt(int32_t integer, uint8_t* buf);
45 
47  static int PutZLong(int64_t longint, uint8_t* buf);
48 
50  template<typename T>
51  static T GetInt(const uint8_t* buffer);
52 
56  static int GetVLong(uint8_t* buf, int64_t* vlong);
57  static int GetVInt(uint8_t* buf, int32_t* vint);
58 
61  static int64_t PutVLong(int64_t val, uint8_t* buf);
62  static int64_t PutVInt(int32_t val, uint8_t* buf);
63 
65  static int VLongRequiredBytes(int64_t val);
66 
69  static int GetVLong(uint8_t* buf, int64_t offset, int64_t* vlong);
70 
73  static void PutInt(uint8_t* buf, uint16_t integer);
74  static void PutInt(uint8_t* buf, uint32_t integer);
75  static void PutInt(uint8_t* buf, uint64_t integer);
76 
78  static std::string HexDump(const uint8_t* buf, int64_t length);
79  static std::string HexDump(const char* buf, int64_t length);
80 
82  static bool IsNegativeVInt(int8_t byte);
83 
85  static int DecodeVIntSize(int8_t byte);
86 
90  static int64_t ReadZLong(uint8_t** buf);
91 
93  static int32_t ReadZInt(uint8_t** buf);
94 
99 
102  template <class T>
103  static bool Read(uint8_t** buf, int* buf_len, T* val, Status* status);
104 
106  static bool SkipBytes(uint8_t** buf, int* buf_len, int num_bytes, Status* status);
107 };
108 
109 template<>
110 inline uint16_t ReadWriteUtil::GetInt(const uint8_t* buf) {
111  return (buf[0] << 8) | buf[1];
112 }
113 
114 template<>
115 inline uint32_t ReadWriteUtil::GetInt(const uint8_t* buf) {
116  return (buf[0] << 24) | (buf[1] << 16) | (buf[2] << 8) | buf[3];
117 }
118 
119 template<>
120 inline uint64_t ReadWriteUtil::GetInt(const uint8_t* buf) {
121  uint64_t upper_half = GetInt<uint32_t>(buf);
122  uint64_t lower_half = GetInt<uint32_t>(buf + 4);
123  return lower_half | upper_half << 32;
124 }
125 
126 inline void ReadWriteUtil::PutInt(uint8_t* buf, uint16_t integer) {
127  buf[0] = integer >> 8;
128  buf[1] = integer;
129 }
130 
131 inline void ReadWriteUtil::PutInt(uint8_t* buf, uint32_t integer) {
132  uint32_t big_endian = BitUtil::ByteSwap(integer);
133  memcpy(buf, &big_endian, sizeof(uint32_t));
134 }
135 
136 inline void ReadWriteUtil::PutInt(uint8_t* buf, uint64_t integer) {
137  uint64_t big_endian = BitUtil::ByteSwap(integer);
138  memcpy(buf, &big_endian, sizeof(uint64_t));
139 }
140 
141 inline int ReadWriteUtil::GetVInt(uint8_t* buf, int32_t* vint) {
142  int64_t vlong = 0;
143  int len = GetVLong(buf, &vlong);
144  *vint = static_cast<int32_t>(vlong);
145  return len;
146 }
147 
148 inline int ReadWriteUtil::GetVLong(uint8_t* buf, int64_t* vlong) {
149  return GetVLong(buf, 0, vlong);
150 }
151 
152 inline int ReadWriteUtil::GetVLong(uint8_t* buf, int64_t offset, int64_t* vlong) {
153  int8_t firstbyte = (int8_t) buf[0 + offset];
154 
155  int len = DecodeVIntSize(firstbyte);
156  if (len > MAX_VINT_LEN) return -1;
157  if (len == 1) {
158  *vlong = static_cast<int64_t>(firstbyte);
159  return len;
160  }
161 
162  *vlong &= ~*vlong;
163 
164  for (int i = 1; i < len; i++) {
165  *vlong = (*vlong << 8) | buf[i+offset];
166  }
167 
168  if (IsNegativeVInt(firstbyte)) {
169  *vlong = *vlong ^ ((int64_t) - 1);
170  }
171 
172  return len;
173 }
174 
175 inline int ReadWriteUtil::VLongRequiredBytes(int64_t val) {
176  // returns size of the encoded long value, not including the 1 byte for length
177  if (val & 0xFF00000000000000llu) return 8;
178  if (val & 0x00FF000000000000llu) return 7;
179  if (val & 0x0000FF0000000000llu) return 6;
180  if (val & 0x000000FF00000000llu) return 5;
181  if (val & 0x00000000FF000000llu) return 4;
182  if (val & 0x0000000000FF0000llu) return 3;
183  if (val & 0x000000000000FF00llu) return 2;
184  // Values between -112 and 127 are stored using 1 byte,
185  // values between -127 and -112 are stored using 2 bytes
186  // See ReadWriteUtil::DecodeVIntSize for this case
187  if (val < -112) return 2;
188  return 1;
189 }
190 
191 inline int64_t ReadWriteUtil::PutVLong(int64_t val, uint8_t* buf) {
192  int64_t num_bytes = VLongRequiredBytes(val);
193 
194  if (num_bytes == 1) {
195  // store the value itself instead of the length
196  buf[0] = static_cast<int8_t>(val);
197  return 1;
198  }
199 
200  // This is how we encode the length for a length less than or equal to 8
201  buf[0] = -119 + num_bytes;
202 
203  // write to buffer in reversed endianness
204  for (int i = 0; i < num_bytes; ++i) {
205  buf[i+1] = (val >> (8 * (num_bytes - i - 1))) & 0xFF;
206  }
207 
208  // +1 for the length byte
209  return num_bytes + 1;
210 }
211 
212 inline int64_t ReadWriteUtil::PutVInt(int32_t val, uint8_t* buf) {
213  return PutVLong(val, buf);
214 }
215 
216 inline int32_t ReadWriteUtil::ReadZInt(uint8_t** buf) {
217  int64_t zlong = ReadZLong(buf);
218  return static_cast<int32_t>(zlong);
219 }
220 
221 template <class T>
222 inline bool ReadWriteUtil::Read(uint8_t** buf, int* buf_len, T* val, Status* status) {
223  int val_len = sizeof(T);
224  if (UNLIKELY(val_len > *buf_len)) {
225  std::stringstream ss;
226  ss << "Cannot read " << val_len << " bytes, buffer length is " << *buf_len;
227  *status = Status(ss.str());
228  return false;
229  }
230  *val = *reinterpret_cast<T*>(*buf);
231  *buf += val_len;
232  *buf_len -= val_len;
233  return true;
234 }
235 
236 inline bool ReadWriteUtil::SkipBytes(uint8_t** buf, int* buf_len, int num_bytes,
237  Status* status) {
238  DCHECK_GE(*buf_len, 0);
239  if (UNLIKELY(num_bytes > *buf_len)) {
240  std::stringstream ss;
241  ss << "Cannot skip " << num_bytes << " bytes, buffer length is " << *buf_len;
242  *status = Status(ss.str());
243  return false;
244  }
245  *buf += num_bytes;
246  *buf_len -= num_bytes;
247  return true;
248 }
249 
250 inline bool ReadWriteUtil::IsNegativeVInt(int8_t byte) {
251  return byte < -120 || (byte >= -112 && byte < 0);
252 }
253 
254 inline int ReadWriteUtil::DecodeVIntSize(int8_t byte) {
255  if (byte >= -112) {
256  return 1;
257  } else if (byte < -120) {
258  return -119 - byte;
259  }
260  return -111 - byte;
261 }
262 
263 }
264 #endif
static int DecodeVIntSize(int8_t byte)
Determines the total length in bytes of a Writable VInt/VLong from the first byte.
static const int MAX_ZINT_LEN
Maximum lengths for Zigzag encodings.
static int PutZInt(int32_t integer, uint8_t *buf)
Put a zigzag encoded integer into a buffer and return its length.
static bool SkipBytes(uint8_t **buf, int *buf_len, int num_bytes, Status *status)
Skip the next num_bytes bytes.
static int64_t ByteSwap(int64_t value)
Swaps the byte order (i.e. endianness)
Definition: bit-util.h:149
static void PutInt(uint8_t *buf, uint16_t integer)
static int64_t PutVLong(int64_t val, uint8_t *buf)
static bool Read(uint8_t **buf, int *buf_len, T *val, Status *status)
static int64_t ReadZLong(uint8_t **buf)
static const int MAX_ZLONG_LEN
static int32_t ReadZInt(uint8_t **buf)
Read a zig-zag encoded int.
static int GetVInt(uint8_t *buf, int32_t *vint)
static bool IsNegativeVInt(int8_t byte)
Determines the sign of a VInt/VLong from the first byte.
#define UNLIKELY(expr)
Definition: compiler-util.h:33
static int GetVLong(uint8_t *buf, int64_t *vlong)
uint8_t offset[7 *64-sizeof(uint64_t)]
static std::string HexDump(const uint8_t *buf, int64_t length)
Dump the first length bytes of buf to a Hex string.
static int64_t PutVInt(int32_t val, uint8_t *buf)
static int VLongRequiredBytes(int64_t val)
returns size of the encoded long value, not including the 1 byte for length
static const int MAX_VINT_LEN
Maximum length for Writable VInt.
static T GetInt(const uint8_t *buffer)
Get a big endian integer from a buffer. The buffer does not have to be word aligned.
static int PutZLong(int64_t longint, uint8_t *buf)
Put a zigzag encoded long integer into a buffer and return its length.