Impala
Impalaistheopensource,nativeanalyticdatabaseforApacheHadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
rle-test.cc
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include <stdlib.h>
16 #include <stdio.h>
17 #include <iostream>
18 
19 #include <boost/utility.hpp>
20 #include <gtest/gtest.h>
21 #include <math.h>
22 
23 #include "common/init.h"
24 #include "util/rle-encoding.h"
26 
27 #include "common/names.h"
28 
29 namespace impala {
30 
31 const int MAX_WIDTH = 32;
32 
33 TEST(BitArray, TestBool) {
34  const int len = 8;
35  uint8_t buffer[len];
36 
37  BitWriter writer(buffer, len);
38 
39  // Write alternating 0's and 1's
40  for (int i = 0; i < 8; ++i) {
41  bool result = writer.PutValue(i % 2, 1);
42  EXPECT_TRUE(result);
43  }
44  writer.Flush();
45  EXPECT_EQ((int)buffer[0], BOOST_BINARY(1 0 1 0 1 0 1 0));
46 
47  // Write 00110011
48  for (int i = 0; i < 8; ++i) {
49  bool result = false;
50  switch (i) {
51  case 0:
52  case 1:
53  case 4:
54  case 5:
55  result = writer.PutValue(false, 1);
56  break;
57  default:
58  result = writer.PutValue(true, 1);
59  break;
60  }
61  EXPECT_TRUE(result);
62  }
63  writer.Flush();
64 
65  // Validate the exact bit value
66  EXPECT_EQ((int)buffer[0], BOOST_BINARY(1 0 1 0 1 0 1 0));
67  EXPECT_EQ((int)buffer[1], BOOST_BINARY(1 1 0 0 1 1 0 0));
68 
69  // Use the reader and validate
70  BitReader reader(buffer, len);
71  for (int i = 0; i < 8; ++i) {
72  bool val = false;
73  bool result = reader.GetValue(1, &val);
74  EXPECT_TRUE(result);
75  EXPECT_EQ(val, i % 2);
76  }
77 
78  for (int i = 0; i < 8; ++i) {
79  bool val = false;
80  bool result = reader.GetValue(1, &val);
81  EXPECT_TRUE(result);
82  switch (i) {
83  case 0:
84  case 1:
85  case 4:
86  case 5:
87  EXPECT_EQ(val, false);
88  break;
89  default:
90  EXPECT_EQ(val, true);
91  break;
92  }
93  }
94 }
95 
96 // Writes 'num_vals' values with width 'bit_width' and reads them back.
97 void TestBitArrayValues(int bit_width, int num_vals) {
98  const int len = BitUtil::Ceil(bit_width * num_vals, 8);
99  const uint64_t mod = bit_width == 64? 1 : 1LL << bit_width;
100 
101  uint8_t buffer[len];
102  BitWriter writer(buffer, len);
103  for (int i = 0; i < num_vals; ++i) {
104  bool result = writer.PutValue(i % mod, bit_width);
105  EXPECT_TRUE(result);
106  }
107  writer.Flush();
108  EXPECT_EQ(writer.bytes_written(), len);
109 
110  BitReader reader(buffer, len);
111  for (int i = 0; i < num_vals; ++i) {
112  int64_t val;
113  bool result = reader.GetValue(bit_width, &val);
114  EXPECT_TRUE(result);
115  EXPECT_EQ(val, i % mod);
116  }
117  EXPECT_EQ(reader.bytes_left(), 0);
118 }
119 
120 TEST(BitArray, TestValues) {
121  for (int width = 0; width <= MAX_WIDTH; ++width) {
122  TestBitArrayValues(width, 1);
123  TestBitArrayValues(width, 2);
124  // Don't write too many values
125  TestBitArrayValues(width, (width < 12) ? (1 << width) : 4096);
126  TestBitArrayValues(width, 1024);
127  }
128 }
129 
130 // Test some mixed values
131 TEST(BitArray, TestMixed) {
132  const int len = 1024;
133  uint8_t buffer[len];
134  bool parity = true;
135 
136  BitWriter writer(buffer, len);
137  for (int i = 0; i < len; ++i) {
138  bool result;
139  if (i % 2 == 0) {
140  result = writer.PutValue(parity, 1);
141  parity = !parity;
142  } else {
143  result = writer.PutValue(i, 10);
144  }
145  EXPECT_TRUE(result);
146  }
147  writer.Flush();
148 
149  parity = true;
150  BitReader reader(buffer, len);
151  for (int i = 0; i < len; ++i) {
152  bool result;
153  if (i % 2 == 0) {
154  bool val;
155  result = reader.GetValue(1, &val);
156  EXPECT_EQ(val, parity);
157  parity = !parity;
158  } else {
159  int val;
160  result = reader.GetValue(10, &val);
161  EXPECT_EQ(val, i);
162  }
163  EXPECT_TRUE(result);
164  }
165 }
166 
167 // Validates encoding of values by encoding and decoding them. If
168 // expected_encoding != NULL, also validates that the encoded buffer is
169 // exactly 'expected_encoding'.
170 // if expected_len is not -1, it will validate the encoded size is correct.
171 void ValidateRle(const vector<int>& values, int bit_width,
172  uint8_t* expected_encoding, int expected_len) {
173  const int len = 64 * 1024;
174  uint8_t buffer[len];
175  EXPECT_LE(expected_len, len);
176 
177  RleEncoder encoder(buffer, len, bit_width);
178  for (int i = 0; i < values.size(); ++i) {
179  bool result = encoder.Put(values[i]);
180  EXPECT_TRUE(result);
181  }
182  int encoded_len = encoder.Flush();
183 
184  if (expected_len != -1) {
185  EXPECT_EQ(encoded_len, expected_len);
186  }
187  if (expected_encoding != NULL) {
188  EXPECT_TRUE(memcmp(buffer, expected_encoding, expected_len) == 0);
189  }
190 
191  // Verify read
192  RleDecoder decoder(buffer, len, bit_width);
193  for (int i = 0; i < values.size(); ++i) {
194  uint64_t val;
195  bool result = decoder.Get(&val);
196  EXPECT_TRUE(result);
197  EXPECT_EQ(values[i], val);
198  }
199 }
200 
201 TEST(Rle, SpecificSequences) {
202  const int len = 1024;
203  uint8_t expected_buffer[len];
204  vector<int> values;
205 
206  // Test 50 0' followed by 50 1's
207  values.resize(100);
208  for (int i = 0; i < 50; ++i) {
209  values[i] = 0;
210  }
211  for (int i = 50; i < 100; ++i) {
212  values[i] = 1;
213  }
214 
215  // expected_buffer valid for bit width <= 1 byte
216  expected_buffer[0] = (50 << 1);
217  expected_buffer[1] = 0;
218  expected_buffer[2] = (50 << 1);
219  expected_buffer[3] = 1;
220  for (int width = 1; width <= 8; ++width) {
221  ValidateRle(values, width, expected_buffer, 4);
222  }
223 
224  for (int width = 9; width <= MAX_WIDTH; ++width) {
225  ValidateRle(values, width, NULL, 2 * (1 + BitUtil::Ceil(width, 8)));
226  }
227 
228  // Test 100 0's and 1's alternating
229  for (int i = 0; i < 100; ++i) {
230  values[i] = i % 2;
231  }
232  int num_groups = BitUtil::Ceil(100, 8);
233  expected_buffer[0] = (num_groups << 1) | 1;
234  for (int i = 1; i <= 100/8; ++i) {
235  expected_buffer[i] = BOOST_BINARY(1 0 1 0 1 0 1 0);
236  }
237  // Values for the last 4 0 and 1's. The upper 4 bits should be padded to 0.
238  expected_buffer[100/8 + 1] = BOOST_BINARY(0 0 0 0 1 0 1 0);
239 
240  // num_groups and expected_buffer only valid for bit width = 1
241  ValidateRle(values, 1, expected_buffer, 1 + num_groups);
242  for (int width = 2; width <= MAX_WIDTH; ++width) {
243  int num_values = BitUtil::Ceil(100, 8) * 8;
244  ValidateRle(values, width, NULL, 1 + BitUtil::Ceil(width * num_values, 8));
245  }
246 }
247 
248 // ValidateRle on 'num_vals' values with width 'bit_width'. If 'value' != -1, that value
249 // is used, otherwise alternating values are used.
250 void TestRleValues(int bit_width, int num_vals, int value = -1) {
251  const uint64_t mod = (bit_width == 64) ? 1 : 1LL << bit_width;
252  vector<int> values;
253  for (int v = 0; v < num_vals; ++v) {
254  values.push_back((value != -1) ? value : (v % mod));
255  }
256  ValidateRle(values, bit_width, NULL, -1);
257 }
258 
259 TEST(Rle, TestValues) {
260  for (int width = 1; width <= MAX_WIDTH; ++width) {
261  TestRleValues(width, 1);
262  TestRleValues(width, 1024);
263  TestRleValues(width, 1024, 0);
264  TestRleValues(width, 1024, 1);
265  }
266 }
267 
268 TEST(Rle, BitWidthZeroRepeated) {
269  uint8_t buffer[1];
270  const int num_values = 15;
271  buffer[0] = num_values << 1; // repeated indicator byte
272  RleDecoder decoder(buffer, sizeof(buffer), 0);
273  uint8_t val;
274  for (int i = 0; i < num_values; ++i) {
275  bool result = decoder.Get(&val);
276  EXPECT_TRUE(result);
277  EXPECT_EQ(val, 0); // can only encode 0s with bit width 0
278  }
279  EXPECT_FALSE(decoder.Get(&val));
280 }
281 
282 TEST(Rle, BitWidthZeroLiteral) {
283  uint8_t buffer[1];
284  const int num_groups = 4;
285  buffer[0] = num_groups << 1 | 1; // literal indicator byte
286  RleDecoder decoder = RleDecoder(buffer, sizeof(buffer), 0);
287  const int num_values = num_groups * 8;
288  uint8_t val;
289  for (int i = 0; i < num_values; ++i) {
290  bool result = decoder.Get(&val);
291  EXPECT_TRUE(result);
292  EXPECT_EQ(val, 0); // can only encode 0s with bit width 0
293  }
294  EXPECT_FALSE(decoder.Get(&val));
295 }
296 
297 // Test that writes out a repeated group and then a literal
298 // group but flush before finishing.
299 TEST(BitRle, Flush) {
300  vector<int> values;
301  for (int i = 0; i < 16; ++i) values.push_back(1);
302  values.push_back(0);
303  ValidateRle(values, 1, NULL, -1);
304  values.push_back(1);
305  ValidateRle(values, 1, NULL, -1);
306  values.push_back(1);
307  ValidateRle(values, 1, NULL, -1);
308  values.push_back(1);
309  ValidateRle(values, 1, NULL, -1);
310 }
311 
312 // Test some random sequences.
313 TEST(BitRle, Random) {
314  int iters = 0;
315  while (iters < 1000) {
316  srand(iters++);
317  if (iters % 10000 == 0) LOG(ERROR) << "Seed: " << iters;
318  vector<int> values;
319  bool parity = 0;
320  for (int i = 0; i < 1000; ++i) {
321  int group_size = rand() % 20 + 1;
322  if (group_size > 16) {
323  group_size = 1;
324  }
325  for (int i = 0; i < group_size; ++i) {
326  values.push_back(parity);
327  }
328  parity = !parity;
329  }
330  ValidateRle(values, (iters % MAX_WIDTH) + 1, NULL, -1);
331  }
332 }
333 
334 // Test a sequence of 1 0's, 2 1's, 3 0's. etc
335 // e.g. 011000111100000
336 TEST(BitRle, RepeatedPattern) {
337  vector<int> values;
338  const int min_run = 1;
339  const int max_run = 32;
340 
341  for (int i = min_run; i <= max_run; ++i) {
342  int v = i % 2;
343  for (int j = 0; j < i; ++j) {
344  values.push_back(v);
345  }
346  }
347 
348  // And go back down again
349  for (int i = max_run; i >= min_run; --i) {
350  int v = i % 2;
351  for (int j = 0; j < i; ++j) {
352  values.push_back(v);
353  }
354  }
355 
356  ValidateRle(values, 1, NULL, -1);
357 }
358 
359 TEST(BitRle, Overflow) {
360  for (int bit_width = 1; bit_width < 32; bit_width += 3) {
361  const int len = RleEncoder::MinBufferSize(bit_width);
362  uint8_t buffer[len];
363  int num_added = 0;
364  bool parity = true;
365 
366  RleEncoder encoder(buffer, len, bit_width);
367  // Insert alternating true/false until there is no space left
368  while (true) {
369  bool result = encoder.Put(parity);
370  parity = !parity;
371  if (!result) break;
372  ++num_added;
373  }
374 
375  int bytes_written = encoder.Flush();
376  EXPECT_LE(bytes_written, len);
377  EXPECT_GT(num_added, 0);
378 
379  RleDecoder decoder(buffer, bytes_written, bit_width);
380  parity = true;
381  uint32_t v;
382  for (int i = 0; i < num_added; ++i) {
383  bool result = decoder.Get(&v);
384  EXPECT_TRUE(result);
385  EXPECT_EQ(v, parity);
386  parity = !parity;
387  }
388  // Make sure we get false when reading past end a couple times.
389  EXPECT_FALSE(decoder.Get(&v));
390  EXPECT_FALSE(decoder.Get(&v));
391  }
392 }
393 
394 }
395 
396 int main(int argc, char **argv) {
397  ::testing::InitGoogleTest(&argc, argv);
398  impala::InitCommonRuntime(argc, argv, true);
399  return RUN_ALL_TESTS();
400 }
401 
bool PutValue(uint64_t v, int num_bits)
void TestBitArrayValues(int bit_width, int num_vals)
Definition: rle-test.cc:97
void InitCommonRuntime(int argc, char **argv, bool init_jvm, TestInfo::Mode m=TestInfo::NON_TEST)
Definition: init.cc:122
TEST(AtomicTest, Basic)
Definition: atomic-test.cc:28
int main(int argc, char **argv)
Definition: rle-test.cc:396
int bytes_written() const
void Flush(bool align=false)
static int Ceil(int value, int divisor)
Returns the ceil of value/divisor.
Definition: bit-util.h:32
void ValidateRle(const vector< int > &values, int bit_width, uint8_t *expected_encoding, int expected_len)
Definition: rle-test.cc:171
static int MinBufferSize(int bit_width)
Definition: rle-encoding.h:132
bool Put(uint64_t value)
Definition: rle-encoding.h:264
const int MAX_WIDTH
Definition: rle-test.cc:31
bool Get(T *val)
Gets the next value. Returns false if there are no more.
Definition: rle-encoding.h:229
void TestRleValues(int bit_width, int num_vals, int value=-1)
Definition: rle-test.cc:250
Decoder class for RLE encoded data.
Definition: rle-encoding.h:77
bool GetValue(int num_bits, T *v)