Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
dict-test.cc
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include <stdlib.h>
16 #include <stdio.h>
17 #include <iostream>
18 
19 #include <boost/foreach.hpp>
20 #include <gtest/gtest.h>
21 
22 #include "common/init.h"
23 #include "runtime/mem-tracker.h"
26 #include "util/dict-encoding.h"
27 
28 #include "common/names.h"
29 
30 namespace impala {
31 
32 template<typename T>
33 void ValidateDict(const vector<T>& values, int fixed_buffer_byte_size) {
34  set<T> values_set(values.begin(), values.end());
35 
37  MemPool pool(&tracker);
38  DictEncoder<T> encoder(&pool, fixed_buffer_byte_size);
39  BOOST_FOREACH(T i, values) {
40  encoder.Put(i);
41  }
42  EXPECT_EQ(encoder.num_entries(), values_set.size());
43 
44  uint8_t dict_buffer[encoder.dict_encoded_size()];
45  encoder.WriteDict(dict_buffer);
46 
47  int data_buffer_len = encoder.EstimatedDataEncodedSize();
48  uint8_t data_buffer[data_buffer_len];
49  int data_len = encoder.WriteData(data_buffer, data_buffer_len);
50  EXPECT_GT(data_len, 0);
51  encoder.ClearIndices();
52 
53  DictDecoder<T> decoder(
54  dict_buffer, encoder.dict_encoded_size(), fixed_buffer_byte_size);
55  decoder.SetData(data_buffer, data_len);
56  BOOST_FOREACH(T i, values) {
57  T j;
58  decoder.GetValue(&j);
59  EXPECT_EQ(i, j);
60  }
61  pool.FreeAll();
62 }
63 
64 TEST(DictTest, TestStrings) {
65  StringValue sv1("hello world");
66  StringValue sv2("foo");
67  StringValue sv3("bar");
68  StringValue sv4("baz");
69 
70  vector<StringValue> values;
71  values.push_back(sv1);
72  values.push_back(sv1);
73  values.push_back(sv1);
74  values.push_back(sv2);
75  values.push_back(sv1);
76  values.push_back(sv2);
77  values.push_back(sv2);
78  values.push_back(sv3);
79  values.push_back(sv3);
80  values.push_back(sv3);
81  values.push_back(sv4);
82 
83  ValidateDict(values, -1);
84 }
85 
86 TEST(DictTest, TestTimestamps) {
87  TimestampValue tv1("2011-01-01 09:01:01", 19);
88  TimestampValue tv2("2012-01-01 09:01:01", 19);
89  TimestampValue tv3("2011-01-01 09:01:02", 19);
90 
91  vector<TimestampValue> values;
92  values.push_back(tv1);
93  values.push_back(tv2);
94  values.push_back(tv3);
95  values.push_back(tv1);
96  values.push_back(tv1);
97  values.push_back(tv1);
98 
100 }
101 
// Advances '*t' to its next value with the type's own pre-increment operator.
template<typename T>
void IncrementValue(T* t) {
  T& target = *t;
  ++target;
}
104 
105 template <> void IncrementValue(Decimal4Value* t) { ++(t->value()); }
106 template <> void IncrementValue(Decimal8Value* t) { ++(t->value()); }
107 template <> void IncrementValue(Decimal16Value* t) { ++(t->value()); }
108 
109 template<typename T>
110 void TestNumbers(int max_value, int repeat, int value_byte_size) {
111  vector<T> values;
112  for (T val = 0; val < max_value; IncrementValue(&val)) {
113  for (int i = 0; i < repeat; ++i) {
114  values.push_back(val);
115  }
116  }
117  ValidateDict(values, value_byte_size);
118 }
119 
// Runs the three-argument TestNumbers() for type T over a fixed set of shapes:
// 100 distinct values once each, one value repeated 100 times, one value once, and
// one value twice.
template<typename T>
void TestNumbers(int value_byte_size) {
  // Each row is {max_value, repeat}; rows run in the order listed.
  static const int kShapes[][2] = {{100, 1}, {1, 100}, {1, 1}, {1, 2}};
  const int num_shapes = sizeof(kShapes) / sizeof(kShapes[0]);
  for (int shape = 0; shape < num_shapes; ++shape) {
    TestNumbers<T>(kShapes[shape][0], kShapes[shape][1], value_byte_size);
  }
}
127 
128 TEST(DictTest, TestNumbers) {
131  TestNumbers<int32_t>(ParquetPlainEncoder::ByteSize(ColumnType(TYPE_INT)));
132  TestNumbers<int64_t>(ParquetPlainEncoder::ByteSize(ColumnType(TYPE_BIGINT)));
135 
136  for (int i = 1; i <=16; ++i) {
137  if (i <= 4) TestNumbers<Decimal4Value>(i);
138  if (i <= 8) TestNumbers<Decimal8Value>(i);
139  TestNumbers<Decimal16Value>(i);
140  }
141 }
142 
143 }
144 
145 int main(int argc, char **argv) {
146  ::testing::InitGoogleTest(&argc, argv);
147  impala::InitCommonRuntime(argc, argv, true);
148  return RUN_ALL_TESTS();
149 }
const T & value() const
void IncrementValue(T *t)
Definition: dict-test.cc:103
MemTracker tracker
void InitCommonRuntime(int argc, char **argv, bool init_jvm, TestInfo::Mode m=TestInfo::NON_TEST)
Definition: init.cc:122
TEST(AtomicTest, Basic)
Definition: atomic-test.cc:28
int WriteData(uint8_t *buffer, int buffer_len)
void ValidateDict(const vector< T > &values, int fixed_buffer_byte_size)
Definition: dict-test.cc:33
static int ByteSize(const T &v)
Returns the byte size of 'v'.
virtual int num_entries() const
The number of entries in the dictionary.
void FreeAll()
Definition: mem-pool.cc:73
virtual void WriteDict(uint8_t *buffer)
ObjectPool pool
This class is thread-safe.
Definition: mem-tracker.h:61
void ClearIndices()
Clears all the indices (but leaves the dictionary).
Definition: dict-encoding.h:62
int Put(const T &value)
void TestNumbers(int max_value, int repeat, int value_byte_size)
Definition: dict-test.cc:110
void SetData(uint8_t *buffer, int buffer_len)
The rle encoded indices into the dictionary.
int main(int argc, char **argv)
Definition: dict-test.cc:145