Impala
Impala is the open source, native analytic database for Apache Hadoop.
old-hash-table-test.cc
// Copyright 2012 Cloudera Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <stdlib.h>
#include <stdio.h>
#include <iostream>
#include <vector>

#include <gtest/gtest.h>

#include "common/compiler-util.h"
#include "exec/old-hash-table.inline.h"
#include "exprs/expr.h"
#include "exprs/expr-context.h"
#include "exprs/slot-ref.h"
#include "runtime/mem-pool.h"
#include "runtime/mem-tracker.h"
#include "runtime/string-value.h"
#include "util/cpu-info.h"
#include "util/runtime-profile.h"

#include "common/names.h"

namespace impala {

class OldHashTableTest : public testing::Test {
 public:
  OldHashTableTest() : mem_pool_(&tracker_) {}

 protected:
  ObjectPool pool_;
  MemTracker tracker_;
  MemPool mem_pool_;
  vector<ExprContext*> build_expr_ctxs_;
  vector<ExprContext*> probe_expr_ctxs_;

  virtual void SetUp() {
    RowDescriptor desc;
    Status status;

    // Not very easy to test complex tuple layouts so this test will use the
    // simplest. The purpose of these tests is to exercise the hash map
    // internals so a simple build/probe expr is fine.
    Expr* expr = pool_.Add(new SlotRef(TYPE_INT, 0));
    build_expr_ctxs_.push_back(pool_.Add(new ExprContext(expr)));
    status = Expr::Prepare(build_expr_ctxs_, NULL, desc, &tracker_);
    EXPECT_TRUE(status.ok());
    status = Expr::Open(build_expr_ctxs_, NULL);
    EXPECT_TRUE(status.ok());

    expr = pool_.Add(new SlotRef(TYPE_INT, 0));
    probe_expr_ctxs_.push_back(pool_.Add(new ExprContext(expr)));
    status = Expr::Prepare(probe_expr_ctxs_, NULL, desc, &tracker_);
    EXPECT_TRUE(status.ok());
    status = Expr::Open(probe_expr_ctxs_, NULL);
    EXPECT_TRUE(status.ok());
  }

  virtual void TearDown() {
    Expr::Close(build_expr_ctxs_, NULL);
    Expr::Close(probe_expr_ctxs_, NULL);
  }

  // Construct a single-tuple row from mem_pool_ whose only slot holds 'val'.
  TupleRow* CreateTupleRow(int32_t val) {
    uint8_t* tuple_row_mem = mem_pool_.Allocate(sizeof(int32_t*));
    Tuple* tuple_mem = Tuple::Create(sizeof(int32_t), &mem_pool_);
    *reinterpret_cast<int32_t*>(tuple_mem) = val;
    TupleRow* row = reinterpret_cast<TupleRow*>(tuple_row_mem);
    row->SetTuple(0, tuple_mem);
    return row;
  }

  // Wrapper to call private methods on OldHashTable.
  // TODO: understand google testing, there must be a more natural way to do this
  void ResizeTable(OldHashTable* table, int64_t new_size) {
    table->ResizeBuckets(new_size);
  }
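
  // Note: calling the private OldHashTable::ResizeBuckets() from this wrapper
  // presumably relies on old-hash-table.h declaring the fixture a friend (e.g.
  // "friend class OldHashTableTest;" or gtest's FRIEND_TEST macro from
  // <gtest/gtest_prod.h>).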

  // Do a full table scan on table. All values should be between [min, max). If
  // all_unique, then each key (int value) should only appear once. Results are
  // stored in results, indexed by the key. Results must have been preallocated to
  // be at least max size.
  void FullScan(OldHashTable* table, int min, int max, bool all_unique,
      TupleRow** results, TupleRow** expected) {
    OldHashTable::Iterator iter = table->Begin();
    while (iter != table->End()) {
      TupleRow* row = iter.GetRow();
      int32_t val = *reinterpret_cast<int32_t*>(build_expr_ctxs_[0]->GetValue(row));
      EXPECT_GE(val, min);
      EXPECT_LT(val, max);
      if (all_unique) EXPECT_TRUE(results[val] == NULL);
      EXPECT_EQ(row->GetTuple(0), expected[val]->GetTuple(0));
      results[val] = row;
      iter.Next<false>();
    }
  }
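
  // Note on the iterator's template flag as these tests use it: FullScan walks the
  // entire table with Next<false>, while ProbeTest (below) follows a Find() with
  // Next<true> and expects to visit only the duplicates of the probed key.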

  // Validate that probe_row evaluated over probe_exprs is equal to build_row
  // evaluated over build_exprs.
  void ValidateMatch(TupleRow* probe_row, TupleRow* build_row) {
    EXPECT_TRUE(probe_row != build_row);
    int32_t build_val =
        *reinterpret_cast<int32_t*>(build_expr_ctxs_[0]->GetValue(probe_row));
    int32_t probe_val =
        *reinterpret_cast<int32_t*>(probe_expr_ctxs_[0]->GetValue(build_row));
    EXPECT_EQ(build_val, probe_val);
  }
120 
121  struct ProbeTestData {
123  vector<TupleRow*> expected_build_rows;
124  };

  void ProbeTest(OldHashTable* table, ProbeTestData* data, int num_data, bool scan) {
    for (int i = 0; i < num_data; ++i) {
      TupleRow* row = data[i].probe_row;

      OldHashTable::Iterator iter;
      iter = table->Find(row);

      if (data[i].expected_build_rows.size() == 0) {
        EXPECT_TRUE(iter == table->End());
      } else {
        if (scan) {
          map<TupleRow*, bool> matched;
          while (iter != table->End()) {
            EXPECT_TRUE(matched.find(iter.GetRow()) == matched.end());
            matched[iter.GetRow()] = true;
            iter.Next<true>();
          }
          EXPECT_EQ(matched.size(), data[i].expected_build_rows.size());
          for (int j = 0; j < data[i].expected_build_rows.size(); ++j) {
            EXPECT_TRUE(matched[data[i].expected_build_rows[j]]);
          }
        } else {
          EXPECT_EQ(data[i].expected_build_rows.size(), 1);
          EXPECT_TRUE(
              data[i].expected_build_rows[0]->GetTuple(0) == iter.GetRow()->GetTuple(0));
          ValidateMatch(row, iter.GetRow());
        }
      }
    }
  }
};

TEST_F(OldHashTableTest, SetupTest) {
  TupleRow* build_row1 = CreateTupleRow(1);
  TupleRow* build_row2 = CreateTupleRow(2);
  TupleRow* probe_row3 = CreateTupleRow(3);
  TupleRow* probe_row4 = CreateTupleRow(4);

  int32_t* val_row1 =
      reinterpret_cast<int32_t*>(build_expr_ctxs_[0]->GetValue(build_row1));
  EXPECT_EQ(*val_row1, 1);
  int32_t* val_row2 =
      reinterpret_cast<int32_t*>(build_expr_ctxs_[0]->GetValue(build_row2));
  EXPECT_EQ(*val_row2, 2);
  int32_t* val_row3 =
      reinterpret_cast<int32_t*>(probe_expr_ctxs_[0]->GetValue(probe_row3));
  EXPECT_EQ(*val_row3, 3);
  int32_t* val_row4 =
      reinterpret_cast<int32_t*>(probe_expr_ctxs_[0]->GetValue(probe_row4));
  EXPECT_EQ(*val_row4, 4);

  mem_pool_.FreeAll();
}

// This test inserts the build rows [0, 5) into the hash table. It validates that
// they are all there using a full table scan. It also validates that Find() is
// correct, testing for probe rows that are both present and not present.
// The hash table is rehashed a few times and the scans/finds are tested again.
TEST_F(OldHashTableTest, BasicTest) {
  TupleRow* build_rows[5];
  TupleRow* scan_rows[5] = {0};
  for (int i = 0; i < 5; ++i) {
    build_rows[i] = CreateTupleRow(i);
  }

  ProbeTestData probe_rows[10];
  for (int i = 0; i < 10; ++i) {
    probe_rows[i].probe_row = CreateTupleRow(i);
    if (i < 5) {
      probe_rows[i].expected_build_rows.push_back(build_rows[i]);
    }
  }

  // Create the hash table and insert the build rows.
  MemTracker tracker;
  OldHashTable hash_table(NULL, build_expr_ctxs_, probe_expr_ctxs_,
      1, false, false, 0, &tracker);
  for (int i = 0; i < 5; ++i) {
    hash_table.Insert(build_rows[i]);
  }
  EXPECT_EQ(hash_table.size(), 5);

  // Do a full table scan and validate the returned pointers.
  FullScan(&hash_table, 0, 5, true, scan_rows, build_rows);
  ProbeTest(&hash_table, probe_rows, 10, false);

  // Resize and scan again.
  ResizeTable(&hash_table, 64);
  EXPECT_EQ(hash_table.num_buckets(), 64);
  EXPECT_EQ(hash_table.size(), 5);
  memset(scan_rows, 0, sizeof(scan_rows));
  FullScan(&hash_table, 0, 5, true, scan_rows, build_rows);
  ProbeTest(&hash_table, probe_rows, 10, false);

  // Resize to two buckets and cause some collisions.
  ResizeTable(&hash_table, 2);
  EXPECT_EQ(hash_table.num_buckets(), 2);
  EXPECT_EQ(hash_table.size(), 5);
  memset(scan_rows, 0, sizeof(scan_rows));
  FullScan(&hash_table, 0, 5, true, scan_rows, build_rows);
  ProbeTest(&hash_table, probe_rows, 10, false);

  // Resize to one bucket, turning the table into a linked list.
  ResizeTable(&hash_table, 1);
  EXPECT_EQ(hash_table.num_buckets(), 1);
  EXPECT_EQ(hash_table.size(), 5);
  memset(scan_rows, 0, sizeof(scan_rows));
  FullScan(&hash_table, 0, 5, true, scan_rows, build_rows);
  ProbeTest(&hash_table, probe_rows, 10, false);

  hash_table.Close();
  mem_pool_.FreeAll();
}

// This test makes sure we can scan ranges of buckets.
TEST_F(OldHashTableTest, ScanTest) {
  MemTracker tracker;
  OldHashTable hash_table(NULL, build_expr_ctxs_, probe_expr_ctxs_,
      1, false, false, 0, &tracker);
  // Add 1 row with val 1, 2 with val 2, etc.
  vector<TupleRow*> build_rows;
  ProbeTestData probe_rows[15];
  probe_rows[0].probe_row = CreateTupleRow(0);
  for (int val = 1; val <= 10; ++val) {
    probe_rows[val].probe_row = CreateTupleRow(val);
    for (int i = 0; i < val; ++i) {
      TupleRow* row = CreateTupleRow(val);
      hash_table.Insert(row);
      build_rows.push_back(row);
      probe_rows[val].expected_build_rows.push_back(row);
    }
  }

  // Add some more probe rows that aren't in the table.
  for (int val = 11; val < 15; ++val) {
    probe_rows[val].probe_row = CreateTupleRow(val);
  }

  // Test that all the build rows were found.
  ProbeTest(&hash_table, probe_rows, 15, true);

  // Resize and try again.
  ResizeTable(&hash_table, 128);
  EXPECT_EQ(hash_table.num_buckets(), 128);
  ProbeTest(&hash_table, probe_rows, 15, true);

  ResizeTable(&hash_table, 16);
  EXPECT_EQ(hash_table.num_buckets(), 16);
  ProbeTest(&hash_table, probe_rows, 15, true);

  ResizeTable(&hash_table, 2);
  EXPECT_EQ(hash_table.num_buckets(), 2);
  ProbeTest(&hash_table, probe_rows, 15, true);

  hash_table.Close();
  mem_pool_.FreeAll();
}

// This test continues adding to the hash table to trigger the resize code paths.
TEST_F(OldHashTableTest, GrowTableTest) {
  int num_to_add = 4;
  int expected_size = 0;
  MemTracker tracker(100 * 1024 * 1024);
  OldHashTable hash_table(NULL, build_expr_ctxs_, probe_expr_ctxs_,
      1, false, false, 0, &tracker, false, num_to_add);
  EXPECT_FALSE(hash_table.mem_limit_exceeded());
  EXPECT_TRUE(!tracker.LimitExceeded());

  // This inserts about 5M entries.
  int build_row_val = 0;
  for (int i = 0; i < 20; ++i) {
    for (int j = 0; j < num_to_add; ++build_row_val, ++j) {
      hash_table.Insert(CreateTupleRow(build_row_val));
    }
    expected_size += num_to_add;
    num_to_add *= 2;
  }
  EXPECT_TRUE(hash_table.mem_limit_exceeded());
  EXPECT_TRUE(tracker.LimitExceeded());

  // Validate that we can find the entries before we went over the limit.
  for (int i = 0; i < expected_size * 5; i += 100000) {
    TupleRow* probe_row = CreateTupleRow(i);
    OldHashTable::Iterator iter = hash_table.Find(probe_row);
    if (i < hash_table.size()) {
      EXPECT_TRUE(iter != hash_table.End());
      ValidateMatch(probe_row, iter.GetRow());
    } else {
      EXPECT_TRUE(iter == hash_table.End());
    }
  }
  hash_table.Close();
  mem_pool_.FreeAll();
}

}  // namespace impala

int main(int argc, char** argv) {
  ::testing::InitGoogleTest(&argc, argv);
  impala::CpuInfo::Init();
  return RUN_ALL_TESTS();
}
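
The scan=true path of ProbeTest above is the heart of what these tests verify: insert
duplicate keys, probe each key, and check that iterating the matches yields every
expected row exactly once. Below is a minimal standalone sketch of that same check,
written against std::unordered_multimap rather than OldHashTable so it compiles
outside the Impala tree; the test name and the multimap stand-in are illustrative
assumptions, not part of this file.

#include <cstdint>
#include <set>
#include <unordered_map>

#include <gtest/gtest.h>

TEST(ProbeScanSketch, DuplicateKeysMatchedExactlyOnce) {
  // Mirror ScanTest: insert 'val' copies of each key 1..10.
  std::unordered_multimap<int32_t, int32_t> table;
  for (int32_t val = 1; val <= 10; ++val) {
    for (int32_t i = 0; i < val; ++i) table.emplace(val, i);
  }

  // Probe keys 0..14; key 0 and keys 11..14 should have no matches.
  for (int32_t key = 0; key < 15; ++key) {
    auto range = table.equal_range(key);
    std::set<int32_t> matched;
    for (auto it = range.first; it != range.second; ++it) {
      // As in ProbeTest's scan loop, each matching row must be seen exactly once.
      EXPECT_TRUE(matched.insert(it->second).second);
    }
    int expected_matches = (key >= 1 && key <= 10) ? key : 0;
    EXPECT_EQ(static_cast<int>(matched.size()), expected_matches);
  }
}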