Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
hash-partition-test.cc
Go to the documentation of this file.
1 // Copyright (c) 2012 Cloudera, Inc. All rights reserved.
2 
3 #include <stdlib.h>
4 #include <stdio.h>
5 #include <iostream>
6 #include <vector>
7 
8 #include "util/hash-util.h"
9 #include "util/cpu-info.h"
10 
11 #include "common/names.h"
12 
13 using namespace impala;
14 
15 // Test collision problem with multiple mod steps (IMPALA-219)
16 int main(int argc, char **argv) {
17  google::InitGoogleLogging(argv[0]);
18  CpuInfo::Init();
19 
20  int num_buckets1 = 16;
21  int num_buckets2 = 1024;
22  int num_values = num_buckets1 * num_buckets2;
23 
24  int num_collisions1 = 0;
25  int num_collisions2 = 0;
26  int num_empty2 = num_buckets2;
27  vector<bool> buckets1;
28  vector<bool> buckets2;
29  buckets1.resize(num_buckets1);
30  buckets2.resize(num_buckets2);
31 
32  // First test using the same hash fn both times
33  for (int i = 0; i < num_values; ++i) {
34  uint32_t hash1 = HashUtil::Hash(&i, sizeof(int), 0) >> 8;
35  uint32_t hash2 = HashUtil::Hash(&i, sizeof(int), 1) >> 8;
36  uint32_t bucket1_idx = hash1 % num_buckets1;
37  if (buckets1[bucket1_idx]) ++num_collisions1;
38  buckets1[bucket1_idx] = true;
39 
40  LOG(ERROR) << i << ":" << hash1 << ":" << hash2;
41  // If they matched bucket 0, put it into buckets2
42  if (bucket1_idx == 0) {
43  uint32_t bucket2_idx = hash2 % num_buckets2;
44  if (buckets2[bucket2_idx]) {
45  ++num_collisions2;
46  } else {
47  buckets2[bucket2_idx] = true;
48  --num_empty2;
49  }
50  }
51  }
52 
53  LOG(ERROR) << "Same hash:" << endl
54  << " Bucket 1 Collisions: " << num_collisions1 << endl
55  << " Expected 1 Collisions: " << num_values - num_buckets1 << endl
56  << " Bucket 2 Collisions: " << num_collisions2 << endl
57  << " Bucket 2 Empties: " << num_empty2 << endl
58  << " Bucket 2 Total Values: " << num_values / num_buckets1;
59 
60  return 0;
61 }
static uint32_t Hash(const void *data, int32_t bytes, uint32_t seed)
Definition: hash-util.h:135
int main(int argc, char **argv)
static void Init()
Initialize CpuInfo.
Definition: cpu-info.cc:75