Impala
Impalaistheopensource,nativeanalyticdatabaseforApacheHadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
growing-test.cc
Go to the documentation of this file.
1 #include <gtest/gtest.h>
2 
3 #include "hash-store.h"
4 #include "standard-hash-table.h"
5 #include "tuple-types.h"
6 #include "util/cpu-info.h"
7 #include "util/debug-util.h"
8 #include "util/pretty-printer.h"
9 #include "util/stopwatch.h"
10 
11 using namespace impala;
12 
13 namespace impala {
14 
15 // Tests performance of hash aggregation with HashStore, which grows to contain multiple
16 // hash tables as needed. Allows exploring how buffer size and size of the keyspace
17 // affect performance.
18 class GrowingTest : public testing::Test {
19  public:
20  static const uint64_t NUM_PROBE_TUPLES = 10000000; // 10^7
21 
22  // how many total build tuples we'll use to fill a table
23  // Needs to be comfortably less than the total capacity of num_tables tables
24  // (the below expression without the constant factor multiplied by it)
25  // because the hashes will not be spread perfectly.
26  // But should be more than 1/2 of that expression because otherwise if the hashes
27  // spread out perfectly, it will fit in num_tables / 2 and not give us the fanout
28  // we expect. (A test will fail in this case so that we know.)
29  static const int TUPLES_IN_TABLE = (StandardHashTable::NODES / 10) * 8;
30 
31 
32  template <int buffer_size>
33  inline static uint64_t AggregateTest(uint64_t num_probe_tuples,
34  int num_tables) {
35  ProbeTuple* tuples = GenTuples(num_probe_tuples, TUPLES_IN_TABLE, num_tables);
36  HashStore<buffer_size> hs;
37  StopWatch watch;
38  watch.Start();
39  hs.Aggregate(tuples, num_probe_tuples);
40  watch.Stop();
41  SanityCheck<buffer_size>(hs, num_probe_tuples, num_tables);
42  free(tuples);
43  return watch.ElapsedTime();
44  }
45 
46  // Confirm that hs appears to be the correct result of aggregating num_probe_tuples,
47  // with a fanout into num_tables tables.
48  template <int buffer_size>
49  static void SanityCheck(const HashStore<buffer_size> & hs,
50  uint64_t num_probe_tuples, int num_tables) {
51  uint64_t total_count = 0;
52  uint64_t build_tuples = 0;
53  for (int i = 0; i < hs.num_tables_; ++i) {
54  StandardHashTable& ht = hs.tables_[i];
55  for (StandardHashTable::Iterator it = ht.Begin(); it.HasNext(); it.Next()) {
56  total_count += it.GetRow()->count;
57  ++build_tuples;
58  }
59  }
60  ASSERT_EQ(num_probe_tuples, total_count);
61  ASSERT_EQ(num_tables * TUPLES_IN_TABLE, build_tuples);
62  ASSERT_EQ(num_tables, hs.num_tables_)
63  << "You do not have the number of tables that you hoped.\n"
64  << "This could mean that there weren't enough probe tuples to fill the keyspace, "
65  << "skewed hashing lead a hash table that we expected not to overflow to overflow, "
66  << "or a genuine bug.";
67  if (buffer_size > 0) {
68  ASSERT_EQ(buffer_size, sizeof(typename HashStore<buffer_size>::Buffer));
69  }
70  }
71 
72  template <int buffer_size>
73  inline static uint64_t NextTestCase(uint64_t prev, int num_tables) {
74  uint64_t time = AggregateTest<buffer_size>(NUM_PROBE_TUPLES, num_tables);
75  int64_t delta;
76  if (prev == 0) {
77  // just print 0 for the delta the first time.
78  delta = 0;
79  }
80  else {
81  delta = static_cast<int64_t>(time) - static_cast<int64_t>(prev);
82  }
83  LOG(ERROR) << num_tables * TUPLES_IN_TABLE << "\t"
84  << PrettyPrinter::Print(time, TUnit::CPU_TICKS)
85  << "\t" << PrettyPrinter::Print(delta, TUnit::CPU_TICKS);
86  return time;
87  }
88 
89  // Run a test aggregation with buffers of size buffer_size bytes.
90  // Try the full range of working set size (and thus fanout) that we can do with
91  // single-level fanout.
92  template <int buffer_size>
93  inline static void Test() {
94  LOG(ERROR) << "Buffer size " << HashStore<buffer_size>::BUFFER_SIZE << " tuples ("
95  << PrettyPrinter::Print(buffer_size, TUnit::BYTES)
96  << "):";
97  LOG(ERROR) << "#BuildTuples\tTime\tdTime";
98  uint64_t prev = 0;
99  // only go up to 2^12 hashtables because after that, there are at least as many
100  // buffers as L2 cache lines. We won't let this happen; we'll do multi-level fanout.
101  // Note that we should probably allow 2 cache lines per buffer (since a write might
102  // span a line break), plus we need some memory aside from the buffers, so we'll
103  // actually stop at 2^11, or maybe 2^12. And we might want to keep them in L1,
104  // in which case we'll stop at 2^7 or so.
105  for (int num_tables = 1; num_tables <= 1<<10; num_tables *= 2) {
106  prev = NextTestCase<buffer_size>(prev, num_tables);
107  }
108  }
109 };
110 
111 }
112 
113 int main(int argc, char** argv) {
114  google::InitGoogleLogging(argv[0]);
115  ::testing::InitGoogleTest(&argc, argv);
116  CpuInfo::Init();
117  LOG(ERROR) << "Testing with " << GrowingTest::NUM_PROBE_TUPLES << " tuples.";
118  return RUN_ALL_TESTS();
119 }
120 
122  // test without buffers
123  GrowingTest::Test<0>();
124  // test with one of the best buffer sizes
125  // Make it slightly more than a power of 2 cache lines so that they are offset.
126  GrowingTest::Test< (1 << 13) + 3 * 64 >();
127 }
static const int TUPLES_IN_TABLE
Definition: growing-test.cc:29
uint64_t ElapsedTime() const
Returns time in cpu ticks.
Definition: stopwatch.h:50
static void Test()
Definition: growing-test.cc:93
static void SanityCheck(const HashStore< buffer_size > &hs, uint64_t num_probe_tuples, int num_tables)
Definition: growing-test.cc:49
ProbeTuple tuples[BUFFER_TUPLES]
static uint64_t NextTestCase(uint64_t prev, int num_tables)
Definition: growing-test.cc:73
TEST(AtomicTest, Basic)
Definition: atomic-test.cc:28
#define BUFFER_SIZE
static std::string Print(bool value, TUnit::type ignored, bool verbose=false)
static uint64_t AggregateTest(uint64_t num_probe_tuples, int num_tables)
Definition: growing-test.cc:33
static const uint64_t NUM_PROBE_TUPLES
Definition: growing-test.cc:20
uint64_t Test(T *ht, const ProbeTuple *input, uint64_t num_tuples)
int main(int argc, char **argv)
static void Init()
Initialize CpuInfo.
Definition: cpu-info.cc:75