Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
tuple-layout-benchmark.cc
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include <stdlib.h>
16 #include <stdio.h>
17 #include <iostream>
18 #include <algorithm>
19 #include "util/benchmark.h"
20 #include "util/cpu-info.h"
21 #include "runtime/string-search.h"
22 
23 #include "common/names.h"
24 
25 using namespace impala;
26 
27 // Benchmark tests for padded & aligned vs unpadded tuple layouts.
28 
29 // Machine Info: Intel(R) Core(TM) i7-2600 CPU @ 3.40GHz
30 // Tuple Layout: Function Rate Comparison
31 // ----------------------------------------------------------------------
32 // SequentialPadded 0.4013 1X
33 // SequentialImpala 0.4374 1.09X
34 // SequentialUnaligned 0.4286 1.068X
35 // RandomPadded 0.1342 0.3345X
36 // RandomImpala 0.1437 0.358X
37 // RandomUnaligned 0.1452 0.3619X
38 
39 #define VALIDATE 0
40 
41 const int NUM_TUPLES = 1024 * 500;
42 const int MAX_ID = 10000;
43 
45  int8_t a;
46  int8_t b;
47  int16_t c;
48  float d;
49  int64_t id;
50  double val;
51 };
52 
54  int8_t a;
55  double val;
56  int16_t c;
57  int64_t id;
58 
59  static const int UnpaddedSize = 1 + 8 + 2 + 8;
60 };
61 
63  int8_t a;
64  int16_t c;
65  int64_t id;
66  double val;
67 };
68 
69 struct TestData {
70  double result;
75  vector<int> rand_access_order;
76 };
77 
78 void InitTestData(TestData* data) {
79  data->unpadded_data =
81  data->padded_data =
83  data->impala_data =
86  data->rand_access_order.resize(NUM_TUPLES);
87 
88  char* unpadded_ptr = data->unaligned_data;
89  for (int i = 0; i < NUM_TUPLES; ++i) {
90  data->rand_access_order[i] = i;
91 
92  int8_t rand_a = rand() % 256;
93  double rand_val = rand() / (double)RAND_MAX;
94  int16_t rand_c = rand() % 30000;
95  int64_t rand_id = rand() % MAX_ID;
96 
97  data->padded_data[i].a = rand_a;
98  data->padded_data[i].val = rand_val;
99  data->padded_data[i].c = rand_c;
100  data->padded_data[i].id = rand_id;
101 
102  data->unpadded_data[i].id = rand_id;
103  data->unpadded_data[i].val = rand_val;
104 
105  data->impala_data[i].id = rand_id;
106  data->impala_data[i].val = rand_val;
107 
108  *reinterpret_cast<int8_t*>(unpadded_ptr) = rand_a;
109  unpadded_ptr += 1;
110  *reinterpret_cast<double*>(unpadded_ptr) = rand_val;
111  unpadded_ptr += 8;
112  *reinterpret_cast<int16_t*>(unpadded_ptr) = rand_c;
113  unpadded_ptr += 2;
114  *reinterpret_cast<int64_t*>(unpadded_ptr) = rand_id;
115  unpadded_ptr += 8;
116  }
117 
118  DCHECK_EQ(unpadded_ptr,
119  data->unaligned_data + NUM_TUPLES * PaddedTupleStruct::UnpaddedSize);
120  random_shuffle(data->rand_access_order.begin(), data->rand_access_order.end());
121 }
122 
123 
124 void TestSequentialUnpadded(int batch_size, void* d) {
125  TestData* data = reinterpret_cast<TestData*>(d);
126  for (int i = 0; i < batch_size; ++i) {
127  data->result = 0;
128  for (int j = 0; j < NUM_TUPLES; ++j) {
129  const UnpaddedTupleStruct& item = data->unpadded_data[j];
130  if (item.id > MAX_ID / 2) data->result += item.val;
131  }
132  }
133 }
134 
135 void TestSequentialPadded(int batch_size, void* d) {
136  TestData* data = reinterpret_cast<TestData*>(d);
137  for (int i = 0; i < batch_size; ++i) {
138  data->result = 0;
139  for (int j = 0; j < NUM_TUPLES; ++j) {
140  const PaddedTupleStruct& item = data->padded_data[j];
141  if (item.id > MAX_ID / 2) data->result += item.val;
142  }
143  }
144 }
145 
146 void TestSequentialImpala(int batch_size, void* d) {
147  TestData* data = reinterpret_cast<TestData*>(d);
148  for (int i = 0; i < batch_size; ++i) {
149  data->result = 0;
150  for (int j = 0; j < NUM_TUPLES; ++j) {
151  const ImpalaTupleStruct& item = data->impala_data[j];
152  if (item.id > MAX_ID / 2) data->result += item.val;
153  }
154  }
155 }
156 
157 void TestSequentialUnaligned(int batch_size, void* d) {
158  TestData* data = reinterpret_cast<TestData*>(d);
159  for (int i = 0; i < batch_size; ++i) {
160  data->result = 0;
161  char* data_ptr = data->unaligned_data;
162  for (int j = 0; j < NUM_TUPLES; ++j) {
163  int64_t id = *reinterpret_cast<int64_t*>(data_ptr + 11);
164  if (id > MAX_ID / 2) {
165  data->result += *reinterpret_cast<double*>(data_ptr + 1);
166  }
168  }
169  }
170 }
171 
172 void TestRandomPadded(int batch_size, void* d) {
173  TestData* data = reinterpret_cast<TestData*>(d);
174  for (int i = 0; i < batch_size; ++i) {
175  int* order = &data->rand_access_order[0];
176  data->result = 0;
177  for (int j = 0; j < NUM_TUPLES; ++j) {
178  const PaddedTupleStruct& item = data->padded_data[order[j]];
179  if (item.id > MAX_ID / 2) data->result += item.val;
180  }
181  }
182 }
183 
184 void TestRandomImpala(int batch_size, void* d) {
185  TestData* data = reinterpret_cast<TestData*>(d);
186  for (int i = 0; i < batch_size; ++i) {
187  int* order = &data->rand_access_order[0];
188  data->result = 0;
189  for (int j = 0; j < NUM_TUPLES; ++j) {
190  const ImpalaTupleStruct& item = data->impala_data[order[j]];
191  if (item.id > MAX_ID / 2) data->result += item.val;
192  }
193  }
194 }
195 
196 void TestRandomUnaligned(int batch_size, void* d) {
197  TestData* data = reinterpret_cast<TestData*>(d);
198  for (int i = 0; i < batch_size; ++i) {
199  data->result = 0;
200  int* order = &data->rand_access_order[0];
201  for (int j = 0; j < NUM_TUPLES; ++j) {
202  char* data_ptr = data->unaligned_data + PaddedTupleStruct::UnpaddedSize * order[j];
203  int64_t id = *reinterpret_cast<int64_t*>(data_ptr + 11);
204  if (id > MAX_ID / 2) {
205  data->result += *reinterpret_cast<double*>(data_ptr + 1);
206  }
207  }
208  }
209 }
210 
211 int main(int argc, char **argv) {
212  CpuInfo::Init();
213  cout << Benchmark::GetMachineInfo() << endl;
214 
215  DCHECK_EQ(sizeof(UnpaddedTupleStruct), 24);
216  DCHECK_EQ(sizeof(PaddedTupleStruct), 32);
217  DCHECK_EQ(sizeof(ImpalaTupleStruct), 24);
218 
219  TestData data;
220  InitTestData(&data);
221 
222 #if VALIDATE
223  TestSequentialUnpadded(1, &data);
224  cout << data.result << endl;
225  TestSequentialPadded(1, &data);
226  cout << data.result << endl;
227  TestSequentialImpala(1, &data);
228  cout << data.result << endl;
229  TestSequentialUnaligned(1, &data);
230  cout << data.result << endl;
231  TestRandomPadded(1, &data);
232  cout << data.result << endl;
233  TestRandomImpala(1, &data);
234  cout << data.result << endl;
235  TestRandomUnaligned(1, &data);
236  cout << data.result << endl;
237 #else
238  Benchmark suite("Tuple Layout");
239  suite.AddBenchmark("SequentialPadded", TestSequentialPadded, &data);
240  suite.AddBenchmark("SequentialImpala", TestSequentialImpala, &data);
241  suite.AddBenchmark("SequentialUnaligned", TestSequentialUnaligned, &data);
242  suite.AddBenchmark("RandomPadded", TestRandomPadded, &data);
243  suite.AddBenchmark("RandomImpala", TestRandomImpala, &data);
244  suite.AddBenchmark("RandomUnaligned", TestRandomUnaligned, &data);
245  cout << suite.Measure();
246 #endif
247 
248  return 0;
249 }
int AddBenchmark(const std::string &name, BenchmarkFunction fn, void *args, int baseline_idx=0)
Definition: benchmark.cc:70
void TestSequentialUnpadded(int batch_size, void *d)
static std::string GetMachineInfo()
Output machine/build configuration as a string.
Definition: benchmark.cc:124
PaddedTupleStruct * padded_data
UnpaddedTupleStruct * unpadded_data
void TestSequentialUnaligned(int batch_size, void *d)
vector< int > rand_access_order
std::string Measure()
Runs all the benchmarks and returns the result in a formatted string.
Definition: benchmark.cc:83
ImpalaTupleStruct * impala_data
void InitTestData(TestData *data)
void TestSequentialImpala(int batch_size, void *d)
void TestRandomUnaligned(int batch_size, void *d)
void TestRandomImpala(int batch_size, void *d)
const int NUM_TUPLES
static const int UnpaddedSize
void TestRandomPadded(int batch_size, void *d)
int main(int argc, char **argv)
vector< Decimal > result
static void Init()
Initialize CpuInfo.
Definition: cpu-info.cc:75
const int MAX_ID
void TestSequentialPadded(int batch_size, void *d)