Impala
Impalaistheopensource,nativeanalyticdatabaseforApacheHadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
atoi-benchmark.cc
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include <stdlib.h>
16 #include <stdio.h>
17 #include <iostream>
18 #include <vector>
19 #include <sstream>
20 #include "runtime/string-value.h"
21 #include "util/benchmark.h"
22 #include "util/cpu-info.h"
23 #include "util/string-parser.h"
24 
25 #include "common/names.h"
26 
27 using namespace impala;
28 
29 // Benchmark for doing atoi. This benchmark compares various implementations
30 // to convert string to int32s. The data is mostly positive, relatively small
31 // numbers.
32 //
33 // Machine Info: Intel(R) Core(TM) i7-2600 CPU @ 3.40GHz
34 // atoi: Function Rate (iters/ms) Comparison
35 // ----------------------------------------------------------------------
36 // strtol 59.71 1X
37 // atoi 59.87 1.003X
38 // impala 162.6 2.723X
39 // impala_unsafe 200.9 3.365X
40 // impala_unrolled 176.5 2.956X
41 // impala_cased 232.1 3.887X
42 
43 #define VALIDATE 0
44 
45 #if VALIDATE
46 #define VALIDATE_RESULT(actual, expected, str) \
47  if (actual != expected) { \
48  cout << "Parse Error. " \
49  << "String: " << str \
50  << ". Parsed: " << actual << endl; \
51  exit(-1); \
52  }
53 #else
54 #define VALIDATE_RESULT(actual, expected, str)
55 #endif
56 
57 
58 struct TestData {
59  vector<StringValue> data;
60  vector<string> memory;
61  vector<int32_t> result;
62 };
63 
64 void AddTestData(TestData* data, const string& input) {
65  data->memory.push_back(input);
66  const string& str = data->memory.back();
67  data->data.push_back(StringValue(const_cast<char*>(str.c_str()), str.length()));
68 }
69 
70 void AddTestData(TestData* data, int n, int32_t min = -10, int32_t max = 10,
71  bool leading_space = false, bool trailing_space = false) {
72  for (int i = 0; i < n; ++i) {
73  double val = rand();
74  val /= RAND_MAX;
75  val = static_cast<int32_t>((val * (max - min)) + min);
76  stringstream ss;
77  if (leading_space) ss << " ";
78  ss << val;
79  if (trailing_space) ss << " ";
80  AddTestData(data, ss.str());
81  }
82 }
83 
84 #define DIGIT(c) (c -'0')
85 
86 inline int32_t AtoiUnsafe(char* s, int len) {
87  int32_t val = 0;
88  bool negative = false;
89  int i = 0;
90  switch (*s) {
91  case '-': negative = true;
92  case '+': ++i;
93  }
94 
95  for (; i < len; ++i) {
96  val = val * 10 + DIGIT(s[i]);
97  }
98 
99  return negative ? -val : val;
100 }
101 
102 inline int32_t AtoiUnrolled(char* s, int len) {
103  if (LIKELY(len <= 8)) {
104  int32_t val = 0;
105  bool negative = false;
106  switch (*s) {
107  case '-': negative = true;
108  case '+': --len, ++s;
109  }
110 
111  switch(len) {
112  case 8:
113  val += (DIGIT(s[len - 8])) * 10000;
114  case 7:
115  val += (DIGIT(s[len - 7])) * 10000;
116  case 6:
117  val += (DIGIT(s[len - 6])) * 10000;
118  case 5:
119  val += (DIGIT(s[len - 5])) * 10000;
120  case 4:
121  val += (DIGIT(s[len - 4])) * 1000;
122  case 3:
123  val += (DIGIT(s[len - 3])) * 100;
124  case 2:
125  val += (DIGIT(s[len - 2])) * 10;
126  case 1:
127  val += (DIGIT(s[len - 1]));
128  }
129  return negative ? -val : val;
130  } else {
131  return AtoiUnsafe(s, len);
132  }
133 }
134 
135 inline int32_t AtoiCased(char* s, int len) {
136  if (LIKELY(len <= 5)) {
137  int32_t val = 0;
138  bool negative = false;
139  switch (*s) {
140  case '-': negative = true;
141  case '+': --len, ++s;
142  }
143 
144  switch(len) {
145  case 5:
146  val = DIGIT(s[0])*10000 + DIGIT(s[1])*1000 + DIGIT(s[2])*100 +
147  DIGIT(s[3])*10 + DIGIT(s[4]);
148  break;
149  case 4:
150  val = DIGIT(s[0])*1000 + DIGIT(s[1])*100 + DIGIT(s[2])*10 + DIGIT(s[3]);
151  break;
152  case 3:
153  val = DIGIT(s[0])*100 + DIGIT(s[1])*10 + DIGIT(s[2]);
154  break;
155  case 2:
156  val = DIGIT(s[0])*10 + DIGIT(s[1]);
157  break;
158  case 1:
159  val = DIGIT(s[0]);
160  break;
161  }
162  return negative ? -val : val;
163  } else {
164  return AtoiUnsafe(s, len);
165  }
166 }
167 
168 void TestAtoi(int batch_size, void* d) {
169  TestData* data = reinterpret_cast<TestData*>(d);
170  for (int i = 0; i < batch_size; ++i) {
171  int n = data->data.size();
172  for (int j = 0; j < n; ++j) {
173  data->result[j] = atoi(data->data[j].ptr);
174  }
175  }
176 }
177 
178 void TestStrtol(int batch_size, void* d) {
179  TestData* data = reinterpret_cast<TestData*>(d);
180  for (int i = 0; i < batch_size; ++i) {
181  int n = data->data.size();
182  for (int j = 0; j < n; ++j) {
183  data->result[j] = strtol(data->data[j].ptr, NULL, 10);
184  }
185  }
186 }
187 
188 void TestImpala(int batch_size, void* d) {
189  TestData* data = reinterpret_cast<TestData*>(d);
190  for (int i = 0; i < batch_size; ++i) {
191  int n = data->data.size();
192  for (int j = 0; j < n; ++j) {
193  const StringValue& str = data->data[j];
195  int32_t val = StringParser::StringToInt<int32_t>(str.ptr, str.len, &dummy);
196  VALIDATE_RESULT(val, data->result[j], str.ptr);
197  data->result[j] = val;
198  }
199  }
200 }
201 
202 void TestImpalaUnsafe(int batch_size, void* d) {
203  TestData* data = reinterpret_cast<TestData*>(d);
204  for (int i = 0; i < batch_size; ++i) {
205  int n = data->data.size();
206  for (int j = 0; j < n; ++j) {
207  const StringValue& str = data->data[j];
208  int32_t val = AtoiUnsafe(str.ptr, str.len);
209  VALIDATE_RESULT(val, data->result[j], str.ptr);
210  data->result[j] = val;
211  }
212  }
213 }
214 
215 void TestImpalaUnrolled(int batch_size, void* d) {
216  TestData* data = reinterpret_cast<TestData*>(d);
217  for (int i = 0; i < batch_size; ++i) {
218  int n = data->data.size();
219  for (int j = 0; j < n; ++j) {
220  const StringValue& str = data->data[j];
221  int32_t val = AtoiUnrolled(str.ptr, str.len);
222  VALIDATE_RESULT(val, data->result[j], str.ptr);
223  data->result[j] = val;
224  }
225  }
226 }
227 
228 void TestImpalaCased(int batch_size, void* d) {
229  TestData* data = reinterpret_cast<TestData*>(d);
230  for (int i = 0; i < batch_size; ++i) {
231  int n = data->data.size();
232  for (int j = 0; j < n; ++j) {
233  const StringValue& str = data->data[j];
234  int32_t val = AtoiCased(str.ptr, str.len);
235  VALIDATE_RESULT(val, data->result[j], str.ptr);
236  data->result[j] = val;
237  }
238  }
239 }
240 
241 int main(int argc, char **argv) {
242  CpuInfo::Init();
243  cout << Benchmark::GetMachineInfo() << endl;
244 
245  TestData data;
246 
247  // Most data is probably positive
248  AddTestData(&data, 1000, -5, 1000);
249  data.result.resize(data.data.size());
250 
251  TestData data_leading_space;
252  AddTestData(&data_leading_space, 1000, -5, 1000, true, false);
253  data_leading_space.result.resize(data_leading_space.data.size());
254 
255  TestData data_trailing_space;
256  AddTestData(&data_trailing_space, 1000, -5, 1000, false, true);
257  data_trailing_space.result.resize(data_trailing_space.data.size());
258 
259  TestData data_both_space;
260  AddTestData(&data_both_space, 1000, -5, 1000, true, true);
261  data_both_space.result.resize(data_trailing_space.data.size());
262 
263  TestData data_garbage;
264  for (int i = 0; i < 1000; ++i) {
265  AddTestData(&data_garbage, "sdfsfdsfasd");
266  }
267  data_garbage.result.resize(data_garbage.data.size());
268 
269  TestData data_trailing_garbage;
270  for (int i = 0; i < 1000; ++i) {
271  AddTestData(&data_trailing_garbage, "123 a");
272  }
273  data_trailing_garbage.result.resize(data_trailing_garbage.data.size());
274 
275  Benchmark suite("atoi");
276  suite.AddBenchmark("strtol", TestStrtol, &data);
277  suite.AddBenchmark("atoi", TestAtoi, &data);
278  suite.AddBenchmark("impala_unsafe", TestImpalaUnsafe, &data);
279  suite.AddBenchmark("impala_unrolled", TestImpalaUnrolled, &data);
280  suite.AddBenchmark("impala_cased", TestImpalaCased, &data);
281 
282  suite.AddBenchmark("impala", TestImpala, &data);
283  suite.AddBenchmark("impala_leading_space", TestImpala, &data_leading_space);
284  suite.AddBenchmark("impala_trailing_space", TestImpala, &data_trailing_space);
285  suite.AddBenchmark("impala_both_space", TestImpala, &data_both_space);
286  suite.AddBenchmark("impala_garbage", TestImpala, &data_garbage);
287  suite.AddBenchmark("impala_trailing_garbage", TestImpala, &data_trailing_garbage);
288 
289  cout << suite.Measure();
290 
291  return 0;
292 }
void TestImpalaCased(int batch_size, void *d)
vector< string > memory
int AddBenchmark(const std::string &name, BenchmarkFunction fn, void *args, int baseline_idx=0)
Definition: benchmark.cc:70
vector< int32_t > result
static std::string GetMachineInfo()
Output machine/build configuration as a string.
Definition: benchmark.cc:124
void TestImpalaUnrolled(int batch_size, void *d)
std::string Measure()
Runs all the benchmarks and returns the result in a formatted string.
Definition: benchmark.cc:83
int32_t AtoiUnsafe(char *s, int len)
void AddTestData(TestData *data, const string &input)
void TestImpalaUnsafe(int batch_size, void *d)
int32_t AtoiUnrolled(char *s, int len)
#define DIGIT(c)
int main(int argc, char **argv)
void TestAtoi(int batch_size, void *d)
int32_t AtoiCased(char *s, int len)
vector< StringValue > data
#define LIKELY(expr)
Definition: compiler-util.h:32
vector< Decimal > result
void TestStrtol(int batch_size, void *d)
static void Init()
Initialize CpuInfo.
Definition: cpu-info.cc:75
#define VALIDATE_RESULT(actual, expected, str)
void TestImpala(int batch_size, void *d)