Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
atof-benchmark.cc
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include <stdlib.h>
16 #include <stdio.h>
17 #include <iostream>
18 #include <vector>
19 #include <sstream>
20 #include "runtime/string-value.h"
21 #include "util/benchmark.h"
22 #include "util/cpu-info.h"
23 #include "util/string-parser.h"
24 
25 #include "common/names.h"
26 
27 using namespace impala;
28 
29 // Benchmark for computing atof. This benchmark tests converting from
30 // strings into floats on what we expect to be typical data. The data
31 // is mostly positive numbers with just a couple of them in scientific
32 // notation.
33 //
34 // Machine Info: Intel(R) Core(TM) i7-2600 CPU @ 3.40GHz
35 // atof: Function Rate Comparison
36 // ----------------------------------------------------------------------
37 // Strtod 8.171 1X
38 // Atof 8.057 0.9861X
39 // Impala 67.86 8.306X
40 
// Set VALIDATE to 1 to have VALIDATE_RESULT check every parsed value. With 0 the
// macro expands to nothing, so no checking code ends up in the benchmarked loops.
#define VALIDATE 0

#if VALIDATE
// Compares 'actual' against 'expected'; on a mismatch prints the input string and
// the parsed value, then terminates the process with exit(-1).
// The body is wrapped in do { } while (0) so the expansion is a single statement
// that is safe inside an unbraced if/else, and every argument is parenthesized so
// that arbitrary expressions can be passed.
// Note: 'actual' is evaluated twice when the check fails.
// The comparison is exact. Use fabs?
#define VALIDATE_RESULT(actual, expected, str) \
  do { \
    if ((actual) != (expected)) { \
      cout << "Parse Error. " \
           << "String: " << (str) \
           << ". Parsed: " << (actual) << endl; \
      exit(-1); \
    } \
  } while (0)
#else
#define VALIDATE_RESULT(actual, expected, str)
#endif
55 
56 struct TestData {
57  vector<StringValue> data;
58  vector<string> memory;
59  vector<double> result;
60 };
61 
62 void AddTestData(TestData* data, const string& input) {
63  data->memory.push_back(input);
64  const string& str = data->memory.back();
65  data->data.push_back(StringValue(const_cast<char*>(str.c_str()), str.length()));
66 }
67 
68 void AddTestData(TestData* data, int n, double min = -10, double max = 10) {
69  for (int i = 0; i < n; ++i) {
70  double val = rand();
71  val /= RAND_MAX;
72  val = (val * (max - min)) + min;
73  stringstream ss;
74  ss << val;
75  AddTestData(data, ss.str());
76  }
77 }
78 
79 void TestAtof(int batch_size, void* d) {
80  TestData* data = reinterpret_cast<TestData*>(d);
81  for (int i = 0; i < batch_size; ++i) {
82  int n = data->data.size();
83  for (int j = 0; j < n; ++j) {
84  data->result[j] = atof(data->data[j].ptr);
85  }
86  }
87 }
88 
89 void TestImpala(int batch_size, void* d) {
90  TestData* data = reinterpret_cast<TestData*>(d);
91  for (int i = 0; i < batch_size; ++i) {
92  int n = data->data.size();
93  for (int j = 0; j < n; ++j) {
94  const StringValue& str = data->data[j];
96  double val = StringParser::StringToFloat<double>(str.ptr, str.len, &dummy);
97  VALIDATE_RESULT(val, data->result[j], str.ptr);
98  data->result[j] = val;
99  }
100  }
101 }
102 
103 void TestStrtod(int batch_size, void* d) {
104  TestData* data = reinterpret_cast<TestData*>(d);
105  for (int i = 0; i < batch_size; ++i) {
106  int n = data->data.size();
107  for (int j = 0; j < n; ++j) {
108  data->result[j] = strtod(data->data[j].ptr, NULL);
109  }
110  }
111 }
112 
113 int main(int argc, char **argv) {
114  CpuInfo::Init();
115  cout << Benchmark::GetMachineInfo() << endl;
116 
117  TestData data;
118 
119  // Most data is probably positive
120  AddTestData(&data, 1000, -5, 1000);
121  AddTestData(&data, "1.1e12");
122 
123  data.result.resize(data.data.size());
124 
125  Benchmark suite("atof");
126  suite.AddBenchmark("Strtod", TestStrtod, &data);
127  suite.AddBenchmark("Atof", TestAtof, &data);
128  suite.AddBenchmark("Impala", TestImpala, &data);
129  cout << suite.Measure();
130 
131  return 0;
132 }
vector< string > memory
int AddBenchmark(const std::string &name, BenchmarkFunction fn, void *args, int baseline_idx=0)
Definition: benchmark.cc:70
vector< double > result
void TestAtof(int batch_size, void *d)
void TestStrtod(int batch_size, void *d)
static std::string GetMachineInfo()
Output machine/build configuration as a string.
Definition: benchmark.cc:124
std::string Measure()
Runs all the benchmarks and returns the result in a formatted string.
Definition: benchmark.cc:83
void AddTestData(TestData *data, const string &input)
void TestImpala(int batch_size, void *d)
vector< StringValue > data
vector< Decimal > result
static void Init()
Initialize CpuInfo.
Definition: cpu-info.cc:75
#define VALIDATE_RESULT(actual, expected, str)
int main(int argc, char **argv)