Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
parse-timestamp-benchmark.cc
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include <stdlib.h>
16 #include <stdio.h>
17 #include <iostream>
18 #include <vector>
19 #include <sstream>
20 #include "runtime/string-value.h"
22 #include "util/benchmark.h"
23 #include "util/cpu-info.h"
24 #include "common/names.h"
25 
26 namespace gregorian = boost::gregorian;
27 using boost::posix_time::duration_from_string;
28 using boost::posix_time::hours;
29 using boost::posix_time::nanoseconds;
30 using boost::posix_time::time_duration;
31 using boost::posix_time::to_iso_extended_string;
32 using boost::posix_time::to_simple_string;
33 using namespace impala;
34 
35 // Benchmark for parsing timestamps.
36 // Machine Info: Intel(R) Core(TM) i7-2600 CPU @ 3.40GHz
37 // ParseDate: Function Rate Comparison
38 // ----------------------------------------------------------------------
39 // BoostStringDate 0.6793 1X
40 // BoostDate 0.6583 0.9691X
41 // Impala 28.75 42.32X
42 //
43 // ParseTimestamp: Function Rate Comparison
44 // ----------------------------------------------------------------------
45 // BoostTime 0.455 1X
46 // Impala 28.39 62.39X
47 
// Set VALIDATE to 1 to compile result checking into the benchmark; with 0 the
// check below expands to a no-op statement.
#define VALIDATE 0

// VALIDATE_RESULT(actual, expected, str)
// When VALIDATE is non-zero: compares 'actual' with 'expected' and, on mismatch,
// prints the input string 'str' and the parsed value, then terminates the process
// with exit(-1).
// Both variants expand to a single statement (do/while(0)) so the macro is safe in
// an unbraced if/else and always expects a trailing semicolon. Arguments are
// parenthesized so that expressions containing lower-precedence operators expand
// as intended. Note that 'actual' is evaluated twice on the failure path.
#if VALIDATE
#define VALIDATE_RESULT(actual, expected, str) \
  do { \
    if ((actual) != (expected)) { \
      cout << "Parse Error. " \
           << "String: " << (str) \
           << ". Parsed: " << (actual) << endl; \
      exit(-1); \
    } \
  } while (0)
#else
#define VALIDATE_RESULT(actual, expected, str) \
  do { \
  } while (0)
#endif
61 
62 
63 struct TestData {
64  vector<StringValue> data;
65  vector<string> memory;
66  vector<TimestampValue> result;
67 };
68 
69 void AddTestData(TestData* data, const string& input) {
70  data->memory.push_back(input);
71  const string& str = data->memory.back();
72  data->data.push_back(StringValue(const_cast<char*>(str.c_str()), str.length()));
73 }
74 
75 void AddTestDataDates(TestData* data, int n, const string& startstr) {
76  gregorian::date start(gregorian::from_string(startstr));
77  for (int i = 0; i < n; ++i) {
78  int val = rand();
79  val %= 100;
80  gregorian::date_duration days(val);
81  start += days;
82  stringstream ss;
83  ss << to_iso_extended_string(start);
84  AddTestData(data, ss.str());
85  }
86 }
87 
88 void AddTestDataTimes(TestData* data, int n, const string& startstr) {
89  time_duration start(duration_from_string(startstr));
90  for (int i = 0; i < n; ++i) {
91  int val = rand();
92  start += nanoseconds(val);
93  if (start.hours() >= 24) start -= hours(24);
94  stringstream ss;
95  ss << to_simple_string(start);
96  AddTestData(data, ss.str());
97  }
98 }
99 
100 void TestImpalaDate(int batch_size, void* d) {
101  TestData* data = reinterpret_cast<TestData*>(d);
102  for (int i = 0; i < batch_size; ++i) {
103  int n = data->data.size();
104  for (int j = 0; j < n; ++j) {
105  data->result[j] = TimestampValue(data->data[j].ptr, data->data[j].len);
106  }
107  }
108 }
109 
110 void TestBoostStringDate(int batch_size, void* d) {
111  TestData* data = reinterpret_cast<TestData*>(d);
112  for (int i = 0; i < batch_size; ++i) {
113  int n = data->data.size();
114  for (int j = 0; j < n; ++j) {
115  data->result[j].set_date(gregorian::from_string(data->memory[j]));
116  }
117  }
118 }
119 
120 void TestBoostDate(int batch_size, void* d) {
121  TestData* data = reinterpret_cast<TestData*>(d);
122  for (int i = 0; i < batch_size; ++i) {
123  int n = data->data.size();
124  for (int j = 0; j < n; ++j) {
125  string s(data->data[j].ptr, data->data[j].len);
126  data->result[j].set_date(gregorian::from_string(s));
127  }
128  }
129 }
130 
131 void TestBoostTime(int batch_size, void* d) {
132  TestData* data = reinterpret_cast<TestData*>(d);
133  for (int i = 0; i < batch_size; ++i) {
134  int n = data->data.size();
135  for (int j = 0; j < n; ++j) {
136  string s(data->data[j].ptr, data->data[j].len);
137  data->result[j].set_time(duration_from_string(s));
138  }
139  }
140 }
141 
142 
143 int main(int argc, char **argv) {
144  CpuInfo::Init();
145  cout << Benchmark::GetMachineInfo() << endl;
146 
147  TestData dates, times;
148 
149  AddTestDataDates(&dates, 1000, "1953-04-22");
150  AddTestDataTimes(&times, 1000, "01:02:03.45678");
151 
152  dates.result.resize(dates.data.size());
153  times.result.resize(times.data.size());
154 
155  Benchmark date_suite("ParseDate");
156  date_suite.AddBenchmark("BoostStringDate", TestBoostStringDate, &dates);
157  date_suite.AddBenchmark("BoostDate", TestBoostDate, &dates);
158  date_suite.AddBenchmark("Impala", TestImpalaDate, &dates);
159 
160  Benchmark timestamp_suite("ParseTimestamp");
161  timestamp_suite.AddBenchmark("BoostTime", TestBoostTime, &times);
162  timestamp_suite.AddBenchmark("Impala", TestImpalaDate, &times);
163 
164  cout << date_suite.Measure();
165  cout << endl;
166  cout << timestamp_suite.Measure();
167 
168  return 0;
169 }
vector< string > memory
int AddBenchmark(const std::string &name, BenchmarkFunction fn, void *args, int baseline_idx=0)
Definition: benchmark.cc:70
void TestImpalaDate(int batch_size, void *d)
void AddTestData(TestData *data, const string &input)
static std::string GetMachineInfo()
Output machine/build configuration as a string.
Definition: benchmark.cc:124
void TestBoostStringDate(int batch_size, void *d)
std::string Measure()
Runs all the benchmarks and returns the result in a formatted string.
Definition: benchmark.cc:83
vector< TimestampValue > result
void AddTestDataTimes(TestData *data, int n, const string &startstr)
void AddTestDataDates(TestData *data, int n, const string &startstr)
void TestBoostTime(int batch_size, void *d)
vector< StringValue > data
int main(int argc, char **argv)
vector< Decimal > result
static void Init()
Initialize CpuInfo.
Definition: cpu-info.cc:75
void TestBoostDate(int batch_size, void *d)