Impala
Impalaistheopensource,nativeanalyticdatabaseforApacheHadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
string-compare-benchmark.cc
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include <stdlib.h>
16 #include <stdio.h>
17 #include <iostream>
18 #include "runtime/string-value.h"
19 #include "util/benchmark.h"
20 #include "util/cpu-info.h"
21 #include "util/sse-util.h"
22 
23 #include "common/names.h"
24 
25 using namespace impala;
26 
27 // Original
28 int StringCompare1(const char* s1, int n1, const char* s2, int n2, int len) {
29  DCHECK_EQ(len, std::min(n1, n2));
32  __m128i xmm0 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(s1));
33  __m128i xmm1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(s2));
34  int chars_match = SSE4_cmpestri(xmm0, SSEUtil::CHARS_PER_128_BIT_REGISTER,
36  if (chars_match != SSEUtil::CHARS_PER_128_BIT_REGISTER) {
37  return s1[chars_match] - s2[chars_match];
38  }
42  }
44  // Load 64 bits at a time, the upper 64 bits of the xmm register is set to 0
45  __m128i xmm0 = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(s1));
46  __m128i xmm1 = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(s2));
47  // The upper bits always match (always 0), hence the comparison to
48  // CHAR_PER_128_REGISTER
49  int chars_match = SSE4_cmpestri(xmm0, SSEUtil::CHARS_PER_128_BIT_REGISTER,
51  if (chars_match != SSEUtil::CHARS_PER_128_BIT_REGISTER) {
52  return s1[chars_match] - s2[chars_match];
53  }
57  }
58  }
59  // TODO: for some reason memcmp is way slower than strncmp (2.5x) why?
60  int result = strncmp(s1, s2, len);
61  if (result != 0) return result;
62  return n1 - n2;
63 }
64 
65 // Simplified but broken (can't safely load s1 and s2)
66 int StringCompare2(const char* s1, int n1, const char* s2, int n2, int len) {
67  DCHECK_EQ(len, std::min(n1, n2));
69  while (len > 0) {
70  __m128i xmm0 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(s1));
71  __m128i xmm1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(s2));
72  int n = std::min(len, 16);
73  int chars_match = SSE4_cmpestri(xmm0, n, xmm1, n, SSEUtil::STRCMP_MODE);
74  if (chars_match != SSEUtil::CHARS_PER_128_BIT_REGISTER) {
75  return s1[chars_match] - s2[chars_match];
76  }
80  }
81  return n1 - n2;
82  }
83  // TODO: for some reason memcmp is way slower than strncmp (2.5x) why?
84  int result = strncmp(s1, s2, len);
85  if (result != 0) return result;
86  return n1 - n2;
87 }
88 
89 // Simplified and not broken
90 int StringCompare3(const char* s1, int n1, const char* s2, int n2, int len) {
91  DCHECK_EQ(len, std::min(n1, n2));
94  __m128i xmm0 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(s1));
95  __m128i xmm1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(s2));
96  int chars_match = SSE4_cmpestri(xmm0, SSEUtil::CHARS_PER_128_BIT_REGISTER,
98  if (chars_match != SSEUtil::CHARS_PER_128_BIT_REGISTER) {
99  return s1[chars_match] - s2[chars_match];
100  }
104  }
105  }
106  // TODO: for some reason memcmp is way slower than strncmp (2.5x) why?
107  int result = strncmp(s1, s2, len);
108  if (result != 0) return result;
109  return n1 - n2;
110 }
111 
112 struct TestData {
113  char* s1;
114  int n1;
115  char* s2;
116  int n2;
117  int result;
118 };
119 
120 void TestStringCompare1(int batch_size, void* d) {
121  TestData* data = reinterpret_cast<TestData*>(d);
122  int len = std::min(data->n1, data->n2);
123  for (int i = 0; i < batch_size; ++i) {
124  data->result = StringCompare1(data->s1, data->n1, data->s2, data->n2, len);
125  data->result = StringCompare1(data->s1, data->n1, data->s2, data->n2, len);
126  data->result = StringCompare1(data->s1, data->n1, data->s2, data->n2, len);
127  data->result = StringCompare1(data->s1, data->n1, data->s2, data->n2, len);
128  data->result = StringCompare1(data->s1, data->n1, data->s2, data->n2, len);
129  data->result = StringCompare1(data->s1, data->n1, data->s2, data->n2, len);
130  data->result = StringCompare1(data->s1, data->n1, data->s2, data->n2, len);
131  data->result = StringCompare1(data->s1, data->n1, data->s2, data->n2, len);
132  data->result = StringCompare1(data->s1, data->n1, data->s2, data->n2, len);
133  }
134 }
135 
136 void TestStringCompare2(int batch_size, void* d) {
137  TestData* data = reinterpret_cast<TestData*>(d);
138  int len = std::min(data->n1, data->n2);
139  for (int i = 0; i < batch_size; ++i) {
140  data->result = StringCompare2(data->s1, data->n1, data->s2, data->n2, len);
141  data->result = StringCompare2(data->s1, data->n1, data->s2, data->n2, len);
142  data->result = StringCompare2(data->s1, data->n1, data->s2, data->n2, len);
143  data->result = StringCompare2(data->s1, data->n1, data->s2, data->n2, len);
144  data->result = StringCompare2(data->s1, data->n1, data->s2, data->n2, len);
145  data->result = StringCompare2(data->s1, data->n1, data->s2, data->n2, len);
146  data->result = StringCompare2(data->s1, data->n1, data->s2, data->n2, len);
147  data->result = StringCompare2(data->s1, data->n1, data->s2, data->n2, len);
148  data->result = StringCompare2(data->s1, data->n1, data->s2, data->n2, len);
149  }
150 }
151 
152 void TestStringCompare3(int batch_size, void* d) {
153  TestData* data = reinterpret_cast<TestData*>(d);
154  int len = std::min(data->n1, data->n2);
155  for (int i = 0; i < batch_size; ++i) {
156  data->result = StringCompare3(data->s1, data->n1, data->s2, data->n2, len);
157  data->result = StringCompare3(data->s1, data->n1, data->s2, data->n2, len);
158  data->result = StringCompare3(data->s1, data->n1, data->s2, data->n2, len);
159  data->result = StringCompare3(data->s1, data->n1, data->s2, data->n2, len);
160  data->result = StringCompare3(data->s1, data->n1, data->s2, data->n2, len);
161  data->result = StringCompare3(data->s1, data->n1, data->s2, data->n2, len);
162  data->result = StringCompare3(data->s1, data->n1, data->s2, data->n2, len);
163  data->result = StringCompare3(data->s1, data->n1, data->s2, data->n2, len);
164  data->result = StringCompare3(data->s1, data->n1, data->s2, data->n2, len);
165  }
166 }
167 
169  TestData data;
170  data.s1 = new char[len];
171  data.s2 = new char[len];
172  data.n1 = data.n2 = len;
173  for (int i = 0; i < len; ++i) {
174  data.s1[i] = data.s2[i] = 'a';
175  }
176  return data;
177 }
178 
179 int main(int argc, char **argv) {
180  CpuInfo::Init();
181 
182  Benchmark long_suite("Long strings (10000)");
183  TestData long_data = InitTestData(10000);
184  long_suite.AddBenchmark("Original", TestStringCompare1, &long_data);
185  long_suite.AddBenchmark("Simplified, broken", TestStringCompare2, &long_data);
186  long_suite.AddBenchmark("Simplified, fixed", TestStringCompare3, &long_data);
187  cout << long_suite.Measure();
188 
189  Benchmark med_suite("Med strings (100)");
190  TestData med_data = InitTestData(100);
191  med_suite.AddBenchmark("Original", TestStringCompare1, &med_data);
192  med_suite.AddBenchmark("Simplified, broken", TestStringCompare2, &med_data);
193  med_suite.AddBenchmark("Simplified, fixed", TestStringCompare3, &med_data);
194  cout << med_suite.Measure();
195 
196  Benchmark short_suite("Short strings (10)");
197  TestData short_data = InitTestData(10);
198  short_suite.AddBenchmark("Original", TestStringCompare1, &short_data);
199  short_suite.AddBenchmark("Simplified, broken", TestStringCompare2, &short_data);
200  short_suite.AddBenchmark("Simplified, fixed", TestStringCompare3, &short_data);
201  cout << short_suite.Measure();
202 
203  return 0;
204 }
int AddBenchmark(const std::string &name, BenchmarkFunction fn, void *args, int baseline_idx=0)
Definition: benchmark.cc:70
void TestStringCompare3(int batch_size, void *d)
int StringCompare1(const char *s1, int n1, const char *s2, int n2, int len)
int StringCompare3(const char *s1, int n1, const char *s2, int n2, int len)
void TestStringCompare2(int batch_size, void *d)
static SSE_ALWAYS_INLINE int SSE4_cmpestri(__m128i str1, int len1, __m128i str2, int len2, const int mode)
Definition: sse-util.h:99
std::string Measure()
Runs all the benchmarks and returns the result in a formatted string.
Definition: benchmark.cc:83
TestData InitTestData(int len)
static const int64_t SSE4_2
Definition: cpu-info.h:34
void TestStringCompare1(int batch_size, void *d)
int main(int argc, char **argv)
int StringCompare2(const char *s1, int n1, const char *s2, int n2, int len)
static const int STRCMP_MODE
Definition: sse-util.h:44
static const int CHARS_PER_64_BIT_REGISTER
Definition: sse-util.h:27
vector< Decimal > result
static void Init()
Initialize CpuInfo.
Definition: cpu-info.cc:75
static bool IsSupported(long flag)
Returns whether of not the cpu supports this flag.
Definition: cpu-info.h:58
static const int CHARS_PER_128_BIT_REGISTER
Definition: sse-util.h:28