Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
string-parser-test.cc
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include <stdlib.h>
16 #include <stdio.h>
17 #include <iostream>
18 #include <limits>
19 #include <gtest/gtest.h>
20 #include <boost/cstdint.hpp>
21 #include <boost/lexical_cast.hpp>
22 #include "util/string-parser.h"
23 
24 #include "common/names.h"
25 
26 using std::min;
27 using std::numeric_limits;
28 
29 namespace impala {
30 
// Whitespace strings that the helpers below place before and after each test input.
// The first entry is empty, so the unpadded input is exercised as well.
string space[] = {"", " ", "\t\t\t", "\n\n\n", "\v\v\v", "\f\f\f", "\r\r\r"};
// Number of entries in 'space'. Derived from the array so the two cannot drift apart
// when an entry is added or removed.
int space_len = sizeof(space) / sizeof(space[0]);
33 
34 // Tests conversion of s to integer with and without leading/trailing whitespace
35 template<typename T>
36 void TestIntValue(const char* s, T exp_val, StringParser::ParseResult exp_result) {
37  for (int i = 0; i < space_len; ++i) {
38  for (int j = 0; j < space_len; ++j) {
39  // All combinations of leading and/or trailing whitespace.
40  string str = space[i] + s + space[j];
42  T val = StringParser::StringToInt<T>(str.data(), str.length(), &result);
43  EXPECT_EQ(exp_val, val) << str;
44  EXPECT_EQ(result, exp_result);
45  }
46  }
47 }
48 
49 // Tests conversion of s, given a base, to an integer with and without leading/trailing
50 // whitespace
51 template<typename T>
53  const char* s, int base, T exp_val, StringParser::ParseResult exp_result) {
54  for (int i = 0; i < space_len; ++i) {
55  for (int j = 0; j < space_len; ++j) {
56  // All combinations of leading and/or trailing whitespace.
57  string str = space[i] + s + space[j];
59  T val = StringParser::StringToInt<T>(str.data(), str.length(), base, &result);
60  EXPECT_EQ(exp_val, val) << str;
61  EXPECT_EQ(result, exp_result);
62  }
63  }
64 }
65 
66 void TestBoolValue(const char* s, bool exp_val, StringParser::ParseResult exp_result) {
67  for (int i = 0; i < space_len; ++i) {
68  for (int j = 0; j < space_len; ++j) {
69  // All combinations of leading and/or trailing whitespace.
70  string str = space[i] + s + space[j];
72  bool val = StringParser::StringToBool(str.data(), str.length(), &result);
73  EXPECT_EQ(exp_val, val) << s;
74  EXPECT_EQ(result, exp_result);
75  }
76  }
77 }
78 
79 // Compare Impala's float conversion function against strtod.
80 template<typename T>
81 void TestFloatValue(const string& s, StringParser::ParseResult exp_result) {
83  T val = StringParser::StringToFloat<T>(s.data(), s.length(), &result);
84  EXPECT_EQ(exp_result, result);
85 
86  if (exp_result == StringParser::PARSE_SUCCESS && result == exp_result) {
87  T exp_val = strtod(s.c_str(), NULL);
88  EXPECT_EQ(exp_val, val);
89  }
90 }
91 
92 template<typename T>
93 void TestFloatValueIsNan(const string& s, StringParser::ParseResult exp_result) {
95  T val = StringParser::StringToFloat<T>(s.data(), s.length(), &result);
96  EXPECT_EQ(exp_result, result);
97 
98  if (exp_result == StringParser::PARSE_SUCCESS && result == exp_result) {
99  EXPECT_TRUE(isnan(val));
100  }
101 }
102 
103 // Tests conversion of s to double and float with +/- prefixing (and no prefix) and with
104 // and without leading/trailing whitespace
105 void TestAllFloatVariants(const string& s, StringParser::ParseResult exp_result) {
106  string sign[] = {"", "+", "-"};
107  for (int i = 0; i < space_len; ++i) {
108  for (int j = 0; j < space_len; ++j) {
109  for (int k = 0; k < 3; ++k) {
110  // All combinations of leading and/or trailing whitespace and +/- sign.
111  string str = space[i] + sign[k] + s + space[j];
112  TestFloatValue<float>(str, exp_result);
113  TestFloatValue<double>(str, exp_result);
114  }
115  }
116  }
117 }
118 
119 template<typename T>
121  T min_val = numeric_limits<T>::min();
122  T max_val = numeric_limits<T>::max();
123 
124  // Keep multiplying by 2.
125  T cur_val = 1.0;
126  while (cur_val < max_val) {
127  string s = lexical_cast<string>(cur_val);
128  TestFloatValue<T>(s, StringParser::PARSE_SUCCESS);
129  cur_val *= 2;
130  }
131 
132  // Keep dividing by 2.
133  cur_val = 1.0;
134  while (cur_val > min_val) {
135  string s = lexical_cast<string>(cur_val);
136  TestFloatValue<T>(s, StringParser::PARSE_SUCCESS);
137  cur_val /= 2;
138  }
139 }
140 
141 TEST(StringToInt, Basic) {
142  TestIntValue<int8_t>("123", 123, StringParser::PARSE_SUCCESS);
143  TestIntValue<int16_t>("123", 123, StringParser::PARSE_SUCCESS);
144  TestIntValue<int32_t>("123", 123, StringParser::PARSE_SUCCESS);
145  TestIntValue<int64_t>("123", 123, StringParser::PARSE_SUCCESS);
146 
147  TestIntValue<int8_t>("123", 123, StringParser::PARSE_SUCCESS);
148  TestIntValue<int16_t>("12345", 12345, StringParser::PARSE_SUCCESS);
149  TestIntValue<int32_t>("12345678", 12345678, StringParser::PARSE_SUCCESS);
150  TestIntValue<int64_t>("12345678901234", 12345678901234, StringParser::PARSE_SUCCESS);
151 
152  TestIntValue<int8_t>("-10", -10, StringParser::PARSE_SUCCESS);
153  TestIntValue<int16_t>("-10", -10, StringParser::PARSE_SUCCESS);
154  TestIntValue<int32_t>("-10", -10, StringParser::PARSE_SUCCESS);
155  TestIntValue<int64_t>("-10", -10, StringParser::PARSE_SUCCESS);
156 
157  TestIntValue<int8_t>("+1", 1, StringParser::PARSE_SUCCESS);
158  TestIntValue<int16_t>("+1", 1, StringParser::PARSE_SUCCESS);
159  TestIntValue<int32_t>("+1", 1, StringParser::PARSE_SUCCESS);
160  TestIntValue<int64_t>("+1", 1, StringParser::PARSE_SUCCESS);
161 
162  TestIntValue<int8_t>("+0", 0, StringParser::PARSE_SUCCESS);
163  TestIntValue<int16_t>("-0", 0, StringParser::PARSE_SUCCESS);
164  TestIntValue<int32_t>("+0", 0, StringParser::PARSE_SUCCESS);
165  TestIntValue<int64_t>("-0", 0, StringParser::PARSE_SUCCESS);
166 }
167 
168 TEST(StringToInt, InvalidLeadingTrailing) {
169  // Test that trailing garbage is not allowed.
170  TestIntValue<int8_t>("123xyz ", 0, StringParser::PARSE_FAILURE);
171  TestIntValue<int8_t>("-123xyz ", 0, StringParser::PARSE_FAILURE);
172  TestIntValue<int8_t>(" 123xyz ", 0, StringParser::PARSE_FAILURE);
173  TestIntValue<int8_t>(" -12 3xyz ", 0, StringParser::PARSE_FAILURE);
174  TestIntValue<int8_t>("12 3", 0, StringParser::PARSE_FAILURE);
175  TestIntValue<int8_t>("-12 3", 0, StringParser::PARSE_FAILURE);
176 
177  // Must have at least one leading valid digit.
178  TestIntValue<int8_t>("x123", 0, StringParser::PARSE_FAILURE);
179  TestIntValue<int8_t>(" x123", 0, StringParser::PARSE_FAILURE);
180  TestIntValue<int8_t>(" -x123", 0, StringParser::PARSE_FAILURE);
181  TestIntValue<int8_t>(" x-123", 0, StringParser::PARSE_FAILURE);
182 
183  // Test empty string and string with only whitespaces.
184  TestIntValue<int8_t>("", 0, StringParser::PARSE_FAILURE);
185  TestIntValue<int8_t>(" ", 0, StringParser::PARSE_FAILURE);
186 }
187 
188 TEST(StringToInt, Limit) {
189  TestIntValue<int8_t>("127", 127, StringParser::PARSE_SUCCESS);
190  TestIntValue<int8_t>("-128", -128, StringParser::PARSE_SUCCESS);
191  TestIntValue<int16_t>("32767", 32767, StringParser::PARSE_SUCCESS);
192  TestIntValue<int16_t>("-32768", -32768, StringParser::PARSE_SUCCESS);
193  TestIntValue<int32_t>("2147483647", 2147483647, StringParser::PARSE_SUCCESS);
194  TestIntValue<int32_t>("-2147483648", -2147483648, StringParser::PARSE_SUCCESS);
195  TestIntValue<int64_t>("9223372036854775807", numeric_limits<int64_t>::max(),
197  TestIntValue<int64_t>("-9223372036854775808", numeric_limits<int64_t>::min(),
199 }
200 
201 TEST(StringToInt, Overflow) {
202  TestIntValue<int8_t>("128", 127, StringParser::PARSE_OVERFLOW);
203  TestIntValue<int8_t>("-129", -128, StringParser::PARSE_OVERFLOW);
204  TestIntValue<int16_t>("32768", 32767, StringParser::PARSE_OVERFLOW);
205  TestIntValue<int16_t>("-32769", -32768, StringParser::PARSE_OVERFLOW);
206  TestIntValue<int32_t>("2147483648", 2147483647, StringParser::PARSE_OVERFLOW);
207  TestIntValue<int32_t>("-2147483649", -2147483648, StringParser::PARSE_OVERFLOW);
208  TestIntValue<int64_t>("9223372036854775808", 9223372036854775807LL,
210  TestIntValue<int64_t>("-9223372036854775809", numeric_limits<int64_t>::min(),
212 }
213 
214 TEST(StringToInt, Int8_Exhaustive) {
215  char buffer[5];
216  for (int i = -256; i <= 256; ++i) {
217  sprintf(buffer, "%d", i);
218  int8_t expected = i;
219  if (i > 127) {
220  expected = 127;
221  } else if (i < -128) {
222  expected = -128;
223  }
224  TestIntValue<int8_t>(buffer, expected,
226  }
227 }
228 
229 TEST(StringToIntWithBase, Basic) {
230  TestIntValue<int8_t>("123", 10, 123, StringParser::PARSE_SUCCESS);
231  TestIntValue<int16_t>("123", 10, 123, StringParser::PARSE_SUCCESS);
232  TestIntValue<int32_t>("123", 10, 123, StringParser::PARSE_SUCCESS);
233  TestIntValue<int64_t>("123", 10, 123, StringParser::PARSE_SUCCESS);
234 
235  TestIntValue<int8_t>("123", 10, 123, StringParser::PARSE_SUCCESS);
236  TestIntValue<int16_t>("12345", 10, 12345, StringParser::PARSE_SUCCESS);
237  TestIntValue<int32_t>("12345678", 10, 12345678, StringParser::PARSE_SUCCESS);
238  TestIntValue<int64_t>("12345678901234", 10, 12345678901234, StringParser::PARSE_SUCCESS);
239 
240  TestIntValue<int8_t>("-10", 10, -10, StringParser::PARSE_SUCCESS);
241  TestIntValue<int16_t>("-10", 10, -10, StringParser::PARSE_SUCCESS);
242  TestIntValue<int32_t>("-10", 10, -10, StringParser::PARSE_SUCCESS);
243  TestIntValue<int64_t>("-10", 10, -10, StringParser::PARSE_SUCCESS);
244 
245  TestIntValue<int8_t>("+1", 10, 1, StringParser::PARSE_SUCCESS);
246  TestIntValue<int16_t>("+1", 10, 1, StringParser::PARSE_SUCCESS);
247  TestIntValue<int32_t>("+1", 10, 1, StringParser::PARSE_SUCCESS);
248  TestIntValue<int64_t>("+1", 10, 1, StringParser::PARSE_SUCCESS);
249 
250  TestIntValue<int8_t>("+0", 10, 0, StringParser::PARSE_SUCCESS);
251  TestIntValue<int16_t>("-0", 10, 0, StringParser::PARSE_SUCCESS);
252  TestIntValue<int32_t>("+0", 10, 0, StringParser::PARSE_SUCCESS);
253  TestIntValue<int64_t>("-0", 10, 0, StringParser::PARSE_SUCCESS);
254 
255  TestIntValue<int8_t>("a", 16, 10, StringParser::PARSE_SUCCESS);
256  TestIntValue<int8_t>("A", 16, 10, StringParser::PARSE_SUCCESS);
257  TestIntValue<int8_t>("b", 20, 11, StringParser::PARSE_SUCCESS);
258  TestIntValue<int8_t>("B", 20, 11, StringParser::PARSE_SUCCESS);
259  TestIntValue<int8_t>("z", 36, 35, StringParser::PARSE_SUCCESS);
260  TestIntValue<int16_t>("f0a", 16, 3850, StringParser::PARSE_SUCCESS);
261  TestIntValue<int8_t>("7", 8, 7, StringParser::PARSE_SUCCESS);
262  TestIntValue<int8_t>("10", 2, 2, StringParser::PARSE_SUCCESS);
263 }
264 
265 TEST(StringToIntWithBase, NonNumericCharacters) {
266  // Alphanumeric digits that are not in base are ok
267  TestIntValue<int8_t>("123abc ", 10, 123, StringParser::PARSE_SUCCESS);
268  TestIntValue<int8_t>("-123abc ", 10, -123, StringParser::PARSE_SUCCESS);
269  TestIntValue<int8_t>(" 123abc ", 10, 123, StringParser::PARSE_SUCCESS);
270  TestIntValue<int8_t>("a123", 10, 0, StringParser::PARSE_SUCCESS);
271  TestIntValue<int8_t>(" a123", 10, 0, StringParser::PARSE_SUCCESS);
272  TestIntValue<int8_t>(" -a123", 10, 0, StringParser::PARSE_SUCCESS);
273  TestIntValue<int8_t>(" a!123", 10, 0, StringParser::PARSE_SUCCESS);
274  TestIntValue<int8_t>(" a!123", 10, 0, StringParser::PARSE_SUCCESS);
275 
276  // Trailing white space + digits is not ok
277  TestIntValue<int8_t>(" -12 3xyz ", 10, 0, StringParser::PARSE_FAILURE);
278  TestIntValue<int8_t>("12 3", 10, 0, StringParser::PARSE_FAILURE);
279  TestIntValue<int8_t>("-12 3", 10, 0, StringParser::PARSE_FAILURE);
280 
281  // Must have at least one leading valid digit.
282  TestIntValue<int8_t>("!123", 0, StringParser::PARSE_FAILURE);
283 
284  // Test empty string and string with only whitespaces.
285  TestIntValue<int8_t>("", 0, StringParser::PARSE_FAILURE);
286  TestIntValue<int8_t>(" ", 0, StringParser::PARSE_FAILURE);
287 }
288 
289 TEST(StringToIntWithBase, Limit) {
290  TestIntValue<int8_t>("127", 10, 127, StringParser::PARSE_SUCCESS);
291  TestIntValue<int8_t>("-128", 10, -128, StringParser::PARSE_SUCCESS);
292  TestIntValue<int16_t>("32767", 10, 32767, StringParser::PARSE_SUCCESS);
293  TestIntValue<int16_t>("-32768", 10, -32768, StringParser::PARSE_SUCCESS);
294  TestIntValue<int32_t>("2147483647", 10, 2147483647, StringParser::PARSE_SUCCESS);
295  TestIntValue<int32_t>("-2147483648", 10, -2147483648, StringParser::PARSE_SUCCESS);
296  TestIntValue<int64_t>("9223372036854775807", 10, numeric_limits<int64_t>::max(),
298  TestIntValue<int64_t>("-9223372036854775808", 10, numeric_limits<int64_t>::min(),
300 }
301 
302 TEST(StringToIntWithBase, Overflow) {
303  TestIntValue<int8_t>("128", 10, 127, StringParser::PARSE_OVERFLOW);
304  TestIntValue<int8_t>("-129", 10, -128, StringParser::PARSE_OVERFLOW);
305  TestIntValue<int16_t>("32768", 10, 32767, StringParser::PARSE_OVERFLOW);
306  TestIntValue<int16_t>("-32769", 10, -32768, StringParser::PARSE_OVERFLOW);
307  TestIntValue<int32_t>("2147483648", 10, 2147483647, StringParser::PARSE_OVERFLOW);
308  TestIntValue<int32_t>("-2147483649", 10, -2147483648, StringParser::PARSE_OVERFLOW);
309  TestIntValue<int64_t>("9223372036854775808", 10, 9223372036854775807LL,
311  TestIntValue<int64_t>("-9223372036854775809", 10, numeric_limits<int64_t>::min(),
313 }
314 
315 TEST(StringToIntWithBase, Int8_Exhaustive) {
316  char buffer[5];
317  for (int i = -256; i <= 256; ++i) {
318  sprintf(buffer, "%d", i);
319  int8_t expected = i;
320  if (i > 127) {
321  expected = 127;
322  } else if (i < -128) {
323  expected = -128;
324  }
325  TestIntValue<int8_t>(buffer, 10, expected,
327  }
328 }
329 
330 TEST(StringToFloat, Basic) {
337 
338  // Scientific notation.
353 
354  // Min/max values.
355  string float_min = lexical_cast<string>(numeric_limits<float>::min());
356  string float_max = lexical_cast<string>(numeric_limits<float>::max());
357  TestFloatValue<float>(float_min, StringParser::PARSE_SUCCESS);
358  TestFloatValue<float>(float_max, StringParser::PARSE_SUCCESS);
359  string double_min = lexical_cast<string>(numeric_limits<double>::min());
360  string double_max = lexical_cast<string>(numeric_limits<double>::max());
361  TestFloatValue<double>(double_min, StringParser::PARSE_SUCCESS);
362  TestFloatValue<double>(double_max, StringParser::PARSE_SUCCESS);
363 
364  // Non-finite values
368 
369  TestFloatValueIsNan<float>("nan", StringParser::PARSE_SUCCESS);
370  TestFloatValueIsNan<double>("nan", StringParser::PARSE_SUCCESS);
371  TestFloatValueIsNan<float>("NaN", StringParser::PARSE_SUCCESS);
372  TestFloatValueIsNan<double>("NaN", StringParser::PARSE_SUCCESS);
373  TestFloatValueIsNan<float>("nana", StringParser::PARSE_SUCCESS);
374  TestFloatValueIsNan<double>("nana", StringParser::PARSE_SUCCESS);
375  TestFloatValueIsNan<float>("naN", StringParser::PARSE_SUCCESS);
376  TestFloatValueIsNan<double>("naN", StringParser::PARSE_SUCCESS);
377 
378  TestFloatValueIsNan<float>("n aN", StringParser::PARSE_FAILURE);
379  TestFloatValueIsNan<float>("nnaN", StringParser::PARSE_FAILURE);
380 
381 
382  // Overflow.
383  TestFloatValue<float>(float_max + "11111", StringParser::PARSE_OVERFLOW);
384  TestFloatValue<double>(double_max + "11111", StringParser::PARSE_OVERFLOW);
385  TestFloatValue<float>("-" + float_max + "11111", StringParser::PARSE_OVERFLOW);
386  TestFloatValue<double>("-" + double_max + "11111", StringParser::PARSE_OVERFLOW);
387 
388  // Precision limits
389  // Regression test for IMPALA-1622 (make sure we get correct result with many digits
390  // after decimal)
391  TestAllFloatVariants("1.12345678912345678912", StringParser::PARSE_SUCCESS);
392  TestAllFloatVariants("1.1234567890123456789012", StringParser::PARSE_SUCCESS);
393  TestAllFloatVariants("1.01234567890123456789012", StringParser::PARSE_SUCCESS);
394  TestAllFloatVariants("1.01111111111111111111111", StringParser::PARSE_SUCCESS);
395  TestAllFloatVariants("0.1234567890123456789012", StringParser::PARSE_SUCCESS);
396  TestAllFloatVariants("0.01234567890123456789012", StringParser::PARSE_SUCCESS);
397  TestAllFloatVariants(".1234567890123456789012", StringParser::PARSE_SUCCESS);
398  TestAllFloatVariants("0.01234567890123456789012", StringParser::PARSE_SUCCESS);
400  "12345678901234567890.1234567890123456789012", StringParser::PARSE_SUCCESS);
402  "12345678901234567890.01234567890123456789012", StringParser::PARSE_SUCCESS);
403  TestAllFloatVariants("0.000000000000000000001234", StringParser::PARSE_SUCCESS);
404  TestAllFloatVariants("1.000000000000000000001234", StringParser::PARSE_SUCCESS);
405  TestAllFloatVariants(".000000000000000000001234", StringParser::PARSE_SUCCESS);
406  TestAllFloatVariants("0.000000000000000000001234e10", StringParser::PARSE_SUCCESS);
408  "00000000000000000000.000000000000000000000", StringParser::PARSE_SUCCESS);
410  "00000000000000000000.000000000000000000001", StringParser::PARSE_SUCCESS);
411  TestAllFloatVariants("12345678901234567890123456", StringParser::PARSE_SUCCESS);
412  TestAllFloatVariants("12345678901234567890123456e10", StringParser::PARSE_SUCCESS);
413 
414  // Invalid floats.
428 }
429 
430 TEST(StringToFloat, InvalidLeadingTrailing) {
431  // Test that trailing garbage is not allowed.
432  TestFloatValue<double>("123xyz ", StringParser::PARSE_FAILURE);
433  TestFloatValue<double>("-123xyz ", StringParser::PARSE_FAILURE);
434  TestFloatValue<double>(" 123xyz ", StringParser::PARSE_FAILURE);
435  TestFloatValue<double>(" -12 3xyz ", StringParser::PARSE_FAILURE);
436  TestFloatValue<double>("12 3", StringParser::PARSE_FAILURE);
437  TestFloatValue<double>("-12 3", StringParser::PARSE_FAILURE);
438 
439  // Must have at least one leading valid digit.
440  TestFloatValue<double>("x123", StringParser::PARSE_FAILURE);
441  TestFloatValue<double>(" x123", StringParser::PARSE_FAILURE);
442  TestFloatValue<double>(" -x123", StringParser::PARSE_FAILURE);
443  TestFloatValue<double>(" x-123", StringParser::PARSE_FAILURE);
444 
445  // Test empty string and string with only whitespaces.
446  TestFloatValue<double>("", StringParser::PARSE_FAILURE);
447  TestFloatValue<double>(" ", StringParser::PARSE_FAILURE);
448 }
449 
450 TEST(StringToFloat, BruteForce) {
451  TestFloatBruteForce<float>();
452  TestFloatBruteForce<double>();
453 }
454 
455 TEST(StringToBool, Basic) {
458 
459  TestBoolValue("false xdfsd", false, StringParser::PARSE_FAILURE);
460  TestBoolValue("true xdfsd", false, StringParser::PARSE_FAILURE);
461  TestBoolValue("ffffalse xdfsd", false, StringParser::PARSE_FAILURE);
462  TestBoolValue("tttfalse xdfsd", false, StringParser::PARSE_FAILURE);
463 }
464 
465 }
466 
467 int main(int argc, char **argv) {
468  ::testing::InitGoogleTest(&argc, argv);
469  return RUN_ALL_TESTS();
470 }
void TestAllFloatVariants(const string &s, StringParser::ParseResult exp_result)
TEST(AtomicTest, Basic)
Definition: atomic-test.cc:28
void TestBoolValue(const char *s, bool exp_val, StringParser::ParseResult exp_result)
void TestFloatBruteForce()
string space[]
void TestIntValue(const char *s, T exp_val, StringParser::ParseResult exp_result)
static bool StringToBool(const char *s, int len, ParseResult *result)
Parses a string for 'true' or 'false', case insensitive.
Definition: string-parser.h:87
int main(int argc, char **argv)
void TestFloatValue(const string &s, StringParser::ParseResult exp_result)
void TestFloatValueIsNan(const string &s, StringParser::ParseResult exp_result)