16 #ifndef IMPALA_UTIL_STRING_PARSER_H
17 #define IMPALA_UTIL_STRING_PARSER_H
20 #include <boost/type_traits.hpp>
60 T ans = StringToIntInternal<T>(s, len, result);
64 return StringToIntInternal<T>(s + i, len - i, result);
70 T ans = StringToIntInternal<T>(s, len, base, result);
74 return StringToIntInternal<T>(s + i, len - i, base, result);
79 T ans = StringToFloatInternal<T>(s, len, result);
83 return StringToFloatInternal<T>(s + i, len - i, result);
102 return StringToDecimal<T>(
reinterpret_cast<const char*
>(s), len, type, result);
105 template <
typename T>
123 bool is_negative =
false;
135 bool found_value =
false;
136 while (len > 0 &&
UNLIKELY(*s ==
'0')) {
147 if (len > 0 && *s ==
'.') {
151 while (len > 0 &&
UNLIKELY(*s ==
'0')) {
160 bool found_exponent =
false;
163 for (
int i = 0; i < len; ++i) {
164 const char& c = s[i];
165 if (
LIKELY(
'0' <= c && c <=
'9')) {
172 value = (value * 10) + (c -
'0');
177 }
else if (c ==
'.' &&
LIKELY(!found_dot)) {
179 }
else if ((c ==
'e' || c ==
'E') &&
LIKELY(!found_exponent)) {
180 found_exponent =
true;
181 exponent = StringToIntInternal<int8_t>(s + i + 1, len - i - 1, result);
191 int truncated_digit_count = precision - type.
precision;
192 if (exponent > scale) {
195 precision += exponent - scale;
196 value *= DecimalUtil::GetScaleMultiplier<T>(exponent - scale);
206 if (scale > precision) precision = scale;
215 int shift = scale - type.
scale;
216 if (truncated_digit_count > 0) shift -= truncated_digit_count;
217 if (shift > 0) value /= DecimalUtil::GetScaleMultiplier<T>(shift);
219 }
else if (
UNLIKELY(!found_value && !found_dot)) {
223 if (type.
scale > scale) {
224 value *= DecimalUtil::GetScaleMultiplier<T>(type.
scale - scale);
235 template <
typename T>
242 typedef typename boost::make_unsigned<T>::type UnsignedT;
244 UnsignedT max_val = std::numeric_limits<T>::max();
245 bool negative =
false;
250 max_val = std::numeric_limits<T>::max() + 1;
256 val = StringToIntNoOverflow<UnsignedT>(s + i, len - i, result);
257 return static_cast<T
>(negative ? -val : val);
260 const T max_div_10 = max_val / 10;
261 const T max_mod_10 = max_val % 10;
264 for (; i < len; ++i) {
265 if (
LIKELY(s[i] >=
'0' && s[i] <=
'9')) {
266 T digit = s[i] -
'0';
268 if (
UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
270 return negative ? -max_val : max_val;
272 val = val * 10 + digit;
282 return static_cast<T
>(negative ? -val : val);
286 return static_cast<T
>(negative ? -val : val);
291 template <
typename T>
294 typedef typename boost::make_unsigned<T>::type UnsignedT;
296 UnsignedT max_val = std::numeric_limits<T>::max();
297 bool negative =
false;
306 max_val = std::numeric_limits<T>::max() + 1;
310 const T max_div_base = max_val / base;
311 const T max_mod_base = max_val % base;
314 for (; i < len; ++i) {
316 if (
LIKELY(s[i] >=
'0' && s[i] <=
'9')) {
318 }
else if (s[i] >=
'a' && s[i] <=
'z') {
319 digit = (s[i] -
'a' + 10);
320 }
else if (s[i] >=
'A' && s[i] <=
'Z') {
321 digit = (s[i] -
'A' + 10);
334 if (digit >= base)
break;
338 if (
UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) {
340 return static_cast<T
>(negative ? -max_val : max_val);
342 val = val * base + digit;
345 return static_cast<T
>(negative ? -val : val);
356 template <
typename T>
365 bool negative =
false;
368 bool decimal =
false;
369 int64_t remainder = 0;
375 case '-': negative =
true;
379 for (; i < len; ++i) {
380 if (
LIKELY(s[i] >=
'0' && s[i] <=
'9')) {
381 if (s[i] !=
'0' || sig_figs > 0) ++sig_figs;
389 if (sig_figs <= 18) {
390 remainder = remainder * 10 + s[i] -
'0';
394 val = val * 10 + s[i] -
'0';
396 }
else if (s[i] ==
'.') {
398 }
else if (s[i] ==
'e' || s[i] ==
'E') {
400 }
else if (s[i] ==
'i' || s[i] ==
'I') {
401 if (len > i + 2 && (s[i+1] ==
'n' || s[i+1] ==
'N') &&
402 (s[i+2] ==
'f' || s[i+2] ==
'F')) {
407 return negative ? -INFINITY : INFINITY;
413 }
else if (s[i] ==
'n' || s[i] ==
'N') {
414 if (len > i + 2 && (s[i+1] ==
'a' || s[i+1] ==
'A') &&
415 (s[i+2] ==
'n' || s[i+2] ==
'N')) {
417 return negative ? -NAN : NAN;
435 val += remainder / divide;
437 if (i < len && (s[i] ==
'e' || s[i] ==
'E')) {
442 char c_str[len - negative + 1];
443 memcpy(c_str, s + negative, len - negative);
444 c_str[len - negative] =
'\0';
446 val = strtod(c_str, &s_end);
447 if (s_end != c_str + len - negative) {
449 int trailing_len = len - negative - (int)(s_end - c_str);
458 if (
UNLIKELY(val == std::numeric_limits<T>::infinity())) {
463 return (T)(negative ? -val : val);
470 if (len >= 4 && (s[0] ==
't' || s[0] ==
'T')) {
471 bool match = (s[1] ==
'r' || s[1] ==
'R') &&
472 (s[2] ==
'u' || s[2] ==
'U') &&
473 (s[3] ==
'e' || s[3] ==
'E');
475 }
else if (len >= 5 && (s[0] ==
'f' || s[0] ==
'F')) {
476 bool match = (s[1] ==
'a' || s[1] ==
'A') &&
477 (s[2] ==
'l' || s[2] ==
'L') &&
478 (s[3] ==
's' || s[3] ==
'S') &&
479 (s[4] ==
'e' || s[4] ==
'E');
495 for (
int i = 0; i < len; ++i) {
512 template <
typename T>
520 if (
LIKELY(s[0] >=
'0' && s[0] <=
'9')) {
526 for (
int i = 1; i < len; ++i) {
527 if (
LIKELY(s[i] >=
'0' && s[i] <=
'9')) {
528 T digit = s[i] -
'0';
529 val = val * 10 + digit;
544 return c ==
' ' ||
UNLIKELY(c ==
'\t' || c ==
'\n' || c ==
'\v' || c ==
'\f'
static DecimalValue< T > StringToDecimal(const uint8_t *s, int len, const ColumnType &type, StringParser::ParseResult *result)
static DecimalValue< T > StringToDecimal(const char *s, int len, const ColumnType &type, StringParser::ParseResult *result)
static T StringToFloat(const char *s, int len, ParseResult *result)
static T StringToIntInternal(const char *s, int len, int base, ParseResult *result)
int precision
Only set if type == TYPE_DECIMAL.
static int max_ascii_len()
static T StringToFloatInternal(const char *s, int len, ParseResult *result)
static int SkipLeadingWhitespace(const char *s, int len)
Returns the position of the first non-whitespace character in s.
static bool IsWhitespace(const char &c)
static bool StringToBool(const char *s, int len, ParseResult *result)
Parses a string for 'true' or 'false', case insensitive.
static bool IsAllWhitespace(const char *s, int len)
Returns true if s only contains whitespace.
static T StringToInt(const char *s, int len, int base, ParseResult *result)
Converts a string s representing a number in the given base into a decimal number.
static T StringToIntInternal(const char *s, int len, ParseResult *result)
static bool StringToBoolInternal(const char *s, int len, ParseResult *result)
static T StringToIntNoOverflow(const char *s, int len, ParseResult *result)
static T StringToInt(const char *s, int len, ParseResult *result)