15 #ifndef IMPALA_RUNTIME_TIMESTAMP_PARSE_UTIL_H
16 #define IMPALA_RUNTIME_TIMESTAMP_PARSE_UTIL_H
18 #include <boost/assign/list_of.hpp>
19 #include <boost/date_time/posix_time/posix_time.hpp>
20 #include <boost/foreach.hpp>
21 #include <boost/unordered_map.hpp>
81 std::vector<DateTimeFormatToken>
toks;
136 DCHECK(dt_ctx != NULL);
137 DCHECK(dt_ctx->
fmt != NULL);
139 DCHECK(dt_ctx->
toks.size() == 0);
140 const char* str_begin = dt_ctx->
fmt;
141 const char* str_end = str_begin + dt_ctx->
fmt_len;
142 const char* str = str_begin;
144 while (str < str_end) {
145 if (isdigit(*str))
return false;
147 if ((*str ==
'T') || (*str ==
'Z') || (!isalpha(*str))) {
154 case 'y': tok_type =
YEAR;
break;
160 case 'S': tok_type =
FRACTION;
break;
162 default:
return false;
169 const char* curr_tok_chr = str + 1;
170 while (curr_tok_chr < str_end) {
171 if (*curr_tok_chr != tok_chr)
break;
176 if (
UNLIKELY(tok_len > 3))
return false;
185 dt_ctx->
toks.push_back(tok);
202 static inline bool Parse(
const char* str,
int len, boost::gregorian::date* d,
203 boost::posix_time::time_duration* t) {
207 if (
UNLIKELY(str == NULL || len <= 0)) {
208 *d = boost::gregorian::date();
209 *t = boost::posix_time::time_duration(boost::posix_time::not_a_date_time);
213 while (len > 0 && isspace(*str)) {
218 while (len > 0 && isspace(str[len - 1])) --len;
266 }
else if (str[2] ==
':') {
275 if (
LIKELY(dt_ctx != NULL)) {
276 return Parse(str, len, *dt_ctx, d, t);
278 *d = boost::gregorian::date();
279 *t = boost::posix_time::time_duration(boost::posix_time::not_a_date_time);
293 boost::gregorian::date* d, boost::posix_time::time_duration* t) {
295 DCHECK(dt_ctx.
toks.size() > 0);
299 if (
UNLIKELY(str == NULL || len <= 0 ||
301 *d = boost::gregorian::date();
302 *t = boost::posix_time::time_duration(boost::posix_time::not_a_date_time);
307 *d = boost::gregorian::date(dt_result.
year, dt_result.
month, dt_result.
day);
308 }
catch (boost::exception& e) {
311 *d = boost::gregorian::date();
312 *t = boost::posix_time::time_duration(boost::posix_time::not_a_date_time);
316 *d = boost::gregorian::date();
319 *t = boost::posix_time::time_duration(dt_result.
hour, dt_result.
minute,
322 *t = boost::posix_time::time_duration(0, 0, 0, 0);
336 const boost::gregorian::date& d,
const boost::posix_time::time_duration& t,
337 int len,
char* buff) {
339 DCHECK(dt_ctx.
toks.size() > 0);
341 DCHECK(buff != NULL);
346 int32_t num_val = -1;
347 const char* str_val = NULL;
352 if (tok.
len <= 3) num_val %= 100;
357 str_val = d.month().as_short_string();
366 num_val = t.fractional_seconds();
367 if (num_val > 0)
for (
int j = tok.
len; j < 9; ++j) num_val /= 10;
372 str_val_len = tok.
len;
375 default: DCHECK(
false) <<
"Unknown date/time format token";
378 str += sprintf(str,
"%0*d", tok.
len, num_val);
380 memcpy(str, str_val, str_val_len);
393 DCHECK(dt_ctx.
toks.size() > 0);
394 DCHECK(dt_result != NULL);
395 if (str_len <= 0 || str_len < dt_ctx.
fmt_len || str == NULL)
return false;
398 const char* tok_val = str + tok.
pos;
405 dt_result->
year = StringParser::StringToInt<int>(tok_val, tok.
len, &status);
408 if (tok.
len < 4 && dt_result->
year < 99) dt_result->
year += 2000;
412 dt_result->
month = StringParser::StringToInt<int>(tok_val, tok.
len, &status);
418 char raw_buff[tok.
len];
419 std::transform(tok_val, tok_val + tok.
len, raw_buff, ::tolower);
421 boost::unordered_map<StringValue, int>::const_iterator iter =
424 dt_result->
month = iter->second;
428 dt_result->
day = StringParser::StringToInt<int>(tok_val, tok.
len, &status);
431 if (
UNLIKELY(dt_result->
day < 1 || dt_result->
day > 31))
return false;
435 dt_result->
hour = StringParser::StringToInt<int>(tok_val, tok.
len, &status);
441 dt_result->
minute = StringParser::StringToInt<int>(tok_val, tok.
len, &status);
447 dt_result->
second = StringParser::StringToInt<int>(tok_val, tok.
len, &status);
454 StringParser::StringToInt<int32_t>(tok_val, tok.
len, &status);
459 for (
int i = tok.
len; i < 9; ++i) dt_result->
fraction *= 10;
462 default: DCHECK(
false) <<
"Unknown date/time format token";
static DateTimeFormatContext DEFAULT_DATE_TIME_CTX[10]
Used for parsing both default and custom formatted timestamp values.
static const int DEFAULT_TIME_FRAC_FMT_LEN
DateTimeFormatContext(const char *fmt, int fmt_len)
static bool ParseFormatTokens(DateTimeFormatContext *dt_ctx)
DateTimeFormatTokenType
Used to indicate the type of a date/time format token group.
static bool Parse(const char *str, int len, boost::gregorian::date *d, boost::posix_time::time_duration *t)
static boost::unordered_map< StringValue, int > REV_MONTH_INDEX
Lazily initialized pseudo-constant hashmap for mapping month names to an index.
void Reset(const char *fmt, int fmt_len)
static bool initialized_
Used to indicate if the parsing state has been initialized.
static const int DEFAULT_DATE_TIME_FMT_LEN
static const int DEFAULT_DATE_FMT_LEN
Constants to hold default format lengths.
static DateTimeFormatContext DEFAULT_TIME_FRAC_CTX[10]
static DateTimeFormatContext DEFAULT_TIME_CTX
static DateTimeFormatContext DEFAULT_SHORT_DATE_TIME_CTX
static int Format(const DateTimeFormatContext &dt_ctx, const boost::gregorian::date &d, const boost::posix_time::time_duration &t, int len, char *buff)
static DateTimeFormatContext DEFAULT_ISO_DATE_TIME_CTX[10]
static const int DEFAULT_SHORT_DATE_TIME_FMT_LEN
static bool Parse(const char *str, int len, const DateTimeFormatContext &dt_ctx, boost::gregorian::date *d, boost::posix_time::time_duration *t)
static bool ParseDateTime(const char *str, int str_len, const DateTimeFormatContext &dt_ctx, DateTimeParseResult *dt_result)
std::vector< DateTimeFormatToken > toks
static DateTimeFormatContext DEFAULT_DATE_CTX
Stores the results of parsing a date/time string.
static DateTimeFormatContext DEFAULT_SHORT_ISO_DATE_TIME_CTX
static const int DEFAULT_TIME_FMT_LEN