16 #ifndef IMPALA_EXEC_DELIMITED_TEXT_PARSER_H
17 #define IMPALA_EXEC_DELIMITED_TEXT_PARSER_H
45 int num_cols,
int num_partition_keys,
const bool* is_materialized_col,
46 char tuple_delim,
char field_delim_ =
'\0',
char collection_item_delim =
'^',
77 char** byte_buffer_ptr,
char** row_end_locations,
79 int* num_tuples,
int* num_fields,
char** next_column_start);
88 template <
bool process_escapes>
115 template <
bool process_escapes>
137 template <
bool process_escapes>
138 void AddColumn(
int len,
char** next_column_start,
int* num_fields,
147 template <
bool process_escapes>
148 void ParseSse(
int max_tuples, int64_t* remaining_len,
149 char** byte_buffer_ptr,
char** row_end_locations_,
151 int* num_tuples,
int* num_fields,
char** next_column_start);
218 #endif// IMPALA_EXEC_DELIMITED_TEXT_PARSER_H
bool HasUnfinishedTuple()
uint16_t low_mask_[16]
Precomputed masks to process escape characters.
__m128i xmm_delim_search_
SSE (xmm) register containing the delimiter search character.
DelimitedTextParser(int num_cols, int num_partition_keys, const bool *is_materialized_col, char tuple_delim, char field_delim_= '\0', char collection_item_delim= '^', char escape_char= '\0')
num_cols is the total number of columns including partition keys.
char tuple_delim_
Character delimiting tuples.
void AddColumn(int len, char **next_column_start, int *num_fields, FieldLocation *field_locations)
int num_delims_
The number of delimiters contained in xmm_delim_search_, i.e. its length.
__m128i xmm_escape_search_
SSE (xmm) register containing the escape search character.
bool last_char_is_escape_
Whether or not the previous character was the escape character.
char collection_item_delim_
Character delimiting collection items (to become slots).
__m128i xmm_tuple_search_
SSE (xmm) register containing the tuple search character.
void ParserInit(HdfsScanNode *scan_node)
Initialize the parser state.
int num_cols_
Number of columns in the table (including partition columns).
char escape_char_
Escape character. Only used if process_escapes_ is true.
const bool * is_materialized_col_
bool ReturnCurrentColumn() const
void FillColumns(int len, char **last_column, int *num_fields, impala::FieldLocation *field_locations)
bool AtTupleStart()
Check if we are at the start of a tuple.
int FindFirstInstance(const char *buffer, int len)
char field_delim_
Character delimiting fields (to become slots).
void ParseSse(int max_tuples, int64_t *remaining_len, char **byte_buffer_ptr, char **row_end_locations_, FieldLocation *field_locations, int *num_tuples, int *num_fields, char **next_column_start)
Status ParseFieldLocations(int max_tuples, int64_t remaining_len, char **byte_buffer_ptr, char **row_end_locations, FieldLocation *field_locations, int *num_tuples, int *num_fields, char **next_column_start)
void ParseSingleTuple(int64_t len, char *buffer, FieldLocation *field_locations, int *num_fields)
Simplified version of ParseSse that does not handle tuple delimiters.
bool current_column_has_escape_
bool process_escapes_
True if this parser should handle escape characters.
bool unfinished_tuple_
True if the last tuple is unfinished (not ended with tuple delimiter).
void ParserReset()
Called to initialize the parser at the beginning of a scan range.
int32_t last_row_delim_offset_
int column_idx_
Index to keep track of the current column in the current file.
int num_partition_keys_
Number of partition columns in the table.