16 #ifndef IMPALA_EXEC_HDFS_RCFILE_SCANNER_H
17 #define IMPALA_EXEC_HDFS_RCFILE_SCANNER_H
227 class TupleDescriptor;
238 void DebugString(
int indentation_level, std::stringstream* out)
const;
264 return THdfsFileFormat::RC_FILE;
virtual FileHeader * AllocateFileHeader()
Implementation of superclass functions.
int32_t current_field_len_rep
RLE: Repetition count of the current field.
uint8_t * row_group_buffer_
virtual THdfsFileFormat::type file_format() const
Returns the type of scanner, e.g. rcfile, seqfile.
std::vector< uint8_t > key_buffer_
Buffer for copying key buffers. This buffer is reused between row groups.
void DebugString(int indentation_level, std::stringstream *out) const
Status ReadNumColumnsMetadata()
A scanner for reading RCFiles into tuples.
virtual Status ReadFileHeader()
Status ReadColumnBuffers()
int32_t buffer_pos
Offset from the start of the column for the next field in the column.
int num_rows_
Number of rows in this row group object.
int32_t buffer_len
Uncompressed and compressed byte lengths for this column.
HdfsRCFileScanner(HdfsScanNode *scan_node, RuntimeState *state)
int32_t current_field_len
RLE: Length of the current field.
int compressed_key_length_
virtual ~HdfsRCFileScanner()
bool materialize_column
If true, this column should be materialized, otherwise, it can be skipped.
bool reuse_row_group_buffer_
void ResetRowGroup()
Reset state for a new row group.
virtual Status Prepare(ScannerContext *context)
One-time initialisation of state that is constant across scan ranges.
int row_group_buffer_size_
static const char *const RCFILE_VALUE_CLASS_NAME
int32_t key_buffer_len
Length and start of the key for this column.
int32_t uncompressed_buffer_len
uint8_t * key_buffer
This is a ptr into the scanner's key_buffer_ for this column.
virtual Status ProcessRange()
Status ReadRowGroupHeader()
virtual Status InitNewRange()
Reset internal state for a new scan range.
void GetCurrentKeyBuffer(int col_idx, bool skip_col_data, uint8_t **key_buf_ptr)
int32_t start_offset
Offset into row_group_buffer_ for the start of this column.
std::vector< ColumnInfo > columns_
static const uint8_t RCFILE_VERSION_HEADER[4]
Status NextField(int col_idx)
static const char *const RCFILE_METADATA_KEY_NUM_COLS
int32_t key_buffer_pos
Current position in the key buffer.
static const char *const RCFILE_KEY_CLASS_NAME