16 #ifndef IMPALA_EXEC_HDFS_PARQUET_SCANNER_H
17 #define IMPALA_EXEC_HDFS_PARQUET_SCANNER_H
55 const std::vector<HdfsFileDesc*>& files);
190 SchemaNode* node)
const;
194 int max_def_level,
int*
idx,
int* col_idx, SchemaNode* node)
const;
Internal representation of a column schema (including nested-type columns).
Status ValidateFileMetadata()
Validates the file metadata.
bool VersionEq(int major, int minor, int patch) const
Returns true if version is equal to <major>.<minor>.<patch>
std::vector< SchemaNode > children
Any nested schema nodes. Empty for non-nested types.
SchemaNode schema_
The root schema node for this file.
Status parse_status_
Returned in ProcessSplit.
virtual Status Prepare(ScannerContext *context)
One-time initialisation of state that is constant across scan ranges.
const DiskIoMgr::ScanRange * metadata_range_
Scan range for the metadata.
static const int FOOTER_SIZE
parquet::FileMetaData file_metadata_
File metadata thrift object.
std::vector< BaseColumnReader * > column_readers_
Column readers, one for each materialized column in this file.
std::string application
Application that wrote the file. e.g. "IMPALA".
Status AssembleRows(int row_group_idx)
boost::scoped_ptr< MemPool > dictionary_pool_
FileVersion file_version_
Version of the application that wrote this file.
ScopedTimer< MonotonicStopWatch > assemble_rows_timer_
Timer for materializing rows. This ignores time getting the next buffer.
HdfsParquetScanner(HdfsScanNode *scan_node, RuntimeState *state)
struct impala::HdfsParquetScanner::FileVersion::@7 version
Status ValidateColumn(const BaseColumnReader &col_reader, int row_group_idx)
Status InitColumns(int row_group_idx)
RuntimeProfile::Counter * num_cols_counter_
Number of columns that need to be read.
virtual ~HdfsParquetScanner()
const parquet::SchemaElement * element
The corresponding schema element defined in the file metadata.
Status InitNewRange()
Part of the HdfsScanner interface, not used in Parquet.
SlotDescriptor * slot_desc
Status ProcessFooter(bool *eosr)
BaseColumnReader * CreateReader(const SchemaNode &node)
virtual Status ProcessSplit()
Status CreateColumnReaders()
static Status IssueInitialRanges(HdfsScanNode *scan_node, const std::vector< HdfsFileDesc * > &files)
std::string DebugString(int indent=0) const
bool is_impala_internal
If true, this file was generated by an Impala internal release.
bool VersionLt(int major, int minor=0, int patch=0) const
Returns true if version is strictly less than <major>.<minor>.<patch>
Status CreateSchemaTree(const std::vector< parquet::SchemaElement > &schema, SchemaNode *node) const