Impala
Impala is the open source, native analytic database for Apache Hadoop.
|
#include <hdfs-parquet-scanner.h>
Classes | |
class | BaseColumnReader |
class | BoolColumnReader |
class | ColumnReader |
struct | FileVersion |
struct | SchemaNode |
Internal representation of a column schema (including nested-type columns). More... | |
Public Member Functions | |
HdfsParquetScanner (HdfsScanNode *scan_node, RuntimeState *state) | |
virtual | ~HdfsParquetScanner () |
virtual Status | Prepare (ScannerContext *context) |
One-time initialisation of state that is constant across scan ranges. More... | |
virtual void | Close () |
virtual Status | ProcessSplit () |
Static Public Member Functions | |
static Status | IssueInitialRanges (HdfsScanNode *scan_node, const std::vector< HdfsFileDesc * > &files) |
Static Public Attributes | |
static const int | FILE_BLOCK_SIZE = 4096 |
static const char * | LLVM_CLASS_NAME = "class.impala::HdfsScanner" |
Protected Types | |
typedef int(* | WriteTuplesFn )(HdfsScanner *, MemPool *, TupleRow *, int, FieldLocation *, int, int, int, int) |
Protected Member Functions | |
Status | InitializeWriteTuplesFn (HdfsPartitionDescriptor *partition, THdfsFileFormat::type type, const std::string &scanner_name) |
void | StartNewRowBatch () |
Set batch_ to a new row batch and update tuple_mem_ accordingly. More... | |
int | GetMemory (MemPool **pool, Tuple **tuple_mem, TupleRow **tuple_row_mem) |
Status | CommitRows (int num_rows) |
void | AddFinalRowBatch () |
void | AttachPool (MemPool *pool, bool commit_batch) |
bool IR_ALWAYS_INLINE | EvalConjuncts (TupleRow *row) |
int | WriteEmptyTuples (RowBatch *row_batch, int num_tuples) |
int | WriteEmptyTuples (ScannerContext *context, TupleRow *tuple_row, int num_tuples) |
Write empty tuples and commit them to the context object. More... | |
int | WriteAlignedTuples (MemPool *pool, TupleRow *tuple_row_mem, int row_size, FieldLocation *fields, int num_tuples, int max_added_tuples, int slots_per_tuple, int row_start_indx) |
Status | UpdateDecompressor (const THdfsCompression::type &compression) |
Status | UpdateDecompressor (const std::string &codec) |
bool | ReportTupleParseError (FieldLocation *fields, uint8_t *errors, int row_idx) |
virtual void | LogRowParseError (int row_idx, std::stringstream *) |
bool | WriteCompleteTuple (MemPool *pool, FieldLocation *fields, Tuple *tuple, TupleRow *tuple_row, Tuple *template_tuple, uint8_t *error_fields, uint8_t *error_in_row) |
void | ReportColumnParseError (const SlotDescriptor *desc, const char *data, int len) |
void | InitTuple (Tuple *template_tuple, Tuple *tuple) |
Tuple * | next_tuple (Tuple *t) const |
TupleRow * | next_row (TupleRow *r) const |
ExprContext * | GetConjunctCtx (int idx) const |
Static Protected Member Functions | |
static llvm::Function * | CodegenWriteCompleteTuple (HdfsScanNode *, LlvmCodeGen *, const std::vector< ExprContext * > &conjunct_ctxs) |
static llvm::Function * | CodegenWriteAlignedTuples (HdfsScanNode *, LlvmCodeGen *, llvm::Function *write_tuple_fn) |
Protected Attributes | |
HdfsScanNode * | scan_node_ |
The scan node that started this scanner. More... | |
RuntimeState * | state_ |
RuntimeState for error reporting. More... | |
ScannerContext * | context_ |
Context for this scanner. More... | |
ScannerContext::Stream * | stream_ |
The first stream for context_. More... | |
std::vector< ExprContext * > | conjunct_ctxs_ |
Tuple * | template_tuple_ |
int | tuple_byte_size_ |
Fixed size of each tuple, in bytes. More... | |
Tuple * | tuple_ |
Current tuple pointer into tuple_mem_. More... | |
RowBatch * | batch_ |
uint8_t * | tuple_mem_ |
The tuple memory of batch_. More... | |
int | num_errors_in_file_ |
Number of errors in the current file. More... | |
boost::scoped_ptr< TextConverter > | text_converter_ |
Helper class for converting text to other types. More... | |
int32_t | num_null_bytes_ |
Number of null bytes in the tuple. More... | |
boost::scoped_ptr< Codec > | decompressor_ |
Decompressor class to use, if any. More... | |
THdfsCompression::type | decompression_type_ |
The most recently used decompression type. More... | |
boost::scoped_ptr< MemPool > | data_buffer_pool_ |
RuntimeProfile::Counter * | decompress_timer_ |
Time spent decompressing bytes. More... | |
WriteTuplesFn | write_tuples_fn_ |
Jitted write tuples function pointer. Null if codegen is disabled. More... | |
Private Member Functions | |
Status | AssembleRows (int row_group_idx) |
Status | ProcessFooter (bool *eosr) |
Status | CreateColumnReaders () |
BaseColumnReader * | CreateReader (const SchemaNode &node) |
Status | InitColumns (int row_group_idx) |
Status | ValidateFileMetadata () |
Validates the file metadata. More... | |
Status | ValidateColumn (const BaseColumnReader &col_reader, int row_group_idx) |
Status | InitNewRange () |
Part of the HdfsScanner interface, not used in Parquet. More... | |
Status | CreateSchemaTree (const std::vector< parquet::SchemaElement > &schema, SchemaNode *node) const |
Status | CreateSchemaTree (const std::vector< parquet::SchemaElement > &schema, int max_def_level, int *idx, int *col_idx, SchemaNode *node) const |
Recursive implementation used internally by the above CreateSchemaTree() function. More... | |
Private Attributes | |
std::vector< BaseColumnReader * > | column_readers_ |
Column reader for each materialized column for this file. More... | |
parquet::FileMetaData | file_metadata_ |
File metadata thrift object. More... | |
FileVersion | file_version_ |
Version of the application that wrote this file. More... | |
SchemaNode | schema_ |
The root schema node for this file. More... | |
const DiskIoMgr::ScanRange * | metadata_range_ |
Scan range for the metadata. More... | |
Status | parse_status_ |
Returned in ProcessSplit. More... | |
boost::scoped_ptr< MemPool > | dictionary_pool_ |
ScopedTimer< MonotonicStopWatch > | assemble_rows_timer_ |
Timer for materializing rows. This ignores time getting the next buffer. More... | |
RuntimeProfile::Counter * | num_cols_counter_ |
Number of cols that need to be read. More... | |
Static Private Attributes | |
static const int | FOOTER_SIZE = 100 * 1024 |
Friends | |
class | BaseColumnReader |
template<typename T > | |
class | ColumnReader |
class | BoolColumnReader |
This scanner parses Parquet files located in HDFS, and writes the content as tuples in the Impala in-memory representation of data (e.g. tuples, rows, row batches). For the file format spec, see: github.com/Parquet/parquet-format. Parquet (and other columnar formats) use scanner ranges differently than other formats. Each materialized column maps to a single ScanRange. For streaming reads, all the columns need to be read in parallel. This is done by issuing one ScanRange (in IssueInitialRanges()) for the file footer as the other scanners do. This footer range is processed in ProcessSplit(). ProcessSplit() then computes the column ranges and submits them to the IoMgr for immediate scheduling (so they don't surface in DiskIoMgr::GetNextRange()). Scheduling them immediately also guarantees they are all read at once. Like the other scanners, each parquet scanner object is one to one with a ScannerContext. Unlike the other scanners though, the context will have multiple streams, one for each column.
Definition at line 43 of file hdfs-parquet-scanner.h.
|
protected inherited |
Matching typedef for WriteAlignedTuples for codegen. Refer to comments for that function.
Definition at line 212 of file hdfs-scanner.h.
impala::HdfsParquetScanner::HdfsParquetScanner | ( | HdfsScanNode * | scan_node, |
RuntimeState * | state | ||
) |
Definition at line 125 of file hdfs-parquet-scanner.cc.
References assemble_rows_timer_.
|
virtual |
Definition at line 133 of file hdfs-parquet-scanner.cc.
|
protected inherited |
Attach all remaining resources from context_ to batch_ and send batch_ to the scan node. This must be called after all rows have been committed and no further resources are needed from context_ (in practice this will happen in each scanner subclass's Close() implementation).
Definition at line 145 of file hdfs-scanner.cc.
References impala::HdfsScanNode::AddMaterializedRowBatch(), impala::HdfsScanner::batch_, impala::HdfsScanner::context_, impala::ScannerContext::ReleaseCompletedResources(), and impala::HdfsScanner::scan_node_.
Referenced by impala::HdfsTextScanner::Close(), impala::BaseSequenceScanner::Close(), and Close().
|
private |
Reads data from all the columns (in parallel) and assembles rows into the context object. Returns when the entire row group is complete or an error occurred.
Definition at line 805 of file hdfs-parquet-scanner.cc.
References assemble_rows_timer_, impala::ScannerContext::cancelled(), column_readers_, impala::HdfsScanner::CommitRows(), impala::HdfsScanner::context_, COUNTER_ADD, impala::HdfsScanner::EvalConjuncts(), file_metadata_, impala::ScannerContext::Stream::filename(), impala::Status::GetDetail(), impala::HdfsScanner::GetMemory(), impala::HdfsScanner::InitTuple(), LOG_OR_RETURN_ON_ERROR, impala::HdfsScanner::next_row(), impala::HdfsScanner::next_tuple(), impala::Status::ok(), parse_status_, pool, impala::ExecNode::ReachedLimit(), RETURN_IF_ERROR, impala::ScanNode::rows_read_counter(), impala::HdfsScanNode::runtime_state(), impala::HdfsScanner::scan_node_, impala::TupleRow::SetTuple(), impala::HdfsParquetScanner::BaseColumnReader::stream_, impala::HdfsScanner::template_tuple_, impala::HdfsScanner::tuple_byte_size_, and impala::HdfsScanNode::tuple_idx().
Referenced by ProcessSplit().
Release all memory in 'pool' to batch_. If commit_batch is true, the row batch will be committed. commit_batch should be true if the attached pool is expected to be non-trivial (i.e. a decompression buffer) to minimize scanner mem usage.
Definition at line 256 of file hdfs-scanner.h.
References impala::MemPool::AcquireData(), impala::HdfsScanner::batch_, impala::HdfsScanner::CommitRows(), and impala::RowBatch::tuple_data_pool().
Referenced by impala::HdfsTextScanner::Close(), impala::BaseSequenceScanner::Close(), Close(), impala::HdfsTextScanner::FillByteBufferGzip(), impala::HdfsAvroScanner::ProcessRange(), impala::HdfsSequenceScanner::ReadCompressedBlock(), impala::HdfsParquetScanner::BaseColumnReader::ReadDataPage(), and impala::HdfsRCFileScanner::ResetRowGroup().
|
virtual |
Release all resources the scanner has allocated. This is the last chance for the scanner to attach any resources to the ScannerContext object.
Reimplemented from impala::HdfsScanner.
Definition at line 449 of file hdfs-parquet-scanner.cc.
References impala::HdfsScanner::AddFinalRowBatch(), assemble_rows_timer_, impala::HdfsScanner::AttachPool(), impala::HdfsScanner::Close(), column_readers_, dictionary_pool_, impala::HdfsScanNode::RangeComplete(), and impala::HdfsScanner::scan_node_.
|
static protected inherited |
Codegen function to replace WriteAlignedTuples. WriteAlignedTuples is cross compiled to IR. This function loads the precompiled IR function, modifies it and returns the resulting function.
Definition at line 495 of file hdfs-scanner.cc.
References impala::LlvmCodeGen::codegen_timer(), impala::LlvmCodeGen::FinalizeFunction(), impala::LlvmCodeGen::GetFunction(), impala::LlvmCodeGen::ReplaceCallSites(), and SCOPED_TIMER.
Referenced by impala::HdfsTextScanner::Codegen(), and impala::HdfsSequenceScanner::Codegen().
|
static protected inherited |
Codegen function to replace WriteCompleteTuple. Should behave identically to WriteCompleteTuple.
Definition at line 296 of file hdfs-scanner.cc.
References impala::LlvmCodeGen::FnPrototype::AddArgument(), impala::TupleDescriptor::byte_size(), impala::LlvmCodeGen::codegen_timer(), impala::LlvmCodeGen::CodegenMemcpy(), impala::TextConverter::CodegenWriteSlot(), impala::HdfsScanNode::ComputeSlotMaterializationOrder(), impala::LlvmCodeGen::context(), impala::CodegenAnyVal::CreateCallWrapped(), impala::LlvmCodeGen::false_value(), impala::LlvmCodeGen::FinalizeFunction(), impala::TupleDescriptor::GenerateLlvmStruct(), impala::Status::GetDetail(), impala::LlvmCodeGen::GetFunction(), impala::LlvmCodeGen::GetIntConstant(), impala::LlvmCodeGen::GetType(), impala::CodegenAnyVal::GetVal(), impala::HdfsScanNode::hdfs_table(), impala::FieldLocation::LLVM_CLASS_NAME, impala::TupleRow::LLVM_CLASS_NAME, impala::Tuple::LLVM_CLASS_NAME, impala::HdfsScanner::LLVM_CLASS_NAME, impala::MemPool::LLVM_CLASS_NAME, impala::HdfsScanNode::materialized_slots(), impala::HdfsTableDescriptor::null_column_value(), impala::HdfsScanNode::num_materialized_partition_keys(), impala::TupleDescriptor::num_null_bytes(), impala::Status::ok(), impala::LlvmCodeGen::OptimizeFunctionWithExprs(), impala::HdfsScanNode::runtime_state(), SCOPED_TIMER, impala::LlvmCodeGen::true_value(), impala::HdfsScanNode::tuple_desc(), impala::HdfsScanNode::tuple_idx(), impala::ColumnType::type, impala::SlotDescriptor::type(), impala::TYPE_BOOLEAN, impala::TYPE_DECIMAL, impala::TYPE_INT, impala::TYPE_TIMESTAMP, and impala::TYPE_TINYINT.
Referenced by impala::HdfsTextScanner::Codegen(), and impala::HdfsSequenceScanner::Codegen().
|
protected inherited |
Commit num_rows to the current row batch. If this completes, the row batch is enqueued with the scan node and StartNewRowBatch() is called. Returns Status::OK if the query is not cancelled and hasn't exceeded any mem limits. Scanner can call this with 0 rows to flush any pending resources (attached pools and io buffers) to minimize memory consumption.
Definition at line 124 of file hdfs-scanner.cc.
References impala::HdfsScanNode::AddMaterializedRowBatch(), impala::RowBatch::AtCapacity(), impala::HdfsScanner::batch_, impala::TupleDescriptor::byte_size(), impala::Status::CANCELLED, impala::ScannerContext::cancelled(), impala::RowBatch::capacity(), impala::RuntimeState::CheckQueryState(), impala::RowBatch::CommitRows(), impala::HdfsScanner::conjunct_ctxs_, impala::HdfsScanner::context_, impala::ExprContext::FreeLocalAllocations(), impala::ScannerContext::num_completed_io_buffers(), impala::RowBatch::num_rows(), impala::Status::OK, impala::ScannerContext::ReleaseCompletedResources(), RETURN_IF_ERROR, impala::HdfsScanner::scan_node_, impala::HdfsScanner::StartNewRowBatch(), impala::HdfsScanner::state_, impala::HdfsScanNode::tuple_desc(), and impala::HdfsScanner::tuple_mem_.
Referenced by AssembleRows(), impala::HdfsScanner::AttachPool(), impala::HdfsTextScanner::FinishScanRange(), impala::HdfsSequenceScanner::ProcessDecompressedBlock(), ProcessFooter(), impala::HdfsTextScanner::ProcessRange(), impala::HdfsAvroScanner::ProcessRange(), impala::HdfsSequenceScanner::ProcessRange(), impala::HdfsRCFileScanner::ProcessRange(), and ProcessSplit().
|
private |
Populates column_readers_ from the file schema. Schema resolution is handled in this function as well. We allow additional columns at the end in either the table or file schema. If there are extra columns in the file schema, they are simply ignored. If there are extra columns in the table schema, we return NULLs for those columns.
Definition at line 1050 of file hdfs-parquet-scanner.cc.
References impala::HdfsParquetScanner::SchemaNode::children, impala::SlotDescriptor::col_path(), column_readers_, CreateReader(), impala::HdfsParquetScanner::SchemaNode::DebugString(), impala::ScannerContext::Stream::filename(), gen_ir_descriptions::idx, impala::HdfsScanNode::InitEmptyTemplateTuple(), impala::HdfsScanNode::materialized_slots(), impala::SlotDescriptor::null_indicator_offset(), impala::HdfsScanNode::num_partition_keys(), impala::Status::OK, path(), impala::PrintPath(), impala::HdfsScanner::scan_node_, schema_, impala::Tuple::SetNull(), impala::HdfsParquetScanner::SchemaNode::slot_desc, impala::HdfsScanner::stream_, impala::HdfsScanner::template_tuple_, VLOG_FILE, and VLOG_QUERY.
Referenced by ProcessSplit().
|
private |
Creates a reader for node. node must refer to a non-nested column and node.slot_desc must be non-NULL. The reader is added to the runtime state's object pool.
Definition at line 472 of file hdfs-parquet-scanner.cc.
References impala::ObjectPool::Add(), BoolColumnReader, impala::ColumnType::GetByteSize(), impala::RuntimeState::obj_pool(), impala::HdfsScanNode::runtime_state(), impala::HdfsScanner::scan_node_, impala::HdfsParquetScanner::SchemaNode::slot_desc, impala::ColumnType::type, impala::SlotDescriptor::type(), impala::TYPE_BIGINT, impala::TYPE_BOOLEAN, impala::TYPE_CHAR, impala::TYPE_DECIMAL, impala::TYPE_DOUBLE, impala::TYPE_FLOAT, impala::TYPE_INT, impala::TYPE_SMALLINT, impala::TYPE_STRING, impala::TYPE_TIMESTAMP, impala::TYPE_TINYINT, and impala::TYPE_VARCHAR.
Referenced by CreateColumnReaders().
|
private |
Unflattens the schema metadata from a Parquet file's metadata and converts it to our SchemaNode representation. Returns the result in 'node' unless an error status is returned. Does not set the slot_desc field of any SchemaNode.
Referenced by ProcessFooter().
|
private |
Recursive implementation used internally by the above CreateSchemaTree() function.
|
inline protected inherited |
Convenience function for evaluating conjuncts using this scanner's ExprContexts. This must always be inlined so we can correctly replace the call to ExecNode::EvalConjuncts() during codegen.
Definition at line 266 of file hdfs-scanner.h.
References impala::HdfsScanner::conjunct_ctxs_, and impala::ExecNode::EvalConjuncts().
Referenced by AssembleRows(), impala::HdfsAvroScanner::DecodeAvroData(), impala::HdfsRCFileScanner::ProcessRange(), impala::HdfsScanner::WriteCompleteTuple(), impala::HdfsScanner::WriteEmptyTuples(), and impala::HdfsTextScanner::WriteFields().
|
protected inherited |
Simple wrapper around conjunct_ctxs_. Used in the codegen'd version of WriteCompleteTuple() because it's easier than writing IR to access conjunct_ctxs_.
Definition at line 79 of file hdfs-scanner-ir.cc.
References impala::HdfsScanner::conjunct_ctxs_, and gen_ir_descriptions::idx.
|
protected inherited |
Gets memory for outputting tuples into batch_. *pool is the mem pool that should be used for memory allocated for those tuples. *tuple_mem should be the location to output tuples, and *tuple_row_mem for outputting tuple rows. Returns the maximum number of tuples/tuple rows that can be output (before the current row batch is complete and a new one is allocated). Memory returned from this call is invalidated after calling CommitRows. Callers must call GetMemory again after calling CommitRows.
Definition at line 115 of file hdfs-scanner.cc.
References impala::RowBatch::AddRow(), impala::HdfsScanner::batch_, impala::RowBatch::capacity(), impala::RowBatch::GetRow(), impala::RowBatch::num_rows(), impala::RowBatch::tuple_data_pool(), and impala::HdfsScanner::tuple_mem_.
Referenced by AssembleRows(), impala::HdfsTextScanner::FinishScanRange(), impala::HdfsSequenceScanner::ProcessDecompressedBlock(), ProcessFooter(), impala::HdfsTextScanner::ProcessRange(), impala::HdfsAvroScanner::ProcessRange(), impala::HdfsSequenceScanner::ProcessRange(), and impala::HdfsRCFileScanner::ProcessRange().
|
private |
Walks file_metadata_ and initiates reading the materialized columns. This initializes column_readers_ and issues the reads for the columns.
Definition at line 1093 of file hdfs-parquet-scanner.cc.
References impala::DiskIoMgr::AddScanRanges(), impala::ScannerContext::AddStream(), impala::HdfsScanNode::AllocateScanRange(), impala::HdfsParquetScanner::FileVersion::application, column_readers_, impala::HdfsScanner::context_, impala::DiskIoMgr::RequestRange::disk_id(), impala::DiskIoMgr::ScanRange::expected_local(), impala::DiskIoMgr::RequestRange::file(), impala::HdfsFileDesc::file_length, file_metadata_, file_version_, impala::HdfsFileDesc::filename, impala::DiskIoMgr::RequestRange::fs(), impala::HdfsScanNode::GetFileDesc(), impala::RuntimeState::io_mgr(), impala::HdfsScanNode::materialized_slots(), MAX_DICT_HEADER_SIZE, metadata_range_, impala::HdfsFileDesc::mtime, impala::Status::OK, impala::HdfsScanNode::reader_context(), RETURN_IF_ERROR, impala::HdfsScanNode::runtime_state(), impala::HdfsScanner::scan_node_, impala::ScannerContext::Stream::set_contains_tuple_data(), impala::DiskIoMgr::ScanRange::try_cache(), ValidateColumn(), and impala::HdfsParquetScanner::FileVersion::VersionLt().
Referenced by ProcessSplit().
|
protected inherited |
Initializes write_tuples_fn_ to the jitted function if codegen is possible.
Definition at line 87 of file hdfs-scanner.cc.
References impala::HdfsPartitionDescriptor::escape_char(), impala::HdfsScanNode::GetCodegenFn(), impala::ExecNode::id(), impala::HdfsScanNode::IncNumScannersCodegenDisabled(), impala::HdfsScanNode::IncNumScannersCodegenEnabled(), impala::Status::OK, impala::HdfsScanner::scan_node_, impala::TupleDescriptor::string_slots(), impala::HdfsScanNode::tuple_desc(), and impala::HdfsScanner::write_tuples_fn_.
Referenced by impala::HdfsSequenceScanner::InitNewRange(), and impala::HdfsTextScanner::ResetScanner().
|
inline private virtual |
Part of the HdfsScanner interface, not used in Parquet.
Implements impala::HdfsScanner.
Definition at line 184 of file hdfs-parquet-scanner.h.
References impala::Status::OK.
|
inline protected inherited |
Initialize a tuple. TODO: only copy over non-null slots. TODO: InitTuple is called frequently, avoid the if, perhaps via templatization.
Definition at line 355 of file hdfs-scanner.h.
References impala::HdfsScanner::num_null_bytes_, and impala::HdfsScanner::tuple_byte_size_.
Referenced by AssembleRows(), impala::HdfsAvroScanner::DecodeAvroData(), impala::HdfsRCFileScanner::ProcessRange(), impala::HdfsScanner::WriteCompleteTuple(), and impala::HdfsTextScanner::WriteFields().
|
static |
Issue just the footer range for each file. We'll then parse the footer and pick out the columns we want.
Definition at line 83 of file hdfs-parquet-scanner.cc.
References impala::HdfsScanNode::AddDiskIoRanges(), impala::HdfsScanNode::AllocateScanRange(), impala::DiskIoMgr::RequestRange::disk_id(), impala::DiskIoMgr::ScanRange::expected_local(), impala::RuntimeState::LogError(), impala::DiskIoMgr::ScanRange::meta_data(), impala::DiskIoMgr::RequestRange::offset(), impala::Status::OK, impala::ScanRangeMetadata::partition_id, impala::HdfsScanNode::RangeComplete(), RETURN_IF_ERROR, impala::HdfsScanNode::runtime_state(), and impala::DiskIoMgr::ScanRange::try_cache().
Referenced by impala::HdfsScanNode::GetNext().
|
protected virtual inherited |
Utility function to append an error message for an invalid row. This is called from ReportTupleParseError(). row_idx is the index of the row in the current batch. Subclasses should override this function (e.g. text needs to join boundary rows). Since this is only in the error path, vtable overhead is acceptable.
Reimplemented in impala::HdfsSequenceScanner, and impala::HdfsTextScanner.
Definition at line 572 of file hdfs-scanner.cc.
Referenced by impala::HdfsScanner::ReportTupleParseError().
Definition at line 368 of file hdfs-scanner.h.
References impala::HdfsScanner::batch_, and impala::RowBatch::row_byte_size().
Referenced by AssembleRows(), impala::HdfsAvroScanner::DecodeAvroData(), impala::HdfsRCFileScanner::ProcessRange(), impala::HdfsScanner::WriteEmptyTuples(), and impala::HdfsTextScanner::WriteFields().
Definition at line 363 of file hdfs-scanner.h.
References impala::HdfsScanner::tuple_byte_size_.
Referenced by AssembleRows(), impala::HdfsAvroScanner::DecodeAvroData(), impala::HdfsRCFileScanner::ProcessRange(), and impala::HdfsTextScanner::WriteFields().
|
virtual |
One-time initialisation of state that is constant across scan ranges.
Reimplemented from impala::HdfsScanner.
Definition at line 440 of file hdfs-parquet-scanner.cc.
References ADD_COUNTER, impala::HdfsScanNode::IncNumScannersCodegenDisabled(), num_cols_counter_, impala::Status::OK, impala::HdfsScanner::Prepare(), RETURN_IF_ERROR, impala::ExecNode::runtime_profile(), and impala::HdfsScanner::scan_node_.
Process the file footer and parse file_metadata_. This should be called with the last FOOTER_SIZE bytes in context_. *eosr is a return value. If true, the scan range is complete (e.g. select count(*)).
Definition at line 916 of file hdfs-parquet-scanner.cc.
References impala::HdfsScanNode::AllocateScanRange(), impala::DiskIoMgr::BufferDescriptor::buffer(), impala::HdfsScanner::CommitRows(), impala::HdfsScanner::context_, COUNTER_ADD, CreateSchemaTree(), impala::DeserializeThriftMsg(), impala::DiskIoMgr::RequestRange::disk_id(), impala::ScannerContext::Stream::eosr(), impala::DiskIoMgr::ScanRange::expected_local(), impala::DiskIoMgr::RequestRange::file(), impala::HdfsFileDesc::file_length, file_metadata_, impala::ScannerContext::Stream::filename(), impala::DiskIoMgr::RequestRange::fs(), impala::ScannerContext::Stream::GetBuffer(), impala::Status::GetDetail(), impala::HdfsScanNode::GetFileDesc(), impala::HdfsScanner::GetMemory(), impala::RuntimeState::io_mgr(), impala::DiskIoMgr::BufferDescriptor::len(), impala::HdfsScanNode::MarkFileDescIssued(), impala::HdfsScanNode::materialized_slots(), impala::DiskIoMgr::max_read_buffer_size(), metadata_range_, impala::HdfsFileDesc::mtime, impala::Status::OK, impala::Status::ok(), impala::PARQUET_VERSION_NUMBER, pool, impala::DiskIoMgr::Read(), impala::HdfsScanNode::reader_context(), impala::DiskIoMgr::BufferDescriptor::Return(), RETURN_IF_ERROR, impala::ScanNode::rows_read_counter(), impala::HdfsScanNode::runtime_state(), impala::HdfsScanner::scan_node_, impala::ScannerContext::Stream::scan_range(), schema_, impala::HdfsScanner::stream_, impala::DiskIoMgr::ScanRange::try_cache(), UNLIKELY, ValidateFileMetadata(), and impala::HdfsScanner::WriteEmptyTuples().
Referenced by ProcessSplit().
|
virtual |
Process an entire split, reading bytes from the context's streams. Context is initialized with the split data (e.g. template tuple, partition descriptor, etc). This function should only return on error or end of scan range.
Implements impala::HdfsScanner.
Definition at line 769 of file hdfs-parquet-scanner.cc.
References AssembleRows(), impala::HdfsScanner::batch_, column_readers_, impala::HdfsScanner::CommitRows(), impala::HdfsScanner::context_, COUNTER_SET, CreateColumnReaders(), file_metadata_, InitColumns(), num_cols_counter_, impala::Status::OK, ProcessFooter(), impala::ScannerContext::ReleaseCompletedResources(), RETURN_IF_ERROR, and impala::HdfsScanner::stream_.
|
protected inherited |
Report a parse error for the column described by desc. If abort_on_error is true, sets parse_status_ to the error message.
Definition at line 577 of file hdfs-scanner.cc.
References impala::RuntimeState::abort_on_error(), impala::SlotDescriptor::col_pos(), impala::RuntimeState::LogError(), impala::RuntimeState::LogHasSpace(), impala::HdfsScanNode::num_partition_keys(), impala::Status::ok(), impala::HdfsScanner::parse_status_, impala::HdfsScanner::scan_node_, impala::HdfsScanner::state_, and impala::SlotDescriptor::type().
Referenced by impala::HdfsRCFileScanner::ProcessRange(), impala::HdfsScanner::ReportTupleParseError(), and impala::HdfsTextScanner::WritePartialTuple().
|
protected inherited |
Utility function to report parse errors for each field. If errors[i] is nonzero, fields[i] had a parse error. row_idx is the index of the row in the current batch that had the parse error. Returns false if parsing should be aborted. In this case parse_status_ is set to the error. This is called from WriteAlignedTuples.
Definition at line 546 of file hdfs-scanner.cc.
References impala::RuntimeState::abort_on_error(), impala::ScannerContext::Stream::filename(), impala::RuntimeState::LogError(), impala::RuntimeState::LogHasSpace(), impala::HdfsScanner::LogRowParseError(), impala::HdfsScanNode::materialized_slots(), impala::HdfsScanner::num_errors_in_file_, impala::Status::ok(), impala::HdfsScanner::parse_status_, impala::HdfsScanner::ReportColumnParseError(), impala::RuntimeState::ReportFileErrors(), impala::HdfsScanner::scan_node_, impala::HdfsScanner::state_, and impala::HdfsScanner::stream_.
Referenced by impala::HdfsSequenceScanner::ProcessRange(), and impala::HdfsScanner::WriteAlignedTuples().
|
protected inherited |
Set batch_ to a new row batch and update tuple_mem_ accordingly.
Definition at line 108 of file hdfs-scanner.cc.
References impala::MemPool::Allocate(), impala::HdfsScanner::batch_, impala::RuntimeState::batch_size(), impala::ExecNode::mem_tracker(), impala::ExecNode::row_desc(), impala::HdfsScanner::scan_node_, impala::HdfsScanner::state_, impala::HdfsScanner::tuple_byte_size_, impala::RowBatch::tuple_data_pool(), and impala::HdfsScanner::tuple_mem_.
Referenced by impala::HdfsScanner::CommitRows(), and impala::HdfsScanner::Prepare().
|
protected inherited |
Update the decompressor_ object given a compression type or codec name. Depending on the old compression type and the new one, it may close the old decompressor and/or create a new one of different type.
Definition at line 513 of file hdfs-scanner.cc.
References impala::Codec::CreateDecompressor(), impala::HdfsScanner::data_buffer_pool_, impala::HdfsScanner::decompression_type_, impala::HdfsScanner::decompressor_, impala::Status::OK, RETURN_IF_ERROR, impala::HdfsScanner::scan_node_, impala::TupleDescriptor::string_slots(), and impala::HdfsScanNode::tuple_desc().
Referenced by impala::HdfsAvroScanner::InitNewRange(), impala::HdfsSequenceScanner::InitNewRange(), and impala::HdfsTextScanner::ProcessSplit().
|
protected inherited |
|
private |
Validates the column metadata to make sure this column is supported (e.g. encoding, type, etc) and matches the type of col_reader's slot desc.
Definition at line 1290 of file hdfs-parquet-scanner.cc.
References impala::HdfsParquetScanner::BaseColumnReader::col_idx(), impala::ColumnType::DebugString(), impala::ParquetPlainEncoder::DecimalSize(), impala::DiskIoMgr::RequestRange::file(), file_metadata_, impala::IMPALA_TO_PARQUET_TYPES, IsEncodingSupported(), LOG_OR_RETURN_ON_ERROR, metadata_range_, impala::Status::OK, impala::ColumnType::precision, impala::PrintEncoding(), impala::ColumnType::scale, impala::HdfsParquetScanner::BaseColumnReader::schema_element(), impala::HdfsParquetScanner::BaseColumnReader::slot_desc(), impala::HdfsScanner::state_, impala::ColumnType::type, impala::SlotDescriptor::type(), and impala::TYPE_DECIMAL.
Referenced by InitColumns().
|
private |
Validates the file metadata.
Definition at line 1263 of file hdfs-parquet-scanner.cc.
References file_metadata_, file_version_, impala::ScannerContext::Stream::filename(), impala::Status::OK, impala::PARQUET_CURRENT_VERSION, and impala::HdfsScanner::stream_.
Referenced by ProcessFooter().
|
protected inherited |
Processes batches of fields and writes them out to tuple_row_mem.
Definition at line 33 of file hdfs-scanner-ir.cc.
References impala::HdfsScanner::ReportTupleParseError(), impala::HdfsScanner::template_tuple_, impala::HdfsScanner::tuple_, impala::HdfsScanner::tuple_byte_size_, UNLIKELY, and impala::HdfsScanner::WriteCompleteTuple().
Referenced by impala::HdfsSequenceScanner::ProcessDecompressedBlock(), and impala::HdfsTextScanner::WriteFields().
|
protected inherited |
Writes out all slots for 'tuple' from 'fields'. 'fields' must be aligned to the start of the tuple (e.g. fields[0] maps to slots[0]). After writing the tuple, it will be evaluated against the conjuncts.
Definition at line 217 of file hdfs-scanner.cc.
References impala::HdfsScanner::EvalConjuncts(), impala::HdfsScanner::InitTuple(), impala::FieldLocation::len, impala::HdfsScanNode::materialized_slots(), impala::HdfsScanner::scan_node_, impala::TupleRow::SetTuple(), impala::HdfsScanner::text_converter_, impala::HdfsScanNode::tuple_idx(), and UNLIKELY.
Referenced by impala::HdfsSequenceScanner::ProcessRange(), and impala::HdfsScanner::WriteAlignedTuples().
|
protected inherited |
Utility method to write out tuples when there are no materialized fields (e.g. select count(*) or only partition keys). num_tuples - Total number of tuples to write out. Returns the number of tuples added to the row batch.
Definition at line 157 of file hdfs-scanner.cc.
References impala::RowBatch::AddRow(), impala::RowBatch::AddRows(), impala::RowBatch::AtCapacity(), impala::RowBatch::capacity(), impala::RowBatch::CommitLastRow(), impala::RowBatch::CommitRows(), impala::HdfsScanner::EvalConjuncts(), impala::RowBatch::GetRow(), impala::RowBatch::INVALID_ROW_INDEX, impala::RowBatch::num_rows(), impala::HdfsScanner::scan_node_, impala::TupleRow::SetTuple(), impala::HdfsScanner::template_tuple_, and impala::HdfsScanNode::tuple_idx().
Referenced by impala::HdfsSequenceScanner::ProcessDecompressedBlock(), ProcessFooter(), impala::HdfsTextScanner::ProcessRange(), impala::HdfsAvroScanner::ProcessRange(), impala::HdfsSequenceScanner::ProcessRange(), and impala::HdfsRCFileScanner::ProcessRange().
|
protected inherited |
Write empty tuples and commit them to the context object.
Definition at line 195 of file hdfs-scanner.cc.
References impala::HdfsScanner::EvalConjuncts(), impala::HdfsScanner::next_row(), impala::HdfsScanner::scan_node_, impala::TupleRow::SetTuple(), impala::HdfsScanner::template_tuple_, and impala::HdfsScanNode::tuple_idx().
|
friend |
Definition at line 116 of file hdfs-parquet-scanner.h.
|
friend |
Definition at line 121 of file hdfs-parquet-scanner.h.
Referenced by CreateReader().
|
friend |
Definition at line 119 of file hdfs-parquet-scanner.h.
|
private |
Timer for materializing rows. This excludes the time spent getting the next buffer.
Definition at line 147 of file hdfs-parquet-scanner.h.
Referenced by AssembleRows(), Close(), and HdfsParquetScanner().
|
protectedinherited |
The current row batch being populated. Creating new row batches, attaching context resources, and handing off to the scan node is handled by this class in CommitRows(), but AttachPool() must be called by scanner subclasses to attach any memory allocated by that subclass. All row batches created by this class are transferred to the scan node (i.e., all batches are ultimately owned by the scan node).
Definition at line 177 of file hdfs-scanner.h.
Referenced by impala::HdfsScanner::AddFinalRowBatch(), impala::HdfsScanner::AttachPool(), impala::HdfsScanner::CommitRows(), impala::HdfsScanner::GetMemory(), impala::HdfsScanner::next_row(), impala::HdfsSequenceScanner::ProcessDecompressedBlock(), ProcessSplit(), impala::HdfsScanner::StartNewRowBatch(), impala::HdfsTextScanner::WriteFields(), and impala::HdfsScanner::~HdfsScanner().
|
private |
Column reader for each materialized column in this file.
Definition at line 125 of file hdfs-parquet-scanner.h.
Referenced by AssembleRows(), Close(), CreateColumnReaders(), InitColumns(), and ProcessSplit().
|
protectedinherited |
ExprContext for each conjunct. Each scanner has its own ExprContexts so the conjuncts can be safely evaluated in parallel.
Definition at line 154 of file hdfs-scanner.h.
Referenced by impala::HdfsScanner::Close(), impala::HdfsScanner::CommitRows(), impala::HdfsScanner::EvalConjuncts(), impala::HdfsScanner::GetConjunctCtx(), and impala::HdfsScanner::Prepare().
|
protectedinherited |
Context for this scanner.
Definition at line 147 of file hdfs-scanner.h.
Referenced by impala::HdfsScanner::AddFinalRowBatch(), AssembleRows(), impala::HdfsScanner::CommitRows(), impala::HdfsTextScanner::FillByteBufferCompressedFile(), impala::HdfsTextScanner::FillByteBufferGzip(), InitColumns(), impala::HdfsTextScanner::InitNewRange(), impala::HdfsSequenceScanner::InitNewRange(), impala::HdfsScanner::Prepare(), impala::HdfsSequenceScanner::ProcessDecompressedBlock(), ProcessFooter(), impala::HdfsTextScanner::ProcessRange(), impala::HdfsAvroScanner::ProcessRange(), impala::HdfsSequenceScanner::ProcessRange(), impala::HdfsRCFileScanner::ProcessRange(), ProcessSplit(), and impala::HdfsTextScanner::ResetScanner().
|
protectedinherited |
Pool to allocate per data block memory. This should be used with the decompressor and any other per data block allocations.
Definition at line 205 of file hdfs-scanner.h.
Referenced by impala::HdfsTextScanner::Close(), impala::BaseSequenceScanner::Close(), impala::HdfsTextScanner::FillByteBufferGzip(), impala::HdfsAvroScanner::ProcessRange(), impala::HdfsSequenceScanner::ReadCompressedBlock(), impala::HdfsRCFileScanner::ReadRowGroup(), impala::HdfsRCFileScanner::ResetRowGroup(), impala::HdfsScanner::UpdateDecompressor(), and impala::HdfsTextScanner::WritePartialTuple().
|
protectedinherited |
Time spent decompressing bytes.
Definition at line 208 of file hdfs-scanner.h.
Referenced by impala::HdfsTextScanner::FillByteBufferCompressedFile(), impala::HdfsTextScanner::FillByteBufferGzip(), impala::HdfsSequenceScanner::GetRecord(), impala::HdfsScanner::Prepare(), impala::HdfsAvroScanner::ProcessRange(), impala::HdfsRCFileScanner::ReadColumnBuffers(), impala::HdfsSequenceScanner::ReadCompressedBlock(), impala::HdfsParquetScanner::BaseColumnReader::ReadDataPage(), and impala::HdfsRCFileScanner::ReadKeyBuffers().
|
protectedinherited |
The most recently used decompression type.
Definition at line 201 of file hdfs-scanner.h.
Referenced by impala::HdfsTextScanner::FillByteBuffer(), impala::HdfsTextScanner::FillByteBufferCompressedFile(), and impala::HdfsScanner::UpdateDecompressor().
|
protectedinherited |
Decompressor class to use, if any.
Definition at line 198 of file hdfs-scanner.h.
Referenced by impala::HdfsTextScanner::Close(), impala::BaseSequenceScanner::Close(), impala::HdfsScanner::Close(), impala::HdfsTextScanner::FillByteBuffer(), impala::HdfsTextScanner::FillByteBufferCompressedFile(), impala::HdfsTextScanner::FillByteBufferGzip(), impala::HdfsTextScanner::FinishScanRange(), impala::HdfsSequenceScanner::GetRecord(), impala::HdfsRCFileScanner::InitNewRange(), impala::HdfsAvroScanner::ProcessRange(), impala::HdfsRCFileScanner::ReadColumnBuffers(), impala::HdfsSequenceScanner::ReadCompressedBlock(), impala::HdfsRCFileScanner::ReadKeyBuffers(), and impala::HdfsScanner::UpdateDecompressor().
|
private |
Pool to copy dictionary page buffer into. This pool is shared across all the pages in a column chunk.
Definition at line 144 of file hdfs-parquet-scanner.h.
Referenced by Close(), and impala::HdfsParquetScanner::BaseColumnReader::ReadDataPage().
|
staticinherited |
Assumed size of an OS file block. Used mostly when reading file format headers, etc. This probably ought to be a number derived from the environment.
Definition at line 95 of file hdfs-scanner.h.
|
private |
File metadata thrift object.
Definition at line 128 of file hdfs-parquet-scanner.h.
Referenced by AssembleRows(), InitColumns(), ProcessFooter(), ProcessSplit(), ValidateColumn(), and ValidateFileMetadata().
|
private |
Version of the application that wrote this file.
Definition at line 131 of file hdfs-parquet-scanner.h.
Referenced by impala::HdfsParquetScanner::ColumnReader< T >::ColumnReader(), InitColumns(), impala::HdfsParquetScanner::BaseColumnReader::ReadDataPage(), and ValidateFileMetadata().
|
staticprivate |
Size of the file footer. This is a guess. If this value is too small, we will need to issue another read.
Definition at line 113 of file hdfs-parquet-scanner.h.
|
staticinherited |
Scanner subclasses must implement these static functions as well. Unfortunately, C++ does not allow static virtual functions. Issue the initial ranges for 'files'. HdfsFileDesc groups all the splits assigned to this scan node by file. This is called before any of the scanner subclasses are created to process splits in 'files'. The strategy on how to parse the scan ranges depends on the file format.
Definition at line 137 of file hdfs-scanner.h.
Referenced by impala::HdfsScanner::CodegenWriteCompleteTuple().
|
private |
Scan range for the metadata.
Definition at line 137 of file hdfs-parquet-scanner.h.
Referenced by InitColumns(), ProcessFooter(), and ValidateColumn().
|
private |
Number of cols that need to be read.
Definition at line 150 of file hdfs-parquet-scanner.h.
Referenced by Prepare(), and ProcessSplit().
|
protectedinherited |
Number of errors in the current file.
Definition at line 183 of file hdfs-scanner.h.
Referenced by impala::HdfsScanner::ReportTupleParseError().
|
protectedinherited |
Number of null bytes in the tuple.
Definition at line 189 of file hdfs-scanner.h.
Referenced by impala::HdfsScanner::InitTuple().
|
private |
Returned in ProcessSplit.
Definition at line 140 of file hdfs-parquet-scanner.h.
Referenced by AssembleRows(), and impala::HdfsParquetScanner::BoolColumnReader::ReadSlot().
|
protectedinherited |
The scan node that started this scanner.
Definition at line 141 of file hdfs-scanner.h.
Referenced by impala::HdfsScanner::AddFinalRowBatch(), AssembleRows(), impala::HdfsParquetScanner::BaseColumnReader::BaseColumnReader(), impala::HdfsTextScanner::Close(), impala::BaseSequenceScanner::Close(), Close(), impala::BaseSequenceScanner::CloseFileRanges(), impala::HdfsScanner::CommitRows(), CreateColumnReaders(), CreateReader(), impala::HdfsRCFileScanner::DebugString(), impala::HdfsAvroScanner::DecodeAvroData(), impala::HdfsTextScanner::FillByteBufferCompressedFile(), impala::HdfsTextScanner::FinishScanRange(), InitColumns(), impala::HdfsScanner::InitializeWriteTuplesFn(), impala::HdfsTextScanner::InitNewRange(), impala::HdfsAvroScanner::InitNewRange(), impala::HdfsSequenceScanner::InitNewRange(), impala::HdfsRCFileScanner::InitNewRange(), impala::HdfsAvroScanner::ParseMetadata(), impala::HdfsTextScanner::Prepare(), impala::BaseSequenceScanner::Prepare(), Prepare(), impala::HdfsScanner::Prepare(), impala::HdfsSequenceScanner::Prepare(), impala::HdfsRCFileScanner::Prepare(), impala::HdfsSequenceScanner::ProcessBlockCompressedScanRange(), impala::HdfsSequenceScanner::ProcessDecompressedBlock(), ProcessFooter(), impala::HdfsTextScanner::ProcessRange(), impala::HdfsAvroScanner::ProcessRange(), impala::HdfsSequenceScanner::ProcessRange(), impala::HdfsRCFileScanner::ProcessRange(), impala::BaseSequenceScanner::ProcessSplit(), impala::HdfsParquetScanner::BaseColumnReader::ReadDataPage(), impala::HdfsRCFileScanner::ReadRowGroup(), impala::HdfsScanner::ReportColumnParseError(), impala::HdfsScanner::ReportTupleParseError(), impala::HdfsTextScanner::ResetScanner(), impala::HdfsAvroScanner::ResolveSchemas(), impala::HdfsScanner::StartNewRowBatch(), impala::HdfsScanner::UpdateDecompressor(), impala::HdfsAvroScanner::VerifyTypesMatch(), impala::HdfsScanner::WriteCompleteTuple(), impala::HdfsScanner::WriteEmptyTuples(), impala::HdfsTextScanner::WriteFields(), and impala::HdfsTextScanner::WritePartialTuple().
|
private |
The root schema node for this file.
Definition at line 134 of file hdfs-parquet-scanner.h.
Referenced by CreateColumnReaders(), and ProcessFooter().
|
protectedinherited |
RuntimeState for error reporting.
Definition at line 144 of file hdfs-scanner.h.
Referenced by impala::HdfsScanner::Close(), impala::HdfsScanner::CommitRows(), impala::HdfsTextScanner::FillByteBufferGzip(), impala::HdfsTextScanner::FinishScanRange(), impala::HdfsTextScanner::Prepare(), impala::HdfsScanner::Prepare(), impala::HdfsSequenceScanner::Prepare(), impala::HdfsSequenceScanner::ProcessBlockCompressedScanRange(), impala::HdfsRCFileScanner::ProcessRange(), impala::BaseSequenceScanner::ProcessSplit(), impala::HdfsSequenceScanner::ReadCompressedBlock(), impala::BaseSequenceScanner::ReadPastSize(), impala::HdfsRCFileScanner::ReadRowGroup(), impala::HdfsScanner::ReportColumnParseError(), impala::HdfsScanner::ReportTupleParseError(), impala::HdfsAvroScanner::ResolveSchemas(), impala::HdfsScanner::StartNewRowBatch(), ValidateColumn(), and impala::HdfsTextScanner::WriteFields().
|
protectedinherited |
The first stream for context_.
Definition at line 150 of file hdfs-scanner.h.
Referenced by impala::HdfsTextScanner::Close(), impala::BaseSequenceScanner::Close(), CreateColumnReaders(), impala::HdfsRCFileScanner::DebugString(), impala::HdfsTextScanner::FillByteBuffer(), impala::HdfsTextScanner::FillByteBufferCompressedFile(), impala::HdfsTextScanner::FillByteBufferGzip(), impala::HdfsTextScanner::FindFirstTuple(), impala::HdfsTextScanner::FinishScanRange(), impala::HdfsSequenceScanner::GetRecord(), impala::HdfsTextScanner::InitNewRange(), impala::HdfsRCFileScanner::InitNewRange(), impala::HdfsRCFileScanner::NextField(), impala::HdfsAvroScanner::ParseMetadata(), impala::BaseSequenceScanner::Prepare(), impala::HdfsScanner::Prepare(), impala::HdfsSequenceScanner::ProcessBlockCompressedScanRange(), ProcessFooter(), impala::HdfsTextScanner::ProcessRange(), impala::HdfsAvroScanner::ProcessRange(), impala::HdfsSequenceScanner::ProcessRange(), impala::HdfsRCFileScanner::ProcessRange(), impala::HdfsTextScanner::ProcessSplit(), impala::BaseSequenceScanner::ProcessSplit(), ProcessSplit(), impala::HdfsSequenceScanner::ReadBlockHeader(), impala::HdfsRCFileScanner::ReadColumnBuffers(), impala::HdfsSequenceScanner::ReadCompressedBlock(), impala::HdfsAvroScanner::ReadFileHeader(), impala::HdfsSequenceScanner::ReadFileHeader(), impala::HdfsRCFileScanner::ReadFileHeader(), impala::HdfsRCFileScanner::ReadKeyBuffers(), impala::HdfsRCFileScanner::ReadNumColumnsMetadata(), impala::HdfsRCFileScanner::ReadRowGroupHeader(), impala::BaseSequenceScanner::ReadSync(), impala::HdfsScanner::ReportTupleParseError(), impala::BaseSequenceScanner::SkipToSync(), ValidateFileMetadata(), impala::HdfsAvroScanner::VerifyTypesMatch(), and impala::HdfsTextScanner::WriteFields().
|
protectedinherited |
A partially materialized tuple with only partition key slots set. The non-partition key slots are set to NULL. The template tuple must be copied into tuple_ before any of the other slots are materialized. Pointer is NULL if there are no partition key slots. This template tuple is computed once for each file and valid for the duration of that file. It is owned by the HDFS scan node.
Definition at line 164 of file hdfs-scanner.h.
Referenced by impala::HdfsAvroScanner::AllocateFileHeader(), AssembleRows(), CreateColumnReaders(), impala::HdfsAvroScanner::DecodeAvroData(), impala::HdfsAvroScanner::InitNewRange(), impala::HdfsScanner::Prepare(), impala::HdfsSequenceScanner::ProcessRange(), impala::HdfsRCFileScanner::ProcessRange(), impala::HdfsAvroScanner::ResolveSchemas(), impala::HdfsScanner::WriteAlignedTuples(), impala::HdfsScanner::WriteEmptyTuples(), and impala::HdfsTextScanner::WriteFields().
|
protectedinherited |
Helper class for converting text to other types.
Definition at line 186 of file hdfs-scanner.h.
Referenced by impala::HdfsTextScanner::InitNewRange(), impala::HdfsSequenceScanner::InitNewRange(), impala::HdfsRCFileScanner::Prepare(), impala::HdfsRCFileScanner::ProcessRange(), impala::HdfsScanner::WriteCompleteTuple(), and impala::HdfsTextScanner::WritePartialTuple().
|
protectedinherited |
Current tuple pointer into tuple_mem_.
Definition at line 170 of file hdfs-scanner.h.
Referenced by impala::HdfsTextScanner::FinishScanRange(), impala::HdfsSequenceScanner::ProcessDecompressedBlock(), impala::HdfsTextScanner::ProcessRange(), impala::HdfsSequenceScanner::ProcessRange(), impala::HdfsScanner::WriteAlignedTuples(), and impala::HdfsTextScanner::WriteFields().
|
protectedinherited |
Fixed size of each tuple, in bytes.
Definition at line 167 of file hdfs-scanner.h.
Referenced by AssembleRows(), impala::HdfsScanner::InitTuple(), impala::HdfsScanner::next_tuple(), impala::HdfsScanner::StartNewRowBatch(), and impala::HdfsScanner::WriteAlignedTuples().
|
protectedinherited |
The tuple memory of batch_.
Definition at line 180 of file hdfs-scanner.h.
Referenced by impala::HdfsScanner::CommitRows(), impala::HdfsScanner::GetMemory(), and impala::HdfsScanner::StartNewRowBatch().
|
protectedinherited |
Jitted write tuples function pointer. Null if codegen is disabled.
Definition at line 215 of file hdfs-scanner.h.
Referenced by impala::HdfsScanner::InitializeWriteTuplesFn(), impala::HdfsSequenceScanner::ProcessDecompressedBlock(), and impala::HdfsTextScanner::WriteFields().