16 #ifndef IMPALA_EXEC_HDFS_TEXT_SCANNER_H
17 #define IMPALA_EXEC_HDFS_TEXT_SCANNER_H
24 class DelimitedTextParser;
42 const std::vector<HdfsFileDesc*>& files);
46 const std::vector<ExprContext*>& conjunct_ctxs);
Status ProcessRange(int *num_tuples, bool past_scan_range)
virtual Status InitNewRange()
std::vector< char * > row_end_locations_
virtual void LogRowParseError(int row_idx, std::stringstream *)
StringBuffer boundary_column_
Helper string for dealing with columns that span file blocks.
char * byte_buffer_end_
Ending position of HDFS buffer.
Status FillByteBufferCompressedFile(bool *eosr)
A tuple with 0 materialized slots is represented as NULL.
static const int NEXT_BLOCK_READ_SIZE
Status FinishScanRange()
Reads past the end of the scan range for the next tuple end.
std::vector< FieldLocation > field_locations_
Return field locations from the Delimited Text Parser.
StringBuffer boundary_row_
int WritePartialTuple(FieldLocation *, int num_fields, bool copy_strings)
virtual Status Prepare(ScannerContext *context)
Implementation of HdfsScanner interface.
bool only_parsing_header_
True if we are parsing the header for this scanner.
static Status IssueInitialRanges(HdfsScanNode *scan_node, const std::vector< HdfsFileDesc * > &files)
Issue I/O manager byte ranges for 'files'.
bool partial_tuple_empty_
boost::scoped_ptr< DelimitedTextParser > delimited_text_parser_
Helper class for picking fields and rows from delimited text.
static const char * LLVM_CLASS_NAME
Status FindFirstTuple(bool *tuple_found)
HdfsTextScanner(HdfsScanNode *scan_node, RuntimeState *state)
static llvm::Function * Codegen(HdfsScanNode *, const std::vector< ExprContext * > &conjunct_ctxs)
Codegen writing tuples and evaluating predicates.
int slot_idx_
Index into materialized_slots_ for the next slot to output for the current tuple. ...
RuntimeProfile::Counter * parse_delimiter_timer_
Time spent parsing text files.
int WriteFields(MemPool *, TupleRow *tuple_row_mem, int num_fields, int num_tuples)
Status FillByteBufferGzip(bool *eosr)
boost::scoped_ptr< MemPool > boundary_pool_
Mem pool for boundary_row_ and boundary_column_.
virtual Status ProcessSplit()
char * byte_buffer_ptr_
Current position in byte buffer.
void CopyBoundaryField(FieldLocation *data, MemPool *pool)
static const std::string LZO_INDEX_SUFFIX
Suffix for lzo index files.
int64_t byte_buffer_read_size_
Actual bytes received from last file read.
virtual Status FillByteBuffer(bool *eosr, int num_bytes=0)
virtual ~HdfsTextScanner()