16 #ifndef IMPALA_EXEC_BASE_SEQUENCE_SCANNER_H
17 #define IMPALA_EXEC_BASE_SEQUENCE_SCANNER_H
39 const std::vector<HdfsFileDesc*>& files);
100 virtual THdfsFileFormat::type file_format() const = 0;
166 int FindSyncBlock(
const uint8_t* buffer,
int buffer_len,
const uint8_t* sync,
int sync_len);
static Status IssueInitialRanges(HdfsScanNode *scan_node, const std::vector< HdfsFileDesc * > &files)
Issue the initial ranges for all sequence container files.
bool only_parsing_header_
If true, this scanner object is only for processing the header.
FileHeader * header_
File header for this scan range. This is not owned by the parent scan node.
static const int SYNC_HASH_SIZE
Size of the sync hash field.
virtual Status ProcessRange()=0
Status SkipToSync(const uint8_t *sync, int sync_size)
virtual Status Prepare(ScannerContext *context)
One-time initialisation of state that is constant across scan ranges.
virtual Status ReadFileHeader()=0
virtual Status ProcessSplit()
int ReadPastSize(int64_t file_offset)
int num_syncs_
The number of syncs seen by this scanner so far.
virtual ~BaseSequenceScanner()
RuntimeProfile::Counter * bytes_skipped_counter_
Number of bytes skipped when advancing to next sync on error.
static const int SYNC_MARKER
Sync indicator.
virtual THdfsFileFormat::type file_format() const =0
Returns type of scanner: e.g. rcfile, seqfile.
int FindSyncBlock(const uint8_t *buffer, int buffer_len, const uint8_t *sync, int sync_len)
static const int HEADER_SIZE
bool finished_
finished_ is set by ReadSync() and SkipToSync().
void CloseFileRanges(const char *file)
BaseSequenceScanner(HdfsScanNode *, RuntimeState *)
virtual FileHeader * AllocateFileHeader()=0