16 #ifndef IMPALA_EXEC_HDFS_SCANNER_H_
17 #define IMPALA_EXEC_HDFS_SCANNER_H_
22 #include <boost/regex.hpp>
23 #include <boost/scoped_ptr.hpp>
37 class HdfsPartitionDescriptor;
43 class TupleDescriptor;
111 virtual void Close();
222 THdfsFileFormat::type type,
const std::string& scanner_name);
258 DCHECK(pool != NULL);
292 int max_added_tuples,
int slots_per_tuple,
int row_start_indx);
334 TupleRow* tuple_row,
Tuple* template_tuple, uint8_t* error_fields,
335 uint8_t* error_in_row);
340 const std::vector<ExprContext*>& conjunct_ctxs);
346 llvm::Function* write_tuple_fn);
356 if (template_tuple != NULL) {
364 uint8_t* mem =
reinterpret_cast<uint8_t*
>(t);
369 uint8_t* mem =
reinterpret_cast<uint8_t*
>(r);
boost::scoped_ptr< Codec > decompressor_
Decompressor class to use, if any.
void ReportColumnParseError(const SlotDescriptor *desc, const char *data, int len)
ExprContext * GetConjunctCtx(int idx) const
static const char * LLVM_CLASS_NAME
virtual Status InitNewRange()=0
Reset internal state for a new scan range.
HdfsScanNode * scan_node_
The scan node that started this scanner.
virtual void LogRowParseError(int row_idx, std::stringstream *)
static const char * LLVM_CLASS_NAME
ScannerContext * context_
Context for this scanner.
static const int FILE_BLOCK_SIZE
int tuple_byte_size_
Fixed size of each tuple, in bytes.
boost::scoped_ptr< MemPool > data_buffer_pool_
static llvm::Function * CodegenWriteCompleteTuple(HdfsScanNode *, LlvmCodeGen *, const std::vector< ExprContext * > &conjunct_ctxs)
boost::scoped_ptr< TextConverter > text_converter_
Helper class for converting text to other types.
int32_t num_null_bytes_
Number of null bytes in the tuple.
WriteTuplesFn write_tuples_fn_
Jitted write tuples function pointer. Null if codegen is disabled.
uint8_t * tuple_mem_
The tuple memory of batch_.
A tuple with 0 materialised slots is represented as NULL.
std::vector< ExprContext * > conjunct_ctxs_
int WriteEmptyTuples(RowBatch *row_batch, int num_tuples)
void AcquireData(MemPool *src, bool keep_current)
TupleRow * next_row(TupleRow *r) const
void StartNewRowBatch()
Set batch_ to a new row batch and update tuple_mem_ accordingly.
THdfsCompression::type decompression_type_
The most recently used decompression type.
LLVM code generator. This is the top-level object used to generate jitted code.
RuntimeState * state_
RuntimeState for error reporting.
virtual Status ProcessSplit()=0
Status UpdateDecompressor(const THdfsCompression::type &compression)
void InitTuple(Tuple *template_tuple, Tuple *tuple)
int GetMemory(MemPool **pool, Tuple **tuple_mem, TupleRow **tuple_row_mem)
int num_errors_in_file_
Number of errors in the current file.
Status CommitRows(int num_rows)
HdfsScanner(HdfsScanNode *scan_node, RuntimeState *state)
bool WriteCompleteTuple(MemPool *pool, FieldLocation *fields, Tuple *tuple, TupleRow *tuple_row, Tuple *template_tuple, uint8_t *error_fields, uint8_t *error_in_row)
bool IR_ALWAYS_INLINE EvalConjuncts(TupleRow *row)
MemPool * tuple_data_pool()
static llvm::Function * CodegenWriteAlignedTuples(HdfsScanNode *, LlvmCodeGen *, llvm::Function *write_tuple_fn)
void AttachPool(MemPool *pool, bool commit_batch)
int WriteAlignedTuples(MemPool *pool, TupleRow *tuple_row_mem, int row_size, FieldLocation *fields, int num_tuples, int max_added_tuples, int slots_per_tuple, int row_start_indx)
Metadata for a single partition inside an Hdfs table.
Tuple * tuple_
Current tuple pointer into tuple_mem_.
static bool EvalConjuncts(ExprContext *const *ctxs, int num_ctxs, TupleRow *row)
RuntimeProfile::Counter * decompress_timer_
Time spent decompressing bytes.
Status InitializeWriteTuplesFn(HdfsPartitionDescriptor *partition, THdfsFileFormat::type type, const std::string &scanner_name)
bool ReportTupleParseError(FieldLocation *fields, uint8_t *errors, int row_idx)
ScannerContext::Stream * stream_
The first stream for context_.
int(* WriteTuplesFn)(HdfsScanner *, MemPool *, TupleRow *, int, FieldLocation *, int, int, int, int)
Tuple * next_tuple(Tuple *t) const
virtual Status Prepare(ScannerContext *context)
One-time initialisation of state that is constant across scan ranges.