15 #ifndef IMPALA_EXEC_HDFS_AVRO_WRITER_H
16 #define IMPALA_EXEC_HDFS_AVRO_WRITER_H
29 class TupleDescriptor;
34 struct OutputPartition;
62 const std::vector<ExprContext*>& output_exprs);
76 const std::vector<int32_t>& row_group_indices,
std::string sync_marker_
16-byte sync marker (a UUID).
virtual Status InitNewFile()
Called when a new file is started.
WriteStream out_
Buffer which holds accumulated output.
virtual ~HdfsAvroTableWriter()
virtual void Close()
Called once when this writer should clean up any resources.
virtual Status AppendRowBatch(RowBatch *rows, const std::vector< int32_t > &row_group_indices, bool *new_file)
boost::scoped_ptr< MemPool > mem_pool_
virtual std::string file_extension() const
Returns the file extension for this writer.
virtual uint64_t default_block_size() const
virtual Status Finalize()
std::string codec_name_
Name of the codec; only set if codec_type_ != NONE.
THdfsCompression::type codec_type_
Type of the codec; NONE if no compression is used.
Status WriteFileHeader()
Writes the Avro file header to HDFS.
HdfsAvroTableWriter(HdfsTableSink *parent, RuntimeState *state, OutputPartition *output, const HdfsPartitionDescriptor *partition, const HdfsTableDescriptor *table_desc, const std::vector< ExprContext * > &output_exprs)
Metadata for a single partition inside an HDFS table.
virtual Status Init()
Performs initialization of the writer.
boost::scoped_ptr< Codec > compressor_
The codec used for compression; only set if codec_type_ != NONE.
uint64_t unflushed_rows_
Number of rows consumed since last flush.
void AppendField(const ColumnType &type, const void *value)
Adds an encoded field to out_.
void ConsumeRow(TupleRow *row)
Processes a single row, appending to out_.