#ifndef IMPALA_EXEC_HDFS_PARQUET_TABLE_WRITER_H
#define IMPALA_EXEC_HDFS_PARQUET_TABLE_WRITER_H
#include <boost/scoped_ptr.hpp>
struct OutputPartition;
class ThriftSerializer;
boost::scoped_ptr< ThriftSerializer > thrift_serializer_
Thrift serializer used to serialize the Parquet file metadata.
boost::scoped_ptr< MemPool > reusable_col_mem_pool_
Memory pool for column buffers that is reused across output files.
int64_t file_size_limit_
Limit on the total size of the file.
std::vector< uint8_t > compression_staging_buffer_
Staging buffer used when compressing data pages.
parquet::FileMetaData file_metadata_
File metadata Thrift description.
static const int DEFAULT_DATA_PAGE_SIZE
Default data page size. In bytes.
parquet::RowGroup * current_row_group_
The current row group being written to.
virtual Status Init()
Initialize column information.
virtual void Close()
Called once when this writer should clean up any resources.
int64_t row_count_
Number of rows in the current file.
int64_t file_size_estimate_
Current estimate of the on-disk size of the file being written.
static const int HDFS_BLOCK_SIZE
Default HDFS block size. In bytes.
Status FlushCurrentRowGroup()
Flushes the current row group's data and metadata to the output file.
virtual Status InitNewFile()
Called when a new output file is started.
static const int HDFS_MIN_FILE_SIZE
Minimum file size. If the configured size is less than this, writing fails.
static const int HDFS_BLOCK_ALIGNMENT
Align block sizes to this constant. In bytes.
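As an illustration of how such an alignment constant is typically applied, the sketch below rounds a candidate block size up to the next multiple of the alignment. The helper name and its use are assumptions for illustration only, not part of this header.

#include <cstdint>

// Hypothetical helper, not part of HdfsParquetTableWriter: round a candidate
// block size up to the next multiple of an alignment constant such as
// HDFS_BLOCK_ALIGNMENT.
inline int64_t RoundUpToAlignment(int64_t size, int64_t alignment) {
  return ((size + alignment - 1) / alignment) * alignment;
}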
virtual Status Finalize()
Write out all the data.
~HdfsParquetTableWriter()
HdfsParquetTableWriter(HdfsTableSink *parent, RuntimeState *state, OutputPartition *output_partition, const HdfsPartitionDescriptor *part_desc, const HdfsTableDescriptor *table_desc, const std::vector< ExprContext * > &output_expr_ctxs)
virtual std::string file_extension() const
Returns the file extension for this writer.
Status WriteFileHeader()
Write the file header information to the output file.
static const int ROW_GROUP_SIZE
Default row group size. In bytes.
std::vector< BaseColumnWriter * > columns_
Array of pointers to the per-column writers.
boost::scoped_ptr< MemPool > per_file_mem_pool_
Memory pool for buffers allocated per output file; reset when a file is finished.
virtual uint64_t default_block_size() const
Returns the target HDFS block size to use.
virtual Status AppendRowBatch(RowBatch *batch, const std::vector< int32_t > &row_group_indices, bool *new_file)
Appends the Parquet representation of the rows in the batch to the current file.
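To show how the virtual methods listed here relate to one another, the fragment below sketches the call sequence a caller (in Impala, the HdfsTableSink) might drive. The variable names, exact ordering, and error handling are assumptions for illustration, not a prescription from this header.

// Illustrative call sequence only; in practice the sink owns this logic.
HdfsParquetTableWriter writer(parent, state, output_partition, part_desc,
                              table_desc, output_expr_ctxs);
RETURN_IF_ERROR(writer.Init());          // set up per-column writers
RETURN_IF_ERROR(writer.InitNewFile());   // start the first output file
bool new_file = false;
RETURN_IF_ERROR(writer.AppendRowBatch(batch, row_group_indices, &new_file));
if (new_file) {
  // The current file is full: the caller finalizes it, opens a new file,
  // and re-appends the batch.
}
RETURN_IF_ERROR(writer.Finalize());      // write out all remaining data
writer.Close();                          // release resources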
Status WriteFileFooter()
Write the file metadata and footer.
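For context, the on-disk layout that WriteFileHeader() and WriteFileFooter() together produce follows the standard Parquet file format; the split of which piece each method writes is inferred from their descriptions above.

  "PAR1" magic                                   <- written by WriteFileHeader()
  row group 0 column chunks (data pages)
  ...
  row group N column chunks (data pages)
  Thrift-serialized parquet::FileMetaData (file_metadata_)
  4-byte little-endian length of the serialized metadata
  "PAR1" magic                                   <- written by WriteFileFooter()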
TParquetInsertStats parquet_stats_
For each column, the on-disk size written.
static const int64_t MAX_DATA_PAGE_SIZE
Maximum data page size. In bytes.
int64_t MinBlockSize() const
Minimum allowable block size in bytes. This is a function of the number of columns.
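One plausible way such a column-dependent minimum could be computed is sketched below; the headroom factor of three pages per column is an assumption for illustration and not necessarily the writer's actual formula.

// Hypothetical sketch: reserve room for a few data pages per column so every
// column writer can buffer at least one page with some headroom.
int64_t HdfsParquetTableWriter::MinBlockSize() const {
  // The factor of 3 pages per column is an assumed value for illustration.
  return 3 * DEFAULT_DATA_PAGE_SIZE * columns_.size();
}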