16 #ifndef IMPALA_UTIL_CODEC_H
17 #define IMPALA_UTIL_CODEC_H
23 #include <boost/scoped_ptr.hpp>
24 #include "gen-cpp/Descriptors_types.h"
51 typedef std::map<const std::string, const THdfsCompression::type>
CodecMap;
64 THdfsCompression::type format, boost::scoped_ptr<Codec>* decompressor);
68 const std::string& codec, boost::scoped_ptr<Codec>* decompressor);
78 THdfsCompression::type format, boost::scoped_ptr<Codec>* compressor);
82 const std::string& codec, boost::scoped_ptr<Codec>* compressor);
105 const uint8_t* input, int64_t* output_length, uint8_t** output) = 0;
111 int* output_length, uint8_t** output);
118 int64_t* input_bytes_read, int64_t* output_length, uint8_t** output,
bool* eos) {
119 return Status(
"Not implemented.");
127 virtual int64_t
MaxOutputLen(int64_t input_len,
const uint8_t* input = NULL) = 0;
130 virtual void Close();
static const CodecMap CODEC_MAP
bool reuse_output_buffer() const
static Status CreateCompressor(MemPool *mem_pool, bool reuse, THdfsCompression::type format, boost::scoped_ptr< Codec > *compressor)
static const char *const BZIP2_COMPRESSION
Codec(MemPool *mem_pool, bool reuse_buffer)
static Status CreateDecompressor(MemPool *mem_pool, bool reuse, THdfsCompression::type format, boost::scoped_ptr< Codec > *decompressor)
virtual std::string file_extension() const =0
File extension to use for this compression codec.
bool reuse_buffer_
Whether the output buffer can be reused, or a new one must be allocated on each call.
static const char *const DEFAULT_COMPRESSION
These are the codec string representations used in Hadoop.
static std::string GetCodecName(THdfsCompression::type)
Return the name of a compression algorithm.
static const char *const GZIP_COMPRESSION
static Status GetHadoopCodecClassName(THdfsCompression::type, std::string *out_name)
Returns the java class name for the given compression type.
virtual Status ProcessBlock(bool output_preallocated, int64_t input_length, const uint8_t *input, int64_t *output_length, uint8_t **output)=0
Process a block of data, either compressing or decompressing it.
static const char *const SNAPPY_COMPRESSION
static const int MAX_BLOCK_SIZE
virtual Status Init()=0
Initialize the codec. This should only be called once.
virtual Status ProcessBlockStreaming(int64_t input_length, const uint8_t *input, int64_t *input_bytes_read, int64_t *output_length, uint8_t **output, bool *eos)
static const char *const UNKNOWN_CODEC_ERROR
Status ProcessBlock32(bool output_preallocated, int input_length, const uint8_t *input, int *output_length, uint8_t **output)
int64_t buffer_length_
Length of the output buffer.
boost::scoped_ptr< MemPool > temp_memory_pool_
virtual int64_t MaxOutputLen(int64_t input_len, const uint8_t *input=NULL)=0
std::map< const std::string, const THdfsCompression::type > CodecMap
Map from codec string to compression format.
virtual void Close()
Must be called on the codec before its destructor runs, to perform final cleanup.
MemPool * memory_pool_
Pool to allocate the buffer to hold transformed data.