Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
codec.h
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 
16 #ifndef IMPALA_UTIL_CODEC_H
17 #define IMPALA_UTIL_CODEC_H
18 
19 #include "common/status.h"
20 #include "runtime/mem-pool.h"
21 #include "util/runtime-profile.h"
22 
23 #include <boost/scoped_ptr.hpp>
24 #include "gen-cpp/Descriptors_types.h"
25 
26 namespace impala {
27 
// Forward declarations. MemPool is only used through pointers and
// boost::scoped_ptr in the Codec declaration below; RuntimeState is declared
// here but not referenced anywhere else in this listing.
28 class MemPool;
29 class RuntimeState;
30 
41 class Codec {
42  public:
44  static const char* const DEFAULT_COMPRESSION;
45  static const char* const GZIP_COMPRESSION;
46  static const char* const BZIP2_COMPRESSION;
47  static const char* const SNAPPY_COMPRESSION;
48  static const char* const UNKNOWN_CODEC_ERROR;
49 
51  typedef std::map<const std::string, const THdfsCompression::type> CodecMap;
52  static const CodecMap CODEC_MAP;
53 
63  static Status CreateDecompressor(MemPool* mem_pool, bool reuse,
64  THdfsCompression::type format, boost::scoped_ptr<Codec>* decompressor);
65 
67  static Status CreateDecompressor(MemPool* mem_pool, bool reuse,
68  const std::string& codec, boost::scoped_ptr<Codec>* decompressor);
69 
77  static Status CreateCompressor(MemPool* mem_pool, bool reuse,
78  THdfsCompression::type format, boost::scoped_ptr<Codec>* compressor);
79 
81  static Status CreateCompressor(MemPool* mem_pool, bool reuse,
82  const std::string& codec, boost::scoped_ptr<Codec>* compressor);
83 
85  static std::string GetCodecName(THdfsCompression::type);
87  static Status GetHadoopCodecClassName(THdfsCompression::type, std::string* out_name);
88 
89  virtual ~Codec() {}
90 
92  //
97  //
100  //
104  virtual Status ProcessBlock(bool output_preallocated, int64_t input_length,
105  const uint8_t* input, int64_t* output_length, uint8_t** output) = 0;
106 
110  Status ProcessBlock32(bool output_preallocated, int input_length, const uint8_t* input,
111  int* output_length, uint8_t** output);
112 
117  virtual Status ProcessBlockStreaming(int64_t input_length, const uint8_t* input,
118  int64_t* input_bytes_read, int64_t* output_length, uint8_t** output, bool* eos) {
119  return Status("Not implemented.");
120  }
121 
127  virtual int64_t MaxOutputLen(int64_t input_len, const uint8_t* input = NULL) = 0;
128 
130  virtual void Close();
131 
133  virtual std::string file_extension() const = 0;
134 
135  bool reuse_output_buffer() const { return reuse_buffer_; }
136 
140  static const int MAX_BLOCK_SIZE = (2L * 1024 * 1024 * 1024) - 1;
141 
142  protected:
148  Codec(MemPool* mem_pool, bool reuse_buffer);
149 
151  virtual Status Init() = 0;
152 
155 
158  boost::scoped_ptr<MemPool> temp_memory_pool_;
159 
162 
165  uint8_t* out_buffer_;
166 
168  int64_t buffer_length_;
169 };
170 
171 }
172 #endif
static const CodecMap CODEC_MAP
Definition: codec.h:52
bool reuse_output_buffer() const
Definition: codec.h:135
static Status CreateCompressor(MemPool *mem_pool, bool reuse, THdfsCompression::type format, boost::scoped_ptr< Codec > *compressor)
static const char *const BZIP2_COMPRESSION
Definition: codec.h:46
Codec(MemPool *mem_pool, bool reuse_buffer)
Definition: codec.cc:164
static Status CreateDecompressor(MemPool *mem_pool, bool reuse, THdfsCompression::type format, boost::scoped_ptr< Codec > *decompressor)
virtual std::string file_extension() const =0
File extension to use for this compression codec.
bool reuse_buffer_
Can we reuse the output buffer or do we need to allocate on each call?
Definition: codec.h:161
static const char *const DEFAULT_COMPRESSION
These are the codec string representations used in Hadoop.
Definition: codec.h:44
static std::string GetCodecName(THdfsCompression::type)
Return the name of a compression algorithm.
Definition: codec.cc:50
static const char *const GZIP_COMPRESSION
Definition: codec.h:45
static Status GetHadoopCodecClassName(THdfsCompression::type, std::string *out_name)
Returns the java class name for the given compression type.
Definition: codec.cc:59
virtual Status ProcessBlock(bool output_preallocated, int64_t input_length, const uint8_t *input, int64_t *output_length, uint8_t **output)=0
Process a block of data, either compressing or decompressing it.
static const char *const SNAPPY_COMPRESSION
Definition: codec.h:47
static const int MAX_BLOCK_SIZE
Definition: codec.h:140
virtual Status Init()=0
Initialize the codec. This should only be called once.
virtual Status ProcessBlockStreaming(int64_t input_length, const uint8_t *input, int64_t *input_bytes_read, int64_t *output_length, uint8_t **output, bool *eos)
Definition: codec.h:117
static const char *const UNKNOWN_CODEC_ERROR
Definition: codec.h:48
Status ProcessBlock32(bool output_preallocated, int input_length, const uint8_t *input, int *output_length, uint8_t **output)
Definition: codec.cc:181
uint8_t * out_buffer_
Definition: codec.h:165
int64_t buffer_length_
Length of the output buffer.
Definition: codec.h:168
boost::scoped_ptr< MemPool > temp_memory_pool_
Definition: codec.h:158
virtual int64_t MaxOutputLen(int64_t input_len, const uint8_t *input=NULL)=0
std::map< const std::string, const THdfsCompression::type > CodecMap
Map from codec string to compression format.
Definition: codec.h:51
virtual ~Codec()
Definition: codec.h:89
virtual void Close()
Must be called on codec before destructor for final cleanup.
Definition: codec.cc:174
MemPool * memory_pool_
Pool to allocate the buffer to hold transformed data.
Definition: codec.h:154