Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
compress.h
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 
16 #ifndef IMPALA_UTIL_COMPRESS_H
17 #define IMPALA_UTIL_COMPRESS_H
18 
20 #include <zlib.h>
21 
22 #include "util/codec.h"
23 #include "exec/hdfs-scanner.h"
24 #include "runtime/mem-pool.h"
25 
26 namespace impala {
27 
31 
// Codec subclass that keeps a zlib z_stream as state and reports "gz" as its
// file extension. The constructor and Init() are private and Codec is declared
// a friend, so instances are presumably created through Codec (confirm).
32 class GzipCompressor : public Codec {
33  public:
 // Compression formats supported by the zlib library.
 // NOTE(review): the enumerators are not visible in this listing (the line
 // gutter jumps from 35 to 39) -- consult the real header for the values.
35  enum Format {
39  };
40 
41  virtual ~GzipCompressor();
 // NOTE(review): presumably an upper bound on the output size for input_len
 // bytes of input; undocumented in this listing -- confirm in compress.cc.
42  virtual int64_t MaxOutputLen(int64_t input_len, const uint8_t* input = NULL);
 // Process a block of data, either compressing or decompressing it.
43  virtual Status ProcessBlock(bool output_preallocated, int64_t input_length,
44  const uint8_t* input, int64_t* output_length, uint8_t** output);
45 
 // File extension to use for this compression codec.
46  virtual std::string file_extension() const { return "gz"; }
47 
48  private:
49  friend class Codec;
50  GzipCompressor(Format format, MemPool* mem_pool = NULL, bool reuse_buffer = false);
 // Initialize the codec. This should only be called once.
51  virtual Status Init();
52 
54 
 // Structure used to communicate with the library.
56  z_stream stream_;
57 
 // These are magic numbers from zlib.h. Not clear why they are not defined there.
59  const static int WINDOW_BITS = 15; // Maximum window size
60  const static int GZIP_CODEC = 16; // Output Gzip.
61 
 // NOTE(review): private helper that takes an input buffer and a caller-supplied
 // output buffer (single pointer, unlike ProcessBlock's uint8_t**); presumably
 // performs the actual zlib compression -- confirm in compress.cc.
66  Status Compress(int64_t input_length, const uint8_t* input,
67  int64_t* output_length, uint8_t* output);
68 };
69 
// Codec subclass that reports "bz2" as its file extension. The constructor is
// private and Codec is declared a friend, so instances are presumably created
// through Codec (confirm).
70 class BzipCompressor : public Codec {
71  public:
72  virtual ~BzipCompressor() { }
 // NOTE(review): presumably an upper bound on the output size for input_len
 // bytes of input; undocumented in this listing -- confirm in compress.cc.
73  virtual int64_t MaxOutputLen(int64_t input_len, const uint8_t* input = NULL);
 // Process a block of data, either compressing or decompressing it.
74  virtual Status ProcessBlock(bool output_preallocated, int64_t input_length,
75  const uint8_t* input, int64_t* output_length, uint8_t** output);
 // File extension to use for this compression codec.
76  virtual std::string file_extension() const { return "bz2"; }
77 
78  private:
79  friend class Codec;
 // Unlike the Gzip/Snappy/Lz4 constructors in this header, neither argument
 // has a default value here.
80  BzipCompressor(MemPool* mem_pool, bool reuse_buffer);
 // Initialize the codec. This should only be called once.
 // This override does no work and returns Status::OK.
81  virtual Status Init() { return Status::OK; }
82 };
83 
// Codec subclass that reports "snappy" as its file extension (the same
// extension SnappyCompressor reports). The constructor is private and Codec is
// declared a friend, so instances are presumably created through Codec (confirm).
// NOTE(review): how the "block" variant's output differs from SnappyCompressor
// is not visible in this header -- see compress.cc.
84 class SnappyBlockCompressor : public Codec {
85  public:
86  virtual ~SnappyBlockCompressor() { }
 // NOTE(review): presumably an upper bound on the output size for input_len
 // bytes of input; undocumented in this listing -- confirm in compress.cc.
87  virtual int64_t MaxOutputLen(int64_t input_len, const uint8_t* input = NULL);
 // Process a block of data, either compressing or decompressing it.
88  virtual Status ProcessBlock(bool output_preallocated, int64_t input_length,
89  const uint8_t* input, int64_t* output_length, uint8_t** output);
 // File extension to use for this compression codec.
90  virtual std::string file_extension() const { return "snappy"; }
91 
92  private:
93  friend class Codec;
 // Neither argument has a default value here.
94  SnappyBlockCompressor(MemPool* mem_pool, bool reuse_buffer);
 // Initialize the codec. This should only be called once.
 // This override does no work and returns Status::OK.
95  virtual Status Init() { return Status::OK; }
96 };
97 
// Codec subclass that reports "snappy" as its file extension and additionally
// exposes a static checksum helper. The constructor is private and Codec is
// declared a friend, so instances are presumably created through Codec (confirm).
98 class SnappyCompressor : public Codec {
99  public:
100  virtual ~SnappyCompressor() { }
 // NOTE(review): presumably an upper bound on the output size for input_len
 // bytes of input; undocumented in this listing -- confirm in compress.cc.
101  virtual int64_t MaxOutputLen(int64_t input_len, const uint8_t* input = NULL);
 // Process a block of data, either compressing or decompressing it.
102  virtual Status ProcessBlock(bool output_preallocated, int64_t input_length,
103  const uint8_t* input, int64_t* output_length, uint8_t** output);
 // File extension to use for this compression codec.
104  virtual std::string file_extension() const { return "snappy"; }
105 
 // NOTE(review): static, so callable without an instance; presumably returns a
 // 32-bit checksum over input_len bytes of input. The algorithm is not shown in
 // this header -- confirm in compress.cc.
109  static uint32_t ComputeChecksum(int64_t input_len, const uint8_t* input);
110 
111  private:
112  friend class Codec;
113  SnappyCompressor(MemPool* mem_pool = NULL, bool reuse_buffer = false);
 // Initialize the codec. This should only be called once.
 // This override does no work and returns Status::OK.
114  virtual Status Init() { return Status::OK; }
115 };
116 
// Codec subclass that reports "lz4" as its file extension. The constructor is
// private and Codec is declared a friend, so instances are presumably created
// through Codec (confirm).
121 class Lz4Compressor : public Codec {
122  public:
123  virtual ~Lz4Compressor() { }
 // NOTE(review): presumably an upper bound on the output size for input_len
 // bytes of input; undocumented in this listing -- confirm in compress.cc.
124  virtual int64_t MaxOutputLen(int64_t input_len, const uint8_t* input = NULL);
 // Process a block of data, either compressing or decompressing it.
125  virtual Status ProcessBlock(bool output_preallocated, int64_t input_length,
126  const uint8_t* input, int64_t* output_length, uint8_t** output);
 // File extension to use for this compression codec.
127  virtual std::string file_extension() const { return "lz4"; }
128 
129  private:
130  friend class Codec;
131  Lz4Compressor(MemPool* mem_pool = NULL, bool reuse_buffer = false);
 // Initialize the codec. This should only be called once.
 // This override does no work and returns Status::OK.
132  virtual Status Init() { return Status::OK; }
133 };
134 
135 }
136 #endif
virtual ~SnappyCompressor()
Definition: compress.h:100
virtual ~SnappyBlockCompressor()
Definition: compress.h:86
virtual Status Init()
Initialize the codec. This should only be called once.
Definition: compress.h:132
virtual Status Init()
Initialize the codec. This should only be called once.
Definition: compress.h:114
virtual ~GzipCompressor()
Definition: compress.cc:40
virtual Status Init()
Initialize the codec. This should only be called once.
Definition: compress.h:81
virtual int64_t MaxOutputLen(int64_t input_len, const uint8_t *input=NULL)
Definition: compress.cc:192
virtual Status ProcessBlock(bool output_preallocated, int64_t input_length, const uint8_t *input, int64_t *output_length, uint8_t **output)
Process a block of data, either compressing or decompressing it.
Definition: compress.cc:249
virtual Status Init()
Initialize the codec. This should only be called once.
Definition: compress.h:95
virtual int64_t MaxOutputLen(int64_t input_len, const uint8_t *input=NULL)
Definition: compress.cc:285
static const int GZIP_CODEC
Definition: compress.h:60
SnappyCompressor(MemPool *mem_pool=NULL, bool reuse_buffer=false)
Definition: compress.cc:241
virtual int64_t MaxOutputLen(int64_t input_len, const uint8_t *input=NULL)
Definition: compress.cc:61
virtual Status Init()
Initialize the codec. This should only be called once.
Definition: compress.cc:44
virtual std::string file_extension() const
File extension to use for this compression codec.
Definition: compress.h:76
BzipCompressor(MemPool *mem_pool, bool reuse_buffer)
Definition: compress.cc:131
z_stream stream_
Structure used to communicate with the library.
Definition: compress.h:56
virtual ~Lz4Compressor()
Definition: compress.h:123
virtual int64_t MaxOutputLen(int64_t input_len, const uint8_t *input=NULL)
Definition: compress.cc:135
virtual std::string file_extension() const
File extension to use for this compression codec.
Definition: compress.h:46
virtual Status ProcessBlock(bool output_preallocated, int64_t input_length, const uint8_t *input, int64_t *output_length, uint8_t **output)
Process a block of data, either compressing or decompressing it.
Definition: compress.cc:110
virtual std::string file_extension() const
File extension to use for this compression codec.
Definition: compress.h:90
virtual std::string file_extension() const
File extension to use for this compression codec.
Definition: compress.h:127
virtual Status ProcessBlock(bool output_preallocated, int64_t input_length, const uint8_t *input, int64_t *output_length, uint8_t **output)
Process a block of data, either compressing or decompressing it.
Definition: compress.cc:140
virtual Status ProcessBlock(bool output_preallocated, int64_t input_length, const uint8_t *input, int64_t *output_length, uint8_t **output)
Process a block of data, either compressing or decompressing it.
Definition: compress.cc:197
GzipCompressor(Format format, MemPool *mem_pool=NULL, bool reuse_buffer=false)
Definition: compress.cc:34
virtual Status ProcessBlock(bool output_preallocated, int64_t input_length, const uint8_t *input, int64_t *output_length, uint8_t **output)
Process a block of data, either compressing or decompressing it.
Definition: compress.cc:289
Status Compress(int64_t input_length, const uint8_t *input, int64_t *output_length, uint8_t *output)
Definition: compress.cc:82
static const int WINDOW_BITS
These are magic numbers from zlib.h. Not clear why they are not defined there.
Definition: compress.h:59
static const Status OK
Definition: status.h:87
virtual int64_t MaxOutputLen(int64_t input_len, const uint8_t *input=NULL)
Definition: compress.cc:245
Format
Compression formats supported by the zlib library.
Definition: compress.h:35
virtual ~BzipCompressor()
Definition: compress.h:72
SnappyBlockCompressor(MemPool *mem_pool, bool reuse_buffer)
Definition: compress.cc:188
virtual std::string file_extension() const
File extension to use for this compression codec.
Definition: compress.h:104
Lz4Compressor(MemPool *mem_pool=NULL, bool reuse_buffer=false)
Definition: compress.cc:281
static uint32_t ComputeChecksum(int64_t input_len, const uint8_t *input)
Definition: compress.cc:273