Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
decompress.h
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 
16 #ifndef IMPALA_UTIL_DECOMPRESS_H
17 #define IMPALA_UTIL_DECOMPRESS_H
18 
19 // We need zlib.h here to declare stream_ below.
20 #include <zlib.h>
21 
22 #include "util/codec.h"
23 #include "exec/hdfs-scanner.h"
24 #include "runtime/mem-pool.h"
25 
26 namespace impala {
27 
// Codec subclass for zlib-family input. Per the member index on this page, the
// data is treated as deflate format when is_deflate_ is set, otherwise as zlib or gzip.
// NOTE(review): this listing skips header lines 40 and 45-46. The index places
// `bool is_deflate_` at line 46; line 40 presumably opens the GzipDecompressor(
// constructor declaration that continues on line 41 -- confirm against the header.
28 class GzipDecompressor : public Codec {
29  public:
30  virtual ~GzipDecompressor();
31  virtual int64_t MaxOutputLen(int64_t input_len, const uint8_t* input = NULL);
// Process a block of data (index text: "either compressing or decompressing it").
32  virtual Status ProcessBlock(bool output_preallocated, int64_t input_length,
33  const uint8_t* input, int64_t* output_length, uint8_t** output);
// Streaming counterpart of ProcessBlock; takes additional input_bytes_read and eos
// pointer parameters (presumably outputs -- confirm against decompress.cc).
34  virtual Status ProcessBlockStreaming(int64_t input_length, const uint8_t* input,
35  int64_t* input_bytes_read, int64_t* output_length, uint8_t** output, bool* eos);
// File extension to use for this compression codec.
36  virtual std::string file_extension() const { return "gz"; }
37 
38  private:
// Codec is a friend, so it can reach the private members declared below.
39  friend class Codec;
41  MemPool* mem_pool = NULL, bool reuse_buffer = false, bool is_deflate = false);
// Initialize the codec. This should only be called once.
42  virtual Status Init();
43  std::string DebugStreamState() const;
44 
47 
// zlib stream state; <zlib.h> is included at the top of the header to declare it.
48  z_stream stream_;
49 
// Magic numbers from zlib.h (index text: "Not clear why they are not defined there").
51  const static int WINDOW_BITS = 15; // Maximum window size
52  const static int DETECT_CODEC = 32; // Determine if this is libz or gzip from header.
53 };
54 
// Codec subclass whose file extension is "bz2". Declares only block-at-a-time
// processing; no ProcessBlockStreaming override is declared here.
55 class BzipDecompressor : public Codec {
56  public:
57  virtual ~BzipDecompressor() { }
58  virtual int64_t MaxOutputLen(int64_t input_len, const uint8_t* input = NULL);
// Process a block of data (index text: "either compressing or decompressing it").
59  virtual Status ProcessBlock(bool output_preallocated,
60  int64_t input_length, const uint8_t* input,
61  int64_t* output_length, uint8_t** output);
// File extension to use for this compression codec.
62  virtual std::string file_extension() const { return "bz2"; }
63  private:
// Codec is a friend, so it can reach the private constructor and Init().
64  friend class Codec;
// Unlike the Gzip/Snappy/Lz4 constructors in this header, no default arguments.
65  BzipDecompressor(MemPool* mem_pool, bool reuse_buffer);
66 
// Initialize the codec. This should only be called once. Nothing to set up
// here: the inline body returns Status::OK directly.
67  virtual Status Init() { return Status::OK; }
68 };
69 
// Codec subclass whose file extension is "snappy".
// NOTE(review): this listing skips header lines 72-73, presumably the comment
// that documented TRAILING_CHECKSUM_LEN -- confirm against the header.
70 class SnappyDecompressor : public Codec {
71  public:
// Value is 4; by its name, the byte length of a checksum trailing the data
// (assumption -- the describing comment is not part of this listing).
74  static const uint TRAILING_CHECKSUM_LEN = 4;
75 
76  virtual ~SnappyDecompressor() { }
77  virtual int64_t MaxOutputLen(int64_t input_len, const uint8_t* input = NULL);
// Process a block of data (index text: "either compressing or decompressing it").
78  virtual Status ProcessBlock(bool output_preallocated, int64_t input_length,
79  const uint8_t* input, int64_t* output_length, uint8_t** output);
// File extension to use for this compression codec.
80  virtual std::string file_extension() const { return "snappy"; }
81 
82  private:
// Codec is a friend, so it can reach the private constructor and Init().
83  friend class Codec;
84  SnappyDecompressor(MemPool* mem_pool = NULL, bool reuse_buffer = false);
// Initialize the codec. This should only be called once. Nothing to set up
// here: the inline body returns Status::OK directly.
85  virtual Status Init() { return Status::OK; }
86 };
87 
// Codec subclass whose file extension is "lz4".
91 class Lz4Decompressor : public Codec {
92  public:
93  virtual ~Lz4Decompressor() { }
94  virtual int64_t MaxOutputLen(int64_t input_len, const uint8_t* input = NULL);
// Process a block of data (index text: "either compressing or decompressing it").
95  virtual Status ProcessBlock(bool output_preallocated, int64_t input_length,
96  const uint8_t* input, int64_t* output_length, uint8_t** output);
// File extension to use for this compression codec.
97  virtual std::string file_extension() const { return "lz4"; }
98 
99  private:
// Codec is a friend, so it can reach the private constructor and Init().
100  friend class Codec;
101  Lz4Decompressor(MemPool* mem_pool = NULL, bool reuse_buffer = false);
// Initialize the codec. This should only be called once. Nothing to set up
// here: the inline body returns Status::OK directly.
102  virtual Status Init() { return Status::OK; }
103 };
104 
// NOTE(review): this listing skips header lines 105 and 107. Judging by the
// constructor below, line 105 presumably opens `class SnappyBlockDecompressor`
// and line 107 presumably declares its destructor -- confirm against the header.
106  public:
108  virtual int64_t MaxOutputLen(int64_t input_len, const uint8_t* input = NULL);
// Process a block of data (index text: "either compressing or decompressing it").
109  virtual Status ProcessBlock(bool output_preallocated, int64_t input_length,
110  const uint8_t* input, int64_t* output_length, uint8_t** output);
// File extension to use for this compression codec; the same "snappy" string
// that SnappyDecompressor returns.
111  virtual std::string file_extension() const { return "snappy"; }
112 
113  private:
// Codec is a friend, so it can reach the private constructor and Init().
114  friend class Codec;
// Unlike SnappyDecompressor's constructor, no default arguments.
115  SnappyBlockDecompressor(MemPool* mem_pool, bool reuse_buffer);
// Initialize the codec. This should only be called once. Nothing to set up
// here: the inline body returns Status::OK directly.
116  virtual Status Init() { return Status::OK; }
117 };
118 
119 }
120 #endif
virtual int64_t MaxOutputLen(int64_t input_len, const uint8_t *input=NULL)
Definition: decompress.cc:444
virtual std::string file_extension() const
File extension to use for this compression codec.
Definition: decompress.h:97
virtual Status ProcessBlock(bool output_preallocated, int64_t input_length, const uint8_t *input, int64_t *output_length, uint8_t **output)
Process a block of data, either compressing or decompressing it.
Definition: decompress.cc:118
virtual Status ProcessBlock(bool output_preallocated, int64_t input_length, const uint8_t *input, int64_t *output_length, uint8_t **output)
Process a block of data, either compressing or decompressing it.
Definition: decompress.cc:449
SnappyBlockDecompressor(MemPool *mem_pool, bool reuse_buffer)
Definition: decompress.cc:274
BzipDecompressor(MemPool *mem_pool, bool reuse_buffer)
Definition: decompress.cc:209
virtual Status ProcessBlock(bool output_preallocated, int64_t input_length, const uint8_t *input, int64_t *output_length, uint8_t **output)
Process a block of data, either compressing or decompressing it.
Definition: decompress.cc:217
virtual int64_t MaxOutputLen(int64_t input_len, const uint8_t *input=NULL)
Definition: decompress.cc:213
virtual std::string file_extension() const
File extension to use for this compression codec.
Definition: decompress.h:111
virtual int64_t MaxOutputLen(int64_t input_len, const uint8_t *input=NULL)
Definition: decompress.cc:278
virtual Status Init()
Initialize the codec. This should only be called once.
Definition: decompress.h:116
Lz4Decompressor(MemPool *mem_pool=NULL, bool reuse_buffer=false)
Definition: decompress.cc:440
static const int WINDOW_BITS
These are magic numbers from zlib.h. Not clear why they are not defined there.
Definition: decompress.h:51
std::string DebugStreamState() const
Definition: decompress.cc:60
static const int DETECT_CODEC
Definition: decompress.h:52
virtual ~BzipDecompressor()
Definition: decompress.h:57
virtual Status ProcessBlockStreaming(int64_t input_length, const uint8_t *input, int64_t *input_bytes_read, int64_t *output_length, uint8_t **output, bool *eos)
Definition: decompress.cc:71
virtual Status Init()
Initialize the codec. This should only be called once.
Definition: decompress.h:67
virtual std::string file_extension() const
File extension to use for this compression codec.
Definition: decompress.h:80
virtual Status Init()
Initialize the codec. This should only be called once.
Definition: decompress.h:102
virtual Status ProcessBlock(bool output_preallocated, int64_t input_length, const uint8_t *input, int64_t *output_length, uint8_t **output)
Process a block of data, either compressing or decompressing it.
Definition: decompress.cc:375
virtual ~Lz4Decompressor()
Definition: decompress.h:93
virtual int64_t MaxOutputLen(int64_t input_len, const uint8_t *input=NULL)
Definition: decompress.cc:406
static const Status OK
Definition: status.h:87
virtual std::string file_extension() const
File extension to use for this compression codec.
Definition: decompress.h:36
virtual Status ProcessBlock(bool output_preallocated, int64_t input_length, const uint8_t *input, int64_t *output_length, uint8_t **output)
Process a block of data, either compressing or decompressing it.
Definition: decompress.cc:416
bool is_deflate_
If set assume deflate format, otherwise zlib or gzip.
Definition: decompress.h:46
virtual int64_t MaxOutputLen(int64_t input_len, const uint8_t *input=NULL)
Definition: decompress.cc:56
GzipDecompressor(MemPool *mem_pool=NULL, bool reuse_buffer=false, bool is_deflate=false)
Definition: decompress.cc:35
virtual Status Init()
Initialize the codec. This should only be called once.
Definition: decompress.cc:45
virtual std::string file_extension() const
File extension to use for this compression codec.
Definition: decompress.h:62
static const uint TRAILING_CHECKSUM_LEN
Definition: decompress.h:74
SnappyDecompressor(MemPool *mem_pool=NULL, bool reuse_buffer=false)
Definition: decompress.cc:402
virtual Status Init()
Initialize the codec. This should only be called once.
Definition: decompress.h:85