Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
hdfs-sequence-table-writer.h
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef IMPALA_EXEC_HDFS_SEQUENCE_WRITER_H
16 #define IMPALA_EXEC_HDFS_SEQUENCE_WRITER_H
17 
18 #include <hdfs.h>
19 #include <sstream>
20 
21 #include "runtime/descriptors.h"
22 #include "exec/hdfs-table-sink.h"
23 #include "exec/hdfs-table-writer.h"
24 #include "util/codec.h"
25 #include "write-stream.h"
26 
27 namespace impala {
28 
29 class Expr;
30 class TupleDescriptor;
31 class TupleRow;
32 class RuntimeState;
33 struct StringValue;
34 struct OutputPartition;
35 
39  public:
41  RuntimeState* state, OutputPartition* output,
42  const HdfsPartitionDescriptor* partition,
43  const HdfsTableDescriptor* table_desc,
44  const std::vector<ExprContext*>& output_exprs);
45 
47 
48  virtual Status Init();
49  virtual Status Finalize() { return Flush(); }
50  virtual Status InitNewFile() { return WriteFileHeader(); }
51  virtual void Close() { return; }
52  virtual uint64_t default_block_size() const { return 0; }
53  virtual std::string file_extension() const { return "seq"; }
54 
57  virtual Status AppendRowBatch(RowBatch* rows,
58  const std::vector<int32_t>& row_group_indices,
59  bool* new_file);
60 
61  private:
63  inline Status ConsumeRow(TupleRow* row);
64 
67 
70 
73  inline void EncodeRow(TupleRow* row, WriteStream* buf);
74 
76  inline void WriteEscapedString(const StringValue* str_val, WriteStream* buf);
77 
80  Status Flush();
81 
85 
88 
91 
94 
97 
100 
102  std::string codec_name_;
104  boost::scoped_ptr<Codec> compressor_;
105 
108 
111 
114 
116  std::string sync_marker_;
118  std::string neg1_sync_marker_;
119 
121  static const char* VALUE_CLASS_NAME;
123  static uint8_t SEQ6_CODE[4];
124 };
125 
126 } // namespace impala
127 #endif
virtual std::string file_extension() const
Returns the file extension for this writer.
void EncodeRow(TupleRow *row, WriteStream *buf)
std::string sync_marker_
16 byte sync marker (a uuid)
static const char * VALUE_CLASS_NAME
Name of java class to use when reading the values.
HdfsSequenceTableWriter(HdfsTableSink *parent, RuntimeState *state, OutputPartition *output, const HdfsPartitionDescriptor *partition, const HdfsTableDescriptor *table_desc, const std::vector< ExprContext * > &output_exprs)
uint64_t unflushed_rows_
number of rows consumed since last flush
Status ConsumeRow(TupleRow *row)
processes a single row, delegates to Compress or NoCompress ConsumeRow().
char escape_char_
Escape character for text encoding.
virtual void Close()
Called once when this writer should clean up any resources.
bool record_compression_
true if compression is applied on each record individually
std::string codec_name_
name of codec, only set if compress_flag_ is true
WriteStream row_buf_
Temporary Buffer for a single row.
Status WriteFileHeader()
writes the SEQ file header to HDFS
bool compress_flag_
true if compression is enabled
std::string neg1_sync_marker_
A -1 in front of the sync marker, used in decompressed formats.
virtual Status AppendRowBatch(RowBatch *rows, const std::vector< int32_t > &row_group_indices, bool *new_file)
virtual Status Init()
Do initialization of writer.
Metadata for a single partition inside an Hdfs table.
Definition: descriptors.h:177
Status WriteCompressedBlock()
writes the contents of out_ as a single compressed block
WriteStream out_
buffer which holds accumulated output
boost::scoped_ptr< Codec > compressor_
the codec for compressing, only set if compress_flag_ is true
char field_delim_
Character delimiting fields.
void WriteEscapedString(const StringValue *str_val, WriteStream *buf)
writes the str_val to the buffer, escaping special characters
static uint8_t SEQ6_CODE[4]
Magic characters used to identify the file type.
virtual uint64_t default_block_size() const
virtual Status InitNewFile()
Called when a new file is started.
MemPool * mem_pool_
memory pool used by codec to allocate output buffer