Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
hdfs-avro-table-writer.h
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef IMPALA_EXEC_HDFS_AVRO_WRITER_H
16 #define IMPALA_EXEC_HDFS_AVRO_WRITER_H
17 
18 #include <hdfs.h>
19 #include <sstream>
20 #include <string>
21 
22 #include "exec/hdfs-table-writer.h"
23 #include "util/codec.h"
24 #include "exec/write-stream.h"
25 
26 namespace impala {
27 
28 class Expr;
29 class TupleDescriptor;
30 class TupleRow;
31 class RuntimeState;
32 class HdfsTableSink;
33 struct StringValue;
34 struct OutputPartition;
35 
45 //
51 //
57  public:
59  RuntimeState* state, OutputPartition* output,
60  const HdfsPartitionDescriptor* partition,
61  const HdfsTableDescriptor* table_desc,
62  const std::vector<ExprContext*>& output_exprs);
63 
64  virtual ~HdfsAvroTableWriter() { }
65 
66  virtual Status Init();
67  virtual Status Finalize() { return Flush(); }
68  virtual Status InitNewFile() { return WriteFileHeader(); }
69  virtual void Close() { mem_pool_->FreeAll(); }
70  virtual uint64_t default_block_size() const { return 0; }
71  virtual std::string file_extension() const { return "avro"; }
72 
75  virtual Status AppendRowBatch(RowBatch* rows,
76  const std::vector<int32_t>& row_group_indices,
77  bool* new_file);
78 
79  private:
81  void ConsumeRow(TupleRow* row);
82 
84  inline void AppendField(const ColumnType& type, const void* value);
85 
88 
91  Status Flush();
92 
95 
98  boost::scoped_ptr<MemPool> mem_pool_;
99 
102 
104  std::string codec_name_;
105 
107  THdfsCompression::type codec_type_;
108 
110  boost::scoped_ptr<Codec> compressor_;
111 
113  std::string sync_marker_;
114 };
115 
116 } // namespace impala
117 #endif
std::string sync_marker_
16-byte sync marker (a UUID)
virtual Status InitNewFile()
Called when a new file is started.
WriteStream out_
Buffer which holds accumulated output.
virtual void Close()
Called once when this writer should clean up any resources.
virtual Status AppendRowBatch(RowBatch *rows, const std::vector< int32_t > &row_group_indices, bool *new_file)
boost::scoped_ptr< MemPool > mem_pool_
virtual std::string file_extension() const
Returns the file extension for this writer.
virtual uint64_t default_block_size() const
std::string codec_name_
Name of codec, only set if codec_type_ != NONE.
THdfsCompression::type codec_type_
Type of the codec, will be NONE if no compression is used.
Status WriteFileHeader()
Writes the Avro file header to HDFS.
HdfsAvroTableWriter(HdfsTableSink *parent, RuntimeState *state, OutputPartition *output, const HdfsPartitionDescriptor *partition, const HdfsTableDescriptor *table_desc, const std::vector< ExprContext * > &output_exprs)
Metadata for a single partition inside an Hdfs table.
Definition: descriptors.h:177
virtual Status Init()
Do initialization of writer.
boost::scoped_ptr< Codec > compressor_
The codec for compressing, only set if codec_type_ != NONE.
uint64_t unflushed_rows_
Number of rows consumed since last flush.
void AppendField(const ColumnType &type, const void *value)
Adds an encoded field to out_.
void ConsumeRow(TupleRow *row)
Processes a single row, appending to out_.