Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
hdfs-table-writer.h
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 
16 #ifndef IMPALA_EXEC_HDFS_TABLE_WRITER_H
17 #define IMPALA_EXEC_HDFS_TABLE_WRITER_H
18 
19 #include <hdfs.h>
20 #include <boost/scoped_ptr.hpp>
21 #include <boost/unordered_map.hpp>
22 
23 #include "runtime/descriptors.h"
24 #include "exec/hdfs-table-sink.h"
25 #include "util/hdfs-util.h"
26 
27 namespace impala {
28 
34  public:
42  RuntimeState* state, OutputPartition* output_partition,
43  const HdfsPartitionDescriptor* partition_desc,
44  const HdfsTableDescriptor* table_desc,
45  const std::vector<ExprContext*>& output_expr_ctxs);
46 
/// Virtual destructor with an empty body.
47  virtual ~HdfsTableWriter() { }
48 
58 
/// Do initialization of writer. Pure virtual: each concrete writer supplies it.
60  virtual Status Init() = 0;
61 
/// Called when a new file is started. Pure virtual: each concrete writer
/// supplies it.
63  virtual Status InitNewFile() = 0;
64 
/// Pure virtual; takes a row batch, a vector of int32_t row indices and a
/// bool pointer, and returns a Status.
/// NOTE(review): the original doc comment (listing lines 65-72) is not visible
/// here. Presumably `row_group_indices` selects which rows of `batch` to append
/// and `*new_file` is an output flag asking the caller to start a new file --
/// confirm against the real header before relying on this.
73  virtual Status AppendRowBatch(RowBatch* batch,
74  const std::vector<int32_t>& row_group_indices,
75  bool* new_file) = 0;
76 
/// Pure virtual; returns a Status.
/// NOTE(review): the original doc comment (listing lines 77-79) is not visible
/// here. Presumably completes the file currently being written -- confirm.
80  virtual Status Finalize() = 0;
81 
/// Called once when this writer should cleanup any resources. Pure virtual.
83  virtual void Close() = 0;
84 
/// Returns the stats for this writer, as a non-const reference to stats_.
86  TInsertStats& stats() { return stats_; };
87 
/// Pure virtual const accessor returning a uint64_t.
/// NOTE(review): the original doc comment (listing lines 88-89) is not visible
/// here. Presumably the default HDFS block size for files produced by this
/// writer -- confirm units and meaning against the real header.
90  virtual uint64_t default_block_size() const = 0;
91 
/// Returns the file extension for this writer. Pure virtual.
93  virtual std::string file_extension() const = 0;
94 
95  protected:
/// Compile-time constant equal to 50 * 1024 (51200).
/// NOTE(review): presumably a byte count used as a write/flush threshold; its
/// use is not visible in this listing -- confirm.
98  static const int HDFS_FLUSH_WRITE_SIZE = 50 * 1024;
99 
/// Write to the current hdfs file. Convenience overload for char data: it
/// reinterprets `data` as const uint8_t* and forwards to the
/// Write(const uint8_t*, int32_t) overload, returning that call's Status.
101  Status Write(const char* data, int32_t len) {
102  return Write(reinterpret_cast<const uint8_t*>(data), len);
103  }
/// Byte-pointer overload that the other Write() overloads forward to. Only
/// declared here; its definition is not part of this listing.
/// NOTE(review): presumably writes `len` bytes starting at `data` to the
/// current HDFS file -- confirm against the implementation.
104  Status Write(const uint8_t* data, int32_t len);
105 
/// Writes the object representation of `v`: passes the address of `v` and
/// sizeof(T) to the Write(const uint8_t*, int32_t) overload. `v` is taken by
/// value, so the bytes come from this function's own copy of the argument.
106  template<typename T>
107  Status Write(T v) {
108  return Write(reinterpret_cast<uint8_t*>(&v), sizeof(T));
109  }
110 
113 
116 
119 
122 
/// Expressions that materialize output values.
124  std::vector<ExprContext*> output_expr_ctxs_;
125 
/// Subclass should populate any file format specific stats. Exposed to
/// callers through stats().
127  TInsertStats stats_;
128 
129 };
130 }
131 #endif
static const int HDFS_FLUSH_WRITE_SIZE
HdfsTableSink * parent_
Parent table sink object.
virtual uint64_t default_block_size() const =0
RuntimeState * state_
Runtime state.
virtual Status InitNewFile()=0
Called when a new file is started.
TInsertStats & stats()
Returns the stats for this writer.
Status Write(const char *data, int32_t len)
Write to the current hdfs file.
const HdfsTableDescriptor * table_desc_
Table descriptor of table to be written.
virtual Status AppendRowBatch(RowBatch *batch, const std::vector< int32_t > &row_group_indices, bool *new_file)=0
HdfsTableWriter(HdfsTableSink *parent, RuntimeState *state, OutputPartition *output_partition, const HdfsPartitionDescriptor *partition_desc, const HdfsTableDescriptor *table_desc, const std::vector< ExprContext * > &output_expr_ctxs)
virtual Status Finalize()=0
std::vector< ExprContext * > output_expr_ctxs_
Expressions that materialize output values.
virtual Status Init()=0
Do initialization of writer.
virtual std::string file_extension() const =0
Returns the file extension for this writer.
virtual void Close()=0
Called once when this writer should cleanup any resources.
TInsertStats stats_
Subclass should populate any file format specific stats.
Metadata for a single partition inside an Hdfs table.
Definition: descriptors.h:177
OutputPartition * output_
Structure describing partition written to by this writer.