Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
hdfs-text-table-writer.h
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 
16 #ifndef IMPALA_EXEC_HDFS_TEXT_TABLE_WRITER_H
17 #define IMPALA_EXEC_HDFS_TEXT_TABLE_WRITER_H
18 
19 #include <hdfs.h>
20 #include <sstream>
21 #include <boost/scoped_ptr.hpp>
22 
23 #include "runtime/descriptors.h"
24 #include "exec/hdfs-table-sink.h"
25 #include "exec/hdfs-table-writer.h"
26 
27 namespace impala {
28 
29 class Codec;
30 class Expr;
31 class MemPool;
32 struct OutputPartition;
33 class RuntimeState;
34 struct StringValue;
35 class TupleDescriptor;
36 class TupleRow;
37 
41  public:
43  RuntimeState* state, OutputPartition* output,
44  const HdfsPartitionDescriptor* partition,
45  const HdfsTableDescriptor* table_desc,
46  const std::vector<ExprContext*>& output_expr_ctxs);
47 
49 
50  virtual Status Init();
51  virtual Status Finalize();
52  virtual Status InitNewFile() { return Status::OK; }
53  virtual void Close();
54  virtual uint64_t default_block_size() const;
55  virtual std::string file_extension() const;
56 
60  Status AppendRowBatch(RowBatch* current_row,
61  const std::vector<int32_t>& row_group_indices, bool* new_file);
62 
63  private:
67  inline void PrintEscaped(const StringValue* str_val);
68 
71  Status Flush();
72 
75 
78 
81 
83  int64_t flush_size_;
84 
87  std::stringstream rowbatch_stringstream_;
88 
90  THdfsCompression::type codec_;
91 
93  boost::scoped_ptr<Codec> compressor_;
94 
96  boost::scoped_ptr<MemPool> mem_pool_;
97 };
98 
99 }
100 #endif
HdfsTextTableWriter(HdfsTableSink *parent, RuntimeState *state, OutputPartition *output, const HdfsPartitionDescriptor *partition, const HdfsTableDescriptor *table_desc, const std::vector< ExprContext * > &output_expr_ctxs)
virtual Status Init()
Do initialization of writer.
virtual Status InitNewFile()
Called when a new file is started.
virtual uint64_t default_block_size() const
int64_t flush_size_
Size in rowbatch_stringstream_ before we call flush.
virtual std::string file_extension() const
Returns the file extension for this writer.
Status AppendRowBatch(RowBatch *current_row, const std::vector< int32_t > &row_group_indices, bool *new_file)
void PrintEscaped(const StringValue *str_val)
boost::scoped_ptr< MemPool > mem_pool_
Memory pool to use with compressor_.
char escape_char_
Escape character.
std::stringstream rowbatch_stringstream_
static const Status OK
Definition: status.h:87
Metadata for a single partition inside an Hdfs table.
Definition: descriptors.h:177
boost::scoped_ptr< Codec > compressor_
Compressor if compression is enabled.
char field_delim_
Character delimiting fields (to become slots).
char tuple_delim_
Character delimiting tuples.
virtual void Close()
Called once when this writer should cleanup any resources.
THdfsCompression::type codec_
Compression codec.