Impala
Impala is the open source, native analytic database for Apache Hadoop.
hdfs-sequence-table-writer.cc
// Copyright 2012 Cloudera Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "exec/hdfs-sequence-table-writer.h"

#include "exec/exec-node.h"
#include "util/hdfs-util.h"
#include "util/uid-util.h"
#include "exprs/expr.h"
#include "exprs/expr-context.h"
#include "runtime/raw-value.h"
#include "runtime/row-batch.h"
#include "runtime/runtime-state.h"
#include "runtime/hdfs-fs-cache.h"

#include <vector>
#include <hdfs.h>
#include <boost/scoped_ptr.hpp>
#include <stdlib.h>
#include <codec.h>

#include "common/names.h"
namespace impala {

uint8_t HdfsSequenceTableWriter::SEQ6_CODE[4] = {'S', 'E', 'Q', 6};
const char* HdfsSequenceTableWriter::VALUE_CLASS_NAME = "org.apache.hadoop.io.Text";

HdfsSequenceTableWriter::HdfsSequenceTableWriter(HdfsTableSink* parent,
    RuntimeState* state, OutputPartition* output,
    const HdfsPartitionDescriptor* partition,
    const HdfsTableDescriptor* table_desc,
    const vector<ExprContext*>& output_exprs)
  : HdfsTableWriter(parent, state, output, partition, table_desc, output_exprs),
    mem_pool_(new MemPool(parent->mem_tracker())), compress_flag_(false),
    unflushed_rows_(0), record_compression_(false) {
  approx_block_size_ = 64 * 1024 * 1024;
  parent->mem_tracker()->Consume(approx_block_size_);
  field_delim_ = partition->field_delim();
  escape_char_ = partition->escape_char();
}

Status HdfsSequenceTableWriter::Init() {
  THdfsCompression::type codec = THdfsCompression::SNAPPY_BLOCKED;
  const TQueryOptions& query_options = state_->query_options();
  if (query_options.__isset.compression_codec) {
    codec = query_options.compression_codec;
    if (codec == THdfsCompression::SNAPPY) {
      // Seq file (and in general things that use hadoop.io.codec) always
      // mean snappy_blocked.
      codec = THdfsCompression::SNAPPY_BLOCKED;
    }
  }
  if (codec != THdfsCompression::NONE) {
    compress_flag_ = true;
    if (query_options.__isset.seq_compression_mode) {
      record_compression_ =
          query_options.seq_compression_mode == THdfsSeqCompressionMode::RECORD;
    }
    RETURN_IF_ERROR(Codec::GetHadoopCodecClassName(codec, &codec_name_));
    RETURN_IF_ERROR(Codec::CreateCompressor(
        mem_pool_.get(), true, codec, &compressor_));
    DCHECK(compressor_.get() != NULL);
  }

  // Create the sync marker.
  string uuid = GenerateUUIDString();
  uint8_t sync_neg1[20];

  ReadWriteUtil::PutInt(sync_neg1, static_cast<uint32_t>(-1));
  DCHECK(uuid.size() == 16);
  memcpy(sync_neg1 + sizeof(int32_t), uuid.data(), uuid.size());
  neg1_sync_marker_ = string(reinterpret_cast<char*>(sync_neg1), 20);
  sync_marker_ = uuid;

  return Status::OK;
}
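
// Illustrative note (editor addition, not in the original source): the two
// markers built above have the following byte layout, as constructed by Init():
//
//   sync_marker_      : the 16-byte UUID
//   neg1_sync_marker_ : int -1 (0xFFFFFFFF) followed by the 16-byte UUID
//
// Uncompressed files emit neg1_sync_marker_ after each row batch (see
// AppendRowBatch below) so that readers can resynchronize at record boundaries.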

Status HdfsSequenceTableWriter::AppendRowBatch(RowBatch* batch,
    const vector<int32_t>& row_group_indices,
    bool* new_file) {
  int32_t limit;
  if (row_group_indices.empty()) {
    limit = batch->num_rows();
  } else {
    limit = row_group_indices.size();
  }
  COUNTER_ADD(parent_->rows_inserted_counter(), limit);

  bool all_rows = row_group_indices.empty();
  int num_non_partition_cols =
      table_desc_->num_cols() - table_desc_->num_clustering_cols();
  DCHECK_GE(output_expr_ctxs_.size(), num_non_partition_cols) << parent_->DebugString();

  {
    SCOPED_TIMER(parent_->encode_timer());
    if (all_rows) {
      for (int row_idx = 0; row_idx < limit; ++row_idx) {
        RETURN_IF_ERROR(ConsumeRow(batch->GetRow(row_idx)));
      }
    } else {
      for (int row_idx = 0; row_idx < limit; ++row_idx) {
        TupleRow* row = batch->GetRow(row_group_indices[row_idx]);
        RETURN_IF_ERROR(ConsumeRow(row));
      }
    }
  }

  if (!compress_flag_) {
    // Write the -1 prefixed sync marker between uncompressed row batches.
    out_.WriteBytes(neg1_sync_marker_.size(), neg1_sync_marker_.data());
  }

  if (out_.Size() >= approx_block_size_) Flush();
  *new_file = false;
  return Status::OK;
}

Status HdfsSequenceTableWriter::WriteFileHeader() {
  out_.WriteBytes(sizeof(SEQ6_CODE), reinterpret_cast<uint8_t*>(SEQ6_CODE));

  // Output an empty KeyClassName field
  out_.WriteEmptyText();

  // Setup to be correct value class
  out_.WriteText(strlen(VALUE_CLASS_NAME),
      reinterpret_cast<const uint8_t*>(VALUE_CLASS_NAME));

  // Flag for if compression is used
  out_.WriteBoolean(compress_flag_);
  // Only valid if compression is used. Indicates if block compression is used.
  out_.WriteBoolean(compress_flag_ && !record_compression_);

  // Output the name of our compression codec, parsed by readers
  if (compress_flag_) {
    out_.WriteText(codec_name_.size(),
        reinterpret_cast<const uint8_t*>(codec_name_.data()));
  }

  // Metadata is formatted as an integer N followed by N*2 strings,
  // which are key-value pairs. Hive does not write metadata, so neither does Impala.
  out_.WriteInt(0);

  // write the sync marker
  out_.WriteBytes(sync_marker_.size(), sync_marker_.data());

  string text = out_.String();
  RETURN_IF_ERROR(Write(reinterpret_cast<const uint8_t*>(text.c_str()), text.size()));
  out_.Clear();
  return Status::OK;
}
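
// Illustrative note (editor addition, not in the original source): the header
// written above follows the SequenceFile version 6 layout:
//
//   bytes   : 'S' 'E' 'Q' 6                 (SEQ6_CODE magic)
//   Text    : ""                            (key class name, unused)
//   Text    : "org.apache.hadoop.io.Text"   (value class name)
//   boolean : compress_flag_                (are values compressed?)
//   boolean : is block compression in use?
//   Text    : codec class name              (only when compressed)
//   int     : 0                             (number of metadata pairs)
//   bytes   : 16-byte sync marker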

Status HdfsSequenceTableWriter::WriteCompressedBlock() {
  WriteStream header;
  DCHECK(compress_flag_);

  // add a sync marker to start of the block
  header.WriteBytes(sync_marker_.size(), sync_marker_.data());

  header.WriteVLong(unflushed_rows_);

  // Write (empty) key lengths and key values
  header.WriteEmptyText();
  header.WriteEmptyText();

  // Output an empty string for value lengths
  header.WriteEmptyText();

  uint8_t* output;
  int64_t output_length;
  string text = out_.String();
  {
    SCOPED_TIMER(parent_->compress_timer());
    RETURN_IF_ERROR(compressor_->ProcessBlock(false, text.size(),
        reinterpret_cast<uint8_t*>(&text[0]), &output_length, &output));
  }

  header.WriteVInt(output_length);
  string head = header.String();
  RETURN_IF_ERROR(Write(reinterpret_cast<const uint8_t*>(head.data()),
      head.size()));
  RETURN_IF_ERROR(Write(output, output_length));
  return Status::OK;
}
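
// Illustrative note (editor addition, not in the original source): each
// compressed block written above is laid out as:
//
//   bytes : 16-byte sync marker
//   VLong : number of records in the block (unflushed_rows_)
//   Text  : ""                             (key lengths, empty)
//   Text  : ""                             (keys, empty)
//   Text  : ""                             (value lengths, empty)
//   VInt  : size of the compressed payload
//   bytes : compressed concatenation of <VLong length><row text> records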

void HdfsSequenceTableWriter::WriteEscapedString(const StringValue* str_val,
    WriteStream* buf) {
  for (int i = 0; i < str_val->len; ++i) {
    if (str_val->ptr[i] == field_delim_ || str_val->ptr[i] == escape_char_) {
      buf->WriteByte(escape_char_);
    }
    buf->WriteByte(str_val->ptr[i]);
  }
}
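
// Worked example (editor addition, not in the original source): with
// field_delim_ = ',' and escape_char_ = '\', the value  a,b\c  is written
// as  a\,b\\c : every occurrence of the delimiter or of the escape character
// itself is preceded by the escape character.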

void HdfsSequenceTableWriter::EncodeRow(TupleRow* row, WriteStream* buf) {
  // TODO Unify with text table writer
  int num_non_partition_cols =
      table_desc_->num_cols() - table_desc_->num_clustering_cols();
  DCHECK_GE(output_expr_ctxs_.size(), num_non_partition_cols) << parent_->DebugString();
  for (int j = 0; j < num_non_partition_cols; ++j) {
    void* value = output_expr_ctxs_[j]->GetValue(row);
    if (value != NULL) {
      if (output_expr_ctxs_[j]->root()->type().type == TYPE_STRING) {
        WriteEscapedString(reinterpret_cast<const StringValue*>(value), buf);
      } else {
        string str;
        output_expr_ctxs_[j]->PrintValue(value, &str);
        buf->WriteBytes(str.size(), str.data());
      }
    } else {
      // NULLs in Hive are encoded based on the 'serialization.null.format' property.
      const string& null_val = table_desc_->null_column_value();
      buf->WriteBytes(null_val.size(), null_val.data());
    }
    // Append the field delimiter between fields (not after the last one).
    if (j + 1 < num_non_partition_cols) {
      buf->WriteByte(field_delim_);
    }
  }
}
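
// Worked example (editor addition, not in the original source): for a row
// (id=1, s="a,b", t=NULL) with field_delim_ = ',' and escape_char_ = '\',
// and assuming the table's null format is Hive's default "\N", EncodeRow
// produces the delimited text
//
//   1,a\,b,\N
//
// Only string columns pass through WriteEscapedString; other types are
// printed with PrintValue and written verbatim.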

Status HdfsSequenceTableWriter::ConsumeRow(TupleRow* row) {
  ++unflushed_rows_;
  row_buf_.Clear();
  if (compress_flag_ && !record_compression_) {
    // Output row for a block compressed sequence file:
    // write the length as a vlong and then write the contents.
    EncodeRow(row, &row_buf_);
    out_.WriteVLong(row_buf_.Size());
    out_.WriteBytes(row_buf_.Size(), row_buf_.String().data());
    return Status::OK;
  }

  EncodeRow(row, &row_buf_);

  string value_text;
  const uint8_t* value_bytes;
  int64_t value_length;
  if (record_compression_) {
    // Apply compression to row_buf_.
    // The length of the buffer must be prefixed to the buffer prior to compression.
    //
    // TODO this incurs copy overhead to place the length in front of the
    // buffer prior to compression. We may want to rewrite to avoid copying.
    string text = row_buf_.String();
    row_buf_.Clear();
    // encoding as "Text" writes the length before the text
    row_buf_.WriteText(text.size(), reinterpret_cast<const uint8_t*>(text.data()));
    text = row_buf_.String();
    uint8_t* tmp;
    {
      SCOPED_TIMER(parent_->compress_timer());
      RETURN_IF_ERROR(compressor_->ProcessBlock(false, text.size(),
          reinterpret_cast<uint8_t*>(&text[0]), &value_length, &tmp));
    }
    value_bytes = tmp;
  } else {
    value_length = row_buf_.Size();
    // Keep the row text alive in a local until it is written below.
    value_text = row_buf_.String();
    value_bytes = reinterpret_cast<const uint8_t*>(value_text.data());
  }

  int rec_len = value_length;
  // If the record is compressed, the length is part of the compressed text;
  // if not, then we need to write the length (below) and account for its size.
  if (!record_compression_) rec_len += ReadWriteUtil::VLongRequiredBytes(value_length);

  // Length of the record (incl. key length and value length)
  out_.WriteInt(rec_len);

  // Write length of the key (Impala/Hive doesn't write a key)
  out_.WriteInt(0);

  // If the record is compressed, the length is part of the compressed text.
  if (!record_compression_) out_.WriteVLong(value_length);

  // write out the value (possibly compressed)
  out_.WriteBytes(value_length, value_bytes);
  return Status::OK;
}
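
// Illustrative note (editor addition, not in the original source): in
// uncompressed and record-compressed files, each record appended to out_
// above has the SequenceFile record layout
//
//   int   : record length (the value bytes, plus the VLong length prefix
//           when record compression is off)
//   int   : key length, always 0 (Impala/Hive writes no key)
//   VLong : value length (omitted under record compression, where the
//           Text-encoded length is inside the compressed payload)
//   bytes : the row text, compressed when record_compression_ is on
//
// In block-compressed files, ConsumeRow returns early: rows are only
// Text-encoded into out_ and framed later by WriteCompressedBlock().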

Status HdfsSequenceTableWriter::Flush() {
  if (unflushed_rows_ == 0) return Status::OK;

  SCOPED_TIMER(parent_->hdfs_write_timer());

  if (compress_flag_ && !record_compression_) {
    RETURN_IF_ERROR(WriteCompressedBlock());
  } else {
    string out_str = out_.String();
    RETURN_IF_ERROR(
        Write(reinterpret_cast<const uint8_t*>(out_str.data()), out_str.size()));
  }
  out_.Clear();
  unflushed_rows_ = 0;
  return Status::OK;
}

} // namespace impala
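
The WriteVLong/WriteVInt/VLongRequiredBytes helpers used throughout this file implement Hadoop's variable-length integer encoding (WritableUtils.writeVLong), which SequenceFile readers expect. The following self-contained sketch shows that encoding; it is an editor's illustration based on the Hadoop format, and the function name hadoop_write_vlong is hypothetical, not part of Impala.

#include <cstdint>
#include <string>

// Sketch of Hadoop's WritableUtils.writeVLong encoding (assumed semantics):
// values in [-112, 127] are stored in a single byte; otherwise a marker byte
// encodes the sign and byte count, followed by the magnitude in big-endian order.
void hadoop_write_vlong(int64_t i, std::string* out) {
  if (i >= -112 && i <= 127) {
    out->push_back(static_cast<char>(i));
    return;
  }
  int len = -112;
  if (i < 0) {
    i = ~i;       // one's complement; the sign is carried by the marker byte
    len = -120;
  }
  for (int64_t tmp = i; tmp != 0; tmp >>= 8) --len;  // count magnitude bytes
  out->push_back(static_cast<char>(len));
  int num_bytes = (len < -120) ? -(len + 120) : -(len + 112);
  for (int idx = num_bytes - 1; idx >= 0; --idx) {
    out->push_back(static_cast<char>((i >> (8 * idx)) & 0xFF));
  }
}

A decoder inverts this by inspecting the first byte: values >= -112 are literal; otherwise the marker byte gives the sign and the count of big-endian magnitude bytes that follow.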