hdfs-parquet-scanner.cc
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
16 
17 #include <limits> // for std::numeric_limits
18 
19 #include <boost/algorithm/string.hpp>
20 #include <gflags/gflags.h>
21 #include <gutil/strings/substitute.h>
22 
23 #include "common/object-pool.h"
24 #include "common/logging.h"
25 #include "exec/hdfs-scan-node.h"
27 #include "exec/read-write-util.h"
28 #include "exprs/expr.h"
29 #include "runtime/descriptors.h"
30 #include "runtime/runtime-state.h"
31 #include "runtime/mem-pool.h"
32 #include "runtime/raw-value.h"
33 #include "runtime/row-batch.h"
34 #include "runtime/tuple-row.h"
35 #include "runtime/tuple.h"
36 #include "runtime/string-value.h"
37 #include "util/bitmap.h"
38 #include "util/bit-util.h"
39 #include "util/decompress.h"
40 #include "util/debug-util.h"
41 #include "util/error-util.h"
42 #include "util/dict-encoding.h"
43 #include "util/rle-encoding.h"
44 #include "util/runtime-profile.h"
45 #include "rpc/thrift-util.h"
46 
47 #include "common/names.h"
48 
49 using boost::algorithm::is_any_of;
50 using boost::algorithm::split;
51 using boost::algorithm::token_compress_on;
52 using namespace impala;
53 using namespace strings;
54 
55 // Provide a workaround for IMPALA-1658.
56 DEFINE_bool(convert_legacy_hive_parquet_utc_timestamps, false,
57  "When true, TIMESTAMPs read from files written by Parquet-MR (used by Hive) will "
58  "be converted from UTC to local time. Writes are unaffected.");
59 
60 // Max data page header size in bytes. This is an estimate and only needs to be an upper
61 // bound. It is theoretically possible to have a page header of any size due to string
62 // value statistics, but in practice we'll have trouble reading string values this large.
63 const int MAX_PAGE_HEADER_SIZE = 8 * 1024 * 1024;
64 
65 // Max dictionary page header size in bytes. This is an estimate and only needs to be an
66 // upper bound.
67 const int MAX_DICT_HEADER_SIZE = 100;
68 
69 #define LOG_OR_ABORT(error_msg, runtime_state) \
70  if (runtime_state->abort_on_error()) { \
71  return Status(error_msg); \
72  } else { \
73  runtime_state->LogError(error_msg); \
74  return Status::OK; \
75  }
76 
77 #define LOG_OR_RETURN_ON_ERROR(error_msg, runtime_state) \
78  if (runtime_state->abort_on_error()) { \
79  return Status(error_msg.msg()); \
80  } \
81  runtime_state->LogError(error_msg);
82 
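// ---------------------------------------------------------------------------
// Illustrative sketch (added commentary, not part of the original file): a
// minimal, self-contained model of what LOG_OR_RETURN_ON_ERROR does. If
// abort_on_error() is true, the enclosing function returns a non-OK status
// built from the message; otherwise the error is only logged and execution
// continues. MiniStatus and MiniState are hypothetical stand-ins, not Impala's
// real Status/RuntimeState classes.
#include <iostream>
#include <string>

struct MiniStatus {
  std::string msg;  // empty string means OK
  bool ok() const { return msg.empty(); }
};

struct MiniState {
  bool abort_on_error;
  void LogError(const std::string& m) { std::cerr << "WARNING: " << m << std::endl; }
};

// Behaves like a function body that expands LOG_OR_RETURN_ON_ERROR(msg, state).
static MiniStatus HandleParseError(MiniState* state, const std::string& msg) {
  if (state->abort_on_error) return MiniStatus{msg};  // abort: the query fails
  state->LogError(msg);                               // warn: the query continues
  return MiniStatus{};
}
// ---------------------------------------------------------------------------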
84  const std::vector<HdfsFileDesc*>& files) {
85  vector<DiskIoMgr::ScanRange*> footer_ranges;
86  for (int i = 0; i < files.size(); ++i) {
87  for (int j = 0; j < files[i]->splits.size(); ++j) {
88  DiskIoMgr::ScanRange* split = files[i]->splits[j];
89 
90  // Since Parquet scanners always read entire files, only read a file if we're
91  // assigned the first split to avoid reading multi-block files with multiple
92  // scanners.
93  // We only process the split that starts at offset 0.
94  if (split->offset() != 0) {
95  // We are expecting each file to be one hdfs block (so all the scan range offsets
 96  // should be 0). This is not an error, but we will issue a warning.
97  scan_node->runtime_state()->LogError(
98  ErrorMsg(TErrorCode::PARQUET_MULTIPLE_BLOCKS, files[i]->filename));
 99  // We assign the entire file to one scan range, so mark all splits other than
 100  // the first one as complete
101  scan_node->RangeComplete(THdfsFileFormat::PARQUET, THdfsCompression::NONE);
102  continue;
103  }
104 
105  // Compute the offset of the file footer
106  DCHECK_GT(files[i]->file_length, 0);
107  int64_t footer_size = min(static_cast<int64_t>(FOOTER_SIZE), files[i]->file_length);
108  int64_t footer_start = files[i]->file_length - footer_size;
109 
110  ScanRangeMetadata* metadata =
111  reinterpret_cast<ScanRangeMetadata*>(split->meta_data());
112  DiskIoMgr::ScanRange* footer_range = scan_node->AllocateScanRange(
113  files[i]->fs, files[i]->filename.c_str(), footer_size, footer_start,
114  metadata->partition_id, split->disk_id(), split->try_cache(),
115  split->expected_local(), files[i]->mtime);
116  footer_ranges.push_back(footer_range);
117  }
118  }
119  RETURN_IF_ERROR(scan_node->AddDiskIoRanges(footer_ranges));
120  return Status::OK;
121 }
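// ---------------------------------------------------------------------------
// Illustrative sketch (added commentary, not part of the original file): the
// arithmetic IssueInitialRanges() uses for the footer range. FOOTER_SIZE is a
// fixed guess declared in the scanner's header and is not visible in this
// listing, so it is passed in as a parameter here rather than assumed.
#include <algorithm>
#include <cstdint>

static void ExampleFooterRange(int64_t file_length, int64_t footer_size_guess,
    int64_t* footer_start, int64_t* footer_len) {
  // Read min(guess, file_length) bytes at the end of the file.
  *footer_len = std::min(footer_size_guess, file_length);
  *footer_start = file_length - *footer_len;
}
// e.g. with a 102,400-byte guess, a 50,000-byte file is read in full
// (start 0, len 50,000), while a 10 MB file contributes only its last
// 102,400 bytes to the footer range.
// ---------------------------------------------------------------------------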
122 
123 namespace impala {
124 
126  : HdfsScanner(scan_node, state),
127  metadata_range_(NULL),
128  dictionary_pool_(new MemPool(scan_node->mem_tracker())),
129  assemble_rows_timer_(scan_node_->materialize_tuple_timer()) {
130  assemble_rows_timer_.Stop();
131 }
132 
134 }
135 
136 // Reader for a single column from the parquet file. It's associated with a
137 // ScannerContext::Stream and is responsible for decoding the data.
138 // Super class for per-type column readers. This contains most of the logic;
139 // the type-specific functions must be implemented in the subclass.
141  public:
142  virtual ~BaseColumnReader() {}
143 
144  // This is called once for each row group in the file.
145  Status Reset(const parquet::ColumnMetaData* metadata, ScannerContext::Stream* stream) {
146  DCHECK_NOTNULL(stream);
147  DCHECK_NOTNULL(metadata);
148 
150  data_ = NULL;
151  stream_ = stream;
152  metadata_ = metadata;
153  dict_decoder_base_ = NULL;
154  num_values_read_ = 0;
155  if (metadata_->codec != parquet::CompressionCodec::UNCOMPRESSED) {
157  NULL, false, PARQUET_TO_IMPALA_CODEC[metadata_->codec], &decompressor_));
158  }
159  return Status::OK;
160  }
161 
162  // Called once when the scanner is complete for final cleanup.
163  void Close() {
164  if (decompressor_.get() != NULL) decompressor_->Close();
165  }
166 
167  int64_t total_len() const { return metadata_->total_compressed_size; }
168  const SlotDescriptor* slot_desc() const { return node_.slot_desc; }
169  const parquet::SchemaElement& schema_element() const { return *node_.element; }
170  int col_idx() const { return node_.col_idx; }
171  int max_def_level() const { return node_.max_def_level; }
172  THdfsCompression::type codec() const {
173  if (metadata_ == NULL) return THdfsCompression::NONE;
174  return PARQUET_TO_IMPALA_CODEC[metadata_->codec];
175  }
176 
177  // Read the next value into tuple for this column. Returns false if there are no
178  // more values in the file.
179  // *conjuncts_failed is an in/out parameter. If true, it means this row has already
180  // been filtered out (i.e. ReadValue() is really a SkipValue()); it should be set to
181  // true if ReadValue() itself can filter out this row.
182  // TODO: this is the function that needs to be codegen'd (e.g. CodegenReadValue())
183  // The codegened functions from all the materialized cols will then be combined
184  // into one function.
185  // TODO: another option is to materialize col by col for the entire row batch in
186  // one call. e.g. MaterializeCol would write out 1024 values. Our row batches
187  // are currently dense so we'll need to figure out something there.
188  bool ReadValue(MemPool* pool, Tuple* tuple, bool* conjuncts_failed);
189 
190  // TODO: Some encodings might benefit a lot from a SkipValues(int num_rows) if
191  // we know rows can be skipped. This could be very useful with stats, where big
192  // sections can be skipped. Implement that when we can benefit from it.
193 
194  protected:
195  friend class HdfsParquetScanner;
196 
199 
200  const parquet::ColumnMetaData* metadata_;
201  scoped_ptr<Codec> decompressor_;
203 
204  // Pool to allocate decompression buffers from.
205  boost::scoped_ptr<MemPool> decompressed_data_pool_;
206 
207  // Header for current data page.
208  parquet::PageHeader current_page_header_;
209 
210  // Num values remaining in the current data page
212 
213  // Pointer to start of next value in data page
214  uint8_t* data_;
215 
216  // Decoder for definition levels. Only one of these is valid at a time, depending on
217  // the data page metadata.
220 
221  // Decoder for dictionary-encoded columns. Set by the subclass.
223 
224  // The number of values seen so far. Updated per data page.
226 
227  // Cache of the bitmap_filter_ (if any) for this slot.
229  // Cache of hash_seed_ to use with bitmap_filter_.
230  uint32_t hash_seed_;
231 
232  // Bitmap filters are optional (i.e. they can be ignored and the results will be
233  // correct). Keep track of stats to determine if the filter is not effective. If
234  // the number of rows filtered out is too low, this is not worth the cost.
235  // TODO: this should be cost based taking into account how much we save when we
236  // filter a row.
237  int64_t rows_returned_;
239 
241  : parent_(parent),
242  node_(node),
243  metadata_(NULL),
244  stream_(NULL),
245  decompressed_data_pool_(new MemPool(parent->scan_node_->mem_tracker())),
247  num_values_read_(0) {
248  DCHECK_NOTNULL(node.slot_desc);
249  DCHECK_GE(node.col_idx, 0);
250  DCHECK_GE(node.max_def_level, 0);
251 
253  bitmap_filter_ = state->GetBitmapFilter(slot_desc()->id());
254  hash_seed_ = state->fragment_hash_seed();
255  rows_returned_ = 0;
257  }
258 
259  // Read the next data page. If a dictionary page is encountered, that will
260  // be read and this function will continue reading until it finds the next data page.
262 
263  // Returns the definition level for the next value.
264  // Returns -1 if there was an error parsing it.
265  int ReadDefinitionLevel();
266 
267  // Creates a dictionary decoder from values/size. Subclass must implement this
268  // and set dict_decoder_base_.
269  virtual void CreateDictionaryDecoder(uint8_t* values, int size) = 0;
270 
271  // Initializes the reader with the data contents. This is the content for
272  // the entire decompressed data page. Decoders can initialize state from
273  // here.
274  virtual Status InitDataPage(uint8_t* data, int size) = 0;
275 
276  // Writes the next value into *slot using pool if necessary.
277  // Returns false if there was an error.
278  // Subclass must implement this.
279  // TODO: we need to remove this with codegen.
280  virtual bool ReadSlot(void* slot, MemPool* pool, bool* conjuncts_failed) = 0;
281 };
282 
283 // Per column type reader.
284 template<typename T>
286  public:
288  : BaseColumnReader(parent, node) {
289  DCHECK_NE(slot_desc()->type().type, TYPE_BOOLEAN);
290  if (slot_desc()->type().type == TYPE_DECIMAL) {
292  } else if (slot_desc()->type().type == TYPE_VARCHAR) {
294  } else {
295  fixed_len_size_ = -1;
296  }
298  // TODO: Add logic to detect file versions that have unconverted TIMESTAMP
299  // values. Currently all versions have converted values.
300  (FLAGS_convert_legacy_hive_parquet_utc_timestamps &&
301  slot_desc()->type().type == TYPE_TIMESTAMP &&
302  parent->file_version_.application == "parquet-mr");
303  }
304 
305  protected:
306  virtual void CreateDictionaryDecoder(uint8_t* values, int size) {
307  dict_decoder_.reset(new DictDecoder<T>(values, size, fixed_len_size_));
309  }
310 
311  virtual Status InitDataPage(uint8_t* data, int size) {
312  if (current_page_header_.data_page_header.encoding ==
313  parquet::Encoding::PLAIN_DICTIONARY) {
314  if (dict_decoder_.get() == NULL) {
315  return Status("File corrupt. Missing dictionary page.");
316  }
317  dict_decoder_->SetData(data, size);
318  }
319 
320  // Check if we should disable the bitmap filter. We'll do this if the filter
321  // is not removing a lot of rows.
322  // TODO: how to pick the selectivity?
323  if (bitmap_filter_ != NULL && rows_returned_ > 10000 &&
325  bitmap_filter_ = NULL;
326  }
327  return Status::OK;
328  }
329 
330  virtual bool ReadSlot(void* slot, MemPool* pool, bool* conjuncts_failed) {
331  parquet::Encoding::type page_encoding =
332  current_page_header_.data_page_header.encoding;
333  bool result = true;
334  T val;
335  T* val_ptr = needs_conversion_ ? &val : reinterpret_cast<T*>(slot);
336  if (page_encoding == parquet::Encoding::PLAIN_DICTIONARY) {
337  result = dict_decoder_->GetValue(val_ptr);
338  } else {
339  DCHECK(page_encoding == parquet::Encoding::PLAIN);
340  data_ += ParquetPlainEncoder::Decode<T>(data_, fixed_len_size_, val_ptr);
341  }
342  if (needs_conversion_) ConvertSlot(&val, reinterpret_cast<T*>(slot), pool);
343  ++rows_returned_;
344  if (!*conjuncts_failed && bitmap_filter_ != NULL) {
345  uint32_t h = RawValue::GetHashValue(slot, slot_desc()->type(), hash_seed_);
346  *conjuncts_failed = !bitmap_filter_->Get<true>(h);
348  }
349  return result;
350  }
351 
352  private:
353  void CopySlot(T* slot, MemPool* pool) {
354  // no-op for non-string columns.
355  }
356 
357  // Converts and writes src into dst based on desc_->type()
358  void ConvertSlot(const T* src, T* dst, MemPool* pool) {
359  DCHECK(false);
360  }
361 
362  scoped_ptr<DictDecoder<T> > dict_decoder_;
363 
364  // True if decoded values must be converted before being written to an output tuple.
366 
367  // The size of this column with plain encoding for FIXED_LEN_BYTE_ARRAY, or
368  // the max length for VARCHAR columns. Unused otherwise.
370 };
371 
372 template<>
374  StringValue* slot, MemPool* pool) {
375  if (slot->len == 0) return;
376  uint8_t* buffer = pool->Allocate(slot->len);
377  memcpy(buffer, slot->ptr, slot->len);
378  slot->ptr = reinterpret_cast<char*>(buffer);
379 }
380 
381 template<>
383  const StringValue* src, StringValue* dst, MemPool* pool) {
384  DCHECK(slot_desc()->type().type == TYPE_CHAR);
385  int len = slot_desc()->type().len;
386  StringValue sv;
387  sv.len = len;
388  if (slot_desc()->type().IsVarLen()) {
389  sv.ptr = reinterpret_cast<char*>(pool->Allocate(len));
390  } else {
391  sv.ptr = reinterpret_cast<char*>(dst);
392  }
393  int unpadded_len = min(len, src->len);
394  memcpy(sv.ptr, src->ptr, unpadded_len);
395  StringValue::PadWithSpaces(sv.ptr, len, unpadded_len);
396 
397  if (slot_desc()->type().IsVarLen()) *dst = sv;
398 }
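// ---------------------------------------------------------------------------
// Illustrative sketch (added commentary, not part of the original file): the
// effect of the CHAR conversion above in isolation. At most 'len' source bytes
// are copied and the remainder is padded with spaces, so a CHAR(5) slot
// holding "abc" ends up as "abc  ". This helper mirrors the memcpy-plus-pad
// logic using std::string only; it is not the scanner's actual code path.
#include <algorithm>
#include <string>

static std::string PadToCharLength(const std::string& src, size_t len) {
  std::string out = src.substr(0, std::min(src.size(), len));  // truncate if longer
  out.resize(len, ' ');                                        // pad with spaces
  return out;  // PadToCharLength("abc", 5) == "abc  "
}
// ---------------------------------------------------------------------------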
399 
400 template<>
402  const TimestampValue* src, TimestampValue* dst, MemPool* pool) {
403  // Conversion should only happen when this flag is enabled.
404  DCHECK(FLAGS_convert_legacy_hive_parquet_utc_timestamps);
405  *dst = *src;
406  if (dst->HasDateAndTime()) dst->UtcToLocal();
407 }
408 
410  public:
412  : BaseColumnReader(parent, node) {
413  DCHECK_EQ(slot_desc()->type().type, TYPE_BOOLEAN);
414  }
415 
416  protected:
417  virtual void CreateDictionaryDecoder(uint8_t* values, int size) {
418  DCHECK(false) << "Dictionary encoding is not supported for bools. Should never "
419  << "have gotten this far.";
420  }
421 
422  virtual Status InitDataPage(uint8_t* data, int size) {
423  // Initialize bool decoder
424  bool_values_ = BitReader(data, size);
425  return Status::OK;
426  }
427 
428  virtual bool ReadSlot(void* slot, MemPool* pool, bool* conjuncts_failed) {
429  bool valid = bool_values_.GetValue(1, reinterpret_cast<bool*>(slot));
430  if (!valid) parent_->parse_status_ = Status("Invalid bool column.");
431  return valid;
432  }
433 
434  private:
436 };
437 
438 }
439 
440 Status HdfsParquetScanner::Prepare(ScannerContext* context) {
443  ADD_COUNTER(scan_node_->runtime_profile(), "NumColumns", TUnit::UNIT);
444 
446  return Status::OK;
447 }
448 
450  vector<THdfsCompression::type> compression_types;
451  for (int i = 0; i < column_readers_.size(); ++i) {
452  if (column_readers_[i]->decompressed_data_pool_.get() != NULL) {
453  // No need to commit the row batches with the AttachPool() calls
454  // since AddFinalRowBatch() already does that below.
455  AttachPool(column_readers_[i]->decompressed_data_pool_.get(), false);
456  }
457  column_readers_[i]->Close();
458  compression_types.push_back(column_readers_[i]->codec());
459  }
460  AttachPool(dictionary_pool_.get(), false);
462 
463  // If this was a metadata only read (i.e. count(*)), there are no columns.
464  if (compression_types.empty()) compression_types.push_back(THdfsCompression::NONE);
465  scan_node_->RangeComplete(THdfsFileFormat::PARQUET, compression_types);
466  assemble_rows_timer_.Stop();
467  assemble_rows_timer_.ReleaseCounter();
468 
470 }
471 
473  const SchemaNode& node) {
474  BaseColumnReader* reader = NULL;
475  switch (node.slot_desc->type().type) {
476  case TYPE_BOOLEAN:
477  reader = new BoolColumnReader(this, node);
478  break;
479  case TYPE_TINYINT:
480  reader = new ColumnReader<int8_t>(this, node);
481  break;
482  case TYPE_SMALLINT:
483  reader = new ColumnReader<int16_t>(this, node);
484  break;
485  case TYPE_INT:
486  reader = new ColumnReader<int32_t>(this, node);
487  break;
488  case TYPE_BIGINT:
489  reader = new ColumnReader<int64_t>(this, node);
490  break;
491  case TYPE_FLOAT:
492  reader = new ColumnReader<float>(this, node);
493  break;
494  case TYPE_DOUBLE:
495  reader = new ColumnReader<double>(this, node);
496  break;
497  case TYPE_TIMESTAMP:
498  reader = new ColumnReader<TimestampValue>(this, node);
499  break;
500  case TYPE_STRING:
501  case TYPE_VARCHAR:
502  case TYPE_CHAR:
503  reader = new ColumnReader<StringValue>(this, node);
504  break;
505  case TYPE_DECIMAL:
506  switch (node.slot_desc->type().GetByteSize()) {
507  case 4:
508  reader = new ColumnReader<Decimal4Value>(this, node);
509  break;
510  case 8:
511  reader = new ColumnReader<Decimal8Value>(this, node);
512  break;
513  case 16:
514  reader = new ColumnReader<Decimal16Value>(this, node);
515  break;
516  }
517  break;
518  default:
519  DCHECK(false);
520  }
521  return scan_node_->runtime_state()->obj_pool()->Add(reader);
522 }
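// ---------------------------------------------------------------------------
// Illustrative sketch (added commentary, not part of the original file): the
// decimal cases above dispatch on the slot's in-memory byte size. The mapping
// from precision to byte size lives in Impala's type code, not in this file;
// the thresholds below are stated as an assumption for the example only.
static int AssumedDecimalByteSize(int precision) {
  if (precision <= 9) return 4;    // would select ColumnReader<Decimal4Value>
  if (precision <= 18) return 8;   // would select ColumnReader<Decimal8Value>
  return 16;                       // would select ColumnReader<Decimal16Value>
}
// ---------------------------------------------------------------------------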
523 
524 // In 1.1, we had a bug where the dictionary page metadata was not set. Returns true
525 // if this matches those versions and compatibility workarounds need to be used.
528  if (v.application != "impala") return false;
529  return v.VersionEq(1,1,0) || (v.VersionEq(1,2,0) && v.is_impala_internal);
530 }
531 
533  Status status;
534  uint8_t* buffer;
535 
536  // We're about to move to the next data page. The previous data page is
537  // now complete, pass along the memory allocated for it.
539 
540  // Read the next data page, skipping page types we don't care about.
541  // We break out of this loop on the non-error case (a data page was found or we read all
542  // the pages).
543  while (true) {
544  DCHECK_EQ(num_buffered_values_, 0);
545  if (num_values_read_ >= metadata_->num_values) {
546  // No more pages to read
547  DCHECK_EQ(num_values_read_, metadata_->num_values);
548  break;
549  }
550 
551  int64_t buffer_size;
552  RETURN_IF_ERROR(stream_->GetBuffer(true, &buffer, &buffer_size));
553  if (buffer_size == 0) {
554  DCHECK(stream_->eosr());
555  ErrorMsg msg(TErrorCode::PARQUET_COLUMN_METADATA_INVALID,
556  metadata_->num_values, num_values_read_,
557  slot_desc()->col_pos() - parent_->scan_node_->num_partition_keys());
559  }
560 
561  // We don't know the actual header size until the thrift object is deserialized. Loop
562  // until we successfully deserialize the header or exceed the maximum header size.
563  uint32_t header_size;
564  while (true) {
565  header_size = buffer_size;
566  status = DeserializeThriftMsg(
567  buffer, &header_size, true, &current_page_header_);
568  if (status.ok()) break;
569 
570  if (buffer_size >= MAX_PAGE_HEADER_SIZE) {
571  stringstream ss;
572  ss << "ParquetScanner: could not read data page because page header exceeded "
573  << "maximum size of "
574  << PrettyPrinter::Print(MAX_PAGE_HEADER_SIZE, TUnit::BYTES);
575  status.AddDetail(ss.str());
576  return status;
577  }
578 
579  // Didn't read entire header, increase buffer size and try again
580  Status status;
581  int64_t new_buffer_size = max(buffer_size * 2, 1024L);
582  bool success = stream_->GetBytes(
583  new_buffer_size, &buffer, &new_buffer_size, &status, /* peek */ true);
584  if (!success) {
585  DCHECK(!status.ok());
586  return status;
587  }
588  DCHECK(status.ok());
589 
590  if (buffer_size == new_buffer_size) {
591  DCHECK_NE(new_buffer_size, 0);
592  ErrorMsg msg(TErrorCode::PARQUET_HEADER_EOF);
594  }
595  DCHECK_GT(new_buffer_size, buffer_size);
596  buffer_size = new_buffer_size;
597  }
598 
599  // Successfully deserialized current_page_header_
600  if (!stream_->SkipBytes(header_size, &status)) return status;
601 
602  int data_size = current_page_header_.compressed_page_size;
603  int uncompressed_size = current_page_header_.uncompressed_page_size;
604 
605  if (current_page_header_.type == parquet::PageType::DICTIONARY_PAGE) {
606  if (dict_decoder_base_ != NULL) {
607  return Status("Column chunk should not contain two dictionary pages.");
608  }
609  if (slot_desc()->type().type == TYPE_BOOLEAN) {
610  return Status("Unexpected dictionary page. Dictionary page is not"
611  " supported for booleans.");
612  }
613  const parquet::DictionaryPageHeader* dict_header = NULL;
614  if (current_page_header_.__isset.dictionary_page_header) {
615  dict_header = &current_page_header_.dictionary_page_header;
616  } else {
618  return Status("Dictionary page does not have dictionary header set.");
619  }
620  }
621  if (dict_header != NULL &&
622  dict_header->encoding != parquet::Encoding::PLAIN &&
623  dict_header->encoding != parquet::Encoding::PLAIN_DICTIONARY) {
624  return Status("Only PLAIN and PLAIN_DICTIONARY encodings are supported "
625  "for dictionary pages.");
626  }
627 
628  if (!stream_->ReadBytes(data_size, &data_, &status)) return status;
629 
630  uint8_t* dict_values = NULL;
631  if (decompressor_.get() != NULL) {
632  dict_values = parent_->dictionary_pool_->Allocate(uncompressed_size);
633  RETURN_IF_ERROR(decompressor_->ProcessBlock32(true, data_size, data_,
634  &uncompressed_size, &dict_values));
635  VLOG_FILE << "Decompressed " << data_size << " to " << uncompressed_size;
636  data_size = uncompressed_size;
637  } else {
638  DCHECK_EQ(data_size, current_page_header_.uncompressed_page_size);
639  // Copy dictionary from io buffer (which will be recycled as we read
640  // more data) to a new buffer
641  dict_values = parent_->dictionary_pool_->Allocate(data_size);
642  memcpy(dict_values, data_, data_size);
643  }
644 
645  CreateDictionaryDecoder(dict_values, data_size);
646  if (dict_header != NULL &&
647  dict_header->num_values != dict_decoder_base_->num_entries()) {
648  return Status(Substitute(
649  "Invalid dictionary. Expected $0 entries but data contained $1 entries",
650  dict_header->num_values, dict_decoder_base_->num_entries()));
651  }
652  // Done with dictionary page, read next page
653  continue;
654  }
655 
656  if (current_page_header_.type != parquet::PageType::DATA_PAGE) {
657  // We can safely skip non-data pages
658  if (!stream_->SkipBytes(data_size, &status)) return status;
659  continue;
660  }
661 
662  // Read Data Page
663  if (!stream_->ReadBytes(data_size, &data_, &status)) return status;
664  num_buffered_values_ = current_page_header_.data_page_header.num_values;
666 
667  if (decompressor_.get() != NULL) {
669  uint8_t* decompressed_buffer = decompressed_data_pool_->Allocate(uncompressed_size);
670  RETURN_IF_ERROR(decompressor_->ProcessBlock32(true,
671  current_page_header_.compressed_page_size, data_, &uncompressed_size,
672  &decompressed_buffer));
673  VLOG_FILE << "Decompressed " << current_page_header_.compressed_page_size
674  << " to " << uncompressed_size;
675  DCHECK_EQ(current_page_header_.uncompressed_page_size, uncompressed_size);
676  data_ = decompressed_buffer;
677  data_size = current_page_header_.uncompressed_page_size;
678  } else {
679  DCHECK_EQ(metadata_->codec, parquet::CompressionCodec::UNCOMPRESSED);
680  DCHECK_EQ(current_page_header_.compressed_page_size, uncompressed_size);
681  }
682 
683  if (max_def_level() > 0) {
684  // Initialize the definition level data
685  int32_t num_definition_bytes = 0;
686  switch (current_page_header_.data_page_header.definition_level_encoding) {
687  case parquet::Encoding::RLE: {
688  if (!ReadWriteUtil::Read(&data_, &data_size, &num_definition_bytes, &status)) {
689  return status;
690  }
691  int bit_width = BitUtil::Log2(max_def_level() + 1);
692  rle_def_levels_ = RleDecoder(data_, num_definition_bytes, bit_width);
693  break;
694  }
695  case parquet::Encoding::BIT_PACKED:
696  num_definition_bytes = BitUtil::Ceil(num_buffered_values_, 8);
697  bit_packed_def_levels_ = BitReader(data_, num_definition_bytes);
698  break;
699  default: {
700  stringstream ss;
701  ss << "Unsupported definition level encoding: "
702  << current_page_header_.data_page_header.definition_level_encoding;
703  return Status(ss.str());
704  }
705  }
706  DCHECK_GT(num_definition_bytes, 0);
707  data_ += num_definition_bytes;
708  data_size -= num_definition_bytes;
709  }
710 
711  // Data can be empty if the column contains all NULLs
712  if (data_size != 0) RETURN_IF_ERROR(InitDataPage(data_, data_size));
713  break;
714  }
715 
716  return Status::OK;
717 }
718 
719 // TODO More codegen here as well.
721  if (max_def_level() == 0) {
722  // This column and any containing structs are required so there is nothing encoded for
723  // the definition levels.
724  return 1;
725  }
726 
727  uint8_t definition_level;
728  bool valid = false;
729  switch (current_page_header_.data_page_header.definition_level_encoding) {
730  case parquet::Encoding::RLE:
731  valid = rle_def_levels_.Get(&definition_level);
732  break;
733  case parquet::Encoding::BIT_PACKED: {
734  valid = bit_packed_def_levels_.GetValue(1, &definition_level);
735  break;
736  }
737  default:
738  DCHECK(false);
739  }
740  if (!valid) return -1;
741  return definition_level;
742 }
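// ---------------------------------------------------------------------------
// Illustrative sketch (added commentary, not part of the original file): the
// definition-level bit width. ReadDataPage() sizes the RLE decoder with
// BitUtil::Log2(max_def_level() + 1), i.e. the number of bits needed to store
// the values 0..max_def_level. A standalone way to compute the same width:
static int DefLevelBitWidth(int max_def_level) {
  int bits = 0;
  while ((1 << bits) < max_def_level + 1) ++bits;
  return bits;  // max_def_level 1 -> 1 bit, 2..3 -> 2 bits, 4..7 -> 3 bits
}
// For a flat schema where every materialized column is a top-level OPTIONAL
// field, max_def_level is 1, so each definition level is a single bit
// (level 0 = NULL, level 1 = value present), which is what ReadValue() checks.
// ---------------------------------------------------------------------------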
743 
745  MemPool* pool, Tuple* tuple, bool* conjuncts_failed) {
746  if (num_buffered_values_ == 0) {
747  parent_->assemble_rows_timer_.Stop();
748  parent_->parse_status_ = ReadDataPage();
749  // We don't return Status objects as parameters because they are too
750  // expensive for per row/per col calls. If ReadDataPage failed,
751  // return false to indicate this column reader is done.
752  if (num_buffered_values_ == 0 || !parent_->parse_status_.ok()) return false;
753  parent_->assemble_rows_timer_.Start();
754  }
755 
756  --num_buffered_values_;
757  int definition_level = ReadDefinitionLevel();
758  if (definition_level < 0) return false;
759 
760  if (definition_level != max_def_level()) {
761  // Null value
762  DCHECK_LT(definition_level, max_def_level());
763  tuple->SetNull(slot_desc()->null_indicator_offset());
764  return true;
765  }
766  return ReadSlot(tuple->GetSlot(slot_desc()->tuple_offset()), pool, conjuncts_failed);
767 }
768 
770  // First process the file metadata in the footer
771  bool eosr;
773  if (eosr) return Status::OK;
774 
775  // We've processed the metadata and there are columns that need to be materialized.
777  COUNTER_SET(num_cols_counter_, static_cast<int64_t>(column_readers_.size()));
778 
779  // The scanner-wide stream was used only to read the file footer. Each column has added
780  // its own stream.
781  stream_ = NULL;
782 
783  // Iterate through each row group in the file and read all the materialized columns
784  // per row group. Row groups are independent, so this could be parallelized.
785  // However, having multiple row groups per file should be seen as an edge case and
786  // we can do better by parallelizing across files instead.
787  // TODO: not really an edge case since MR writes multiple row groups
788  for (int i = 0; i < file_metadata_.row_groups.size(); ++i) {
789  // Attach any resources and clear the streams before starting a new row group. These
790  // streams could either be just the footer stream or streams for the previous row
791  // group.
792  context_->ReleaseCompletedResources(batch_, /* done */ true);
793  // Commit the rows to flush the row batch from the previous row group
794  CommitRows(0);
795 
798  }
799 
800  return Status::OK;
801 }
802 
803 // TODO: this needs to be codegen'd. The ReadValue function needs to be codegen'd,
804 // specific to type and encoding and then inlined into AssembleRows().
806  assemble_rows_timer_.Start();
807  // Read at most as many rows as stated in the metadata
808  int64_t expected_rows_in_group = file_metadata_.row_groups[row_group_idx].num_rows;
809  int64_t rows_read = 0;
810  bool reached_limit = scan_node_->ReachedLimit();
811  bool cancelled = context_->cancelled();
812  int num_column_readers = column_readers_.size();
813  MemPool* pool;
814 
815  while (!reached_limit && !cancelled && rows_read < expected_rows_in_group) {
816  Tuple* tuple;
817  TupleRow* row;
818  int64_t row_mem_limit = static_cast<int64_t>(GetMemory(&pool, &tuple, &row));
819  int64_t expected_rows_to_read = expected_rows_in_group - rows_read;
820  int64_t num_rows = std::min(expected_rows_to_read, row_mem_limit);
821 
822  int num_to_commit = 0;
823  if (num_column_readers > 0) {
824  for (int i = 0; i < num_rows; ++i) {
825  bool conjuncts_failed = false;
826  InitTuple(template_tuple_, tuple);
827  for (int c = 0; c < num_column_readers; ++c) {
828  if (!column_readers_[c]->ReadValue(pool, tuple, &conjuncts_failed)) {
829  assemble_rows_timer_.Stop();
830  // This column is complete and has no more data. This indicates
831  // we are done with this row group.
832  // For correctly formed files, this should be the first column we
833  // are reading.
834  DCHECK(c == 0 || !parse_status_.ok())
835  << "c=" << c << " " << parse_status_.GetDetail();
837  RETURN_IF_ERROR(CommitRows(num_to_commit));
838 
839  // If we reach this point, it means that we reached the end of file for
840  // this column. Test if the expected number of rows from metadata matches
841  // the actual number of rows in the file.
842  rows_read += i;
843  if (rows_read != expected_rows_in_group) {
845  DCHECK_NOTNULL(reader->stream_);
846 
847  ErrorMsg msg(TErrorCode::PARQUET_GROUP_ROW_COUNT_ERROR,
848  reader->stream_->filename(), row_group_idx,
849  expected_rows_in_group, rows_read);
851  }
852  return parse_status_;
853  }
854  }
855  if (conjuncts_failed) continue;
856  row->SetTuple(scan_node_->tuple_idx(), tuple);
857  if (EvalConjuncts(row)) {
858  row = next_row(row);
859  tuple = next_tuple(tuple);
860  ++num_to_commit;
861  }
862  }
863  } else {
864  // Special case when there is no data for the accessed column(s) in the file.
865  // This can happen, for example, due to schema evolution (alter table add column).
866  // Since all the tuples are the same, we evaluate the conjuncts only for the first tuple.
867  DCHECK_GT(num_rows, 0);
868  InitTuple(template_tuple_, tuple);
869  row->SetTuple(scan_node_->tuple_idx(), tuple);
870  if (EvalConjuncts(row)) {
871  row = next_row(row);
872  tuple = next_tuple(tuple);
873 
874  for (int i = 1; i < num_rows; ++i) {
875  InitTuple(template_tuple_, tuple);
876  row->SetTuple(scan_node_->tuple_idx(), tuple);
877  row = next_row(row);
878  tuple = next_tuple(tuple);
879  }
880  num_to_commit += num_rows;
881  }
882  }
883  rows_read += num_rows;
885  RETURN_IF_ERROR(CommitRows(num_to_commit));
886 
887  reached_limit = scan_node_->ReachedLimit();
888  cancelled = context_->cancelled();
889  }
890 
891  if (!reached_limit && !cancelled && (num_column_readers > 0)) {
892  // If we get to this point, it means that we have read as many rows as the metadata
893  // told us we should read. Attempt to read one more row and if that succeeds report
894  // the error.
895  DCHECK_EQ(rows_read, expected_rows_in_group);
896  uint8_t dummy_tuple_mem[tuple_byte_size_];
897  Tuple* dummy_tuple = reinterpret_cast<Tuple*>(&dummy_tuple_mem);
898  InitTuple(template_tuple_, dummy_tuple);
899  bool conjuncts_failed = false;
900  if (column_readers_[0]->ReadValue(pool, dummy_tuple, &conjuncts_failed)) {
901  // If another tuple is successfully read, it means that there are still values
902  // in the file.
904  DCHECK_NOTNULL(reader->stream_);
905  ErrorMsg msg(TErrorCode::PARQUET_GROUP_ROW_COUNT_OVERFLOW,
906  reader->stream_->filename(), row_group_idx,
907  expected_rows_in_group);
909  }
910  }
911 
912  assemble_rows_timer_.Stop();
913  return parse_status_;
914 }
915 
917  *eosr = false;
918  uint8_t* buffer;
919  int64_t len;
920 
921  RETURN_IF_ERROR(stream_->GetBuffer(false, &buffer, &len));
922  DCHECK(stream_->eosr());
923 
924  // Number of bytes in buffer after the fixed size footer is accounted for.
925  int remaining_bytes_buffered = len - sizeof(int32_t) - sizeof(PARQUET_VERSION_NUMBER);
926 
927  // Make sure footer has enough bytes to contain the required information.
928  if (remaining_bytes_buffered < 0) {
929  return Status(Substitute("File $0 is invalid. Missing metadata.",
930  stream_->filename()));
931  }
932 
933  // Validate magic file bytes are correct
934  uint8_t* magic_number_ptr = buffer + len - sizeof(PARQUET_VERSION_NUMBER);
935  if (memcmp(magic_number_ptr,
937  return Status(Substitute("File $0 is invalid. Invalid file footer: $1",
938  stream_->filename(),
939  string((char*)magic_number_ptr, sizeof(PARQUET_VERSION_NUMBER))));
940  }
941 
942  // The size of the metadata is encoded as a 4 byte little endian value before
943  // the magic number
944  uint8_t* metadata_size_ptr = magic_number_ptr - sizeof(int32_t);
945  uint32_t metadata_size = *reinterpret_cast<uint32_t*>(metadata_size_ptr);
946  uint8_t* metadata_ptr = metadata_size_ptr - metadata_size;
947  // If the metadata was too big, we need to stitch it before deserializing it.
948  // In that case, we stitch the data in this buffer.
949  vector<uint8_t> metadata_buffer;
951 
952  if (UNLIKELY(metadata_size > remaining_bytes_buffered)) {
953  // In this case, the metadata is bigger than our guess, meaning there are
954  // not enough bytes in the footer range from IssueInitialRanges().
955  // We'll just issue more ranges to the IoMgr to read the actual metadata.
956  const HdfsFileDesc* file_desc = scan_node_->GetFileDesc(metadata_range_->file());
957  DCHECK_NOTNULL(file_desc);
958  // The start of the metadata is:
959  // file_length - 4-byte metadata size field - 4-byte magic number - metadata size
960  int64_t metadata_start = file_desc->file_length -
961  sizeof(int32_t) - sizeof(PARQUET_VERSION_NUMBER) - metadata_size;
962  int64_t metadata_bytes_to_read = metadata_size;
963  if (metadata_start < 0) {
964  return Status(Substitute("File $0 is invalid. Invalid metadata size in file "
965  "footer: $1 bytes. File size: $2 bytes.", stream_->filename(), metadata_size,
966  file_desc->file_length));
967  }
968  // IoMgr can only do a fixed size Read(). The metadata could be larger
969  // so we stitch it here.
970  // TODO: consider moving this stitching into the scanner context. The scanner
971  // context usually handles the stitching but no other scanner needs this logic
972  // now.
973  metadata_buffer.resize(metadata_size);
974  metadata_ptr = &metadata_buffer[0];
975  int64_t copy_offset = 0;
976  DiskIoMgr* io_mgr = scan_node_->runtime_state()->io_mgr();
977 
978  while (metadata_bytes_to_read > 0) {
979  int64_t to_read = ::min(static_cast<int64_t>(io_mgr->max_read_buffer_size()),
980  metadata_bytes_to_read);
982  metadata_range_->fs(), metadata_range_->file(), to_read,
983  metadata_start + copy_offset, -1, metadata_range_->disk_id(),
985  file_desc->mtime);
986 
987  DiskIoMgr::BufferDescriptor* io_buffer = NULL;
988  RETURN_IF_ERROR(io_mgr->Read(scan_node_->reader_context(), range, &io_buffer));
989  memcpy(metadata_ptr + copy_offset, io_buffer->buffer(), io_buffer->len());
990  io_buffer->Return();
991 
992  metadata_bytes_to_read -= to_read;
993  copy_offset += to_read;
994  }
995  DCHECK_EQ(metadata_bytes_to_read, 0);
996  }
997  // Deserialize file header
998  // TODO: this takes ~7ms for a 1000-column table, figure out how to reduce this.
999  Status status =
1000  DeserializeThriftMsg(metadata_ptr, &metadata_size, true, &file_metadata_);
1001  if (!status.ok()) {
1002  return Status(Substitute("File $0 has invalid file metadata at file offset $1. "
1003  "Error = $2.", stream_->filename(),
1004  metadata_size + sizeof(PARQUET_VERSION_NUMBER) + sizeof(uint32_t),
1005  status.GetDetail()));
1006  }
1007 
1009 
1010  // Tell the scan node this file has been taken care of.
1013 
1014  // Parse file schema
1016 
1017  if (scan_node_->materialized_slots().empty()) {
1018  // No materialized columns. We can serve this query from just the metadata. We
1019  // don't need to read the column data.
1020  int64_t num_tuples = file_metadata_.num_rows;
1021  COUNTER_ADD(scan_node_->rows_read_counter(), num_tuples);
1022 
1023  while (num_tuples > 0) {
1024  MemPool* pool;
1025  Tuple* tuple;
1026  TupleRow* current_row;
1027  int max_tuples = GetMemory(&pool, &tuple, &current_row);
1028  max_tuples = min(static_cast<int64_t>(max_tuples), num_tuples);
1029  num_tuples -= max_tuples;
1030 
1031  int num_to_commit = WriteEmptyTuples(context_, current_row, max_tuples);
1032  RETURN_IF_ERROR(CommitRows(num_to_commit));
1033  }
1034 
1035  *eosr = true;
1036  return Status::OK;
1037  } else if (file_metadata_.num_rows == 0) {
1038  // Empty file
1039  *eosr = true;
1040  return Status::OK;
1041  }
1042 
1043  if (file_metadata_.row_groups.empty()) {
1044  return Status(Substitute("Invalid file. This file: $0 has no row groups",
1045  stream_->filename()));
1046  }
1047  return Status::OK;
1048 }
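// ---------------------------------------------------------------------------
// Illustrative sketch (added commentary, not part of the original file): the
// trailer layout ProcessFooter() relies on. Standard Parquet files end with
//
//   ... column data ... | file metadata (thrift) | metadata size (4 bytes LE) | 4-byte magic
//
// so the metadata begins at file_length - 4 - 4 - metadata_size, matching the
// 'metadata_start' arithmetic above.
#include <cstdint>

static int64_t ExampleMetadataStart(int64_t file_length, uint32_t metadata_size) {
  const int64_t size_field_bytes = sizeof(int32_t);  // 4-byte little-endian length field
  const int64_t magic_bytes = 4;                     // the 4-byte magic ("PAR1" in standard files)
  return file_length - size_field_bytes - magic_bytes - metadata_size;
}
// e.g. a 1,000,000-byte file with 20,000 bytes of metadata: the metadata starts
// at byte 979,992 and the final 8 bytes hold the length field plus the magic.
// ---------------------------------------------------------------------------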
1049 
1051  DCHECK(column_readers_.empty());
1052  for (int i = 0; i < scan_node_->materialized_slots().size(); ++i) {
1053  SlotDescriptor* slot_desc = scan_node_->materialized_slots()[i];
1054  const vector<int>& path = slot_desc->col_path();
1055  SchemaNode* node = &schema_;
1056  // Traverse path and resolve node to this slot's SchemaNode, or NULL if this slot
1057  // doesn't exist in this file's schema
1058  for (int j = 0; j < path.size(); ++j) {
1059  int idx = j > 0 ? path[j] : path[j] - scan_node_->num_partition_keys();
1060  if (node->children.size() <= idx) {
1061  // The selected column is not in the file
1062  VLOG_FILE << Substitute("File $0 does not contain path $1",
1063  stream_->filename(), PrintPath(path));
1064  node = NULL;
1065  break;
1066  }
1067  node = &node->children[idx];
1068  }
1069 
1070  if (node != NULL && node->children.size() > 0) {
1071  string error = Substitute("Path $0 is not a supported type in file $1",
1072  PrintPath(path), stream_->filename());
1073  VLOG_QUERY << error << endl << schema_.DebugString();
1074  return Status(error);
1075  }
1076 
1077  if (node == NULL) {
1078  // In this case, we are selecting a column that is not in the file.
1079  // Update the template tuple to put a NULL in this slot.
1080  if (template_tuple_ == NULL) {
1082  }
1084  continue;
1085  }
1086  node->slot_desc = slot_desc;
1087 
1088  column_readers_.push_back(CreateReader(*node));
1089  }
1090  return Status::OK;
1091 }
1092 
1094  const HdfsFileDesc* file_desc = scan_node_->GetFileDesc(metadata_range_->file());
1095  DCHECK_NOTNULL(file_desc);
1096  parquet::RowGroup& row_group = file_metadata_.row_groups[row_group_idx];
1097 
1098  // All the scan ranges (one for each column).
1099  vector<DiskIoMgr::ScanRange*> col_ranges;
1100 
1101  for (int i = 0; i < column_readers_.size(); ++i) {
1102  const parquet::ColumnChunk& col_chunk =
1103  row_group.columns[column_readers_[i]->col_idx()];
1104  int64_t col_start = col_chunk.meta_data.data_page_offset;
1105  RETURN_IF_ERROR(ValidateColumn(*column_readers_[i], row_group_idx));
1106 
1107  // If there is a dictionary page, the file format requires it to come before
1108  // any data pages, so we need to start reading the column from the dictionary page.
1109  if (col_chunk.meta_data.__isset.dictionary_page_offset) {
1110  if (col_chunk.meta_data.dictionary_page_offset >= col_start) {
1111  stringstream ss;
1112  ss << "File " << file_desc->filename << ": metadata is corrupt. "
1113  << "Dictionary page (offset=" << col_chunk.meta_data.dictionary_page_offset
1114  << ") must come before any data pages (offset=" << col_start << ").";
1115  return Status(ss.str());
1116  }
1117  col_start = col_chunk.meta_data.dictionary_page_offset;
1118  }
1119  int64_t col_len = col_chunk.meta_data.total_compressed_size;
1120  int64_t col_end = col_start + col_len;
1121  if (col_end <= 0 || col_end > file_desc->file_length) {
1122  stringstream ss;
1123  ss << "File " << file_desc->filename << ": metadata is corrupt. "
1124  << "Column " << column_readers_[i]->col_idx() << " has invalid column offsets "
1125  << "(offset=" << col_start << ", size=" << col_len << ", "
1126  << "file_size=" << file_desc->file_length << ").";
1127  return Status(ss.str());
1128  }
1129  if (file_version_.application == "parquet-mr" && file_version_.VersionLt(1, 2, 9)) {
1130  // The Parquet MR writer had a bug in 1.2.8 and below where it didn't include the
1131  // dictionary page header size in total_compressed_size and total_uncompressed_size
1132  // (see IMPALA-694). We pad col_len to compensate.
1133  int64_t bytes_remaining = file_desc->file_length - col_end;
1134  int64_t pad = min(static_cast<int64_t>(MAX_DICT_HEADER_SIZE), bytes_remaining);
1135  col_len += pad;
1136  }
1137 
1138  // TODO: this will need to change when we have co-located files and the columns
1139  // are in different files.
1140  if (!col_chunk.file_path.empty()) {
1141  DCHECK_EQ(col_chunk.file_path, string(metadata_range_->file()));
1142  }
1143 
1145  metadata_range_->fs(), metadata_range_->file(), col_len, col_start,
1146  column_readers_[i]->col_idx(), metadata_range_->disk_id(),
1148  file_desc->mtime);
1149  col_ranges.push_back(col_range);
1150 
1151  // Get the stream that will be used for this column
1152  ScannerContext::Stream* stream = context_->AddStream(col_range);
1153  DCHECK(stream != NULL);
1154 
1155  RETURN_IF_ERROR(column_readers_[i]->Reset(&col_chunk.meta_data, stream));
1156 
1157  if (!scan_node_->materialized_slots()[i]->type().IsStringType() ||
1158  col_chunk.meta_data.codec != parquet::CompressionCodec::UNCOMPRESSED) {
1159  // Non-string types are always compact. Compressed columns don't reference data
1160  // in the io buffers after tuple materialization. In both cases, the stream does not
1161  // contain tuple data, so io buffers can be recycled more promptly.
1162  stream->set_contains_tuple_data(false);
1163  }
1164  }
1165  DCHECK_EQ(col_ranges.size(), column_readers_.size());
1166  DCHECK_GE(scan_node_->materialized_slots().size(), column_readers_.size());
1167 
1168  // Issue all the column chunks to the io mgr and have them scheduled immediately.
1169  // This means these ranges aren't returned via DiskIoMgr::GetNextRange and
1170  // instead are scheduled to be read immediately.
1172  scan_node_->reader_context(), col_ranges, true));
1173 
1174  return Status::OK;
1175 }
1176 
1177 Status HdfsParquetScanner::CreateSchemaTree(const vector<parquet::SchemaElement>& schema,
1178  HdfsParquetScanner::SchemaNode* node) const {
1179  int max_def_level = 0;
1180  int idx = 0;
1181  int col_idx = 0;
1182  return CreateSchemaTree(schema, max_def_level, &idx, &col_idx, node);
1183 }
1184 
1186  const vector<parquet::SchemaElement>& schema, int max_def_level, int* idx,
1187  int* col_idx, HdfsParquetScanner::SchemaNode* node) const {
1188  if (*idx >= schema.size()) {
1189  return Status(Substitute("File $0 corrupt: could not reconstruct schema tree from "
1190  "flattened schema in file metadata", stream_->filename()));
1191  }
1192  node->element = &schema[*idx];
1193  ++(*idx);
1194 
1195  if (node->element->num_children == 0) {
1196  // node is a leaf node, meaning it's materialized in the file and appears in
1197  // file_metadata_.row_groups.columns
1198  node->col_idx = *col_idx;
1199  ++(*col_idx);
1200  }
1201 
1202  if (node->element->repetition_type == parquet::FieldRepetitionType::OPTIONAL) {
1203  ++max_def_level;
1204  }
1205  node->max_def_level = max_def_level;
1206 
1207  node->children.resize(node->element->num_children);
1208  for (int i = 0; i < node->element->num_children; ++i) {
1210  CreateSchemaTree(schema, max_def_level, idx, col_idx, &node->children[i]));
1211  }
1212  return Status::OK;
1213 }
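// ---------------------------------------------------------------------------
// Illustrative sketch (added commentary, not part of the original file): how
// CreateSchemaTree() rebuilds the tree from the flattened schema. Elements are
// consumed depth-first in file order, leaves (num_children == 0) receive
// consecutive col_idx values, and max_def_level counts the OPTIONAL nodes on
// the path from the root, including the element itself. MiniElement/MiniNode
// below are hypothetical stand-ins, not the parquet thrift structs.
#include <vector>

struct MiniElement { int num_children; bool optional; };
struct MiniNode {
  int col_idx = -1;
  int max_def_level = 0;
  std::vector<MiniNode> children;
};

static void BuildTree(const std::vector<MiniElement>& flat, int def_level,
    int* idx, int* col_idx, MiniNode* node) {
  const MiniElement& e = flat[(*idx)++];
  if (e.optional) ++def_level;
  node->max_def_level = def_level;
  if (e.num_children == 0) node->col_idx = (*col_idx)++;
  node->children.resize(e.num_children);
  for (int i = 0; i < e.num_children; ++i) {
    BuildTree(flat, def_level, idx, col_idx, &node->children[i]);
  }
}
// e.g. flat = { root with 2 children, REQUIRED "id", OPTIONAL "name" } yields
// id -> col_idx 0 / max_def_level 0 and name -> col_idx 1 / max_def_level 1.
// ---------------------------------------------------------------------------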
1214 
1215 HdfsParquetScanner::FileVersion::FileVersion(const string& created_by) {
1216  string created_by_lower = created_by;
1217  std::transform(created_by_lower.begin(), created_by_lower.end(),
1218  created_by_lower.begin(), ::tolower);
1219  is_impala_internal = false;
1220 
1221  vector<string> tokens;
1222  split(tokens, created_by_lower, is_any_of(" "), token_compress_on);
1223  // Boost always creates at least one token
1224  DCHECK_GT(tokens.size(), 0);
1225  application = tokens[0];
1226 
1227  if (tokens.size() >= 3 && tokens[1] == "version") {
1228  string version_string = tokens[2];
1229  // Ignore any trailing non-numeric characters
1230  int n = version_string.find_first_not_of("0123456789.");
1231  string version_string_trimmed = version_string.substr(0, n);
1232 
1233  vector<string> version_tokens;
1234  split(version_tokens, version_string_trimmed, is_any_of("."));
1235  version.major = version_tokens.size() >= 1 ? atoi(version_tokens[0].c_str()) : 0;
1236  version.minor = version_tokens.size() >= 2 ? atoi(version_tokens[1].c_str()) : 0;
1237  version.patch = version_tokens.size() >= 3 ? atoi(version_tokens[2].c_str()) : 0;
1238 
1239  if (application == "impala") {
1240  if (version_string.find("-internal") != string::npos) is_impala_internal = true;
1241  }
1242  } else {
1243  version.major = 0;
1244  version.minor = 0;
1245  version.patch = 0;
1246  }
1247 }
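// ---------------------------------------------------------------------------
// Illustrative sketch (added commentary, not part of the original file):
// example created_by strings of the form this constructor parses (the strings
// themselves are illustrative, not taken from a real file):
//   "parquet-mr version 1.2.8 (build abc123)"   -> application "parquet-mr",
//       version 1.2.8, is_impala_internal = false
//   "impala version 1.4.0-internal (build ...)" -> application "impala",
//       version 1.4.0, is_impala_internal = true
// A trimmed, standard-library-only version of the same tokenizing:
#include <sstream>
#include <string>
#include <vector>

static std::vector<std::string> SplitCreatedBy(const std::string& created_by) {
  std::istringstream in(created_by);
  std::vector<std::string> tokens;
  for (std::string t; in >> t;) tokens.push_back(t);
  return tokens;  // tokens[0] = application; tokens[2] = version string, if present
}
// ---------------------------------------------------------------------------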
1248 
1249 bool HdfsParquetScanner::FileVersion::VersionLt(int major, int minor, int patch) const {
1250  if (version.major < major) return true;
1251  if (version.major > major) return false;
1252  DCHECK_EQ(version.major, major);
1253  if (version.minor < minor) return true;
1254  if (version.minor > minor) return false;
1255  DCHECK_EQ(version.minor, minor);
1256  return version.patch < patch;
1257 }
1258 
1259 bool HdfsParquetScanner::FileVersion::VersionEq(int major, int minor, int patch) const {
1260  return version.major == major && version.minor == minor && version.patch == patch;
1261 }
1262 
1264  if (file_metadata_.version > PARQUET_CURRENT_VERSION) {
1265  stringstream ss;
1266  ss << "File: " << stream_->filename() << " is of an unsupported version. "
1267  << "file version: " << file_metadata_.version;
1268  return Status(ss.str());
1269  }
1270 
1271  // Parse out the created by application version string
1272  if (file_metadata_.__isset.created_by) {
1274  }
1275  return Status::OK;
1276 }
1277 
1278 bool IsEncodingSupported(parquet::Encoding::type e) {
1279  switch (e) {
1281  case parquet::Encoding::PLAIN_DICTIONARY:
1282  case parquet::Encoding::BIT_PACKED:
1283  case parquet::Encoding::RLE:
1284  return true;
1285  default:
1286  return false;
1287  }
1288 }
1289 
1291  const BaseColumnReader& col_reader, int row_group_idx) {
1292  const SlotDescriptor* slot_desc = col_reader.slot_desc();
1293  int col_idx = col_reader.col_idx();
1294  const parquet::SchemaElement& schema_element = col_reader.schema_element();
1295  parquet::ColumnChunk& file_data =
1296  file_metadata_.row_groups[row_group_idx].columns[col_idx];
1297 
1298  // Check the encodings are supported
1299  vector<parquet::Encoding::type>& encodings = file_data.meta_data.encodings;
1300  for (int i = 0; i < encodings.size(); ++i) {
1301  if (!IsEncodingSupported(encodings[i])) {
1302  stringstream ss;
1303  ss << "File '" << metadata_range_->file() << "' uses an unsupported encoding: "
1304  << PrintEncoding(encodings[i]) << " for column '" << schema_element.name
1305  << "'.";
1306  return Status(ss.str());
1307  }
1308  }
1309 
1310  // Check the compression is supported
1311  if (file_data.meta_data.codec != parquet::CompressionCodec::UNCOMPRESSED &&
1312  file_data.meta_data.codec != parquet::CompressionCodec::SNAPPY &&
1313  file_data.meta_data.codec != parquet::CompressionCodec::GZIP) {
1314  stringstream ss;
1315  ss << "File '" << metadata_range_->file() << "' uses an unsupported compression: "
1316  << file_data.meta_data.codec << " for column '" << schema_element.name
1317  << "'.";
1318  return Status(ss.str());
1319  }
1320 
1321  // Check the type in the file is compatible with the catalog metadata.
1322  parquet::Type::type type = IMPALA_TO_PARQUET_TYPES[slot_desc->type().type];
1323  if (type != file_data.meta_data.type) {
1324  stringstream ss;
1325  ss << "File '" << metadata_range_->file() << "' has an incompatible type with the"
1326  << " table schema for column '" << schema_element.name << "'. Expected type: "
1327  << type << ". Actual type: " << file_data.meta_data.type;
1328  return Status(ss.str());
1329  }
1330 
1331  // Check that this column is optional or required
1332  if (schema_element.repetition_type != parquet::FieldRepetitionType::OPTIONAL &&
1333  schema_element.repetition_type != parquet::FieldRepetitionType::REQUIRED) {
1334  stringstream ss;
1335  ss << "File '" << metadata_range_->file() << "' column '" << schema_element.name
1336  << "' contains an unsupported column repetition type: "
1337  << schema_element.repetition_type;
1338  return Status(ss.str());
1339  }
1340 
1341  // Check the decimal scale in the file matches the metastore scale and precision.
1342  // We fail the query if the metadata makes it impossible for us to safely read
1343  // the file. For metadata that is not strictly required to read the file, we fail
1344  // the query if abort_on_error is true; otherwise we just log a warning.
1345  bool is_converted_type_decimal = schema_element.__isset.converted_type &&
1346  schema_element.converted_type == parquet::ConvertedType::DECIMAL;
1347  if (slot_desc->type().type == TYPE_DECIMAL) {
1348  // We require that the scale and byte length be set.
1349  if (schema_element.type != parquet::Type::FIXED_LEN_BYTE_ARRAY) {
1350  stringstream ss;
1351  ss << "File '" << metadata_range_->file() << "' column '" << schema_element.name
1352  << "' should be a decimal column encoded using FIXED_LEN_BYTE_ARRAY.";
1353  return Status(ss.str());
1354  }
1355 
1356  if (!schema_element.__isset.type_length) {
1357  stringstream ss;
1358  ss << "File '" << metadata_range_->file() << "' column '" << schema_element.name
1359  << "' does not have type_length set.";
1360  return Status(ss.str());
1361  }
1362 
1363  int expected_len = ParquetPlainEncoder::DecimalSize(slot_desc->type());
1364  if (schema_element.type_length != expected_len) {
1365  stringstream ss;
1366  ss << "File '" << metadata_range_->file() << "' column '" << schema_element.name
1367  << "' has an invalid type length. Expecting: " << expected_len
1368  << " len in file: " << schema_element.type_length;
1369  return Status(ss.str());
1370  }
1371 
1372  if (!schema_element.__isset.scale) {
1373  stringstream ss;
1374  ss << "File '" << metadata_range_->file() << "' column '" << schema_element.name
1375  << "' does not have the scale set.";
1376  return Status(ss.str());
1377  }
1378 
1379  if (schema_element.scale != slot_desc->type().scale) {
1380  // TODO: we could allow a mismatch and do a conversion at this step.
1381  stringstream ss;
1382  ss << "File '" << metadata_range_->file() << "' column '" << schema_element.name
1383  << "' has a scale that does not match the table metadata scale."
1384  << " File metadata scale: " << schema_element.scale
1385  << " Table metadata scale: " << slot_desc->type().scale;
1386  return Status(ss.str());
1387  }
1388 
1389  // The other decimal metadata should be there but we don't need it.
1390  if (!schema_element.__isset.precision) {
1391  ErrorMsg msg(TErrorCode::PARQUET_MISSING_PRECISION,
1392  metadata_range_->file(), schema_element.name);
1394  } else {
1395  if (schema_element.precision != slot_desc->type().precision) {
1396  // TODO: we could allow a mismatch and do a conversion at this step.
1397  ErrorMsg msg(TErrorCode::PARQUET_WRONG_PRECISION,
1398  metadata_range_->file(), schema_element.name,
1399  schema_element.precision, slot_desc->type().precision);
1401  }
1402  }
1403 
1404  if (!is_converted_type_decimal) {
1405  // TODO: is this validation useful? It is not required at all to read the data and
1406  // might only serve to reject otherwise perfectly readable files.
1407  ErrorMsg msg(TErrorCode::PARQUET_BAD_CONVERTED_TYPE,
1408  metadata_range_->file(), schema_element.name);
1410  }
1411  } else if (schema_element.__isset.scale || schema_element.__isset.precision ||
1412  is_converted_type_decimal) {
1413  ErrorMsg msg(TErrorCode::PARQUET_INCOMPATIBLE_DECIMAL,
1414  metadata_range_->file(), schema_element.name, slot_desc->type().DebugString());
1416  }
1417  return Status::OK;
1418 }
1419 
1420 string PrintRepetitionType(const parquet::FieldRepetitionType::type& t) {
1421  switch (t) {
1422  case parquet::FieldRepetitionType::REQUIRED: return "required";
1423  case parquet::FieldRepetitionType::OPTIONAL: return "optional";
1424  case parquet::FieldRepetitionType::REPEATED: return "repeated";
1425  default: return "<unknown>";
1426  }
1427 }
1428 
1429 string PrintParquetType(const parquet::Type::type& t) {
1430  switch (t) {
1431  case parquet::Type::BOOLEAN: return "boolean";
1432  case parquet::Type::INT32: return "int32";
1433  case parquet::Type::INT64: return "int64";
1434  case parquet::Type::INT96: return "int96";
1435  case parquet::Type::FLOAT: return "float";
1436  case parquet::Type::DOUBLE: return "double";
1437  case parquet::Type::BYTE_ARRAY: return "byte_array";
1438  case parquet::Type::FIXED_LEN_BYTE_ARRAY: return "fixed_len_byte_array";
1439  default: return "<unknown>";
1440  }
1441 }
1442 
1444  stringstream ss;
1445  for (int i = 0; i < indent; ++i) ss << " ";
1446  ss << PrintRepetitionType(element->repetition_type) << " ";
1447  if (element->num_children > 0) {
1448  ss << "struct";
1449  } else {
1450  ss << PrintParquetType(element->type);
1451  }
1452  ss << " " << element->name << " [i:" << col_idx << " d:" << max_def_level << "]";
1453  if (element->num_children > 0) {
1454  ss << " {" << endl;
1455  for (int i = 0; i < element->num_children; ++i) {
1456  ss << children[i].DebugString(indent + 2) << endl;
1457  }
1458  for (int i = 0; i < indent; ++i) ss << " ";
1459  ss << "}";
1460  }
1461  return ss.str();
1462 }
void InitTuple(Tuple *template_tuple, Tuple *tuple)
Definition: hdfs-scanner.h:355
int GetMemory(MemPool **pool, Tuple **tuple_mem, TupleRow **tuple_row_mem)
Status AddDiskIoRanges(const std::vector< DiskIoMgr::ScanRange * > &ranges)
Adds ranges to the io mgr queue and starts up new scanner threads if possible.
static bool Read(uint8_t **buf, int *buf_len, T *val, Status *status)
HdfsParquetScanner(HdfsScanNode *scan_node, RuntimeState *state)
const Bitmap * GetBitmapFilter(SlotId slot)
const ColumnType & type() const
Definition: descriptors.h:78
ObjectPool * obj_pool() const
Definition: runtime-state.h:92
ObjectPool pool
int GetByteSize() const
Returns the byte size of this type. Returns 0 for variable length types.
Definition: types.h:178
virtual Status InitDataPage(uint8_t *data, int size)=0
bool SkipBytes(int64_t length, Status *)
Skip over the next length bytes in the specified HDFS file.
Status ValidateColumn(const BaseColumnReader &col_reader, int row_group_idx)
Status CommitRows(int num_rows)
Status InitColumns(int row_group_idx)
#define ADD_COUNTER(profile, name, unit)
uint32_t fragment_hash_seed() const
int len
Only set if type == TYPE_CHAR or type == TYPE_VARCHAR.
Definition: types.h:62
RuntimeState * runtime_state()
virtual void CreateDictionaryDecoder(uint8_t *values, int size)
Status Reset(const parquet::ColumnMetaData *metadata, ScannerContext::Stream *stream)
DiskIoMgr::RequestContext * reader_context()
static int Ceil(int value, int divisor)
Returns the ceil of value/divisor.
Definition: bit-util.h:32
RuntimeProfile::Counter * num_cols_counter_
Number of cols that need to be read.
DEFINE_bool(convert_legacy_hive_parquet_utc_timestamps, false,"When true, TIMESTAMPs read from files written by Parquet-MR (used by Hive) will ""be converted from UTC to local time. Writes are unaffected.")
#define COUNTER_SET(c, v)
bool IR_ALWAYS_INLINE EvalConjuncts(TupleRow *row)
Definition: hdfs-scanner.h:266
void AttachPool(MemPool *pool, bool commit_batch)
Definition: hdfs-scanner.h:256
const parquet::SchemaElement * element
The corresponding schema element defined in the file metadata.
void SetTuple(int tuple_idx, Tuple *tuple)
Definition: tuple-row.h:34
bool IsEncodingSupported(parquet::Encoding::type e)
#define UNLIKELY(expr)
Definition: compiler-util.h:33
virtual bool ReadSlot(void *slot, MemPool *pool, bool *conjuncts_failed)=0
std::string PrintEncoding(const parquet::Encoding::type &type)
const parquet::SchemaElement & schema_element() const
BaseColumnReader * CreateReader(const SchemaNode &node)
DiskIoMgr::ScanRange * AllocateScanRange(hdfsFS fs, const char *file, int64_t len, int64_t offset, int64_t partition_id, int disk_id, bool try_cache, bool expected_local, int64_t mtime)
static const Status OK
Definition: status.h:87
const DiskIoMgr::ScanRange * scan_range()
Stream * AddStream(DiskIoMgr::ScanRange *range)
Status DeserializeThriftMsg(JNIEnv *env, jbyteArray serialized_msg, T *deserialized_msg)
virtual int num_entries() const =0
const uint32_t PARQUET_CURRENT_VERSION
#define VLOG_FILE
Definition: logging.h:58
int tuple_idx() const
void ConvertSlot(const T *src, T *dst, MemPool *pool)
int max_read_buffer_size() const
Returns the maximum read buffer size.
Definition: disk-io-mgr.h:590
bool ok() const
Definition: status.h:172
RuntimeProfile::Counter * decompress_timer_
Time spent decompressing bytes.
Definition: hdfs-scanner.h:208
bool ReadBytes(int64_t length, uint8_t **buf, Status *, bool peek=false)
DiskIoMgr * io_mgr()
static Status IssueInitialRanges(HdfsScanNode *scan_node, const std::vector< HdfsFileDesc * > &files)
bool HasDateAndTime() const
#define LOG_OR_ABORT(error_msg, runtime_state)
bool ReadValue(MemPool *pool, Tuple *tuple, bool *conjuncts_failed)
std::string DebugString(int indent=0) const
Decoder class for RLE encoded data.
Definition: rle-encoding.h:77
void IncNumScannersCodegenDisabled()
ScannerContext::Stream * stream_
The first stream for context_.
Definition: hdfs-scanner.h:150
uint8_t * Allocate(int size)
Definition: mem-pool.h:92
bool is_impala_internal
If true, this file was generated by an Impala internal release.
Tuple * next_tuple(Tuple *t) const
Definition: hdfs-scanner.h:363
static uint32_t GetHashValue(const void *v, const ColumnType &type, uint32_t seed=0)
Definition: raw-value.h:168
virtual Status Prepare(ScannerContext *context)
One-time initialisation of state that is constant across scan ranges.
Definition: hdfs-scanner.cc:71
RuntimeProfile * runtime_profile()
Definition: exec-node.h:161
bool VersionLt(int major, int minor=0, int patch=0) const
Returns true if version is strictly less than <major>.<minor>.<patch>
Status CreateSchemaTree(const std::vector< parquet::SchemaElement > &schema, SchemaNode *node) const
static int Log2(uint64_t x)
Definition: bit-util.h:135
bool GetValue(int num_bits, T *v)