Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
impala::ParquetPlainEncoder Class Reference

#include <parquet-common.h>

Collaboration diagram for impala::ParquetPlainEncoder:

Public Member Functions

template<>
int ByteSize (const bool &b)
 Disable for bools. Plain encoding is not used for booleans. More...
 
template<>
int Encode (uint8_t *, int fixed_len_size, const bool &)
 
template<>
int Decode (uint8_t *, int fixed_len_size, bool *v)
 
template<>
int ByteSize (const Decimal4Value &)
 
template<>
int ByteSize (const Decimal8Value &)
 
template<>
int ByteSize (const Decimal16Value &)
 
template<>
int ByteSize (const int8_t &v)
 Parquet doesn't have 8-bit or 16-bit ints. They are converted to 32-bit. More...
 
template<>
int ByteSize (const int16_t &v)
 
template<>
int ByteSize (const StringValue &v)
 
template<>
int ByteSize (const TimestampValue &v)
 
template<>
int Decode (uint8_t *buffer, int fixed_len_size, int8_t *v)
 
template<>
int Decode (uint8_t *buffer, int fixed_len_size, int16_t *v)
 
template<>
int Encode (uint8_t *buffer, int fixed_len_size, const int8_t &v)
 
template<>
int Encode (uint8_t *buffer, int fixed_len_size, const int16_t &v)
 
template<>
int Encode (uint8_t *buffer, int fixed_len_size, const StringValue &v)
 
template<>
int Decode (uint8_t *buffer, int fixed_len_size, StringValue *v)
 
template<>
int Encode (uint8_t *buffer, int fixed_len_size, const Decimal4Value &v)
 
template<>
int Encode (uint8_t *buffer, int fixed_len_size, const Decimal8Value &v)
 
template<>
int Encode (uint8_t *buffer, int fixed_len_size, const Decimal16Value &v)
 
template<>
int Decode (uint8_t *buffer, int fixed_len_size, Decimal4Value *v)
 
template<>
int Decode (uint8_t *buffer, int fixed_len_size, Decimal8Value *v)
 
template<>
int Decode (uint8_t *buffer, int fixed_len_size, Decimal16Value *v)
 

Static Public Member Functions

template<typename T >
static int ByteSize (const T &v)
 Returns the byte size of 'v'. More...
 
static int ByteSize (const ColumnType &t)
 
static int DecimalSize (const ColumnType &t)
 The minimum byte size needed to store decimals with precision t.precision. More...
 
template<typename T >
static int Encode (uint8_t *buffer, int fixed_len_size, const T &t)
 
template<typename T >
static int Decode (uint8_t *buffer, int fixed_len_size, T *v)
 
template<typename T >
static int EncodeToFixedLenByteArray (uint8_t *buffer, int fixed_len_size, const T &t)
 
template<typename T >
static int DecodeFromFixedLenByteArray (uint8_t *buffer, int fixed_len_size, T *v)
 

Detailed Description

The plain encoding does not maintain any state so all these functions are static helpers. TODO: we are using templates to provide a generic interface (over the types) to avoid performance penalties. This makes the code more complex and should be removed when we have codegen support to inline virtual calls.

Definition at line 80 of file parquet-common.h.

Member Function Documentation

template<typename T >
static int impala::ParquetPlainEncoder::ByteSize ( const T &  v)
inline static
static int impala::ParquetPlainEncoder::ByteSize ( const ColumnType &  t)
inline static

Returns the encoded size of values of type t. Returns -1 if the type is variable length. The encoded size can differ from the slot size of the type.

Definition at line 88 of file parquet-common.h.

References DecimalSize(), impala::ColumnType::type, impala::TYPE_BIGINT, impala::TYPE_BOOLEAN, impala::TYPE_CHAR, impala::TYPE_DECIMAL, impala::TYPE_DOUBLE, impala::TYPE_FLOAT, impala::TYPE_INT, impala::TYPE_NULL, impala::TYPE_SMALLINT, impala::TYPE_STRING, impala::TYPE_TIMESTAMP, impala::TYPE_TINYINT, and impala::TYPE_VARCHAR.

template<>
int impala::ParquetPlainEncoder::ByteSize ( const bool &  b)

Disable for bools. Plain encoding is not used for booleans.

template<>
int impala::ParquetPlainEncoder::ByteSize ( const Decimal4Value & )
inline

Not used for decimals since the plain encoding encodes them using FIXED_LEN_BYTE_ARRAY.

Definition at line 200 of file parquet-common.h.

template<>
int impala::ParquetPlainEncoder::ByteSize ( const Decimal8Value & )
inline

Definition at line 204 of file parquet-common.h.

template<>
int impala::ParquetPlainEncoder::ByteSize ( const Decimal16Value & )
inline

Definition at line 208 of file parquet-common.h.

template<>
int impala::ParquetPlainEncoder::ByteSize ( const int8_t &  v)
inline

Parquet doesn't have 8-bit or 16-bit ints. They are converted to 32-bit.

Definition at line 215 of file parquet-common.h.

template<>
int impala::ParquetPlainEncoder::ByteSize ( const int16_t &  v)
inline

Definition at line 217 of file parquet-common.h.

template<>
int impala::ParquetPlainEncoder::ByteSize ( const StringValue &  v)
inline

Definition at line 220 of file parquet-common.h.

References impala::StringValue::len.

template<>
int impala::ParquetPlainEncoder::ByteSize ( const TimestampValue &  v)
inline

Definition at line 225 of file parquet-common.h.

static int impala::ParquetPlainEncoder::DecimalSize ( const ColumnType &  t)
inline static
template<typename T >
static int impala::ParquetPlainEncoder::Decode ( uint8_t *  buffer,
int  fixed_len_size,
T *  v 
)
inline static

Decodes a value from buffer into v. Returns the number of bytes read. Buffer need not be aligned. For types that are stored as FIXED_LEN_BYTE_ARRAY, fixed_len_size is the size of the object. Otherwise, it is unused.

Definition at line 176 of file parquet-common.h.

References ByteSize().

Referenced by impala::DictDecoder< T >::DictDecoder(), impala::SetDecimalVal(), and impala::TestType().

template<>
int impala::ParquetPlainEncoder::Decode ( uint8_t *  ,
int  fixed_len_size,
bool *  v 
)
template<>
int impala::ParquetPlainEncoder::Decode ( uint8_t *  buffer,
int  fixed_len_size,
int8_t *  v 
)
inline

Definition at line 230 of file parquet-common.h.

References ByteSize().

template<>
int impala::ParquetPlainEncoder::Decode ( uint8_t *  buffer,
int  fixed_len_size,
int16_t *  v 
)
inline

Definition at line 235 of file parquet-common.h.

References ByteSize().

template<>
int impala::ParquetPlainEncoder::Decode ( uint8_t *  buffer,
int  fixed_len_size,
StringValue *  v 
)
inline

Definition at line 265 of file parquet-common.h.

References ByteSize(), impala::StringValue::len, and impala::StringValue::ptr.

template<>
int impala::ParquetPlainEncoder::Decode ( uint8_t *  buffer,
int  fixed_len_size,
Decimal4Value *  v 
)
inline
template<>
int impala::ParquetPlainEncoder::Decode ( uint8_t *  buffer,
int  fixed_len_size,
Decimal8Value *  v 
)
inline
template<>
int impala::ParquetPlainEncoder::Decode ( uint8_t *  buffer,
int  fixed_len_size,
Decimal16Value *  v 
)
inline
template<typename T >
static int impala::ParquetPlainEncoder::DecodeFromFixedLenByteArray ( uint8_t *  buffer,
int  fixed_len_size,
T *  v 
)
static

Decodes into v, assuming buffer is encoded using FIXED_LEN_BYTE_ARRAY of 'fixed_len_size'. The bytes in buffer must be big endian, and the result stored in v is in machine-endian format.

template<typename T >
static int impala::ParquetPlainEncoder::Encode ( uint8_t *  buffer,
int  fixed_len_size,
const T &  t 
)
inline static

Encodes t into buffer. Returns the number of bytes added. buffer must be preallocated and big enough. Buffer need not be aligned. 'fixed_len_size' is only applicable for data encoded using FIXED_LEN_BYTE_ARRAY and is the number of bytes the plain encoder should use.

Definition at line 166 of file parquet-common.h.

References ByteSize().

Referenced by impala::HdfsParquetTableWriter::ColumnWriter< T >::EncodeValue(), impala::TEST(), impala::TestType(), and impala::DictEncoder< T >::WriteDict().

template<>
int impala::ParquetPlainEncoder::Encode ( uint8_t *  ,
int  fixed_len_size,
const bool &  
)
template<>
int impala::ParquetPlainEncoder::Encode ( uint8_t *  buffer,
int  fixed_len_size,
const int8_t &  v 
)
inline

Definition at line 241 of file parquet-common.h.

References ByteSize().

template<>
int impala::ParquetPlainEncoder::Encode ( uint8_t *  buffer,
int  fixed_len_size,
const int16_t &  v 
)
inline

Definition at line 249 of file parquet-common.h.

References ByteSize().

template<>
int impala::ParquetPlainEncoder::Encode ( uint8_t *  buffer,
int  fixed_len_size,
const StringValue &  v 
)
inline

Definition at line 257 of file parquet-common.h.

References ByteSize(), impala::StringValue::len, and impala::StringValue::ptr.

template<>
int impala::ParquetPlainEncoder::Encode ( uint8_t *  buffer,
int  fixed_len_size,
const Decimal4Value &  v 
)
inline

Write decimals as big endian (byte comparable) to benefit from common prefixes. fixed_len_size can be less than sizeof(Decimal*Value) for space savings. This means that the value in the in-memory format has leading zeros or negative 1's. For example, precision 2 fits in 1 byte. All decimals stored as Decimal4Value will have 3 bytes of leading zeros, so we will only store the interesting byte.

Definition at line 281 of file parquet-common.h.

References impala::DecimalUtil::EncodeToFixedLenByteArray().

template<>
int impala::ParquetPlainEncoder::Encode ( uint8_t *  buffer,
int  fixed_len_size,
const Decimal8Value &  v 
)
inline
template<>
int impala::ParquetPlainEncoder::Encode ( uint8_t *  buffer,
int  fixed_len_size,
const Decimal16Value &  v 
)
inline
template<typename T >
static int impala::ParquetPlainEncoder::EncodeToFixedLenByteArray ( uint8_t *  buffer,
int  fixed_len_size,
const T &  t 
)
static

Encodes 't', which must be in machine-endian format, to a FIXED_LEN_BYTE_ARRAY of 'fixed_len_size'. The result is encoded as big endian.


The documentation for this class was generated from the following file: