#include <parquet-common.h>

Collaboration diagram for impala::ParquetPlainEncoder:

Public Member Functions
template<>
int	ByteSize (const bool &b)
	Disable for bools. Plain encoding is not used for booleans. More...

template<>
int	Encode (uint8_t *, int fixed_len_size, const bool &)

template<>
int	Decode (uint8_t *, int fixed_len_size, bool *v)

template<>
int	ByteSize (const Decimal4Value &)

template<>
int	ByteSize (const Decimal8Value &)

template<>
int	ByteSize (const Decimal16Value &)

template<>
int	ByteSize (const int8_t &v)
	Parquet doesn't have 8-bit or 16-bit ints. They are converted to 32-bit. More...

template<>
int	ByteSize (const int16_t &v)

template<>
int	ByteSize (const StringValue &v)

template<>
int	ByteSize (const TimestampValue &v)

template<>
int	Decode (uint8_t *buffer, int fixed_len_size, int8_t *v)

template<>
int	Decode (uint8_t *buffer, int fixed_len_size, int16_t *v)

template<>
int	Encode (uint8_t *buffer, int fixed_len_size, const int8_t &v)

template<>
int	Encode (uint8_t *buffer, int fixed_len_size, const int16_t &v)

template<>
int	Encode (uint8_t *buffer, int fixed_len_size, const StringValue &v)

template<>
int	Decode (uint8_t *buffer, int fixed_len_size, StringValue *v)

template<>
int	Encode (uint8_t *buffer, int fixed_len_size, const Decimal4Value &v)

template<>
int	Encode (uint8_t *buffer, int fixed_len_size, const Decimal8Value &v)

template<>
int	Encode (uint8_t *buffer, int fixed_len_size, const Decimal16Value &v)

template<>
int	Decode (uint8_t *buffer, int fixed_len_size, Decimal4Value *v)

template<>
int	Decode (uint8_t *buffer, int fixed_len_size, Decimal8Value *v)

template<>
int	Decode (uint8_t *buffer, int fixed_len_size, Decimal16Value *v)

Static Public Member Functions
template<typename T >
static int	ByteSize (const T &v)
	Returns the byte size of 'v'. More...

static int	ByteSize (const ColumnType &t)

static int	DecimalSize (const ColumnType &t)
	The minimum byte size needed to store decimals with precision t.precision. More...

template<typename T >
static int	Encode (uint8_t *buffer, int fixed_len_size, const T &t)

template<typename T >
static int	Decode (uint8_t *buffer, int fixed_len_size, T *v)

template<typename T >
static int	EncodeToFixedLenByteArray (uint8_t *buffer, int fixed_len_size, const T &t)

template<typename T >
static int	DecodeFromFixedLenByteArray (uint8_t *buffer, int fixed_len_size, T *v)

Detailed Description

The plain encoding does not maintain any state so all these functions are static helpers. TODO: we are using templates to provide a generic interface (over the types) to avoid performance penalties. This makes the code more complex and should be removed when we have codegen support to inline virtual calls.

Definition at line 80 of file parquet-common.h.

Member Function Documentation

template<typename T >

static int impala::ParquetPlainEncoder::ByteSize ( const T & v )

inline static

Returns the byte size of 'v'.

Definition at line 84 of file parquet-common.h.

Referenced by impala::DictEncoder< T >::AddToTable(), impala::HdfsParquetTableWriter::ColumnWriter< T >::ColumnWriter(), Decode(), Encode(), and impala::TEST().

static int impala::ParquetPlainEncoder::ByteSize ( const ColumnType & t )

inline static

Returns the encoded size of values of type t. Returns -1 if it is variable length. This can be different than the slot size of the types.

Definition at line 88 of file parquet-common.h.

References DecimalSize(), impala::ColumnType::type, impala::TYPE_BIGINT, impala::TYPE_BOOLEAN, impala::TYPE_CHAR, impala::TYPE_DECIMAL, impala::TYPE_DOUBLE, impala::TYPE_FLOAT, impala::TYPE_INT, impala::TYPE_NULL, impala::TYPE_SMALLINT, impala::TYPE_STRING, impala::TYPE_TIMESTAMP, impala::TYPE_TINYINT, and impala::TYPE_VARCHAR.

template<>

int impala::ParquetPlainEncoder::ByteSize ( const bool & b )

Disable for bools. Plain encoding is not used for booleans.

template<>

int impala::ParquetPlainEncoder::ByteSize ( const Decimal4Value & )

inline

Not used for decimals since the plain encoding encodes them using FIXED_LEN_BYTE_ARRAY.

Definition at line 200 of file parquet-common.h.

template<>

int impala::ParquetPlainEncoder::ByteSize ( const Decimal8Value & )

inline

Definition at line 204 of file parquet-common.h.

template<>

int impala::ParquetPlainEncoder::ByteSize ( const Decimal16Value & )

inline

Definition at line 208 of file parquet-common.h.

template<>

int impala::ParquetPlainEncoder::ByteSize ( const int8_t & v )

inline

Parquet doesn't have 8-bit or 16-bit ints. They are converted to 32-bit.

Definition at line 215 of file parquet-common.h.

template<>

int impala::ParquetPlainEncoder::ByteSize ( const int16_t & v )

inline

Definition at line 217 of file parquet-common.h.

template<>

int impala::ParquetPlainEncoder::ByteSize ( const StringValue & v )

inline

Definition at line 220 of file parquet-common.h.

References impala::StringValue::len.

template<>

int impala::ParquetPlainEncoder::ByteSize ( const TimestampValue & v )

inline

Definition at line 225 of file parquet-common.h.

static int impala::ParquetPlainEncoder::DecimalSize ( const ColumnType & t )

inline static

The minimum byte size needed to store decimals with precision t.precision.

Definition at line 116 of file parquet-common.h.

References impala::ColumnType::precision, impala::ColumnType::type, and impala::TYPE_DECIMAL.

Referenced by ByteSize(), impala::HdfsParquetScanner::ColumnReader< T >::ColumnReader(), impala::HdfsParquetTableWriter::CreateSchema(), and impala::HdfsParquetScanner::ValidateColumn().

template<typename T >

static int impala::ParquetPlainEncoder::Decode	(	uint8_t *	buffer,
		int	fixed_len_size,
		T *	v
	)

inline static

Decodes v from buffer. Returns the number of bytes read. Buffer need not be aligned. For types that are stored as FIXED_LEN_BYTE_ARRAY, fixed_len_size is the size of the object. Otherwise, it is unused.

Definition at line 176 of file parquet-common.h.

References ByteSize().

Referenced by impala::DictDecoder< T >::DictDecoder(), impala::SetDecimalVal(), and impala::TestType().

template<>

int impala::ParquetPlainEncoder::Decode	(	uint8_t *	,
		int	fixed_len_size,
		bool *	v
	)

template<>

int impala::ParquetPlainEncoder::Decode	(	uint8_t *	buffer,
		int	fixed_len_size,
		int8_t *	v
	)

inline

Definition at line 230 of file parquet-common.h.

References ByteSize().

template<>

int impala::ParquetPlainEncoder::Decode	(	uint8_t *	buffer,
		int	fixed_len_size,
		int16_t *	v
	)

inline

Definition at line 235 of file parquet-common.h.

References ByteSize().

template<>

int impala::ParquetPlainEncoder::Decode	(	uint8_t *	buffer,
		int	fixed_len_size,
		StringValue *	v
	)

inline

Definition at line 265 of file parquet-common.h.

References ByteSize(), impala::StringValue::len, and impala::StringValue::ptr.

template<>

int impala::ParquetPlainEncoder::Decode	(	uint8_t *	buffer,
		int	fixed_len_size,
		Decimal4Value *	v
	)

inline

Definition at line 302 of file parquet-common.h.

References impala::DecimalUtil::DecodeFromFixedLenByteArray().

template<>

int impala::ParquetPlainEncoder::Decode	(	uint8_t *	buffer,
		int	fixed_len_size,
		Decimal8Value *	v
	)

inline

Definition at line 309 of file parquet-common.h.

References impala::DecimalUtil::DecodeFromFixedLenByteArray().

template<>

int impala::ParquetPlainEncoder::Decode	(	uint8_t *	buffer,
		int	fixed_len_size,
		Decimal16Value *	v
	)

inline

Definition at line 316 of file parquet-common.h.

References impala::DecimalUtil::DecodeFromFixedLenByteArray().

template<typename T >

static int impala::ParquetPlainEncoder::DecodeFromFixedLenByteArray	(	uint8_t *	buffer,
		int	fixed_len_size,
		T *	v
	)

static

Decodes into v, assuming buffer is encoded using FIXED_LEN_BYTE_ARRAY of 'fixed_len_size'. The bytes in buffer must be big endian, and the result stored in v is in machine-endian format.

template<typename T >

static int impala::ParquetPlainEncoder::Encode	(	uint8_t *	buffer,
		int	fixed_len_size,
		const T &	t
	)

inline static

Encodes t into buffer. Returns the number of bytes added. buffer must be preallocated and big enough. Buffer need not be aligned. 'fixed_len_size' is only applicable for data encoded using FIXED_LEN_BYTE_ARRAY and is the number of bytes the plain encoder should use.

Definition at line 166 of file parquet-common.h.

References ByteSize().

Referenced by impala::HdfsParquetTableWriter::ColumnWriter< T >::EncodeValue(), impala::TEST(), impala::TestType(), and impala::DictEncoder< T >::WriteDict().

template<>

int impala::ParquetPlainEncoder::Encode	(	uint8_t *	,
		int	fixed_len_size,
		const bool &
	)

template<>

int impala::ParquetPlainEncoder::Encode	(	uint8_t *	buffer,
		int	fixed_len_size,
		const int8_t &	v
	)

inline

Definition at line 241 of file parquet-common.h.

References ByteSize().

template<>

int impala::ParquetPlainEncoder::Encode	(	uint8_t *	buffer,
		int	fixed_len_size,
		const int16_t &	v
	)

inline

Definition at line 249 of file parquet-common.h.

References ByteSize().

template<>

int impala::ParquetPlainEncoder::Encode	(	uint8_t *	buffer,
		int	fixed_len_size,
		const StringValue &	v
	)

inline

Definition at line 257 of file parquet-common.h.

References ByteSize(), impala::StringValue::len, and impala::StringValue::ptr.

template<>

int impala::ParquetPlainEncoder::Encode	(	uint8_t *	buffer,
		int	fixed_len_size,
		const Decimal4Value &	v
	)

inline

Write decimals as big endian (byte comparable) to benefit from common prefixes. fixed_len_size can be less than sizeof(Decimal*Value) for space savings. This means that the value in the in-memory format has leading zeros or negative 1's. For example, precision 2 fits in 1 byte. All decimals stored as Decimal4Value will have 3 bytes of leading zeros, so we will only store the interesting byte.

Definition at line 281 of file parquet-common.h.

References impala::DecimalUtil::EncodeToFixedLenByteArray().

template<>

int impala::ParquetPlainEncoder::Encode	(	uint8_t *	buffer,
		int	fixed_len_size,
		const Decimal8Value &	v
	)

inline

Definition at line 288 of file parquet-common.h.

References impala::DecimalUtil::EncodeToFixedLenByteArray().

template<>

int impala::ParquetPlainEncoder::Encode	(	uint8_t *	buffer,
		int	fixed_len_size,
		const Decimal16Value &	v
	)

inline

Definition at line 295 of file parquet-common.h.

References impala::DecimalUtil::EncodeToFixedLenByteArray().

template<typename T >

static int impala::ParquetPlainEncoder::EncodeToFixedLenByteArray	(	uint8_t *	buffer,
		int	fixed_len_size,
		const T &	t
	)

static

Encodes 't', which must be in machine-endian format, to a FIXED_LEN_BYTE_ARRAY of 'fixed_len_size'. The result is encoded as big endian.

The documentation for this class was generated from the following file:

be/src/exec/parquet-common.h

Public Member Functions

Static Public Member Functions

Detailed Description

Member Function Documentation