doc/html/raw-value_8h_source.html

 // Copyright 2012 Cloudera Inc.

 //

 // Licensed under the Apache License, Version 2.0 (the "License");

 // you may not use this file except in compliance with the License.

 // You may obtain a copy of the License at

 //

 // http://www.apache.org/licenses/LICENSE-2.0

 //

 // Unless required by applicable law or agreed to in writing, software

 // distributed under the License is distributed on an "AS IS" BASIS,

 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

 // See the License for the specific language governing permissions and

 // limitations under the License.


 #ifndef IMPALA_RUNTIME_RAW_VALUE_H

 #define IMPALA_RUNTIME_RAW_VALUE_H


 #include <math.h>

 #include <algorithm>
 #include <cmath>
 #include <sstream>
 #include <string>

 #include <boost/functional/hash.hpp>

 #include "common/logging.h"
 #include "runtime/string-value.inline.h"
 #include "runtime/timestamp-value.h"
 #include "runtime/types.h"
 #include "util/hash-util.h"


 namespace impala {


 class MemPool;

 class SlotDescriptor;

 class Tuple;


 // Useful utility functions for runtime values (which are passed around as void*).
 // Every member is static; the class only serves as a namespace for the helpers.
 class RawValue {
  public:
   // Ascii output precision for double/float.
   static const int ASCII_PRECISION;

   // Prints 'value', interpreted according to 'type', to 'stream'. A NULL 'value'
   // is printed as the text "NULL". If 'scale' is greater than -1 the stream is
   // switched to fixed notation with 'scale' digits after the decimal point while
   // the value is written.
   static void PrintValue(const void* value, const ColumnType& type, int scale,
                          std::stringstream* stream);

   // Overload taking a std::string instead of a stream. Defined out of line.
   // NOTE(review): presumably stores the printed representation in '*str' - confirm
   // against the definition.
   static void PrintValue(const void* value, const ColumnType& type, int scale,
                          std::string* str);

   // Writes the byte representation of a value to a stringstream
   // character-by-character.
   static void PrintValueAsBytes(const void* value, const ColumnType& type,
                                 std::stringstream* stream);

   // Returns a 32-bit hash of 'v', interpreted according to 'type', mixed with
   // 'seed' (0 when omitted). A NULL 'v' hashes to HashCombine32(0, seed).
   static uint32_t GetHashValue(const void* v, const ColumnType& type, uint32_t seed = 0);

   // Same contract as GetHashValue(), but the bytes are hashed with
   // HashUtil::FnvHash64to32() and 'seed' has no default.
   static uint32_t GetHashValueFnv(const void* v, const ColumnType& type, uint32_t seed);

   // Compares 'v1' and 'v2', both interpreted according to 'type'. Defined out of
   // line.
   // NOTE(review): sign convention of the result is not visible here - confirm.
   static int Compare(const void* v1, const void* v2, const ColumnType& type);

   // Writes 'value' into 'tuple' using 'slot_desc'. Defined out of line.
   // NOTE(review): presumably 'pool' backs any variable-length data that has to be
   // copied - confirm against the definition.
   static void Write(const void* value, Tuple* tuple, const SlotDescriptor* slot_desc,
                     MemPool* pool);

   // Writes the value at 'src', of the given 'type', to 'dst'. Defined out of line.
   // NOTE(review): role of 'pool' is not visible here - confirm.
   static void Write(const void* src, void* dst, const ColumnType& type, MemPool* pool);

   // Writes the value at 'src', of the given 'type', to 'dst'. Defined out of line.
   // NOTE(review): '*buf' looks like a caller-supplied buffer cursor - confirm.
   static void Write(const void* src, const ColumnType& type, void* dst, uint8_t** buf);

   // Returns true if 'v1' and 'v2', both interpreted according to 'type', hold equal
   // values. Neither pointer is checked for NULL.
   static bool Eq(const void* v1, const void* v2, const ColumnType& type);

  private:
   // The magic number (used in hash_combine()) 0x9e3779b9 = 2^32 / (golden ratio).
   static const uint32_t HASH32_COMBINE_SEED = 0x9e3779b9;

   // Folds 'value' into 'seed' and returns the combined 32-bit hash. All arithmetic
   // is unsigned 32-bit, so it wraps on overflow.
   static inline uint32_t HashCombine32(uint32_t value, uint32_t seed) {
     const uint32_t mixed = HASH32_COMBINE_SEED + value + (seed << 6) + (seed >> 2);
     return seed ^ mixed;
   }
 };


 inline bool RawValue::Eq(const void* v1, const void* v2, const ColumnType& type) {

   const StringValue* string_value1;

   const StringValue* string_value2;

   switch (type.type) {

     case TYPE_BOOLEAN:

       return *reinterpret_cast<const bool*>(v1)

           == *reinterpret_cast<const bool*>(v2);

     case TYPE_TINYINT:

       return *reinterpret_cast<const int8_t*>(v1)

           == *reinterpret_cast<const int8_t*>(v2);

     case TYPE_SMALLINT:

       return *reinterpret_cast<const int16_t*>(v1)

           == *reinterpret_cast<const int16_t*>(v2);

     case TYPE_INT:

       return *reinterpret_cast<const int32_t*>(v1)

           == *reinterpret_cast<const int32_t*>(v2);

     case TYPE_BIGINT:

       return *reinterpret_cast<const int64_t*>(v1)

           == *reinterpret_cast<const int64_t*>(v2);

     case TYPE_FLOAT:

       return *reinterpret_cast<const float*>(v1)

           == *reinterpret_cast<const float*>(v2);

     case TYPE_DOUBLE:

       return *reinterpret_cast<const double*>(v1)

           == *reinterpret_cast<const double*>(v2);

     case TYPE_STRING:

     case TYPE_VARCHAR:

       string_value1 = reinterpret_cast<const StringValue*>(v1);

       string_value2 = reinterpret_cast<const StringValue*>(v2);

       return string_value1->Eq(*string_value2);

     case TYPE_TIMESTAMP:

       return *reinterpret_cast<const TimestampValue*>(v1) ==

           *reinterpret_cast<const TimestampValue*>(v2);

     case TYPE_CHAR: {

       const char* v1ptr = StringValue::CharSlotToPtr(v1, type);

       const char* v2ptr = StringValue::CharSlotToPtr(v2, type);

       int64_t l1 = StringValue::UnpaddedCharLength(v1ptr, type.len);

       int64_t l2 = StringValue::UnpaddedCharLength(v2ptr, type.len);

       return StringCompare(v1ptr, l1, v2ptr, l2, std::min(l1, l2)) == 0;

     }

     case TYPE_DECIMAL:

       switch (type.GetByteSize()) {

         case 4:

           return reinterpret_cast<const Decimal4Value*>(v1)->value()

               == reinterpret_cast<const Decimal4Value*>(v2)->value();

         case 8:

           return reinterpret_cast<const Decimal8Value*>(v1)->value()

               == reinterpret_cast<const Decimal8Value*>(v2)->value();

         case 16:

           return reinterpret_cast<const Decimal16Value*>(v1)->value()

               == reinterpret_cast<const Decimal16Value*>(v2)->value();

         default:

           break;

       }

     default:

       DCHECK(false) << type;

       return 0;

   };

 }


 inline uint32_t RawValue::GetHashValue(const void* v, const ColumnType& type,

     uint32_t seed) {

   // Hash_combine with v = 0

   if (v == NULL) return HashCombine32(0, seed);


   switch (type.type) {

     case TYPE_STRING:

     case TYPE_VARCHAR: {

       const StringValue* string_value = reinterpret_cast<const StringValue*>(v);

       return HashUtil::Hash(string_value->ptr, string_value->len, seed);

     }

     case TYPE_BOOLEAN: return HashCombine32(*reinterpret_cast<const bool*>(v), seed);

     case TYPE_TINYINT: return HashUtil::Hash(v, 1, seed);

     case TYPE_SMALLINT: return HashUtil::Hash(v, 2, seed);

     case TYPE_INT: return HashUtil::Hash(v, 4, seed);

     case TYPE_BIGINT: return HashUtil::Hash(v, 8, seed);

     case TYPE_FLOAT: return HashUtil::Hash(v, 4, seed);

     case TYPE_DOUBLE: return HashUtil::Hash(v, 8, seed);

     case TYPE_TIMESTAMP: return HashUtil::Hash(v, 12, seed);

     case TYPE_CHAR: return HashUtil::Hash(StringValue::CharSlotToPtr(v, type),

                                           type.len, seed);

     case TYPE_DECIMAL: return HashUtil::Hash(v, type.GetByteSize(), seed);

     default:

       DCHECK(false);

       return 0;

   }

 }


 inline uint32_t RawValue::GetHashValueFnv(const void* v, const ColumnType& type,

     uint32_t seed) {

   // Hash_combine with v = 0

   if (v == NULL) return HashCombine32(0, seed);


   switch (type.type ) {

     case TYPE_STRING:

     case TYPE_VARCHAR: {

       const StringValue* string_value = reinterpret_cast<const StringValue*>(v);

       return HashUtil::FnvHash64to32(string_value->ptr, string_value->len, seed);

     }

     case TYPE_BOOLEAN: return HashCombine32(*reinterpret_cast<const bool*>(v), seed);

     case TYPE_TINYINT: return HashUtil::FnvHash64to32(v, 1, seed);

     case TYPE_SMALLINT: return HashUtil::FnvHash64to32(v, 2, seed);

     case TYPE_INT: return HashUtil::FnvHash64to32(v, 4, seed);

     case TYPE_BIGINT: return HashUtil::FnvHash64to32(v, 8, seed);

     case TYPE_FLOAT: return HashUtil::FnvHash64to32(v, 4, seed);

     case TYPE_DOUBLE: return HashUtil::FnvHash64to32(v, 8, seed);

     case TYPE_TIMESTAMP: return HashUtil::FnvHash64to32(v, 12, seed);

     case TYPE_CHAR: return HashUtil::FnvHash64to32(StringValue::CharSlotToPtr(v, type),

                                                    type.len, seed);

     case TYPE_DECIMAL: return HashUtil::FnvHash64to32(v, type.GetByteSize(), seed);

     default:

       DCHECK(false);

       return 0;

   }

 }


 inline void RawValue::PrintValue(const void* value, const ColumnType& type, int scale,

     std::stringstream* stream) {

   if (value == NULL) {

     *stream << "NULL";

     return;

   }


   int old_precision = stream->precision();

   std::ios_base::fmtflags old_flags = stream->flags();

   if (scale > -1) {

     stream->precision(scale);

     // Setting 'fixed' causes precision to set the number of digits printed after the

     // decimal (by default it sets the maximum number of digits total).

     *stream << std::fixed;

   }


   const StringValue* string_val = NULL;

   switch (type.type) {

     case TYPE_BOOLEAN: {

       bool val = *reinterpret_cast<const bool*>(value);

       *stream << (val ? "true" : "false");

       return;

     }

     case TYPE_TINYINT:

       // Extra casting for chars since they should not be interpreted as ASCII.

       *stream << static_cast<int>(*reinterpret_cast<const int8_t*>(value));

       break;

     case TYPE_SMALLINT:

       *stream << *reinterpret_cast<const int16_t*>(value);

       break;

     case TYPE_INT:

       *stream << *reinterpret_cast<const int32_t*>(value);

       break;

     case TYPE_BIGINT:

       *stream << *reinterpret_cast<const int64_t*>(value);

       break;

     case TYPE_FLOAT:

       {

         float val = *reinterpret_cast<const float*>(value);

         if (LIKELY(std::isfinite(val))) {

           *stream << val;

         } else if (isinf(val)) {

           // 'Infinity' is Java's text representation of inf. By staying close to Java, we

           // allow Hive to read text tables containing non-finite values produced by

           // Impala. (The same logic applies to 'NaN', below).

           *stream << (val < 0 ? "-Infinity" : "Infinity");

         } else if (isnan(val)) {

           *stream << "NaN";

         }

       }

       break;

     case TYPE_DOUBLE:

       {

         double val = *reinterpret_cast<const double*>(value);

         if (LIKELY(std::isfinite(val))) {

           *stream << val;

         } else if (isinf(val)) {

           // See TYPE_FLOAT for rationale.

           *stream << (val < 0 ? "-Infinity" : "Infinity");

         } else if (isnan(val)) {

           *stream << "NaN";

         }

       }

       break;

     case TYPE_VARCHAR:

     case TYPE_STRING:

       string_val = reinterpret_cast<const StringValue*>(value);

       if (type.type == TYPE_VARCHAR) DCHECK(string_val->len <= type.len);

       stream->write(string_val->ptr, string_val->len);

       break;

     case TYPE_TIMESTAMP:

       *stream << *reinterpret_cast<const TimestampValue*>(value);

       break;

     case TYPE_CHAR:

       stream->write(StringValue::CharSlotToPtr(value, type), type.len);

       break;

     case TYPE_DECIMAL:

       switch (type.GetByteSize()) {

         case 4:

           *stream << reinterpret_cast<const Decimal4Value*>(value)->ToString(type);

           break;

         case 8:

           *stream << reinterpret_cast<const Decimal8Value*>(value)->ToString(type);

           break;

         case 16:

           *stream << reinterpret_cast<const Decimal16Value*>(value)->ToString(type);

           break;

         default:

           DCHECK(false) << type;

       }

       break;

     default:

       DCHECK(false);

   }

   stream->precision(old_precision);

   // Undo setting stream to fixed

   stream->flags(old_flags);

 }


 }


 #endif

impala::StringValue::Eq
bool Eq(const StringValue &other) const
==
Definition: string-value.inline.h:74

timestamp-value.h

impala::RawValue::PrintValue
static void PrintValue(const void *value, const ColumnType &type, int scale, std::stringstream *stream)
Definition: raw-value.h:224

impala::TYPE_DOUBLE
Definition: types.h:36

types.h

impala::TYPE_CHAR
Definition: types.h:47

impala::StringValue
Definition: string-value.h:33

impala::RawValue::PrintValueAsBytes
static void PrintValueAsBytes(const void *value, const ColumnType &type, std::stringstream *stream)
Writes the byte representation of a value to a stringstream character-by-character.
Definition: raw-value.cc:28

impala::RawValue::Eq
static bool Eq(const void *v1, const void *v2, const ColumnType &type)
Definition: raw-value.h:106

impala::TYPE_VARCHAR
Definition: types.h:48

impala::Tuple
A tuple with 0 materialised slots is represented as NULL.
Definition: tuple.h:48

impala::StringValue::len
int len
Definition: string-value.h:38

impala::RawValue::ASCII_PRECISION
static const int ASCII_PRECISION
Ascii output precision for double/float.
Definition: raw-value.h:40

hash-util.h

impala::SlotDescriptor
Definition: descriptors.h:75

impala::RawValue::HASH32_COMBINE_SEED
static const uint32_t HASH32_COMBINE_SEED
The magic number (used in hash_combine()) 0x9e3779b9 = 2^32 / (golden ratio).
Definition: raw-value.h:96

logging.h

impala::TYPE_TIMESTAMP
Definition: types.h:37

impala::RawValue::GetHashValueFnv
static uint32_t GetHashValueFnv(const void *v, const ColumnType &type, uint32_t seed)
Definition: raw-value.h:196

impala::StringValue::UnpaddedCharLength
static int64_t UnpaddedCharLength(const char *cptr, int64_t len)
Returns number of characters in a char array (ignores trailing spaces)
Definition: string-value.inline.h:107

impala::TYPE_INT
Definition: types.h:33

impala::ColumnType::type
PrimitiveType type
Definition: types.h:60

impala::MemPool
Definition: mem-pool.h:77

impala::RawValue::HashCombine32
static uint32_t HashCombine32(uint32_t value, uint32_t seed)
Definition: raw-value.h:101

impala::TYPE_SMALLINT
Definition: types.h:32

impala::HashUtil::Hash
static uint32_t Hash(const void *data, int32_t bytes, uint32_t seed)
Definition: hash-util.h:135

pool
ObjectPool pool
Definition: expr-benchmark.cc:89

impala::TimestampValue
Definition: timestamp-value.h:65

impala::DecimalValue< int32_t >

impala::ColumnType::GetByteSize
int GetByteSize() const
Returns the byte size of this type. Returns 0 for variable length types.
Definition: types.h:178

string-value.inline.h

impala::ColumnType::len
int len
Only set if type == TYPE_CHAR or type == TYPE_VARCHAR.
Definition: types.h:62

impala::RawValue::Write
static void Write(const void *value, Tuple *tuple, const SlotDescriptor *slot_desc, MemPool *pool)
Definition: raw-value.cc:303

impala::TYPE_BOOLEAN
Definition: types.h:30

impala::TYPE_BIGINT
Definition: types.h:34

impala::StringValue::ptr
char * ptr
Definition: string-value.h:37

impala::RawValue::Compare
static int Compare(const void *v1, const void *v2, const ColumnType &type)
Definition: raw-value.cc:109

impala::ColumnType
Definition: types.h:59

LIKELY
#define LIKELY(expr)
Definition: compiler-util.h:32

impala::StringValue::CharSlotToPtr
static char * CharSlotToPtr(void *slot, const ColumnType &type)
Definition: string-value.inline.h:115

impala::RawValue
Useful utility functions for runtime values (which are passed around as void*).
Definition: raw-value.h:37

impala::HashUtil::FnvHash64to32
static uint32_t FnvHash64to32(const void *data, int32_t bytes, uint32_t hash)
Definition: hash-util.h:125

impala::TYPE_STRING
Definition: types.h:38

impala::TYPE_TINYINT
Definition: types.h:31

impala::StringCompare
static int StringCompare(const char *s1, int n1, const char *s2, int n2, int len)
Definition: string-value.inline.h:37

impala::TYPE_FLOAT
Definition: types.h:35

impala::RawValue::GetHashValue
static uint32_t GetHashValue(const void *v, const ColumnType &type, uint32_t seed=0)
Definition: raw-value.h:168

impala::TYPE_DECIMAL
Definition: types.h:42