Impala
Impalaistheopensource,nativeanalyticdatabaseforApacheHadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
hash-util.h
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 
16 #ifndef IMPALA_UTIL_HASH_UTIL_H
17 #define IMPALA_UTIL_HASH_UTIL_H
18 
19 #include "common/logging.h"
20 #include "common/compiler-util.h"
21 
22 #include "util/cpu-info.h"
23 #include "util/sse-util.h"
24 
25 namespace impala {
26 
28 class HashUtil {
29  public:
37  static uint32_t CrcHash(const void* data, int32_t bytes, uint32_t hash) {
39  uint32_t words = bytes / sizeof(uint32_t);
40  bytes = bytes % sizeof(uint32_t);
41 
42  const uint32_t* p = reinterpret_cast<const uint32_t*>(data);
43  while (words--) {
44  hash = SSE4_crc32_u32(hash, *p);
45  ++p;
46  }
47 
48  const uint8_t* s = reinterpret_cast<const uint8_t*>(p);
49  while (bytes--) {
50  hash = SSE4_crc32_u8(hash, *s);
51  ++s;
52  }
53 
54  // The lower half of the CRC hash has has poor uniformity, so swap the halves
55  // for anyone who only uses the first several bits of the hash.
56  hash = (hash << 16) | (hash >> 16);
57  return hash;
58  }
59 
60  static const uint64_t MURMUR_PRIME = 0xc6a4a7935bd1e995;
61  static const int MURMUR_R = 47;
62 
64  static uint64_t MurmurHash2_64(const void* input, int len, uint64_t seed) {
65  uint64_t h = seed ^ (len * MURMUR_PRIME);
66 
67  const uint64_t* data = reinterpret_cast<const uint64_t*>(input);
68  const uint64_t* end = data + (len / sizeof(uint64_t));
69 
70  while (data != end) {
71  uint64_t k = *data++;
72  k *= MURMUR_PRIME;
73  k ^= k >> MURMUR_R;
74  k *= MURMUR_PRIME;
75  h ^= k;
76  h *= MURMUR_PRIME;
77  }
78 
79  const uint8_t* data2 = reinterpret_cast<const uint8_t*>(data);
80  switch (len & 7) {
81  case 7: h ^= uint64_t(data2[6]) << 48;
82  case 6: h ^= uint64_t(data2[5]) << 40;
83  case 5: h ^= uint64_t(data2[4]) << 32;
84  case 4: h ^= uint64_t(data2[3]) << 24;
85  case 3: h ^= uint64_t(data2[2]) << 16;
86  case 2: h ^= uint64_t(data2[1]) << 8;
87  case 1: h ^= uint64_t(data2[0]);
88  h *= MURMUR_PRIME;
89  }
90 
91  h ^= h >> MURMUR_R;
92  h *= MURMUR_PRIME;
93  h ^= h >> MURMUR_R;
94  return h;
95  }
96 
98  static const uint32_t FNV_PRIME = 0x01000193; // 16777619
99  static const uint32_t FNV_SEED = 0x811C9DC5; // 2166136261
100  static const uint64_t FNV64_PRIME = 1099511628211UL;
101  static const uint64_t FNV64_SEED = 14695981039346656037UL;
102 
112  static uint64_t FnvHash64(const void* data, int32_t bytes, uint64_t hash) {
113  const uint8_t* ptr = reinterpret_cast<const uint8_t*>(data);
114  while (bytes--) {
115  hash = (*ptr ^ hash) * FNV64_PRIME;
116  ++ptr;
117  }
118  return hash;
119  }
120 
125  static uint32_t FnvHash64to32(const void* data, int32_t bytes, uint32_t hash) {
126  uint64_t hash_u64 = hash | ((uint64_t)hash << 32);
127  hash_u64 = FnvHash64(data, bytes, hash_u64);
128  return (hash_u64 >> 32) ^ (hash_u64 & 0xFFFFFFFF);
129  }
130 
135  static uint32_t Hash(const void* data, int32_t bytes, uint32_t seed) {
137  return CrcHash(data, bytes, seed);
138  } else {
139  return MurmurHash2_64(data, bytes, seed);
140  }
141  }
142 
143 };
144 
145 }
146 
147 #endif
const StringSearch UrlParser::hash_search & hash
Definition: url-parser.cc:41
static uint32_t SSE4_crc32_u8(uint32_t crc, uint8_t v)
Definition: sse-util.h:107
static uint32_t Hash(const void *data, int32_t bytes, uint32_t seed)
Definition: hash-util.h:135
Utility class to compute hash values.
Definition: hash-util.h:28
static uint32_t SSE4_crc32_u32(uint32_t crc, uint32_t v)
Definition: sse-util.h:112
static const uint64_t FNV64_SEED
Definition: hash-util.h:101
static const int64_t SSE4_2
Definition: cpu-info.h:34
static uint64_t FnvHash64(const void *data, int32_t bytes, uint64_t hash)
Definition: hash-util.h:112
static const uint32_t FNV_PRIME
default values recommended by http://isthe.com/chongo/tech/comp/fnv/
Definition: hash-util.h:98
static const uint64_t FNV64_PRIME
Definition: hash-util.h:100
static const uint32_t FNV_SEED
Definition: hash-util.h:99
static const int MURMUR_R
Definition: hash-util.h:61
#define LIKELY(expr)
Definition: compiler-util.h:32
static uint32_t CrcHash(const void *data, int32_t bytes, uint32_t hash)
Definition: hash-util.h:37
static uint32_t FnvHash64to32(const void *data, int32_t bytes, uint32_t hash)
Definition: hash-util.h:125
static const uint64_t MURMUR_PRIME
Definition: hash-util.h:60
static bool IsSupported(long flag)
Returns whether of not the cpu supports this flag.
Definition: cpu-info.h:58
static uint64_t MurmurHash2_64(const void *input, int len, uint64_t seed)
Murmur2 hash implementation returning 64-bit hashes.
Definition: hash-util.h:64