20 #include <boost/functional/hash.hpp>
33 using boost::hash_combine;
34 using boost::hash_range;
35 using namespace impala;
74 typedef uint32_t (*
CodegenHashFn)(
int rows,
char* data, int32_t* results);
88 for (
int i = 0; i < batch; ++i) {
89 int32_t* values =
reinterpret_cast<int32_t*
>(data->
data);
90 for (
int j = 0; j < rows; ++j) {
92 for (
int k = 0; k < cols; ++k) {
105 for (
int i = 0; i < batch; ++i) {
106 int32_t* values =
reinterpret_cast<int32_t*
>(data->
data);
107 for (
int j = 0; j < rows; ++j) {
109 for (
int k = 0; k < cols; ++k) {
122 for (
int i = 0; i < batch; ++i) {
123 int32_t* values =
reinterpret_cast<int32_t*
>(data->
data);
124 for (
int j = 0; j < rows; ++j) {
126 for (
int k = 0; k < cols; ++k) {
127 size_t hash_value = boost::hash<int32_t>().
operator()(values[k]);
128 hash_combine(h, hash_value);
140 for (
int i = 0; i < batch; ++i) {
141 char* values =
reinterpret_cast<char*
>(data->
data);
142 fn(rows, values, &data->
results[0]);
149 for (
int i = 0; i < batch; ++i) {
150 char* values =
reinterpret_cast<char*
>(data->
data);
151 for (
int j = 0; j < rows; ++j) {
155 values +=
sizeof(int8_t);
158 values +=
sizeof(int32_t);
161 values +=
sizeof(int64_t);
175 for (
int i = 0; i < batch; ++i) {
176 char* values =
reinterpret_cast<char*
>(data->
data);
177 for (
int j = 0; j < rows; ++j) {
181 values +=
sizeof(int8_t);
184 values +=
sizeof(int32_t);
187 values +=
sizeof(int64_t);
202 for (
int i = 0; i < batch; ++i) {
203 char* values =
reinterpret_cast<char*
>(data->
data);
204 fn(rows, values, &data->
results[0]);
211 for (
int i = 0; i < batch; ++i) {
212 char* values =
reinterpret_cast<char*
>(data->
data);
213 for (
int j = 0; j < rows; ++j) {
216 size_t hash_value = boost::hash<int8_t>().
operator()(*
reinterpret_cast<int8_t*
>(values));
217 hash_combine(h, hash_value);
218 values +=
sizeof(int8_t);
220 hash_value = boost::hash<int32_t>().
operator()(*
reinterpret_cast<int32_t*
>(values));
221 hash_combine(h, hash_value);
222 values +=
sizeof(int32_t);
224 hash_value = boost::hash<int64_t>().
operator()(*
reinterpret_cast<int64_t*
>(values));
225 hash_combine(h, hash_value);
226 values +=
sizeof(int64_t);
229 hash_value = hash_range<char*>(str->
ptr, str->
ptr + str->
len);
230 hash_combine(h, hash_value);
239 vector<bool> buckets;
240 buckets.resize(num_buckets);
242 int num_collisions = 0;
243 for (
int i = 0; i < data->
results.size(); ++i) {
245 int bucket = hash % num_buckets;
246 if (buckets[bucket]) ++num_collisions;
247 buckets[bucket] =
true;
249 memset(&data->
results[0], 0, data->
results.size() *
sizeof(uint32_t));
250 return num_collisions;
274 string name = mixed ?
"HashMixed" :
"HashInt";
278 prototype.AddArgument(
280 prototype.AddArgument(
285 Function* fn = prototype.GeneratePrototype(&builder, &args[0]);
287 BasicBlock* loop_start = builder.GetInsertBlock();
288 BasicBlock* loop_body = BasicBlock::Create(codegen->
context(),
"loop", fn);
289 BasicBlock* loop_exit = BasicBlock::Create(codegen->
context(),
"exit", fn);
291 int fixed_byte_size = mixed ?
292 sizeof(int8_t) +
sizeof(int32_t) +
sizeof(int64_t) :
sizeof(int32_t) * 4;
297 Value* row_size = NULL;
300 sizeof(int8_t) +
sizeof(int32_t) +
sizeof(int64_t) +
sizeof(
StringValue));
307 Value* counter_check =
309 builder.CreateCondBr(counter_check, loop_body, loop_exit);
312 builder.SetInsertPoint(loop_body);
313 PHINode* counter = builder.CreatePHI(codegen->
GetType(
TYPE_INT), 2,
"counter");
317 counter->addIncoming(next_counter, loop_body);
320 Value*
offset = builder.CreateMul(counter, row_size);
321 Value* data = builder.CreateGEP(args[1], offset);
324 seed = builder.CreateCall3(fixed_fn, data, dummy_len, seed);
328 Value* string_data = builder.CreateGEP(
332 Value* str_ptr = builder.CreateStructGEP(string_val, 0);
333 Value* str_len = builder.CreateStructGEP(string_val, 1);
334 str_ptr = builder.CreateLoad(str_ptr);
335 str_len = builder.CreateLoad(str_len);
336 seed = builder.CreateCall3(string_hash_fn, str_ptr, str_len, seed);
339 Value* result = builder.CreateGEP(args[2], counter);
340 builder.CreateStore(seed, result);
342 counter_check = builder.CreateICmpSLT(next_counter, args[0]);
343 builder.CreateCondBr(counter_check, loop_body, loop_exit);
346 builder.SetInsertPoint(loop_exit);
347 builder.CreateRetVoid();
352 int main(
int argc,
char **argv) {
357 const int NUM_ROWS = 1024;
368 scoped_ptr<LlvmCodeGen> codegen;
371 cout <<
"Could not start codegen.";
374 codegen->EnableOptimizations(
true);
377 void* jitted_hash_ints;
378 codegen->AddFunctionToJit(hash_ints, &jitted_hash_ints);
381 void* jitted_hash_mixed;
382 codegen->AddFunctionToJit(hash_mixed, &jitted_hash_mixed);
384 status = codegen->FinalizeModule();
386 cout <<
"Could not compile module: " << status.
GetDetail();
392 vector<DataProvider::ColDesc> int_cols;
393 int_cols.push_back(DataProvider::ColDesc::Create<int32_t>(0, 10000));
394 int_cols.push_back(DataProvider::ColDesc::Create<int32_t>(0, 1000000));
395 int_cols.push_back(DataProvider::ColDesc::Create<int32_t>(0, 1000000));
396 int_cols.push_back(DataProvider::ColDesc::Create<int32_t>(0, 1000000));
398 int_provider.
Reset(NUM_ROWS, NUM_ROWS, int_cols);
402 int_data.
num_cols = int_cols.size();
408 string min_std_str(
"aaa");
409 string max_std_str(
"zzzzzzzzzz");
411 StringValue min_str(const_cast<char*>(min_std_str.c_str()), min_std_str.size());
412 StringValue max_str(const_cast<char*>(max_std_str.c_str()), max_std_str.size());
414 vector<DataProvider::ColDesc> mixed_cols;
415 mixed_cols.push_back(DataProvider::ColDesc::Create<int8_t>(0, 25));
416 mixed_cols.push_back(DataProvider::ColDesc::Create<int32_t>(0, 10000));
417 mixed_cols.push_back(DataProvider::ColDesc::Create<int64_t>(0, 100000000));
418 mixed_cols.push_back(DataProvider::ColDesc::Create<StringValue>(min_str, max_str));
419 mixed_provider.
Reset(NUM_ROWS, NUM_ROWS, mixed_cols);
423 mixed_data.
num_cols = mixed_cols.size();
425 mixed_data.
jitted_fn = jitted_hash_mixed;
432 cout << int_suite.
Measure() << endl;
static Status LoadImpalaIR(ObjectPool *, const std::string &id, boost::scoped_ptr< LlvmCodeGen > *codegen)
int AddBenchmark(const std::string &name, BenchmarkFunction fn, void *args, int baseline_idx=0)
const std::string GetDetail() const
void Reset(int num_rows, int batch_size, const std::vector< ColDesc > &columns)
llvm::PointerType * GetPtrType(llvm::Type *type)
Return a pointer type to 'type'.
void TestCrcMixedHash(int batch, void *d)
Utility struct that wraps a variable name and llvm type.
uint32_t(* CodegenHashFn)(int rows, char *data, int32_t *results)
static std::string GetMachineInfo()
Output machine/build configuration as a string.
const StringSearch UrlParser::hash_search & hash
vector< int32_t > results
std::string Measure()
Runs all the benchmarks and returns the result in a formatted string.
void * NextBatch(int *rows_returned)
int main(int argc, char **argv)
Function * CodegenCrcHash(LlvmCodeGen *codegen, bool mixed)
std::size_t hash_value(const Decimal4Value &v)
This function must be called 'hash_value' to be picked up by boost.
void TestCodegenMixedHash(int batch, void *d)
void TestFnvMixedHash(int batch, void *d)
See data-provider-test.cc on how to use this.
ObjectPool * obj_pool()
Returns a local object pool.
LLVM code generator. This is the top level object to generate jitted code.
void AddArgument(const NamedVariable &var)
Add argument.
llvm::Function * GetHashFunction(int num_bytes=-1)
int NumCollisions(TestData *data, int num_buckets)
void TestCrcIntHash(int batch, void *d)
This class is thread-safe.
static const uint32_t FNV_SEED
void TestCodegenIntHash(int batch, void *d)
static void InitializeLlvm(bool load_backend=false)
vector< StringValue > data
void TestBoostMixedHash(int batch, void *d)
static uint32_t CrcHash(const void *data, int32_t bytes, uint32_t hash)
llvm::Type * GetType(const ColumnType &type)
Returns llvm type for the column type.
uint8_t offset[7 *64-sizeof(uint64_t)]
void TestBoostIntHash(int batch, void *d)
llvm::Value * GetIntConstant(PrimitiveType type, int64_t val)
Returns the constant 'val' of 'type'.
static uint32_t FnvHash64to32(const void *data, int32_t bytes, uint32_t hash)
static void Init()
Initialize CpuInfo.
llvm::Function * FinalizeFunction(llvm::Function *function)
llvm::LLVMContext & context()
llvm::PointerType * ptr_type()
void TestFnvIntHash(int batch, void *d)