Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
llvm-codegen.cc
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "codegen/llvm-codegen.h"
16 
17 #include <fstream>
18 #include <iostream>
19 #include <sstream>
20 #include <boost/thread/mutex.hpp>
21 
22 #include <llvm/ADT/Triple.h>
23 #include <llvm/Analysis/InstructionSimplify.h>
24 #include <llvm/Analysis/Passes.h>
25 #include <llvm/Bitcode/ReaderWriter.h>
26 #include <llvm/ExecutionEngine/ExecutionEngine.h>
27 #include <llvm/ExecutionEngine/JIT.h>
28 #include <llvm/IR/DataLayout.h>
29 #include <llvm/Linker.h>
30 #include <llvm/PassManager.h>
31 #include <llvm/Support/DynamicLibrary.h>
32 #include <llvm/Support/Host.h>
33 #include <llvm/Support/MemoryBuffer.h>
34 #include <llvm/Support/NoFolder.h>
35 #include <llvm/Support/TargetRegistry.h>
36 #include <llvm/Support/TargetSelect.h>
37 #include <llvm/Support/raw_ostream.h>
38 #include <llvm/Support/system_error.h>
39 #include <llvm/Target/TargetLibraryInfo.h>
40 #include <llvm/Transforms/IPO.h>
41 #include <llvm/Transforms/IPO/PassManagerBuilder.h>
42 #include <llvm/Transforms/Scalar.h>
43 #include <llvm/Transforms/Utils/BasicBlockUtils.h>
44 #include <llvm/Transforms/Utils/Cloning.h>
45 
46 #include "common/logging.h"
47 #include "codegen/codegen-anyval.h"
50 #include "impala-ir/impala-ir-names.h"
51 #include "runtime/hdfs-fs-cache.h"
52 #include "util/cpu-info.h"
53 #include "util/hdfs-util.h"
54 #include "util/path-builder.h"
55 
56 #include "common/names.h"
57 
58 using namespace llvm;
59 using std::fstream;
60 
// Command-line flags for codegen diagnostics and optimization control. The string
// literal passed to each DEFINE_* is the flag's help text.
61 DEFINE_bool(print_llvm_ir_instruction_count, false,
62  "if true, prints the instruction counts of all JIT'd functions");
63 
64 DEFINE_bool(disable_optimization_passes, false,
65  "if true, disables llvm optimization passes (used for testing)");
66 DEFINE_bool(dump_ir, false, "if true, output IR after optimization passes");
67 DEFINE_string(unopt_module_dir, "",
68  "if set, saves unoptimized generated IR modules to the specified directory.");
69 DEFINE_string(opt_module_dir, "",
70  "if set, saves optimized generated IR modules to the specified directory.");
// Declared only; this flag is not defined in this file.
71 DECLARE_string(local_library_dir);
72 
73 namespace impala {
74 
// Set to true by InitializeLlvm() once the one-time LLVM setup has run. The
// LlvmCodeGen constructor DCHECKs this flag.
76 static bool llvm_initialized = false;
77 
78 void LlvmCodeGen::InitializeLlvm(bool load_backend) {
79  mutex::scoped_lock initialization_lock(llvm_initialization_lock);
80  if (llvm_initialized) return;
81  // This allocates a global llvm struct and enables multithreading.
82  // There is no real good time to clean this up but we only make it once.
83  bool result = llvm::llvm_start_multithreaded();
84  DCHECK(result);
85  // This can *only* be called once per process and is used to setup
86  // dynamically linking jitted code.
87  llvm::InitializeNativeTarget();
88  llvm_initialized = true;
89 
90  if (load_backend) {
91  string path;
92  // For test env, we have to load libfesupport.so to provide sym for LLVM.
93  PathBuilder::GetFullBuildPath("service/libfesupport.so", &path);
94  bool failed = llvm::sys::DynamicLibrary::LoadLibraryPermanently(path.c_str());
95  DCHECK_EQ(failed, 0);
96  }
97 }
98 
// Constructs a codegen object that owns a fresh LLVMContext but has no module or
// execution engine yet (both start as NULL). InitializeLlvm() must already have been
// called; this is DCHECK'd.
//   pool: passed to the constructor of profile_.
//   id: stored in id_.
99 LlvmCodeGen::LlvmCodeGen(ObjectPool* pool, const string& id) :
100  id_(id),
101  profile_(pool, "CodeGen"),
102  optimizations_enabled_(false),
103  is_corrupt_(false),
104  is_compiled_(false),
105  context_(new llvm::LLVMContext()),
106  module_(NULL),
107  execution_engine_(NULL),
108  debug_trace_fn_(NULL) {
109 
110  DCHECK(llvm_initialized) << "Must call LlvmCodeGen::InitializeLlvm first.";
111 
 // Register the timers and the file-size counter on this object's profile.
112  load_module_timer_ = ADD_TIMER(&profile_, "LoadTime");
113  prepare_module_timer_ = ADD_TIMER(&profile_, "PrepareTime");
114  module_file_size_ = ADD_COUNTER(&profile_, "ModuleFileSize", TUnit::BYTES);
115  codegen_timer_ = ADD_TIMER(&profile_, "CodegenTime");
116  optimization_timer_ = ADD_TIMER(&profile_, "OptimizationTime");
117  compile_timer_ = ADD_TIMER(&profile_, "CompileTime");
118 
 // One slot per IRFunction value, indexed by the enum.
119  loaded_functions_.resize(IRFunction::FN_END);
120 }
121 
// NOTE(review): the line carrying this function's return type, name and first
// parameter is absent from this listing. Presumably this is LoadFromFile(), which is
// called elsewhere in this file with (pool, file, id, codegen) -- confirm.
// Creates a new LlvmCodeGen in '*codegen', loads the module stored in 'file' into it
// and returns the result of Init().
123  const string& file, const string& id, scoped_ptr<LlvmCodeGen>* codegen) {
124  codegen->reset(new LlvmCodeGen(pool, id));
125  SCOPED_TIMER((*codegen)->profile_.total_time_counter());
126 
127  Module* loaded_module;
128  RETURN_IF_ERROR(LoadModule(codegen->get(), file, &loaded_module));
129  (*codegen)->module_ = loaded_module;
130 
131  return (*codegen)->Init();
132 }
133 
// Reads the bitcode file 'file' into memory and parses it into a Module created in
// codegen's LLVMContext. The module is returned through '*module'.
// Returns an error Status naming the file if it cannot be read or parsed. The size of
// the file buffer is added to codegen's module_file_size_ counter.
// NOTE(review): the listing's own line numbering skips lines inside this function, so
// some statements may be missing from view.
134 Status LlvmCodeGen::LoadModule(LlvmCodeGen* codegen, const string& file,
135  Module** module) {
136  OwningPtr<MemoryBuffer> file_buffer;
137  {
139 
140  llvm::error_code err = MemoryBuffer::getFile(file, file_buffer);
141  if (err.value() != 0) {
142  stringstream ss;
143  ss << "Could not load module " << file << ": " << err.message();
144  return Status(ss.str());
145  }
146  }
147 
148  COUNTER_ADD(codegen->module_file_size_, file_buffer->getBufferSize());
149 
151  string error;
152  *module = ParseBitcodeFile(file_buffer.get(), codegen->context(), &error);
153  if (*module == NULL) {
154  stringstream ss;
155  ss << "Could not parse module " << file << ": " << error;
156  return Status(ss.str());
157  }
158  return Status::OK;
159 }
160 
// Loads the module in 'file' and links it into module_. Files already recorded in
// linked_modules_ are skipped and Status::OK is returned for them. On a link failure
// an error Status containing the linker's message is returned and the file is not
// recorded.
// NOTE(review): the listing's own line numbering skips a line inside this function,
// so a statement may be missing from view.
161 // TODO: Create separate counters/timers (file size, load time) for each module linked
162 Status LlvmCodeGen::LinkModule(const string& file) {
163  if (linked_modules_.find(file) != linked_modules_.end()) return Status::OK;
164 
166  Module* new_module;
167  RETURN_IF_ERROR(LoadModule(this, file, &new_module));
168  string error_msg;
 // LinkModules() reports failure by returning true.
169  bool error =
170  Linker::LinkModules(module_, new_module, Linker::DestroySource, &error_msg);
171  if (error) {
172  stringstream ss;
173  ss << "Problem linking " << file << " to main module: " << error_msg;
174  return Status(ss.str());
175  }
176  linked_modules_.insert(file);
177  return Status::OK;
178 }
179 
// NOTE(review): the line carrying this function's return type and name is absent from
// this listing, and the listing's numbering also skips several lines inside the body
// (including the condition that selects between the two IR files). Read with care.
// Loads the precompiled Impala IR module into a new LlvmCodeGen returned through
// '*codegen_ret', checks that the IR struct for StringValue has the same size as the
// native StringValue, and fills loaded_functions_ with the module function matching
// each FN_MAPPINGS entry. Returns an error Status if loading fails, the size check
// fails, a mapping matches more than once, or not all mappings were found.
181  ObjectPool* pool, const string& id, scoped_ptr<LlvmCodeGen>* codegen_ret) {
182  // Load the statically cross compiled file. We cannot load an ll file with sse
183  // instructions on a machine without sse support (the load fails, doesn't matter
184  // if those instructions end up getting run or not).
185  string module_file;
187  PathBuilder::GetFullPath("llvm-ir/impala-sse.ll", &module_file);
188  } else {
189  PathBuilder::GetFullPath("llvm-ir/impala-no-sse.ll", &module_file);
190  }
191  RETURN_IF_ERROR(LoadFromFile(pool, module_file, id, codegen_ret));
192  LlvmCodeGen* codegen = codegen_ret->get();
193 
194  // Parse module for cross compiled functions and types
197 
198  // Get type for StringValue
200 
201  // Get type for TimestampValue
203 
204  // Verify size is correct
205  const DataLayout* data_layout = codegen->execution_engine()->getDataLayout();
206  const StructLayout* layout =
207  data_layout->getStructLayout(static_cast<StructType*>(codegen->string_val_type_));
208  if (layout->getSizeInBytes() != sizeof(StringValue)) {
 // The DCHECK_EQ fires in debug builds; otherwise the error Status is returned.
209  DCHECK_EQ(layout->getSizeInBytes(), sizeof(StringValue));
210  return Status("Could not create llvm struct type for StringVal");
211  }
212 
213  // Parse functions from module
214  vector<Function*> functions;
215  codegen->GetFunctions(&functions);
216  int parsed_functions = 0;
217  for (int i = 0; i < functions.size(); ++i) {
218  string fn_name = functions[i]->getName();
219  for (int j = IRFunction::FN_START; j < IRFunction::FN_END; ++j) {
220  // Substring match to match precompiled functions. The compiled function names
221  // will be mangled.
222  // TODO: reconsider this. Substring match is probably not strict enough but
223  // undoing the mangling is no fun either.
224  if (fn_name.find(FN_MAPPINGS[j].fn_name) != string::npos) {
225  if (codegen->loaded_functions_[FN_MAPPINGS[j].fn] != NULL) {
226  return Status("Duplicate definition found for function: " + fn_name);
227  }
228  functions[i]->addFnAttr(Attribute::AlwaysInline);
229  codegen->loaded_functions_[FN_MAPPINGS[j].fn] = functions[i];
230  ++parsed_functions;
231  }
232  }
233  }
234 
 // Every mapping must have been matched; otherwise list the ones still unset.
235  if (parsed_functions != IRFunction::FN_END) {
236  stringstream ss;
237  ss << "Unable to find these precompiled functions: ";
238  bool first = true;
239  for (int i = IRFunction::FN_START; i != IRFunction::FN_END; ++i) {
240  if (codegen->loaded_functions_[i] == NULL) {
241  if (!first) ss << ", ";
242  ss << FN_MAPPINGS[i].fn_name;
243  first = false;
244  }
245  }
246  return Status(ss.str());
247  }
248 
249  return Status::OK;
250 }
251 
// NOTE(review): the signature line is absent from this listing (presumably Init(),
// whose result LoadFromFile() returns -- confirm), and the listing's numbering skips
// a line near the end of the body.
// Creates an empty module named id_ if none has been set, builds the execution engine
// for module_, and initializes the cached void type, pointer type and boolean
// constants. Returns an error Status if the execution engine cannot be created.
253  if (module_ == NULL) {
254  module_ = new Module(id_, context());
255  }
256  llvm::CodeGenOpt::Level opt_level = CodeGenOpt::Aggressive;
257 #ifndef NDEBUG
258  // For debug builds, don't generate JIT compiled optimized assembly.
259  // This takes a non-neglible amount of time (~.5 ms per function) and
260  // blows up the fe tests (which take ~10-20 ms each).
261  opt_level = CodeGenOpt::None;
262 #endif
263  EngineBuilder builder = EngineBuilder(module_).setOptLevel(opt_level);
264  //TODO Uncomment the below line as soon as we upgrade to LLVM 3.5 to enable SSE, if
265  // available. In LLVM 3.3 this is done automatically and cannot be enabled because
266  // for some reason SSE4 intrinsics selection will not work.
267  //builder.setMCPU(llvm::sys::getHostCPUName());
268  builder.setErrorStr(&error_string_);
269  execution_engine_.reset(builder.create());
270  if (execution_engine_ == NULL) {
271  // execution_engine_ will take ownership of the module if it is created
 // NOTE(review): module_ is deleted here but not reset to NULL -- confirm nothing
 // reads it after this error path.
272  delete module_;
273  stringstream ss;
274  ss << "Could not create ExecutionEngine: " << error_string_;
275  return Status(ss.str());
276  }
277 
278  void_type_ = Type::getVoidTy(context());
 // ptr_type_ is a pointer to the TYPE_TINYINT llvm type in address space 0.
279  ptr_type_ = PointerType::get(GetType(TYPE_TINYINT), 0);
280  true_value_ = ConstantInt::get(context(), APInt(1, true, true));
281  false_value_ = ConstantInt::get(context(), APInt(1, false, true));
282 
284 
285  return Status::OK;
286 }
287 
// NOTE(review): the signature line is absent from this listing (presumably the
// LlvmCodeGen destructor -- confirm).
// Frees the machine code of every function recorded in jitted_functions_.
289  for (map<Function*, bool>::iterator iter = jitted_functions_.begin();
290  iter != jitted_functions_.end(); ++iter) {
291  execution_engine_->freeMachineCodeForFunction(iter->first);
292  }
293 }
294 
// NOTE(review): the signature line is absent from this listing; the body shows a
// parameter named 'enable'.
// Stores 'enable' in optimizations_enabled_, which is consulted elsewhere in this
// file before OptimizeModule() is called.
296  optimizations_enabled_ = enable;
297 }
298 
299 string LlvmCodeGen::GetIR(bool full_module) const {
300  string str;
301  raw_string_ostream stream(str);
302  if (full_module) {
303  module_->print(stream, NULL);
304  } else {
305  for (int i = 0; i < codegend_functions_.size(); ++i) {
306  codegend_functions_[i]->print(stream, NULL);
307  }
308  }
309  return str;
310 }
311 
312 Type* LlvmCodeGen::GetType(const ColumnType& type) {
313  switch (type.type) {
314  case TYPE_NULL:
315  return Type::getInt1Ty(context());
316  case TYPE_BOOLEAN:
317  return Type::getInt1Ty(context());
318  case TYPE_TINYINT:
319  return Type::getInt8Ty(context());
320  case TYPE_SMALLINT:
321  return Type::getInt16Ty(context());
322  case TYPE_INT:
323  return Type::getInt32Ty(context());
324  case TYPE_BIGINT:
325  return Type::getInt64Ty(context());
326  case TYPE_FLOAT:
327  return Type::getFloatTy(context());
328  case TYPE_DOUBLE:
329  return Type::getDoubleTy(context());
330  case TYPE_STRING:
331  case TYPE_VARCHAR:
332  case TYPE_CHAR:
333  return string_val_type_;
334  case TYPE_TIMESTAMP:
335  return timestamp_val_type_;
336  case TYPE_DECIMAL:
337  return Type::getIntNTy(context(), type.GetByteSize() * 8);
338  default:
339  DCHECK(false) << "Invalid type: " << type;
340  return NULL;
341  }
342 }
343 
344 PointerType* LlvmCodeGen::GetPtrType(const ColumnType& type) {
345  return PointerType::get(GetType(type), 0);
346 }
347 
348 Type* LlvmCodeGen::GetType(const string& name) {
349  Type* type = module_->getTypeByName(name);
350  DCHECK_NOTNULL(type);
351  return type;
352 }
353 
354 PointerType* LlvmCodeGen::GetPtrType(const string& name) {
355  Type* type = GetType(name);
356  DCHECK(type != NULL) << name;
357  return PointerType::get(type, 0);
358 }
359 
360 PointerType* LlvmCodeGen::GetPtrType(Type* type) {
361  return PointerType::get(type, 0);
362 }
363 
364 // Llvm doesn't let you create a PointerValue from a c-side ptr. Instead
365 // cast it to an int and then to 'type'.
366 Value* LlvmCodeGen::CastPtrToLlvmPtr(Type* type, const void* ptr) {
367  Constant* const_int = ConstantInt::get(Type::getInt64Ty(context()), (int64_t)ptr);
368  return ConstantExpr::getIntToPtr(const_int, type);
369 }
370 
371 Value* LlvmCodeGen::GetIntConstant(PrimitiveType type, int64_t val) {
372  switch (type) {
373  case TYPE_TINYINT:
374  return ConstantInt::get(context(), APInt(8, val));
375  case TYPE_SMALLINT:
376  return ConstantInt::get(context(), APInt(16, val));
377  case TYPE_INT:
378  return ConstantInt::get(context(), APInt(32, val));
379  case TYPE_BIGINT:
380  return ConstantInt::get(context(), APInt(64, val));
381  default:
382  DCHECK(false);
383  return NULL;
384  }
385 }
386 
387 AllocaInst* LlvmCodeGen::CreateEntryBlockAlloca(Function* f, const NamedVariable& var) {
388  IRBuilder<> tmp(&f->getEntryBlock(), f->getEntryBlock().begin());
389  AllocaInst* alloca = tmp.CreateAlloca(var.type, 0, var.name.c_str());
390  if (var.type == GetType(CodegenAnyVal::LLVM_DECIMALVAL_NAME)) {
391  // Generated functions may manipulate DecimalVal arguments via SIMD instructions such
392  // as 'movaps' that require 16-byte memory alignment. LLVM uses 8-byte alignment by
393  // default, so explicitly set the alignment for DecimalVals.
394  alloca->setAlignment(16);
395  }
396  return alloca;
397 }
398 
399 AllocaInst* LlvmCodeGen::CreateEntryBlockAlloca(const LlvmBuilder& builder, Type* type,
400  const char* name) {
401  return CreateEntryBlockAlloca(builder.GetInsertBlock()->getParent(),
402  NamedVariable(name, type));
403 }
404 
405 void LlvmCodeGen::CreateIfElseBlocks(Function* fn, const string& if_name,
406  const string& else_name, BasicBlock** if_block, BasicBlock** else_block,
407  BasicBlock* insert_before) {
408  *if_block = BasicBlock::Create(context(), if_name, fn, insert_before);
409  *else_block = BasicBlock::Create(context(), else_name, fn, insert_before);
410 }
411 
// NOTE(review): the signature line is absent from this listing; the body shows a
// pointer parameter named 'prototype'.
// Returns the function stored in external_functions_ under prototype->name(). If
// there is none yet, one is generated from the prototype, stored and returned.
413  if (external_functions_.find(prototype->name()) != external_functions_.end()) {
414  return external_functions_[prototype->name()];
415  }
416  Function* func = prototype->GeneratePrototype();
417  external_functions_[prototype->name()] = func;
418  return func;
419 }
420 
421 Function* LlvmCodeGen::GetFunction(IRFunction::Type function) {
422  DCHECK(loaded_functions_[function] != NULL);
423  return loaded_functions_[function];
424 }
425 
426 // There is an llvm bug (#10957) that causes the first step of the verifier to always
427 // abort the process if it runs into an issue and ignores ReturnStatusAction. This
428 // would cause impalad to go down if one query has a problem.
429 // To work around this, we will copy that step here and not abort on error.
430 // TODO: doesn't seem there is much traction in getting this fixed but we'll see
431 bool LlvmCodeGen::VerifyFunction(Function* fn) {
432  if (is_corrupt_) return false;
433 
434  // Verify the function is valid. Adapted from the pre-verifier function pass.
435  for (Function::iterator i = fn->begin(), e = fn->end(); i != e; ++i) {
436  if (i->empty() || !i->back().isTerminator()) {
437  LOG(ERROR) << "Basic block must end with terminator: \n" << Print(&(*i));
438  is_corrupt_ = true;
439  break;
440  }
441  }
442 
443  if (!is_corrupt_) is_corrupt_ = llvm::verifyFunction(*fn, PrintMessageAction);
444 
445  if (is_corrupt_) {
446  string fn_name = fn->getName(); // llvm has some fancy operator overloading
447  LOG(ERROR) << "Function corrupt: " << fn_name;
448  fn->dump();
449  return false;
450  }
451  return true;
452 }
453 
// NOTE(review): the first line of this signature is absent from this listing
// (presumably the FnPrototype constructor -- confirm).
// Stores the codegen object, the function name and the return type. Creating a
// prototype after the codegen object has been compiled is a DCHECK failure.
455  LlvmCodeGen* gen, const string& name, Type* ret_type) :
456  codegen_(gen), name_(name), ret_type_(ret_type) {
457  DCHECK(!codegen_->is_compiled_) << "Not valid to add additional functions";
458 }
459 
// NOTE(review): the first line of this signature is absent from this listing
// (presumably FnPrototype::GeneratePrototype(), which is called elsewhere in this
// file -- confirm).
// Creates a Function with external linkage in the codegen object's module from the
// stored return type, name and arguments, and appends it to codegend_functions_.
//   builder: if non-NULL, an "entry" block is created in the new function and the
//       builder is positioned at it.
//   params: if non-NULL, receives the function's argument values in order; the
//       caller must provide room for one entry per argument.
461  LlvmBuilder* builder, Value** params) {
462  vector<Type*> arguments;
463  for (int i = 0; i < args_.size(); ++i) {
464  arguments.push_back(args_[i].type);
465  }
 // The final 'false' means the function is not variadic.
466  FunctionType* prototype = FunctionType::get(ret_type_, arguments, false);
467 
468  Function* fn = Function::Create(
469  prototype, Function::ExternalLinkage, name_, codegen_->module_);
470  DCHECK(fn != NULL);
471 
472  // Name the arguments
473  int idx = 0;
474  for (Function::arg_iterator iter = fn->arg_begin();
475  iter != fn->arg_end(); ++iter, ++idx) {
476  iter->setName(args_[idx].name);
477  if (params != NULL) params[idx] = iter;
478  }
479 
480  if (builder != NULL) {
481  BasicBlock* entry_block = BasicBlock::Create(codegen_->context(), "entry", fn);
482  builder->SetInsertPoint(entry_block);
483  }
484 
485  codegen_->codegend_functions_.push_back(fn);
486  return fn;
487 }
488 
489 Function* LlvmCodeGen::ReplaceCallSites(Function* caller, bool update_in_place,
490  Function* new_fn, const string& replacee_name, int* replaced) {
491  DCHECK(caller->getParent() == module_);
492  DCHECK(caller != NULL);
493  DCHECK(new_fn != NULL);
494 
495  if (!update_in_place) {
496  caller = CloneFunction(caller);
497  } else if (jitted_functions_.find(caller) != jitted_functions_.end()) {
498  // This function is already dynamically linked, unlink it.
499  execution_engine_->freeMachineCodeForFunction(caller);
500  jitted_functions_.erase(caller);
501  }
502 
503  *replaced = 0;
504  // loop over all blocks
505  Function::iterator block_iter = caller->begin();
506  while (block_iter != caller->end()) {
507  BasicBlock* block = block_iter++;
508  // loop over instructions in the block
509  BasicBlock::iterator instr_iter = block->begin();
510  while (instr_iter != block->end()) {
511  Instruction* instr = instr_iter++;
512  // look for call instructions
513  if (CallInst::classof(instr)) {
514  CallInst* call_instr = reinterpret_cast<CallInst*>(instr);
515  Function* old_fn = call_instr->getCalledFunction();
516  // look for call instruction that matches the name
517  if (old_fn != NULL && old_fn->getName().find(replacee_name) != string::npos) {
518  // Replace the called function
519  call_instr->setCalledFunction(new_fn);
520  ++*replaced;
521  }
522  }
523  }
524  }
525 
526  return caller;
527 }
528 
529 Function* LlvmCodeGen::CloneFunction(Function* fn) {
530  ValueToValueMapTy dummy_vmap;
531  // CloneFunction() automatically gives the new function a unique name
532  Function* fn_clone = llvm::CloneFunction(fn, dummy_vmap, false);
533  fn_clone->copyAttributesFrom(fn);
534  module_->getFunctionList().push_back(fn_clone);
535  return fn_clone;
536 }
537 
538 // TODO: revisit this. Inlining all call sites might not be the right call. We
539 // probably need to make this more complicated and somewhat cost based or write
540 // our own optimization passes.
541 int LlvmCodeGen::InlineCallSites(Function* fn, bool skip_registered_fns) {
542  int functions_inlined = 0;
543  // Collect all call sites
544  vector<CallInst*> call_sites;
545 
546  // loop over all blocks
547  Function::iterator block_iter = fn->begin();
548  while (block_iter != fn->end()) {
549  BasicBlock* block = block_iter++;
550  // loop over instructions in the block
551  BasicBlock::iterator instr_iter = block->begin();
552  while (instr_iter != block->end()) {
553  Instruction* instr = instr_iter++;
554  // look for call instructions
555  if (CallInst::classof(instr)) {
556  CallInst* call_instr = reinterpret_cast<CallInst*>(instr);
557  Function* called_fn = call_instr->getCalledFunction();
558  // called_fn will be NULL if it's a virtual function call, etc.
559  if (called_fn == NULL || !called_fn->hasFnAttribute(Attribute::AlwaysInline)) {
560  continue;
561  }
562  if (skip_registered_fns) {
563  if (registered_exprs_.find(called_fn) != registered_exprs_.end()) {
564  continue;
565  }
566  }
567  call_sites.push_back(call_instr);
568  }
569  }
570  }
571 
572  // Inline all call sites. InlineFunction can still fail (function is recursive, etc)
573  // but that always leaves the original function in a consistent state
574  for (int i = 0; i < call_sites.size(); ++i) {
575  llvm::InlineFunctionInfo info;
576  if (llvm::InlineFunction(call_sites[i], info)) {
577  ++functions_inlined;
578  }
579  }
580  return functions_inlined;
581 }
582 
583 Function* LlvmCodeGen::OptimizeFunctionWithExprs(Function* fn) {
584  int num_inlined;
585  do {
586  // This assumes that all redundant exprs have been registered.
587  num_inlined = InlineCallSites(fn, false);
588  } while (num_inlined > 0);
589 
590  // TODO(skye): fix subexpression elimination
591  // SubExprElimination subexpr_elim(this);
592  // subexpr_elim.Run(fn);
593  return FinalizeFunction(fn);
594 }
595 
596 Function* LlvmCodeGen::FinalizeFunction(Function* function) {
597  function->addFnAttr(llvm::Attribute::AlwaysInline);
598 
599  if (!VerifyFunction(function)) {
600  function->eraseFromParent(); // deletes function
601  return NULL;
602  }
603  if (FLAGS_dump_ir) function->dump();
604  return function;
605 }
606 
// NOTE(review): the signature line is absent from this listing (presumably
// FinalizeModule() -- confirm), and the listing's numbering skips lines inside the
// body, so some statements may be missing from view.
// Marks the codegen object as compiled (calling this twice is a DCHECK failure),
// optionally writes the unoptimized IR to --unopt_module_dir, runs the optimization
// passes when enabled, JIT compiles every function in fns_to_jit_compile_ and
// optionally writes the resulting IR to --opt_module_dir. Returns an error Status if
// the module is corrupt.
608  DCHECK(!is_compiled_);
609  is_compiled_ = true;
610 
611  if (FLAGS_unopt_module_dir.size() != 0) {
612  string path = FLAGS_unopt_module_dir + "/" + id_ + "_unopt.ll";
613  fstream f(path.c_str(), fstream::out | fstream::trunc);
 // Failing to write the dump is only logged; compilation continues.
614  if (f.fail()) {
615  LOG(ERROR) << "Could not save IR to: " << path;
616  } else {
617  f << GetIR(true);
618  f.close();
619  }
620  }
621 
622  if (is_corrupt_) return Status("Module is corrupt.");
624 
625  // Don't waste time optimizing module if there are no functions to JIT. This can happen
626  // if the codegen object is created but no functions are successfully codegen'd.
627  if (optimizations_enabled_ && !FLAGS_disable_optimization_passes &&
628  !fns_to_jit_compile_.empty()) {
629  OptimizeModule();
630  }
631 
633  // JIT compile all codegen'd functions
 // Each entry pairs a function with the location that receives its jitted address.
634  for (int i = 0; i < fns_to_jit_compile_.size(); ++i) {
635  *fns_to_jit_compile_[i].second = JitFunction(fns_to_jit_compile_[i].first);
636  }
637 
638  if (FLAGS_opt_module_dir.size() != 0) {
639  string path = FLAGS_opt_module_dir + "/" + id_ + "_opt.ll";
640  fstream f(path.c_str(), fstream::out | fstream::trunc);
641  if (f.fail()) {
642  LOG(ERROR) << "Could not save IR to: " << path;
643  } else {
644  f << GetIR(true);
645  f.close();
646  }
647  }
648 
649  return Status::OK;
650 }
651 
654 
// NOTE(review): the signature line and the first statements of this function are
// absent from this listing (presumably OptimizeModule(), called when the module is
// finalized -- confirm).
// Runs three stages over module_: internalize + global DCE so that only the
// functions in fns_to_jit_compile_ stay exported, then the per-function passes, then
// the module passes chosen by PassManagerBuilder.
655  // This pass manager will construct optimizations passes that are "typical" for
656  // c/c++ programs. We're relying on llvm to pick the best passes for us.
657  // TODO: we can likely muck with this to get better compile speeds or write
658  // our own passes. Our subexpression elimination optimization can be rolled into
659  // a pass.
660  PassManagerBuilder pass_builder ;
661  // 2 maps to -O2
662  // TODO: should we switch to 3? (3 may not produce different IR than 2 while taking
663  // longer, but we should check)
664  pass_builder.OptLevel = 2;
665  // Don't optimize for code size (this corresponds to -O2/-O3)
666  pass_builder.SizeLevel = 0;
667  pass_builder.Inliner = createFunctionInliningPass() ;
668 
669  // Specifying the data layout is necessary for some optimizations (e.g. removing many
670  // of the loads/stores produced by structs).
671  const string& data_layout_str = module_->getDataLayout();
672  DCHECK(!data_layout_str.empty());
673 
674  // Before running any other optimization passes, run the internalize pass, giving it
675  // the names of all functions registered by AddFunctionToJit(), followed by the
676  // global dead code elimination pass. This causes all functions not registered to be
677  // JIT'd to be marked as internal, and any internal functions that are not used are
678  // deleted by DCE pass. This greatly decreases compile time by removing unused code.
679  vector<const char*> exported_fn_names;
680  for (int i = 0; i < fns_to_jit_compile_.size(); ++i) {
 // NOTE(review): this passes the name's raw character pointer; confirm it is
 // null-terminated as the internalize pass presumably expects.
681  exported_fn_names.push_back(fns_to_jit_compile_[i].first->getName().data());
682  }
683  scoped_ptr<PassManager> module_pass_manager(new PassManager());
684  module_pass_manager->add(new DataLayout(data_layout_str));
685  module_pass_manager->add(createInternalizePass(exported_fn_names));
686  module_pass_manager->add(createGlobalDCEPass());
687  module_pass_manager->run(*module_);
688 
689  // Create and run function pass manager
690  scoped_ptr<FunctionPassManager> fn_pass_manager(new FunctionPassManager(module_));
691  fn_pass_manager->add(new DataLayout(data_layout_str));
692  pass_builder.populateFunctionPassManager(*fn_pass_manager);
693  fn_pass_manager->doInitialization();
 // Declarations have no body to optimize, so only definitions are run.
694  for (Module::iterator it = module_->begin(), end = module_->end(); it != end ; ++it) {
695  if (!it->isDeclaration()) fn_pass_manager->run(*it);
696  }
697  fn_pass_manager->doFinalization();
698 
699  // Create and run module pass manager
700  module_pass_manager.reset(new PassManager());
701  module_pass_manager->add(new DataLayout(data_layout_str));
702  pass_builder.populateModulePassManager(*module_pass_manager);
703  module_pass_manager->run(*module_);
 // Optional diagnostics: log the instruction counts of each function to be JIT'd.
704  if (FLAGS_print_llvm_ir_instruction_count) {
705  for (int i = 0; i < fns_to_jit_compile_.size(); ++i) {
706  InstructionCounter counter;
707  counter.visit(*fns_to_jit_compile_[i].first);
708  VLOG(1) << fns_to_jit_compile_[i].first->getName().str();
709  VLOG(1) << counter.PrintCounters();
710  }
711  }
712 }
713 
714 void LlvmCodeGen::AddFunctionToJit(Function* fn, void** fn_ptr) {
715  Type* decimal_val_type = GetType(CodegenAnyVal::LLVM_DECIMALVAL_NAME);
716  if (fn->getReturnType() == decimal_val_type) {
717  // Per the x86 calling convention ABI, DecimalVals should be returned via an extra
718  // first DecimalVal* argument. We generate non-compliant functions that return the
719  // DecimalVal directly, which we can call from generated code, but not from compiled
720  // native code. To avoid accidentally calling a non-compliant function from native
721  // code, call 'function' from an ABI-compliant wrapper.
722  stringstream name;
723  name << fn->getName().str() << "ABIWrapper";
724  LlvmCodeGen::FnPrototype prototype(this, name.str(), void_type_);
725  // Add return argument
726  prototype.AddArgument(NamedVariable("result", decimal_val_type->getPointerTo()));
727  // Add regular arguments
728  for (Function::arg_iterator arg = fn->arg_begin(); arg != fn->arg_end(); ++arg) {
729  prototype.AddArgument(NamedVariable(arg->getName(), arg->getType()));
730  }
731  LlvmBuilder builder(context());
732  Value* args[fn->arg_size() + 1];
733  Function* fn_wrapper = prototype.GeneratePrototype(&builder, &args[0]);
734  fn_wrapper->addFnAttr(llvm::Attribute::AlwaysInline);
735  // Mark first argument as sret (not sure if this is necessary but it can't hurt)
736  fn_wrapper->addAttribute(1, Attribute::StructRet);
737  // Call 'fn' and store the result in the result argument
738  Value* result =
739  builder.CreateCall(fn, ArrayRef<Value*>(&args[1], fn->arg_size()), "result");
740  builder.CreateStore(result, args[0]);
741  builder.CreateRetVoid();
742  fn = FinalizeFunction(fn_wrapper);
743  DCHECK(fn != NULL);
744  }
745  fns_to_jit_compile_.push_back(make_pair(fn, fn_ptr));
746 }
747 
748 void* LlvmCodeGen::JitFunction(Function* function) {
749  if (is_corrupt_) return NULL;
750 
751  // TODO: log a warning if the jitted function is too big (larger than I cache)
752  void* jitted_function = execution_engine_->getPointerToFunction(function);
753  boost::lock_guard<mutex> l(jitted_functions_lock_);
754  if (jitted_function != NULL) {
755  jitted_functions_[function] = true;
756  }
757  return jitted_function;
758 }
759 
// Prints 'str' to stdout with an "LLVM Trace: " prefix. It has C linkage so that
// generated IR can refer to it by the plain name "DebugTrace".
extern "C" void DebugTrace(const char* str) {
  printf("LLVM Trace: %s\n", str);
}
764 
765 void LlvmCodeGen::CodegenDebugTrace(LlvmBuilder* builder, const char* str) {
766  LOG(ERROR) << "Remove IR codegen debug traces before checking in.";
767 
768  // Lazily link in debug function to the module
769  if (debug_trace_fn_ == NULL) {
770  vector<Type*> args;
771  args.push_back(ptr_type_);
772  FunctionType* fn_type = FunctionType::get(void_type_, args, false);
773  debug_trace_fn_ = Function::Create(fn_type, GlobalValue::ExternalLinkage,
774  "DebugTrace", module_);
775 
776  DCHECK(debug_trace_fn_ != NULL);
777  // DebugTrace shouldn't already exist (llvm mangles function names if there
778  // are duplicates)
779  DCHECK(debug_trace_fn_->getName() == "DebugTrace");
780 
781  debug_trace_fn_->setCallingConv(CallingConv::C);
782 
783  // Add a mapping to the execution engine so it can link the DebugTrace function
784  execution_engine_->addGlobalMapping(debug_trace_fn_,
785  reinterpret_cast<void*>(&DebugTrace));
786  }
787 
788  // Make a copy of str into memory owned by this object. This is no guarantee that str is
789  // still around when the debug printf is executed.
790  debug_strings_.push_back(str);
791  str = debug_strings_[debug_strings_.size() - 1].c_str();
792 
793  // Call the function by turning 'str' into a constant ptr value
794  Value* str_ptr = CastPtrToLlvmPtr(ptr_type_, const_cast<char*>(str));
795  vector<Value*> calling_args;
796  calling_args.push_back(str_ptr);
797  builder->CreateCall(debug_trace_fn_, calling_args);
798 }
799 
800 void LlvmCodeGen::GetFunctions(vector<Function*>* functions) {
801  Module::iterator fn_iter = module_->begin();
802  while (fn_iter != module_->end()) {
803  Function* fn = fn_iter++;
804  if (!fn->empty()) functions->push_back(fn);
805  }
806 }
807 
808 void LlvmCodeGen::GetSymbols(unordered_set<string>* symbols) {
809  Module::iterator fn_iter = module_->begin();
810  while (fn_iter != module_->end()) {
811  Function* fn = fn_iter++;
812  if (!fn->empty()) symbols->insert(fn->getName());
813  }
814 }
815 
816 // TODO: cache this function (e.g. all min(int, int) are identical).
817 // we probably want some more global IR function cache, or, implement this
818 // in c and precompile it with clang.
819 // define i32 @Min(i32 %v1, i32 %v2) {
820 // entry:
821 // %0 = icmp slt i32 %v1, %v2
822 // br i1 %0, label %ret_v1, label %ret_v2
823 //
824 // ret_v1: ; preds = %entry
825 // ret i32 %v1
826 //
827 // ret_v2: ; preds = %entry
828 // ret i32 %v2
829 // }
830 Function* LlvmCodeGen::CodegenMinMax(const ColumnType& type, bool min) {
831  LlvmCodeGen::FnPrototype prototype(this, min ? "Min" : "Max", GetType(type));
832  prototype.AddArgument(LlvmCodeGen::NamedVariable("v1", GetType(type)));
833  prototype.AddArgument(LlvmCodeGen::NamedVariable("v2", GetType(type)));
834 
835  Value* params[2];
836  LlvmBuilder builder(context());
837  Function* fn = prototype.GeneratePrototype(&builder, &params[0]);
838 
839  Value* compare = NULL;
840  switch (type.type) {
841  case TYPE_NULL:
842  compare = false_value();
843  break;
844  case TYPE_BOOLEAN:
845  if (min) {
846  // For min, return x && y
847  compare = builder.CreateAnd(params[0], params[1]);
848  } else {
849  // For max, return x || y
850  compare = builder.CreateOr(params[0], params[1]);
851  }
852  break;
853  case TYPE_TINYINT:
854  case TYPE_SMALLINT:
855  case TYPE_INT:
856  case TYPE_BIGINT:
857  if (min) {
858  compare = builder.CreateICmpSLT(params[0], params[1]);
859  } else {
860  compare = builder.CreateICmpSGT(params[0], params[1]);
861  }
862  break;
863  case TYPE_FLOAT:
864  case TYPE_DOUBLE:
865  if (min) {
866  compare = builder.CreateFCmpULT(params[0], params[1]);
867  } else {
868  compare = builder.CreateFCmpUGT(params[0], params[1]);
869  }
870  break;
871  default:
872  DCHECK(false);
873  }
874 
875  if (type.type == TYPE_BOOLEAN) {
876  builder.CreateRet(compare);
877  } else {
878  BasicBlock* ret_v1, *ret_v2;
879  CreateIfElseBlocks(fn, "ret_v1", "ret_v2", &ret_v1, &ret_v2);
880 
881  builder.CreateCondBr(compare, ret_v1, ret_v2);
882  builder.SetInsertPoint(ret_v1);
883  builder.CreateRet(params[0]);
884  builder.SetInsertPoint(ret_v2);
885  builder.CreateRet(params[1]);
886  }
887 
888  if (!VerifyFunction(fn)) return NULL;
889  return fn;
890 }
891 
892 // Intrinsics are loaded one by one. Some are overloaded (e.g. memcpy) and the types must
893 // be specified.
894 // TODO: is there a better way to do this?
896  // Load memcpy
897  {
898  Type* types[] = { ptr_type(), ptr_type(), GetType(TYPE_INT) };
899  Function* fn = Intrinsic::getDeclaration(module(), Intrinsic::memcpy, types);
900  if (fn == NULL) {
901  return Status("Could not find memcpy intrinsic.");
902  }
903  llvm_intrinsics_[Intrinsic::memcpy] = fn;
904  }
905 
906  // TODO: where is the best place to put this?
907  struct {
908  Intrinsic::ID id;
909  const char* error;
910  } non_overloaded_intrinsics[] = {
911  { Intrinsic::x86_sse42_crc32_32_8, "sse4.2 crc32_u8" },
912  { Intrinsic::x86_sse42_crc32_32_16, "sse4.2 crc32_u16" },
913  { Intrinsic::x86_sse42_crc32_32_32, "sse4.2 crc32_u32" },
914  { Intrinsic::x86_sse42_crc32_64_64, "sse4.2 crc32_u64" },
915  };
916  const int num_intrinsics =
917  sizeof(non_overloaded_intrinsics) / sizeof(non_overloaded_intrinsics[0]);
918 
919  for (int i = 0; i < num_intrinsics; ++i) {
920  Intrinsic::ID id = non_overloaded_intrinsics[i].id;
921  Function* fn = Intrinsic::getDeclaration(module(), id);
922  if (fn == NULL) {
923  stringstream ss;
924  ss << "Could not find " << non_overloaded_intrinsics[i].error << " intrinsic";
925  return Status(ss.str());
926  }
927  llvm_intrinsics_[id] = fn;
928  }
929 
930  return Status::OK;
931 }
932 
933 void LlvmCodeGen::CodegenMemcpy(LlvmBuilder* builder, Value* dst, Value* src, int size) {
934  DCHECK_GE(size, 0);
935  if (size == 0) return;
936 
937  // Cast src/dst to int8_t*. If they already are, this will get optimized away
938  DCHECK(PointerType::classof(dst->getType()));
939  DCHECK(PointerType::classof(src->getType()));
940  dst = builder->CreateBitCast(dst, ptr_type());
941  src = builder->CreateBitCast(src, ptr_type());
942 
943  // Get intrinsic function.
944  Function* memcpy_fn = llvm_intrinsics_[Intrinsic::memcpy];
945  DCHECK(memcpy_fn != NULL);
946 
947  // The fourth argument is the alignment. For non-zero values, the caller
948  // must guarantee that the src and dst values are aligned to that byte boundary.
949  // TODO: We should try to take advantage of this since our tuples are well aligned.
950  Value* args[] = {
951  dst, src, GetIntConstant(TYPE_INT, size),
953  false_value() // is_volatile.
954  };
955  builder->CreateCall(memcpy_fn, args);
956 }
957 
959  hash_fns_.clear();
960 }
961 
962 // Codegen to compute hash for a particular byte size. Loops are unrolled in this
963 // process. For the case where num_bytes == 11, we'd do this by calling
964 // 1. crc64 (for first 8 bytes)
965 // 2. crc16 (for bytes 9, 10)
966 // 3. crc8 (for byte 11)
967 // The resulting IR looks like:
968 // define i32 @CrcHash11(i8* %data, i32 %len, i32 %seed) {
969 // entry:
970 // %0 = zext i32 %seed to i64
971 // %1 = bitcast i8* %data to i64*
972 // %2 = getelementptr i64* %1, i32 0
973 // %3 = load i64* %2
974 // %4 = call i64 @llvm.x86.sse42.crc32.64.64(i64 %0, i64 %3)
975 // %5 = trunc i64 %4 to i32
976 // %6 = getelementptr i8* %data, i32 8
977 // %7 = bitcast i8* %6 to i16*
978 // %8 = load i16* %7
979 // %9 = call i32 @llvm.x86.sse42.crc32.32.16(i32 %5, i16 %8)
980 // %10 = getelementptr i8* %6, i32 2
981 // %11 = load i8* %10
982 // %12 = call i32 @llvm.x86.sse42.crc32.32.8(i32 %9, i8 %11)
983 // ret i32 %12
984 // }
985 Function* LlvmCodeGen::GetHashFunction(int num_bytes) {
987  if (num_bytes == -1) {
988  // -1 indicates variable length, just return the generic loop based
989  // hash fn.
990  return GetFunction(IRFunction::HASH_CRC);
991  }
992 
993  map<int, Function*>::iterator cached_fn = hash_fns_.find(num_bytes);
994  if (cached_fn != hash_fns_.end()) {
995  return cached_fn->second;
996  }
997 
998  // Generate a function to hash these bytes
999  stringstream ss;
1000  ss << "CrcHash" << num_bytes;
1001  FnPrototype prototype(this, ss.str(), GetType(TYPE_INT));
1002  prototype.AddArgument(LlvmCodeGen::NamedVariable("data", ptr_type()));
1003  prototype.AddArgument(LlvmCodeGen::NamedVariable("len", GetType(TYPE_INT)));
1004  prototype.AddArgument(LlvmCodeGen::NamedVariable("seed", GetType(TYPE_INT)));
1005 
1006  Value* args[3];
1007  LlvmBuilder builder(context());
1008  Function* fn = prototype.GeneratePrototype(&builder, &args[0]);
1009  Value* data = args[0];
1010  Value* result = args[2];
1011 
1012  Function* crc8_fn = llvm_intrinsics_[Intrinsic::x86_sse42_crc32_32_8];
1013  Function* crc16_fn = llvm_intrinsics_[Intrinsic::x86_sse42_crc32_32_16];
1014  Function* crc32_fn = llvm_intrinsics_[Intrinsic::x86_sse42_crc32_32_32];
1015  Function* crc64_fn = llvm_intrinsics_[Intrinsic::x86_sse42_crc32_64_64];
1016 
1017  // Generate the crc instructions starting with the highest number of bytes
1018  if (num_bytes >= 8) {
1019  Value* result_64 = builder.CreateZExt(result, GetType(TYPE_BIGINT));
1020  Value* ptr = builder.CreateBitCast(data, GetPtrType(TYPE_BIGINT));
1021  int i = 0;
1022  while (num_bytes >= 8) {
1023  Value* index[] = { GetIntConstant(TYPE_INT, i++) };
1024  Value* d = builder.CreateLoad(builder.CreateGEP(ptr, index));
1025  result_64 = builder.CreateCall2(crc64_fn, result_64, d);
1026  num_bytes -= 8;
1027  }
1028  result = builder.CreateTrunc(result_64, GetType(TYPE_INT));
1029  Value* index[] = { GetIntConstant(TYPE_INT, i * 8) };
1030  // Update data to past the 8-byte chunks
1031  data = builder.CreateGEP(data, index);
1032  }
1033 
1034  if (num_bytes >= 4) {
1035  DCHECK_LT(num_bytes, 8);
1036  Value* ptr = builder.CreateBitCast(data, GetPtrType(TYPE_INT));
1037  Value* d = builder.CreateLoad(ptr);
1038  result = builder.CreateCall2(crc32_fn, result, d);
1039  Value* index[] = { GetIntConstant(TYPE_INT, 4) };
1040  data = builder.CreateGEP(data, index);
1041  num_bytes -= 4;
1042  }
1043 
1044  if (num_bytes >= 2) {
1045  DCHECK_LT(num_bytes, 4);
1046  Value* ptr = builder.CreateBitCast(data, GetPtrType(TYPE_SMALLINT));
1047  Value* d = builder.CreateLoad(ptr);
1048  result = builder.CreateCall2(crc16_fn, result, d);
1049  Value* index[] = { GetIntConstant(TYPE_INT, 2) };
1050  data = builder.CreateGEP(data, index);
1051  num_bytes -= 2;
1052  }
1053 
1054  if (num_bytes > 0) {
1055  DCHECK_EQ(num_bytes, 1);
1056  Value* d = builder.CreateLoad(data);
1057  result = builder.CreateCall2(crc8_fn, result, d);
1058  --num_bytes;
1059  }
1060  DCHECK_EQ(num_bytes, 0);
1061 
1062  Value* shift_16 = GetIntConstant(TYPE_INT, 16);
1063  Value* upper_bits = builder.CreateShl(result, shift_16);
1064  Value* lower_bits = builder.CreateLShr(result, shift_16);
1065  result = builder.CreateOr(upper_bits, lower_bits);
1066  builder.CreateRet(result);
1067 
1068  fn = FinalizeFunction(fn);
1069  if (fn != NULL) {
1070  hash_fns_[num_bytes] = fn;
1071  }
1072  return fn;
1073  } else {
1074  return GetMurmurHashFunction(num_bytes);
1075  }
1076 }
1077 
1078 static Function* GetLenOptimizedHashFn(
1079  LlvmCodeGen* codegen, IRFunction::Type f, int len) {
1080  Function* fn = codegen->GetFunction(f);
1081  DCHECK(fn != NULL);
1082  if (len != -1) {
1083  // Clone this function since we're going to modify it by replacing the
1084  // length with num_bytes.
1085  fn = codegen->CloneFunction(fn);
1086  Value* len_arg = codegen->GetArgument(fn, 1);
1087  len_arg->replaceAllUsesWith(codegen->GetIntConstant(TYPE_INT, len));
1088  }
1089  return codegen->FinalizeFunction(fn);
1090 }
1091 
1093  return GetLenOptimizedHashFn(this, IRFunction::HASH_FNV, len);
1094 }
1095 
1097  return GetLenOptimizedHashFn(this, IRFunction::HASH_MURMUR, len);
1098 }
1099 
1100 void LlvmCodeGen::ReplaceInstWithValue(Instruction* from, Value* to) {
1101  BasicBlock::iterator iter(from);
1102  llvm::ReplaceInstWithValue(from->getParent()->getInstList(), iter, to);
1103 }
1104 
1105 Argument* LlvmCodeGen::GetArgument(Function* fn, int i) {
1106  DCHECK_LE(i, fn->arg_size());
1107  Function::arg_iterator iter = fn->arg_begin();
1108  for (int j = 0; j < i; ++j) ++iter;
1109  return iter;
1110 }
1111 
1112 }
boost::scoped_ptr< llvm::ExecutionEngine > execution_engine_
Execution/Jitting engine.
Definition: llvm-codegen.h:501
std::string error_string_
Error string that llvm will write to.
Definition: llvm-codegen.h:490
static Status LoadImpalaIR(ObjectPool *, const std::string &id, boost::scoped_ptr< LlvmCodeGen > *codegen)
~LlvmCodeGen()
Removes all jit compiled dynamically linked functions from the process.
string path("/usr/lib/sasl2:/usr/lib64/sasl2:/usr/local/lib/sasl2:/usr/lib/x86_64-linux-gnu/sasl2")
llvm::PointerType * GetPtrType(llvm::Type *type)
Return a pointer type to 'type'.
void OptimizeModule()
Optimizes the module. This includes pruning the module of any unused functions.
const std::string & name() const
Returns name of function.
Definition: llvm-codegen.h:168
Utility struct that wraps a variable name and llvm type.
Definition: llvm-codegen.h:149
std::set< std::string > linked_modules_
Definition: llvm-codegen.h:540
llvm::Function * CodegenMinMax(const ColumnType &type, bool min)
Generates function to return min/max(v1, v2)
llvm::Type * void_type_
Definition: llvm-codegen.h:555
#define RETURN_IF_ERROR(stmt)
some generally useful macros
Definition: status.h:242
std::map< llvm::Intrinsic::ID, llvm::Function * > llvm_intrinsics_
A cache of loaded llvm intrinsics.
Definition: llvm-codegen.h:531
Status Init()
Initializes the jitter and execution engine.
int InlineCallSites(llvm::Function *fn, bool skip_registered_fns)
#define ADD_TIMER(profile, name)
std::vector< llvm::Function * > loaded_functions_
Functions parsed from pre-compiled module. Indexed by ImpalaIR::Function enum.
Definition: llvm-codegen.h:517
std::string GetIR(bool full_module) const
RuntimeProfile profile_
Codegen counters.
Definition: llvm-codegen.h:457
std::vector< std::string > debug_strings_
Definition: llvm-codegen.h:551
DEFINE_bool(print_llvm_ir_instruction_count, false,"if true, prints the instruction counts of all JIT'd functions")
llvm::Function * GetLibCFunction(FnPrototype *prototype)
Returns the libc function, adding it to the module if it has not already been.
void ReplaceInstWithValue(llvm::Instruction *from, llvm::Value *to)
LlvmCodeGen(ObjectPool *pool, const std::string &module_id)
Top level codegen object. 'module_id' is used for debugging when outputting the IR.
Definition: llvm-codegen.cc:99
#define COUNTER_ADD(c, v)
llvm::Argument * GetArgument(llvm::Function *fn, int i)
Returns the i-th argument of fn.
RuntimeProfile::Counter * codegen_timer_
Time spent doing codegen (adding IR to the module)
Definition: llvm-codegen.h:466
#define SCOPED_TIMER(c)
llvm::Value * false_value_
Definition: llvm-codegen.h:561
static bool llvm_initialized
Definition: llvm-codegen.cc:76
llvm::PointerType * ptr_type_
llvm representation of a few common types. Owned by context.
Definition: llvm-codegen.h:554
static std::string Print(T *value_or_type)
Returns the string representation of a llvm::Value* or llvm::Type*.
Definition: llvm-codegen.h:326
LLVM code generator. This is the top level object to generate jitted code.
Definition: llvm-codegen.h:107
RuntimeProfile::Counter * module_file_size_
Definition: llvm-codegen.h:474
llvm::Function * debug_trace_fn_
Definition: llvm-codegen.h:547
llvm::Function * GeneratePrototype(LlvmBuilder *builder=NULL, llvm::Value **params=NULL)
PrimitiveType type
Definition: types.h:60
llvm::Value * CastPtrToLlvmPtr(llvm::Type *type, const void *ptr)
void AddArgument(const NamedVariable &var)
Add argument.
Definition: llvm-codegen.h:171
std::set< llvm::Function * > registered_exprs_
A set of all the functions in 'registered_exprs_map_' for quick lookup.
Definition: llvm-codegen.h:528
llvm::Function * GetHashFunction(int num_bytes=-1)
void CodegenMemcpy(LlvmBuilder *, llvm::Value *dst, llvm::Value *src, int size)
llvm::ExecutionEngine * execution_engine()
Returns execution engine interface.
Definition: llvm-codegen.h:217
Status LinkModule(const std::string &file)
PrimitiveType
Definition: types.h:27
std::map< llvm::Function *, bool > jitted_functions_
Definition: llvm-codegen.h:506
ObjectPool pool
llvm::Type * string_val_type_
Definition: llvm-codegen.h:556
int GetByteSize() const
Returns the byte size of this type. Returns 0 for variable length types.
Definition: types.h:178
std::string PrintCounters() const
Prints all counters.
static void GetFullPath(const std::string &path, std::string *full_path)
Sets full_path to <IMPALA_HOME>/path.
Definition: path-builder.cc:31
#define ADD_COUNTER(profile, name, unit)
void ClearHashFns()
Clears generated hash fns. This is only used for testing.
llvm::Function * GetFunction(IRFunction::Type)
llvm::Value * true_value_
llvm constants to help with code gen verbosity
Definition: llvm-codegen.h:560
bool VerifyFunction(llvm::Function *function)
static const int64_t SSE4_2
Definition: cpu-info.h:34
void CreateIfElseBlocks(llvm::Function *fn, const std::string &if_name, const std::string &else_name, llvm::BasicBlock **if_block, llvm::BasicBlock **else_block, llvm::BasicBlock *insert_before=NULL)
void AddFunctionToJit(llvm::Function *fn, void **fn_ptr)
llvm::Function * CloneFunction(llvm::Function *fn)
Returns a copy of fn. The copy is added to the module.
DECLARE_string(local_library_dir)
FnPrototype(LlvmCodeGen *, const std::string &name, llvm::Type *ret_type)
void DebugTrace(const char *str)
std::vector< std::pair< llvm::Function *, void ** > > fns_to_jit_compile_
The vector of functions to automatically JIT compile after FinalizeModule().
Definition: llvm-codegen.h:543
static mutex llvm_initialization_lock
Definition: llvm-codegen.cc:75
void visit(const llvm::Module &M)
Visits each Function in Module M.
static const char * LLVM_CLASS_NAME
std::string id_
ID used for debugging (can be e.g. the fragment instance ID)
Definition: llvm-codegen.h:454
llvm::Value * false_value()
Definition: llvm-codegen.h:381
llvm::Function * GetFnvHashFunction(int num_bytes=-1)
std::map< std::string, llvm::Function * > external_functions_
Definition: llvm-codegen.h:514
std::vector< llvm::Function * > codegend_functions_
Definition: llvm-codegen.h:522
static const Status OK
Definition: status.h:87
static Function * GetLenOptimizedHashFn(LlvmCodeGen *codegen, IRFunction::Type f, int len)
RuntimeProfile::Counter * prepare_module_timer_
Time spent constructing the in-memory module from the .ir file.
Definition: llvm-codegen.h:463
llvm::Type * GetType(const ColumnType &type)
Returns llvm type for the column type.
boost::mutex jitted_functions_lock_
Lock protecting jitted_functions_.
Definition: llvm-codegen.h:509
static const char * LLVM_CLASS_NAME
For C++/IR interop, we need to be able to look up types by name.
Definition: string-value.h:121
llvm::Value * GetIntConstant(PrimitiveType type, int64_t val)
Returns the constant 'val' of 'type'.
std::map< int, llvm::Function * > hash_fns_
Definition: llvm-codegen.h:536
llvm::Function * FinalizeFunction(llvm::Function *function)
static const char * LLVM_DECIMALVAL_NAME
RuntimeProfile::Counter * load_module_timer_
Time spent reading the .ir file from the file system.
Definition: llvm-codegen.h:460
llvm::Module * module_
Definition: llvm-codegen.h:498
llvm::Function * GetMurmurHashFunction(int num_bytes=-1)
void CodegenDebugTrace(LlvmBuilder *builder, const char *message)
llvm::Function * ReplaceCallSites(llvm::Function *caller, bool update_in_place, llvm::Function *new_fn, const std::string &target_name, int *num_replaced)
void EnableOptimizations(bool enable)
Turns on/off optimization passes.
RuntimeProfile::Counter * optimization_timer_
Time spent optimizing the module.
Definition: llvm-codegen.h:469
void GetSymbols(boost::unordered_set< std::string > *symbols)
Fills in 'symbols' with all the symbols in the module.
string name
Definition: cpu-info.cc:50
llvm::LLVMContext & context()
Definition: llvm-codegen.h:214
void * JitFunction(llvm::Function *function)
bool optimizations_enabled_
whether or not optimizations are enabled
Definition: llvm-codegen.h:477
llvm::AllocaInst * CreateEntryBlockAlloca(llvm::Function *f, const NamedVariable &var)
llvm::Function * OptimizeFunctionWithExprs(llvm::Function *fn)
static bool IsSupported(long flag)
Returns whether or not the cpu supports this flag.
Definition: cpu-info.h:58
static Status LoadFromFile(ObjectPool *, const std::string &file, const std::string &id, boost::scoped_ptr< LlvmCodeGen > *codegen)
llvm::PointerType * ptr_type()
Definition: llvm-codegen.h:393
void GetFunctions(std::vector< llvm::Function * > *functions)
static Status LoadModule(LlvmCodeGen *codegen, const std::string &file, llvm::Module **module)
llvm::Type * timestamp_val_type_
Definition: llvm-codegen.h:557
RuntimeProfile::Counter * compile_timer_
Time spent compiling the module.
Definition: llvm-codegen.h:472
DEFINE_string(unopt_module_dir,"","if set, saves unoptimized generated IR modules to the specified directory.")
llvm::Module * module()
Returns the underlying llvm module.
Definition: llvm-codegen.h:220
Counter * total_time_counter()
Returns the counter for the total elapsed time.