Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
llvm-codegen.h
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 
16 #ifndef IMPALA_CODEGEN_LLVM_CODEGEN_H
17 #define IMPALA_CODEGEN_LLVM_CODEGEN_H
18 
19 #include "common/status.h"
20 
21 #include <map>
22 #include <string>
23 #include <vector>
24 #include <boost/scoped_ptr.hpp>
25 #include <boost/thread/mutex.hpp>
26 #include <boost/unordered_set.hpp>
27 
28 #include <llvm/Analysis/Verifier.h>
29 #include <llvm/IR/DerivedTypes.h>
30 #include <llvm/IR/IRBuilder.h>
31 #include <llvm/IR/Intrinsics.h>
32 #include <llvm/IR/LLVMContext.h>
33 #include <llvm/IR/Module.h>
34 #include <llvm/Support/raw_ostream.h>
35 
36 #include "exprs/expr.h"
37 #include "impala-ir/impala-ir-functions.h"
38 #include "runtime/types.h"
39 #include "util/runtime-profile.h"
40 
42 namespace llvm {
43  class AllocaInst;
44  class BasicBlock;
45  class ConstantFolder;
46  class ExecutionEngine;
47  class Function;
48  class FunctionPassManager;
49  class LLVMContext;
50  class Module;
51  class NoFolder;
52  class PassManager;
53  class PointerType;
54  class StructType;
55  class TargetData;
56  class Type;
57  class Value;
58 
59  template<bool B, typename T, typename I>
60  class IRBuilder;
61 
62  template<bool preserveName>
64 }
65 
66 namespace impala {
67 
68 class CodegenAnyVal;
69 class SubExprElimination;
70 
72 //
83 //
89 //
96 //
101 //
107 class LlvmCodeGen {
108  public:
117  static void InitializeLlvm(bool load_backend = false);
118 
122  static Status LoadImpalaIR(
123  ObjectPool*, const std::string& id, boost::scoped_ptr<LlvmCodeGen>* codegen);
124 
128  static Status LoadFromFile(ObjectPool*, const std::string& file, const std::string& id,
129  boost::scoped_ptr<LlvmCodeGen>* codegen);
130 
132  ~LlvmCodeGen();
133 
136 
138  void EnableOptimizations(bool enable);
139 
143  std::string GetIR(bool full_module) const;
144 
147 
/// Utility struct that wraps a variable name and llvm type.
/// Both fields are public and are filled in directly by the constructor.
149  struct NamedVariable {
/// Name associated with the variable.
150  std::string name;
/// LLVM type of the variable; NULL when no type was supplied.
151  llvm::Type* type;
152 
/// Builds a NamedVariable. Both arguments are optional: 'name' defaults to the
/// empty string and 'type' defaults to NULL.
153  NamedVariable(const std::string& name="", llvm::Type* type = NULL) {
// The parameters shadow the members of the same name, so 'this->' is required
// to reach the fields being assigned.
154  this->name = name;
155  this->type = type;
156  }
157  };
158 
/// Describes a function signature: a name, a return type and an ordered list of
/// named arguments that is built up through AddArgument().
/// NOTE(review): presumably GeneratePrototype() turns this description into an
/// llvm::Function declaration; it is defined out of line -- confirm there.
161  class FnPrototype {
162  public:
/// Creates a prototype from a LlvmCodeGen pointer (unnamed in this declaration),
/// the function 'name' and its return type 'ret_type'. Defined out of line.
165  FnPrototype(LlvmCodeGen*, const std::string& name, llvm::Type* ret_type);
166 
/// Returns name of function.
168  const std::string& name() const { return name_; }
169 
/// Add argument. A copy of 'var' is appended to the end of the argument list,
/// so arguments keep the order in which they were added.
171  void AddArgument(const NamedVariable& var) {
172  args_.push_back(var);
173  }
174 
/// Returns an llvm::Function*. Both 'builder' and 'params' are optional and
/// default to NULL.
/// NOTE(review): the meaning of 'builder' and 'params' (e.g. whether 'params'
/// is an output array with one slot per argument) is not visible in this
/// header -- verify against the definition before relying on it.
181  llvm::Function* GeneratePrototype(LlvmBuilder* builder = NULL,
182  llvm::Value** params = NULL);
183 
184  private:
/// LlvmCodeGen is granted access to the private members below.
185  friend class LlvmCodeGen;
186 
/// Function name, as returned by name().
188  std::string name_;
/// Return type of the function.
189  llvm::Type* ret_type_;
/// Arguments in the order they were passed to AddArgument().
190  std::vector<NamedVariable> args_;
191  };
192 
194  llvm::PointerType* GetPtrType(llvm::Type* type);
195 
197  llvm::Type* GetType(const ColumnType& type);
198 
200  llvm::PointerType* GetPtrType(const ColumnType& type);
201 
207  llvm::Type* GetType(const std::string& name);
208 
210  llvm::PointerType* GetPtrType(const std::string& name);
211 
214  llvm::LLVMContext& context() { return *context_.get(); }
215 
217  llvm::ExecutionEngine* execution_engine() { return execution_engine_.get(); }
218 
220  llvm::Module* module() { return module_; }
221 
/// Registers 'function' as the expr function for the unique 'id'.
/// The function is recorded twice: in registered_exprs_map_ (id -> function)
/// and in registered_exprs_ (the set of all registered functions).
/// Precondition: 'id' has not been registered before. This is enforced only by
/// the DCHECK below.
224  void RegisterExprFn(int64_t id, llvm::Function* function) {
225  DCHECK(registered_exprs_map_.find(id) == registered_exprs_map_.end());
226  registered_exprs_map_[id] = function;
// Keep the set in step with the map so both describe the same functions.
227  registered_exprs_.insert(function);
228  }
229 
/// Returns a registered expr function for id or NULL if it does not exist.
/// Uses find() rather than operator[] so that looking up an unknown id does not
/// insert an entry into registered_exprs_map_.
231  llvm::Function* GetRegisteredExprFn(int64_t id) {
232  std::map<int64_t, llvm::Function*>::iterator it = registered_exprs_map_.find(id);
233  if (it == registered_exprs_map_.end()) return NULL;
234  return it->second;
235  }
236 
241 
255  //
261  llvm::Function* ReplaceCallSites(llvm::Function* caller, bool update_in_place,
262  llvm::Function* new_fn, const std::string& target_name, int* num_replaced);
263 
265  llvm::Function* CloneFunction(llvm::Function* fn);
266 
269  void ReplaceInstWithValue(llvm::Instruction* from, llvm::Value* to);
270 
272  llvm::Argument* GetArgument(llvm::Function* fn, int i);
273 
277  llvm::Function* FinalizeFunction(llvm::Function* function);
278 
286  int InlineCallSites(llvm::Function* fn, bool skip_registered_fns);
287 
299  llvm::Function* OptimizeFunctionWithExprs(llvm::Function* fn);
300 
303  //
308  //
311  //
315  void AddFunctionToJit(llvm::Function* fn, void** fn_ptr);
316 
319  bool VerifyFunction(llvm::Function* function);
320 
323  void CodegenDebugTrace(LlvmBuilder* builder, const char* message);
324 
/// Returns the string representation of a llvm::Value* or llvm::Type*.
/// 'value_or_type' must be non-NULL and its pointee must provide a
/// print(llvm::raw_ostream&)-compatible member; it is dereferenced
/// unconditionally.
326  template <typename T> static std::string Print(T* value_or_type) {
327  std::string str;
// 'stream' writes into 'str', but may hold output in its own buffer first.
328  llvm::raw_string_ostream stream(str);
329  value_or_type->print(stream);
// stream.str() flushes any buffered output into 'str' before handing it back.
// Returning 'str' directly could copy it while part (or all) of the text is
// still sitting in the stream's buffer, yielding a truncated or empty result.
330  return stream.str();
331  }
332 
334  llvm::Function* GetLibCFunction(FnPrototype* prototype);
335 
338  llvm::Function* GetFunction(IRFunction::Type);
339 
345  llvm::Function* GetHashFunction(int num_bytes = -1);
346  llvm::Function* GetFnvHashFunction(int num_bytes = -1);
347  llvm::Function* GetMurmurHashFunction(int num_bytes = -1);
348 
354  //
358  llvm::AllocaInst* CreateEntryBlockAlloca(llvm::Function* f, const NamedVariable& var);
359  llvm::AllocaInst* CreateEntryBlockAlloca(const LlvmBuilder& builder, llvm::Type* type,
360  const char* name = "");
361 
367  void CreateIfElseBlocks(llvm::Function* fn, const std::string& if_name,
368  const std::string& else_name,
369  llvm::BasicBlock** if_block, llvm::BasicBlock** else_block,
370  llvm::BasicBlock* insert_before = NULL);
371 
374  llvm::Value* CastPtrToLlvmPtr(llvm::Type* type, const void* ptr);
375 
377  llvm::Value* GetIntConstant(PrimitiveType type, int64_t val);
378 
380  llvm::Value* true_value() { return true_value_; }
381  llvm::Value* false_value() { return false_value_; }
382  llvm::Value* null_ptr_value() { return llvm::ConstantPointerNull::get(ptr_type()); }
383 
385  llvm::Type* boolean_type() { return GetType(TYPE_BOOLEAN); }
386  llvm::Type* tinyint_type() { return GetType(TYPE_TINYINT); }
387  llvm::Type* smallint_type() { return GetType(TYPE_SMALLINT); }
388  llvm::Type* int_type() { return GetType(TYPE_INT); }
389  llvm::Type* bigint_type() { return GetType(TYPE_BIGINT); }
390  llvm::Type* float_type() { return GetType(TYPE_FLOAT); }
391  llvm::Type* double_type() { return GetType(TYPE_DOUBLE); }
392  llvm::Type* string_val_type() { return string_val_type_; }
393  llvm::PointerType* ptr_type() { return ptr_type_; }
394  llvm::Type* void_type() { return void_type_; }
395  llvm::Type* i128_type() { return llvm::Type::getIntNTy(context(), 128); }
396 
399  void GetFunctions(std::vector<llvm::Function*>* functions);
400 
402  void GetSymbols(boost::unordered_set<std::string>* symbols);
403 
405  llvm::Function* CodegenMinMax(const ColumnType& type, bool min);
406 
410  void CodegenMemcpy(LlvmBuilder*, llvm::Value* dst, llvm::Value* src, int size);
411 
415  static Status LoadModule(LlvmCodeGen* codegen, const std::string& file,
416  llvm::Module** module);
417 
420  Status LinkModule(const std::string& file);
421 
422  private:
423  friend class LlvmCodeGenTest;
424  friend class SubExprElimination;
425 
427  LlvmCodeGen(ObjectPool* pool, const std::string& module_id);
428 
430  Status Init();
431 
435 
440  //
445  void* JitFunction(llvm::Function* function);
446 
448  void OptimizeModule();
449 
451  void ClearHashFns();
452 
454  std::string id_;
455 
458 
461 
464 
467 
470 
473 
475 
478 
484 
488 
490  std::string error_string_;
491 
494  boost::scoped_ptr<llvm::LLVMContext> context_;
495 
498  llvm::Module* module_;
499 
501  boost::scoped_ptr<llvm::ExecutionEngine> execution_engine_;
502 
506  std::map<llvm::Function*, bool> jitted_functions_;
507 
510 
514  std::map<std::string, llvm::Function*> external_functions_;
515 
517  std::vector<llvm::Function*> loaded_functions_;
518 
522  std::vector<llvm::Function*> codegend_functions_;
523 
525  std::map<int64_t, llvm::Function*> registered_exprs_map_;
526 
528  std::set<llvm::Function*> registered_exprs_;
529 
531  std::map<llvm::Intrinsic::ID, llvm::Function*> llvm_intrinsics_;
532 
536  std::map<int, llvm::Function*> hash_fns_;
537 
540  std::set<std::string> linked_modules_;
541 
543  std::vector<std::pair<llvm::Function*, void**> > fns_to_jit_compile_;
544 
547  llvm::Function* debug_trace_fn_;
548 
551  std::vector<std::string> debug_strings_;
552 
554  llvm::PointerType* ptr_type_; // int8_t*
555  llvm::Type* void_type_; // void
556  llvm::Type* string_val_type_; // StringValue
557  llvm::Type* timestamp_val_type_; // TimestampValue
558 
560  llvm::Value* true_value_;
561  llvm::Value* false_value_;
562 };
563 
564 }
565 
566 #endif
567 
std::string error_string_
Error string that llvm will write to.
Definition: llvm-codegen.h:490
boost::scoped_ptr< llvm::ExecutionEngine > execution_engine_
Execution/Jitting engine.
Definition: llvm-codegen.h:501
std::map< int64_t, llvm::Function * > registered_exprs_map_
A mapping of unique id to registered expr functions.
Definition: llvm-codegen.h:525
static Status LoadImpalaIR(ObjectPool *, const std::string &id, boost::scoped_ptr< LlvmCodeGen > *codegen)
~LlvmCodeGen()
Removes all jit compiled dynamically linked functions from the process.
RuntimeProfile::Counter * codegen_timer()
Definition: llvm-codegen.h:135
llvm::PointerType * GetPtrType(llvm::Type *type)
Return a pointer type to 'type'.
void OptimizeModule()
Optimizes the module. This includes pruning the module of any unused functions.
const std::string & name() const
Returns name of function.
Definition: llvm-codegen.h:168
Utility struct that wraps a variable name and llvm type.
Definition: llvm-codegen.h:149
llvm::Type * bigint_type()
Definition: llvm-codegen.h:389
std::set< std::string > linked_modules_
Definition: llvm-codegen.h:540
llvm::Function * CodegenMinMax(const ColumnType &type, bool min)
Generates function to return min/max(v1, v2)
RuntimeProfile * runtime_profile()
Definition: llvm-codegen.h:134
llvm::Type * void_type_
Definition: llvm-codegen.h:555
std::map< llvm::Intrinsic::ID, llvm::Function * > llvm_intrinsics_
A cache of loaded llvm intrinsics.
Definition: llvm-codegen.h:531
Status Init()
Initializes the jitter and execution engine.
int InlineCallSites(llvm::Function *fn, bool skip_registered_fns)
std::vector< llvm::Function * > loaded_functions_
Functions parsed from pre-compiled module. Indexed by ImpalaIR::Function enum.
Definition: llvm-codegen.h:517
std::string GetIR(bool full_module) const
RuntimeProfile profile_
Codegen counters.
Definition: llvm-codegen.h:457
llvm::Type * boolean_type()
Simple wrappers to reduce code verbosity.
Definition: llvm-codegen.h:385
std::vector< std::string > debug_strings_
Definition: llvm-codegen.h:551
llvm::Function * GetLibCFunction(FnPrototype *prototype)
Returns the libc function, adding it to the module if it has not already been.
void ReplaceInstWithValue(llvm::Instruction *from, llvm::Value *to)
LlvmCodeGen(ObjectPool *pool, const std::string &module_id)
Top level codegen object. 'module_id' is used for debugging when outputting the IR.
Definition: llvm-codegen.cc:99
llvm::Argument * GetArgument(llvm::Function *fn, int i)
Returns the i-th argument of fn.
RuntimeProfile::Counter * codegen_timer_
Time spent doing codegen (adding IR to the module)
Definition: llvm-codegen.h:466
llvm::Value * null_ptr_value()
Definition: llvm-codegen.h:382
llvm::Value * false_value_
Definition: llvm-codegen.h:561
std::vector< NamedVariable > args_
Definition: llvm-codegen.h:190
llvm::PointerType * ptr_type_
llvm representation of a few common types. Owned by context.
Definition: llvm-codegen.h:554
static std::string Print(T *value_or_type)
Returns the string representation of a llvm::Value* or llvm::Type*.
Definition: llvm-codegen.h:326
NamedVariable(const std::string &name="", llvm::Type *type=NULL)
Definition: llvm-codegen.h:153
LLVM code generator. This is the top level object to generate jitted code.
Definition: llvm-codegen.h:107
RuntimeProfile::Counter * module_file_size_
Definition: llvm-codegen.h:474
void RegisterExprFn(int64_t id, llvm::Function *function)
Definition: llvm-codegen.h:224
llvm::Function * debug_trace_fn_
Definition: llvm-codegen.h:547
llvm::Type * double_type()
Definition: llvm-codegen.h:391
llvm::Function * GeneratePrototype(LlvmBuilder *builder=NULL, llvm::Value **params=NULL)
llvm::Type * string_val_type()
Definition: llvm-codegen.h:392
llvm::Value * CastPtrToLlvmPtr(llvm::Type *type, const void *ptr)
void AddArgument(const NamedVariable &var)
Add argument.
Definition: llvm-codegen.h:171
std::set< llvm::Function * > registered_exprs_
A set of all the functions in 'registered_exprs_map_' for quick lookup.
Definition: llvm-codegen.h:528
llvm::Function * GetHashFunction(int num_bytes=-1)
void CodegenMemcpy(LlvmBuilder *, llvm::Value *dst, llvm::Value *src, int size)
llvm::ExecutionEngine * execution_engine()
Returns execution engine interface.
Definition: llvm-codegen.h:217
Status LinkModule(const std::string &file)
PrimitiveType
Definition: types.h:27
std::map< llvm::Function *, bool > jitted_functions_
Definition: llvm-codegen.h:506
ObjectPool pool
llvm::Type * string_val_type_
Definition: llvm-codegen.h:556
boost::scoped_ptr< llvm::LLVMContext > context_
Definition: llvm-codegen.h:494
void ClearHashFns()
Clears generated hash fns. This is only used for testing.
llvm::Function * GetFunction(IRFunction::Type)
llvm::Value * true_value_
llvm constants to help with code gen verbosity
Definition: llvm-codegen.h:560
bool VerifyFunction(llvm::Function *function)
void CreateIfElseBlocks(llvm::Function *fn, const std::string &if_name, const std::string &else_name, llvm::BasicBlock **if_block, llvm::BasicBlock **else_block, llvm::BasicBlock *insert_before=NULL)
void AddFunctionToJit(llvm::Function *fn, void **fn_ptr)
llvm::Function * CloneFunction(llvm::Function *fn)
Returns a copy of fn. The copy is added to the module.
llvm::Value * true_value()
Returns true/false constants (bool type)
Definition: llvm-codegen.h:380
FnPrototype(LlvmCodeGen *, const std::string &name, llvm::Type *ret_type)
std::vector< std::pair< llvm::Function *, void ** > > fns_to_jit_compile_
The vector of functions to automatically JIT compile after FinalizeModule().
Definition: llvm-codegen.h:543
llvm::Type * tinyint_type()
Definition: llvm-codegen.h:386
llvm::IRBuilder LlvmBuilder
Typedef builder in case we want to change the template arguments later.
Definition: llvm-codegen.h:146
std::string id_
ID used for debugging (can be e.g. the fragment instance ID)
Definition: llvm-codegen.h:454
static void InitializeLlvm(bool load_backend=false)
Definition: llvm-codegen.cc:78
llvm::Type * float_type()
Definition: llvm-codegen.h:390
llvm::Value * false_value()
Definition: llvm-codegen.h:381
llvm::Function * GetFnvHashFunction(int num_bytes=-1)
std::map< std::string, llvm::Function * > external_functions_
Definition: llvm-codegen.h:514
std::vector< llvm::Function * > codegend_functions_
Definition: llvm-codegen.h:522
RuntimeProfile::Counter * prepare_module_timer_
Time spent constructing the in-memory module from the .ir file.
Definition: llvm-codegen.h:463
llvm::Type * GetType(const ColumnType &type)
Returns llvm type for the column type.
boost::mutex jitted_functions_lock_
Lock protecting jitted_functions_.
Definition: llvm-codegen.h:509
llvm::Value * GetIntConstant(PrimitiveType type, int64_t val)
Returns the constant 'val' of 'type'.
std::map< int, llvm::Function * > hash_fns_
Definition: llvm-codegen.h:536
llvm::Function * GetRegisteredExprFn(int64_t id)
Returns a registered expr function for id or NULL if it does not exist.
Definition: llvm-codegen.h:231
llvm::Function * FinalizeFunction(llvm::Function *function)
RuntimeProfile::Counter * load_module_timer_
Time spent reading the .ir file from the file system.
Definition: llvm-codegen.h:460
llvm::Module * module_
Definition: llvm-codegen.h:498
llvm::Function * GetMurmurHashFunction(int num_bytes=-1)
void CodegenDebugTrace(LlvmBuilder *builder, const char *message)
llvm::Type * smallint_type()
Definition: llvm-codegen.h:387
llvm::Function * ReplaceCallSites(llvm::Function *caller, bool update_in_place, llvm::Function *new_fn, const std::string &target_name, int *num_replaced)
void EnableOptimizations(bool enable)
Turns on/off optimization passes.
llvm::Type * void_type()
Definition: llvm-codegen.h:394
llvm::Type * int_type()
Definition: llvm-codegen.h:388
RuntimeProfile::Counter * optimization_timer_
Time spent optimizing the module.
Definition: llvm-codegen.h:469
void GetSymbols(boost::unordered_set< std::string > *symbols)
Fills in 'symbols' with all the symbols in the module.
string name
Definition: cpu-info.cc:50
llvm::LLVMContext & context()
Definition: llvm-codegen.h:214
void * JitFunction(llvm::Function *function)
bool optimizations_enabled_
whether or not optimizations are enabled
Definition: llvm-codegen.h:477
llvm::AllocaInst * CreateEntryBlockAlloca(llvm::Function *f, const NamedVariable &var)
llvm::Function * OptimizeFunctionWithExprs(llvm::Function *fn)
llvm::Type * i128_type()
Definition: llvm-codegen.h:395
static Status LoadFromFile(ObjectPool *, const std::string &file, const std::string &id, boost::scoped_ptr< LlvmCodeGen > *codegen)
llvm::PointerType * ptr_type()
Definition: llvm-codegen.h:393
void GetFunctions(std::vector< llvm::Function * > *functions)
static Status LoadModule(LlvmCodeGen *codegen, const std::string &file, llvm::Module **module)
llvm::Type * timestamp_val_type_
Definition: llvm-codegen.h:557
RuntimeProfile::Counter * compile_timer_
Time spent compiling the module.
Definition: llvm-codegen.h:472
llvm::Module * module()
Returns the underlying llvm module.
Definition: llvm-codegen.h:220