Impala
Impalaistheopensource,nativeanalyticdatabaseforApacheHadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
subexpr-elimination.cc
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
16 
17 #include <fstream>
18 #include <iostream>
19 #include <sstream>
20 
21 #include <llvm/Analysis/Dominators.h>
22 #include <llvm/Analysis/InstructionSimplify.h>
23 #include <llvm/Analysis/Passes.h>
24 #include <llvm/Support/InstIterator.h>
25 #include "llvm/Transforms/IPO.h"
26 #include <llvm/Transforms/Scalar.h>
27 #include <llvm/Transforms/Utils/SSAUpdater.h>
28 #include <llvm/Transforms/Utils/BasicBlockUtils.h>
29 
30 #include "common/logging.h"
32 #include "impala-ir/impala-ir-names.h"
33 #include "util/cpu-info.h"
34 #include "util/path-builder.h"
35 
36 #include "common/names.h"
37 
38 using namespace impala;
39 using namespace llvm;
40 
42  codegen_(codegen) {
43 }
44 
45 // Before running the standard llvm optimization passes, first remove redundant calls
46 // to slotref expression. SlotRefs are more heavyweight due to the null handling that
47 // is required and after they are inlined, llvm is unable to eliminate the redundant
48 // inlined code blocks.
49 // For example:
50 // select colA + colA would generate an inner loop with 2 calls to the colA slot ref,
51 // rather than doing subexpression elimination. To handle this, we will:
52 // 1. inline all call sites in the original function except calls to SlotRefs
53 // 2. for all call sites to SlotRefs except the first to that SlotRef, replace the
54 // results from the secondary calls with the result from the first and remove
55 // the call instruction.
56 // 3. Inline calls to the SlotRefs (there should only be one for each slot ref).
57 //
58 // In the above example, the input function would look something like:
59 // int ArithmeticAdd(TupleRow* row, bool* is_null) {
60 // bool lhs_is_null, rhs_is_null;
61 // int lhs_value = SlotRef(row, &lhs_is_null);
62 // if (lhs_is_null) { *is_null = true; return 0; }
63 // int rhs_value = SlotRef(row, &rhs_is_null);
64 // if (rhs_is_null) { *is_null = true; return 0; }
65 // *is_null = false; return lhs_value + rhs_value;
66 // }
67 // During step 2, we'd substitute the second call to SlotRef with the results from
68 // the first call.
69 // int ArithmeticAdd(TupleRow* row, bool* is_null) {
70 // bool lhs_is_null, rhs_is_null;
71 // int lhs_value = SlotRef(row, &lhs_is_null);
72 // if (lhs_is_null) { *is_null = true; return 0; }
73 // int rhs_value = lhs_value;
74 // rhs_is_null = lhs_is_null;
75 // if (rhs_is_null) { *is_null = true; return 0; }
76 // *is_null = false; return lhs_value + rhs_value;
77 // }
78 // And then rely on llvm to finish the removing the redundant code, resulting in:
79 // int ArithmeticAdd(TupleRow* row, bool* is_null) {
80 // bool lhs_is_null, rhs_is_null;
81 // int lhs_value = SlotRef(row, &lhs_is_null);
82 // if (lhs_is_null) { *is_null = true; return 0; }
83 // *is_null = false; return lhs_value + lhs_value;
84 // }
85 // Details on how to do this:
86 // http://llvm.org/docs/ProgrammersManual.html#replacing-an-instruction-with-another-value
87 
88 // Step 2 requires more manipulation to ensure the resulting IR is still valid IR.
89 // The call to the expr returns two things, both of which need to be replaced.
90 // The value of the function as the return argument and whether or not the result was
91 // null as a function output argument.
92 // 1. The return value is trivial since with SSA, it is easy to identity all uses of
93 // We simply replace the subsequent call instructions with the value.
94 // 2. For the is_null result ptr, we replace the call to the expr with a store
95 // instruction of the cached value.
96 // i.e:
97 // val1 = Call(is_null_ptr);
98 // is_null1 = *is_null_ptr
99 // ...
100 // val2 = Call(is_null_ptr);
101 // is_null2 = *is_null_ptr
102 // Becomes:
103 // val1 = Call(is_null_ptr);
104 // is_null1 = *is_null_ptr
105 // ...
106 // val2 = val1;
107 // *is_null_ptr = is_null1;
108 // is_null2 = *is_null_ptr
109 // We do this because the is_null ptr is not SSA form, making manipulating it
110 // complex. The above approach exactly preserves the Call function, including
111 // all writes to ptrs. We then rely on the llvm load/store removal pass which
112 // will remove the redundant loads (which is tricky since you have to track
113 // other instructions that wrote to the ptr, etc).
114 // When doing the eliminations, we need to consider the call graph to make sure
115 // the instruction we are replacing with dominates the instruction we are replacing;
116 // that is, we need to guarantee the instruction we are replacing with always executes
117 // before the replacee instruction in all code paths.
118 // TODO: remove all this with expr refactoring. Everything will be SSA form then.
120  // First function call result. Subsequent calls will be replaced with this value
121  CallInst* result;
122  // First is null result. Subsequent calls will be replaced with this value.
123  Instruction* is_null_value;
124 };
125 
126 bool SubExprElimination::Run(Function* fn) {
127  // Step 1:
128  int num_inlined;
129  do {
130  // This assumes that all redundant exprs have been registered.
131  num_inlined = codegen_->InlineCallSites(fn, true);
132  } while (num_inlined > 0);
133 
134  // Mapping of (expr eval function, its 'row' arg) to cached result. We want to remove
135  // redundant calls to the same function with the same argument.
136  map<pair<Function*, Value*>, CachedExprResult> cached_slot_ref_results;
137 
138  // Step 2:
139  DominatorTree dom_tree;
140  dom_tree.runOnFunction(*fn);
141 
142  inst_iterator fn_end = inst_end(fn);
143  inst_iterator instr_iter = inst_begin(fn);
144  // Loop over every instruction in the function.
145  while (instr_iter != fn_end) {
146  Instruction* instr = &*instr_iter;
147  ++instr_iter;
148  // Look for call instructions
149  if (!CallInst::classof(instr)) continue;
150 
151  CallInst* call_instr = reinterpret_cast<CallInst*>(instr);
152  Function* called_fn = call_instr->getCalledFunction();
153  if (codegen_->registered_exprs_.find(called_fn) ==
154  codegen_->registered_exprs_.end()) {
155  continue;
156  }
157 
158  // Found a registered expr function. We generate the IR in a very specific way
159  // when calling the expr. The call instruction is always followed by loading the
160  // resulting is_null result. We need to update both.
161  // TODO: we need to update this to do more analysis since we are relying on a very
162  // specific code structure to do this.
163 
164  // Arguments are (row, scratch_buffer, is_null);
165  DCHECK_EQ(call_instr->getNumArgOperands(), 3);
166  Value* row_arg = call_instr->getArgOperand(0);
167 
168  DCHECK(BitCastInst::classof(row_arg));
169  BitCastInst* row_cast = reinterpret_cast<BitCastInst*>(row_arg);
170  // Get at the underlying row arg. We need to differentiate between
171  // call Fn(row1) and call Fn(row2). (identical fns but different input).
172  row_arg = row_cast->getOperand(0);
173 
174  instr = &*instr_iter;
175  ++instr_iter;
176 
177  if (!LoadInst::classof(instr)) continue;
178  LoadInst* is_null_value = reinterpret_cast<LoadInst*>(instr);
179  Value* loaded_ptr = is_null_value->getPointerOperand();
180 
181  // Subexpr elimination requires the IR to be a very specific form.
182  // call SlotRef(row, NULL, is_null_ptr)
183  // load is_null_ptr
184  // Since we generate this IR currently, we can enforce this logic in our exprs
185  // TODO: this should be removed/generalized with expr refactoring
186  DCHECK_EQ(loaded_ptr, call_instr->getArgOperand(2));
187 
188  pair<Function*, Value*> call_desc = make_pair(called_fn, row_arg);
189  if (cached_slot_ref_results.find(call_desc) == cached_slot_ref_results.end()) {
190  CachedExprResult cache_entry;
191  cache_entry.result = call_instr;
192  cache_entry.is_null_value = is_null_value;
193  cached_slot_ref_results[call_desc] = cache_entry;
194  } else {
195  // Reuse the result.
196  CachedExprResult& cache_entry = cached_slot_ref_results[call_desc];
197  if (dom_tree.dominates(cache_entry.result, call_instr)) {
198  new StoreInst(cache_entry.is_null_value, loaded_ptr, call_instr);
199  call_instr->replaceAllUsesWith(cache_entry.result);
200  call_instr->eraseFromParent();
201  }
202  }
203  }
204 
205  // Step 3:
206  codegen_->InlineCallSites(fn, false);
207  return true;
208 }
bool Run(llvm::Function *function)
Perform subexpr elimination on function.
SubExprElimination(LlvmCodeGen *codegen)
int InlineCallSites(llvm::Function *fn, bool skip_registered_fns)
LlvmCodeGen * codegen_
Parent codegen object.
Instruction * is_null_value
LLVM code generator. This is the top level object to generate jitted code.
Definition: llvm-codegen.h:107
std::set< llvm::Function * > registered_exprs_
A set of all the functions in 'registered_exprs_map_' for quick lookup.
Definition: llvm-codegen.h:528