/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- SpeculativeExecution.cpp ---------------------------------*- C++ -*-===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | // This pass hoists instructions to enable speculative execution on |
11 | | // targets where branches are expensive. This is aimed at GPUs. It |
12 | | // currently works on simple if-then and if-then-else |
13 | | // patterns. |
14 | | // |
15 | | // Removing branches is not the only motivation for this |
16 | | // pass. E.g. consider this code and assume that there is no |
17 | | // addressing mode for multiplying by sizeof(*a): |
18 | | // |
19 | | // if (b > 0) |
20 | | // c = a[i + 1] |
21 | | // if (d > 0) |
22 | | // e = a[i + 2] |
23 | | // |
24 | | // turns into |
25 | | // |
26 | | // p = &a[i + 1]; |
27 | | // if (b > 0) |
28 | | // c = *p; |
29 | | // q = &a[i + 2]; |
30 | | // if (d > 0) |
31 | | // e = *q; |
32 | | // |
33 | | // which could later be optimized to |
34 | | // |
35 | | // r = &a[i]; |
36 | | // if (b > 0) |
37 | | // c = r[1]; |
38 | | // if (d > 0) |
39 | | // e = r[2]; |
40 | | // |
41 | | // Later passes sink back much of the speculated code that did not enable |
42 | | // further optimization. |
43 | | // |
44 | | // This pass is more aggressive than the function SpeculativeyExecuteBB in |
45 | | // SimplifyCFG. SimplifyCFG will not speculate if no selects are introduced and |
46 | | // it will speculate at most one instruction. It also will not speculate if |
47 | | // there is a value defined in the if-block that is only used in the then-block. |
48 | | // These restrictions make sense since the speculation in SimplifyCFG seems |
49 | | // aimed at introducing cheap selects, while this pass is intended to do more |
50 | | // aggressive speculation while counting on later passes to either capitalize on |
51 | | // that or clean it up. |
52 | | // |
53 | | // If the pass was created by calling |
54 | | // createSpeculativeExecutionIfHasBranchDivergencePass or the |
55 | | // -spec-exec-only-if-divergent-target option is present, this pass only has an |
56 | | // effect on targets where TargetTransformInfo::hasBranchDivergence() is true; |
57 | | // on other targets, it is a nop. |
58 | | // |
59 | | // This lets you include this pass unconditionally in the IR pass pipeline, but |
60 | | // only enable it for relevant targets. |
61 | | // |
62 | | //===----------------------------------------------------------------------===// |
63 | | |
64 | | #include "llvm/Transforms/Scalar/SpeculativeExecution.h" |
65 | | #include "llvm/ADT/SmallSet.h" |
66 | | #include "llvm/Analysis/GlobalsModRef.h" |
67 | | #include "llvm/Analysis/ValueTracking.h" |
68 | | #include "llvm/IR/Instructions.h" |
69 | | #include "llvm/IR/Module.h" |
70 | | #include "llvm/IR/Operator.h" |
71 | | #include "llvm/Support/CommandLine.h" |
72 | | #include "llvm/Support/Debug.h" |
73 | | |
74 | | using namespace llvm; |
75 | | |
76 | | #define DEBUG_TYPE "speculative-execution" |
77 | | |
78 | | // The risk that speculation will not pay off increases with the |
79 | | // number of instructions speculated, so we put a limit on that. |
80 | | static cl::opt<unsigned> SpecExecMaxSpeculationCost( |
81 | | "spec-exec-max-speculation-cost", cl::init(7), cl::Hidden, |
82 | | cl::desc("Speculative execution is not applied to basic blocks where " |
83 | | "the cost of the instructions to speculatively execute " |
84 | | "exceeds this limit.")); |
85 | | |
86 | | // Speculating just a few instructions from a larger block tends not |
87 | | // to be profitable and this limit prevents that. A reason for that is |
88 | | // that small basic blocks are more likely to be candidates for |
89 | | // further optimization. |
90 | | static cl::opt<unsigned> SpecExecMaxNotHoisted( |
91 | | "spec-exec-max-not-hoisted", cl::init(5), cl::Hidden, |
92 | | cl::desc("Speculative execution is not applied to basic blocks where the " |
93 | | "number of instructions that would not be speculatively executed " |
94 | | "exceeds this limit.")); |
95 | | |
96 | | static cl::opt<bool> SpecExecOnlyIfDivergentTarget( |
97 | | "spec-exec-only-if-divergent-target", cl::init(false), cl::Hidden, |
98 | | cl::desc("Speculative execution is applied only to targets with divergent " |
99 | | "branches, even if the pass was configured to apply only to all " |
100 | | "targets.")); |
101 | | |
102 | | namespace { |
103 | | |
104 | | class SpeculativeExecutionLegacyPass : public FunctionPass { |
105 | | public: |
106 | | static char ID; |
107 | | explicit SpeculativeExecutionLegacyPass(bool OnlyIfDivergentTarget = false) |
108 | | : FunctionPass(ID), OnlyIfDivergentTarget(OnlyIfDivergentTarget || |
109 | | SpecExecOnlyIfDivergentTarget), |
110 | 19.1k | Impl(OnlyIfDivergentTarget) {} |
111 | | |
112 | | void getAnalysisUsage(AnalysisUsage &AU) const override; |
113 | | bool runOnFunction(Function &F) override; |
114 | | |
115 | 51 | StringRef getPassName() const override { |
116 | 51 | if (OnlyIfDivergentTarget) |
117 | 51 | return "Speculatively execute instructions if target has divergent " |
118 | 51 | "branches"; |
119 | 0 | return "Speculatively execute instructions"; |
120 | 0 | } |
121 | | |
122 | | private: |
123 | | // Variable preserved purely for correct name printing. |
124 | | const bool OnlyIfDivergentTarget; |
125 | | |
126 | | SpeculativeExecutionPass Impl; |
127 | | }; |
128 | | } // namespace |
129 | | |
130 | | char SpeculativeExecutionLegacyPass::ID = 0; |
131 | 24.6k | INITIALIZE_PASS_BEGIN24.6k (SpeculativeExecutionLegacyPass, "speculative-execution",
|
132 | 24.6k | "Speculatively execute instructions", false, false) |
133 | 24.6k | INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) |
134 | 24.6k | INITIALIZE_PASS_END(SpeculativeExecutionLegacyPass, "speculative-execution", |
135 | | "Speculatively execute instructions", false, false) |
136 | | |
137 | 19.1k | void SpeculativeExecutionLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const { |
138 | 19.1k | AU.addRequired<TargetTransformInfoWrapperPass>(); |
139 | 19.1k | AU.addPreserved<GlobalsAAWrapperPass>(); |
140 | 19.1k | } |
141 | | |
142 | 533k | bool SpeculativeExecutionLegacyPass::runOnFunction(Function &F) { |
143 | 533k | if (skipFunction(F)) |
144 | 58 | return false; |
145 | 533k | |
146 | 533k | auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); |
147 | 533k | return Impl.runImpl(F, TTI); |
148 | 533k | } |
149 | | |
150 | | namespace llvm { |
151 | | |
152 | 533k | bool SpeculativeExecutionPass::runImpl(Function &F, TargetTransformInfo *TTI) { |
153 | 533k | if (OnlyIfDivergentTarget && 533k !TTI->hasBranchDivergence()516k ) { |
154 | 515k | DEBUG(dbgs() << "Not running SpeculativeExecution because " |
155 | 515k | "TTI->hasBranchDivergence() is false.\n"); |
156 | 515k | return false; |
157 | 515k | } |
158 | 18.5k | |
159 | 18.5k | this->TTI = TTI; |
160 | 18.5k | bool Changed = false; |
161 | 20.9k | for (auto& B : F) { |
162 | 20.9k | Changed |= runOnBasicBlock(B); |
163 | 20.9k | } |
164 | 533k | return Changed; |
165 | 533k | } |
166 | | |
167 | 20.9k | bool SpeculativeExecutionPass::runOnBasicBlock(BasicBlock &B) { |
168 | 20.9k | BranchInst *BI = dyn_cast<BranchInst>(B.getTerminator()); |
169 | 20.9k | if (BI == nullptr) |
170 | 18.6k | return false; |
171 | 2.30k | |
172 | 2.30k | if (2.30k BI->getNumSuccessors() != 22.30k ) |
173 | 1.21k | return false; |
174 | 1.09k | BasicBlock &Succ0 = *BI->getSuccessor(0); |
175 | 1.09k | BasicBlock &Succ1 = *BI->getSuccessor(1); |
176 | 1.09k | |
177 | 1.09k | if (&B == &Succ0 || 1.09k &B == &Succ11.07k || &Succ0 == &Succ1988 ) { |
178 | 106 | return false; |
179 | 106 | } |
180 | 988 | |
181 | 988 | // Hoist from if-then (triangle). |
182 | 988 | if (988 Succ0.getSinglePredecessor() != nullptr && |
183 | 988 | Succ0.getSingleSuccessor() == &Succ1550 ) { |
184 | 229 | return considerHoistingFromTo(Succ0, B); |
185 | 229 | } |
186 | 759 | |
187 | 759 | // Hoist from if-else (triangle). |
188 | 759 | if (759 Succ1.getSinglePredecessor() != nullptr && |
189 | 759 | Succ1.getSingleSuccessor() == &Succ0620 ) { |
190 | 292 | return considerHoistingFromTo(Succ1, B); |
191 | 292 | } |
192 | 467 | |
193 | 467 | // Hoist from if-then-else (diamond), but only if it is equivalent to |
194 | 467 | // an if-else or if-then due to one of the branches doing nothing. |
195 | 467 | if (467 Succ0.getSinglePredecessor() != nullptr && |
196 | 321 | Succ1.getSinglePredecessor() != nullptr && |
197 | 240 | Succ1.getSingleSuccessor() != nullptr && |
198 | 154 | Succ1.getSingleSuccessor() != &B && |
199 | 467 | Succ1.getSingleSuccessor() == Succ0.getSingleSuccessor()133 ) { |
200 | 130 | // If a block has only one instruction, then that is a terminator |
201 | 130 | // instruction so that the block does nothing. This does happen. |
202 | 130 | if (Succ1.size() == 1) // equivalent to if-then |
203 | 25 | return considerHoistingFromTo(Succ0, B); |
204 | 105 | if (105 Succ0.size() == 1105 ) // equivalent to if-else |
205 | 4 | return considerHoistingFromTo(Succ1, B); |
206 | 438 | } |
207 | 438 | |
208 | 438 | return false; |
209 | 438 | } |
210 | | |
211 | | static unsigned ComputeSpeculationCost(const Instruction *I, |
212 | 1.37k | const TargetTransformInfo &TTI) { |
213 | 1.37k | switch (Operator::getOpcode(I)) { |
214 | 540 | case Instruction::GetElementPtr: |
215 | 540 | case Instruction::Add: |
216 | 540 | case Instruction::Mul: |
217 | 540 | case Instruction::And: |
218 | 540 | case Instruction::Or: |
219 | 540 | case Instruction::Select: |
220 | 540 | case Instruction::Shl: |
221 | 540 | case Instruction::Sub: |
222 | 540 | case Instruction::LShr: |
223 | 540 | case Instruction::AShr: |
224 | 540 | case Instruction::Xor: |
225 | 540 | case Instruction::ZExt: |
226 | 540 | case Instruction::SExt: |
227 | 540 | case Instruction::Call: |
228 | 540 | case Instruction::BitCast: |
229 | 540 | case Instruction::PtrToInt: |
230 | 540 | case Instruction::IntToPtr: |
231 | 540 | case Instruction::AddrSpaceCast: |
232 | 540 | case Instruction::FPToUI: |
233 | 540 | case Instruction::FPToSI: |
234 | 540 | case Instruction::UIToFP: |
235 | 540 | case Instruction::SIToFP: |
236 | 540 | case Instruction::FPExt: |
237 | 540 | case Instruction::FPTrunc: |
238 | 540 | case Instruction::FAdd: |
239 | 540 | case Instruction::FSub: |
240 | 540 | case Instruction::FMul: |
241 | 540 | case Instruction::FDiv: |
242 | 540 | case Instruction::FRem: |
243 | 540 | case Instruction::ICmp: |
244 | 540 | case Instruction::FCmp: |
245 | 540 | return TTI.getUserCost(I); |
246 | 540 | |
247 | 839 | default: |
248 | 839 | return UINT_MAX; // Disallow anything not whitelisted. |
249 | 0 | } |
250 | 0 | } |
251 | | |
252 | | bool SpeculativeExecutionPass::considerHoistingFromTo( |
253 | 550 | BasicBlock &FromBlock, BasicBlock &ToBlock) { |
254 | 550 | SmallSet<const Instruction *, 8> NotHoisted; |
255 | 414 | const auto AllPrecedingUsesFromBlockHoisted = [&NotHoisted](User *U) { |
256 | 729 | for (Value* V : U->operand_values()) { |
257 | 729 | if (Instruction *I729 = dyn_cast<Instruction>(V)) { |
258 | 434 | if (NotHoisted.count(I) > 0) |
259 | 81 | return false; |
260 | 333 | } |
261 | 729 | } |
262 | 333 | return true; |
263 | 333 | }; |
264 | 550 | |
265 | 550 | unsigned TotalSpeculationCost = 0; |
266 | 1.37k | for (auto& I : FromBlock) { |
267 | 1.37k | const unsigned Cost = ComputeSpeculationCost(&I, *TTI); |
268 | 1.37k | if (Cost != UINT_MAX && 1.37k isSafeToSpeculativelyExecute(&I)540 && |
269 | 1.37k | AllPrecedingUsesFromBlockHoisted(&I)414 ) { |
270 | 333 | TotalSpeculationCost += Cost; |
271 | 333 | if (TotalSpeculationCost > SpecExecMaxSpeculationCost) |
272 | 18 | return false; // too much to hoist |
273 | 1.04k | } else { |
274 | 1.04k | NotHoisted.insert(&I); |
275 | 1.04k | if (NotHoisted.size() > SpecExecMaxNotHoisted) |
276 | 8 | return false; // too much left behind |
277 | 524 | } |
278 | 1.37k | } |
279 | 524 | |
280 | 524 | if (524 TotalSpeculationCost == 0524 ) |
281 | 389 | return false; // nothing to hoist |
282 | 135 | |
283 | 504 | for (auto I = FromBlock.begin(); 135 I != FromBlock.end()504 ;) { |
284 | 369 | // We have to increment I before moving Current as moving Current |
285 | 369 | // changes the list that I is iterating through. |
286 | 369 | auto Current = I; |
287 | 369 | ++I; |
288 | 369 | if (!NotHoisted.count(&*Current)369 ) { |
289 | 199 | Current->moveBefore(ToBlock.getTerminator()); |
290 | 199 | } |
291 | 369 | } |
292 | 550 | return true; |
293 | 550 | } |
294 | | |
295 | 1.88k | FunctionPass *createSpeculativeExecutionPass() { |
296 | 1.88k | return new SpeculativeExecutionLegacyPass(); |
297 | 1.88k | } |
298 | | |
299 | 17.2k | FunctionPass *createSpeculativeExecutionIfHasBranchDivergencePass() { |
300 | 17.2k | return new SpeculativeExecutionLegacyPass(/* OnlyIfDivergentTarget = */ true); |
301 | 17.2k | } |
302 | | |
303 | | SpeculativeExecutionPass::SpeculativeExecutionPass(bool OnlyIfDivergentTarget) |
304 | | : OnlyIfDivergentTarget(OnlyIfDivergentTarget || |
305 | 19.2k | SpecExecOnlyIfDivergentTarget) {} |
306 | | |
307 | | PreservedAnalyses SpeculativeExecutionPass::run(Function &F, |
308 | 118 | FunctionAnalysisManager &AM) { |
309 | 118 | auto *TTI = &AM.getResult<TargetIRAnalysis>(F); |
310 | 118 | |
311 | 118 | bool Changed = runImpl(F, TTI); |
312 | 118 | |
313 | 118 | if (!Changed) |
314 | 113 | return PreservedAnalyses::all(); |
315 | 5 | PreservedAnalyses PA; |
316 | 5 | PA.preserve<GlobalsAA>(); |
317 | 5 | return PA; |
318 | 5 | } |
319 | | } // namespace llvm |