/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Transforms/Scalar/GVNHoist.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- GVNHoist.cpp - Hoist scalar and load expressions -------------------===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | // This pass hoists expressions from branches to a common dominator. It uses |
11 | | // GVN (global value numbering) to discover expressions computing the same |
12 | | // values. The primary goals of code-hoisting are: |
13 | | // 1. To reduce the code size. |
14 | | // 2. In some cases reduce critical path (by exposing more ILP). |
15 | | // |
16 | | // The algorithm factors out the reachability of values such that multiple |
17 | | // queries to find reachability of values are fast. This is based on finding the |
18 | | // ANTIC points in the CFG which do not change during hoisting. The ANTIC points |
19 | | // are basically the dominance-frontiers in the inverse graph. So we introduce a |
20 | | // data structure (CHI nodes) to keep track of values flowing out of a basic |
21 | | // block. We only do this for values with multiple occurrences in the function |
22 | | // as they are the potential hoistable candidates. This approach allows us to |
23 | | // hoist instructions to a basic block with more than two successors, as well as |
24 | | // deal with infinite loops in a trivial way. |
25 | | // |
26 | | // Limitations: This pass does not hoist fully redundant expressions because |
27 | | // they are already handled by GVN-PRE. It is advisable to run gvn-hoist before |
28 | | // and after gvn-pre because gvn-pre creates opportunities for more instructions |
29 | | // to be hoisted. |
30 | | // |
31 | | // Hoisting may affect the performance in some cases. To mitigate that, hoisting |
32 | | // is disabled in the following cases. |
33 | | // 1. Scalars across calls. |
34 | | // 2. geps when corresponding load/store cannot be hoisted. |
35 | | //===----------------------------------------------------------------------===// |
36 | | |
37 | | #include "llvm/ADT/DenseMap.h" |
38 | | #include "llvm/ADT/SmallPtrSet.h" |
39 | | #include "llvm/ADT/Statistic.h" |
40 | | #include "llvm/Analysis/GlobalsModRef.h" |
41 | | #include "llvm/Analysis/IteratedDominanceFrontier.h" |
42 | | #include "llvm/Analysis/MemorySSA.h" |
43 | | #include "llvm/Analysis/MemorySSAUpdater.h" |
44 | | #include "llvm/Analysis/PostDominators.h" |
45 | | #include "llvm/Analysis/ValueTracking.h" |
46 | | #include "llvm/IR/IntrinsicInst.h" |
47 | | #include "llvm/Transforms/Scalar.h" |
48 | | #include "llvm/Transforms/Scalar/GVN.h" |
49 | | #include "llvm/Transforms/Utils/Local.h" |
50 | | |
51 | | #include <stack> |
52 | | |
53 | | using namespace llvm; |
54 | | |
55 | | #define DEBUG_TYPE "gvn-hoist" |
56 | | |
57 | | STATISTIC(NumHoisted, "Number of instructions hoisted"); |
58 | | STATISTIC(NumRemoved, "Number of instructions removed"); |
59 | | STATISTIC(NumLoadsHoisted, "Number of loads hoisted"); |
60 | | STATISTIC(NumLoadsRemoved, "Number of loads removed"); |
61 | | STATISTIC(NumStoresHoisted, "Number of stores hoisted"); |
62 | | STATISTIC(NumStoresRemoved, "Number of stores removed"); |
63 | | STATISTIC(NumCallsHoisted, "Number of calls hoisted"); |
64 | | STATISTIC(NumCallsRemoved, "Number of calls removed"); |
65 | | |
66 | | static cl::opt<int> |
67 | | MaxHoistedThreshold("gvn-max-hoisted", cl::Hidden, cl::init(-1), |
68 | | cl::desc("Max number of instructions to hoist " |
69 | | "(default unlimited = -1)")); |
70 | | static cl::opt<int> MaxNumberOfBBSInPath( |
71 | | "gvn-hoist-max-bbs", cl::Hidden, cl::init(4), |
72 | | cl::desc("Max number of basic blocks on the path between " |
73 | | "hoisting locations (default = 4, unlimited = -1)")); |
74 | | |
75 | | static cl::opt<int> MaxDepthInBB( |
76 | | "gvn-hoist-max-depth", cl::Hidden, cl::init(100), |
77 | | cl::desc("Hoist instructions from the beginning of the BB up to the " |
78 | | "maximum specified depth (default = 100, unlimited = -1)")); |
79 | | |
80 | | static cl::opt<int> |
81 | | MaxChainLength("gvn-hoist-max-chain-length", cl::Hidden, cl::init(10), |
82 | | cl::desc("Maximum length of dependent chains to hoist " |
83 | | "(default = 10, unlimited = -1)")); |
84 | | |
85 | | namespace llvm { |
86 | | |
87 | | typedef DenseMap<const BasicBlock *, bool> BBSideEffectsSet; |
88 | | typedef SmallVector<Instruction *, 4> SmallVecInsn; |
89 | | typedef SmallVectorImpl<Instruction *> SmallVecImplInsn; |
90 | | // Each element of a hoisting list contains the basic block where to hoist and |
91 | | // a list of instructions to be hoisted. |
92 | | typedef std::pair<BasicBlock *, SmallVecInsn> HoistingPointInfo; |
93 | | typedef SmallVector<HoistingPointInfo, 4> HoistingPointList; |
94 | | // A map from a pair of VNs to all the instructions with those VNs. |
95 | | typedef std::pair<unsigned, unsigned> VNType; |
96 | | typedef DenseMap<VNType, SmallVector<Instruction *, 4>> VNtoInsns; |
97 | | |
98 | | // CHI keeps information about values flowing out of a basic block. It is |
99 | | // similar to PHI but in the inverse graph, and used for outgoing values on each |
100 | | // edge. For conciseness, it is computed only for instructions with multiple |
101 | | // occurrences in the CFG because they are the only hoistable candidates. |
102 | | // A (CHI[{V, B, I1}, {V, C, I2}] |
103 | | // / \ |
104 | | // / \ |
105 | | // B(I1) C (I2) |
106 | | // The Value number for both I1 and I2 is V, the CHI node will save the |
107 | | // instruction as well as the edge where the value is flowing to. |
108 | | struct CHIArg { |
109 | | VNType VN; |
110 | | // Edge destination (shows the direction of flow), may not be where the I is. |
111 | | BasicBlock *Dest; |
112 | | // The instruction (VN) which uses the values flowing out of CHI. |
113 | | Instruction *I; |
114 | 2.65k | bool operator==(const CHIArg &A) { return VN == A.VN; } |
115 | 2.65k | bool operator!=(const CHIArg &A) { return !(*this == A); } |
116 | | }; |
117 | | |
118 | | typedef SmallVectorImpl<CHIArg>::iterator CHIIt; |
119 | | typedef iterator_range<CHIIt> CHIArgs; |
120 | | typedef DenseMap<BasicBlock *, SmallVector<CHIArg, 2>> OutValuesType; |
121 | | typedef DenseMap<BasicBlock *, SmallVector<std::pair<VNType, Instruction *>, 2>> |
122 | | InValuesType; |
123 | | |
124 | | // An invalid value number Used when inserting a single value number into |
125 | | // VNtoInsns. |
126 | | enum : unsigned { InvalidVN = ~2U }; |
127 | | |
128 | | // Records all scalar instructions candidate for code hoisting. |
129 | | class InsnInfo { |
130 | | VNtoInsns VNtoScalars; |
131 | | |
132 | | public: |
133 | | // Inserts I and its value number in VNtoScalars. |
134 | 813 | void insert(Instruction *I, GVN::ValueTable &VN) { |
135 | 813 | // Scalar instruction. |
136 | 813 | unsigned V = VN.lookupOrAdd(I); |
137 | 813 | VNtoScalars[{V, InvalidVN}].push_back(I); |
138 | 813 | } |
139 | | |
140 | 119 | const VNtoInsns &getVNTable() const { return VNtoScalars; } |
141 | | }; |
142 | | |
143 | | // Records all load instructions candidate for code hoisting. |
144 | | class LoadInfo { |
145 | | VNtoInsns VNtoLoads; |
146 | | |
147 | | public: |
148 | | // Insert Load and the value number of its memory address in VNtoLoads. |
149 | 396 | void insert(LoadInst *Load, GVN::ValueTable &VN) { |
150 | 396 | if (Load->isSimple()396 ) { |
151 | 396 | unsigned V = VN.lookupOrAdd(Load->getPointerOperand()); |
152 | 396 | VNtoLoads[{V, InvalidVN}].push_back(Load); |
153 | 396 | } |
154 | 396 | } |
155 | | |
156 | 119 | const VNtoInsns &getVNTable() const { return VNtoLoads; } |
157 | | }; |
158 | | |
159 | | // Records all store instructions candidate for code hoisting. |
160 | | class StoreInfo { |
161 | | VNtoInsns VNtoStores; |
162 | | |
163 | | public: |
164 | | // Insert the Store and a hash number of the store address and the stored |
165 | | // value in VNtoStores. |
166 | 219 | void insert(StoreInst *Store, GVN::ValueTable &VN) { |
167 | 219 | if (!Store->isSimple()) |
168 | 0 | return; |
169 | 219 | // Hash the store address and the stored value. |
170 | 219 | Value *Ptr = Store->getPointerOperand(); |
171 | 219 | Value *Val = Store->getValueOperand(); |
172 | 219 | VNtoStores[{VN.lookupOrAdd(Ptr), VN.lookupOrAdd(Val)}].push_back(Store); |
173 | 219 | } |
174 | | |
175 | 119 | const VNtoInsns &getVNTable() const { return VNtoStores; } |
176 | | }; |
177 | | |
178 | | // Records all call instructions candidate for code hoisting. |
179 | | class CallInfo { |
180 | | VNtoInsns VNtoCallsScalars; |
181 | | VNtoInsns VNtoCallsLoads; |
182 | | VNtoInsns VNtoCallsStores; |
183 | | |
184 | | public: |
185 | | // Insert Call and its value numbering in one of the VNtoCalls* containers. |
186 | 20 | void insert(CallInst *Call, GVN::ValueTable &VN) { |
187 | 20 | // A call that doesNotAccessMemory is handled as a Scalar, |
188 | 20 | // onlyReadsMemory will be handled as a Load instruction, |
189 | 20 | // all other calls will be handled as stores. |
190 | 20 | unsigned V = VN.lookupOrAdd(Call); |
191 | 20 | auto Entry = std::make_pair(V, InvalidVN); |
192 | 20 | |
193 | 20 | if (Call->doesNotAccessMemory()) |
194 | 13 | VNtoCallsScalars[Entry].push_back(Call); |
195 | 7 | else if (7 Call->onlyReadsMemory()7 ) |
196 | 7 | VNtoCallsLoads[Entry].push_back(Call); |
197 | 7 | else |
198 | 0 | VNtoCallsStores[Entry].push_back(Call); |
199 | 20 | } |
200 | | |
201 | 119 | const VNtoInsns &getScalarVNTable() const { return VNtoCallsScalars; } |
202 | | |
203 | 119 | const VNtoInsns &getLoadVNTable() const { return VNtoCallsLoads; } |
204 | | |
205 | 119 | const VNtoInsns &getStoreVNTable() const { return VNtoCallsStores; } |
206 | | }; |
207 | | |
208 | 108 | static void combineKnownMetadata(Instruction *ReplInst, Instruction *I) { |
209 | 108 | static const unsigned KnownIDs[] = { |
210 | 108 | LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, |
211 | 108 | LLVMContext::MD_noalias, LLVMContext::MD_range, |
212 | 108 | LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load, |
213 | 108 | LLVMContext::MD_invariant_group}; |
214 | 108 | combineMetadata(ReplInst, I, KnownIDs); |
215 | 108 | } |
216 | | |
217 | | // This pass hoists common computations across branches sharing common |
218 | | // dominator. The primary goal is to reduce the code size, and in some |
219 | | // cases reduce critical path (by exposing more ILP). |
220 | | class GVNHoist { |
221 | | public: |
222 | | GVNHoist(DominatorTree *DT, PostDominatorTree *PDT, AliasAnalysis *AA, |
223 | | MemoryDependenceResults *MD, MemorySSA *MSSA) |
224 | | : DT(DT), PDT(PDT), AA(AA), MD(MD), MSSA(MSSA), |
225 | | MSSAUpdater(make_unique<MemorySSAUpdater>(MSSA)), |
226 | 60 | HoistingGeps(false) {} |
227 | | |
228 | 60 | bool run(Function &F) { |
229 | 60 | NumFuncArgs = F.arg_size(); |
230 | 60 | VN.setDomTree(DT); |
231 | 60 | VN.setAliasAnalysis(AA); |
232 | 60 | VN.setMemDep(MD); |
233 | 60 | bool Res = false; |
234 | 60 | // Perform DFS Numbering of instructions. |
235 | 60 | unsigned BBI = 0; |
236 | 267 | for (const BasicBlock *BB : depth_first(&F.getEntryBlock())) { |
237 | 267 | DFSNumber[BB] = ++BBI; |
238 | 267 | unsigned I = 0; |
239 | 267 | for (auto &Inst : *BB) |
240 | 1.11k | DFSNumber[&Inst] = ++I; |
241 | 267 | } |
242 | 60 | |
243 | 60 | int ChainLength = 0; |
244 | 60 | |
245 | 60 | // FIXME: use lazy evaluation of VN to avoid the fix-point computation. |
246 | 120 | while (1120 ) { |
247 | 120 | if (MaxChainLength != -1 && 120 ++ChainLength >= MaxChainLength120 ) |
248 | 1 | return Res; |
249 | 119 | |
250 | 119 | auto HoistStat = hoistExpressions(F); |
251 | 119 | if (HoistStat.first + HoistStat.second == 0) |
252 | 59 | return Res; |
253 | 60 | |
254 | 60 | if (60 HoistStat.second > 060 ) |
255 | 60 | // To address a limitation of the current GVN, we need to rerun the |
256 | 60 | // hoisting after we hoisted loads or stores in order to be able to |
257 | 60 | // hoist all scalars dependent on the hoisted ld/st. |
258 | 43 | VN.clear(); |
259 | 120 | |
260 | 120 | Res = true; |
261 | 120 | } |
262 | 60 | |
263 | 0 | return Res; |
264 | 60 | } |
265 | | |
266 | | // Copied from NewGVN.cpp |
267 | | // This function provides global ranking of operations so that we can place |
268 | | // them in a canonical order. Note that rank alone is not necessarily enough |
269 | | // for a complete ordering, as constants all have the same rank. However, |
270 | | // generally, we will simplify an operation with all constants so that it |
271 | | // doesn't matter what order they appear in. |
272 | 5.23k | unsigned int rank(const Value *V) const { |
273 | 5.23k | // Prefer constants to undef to anything else |
274 | 5.23k | // Undef is a constant, have to check it first. |
275 | 5.23k | // Prefer smaller constants to constantexprs |
276 | 5.23k | if (isa<ConstantExpr>(V)) |
277 | 0 | return 2; |
278 | 5.23k | if (5.23k isa<UndefValue>(V)5.23k ) |
279 | 0 | return 1; |
280 | 5.23k | if (5.23k isa<Constant>(V)5.23k ) |
281 | 0 | return 0; |
282 | 5.23k | else if (auto *5.23k A5.23k = dyn_cast<Argument>(V)) |
283 | 0 | return 3 + A->getArgNo(); |
284 | 5.23k | |
285 | 5.23k | // Need to shift the instruction DFS by number of arguments + 3 to account |
286 | 5.23k | // for the constant and argument ranking above. |
287 | 5.23k | auto Result = DFSNumber.lookup(V); |
288 | 5.23k | if (Result > 0) |
289 | 5.23k | return 4 + NumFuncArgs + Result; |
290 | 0 | // Unreachable or something else, just return a really large number. |
291 | 0 | return ~0; |
292 | 0 | } |
293 | | |
294 | | private: |
295 | | GVN::ValueTable VN; |
296 | | DominatorTree *DT; |
297 | | PostDominatorTree *PDT; |
298 | | AliasAnalysis *AA; |
299 | | MemoryDependenceResults *MD; |
300 | | MemorySSA *MSSA; |
301 | | std::unique_ptr<MemorySSAUpdater> MSSAUpdater; |
302 | | DenseMap<const Value *, unsigned> DFSNumber; |
303 | | BBSideEffectsSet BBSideEffects; |
304 | | DenseSet<const BasicBlock *> HoistBarrier; |
305 | | |
306 | | SmallVector<BasicBlock *, 32> IDFBlocks; |
307 | | unsigned NumFuncArgs; |
308 | | const bool HoistingGeps; |
309 | | |
310 | | enum InsKind { Unknown, Scalar, Load, Store }; |
311 | | |
312 | | // Return true when there are exception handling in BB. |
313 | 783 | bool hasEH(const BasicBlock *BB) { |
314 | 783 | auto It = BBSideEffects.find(BB); |
315 | 783 | if (It != BBSideEffects.end()) |
316 | 641 | return It->second; |
317 | 142 | |
318 | 142 | if (142 BB->isEHPad() || 142 BB->hasAddressTaken()140 ) { |
319 | 2 | BBSideEffects[BB] = true; |
320 | 2 | return true; |
321 | 2 | } |
322 | 140 | |
323 | 140 | if (140 BB->getTerminator()->mayThrow()140 ) { |
324 | 0 | BBSideEffects[BB] = true; |
325 | 0 | return true; |
326 | 0 | } |
327 | 140 | |
328 | 140 | BBSideEffects[BB] = false; |
329 | 140 | return false; |
330 | 140 | } |
331 | | |
332 | | // Return true when a successor of BB dominates A. |
333 | 0 | bool successorDominate(const BasicBlock *BB, const BasicBlock *A) { |
334 | 0 | for (const BasicBlock *Succ : BB->getTerminator()->successors()) |
335 | 0 | if (DT->dominates(Succ, A)) |
336 | 0 | return true; |
337 | 0 |
|
338 | 0 | return false; |
339 | 0 | } |
340 | | |
341 | | /* Return true when I1 appears before I2 in the instructions of BB. */ |
342 | 46 | bool firstInBB(const Instruction *I1, const Instruction *I2) { |
343 | 46 | assert(I1->getParent() == I2->getParent()); |
344 | 46 | unsigned I1DFS = DFSNumber.lookup(I1); |
345 | 46 | unsigned I2DFS = DFSNumber.lookup(I2); |
346 | 46 | assert(I1DFS && I2DFS); |
347 | 46 | return I1DFS < I2DFS; |
348 | 46 | } |
349 | | |
350 | | // Return true when there are memory uses of Def in BB. |
351 | | bool hasMemoryUse(const Instruction *NewPt, MemoryDef *Def, |
352 | 46 | const BasicBlock *BB) { |
353 | 46 | const MemorySSA::AccessList *Acc = MSSA->getBlockAccesses(BB); |
354 | 46 | if (!Acc) |
355 | 0 | return false; |
356 | 46 | |
357 | 46 | Instruction *OldPt = Def->getMemoryInst(); |
358 | 46 | const BasicBlock *OldBB = OldPt->getParent(); |
359 | 46 | const BasicBlock *NewBB = NewPt->getParent(); |
360 | 46 | bool ReachedNewPt = false; |
361 | 46 | |
362 | 46 | for (const MemoryAccess &MA : *Acc) |
363 | 76 | if (const MemoryUse *76 MU76 = dyn_cast<MemoryUse>(&MA)) { |
364 | 10 | Instruction *Insn = MU->getMemoryInst(); |
365 | 10 | |
366 | 10 | // Do not check whether MU aliases Def when MU occurs after OldPt. |
367 | 10 | if (BB == OldBB && 10 firstInBB(OldPt, Insn)10 ) |
368 | 9 | break; |
369 | 1 | |
370 | 1 | // Do not check whether MU aliases Def when MU occurs before NewPt. |
371 | 1 | if (1 BB == NewBB1 ) { |
372 | 0 | if (!ReachedNewPt0 ) { |
373 | 0 | if (firstInBB(Insn, NewPt)) |
374 | 0 | continue; |
375 | 0 | ReachedNewPt = true; |
376 | 0 | } |
377 | 0 | } |
378 | 1 | if (1 MemorySSAUtil::defClobbersUseOrDef(Def, MU, *AA)1 ) |
379 | 1 | return true; |
380 | 45 | } |
381 | 45 | |
382 | 45 | return false; |
383 | 45 | } |
384 | | |
385 | | bool hasEHhelper(const BasicBlock *BB, const BasicBlock *SrcBB, |
386 | 315 | int &NBBsOnAllPaths) { |
387 | 315 | // Stop walk once the limit is reached. |
388 | 315 | if (NBBsOnAllPaths == 0) |
389 | 1 | return true; |
390 | 314 | |
391 | 314 | // Impossible to hoist with exceptions on the path. |
392 | 314 | if (314 hasEH(BB)314 ) |
393 | 8 | return true; |
394 | 306 | |
395 | 306 | // No such instruction after HoistBarrier in a basic block was |
396 | 306 | // selected for hoisting so instructions selected within basic block with |
397 | 306 | // a hoist barrier can be hoisted. |
398 | 306 | if (306 (BB != SrcBB) && 306 HoistBarrier.count(BB)16 ) |
399 | 2 | return true; |
400 | 304 | |
401 | 304 | return false; |
402 | 304 | } |
403 | | |
404 | | // Return true when there are exception handling or loads of memory Def |
405 | | // between Def and NewPt. This function is only called for stores: Def is |
406 | | // the MemoryDef of the store to be hoisted. |
407 | | |
408 | | // Decrement by 1 NBBsOnAllPaths for each block between HoistPt and BB, and |
409 | | // return true when the counter NBBsOnAllPaths reaces 0, except when it is |
410 | | // initialized to -1 which is unlimited. |
411 | | bool hasEHOrLoadsOnPath(const Instruction *NewPt, MemoryDef *Def, |
412 | 46 | int &NBBsOnAllPaths) { |
413 | 46 | const BasicBlock *NewBB = NewPt->getParent(); |
414 | 46 | const BasicBlock *OldBB = Def->getBlock(); |
415 | 46 | assert(DT->dominates(NewBB, OldBB) && "invalid path"); |
416 | 46 | assert(DT->dominates(Def->getDefiningAccess()->getBlock(), NewBB) && |
417 | 46 | "def does not dominate new hoisting point"); |
418 | 46 | |
419 | 46 | // Walk all basic blocks reachable in depth-first iteration on the inverse |
420 | 46 | // CFG from OldBB to NewBB. These blocks are all the blocks that may be |
421 | 46 | // executed between the execution of NewBB and OldBB. Hoisting an expression |
422 | 46 | // from OldBB into NewBB has to be safe on all execution paths. |
423 | 136 | for (auto I = idf_begin(OldBB), E = idf_end(OldBB); I != E136 ;) { |
424 | 91 | const BasicBlock *BB = *I; |
425 | 91 | if (BB == NewBB91 ) { |
426 | 45 | // Stop traversal when reaching HoistPt. |
427 | 45 | I.skipChildren(); |
428 | 45 | continue; |
429 | 45 | } |
430 | 46 | |
431 | 46 | if (46 hasEHhelper(BB, OldBB, NBBsOnAllPaths)46 ) |
432 | 0 | return true; |
433 | 46 | |
434 | 46 | // Check that we do not move a store past loads. |
435 | 46 | if (46 hasMemoryUse(NewPt, Def, BB)46 ) |
436 | 1 | return true; |
437 | 45 | |
438 | 45 | // -1 is unlimited number of blocks on all paths. |
439 | 45 | if (45 NBBsOnAllPaths != -145 ) |
440 | 45 | --NBBsOnAllPaths; |
441 | 91 | |
442 | 91 | ++I; |
443 | 91 | } |
444 | 46 | |
445 | 45 | return false; |
446 | 46 | } |
447 | | |
448 | | // Return true when there are exception handling between HoistPt and BB. |
449 | | // Decrement by 1 NBBsOnAllPaths for each block between HoistPt and BB, and |
450 | | // return true when the counter NBBsOnAllPaths reaches 0, except when it is |
451 | | // initialized to -1 which is unlimited. |
452 | | bool hasEHOnPath(const BasicBlock *HoistPt, const BasicBlock *SrcBB, |
453 | 272 | int &NBBsOnAllPaths) { |
454 | 272 | assert(DT->dominates(HoistPt, SrcBB) && "Invalid path"); |
455 | 272 | |
456 | 272 | // Walk all basic blocks reachable in depth-first iteration on |
457 | 272 | // the inverse CFG from BBInsn to NewHoistPt. These blocks are all the |
458 | 272 | // blocks that may be executed between the execution of NewHoistPt and |
459 | 272 | // BBInsn. Hoisting an expression from BBInsn into NewHoistPt has to be safe |
460 | 272 | // on all execution paths. |
461 | 791 | for (auto I = idf_begin(SrcBB), E = idf_end(SrcBB); I != E791 ;) { |
462 | 530 | const BasicBlock *BB = *I; |
463 | 530 | if (BB == HoistPt530 ) { |
464 | 261 | // Stop traversal when reaching NewHoistPt. |
465 | 261 | I.skipChildren(); |
466 | 261 | continue; |
467 | 261 | } |
468 | 269 | |
469 | 269 | if (269 hasEHhelper(BB, SrcBB, NBBsOnAllPaths)269 ) |
470 | 11 | return true; |
471 | 258 | |
472 | 258 | // -1 is unlimited number of blocks on all paths. |
473 | 258 | if (258 NBBsOnAllPaths != -1258 ) |
474 | 258 | --NBBsOnAllPaths; |
475 | 530 | |
476 | 530 | ++I; |
477 | 530 | } |
478 | 272 | |
479 | 261 | return false; |
480 | 272 | } |
481 | | |
482 | | // Return true when it is safe to hoist a memory load or store U from OldPt |
483 | | // to NewPt. |
484 | | bool safeToHoistLdSt(const Instruction *NewPt, const Instruction *OldPt, |
485 | 220 | MemoryUseOrDef *U, InsKind K, int &NBBsOnAllPaths) { |
486 | 220 | |
487 | 220 | // In place hoisting is safe. |
488 | 220 | if (NewPt == OldPt) |
489 | 0 | return true; |
490 | 220 | |
491 | 220 | const BasicBlock *NewBB = NewPt->getParent(); |
492 | 220 | const BasicBlock *OldBB = OldPt->getParent(); |
493 | 220 | const BasicBlock *UBB = U->getBlock(); |
494 | 220 | |
495 | 220 | // Check for dependences on the Memory SSA. |
496 | 220 | MemoryAccess *D = U->getDefiningAccess(); |
497 | 220 | BasicBlock *DBB = D->getBlock(); |
498 | 220 | if (DT->properlyDominates(NewBB, DBB)) |
499 | 220 | // Cannot move the load or store to NewBB above its definition in DBB. |
500 | 51 | return false; |
501 | 169 | |
502 | 169 | if (169 NewBB == DBB && 169 !MSSA->isLiveOnEntryDef(D)124 ) |
503 | 46 | if (auto *46 UD46 = dyn_cast<MemoryUseOrDef>(D)) |
504 | 36 | if (36 firstInBB(NewPt, UD->getMemoryInst())36 ) |
505 | 36 | // Cannot move the load or store to NewPt above its definition in D. |
506 | 0 | return false; |
507 | 169 | |
508 | 169 | // Check for unsafe hoistings due to side effects. |
509 | 169 | if (169 K == InsKind::Store169 ) { |
510 | 46 | if (hasEHOrLoadsOnPath(NewPt, dyn_cast<MemoryDef>(U), NBBsOnAllPaths)) |
511 | 1 | return false; |
512 | 123 | } else if (123 hasEHOnPath(NewBB, OldBB, NBBsOnAllPaths)123 ) |
513 | 0 | return false; |
514 | 168 | |
515 | 168 | if (168 UBB == NewBB168 ) { |
516 | 12 | if (DT->properlyDominates(DBB, NewBB)) |
517 | 9 | return true; |
518 | 12 | assert(UBB == DBB); |
519 | 3 | assert(MSSA->locallyDominates(D, U)); |
520 | 3 | } |
521 | 168 | |
522 | 168 | // No side effects: it is safe to hoist. |
523 | 159 | return true; |
524 | 220 | } |
525 | | |
526 | | // Return true when it is safe to hoist scalar instructions from all blocks in |
527 | | // WL to HoistBB. |
528 | | bool safeToHoistScalar(const BasicBlock *HoistBB, const BasicBlock *BB, |
529 | 149 | int &NBBsOnAllPaths) { |
530 | 149 | return !hasEHOnPath(HoistBB, BB, NBBsOnAllPaths); |
531 | 149 | } |
532 | | |
533 | | // In the inverse CFG, the dominance frontier of basic block (BB) is the |
534 | | // point where ANTIC needs to be computed for instructions which are going |
535 | | // to be hoisted. Since this point does not change during gvn-hoist, |
536 | | // we compute it only once (on demand). |
537 | | // The ides is inspired from: |
538 | | // "Partial Redundancy Elimination in SSA Form" |
539 | | // ROBERT KENNEDY, SUN CHAN, SHIN-MING LIU, RAYMOND LO, PENG TU and FRED CHOW |
540 | | // They use similar idea in the forward graph to to find fully redundant and |
541 | | // partially redundant expressions, here it is used in the inverse graph to |
542 | | // find fully anticipable instructions at merge point (post-dominator in |
543 | | // the inverse CFG). |
544 | | // Returns the edge via which an instruction in BB will get the values from. |
545 | | |
546 | | // Returns true when the values are flowing out to each edge. |
547 | 252 | bool valueAnticipable(CHIArgs C, TerminatorInst *TI) const { |
548 | 252 | if (TI->getNumSuccessors() > std::distance(C.begin(), C.end())) |
549 | 122 | return false; // Not enough args in this CHI. |
550 | 130 | |
551 | 130 | for (auto CHI : C) 130 { |
552 | 243 | BasicBlock *Dest = CHI.Dest; |
553 | 243 | // Find if all the edges have values flowing out of BB. |
554 | 357 | bool Found = any_of(TI->successors(), [Dest](const BasicBlock *BB) { |
555 | 357 | return BB == Dest; }); |
556 | 243 | if (!Found) |
557 | 0 | return false; |
558 | 130 | } |
559 | 130 | return true; |
560 | 130 | } |
561 | | |
562 | | // Check if it is safe to hoist values tracked by CHI in the range |
563 | | // [Begin, End) and accumulate them in Safe. |
564 | | void checkSafety(CHIArgs C, BasicBlock *BB, InsKind K, |
565 | 252 | SmallVectorImpl<CHIArg> &Safe) { |
566 | 252 | int NumBBsOnAllPaths = MaxNumberOfBBSInPath; |
567 | 851 | for (auto CHI : C) { |
568 | 851 | Instruction *Insn = CHI.I; |
569 | 851 | if (!Insn) // No instruction was inserted in this CHI. |
570 | 482 | continue; |
571 | 369 | if (369 K == InsKind::Scalar369 ) { |
572 | 149 | if (safeToHoistScalar(BB, Insn->getParent(), NumBBsOnAllPaths)) |
573 | 138 | Safe.push_back(CHI); |
574 | 369 | } else { |
575 | 220 | MemoryUseOrDef *UD = MSSA->getMemoryAccess(Insn); |
576 | 220 | if (safeToHoistLdSt(BB->getTerminator(), Insn, UD, K, NumBBsOnAllPaths)) |
577 | 168 | Safe.push_back(CHI); |
578 | 220 | } |
579 | 851 | } |
580 | 252 | } |
581 | | |
582 | | typedef DenseMap<VNType, SmallVector<Instruction *, 2>> RenameStackType; |
583 | | // Push all the VNs corresponding to BB into RenameStack. |
584 | | void fillRenameStack(BasicBlock *BB, InValuesType &ValueBBs, |
585 | 3.30k | RenameStackType &RenameStack) { |
586 | 3.30k | auto it1 = ValueBBs.find(BB); |
587 | 3.30k | if (it1 != ValueBBs.end()3.30k ) { |
588 | 275 | // Iterate in reverse order to keep lower ranked values on the top. |
589 | 469 | for (std::pair<VNType, Instruction *> &VI : reverse(it1->second)) { |
590 | 469 | // Get the value of instruction I |
591 | 469 | DEBUG(dbgs() << "\nPushing on stack: " << *VI.second); |
592 | 469 | RenameStack[VI.first].push_back(VI.second); |
593 | 469 | } |
594 | 275 | } |
595 | 3.30k | } |
596 | | |
597 | | void fillChiArgs(BasicBlock *BB, OutValuesType &CHIBBs, |
598 | 3.30k | RenameStackType &RenameStack) { |
599 | 3.30k | // For each *predecessor* (because Post-DOM) of BB check if it has a CHI |
600 | 3.58k | for (auto Pred : predecessors(BB)) { |
601 | 3.58k | auto P = CHIBBs.find(Pred); |
602 | 3.58k | if (P == CHIBBs.end()3.58k ) { |
603 | 3.27k | continue; |
604 | 3.27k | } |
605 | 305 | DEBUG305 (dbgs() << "\nLooking at CHIs in: " << Pred->getName();); |
606 | 305 | // A CHI is found (BB -> Pred is an edge in the CFG) |
607 | 305 | // Pop the stack until Top(V) = Ve. |
608 | 305 | auto &VCHI = P->second; |
609 | 972 | for (auto It = VCHI.begin(), E = VCHI.end(); It != E972 ;) { |
610 | 667 | CHIArg &C = *It; |
611 | 667 | if (!C.Dest667 ) { |
612 | 487 | auto si = RenameStack.find(C.VN); |
613 | 487 | // The Basic Block where CHI is must dominate the value we want to |
614 | 487 | // track in a CHI. In the PDom walk, there can be values in the |
615 | 487 | // stack which are not control dependent e.g., nested loop. |
616 | 487 | if (si != RenameStack.end() && 487 si->second.size()437 && |
617 | 487 | DT->dominates(Pred, si->second.back()->getParent())380 ) { |
618 | 369 | C.Dest = BB; // Assign the edge |
619 | 369 | C.I = si->second.pop_back_val(); // Assign the argument |
620 | 369 | DEBUG(dbgs() << "\nCHI Inserted in BB: " << C.Dest->getName() |
621 | 369 | << *C.I << ", VN: " << C.VN.first << ", " |
622 | 369 | << C.VN.second); |
623 | 369 | } |
624 | 487 | // Move to next CHI of a different value |
625 | 487 | It = std::find_if(It, VCHI.end(), |
626 | 1.70k | [It](CHIArg &A) { return A != *It; }); |
627 | 487 | } else |
628 | 180 | ++It; |
629 | 667 | } |
630 | 3.58k | } |
631 | 3.30k | } |
632 | | |
633 | | // Walk the post-dominator tree top-down and use a stack for each value to |
634 | | // store the last value you see. When you hit a CHI from a given edge, the |
635 | | // value to use as the argument is at the top of the stack, add the value to |
636 | | // CHI and pop. |
637 | 714 | void insertCHI(InValuesType &ValueBBs, OutValuesType &CHIBBs) { |
638 | 714 | auto Root = PDT->getNode(nullptr); |
639 | 714 | if (!Root) |
640 | 0 | return; |
641 | 714 | // Depth first walk on PDom tree to fill the CHIargs at each PDF. |
642 | 714 | RenameStackType RenameStack; |
643 | 4.01k | for (auto Node : depth_first(Root)) { |
644 | 4.01k | BasicBlock *BB = Node->getBlock(); |
645 | 4.01k | if (!BB) |
646 | 714 | continue; |
647 | 3.30k | |
648 | 3.30k | // Collect all values in BB and push to stack. |
649 | 3.30k | fillRenameStack(BB, ValueBBs, RenameStack); |
650 | 3.30k | |
651 | 3.30k | // Fill outgoing values in each CHI corresponding to BB. |
652 | 3.30k | fillChiArgs(BB, CHIBBs, RenameStack); |
653 | 3.30k | } |
654 | 714 | } |
655 | | |
656 | | // Walk all the CHI-nodes to find ones which have a empty-entry and remove |
657 | | // them Then collect all the instructions which are safe to hoist and see if |
658 | | // they form a list of anticipable values. OutValues contains CHIs |
659 | | // corresponding to each basic block. |
660 | | void findHoistableCandidates(OutValuesType &CHIBBs, InsKind K, |
661 | 714 | HoistingPointList &HPL) { |
662 | 1.41k | auto cmpVN = [](const CHIArg &A, const CHIArg &B) { return A.VN < B.VN; }; |
663 | 714 | |
664 | 714 | // CHIArgs now have the outgoing values, so check for anticipability and |
665 | 714 | // accumulate hoistable candidates in HPL. |
666 | 159 | for (std::pair<BasicBlock *, SmallVector<CHIArg, 2>> &A : CHIBBs) { |
667 | 159 | BasicBlock *BB = A.first; |
668 | 159 | SmallVectorImpl<CHIArg> &CHIs = A.second; |
669 | 159 | // Vector of PHIs contains PHIs for different instructions. |
670 | 159 | // Sort the args according to their VNs, such that identical |
671 | 159 | // instructions are together. |
672 | 159 | std::sort(CHIs.begin(), CHIs.end(), cmpVN); |
673 | 159 | auto TI = BB->getTerminator(); |
674 | 159 | auto B = CHIs.begin(); |
675 | 159 | // [PreIt, PHIIt) form a range of CHIs which have identical VNs. |
676 | 159 | auto PHIIt = std::find_if(CHIs.begin(), CHIs.end(), |
677 | 406 | [B](CHIArg &A) { return A != *B; }); |
678 | 159 | auto PrevIt = CHIs.begin(); |
679 | 411 | while (PrevIt != PHIIt411 ) { |
680 | 252 | // Collect values which satisfy safety checks. |
681 | 252 | SmallVector<CHIArg, 2> Safe; |
682 | 252 | // We check for safety first because there might be multiple values in |
683 | 252 | // the same path, some of which are not safe to be hoisted, but overall |
684 | 252 | // each edge has at least one value which can be hoisted, making the |
685 | 252 | // value anticipable along that path. |
686 | 252 | checkSafety(make_range(PrevIt, PHIIt), BB, K, Safe); |
687 | 252 | |
688 | 252 | // List of safe values should be anticipable at TI. |
689 | 252 | if (valueAnticipable(make_range(Safe.begin(), Safe.end()), TI)252 ) { |
690 | 130 | HPL.push_back({BB, SmallVecInsn()}); |
691 | 130 | SmallVecInsn &V = HPL.back().second; |
692 | 130 | for (auto B : Safe) |
693 | 243 | V.push_back(B.I); |
694 | 130 | } |
695 | 252 | |
696 | 252 | // Check other VNs |
697 | 252 | PrevIt = PHIIt; |
698 | 252 | PHIIt = std::find_if(PrevIt, CHIs.end(), |
699 | 538 | [PrevIt](CHIArg &A) { return A != *PrevIt; }); |
700 | 252 | } |
701 | 159 | } |
702 | 714 | } |
703 | | |
704 | | // Compute insertion points for each values which can be fully anticipated at |
705 | | // a dominator. HPL contains all such values. |
706 | | void computeInsertionPoints(const VNtoInsns &Map, HoistingPointList &HPL, |
707 | 714 | InsKind K) { |
708 | 714 | // Sort VNs based on their rankings |
709 | 714 | std::vector<VNType> Ranks; |
710 | 1.17k | for (const auto &Entry : Map) { |
711 | 1.17k | Ranks.push_back(Entry.first); |
712 | 1.17k | } |
713 | 714 | |
714 | 714 | // TODO: Remove fully-redundant expressions. |
715 | 714 | // Get instruction from the Map, assume that all the Instructions |
716 | 714 | // with same VNs have same rank (this is an approximation). |
717 | 714 | std::sort(Ranks.begin(), Ranks.end(), |
718 | 2.61k | [this, &Map](const VNType &r1, const VNType &r2) { |
719 | 2.61k | return (rank(*Map.lookup(r1).begin()) < |
720 | 2.61k | rank(*Map.lookup(r2).begin())); |
721 | 2.61k | }); |
722 | 714 | |
723 | 714 | // - Sort VNs according to their rank, and start with lowest ranked VN |
724 | 714 | // - Take a VN and for each instruction with same VN |
725 | 714 | // - Find the dominance frontier in the inverse graph (PDF) |
726 | 714 | // - Insert the chi-node at PDF |
727 | 714 | // - Remove the chi-nodes with missing entries |
728 | 714 | // - Remove values from CHI-nodes which do not truly flow out, e.g., |
729 | 714 | // modified along the path. |
730 | 714 | // - Collect the remaining values that are still anticipable |
731 | 714 | SmallVector<BasicBlock *, 2> IDFBlocks; |
732 | 714 | ReverseIDFCalculator IDFs(*PDT); |
733 | 714 | OutValuesType OutValue; |
734 | 714 | InValuesType InValue; |
735 | 1.17k | for (const auto &R : Ranks) { |
736 | 1.17k | const SmallVecInsn &V = Map.lookup(R); |
737 | 1.17k | if (V.size() < 2) |
738 | 979 | continue; |
739 | 200 | const VNType &VN = R; |
740 | 200 | SmallPtrSet<BasicBlock *, 2> VNBlocks; |
741 | 469 | for (auto &I : V) { |
742 | 469 | BasicBlock *BBI = I->getParent(); |
743 | 469 | if (!hasEH(BBI)) |
744 | 463 | VNBlocks.insert(BBI); |
745 | 469 | } |
746 | 200 | // Compute the Post Dominance Frontiers of each basic block |
747 | 200 | // The dominance frontier of a live block X in the reverse |
748 | 200 | // control graph is the set of blocks upon which X is control |
749 | 200 | // dependent. The following sequence computes the set of blocks |
750 | 200 | // which currently have dead terminators that are control |
751 | 200 | // dependence sources of a block which is in NewLiveBlocks. |
752 | 200 | IDFs.setDefiningBlocks(VNBlocks); |
753 | 200 | IDFs.calculate(IDFBlocks); |
754 | 200 | |
755 | 200 | // Make a map of BB vs instructions to be hoisted. |
756 | 669 | for (unsigned i = 0; i < V.size()669 ; ++i469 ) { |
757 | 469 | InValue[V[i]->getParent()].push_back(std::make_pair(VN, V[i])); |
758 | 469 | } |
759 | 200 | // Insert empty CHI node for this VN. This is used to factor out |
760 | 200 | // basic blocks where the ANTIC can potentially change. |
761 | 418 | for (auto IDFB : IDFBlocks) { // TODO: Prune out useless CHI insertions. |
762 | 1.42k | for (unsigned i = 0; i < V.size()1.42k ; ++i1.01k ) { |
763 | 1.01k | CHIArg C = {VN, nullptr, nullptr}; |
764 | 1.01k | if (DT->dominates(IDFB, V[i]->getParent())1.01k ) { // Ignore spurious PDFs. |
765 | 851 | // InValue[V[i]->getParent()].push_back(std::make_pair(VN, V[i])); |
766 | 851 | OutValue[IDFB].push_back(C); |
767 | 851 | DEBUG(dbgs() << "\nInsertion a CHI for BB: " << IDFB->getName() |
768 | 851 | << ", for Insn: " << *V[i]); |
769 | 851 | } |
770 | 1.01k | } |
771 | 418 | } |
772 | 1.17k | } |
773 | 714 | |
774 | 714 | // Insert CHI args at each PDF to iterate on factored graph of |
775 | 714 | // control dependence. |
776 | 714 | insertCHI(InValue, OutValue); |
777 | 714 | // Using the CHI args inserted at each PDF, find fully anticipable values. |
778 | 714 | findHoistableCandidates(OutValue, K, HPL); |
779 | 714 | } |
780 | | |
781 | | // Return true when all operands of Instr are available at insertion point |
782 | | // HoistPt. When limiting the number of hoisted expressions, one could hoist |
783 | | // a load without hoisting its access function. So before hoisting any |
784 | | // expression, make sure that all its operands are available at insert point. |
785 | | bool allOperandsAvailable(const Instruction *I, |
786 | 112 | const BasicBlock *HoistPt) const { |
787 | 112 | for (const Use &Op : I->operands()) |
788 | 171 | if (const auto *171 Inst171 = dyn_cast<Instruction>(&Op)) |
789 | 101 | if (101 !DT->dominates(Inst->getParent(), HoistPt)101 ) |
790 | 19 | return false; |
791 | 93 | |
792 | 93 | return true; |
793 | 93 | } |
794 | | |
795 | | // Same as allOperandsAvailable with recursive check for GEP operands. |
796 | | bool allGepOperandsAvailable(const Instruction *I, |
797 | 21 | const BasicBlock *HoistPt) const { |
798 | 21 | for (const Use &Op : I->operands()) |
799 | 53 | if (const auto *53 Inst53 = dyn_cast<Instruction>(&Op)) |
800 | 14 | if (14 !DT->dominates(Inst->getParent(), HoistPt)14 ) { |
801 | 2 | if (const GetElementPtrInst *GepOp = |
802 | 2 | dyn_cast<GetElementPtrInst>(Inst)) { |
803 | 2 | if (!allGepOperandsAvailable(GepOp, HoistPt)) |
804 | 0 | return false; |
805 | 2 | // Gep is available if all operands of GepOp are available. |
806 | 0 | } else { |
807 | 0 | // Gep is not available if it has operands other than GEPs that are |
808 | 0 | // defined in blocks not dominating HoistPt. |
809 | 0 | return false; |
810 | 0 | } |
811 | 21 | } |
812 | 21 | return true; |
813 | 21 | } |
814 | | |
815 | | // Make all operands of the GEP available. |
816 | | void makeGepsAvailable(Instruction *Repl, BasicBlock *HoistPt, |
817 | | const SmallVecInsn &InstructionsToHoist, |
818 | 17 | Instruction *Gep) const { |
819 | 17 | assert(allGepOperandsAvailable(Gep, HoistPt) && |
820 | 17 | "GEP operands not available"); |
821 | 17 | |
822 | 17 | Instruction *ClonedGep = Gep->clone(); |
823 | 62 | for (unsigned i = 0, e = Gep->getNumOperands(); i != e62 ; ++i45 ) |
824 | 45 | if (Instruction *45 Op45 = dyn_cast<Instruction>(Gep->getOperand(i))) { |
825 | 12 | |
826 | 12 | // Check whether the operand is already available. |
827 | 12 | if (DT->dominates(Op->getParent(), HoistPt)) |
828 | 10 | continue; |
829 | 2 | |
830 | 2 | // As a GEP can refer to other GEPs, recursively make all the operands |
831 | 2 | // of this GEP available at HoistPt. |
832 | 2 | if (GetElementPtrInst *2 GepOp2 = dyn_cast<GetElementPtrInst>(Op)) |
833 | 2 | makeGepsAvailable(ClonedGep, HoistPt, InstructionsToHoist, GepOp); |
834 | 45 | } |
835 | 17 | |
836 | 17 | // Copy Gep and replace its uses in Repl with ClonedGep. |
837 | 17 | ClonedGep->insertBefore(HoistPt->getTerminator()); |
838 | 17 | |
839 | 17 | // Conservatively discard any optimization hints, they may differ on the |
840 | 17 | // other paths. |
841 | 17 | ClonedGep->dropUnknownNonDebugMetadata(); |
842 | 17 | |
843 | 17 | // If we have optimization hints which agree with each other along different |
844 | 17 | // paths, preserve them. |
845 | 34 | for (const Instruction *OtherInst : InstructionsToHoist) { |
846 | 34 | const GetElementPtrInst *OtherGep; |
847 | 34 | if (auto *OtherLd = dyn_cast<LoadInst>(OtherInst)) |
848 | 22 | OtherGep = cast<GetElementPtrInst>(OtherLd->getPointerOperand()); |
849 | 34 | else |
850 | 12 | OtherGep = cast<GetElementPtrInst>( |
851 | 12 | cast<StoreInst>(OtherInst)->getPointerOperand()); |
852 | 34 | ClonedGep->andIRFlags(OtherGep); |
853 | 34 | } |
854 | 17 | |
855 | 17 | // Replace uses of Gep with ClonedGep in Repl. |
856 | 17 | Repl->replaceUsesOfWith(Gep, ClonedGep); |
857 | 17 | } |
858 | | |
859 | 108 | void updateAlignment(Instruction *I, Instruction *Repl) { |
860 | 108 | if (auto *ReplacementLoad108 = dyn_cast<LoadInst>(Repl)) { |
861 | 40 | ReplacementLoad->setAlignment( |
862 | 40 | std::min(ReplacementLoad->getAlignment(), |
863 | 40 | cast<LoadInst>(I)->getAlignment())); |
864 | 40 | ++NumLoadsRemoved; |
865 | 108 | } else if (auto *68 ReplacementStore68 = dyn_cast<StoreInst>(Repl)) { |
866 | 12 | ReplacementStore->setAlignment( |
867 | 12 | std::min(ReplacementStore->getAlignment(), |
868 | 12 | cast<StoreInst>(I)->getAlignment())); |
869 | 12 | ++NumStoresRemoved; |
870 | 68 | } else if (auto *56 ReplacementAlloca56 = dyn_cast<AllocaInst>(Repl)) { |
871 | 0 | ReplacementAlloca->setAlignment( |
872 | 0 | std::max(ReplacementAlloca->getAlignment(), |
873 | 0 | cast<AllocaInst>(I)->getAlignment())); |
874 | 56 | } else if (56 isa<CallInst>(Repl)56 ) { |
875 | 3 | ++NumCallsRemoved; |
876 | 3 | } |
877 | 108 | } |
878 | | |
879 | | // Remove all the instructions in Candidates and replace their usage with Repl. |
880 | | // Returns the number of instructions removed. |
881 | | unsigned rauw(const SmallVecInsn &Candidates, Instruction *Repl, |
882 | 125 | MemoryUseOrDef *NewMemAcc) { |
883 | 125 | unsigned NR = 0; |
884 | 233 | for (Instruction *I : Candidates) { |
885 | 233 | if (I != Repl233 ) { |
886 | 108 | ++NR; |
887 | 108 | updateAlignment(I, Repl); |
888 | 108 | if (NewMemAcc108 ) { |
889 | 52 | // Update the uses of the old MSSA access with NewMemAcc. |
890 | 52 | MemoryAccess *OldMA = MSSA->getMemoryAccess(I); |
891 | 52 | OldMA->replaceAllUsesWith(NewMemAcc); |
892 | 52 | MSSAUpdater->removeMemoryAccess(OldMA); |
893 | 52 | } |
894 | 108 | |
895 | 108 | Repl->andIRFlags(I); |
896 | 108 | combineKnownMetadata(Repl, I); |
897 | 108 | I->replaceAllUsesWith(Repl); |
898 | 108 | // Also invalidate the Alias Analysis cache. |
899 | 108 | MD->removeInstruction(I); |
900 | 108 | I->eraseFromParent(); |
901 | 108 | } |
902 | 233 | } |
903 | 125 | return NR; |
904 | 125 | } |
905 | | |
906 | | // Replace all Memory PHI usage with NewMemAcc. |
907 | 60 | void raMPHIuw(MemoryUseOrDef *NewMemAcc) { |
908 | 60 | SmallPtrSet<MemoryPhi *, 4> UsePhis; |
909 | 60 | for (User *U : NewMemAcc->users()) |
910 | 16 | if (MemoryPhi *16 Phi16 = dyn_cast<MemoryPhi>(U)) |
911 | 11 | UsePhis.insert(Phi); |
912 | 60 | |
913 | 9 | for (MemoryPhi *Phi : UsePhis) { |
914 | 9 | auto In = Phi->incoming_values(); |
915 | 12 | if (all_of(In, [&](Use &U) 9 { return U == NewMemAcc; }12 )) { |
916 | 2 | Phi->replaceAllUsesWith(NewMemAcc); |
917 | 2 | MSSAUpdater->removeMemoryAccess(Phi); |
918 | 2 | } |
919 | 9 | } |
920 | 60 | } |
921 | | |
922 | | // Remove all other instructions and replace them with Repl. |
923 | | unsigned removeAndReplace(const SmallVecInsn &Candidates, Instruction *Repl, |
924 | 125 | BasicBlock *DestBB, bool MoveAccess) { |
925 | 125 | MemoryUseOrDef *NewMemAcc = MSSA->getMemoryAccess(Repl); |
926 | 125 | if (MoveAccess && 125 NewMemAcc107 ) { |
927 | 51 | // The definition of this ld/st will not change: ld/st hoisting is |
928 | 51 | // legal when the ld/st is not moved past its current definition. |
929 | 51 | MSSAUpdater->moveToPlace(NewMemAcc, DestBB, MemorySSA::End); |
930 | 51 | } |
931 | 125 | |
932 | 125 | // Replace all other instructions with Repl with memory access NewMemAcc. |
933 | 125 | unsigned NR = rauw(Candidates, Repl, NewMemAcc); |
934 | 125 | |
935 | 125 | // Remove MemorySSA phi nodes with the same arguments. |
936 | 125 | if (NewMemAcc) |
937 | 60 | raMPHIuw(NewMemAcc); |
938 | 125 | return NR; |
939 | 125 | } |
940 | | |
941 | | // In the case Repl is a load or a store, we make all their GEPs |
942 | | // available: GEPs are not hoisted by default to avoid the address |
943 | | // computations to be hoisted without the associated load or store. |
944 | | bool makeGepOperandsAvailable(Instruction *Repl, BasicBlock *HoistPt, |
945 | 19 | const SmallVecInsn &InstructionsToHoist) const { |
946 | 19 | // Check whether the GEP of a ld/st can be synthesized at HoistPt. |
947 | 19 | GetElementPtrInst *Gep = nullptr; |
948 | 19 | Instruction *Val = nullptr; |
949 | 19 | if (auto *Ld19 = dyn_cast<LoadInst>(Repl)) { |
950 | 9 | Gep = dyn_cast<GetElementPtrInst>(Ld->getPointerOperand()); |
951 | 19 | } else if (auto *10 St10 = dyn_cast<StoreInst>(Repl)) { |
952 | 9 | Gep = dyn_cast<GetElementPtrInst>(St->getPointerOperand()); |
953 | 9 | Val = dyn_cast<Instruction>(St->getValueOperand()); |
954 | 9 | // Check that the stored value is available. |
955 | 9 | if (Val9 ) { |
956 | 7 | if (isa<GetElementPtrInst>(Val)7 ) { |
957 | 5 | // Check whether we can compute the GEP at HoistPt. |
958 | 5 | if (!allGepOperandsAvailable(Val, HoistPt)) |
959 | 0 | return false; |
960 | 2 | } else if (2 !DT->dominates(Val->getParent(), HoistPt)2 ) |
961 | 0 | return false; |
962 | 19 | } |
963 | 10 | } |
964 | 19 | |
965 | 19 | // Check whether we can compute the Gep at HoistPt. |
966 | 19 | if (19 !Gep || 19 !allGepOperandsAvailable(Gep, HoistPt)14 ) |
967 | 5 | return false; |
968 | 14 | |
969 | 14 | makeGepsAvailable(Repl, HoistPt, InstructionsToHoist, Gep); |
970 | 14 | |
971 | 14 | if (Val && 14 isa<GetElementPtrInst>(Val)3 ) |
972 | 1 | makeGepsAvailable(Repl, HoistPt, InstructionsToHoist, Val); |
973 | 19 | |
974 | 19 | return true; |
975 | 19 | } |
976 | | |
977 | 119 | std::pair<unsigned, unsigned> hoist(HoistingPointList &HPL) { |
978 | 119 | unsigned NI = 0, NL = 0, NS = 0, NC = 0, NR = 0; |
979 | 130 | for (const HoistingPointInfo &HP : HPL) { |
980 | 130 | // Find out whether we already have one of the instructions in HoistPt, |
981 | 130 | // in which case we do not have to move it. |
982 | 130 | BasicBlock *DestBB = HP.first; |
983 | 130 | const SmallVecInsn &InstructionsToHoist = HP.second; |
984 | 130 | Instruction *Repl = nullptr; |
985 | 130 | for (Instruction *I : InstructionsToHoist) |
986 | 243 | if (243 I->getParent() == DestBB243 ) |
987 | 243 | // If there are two instructions in HoistPt to be hoisted in place: |
988 | 243 | // update Repl to be the first one, such that we can rename the uses |
989 | 243 | // of the second based on the first. |
990 | 18 | if (18 !Repl || 18 firstInBB(I, Repl)0 ) |
991 | 18 | Repl = I; |
992 | 130 | |
993 | 130 | // Keep track of whether we moved the instruction so we know whether we |
994 | 130 | // should move the MemoryAccess. |
995 | 130 | bool MoveAccess = true; |
996 | 130 | if (Repl130 ) { |
997 | 18 | // Repl is already in HoistPt: it remains in place. |
998 | 18 | assert(allOperandsAvailable(Repl, DestBB) && |
999 | 18 | "instruction depends on operands that are not available"); |
1000 | 18 | MoveAccess = false; |
1001 | 130 | } else { |
1002 | 112 | // When we do not find Repl in HoistPt, select the first in the list |
1003 | 112 | // and move it to HoistPt. |
1004 | 112 | Repl = InstructionsToHoist.front(); |
1005 | 112 | |
1006 | 112 | // We can move Repl in HoistPt only when all operands are available. |
1007 | 112 | // The order in which hoistings are done may influence the availability |
1008 | 112 | // of operands. |
1009 | 112 | if (!allOperandsAvailable(Repl, DestBB)112 ) { |
1010 | 19 | |
1011 | 19 | // When HoistingGeps there is nothing more we can do to make the |
1012 | 19 | // operands available: just continue. |
1013 | 19 | if (HoistingGeps) |
1014 | 0 | continue; |
1015 | 19 | |
1016 | 19 | // When not HoistingGeps we need to copy the GEPs. |
1017 | 19 | if (19 !makeGepOperandsAvailable(Repl, DestBB, InstructionsToHoist)19 ) |
1018 | 5 | continue; |
1019 | 107 | } |
1020 | 107 | |
1021 | 107 | // Move the instruction at the end of HoistPt. |
1022 | 107 | Instruction *Last = DestBB->getTerminator(); |
1023 | 107 | MD->removeInstruction(Repl); |
1024 | 107 | Repl->moveBefore(Last); |
1025 | 107 | |
1026 | 107 | DFSNumber[Repl] = DFSNumber[Last]++; |
1027 | 107 | } |
1028 | 130 | |
1029 | 125 | NR += removeAndReplace(InstructionsToHoist, Repl, DestBB, MoveAccess); |
1030 | 125 | |
1031 | 125 | |
1032 | 125 | if (isa<LoadInst>(Repl)) |
1033 | 49 | ++NL; |
1034 | 76 | else if (76 isa<StoreInst>(Repl)76 ) |
1035 | 11 | ++NS; |
1036 | 65 | else if (65 isa<CallInst>(Repl)65 ) |
1037 | 3 | ++NC; |
1038 | 65 | else // Scalar |
1039 | 62 | ++NI; |
1040 | 130 | } |
1041 | 119 | |
1042 | 119 | NumHoisted += NL + NS + NC + NI; |
1043 | 119 | NumRemoved += NR; |
1044 | 119 | NumLoadsHoisted += NL; |
1045 | 119 | NumStoresHoisted += NS; |
1046 | 119 | NumCallsHoisted += NC; |
1047 | 119 | return {NI, NL + NC + NS}; |
1048 | 119 | } |
1049 | | |
1050 | | // Hoist all expressions. Returns Number of scalars hoisted |
1051 | | // and number of non-scalars hoisted. |
1052 | 119 | std::pair<unsigned, unsigned> hoistExpressions(Function &F) { |
1053 | 119 | InsnInfo II; |
1054 | 119 | LoadInfo LI; |
1055 | 119 | StoreInfo SI; |
1056 | 119 | CallInfo CI; |
1057 | 550 | for (BasicBlock *BB : depth_first(&F.getEntryBlock())) { |
1058 | 550 | int InstructionNb = 0; |
1059 | 2.22k | for (Instruction &I1 : *BB) { |
1060 | 2.22k | // If I1 cannot guarantee progress, subsequent instructions |
1061 | 2.22k | // in BB cannot be hoisted anyways. |
1062 | 2.22k | if (!isGuaranteedToTransferExecutionToSuccessor(&I1)2.22k ) { |
1063 | 174 | HoistBarrier.insert(BB); |
1064 | 174 | break; |
1065 | 174 | } |
1066 | 2.04k | // Only hoist the first instructions in BB up to MaxDepthInBB. Hoisting |
1067 | 2.04k | // deeper may increase the register pressure and compilation time. |
1068 | 2.04k | if (2.04k MaxDepthInBB != -1 && 2.04k InstructionNb++ >= MaxDepthInBB2.04k ) |
1069 | 0 | break; |
1070 | 2.04k | |
1071 | 2.04k | // Do not value number terminator instructions. |
1072 | 2.04k | if (2.04k isa<TerminatorInst>(&I1)2.04k ) |
1073 | 367 | break; |
1074 | 1.68k | |
1075 | 1.68k | if (auto *1.68k Load1.68k = dyn_cast<LoadInst>(&I1)) |
1076 | 396 | LI.insert(Load, VN); |
1077 | 1.28k | else if (auto *1.28k Store1.28k = dyn_cast<StoreInst>(&I1)) |
1078 | 219 | SI.insert(Store, VN); |
1079 | 1.06k | else if (auto *1.06k Call1.06k = dyn_cast<CallInst>(&I1)) { |
1080 | 29 | if (auto *Intr29 = dyn_cast<IntrinsicInst>(Call)) { |
1081 | 7 | if (isa<DbgInfoIntrinsic>(Intr) || |
1082 | 7 | Intr->getIntrinsicID() == Intrinsic::assume) |
1083 | 0 | continue; |
1084 | 29 | } |
1085 | 29 | if (29 Call->mayHaveSideEffects()29 ) |
1086 | 1 | break; |
1087 | 28 | |
1088 | 28 | if (28 Call->isConvergent()28 ) |
1089 | 8 | break; |
1090 | 20 | |
1091 | 20 | CI.insert(Call, VN); |
1092 | 1.06k | } else if (1.03k HoistingGeps || 1.03k !isa<GetElementPtrInst>(&I1)1.03k ) |
1093 | 1.03k | // Do not hoist scalars past calls that may write to memory because |
1094 | 1.03k | // that could result in spills later. geps are handled separately. |
1095 | 1.03k | // TODO: We can relax this for targets like AArch64 as they have more |
1096 | 1.03k | // registers than X86. |
1097 | 813 | II.insert(&I1, VN); |
1098 | 2.22k | } |
1099 | 550 | } |
1100 | 119 | |
1101 | 119 | HoistingPointList HPL; |
1102 | 119 | computeInsertionPoints(II.getVNTable(), HPL, InsKind::Scalar); |
1103 | 119 | computeInsertionPoints(LI.getVNTable(), HPL, InsKind::Load); |
1104 | 119 | computeInsertionPoints(SI.getVNTable(), HPL, InsKind::Store); |
1105 | 119 | computeInsertionPoints(CI.getScalarVNTable(), HPL, InsKind::Scalar); |
1106 | 119 | computeInsertionPoints(CI.getLoadVNTable(), HPL, InsKind::Load); |
1107 | 119 | computeInsertionPoints(CI.getStoreVNTable(), HPL, InsKind::Store); |
1108 | 119 | return hoist(HPL); |
1109 | 119 | } |
1110 | | }; |
1111 | | |
1112 | | class GVNHoistLegacyPass : public FunctionPass { |
1113 | | public: |
1114 | | static char ID; |
1115 | | |
1116 | 27 | GVNHoistLegacyPass() : FunctionPass(ID) { |
1117 | 27 | initializeGVNHoistLegacyPassPass(*PassRegistry::getPassRegistry()); |
1118 | 27 | } |
1119 | | |
1120 | 60 | bool runOnFunction(Function &F) override { |
1121 | 60 | if (skipFunction(F)) |
1122 | 0 | return false; |
1123 | 60 | auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); |
1124 | 60 | auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree(); |
1125 | 60 | auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); |
1126 | 60 | auto &MD = getAnalysis<MemoryDependenceWrapperPass>().getMemDep(); |
1127 | 60 | auto &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA(); |
1128 | 60 | |
1129 | 60 | GVNHoist G(&DT, &PDT, &AA, &MD, &MSSA); |
1130 | 60 | return G.run(F); |
1131 | 60 | } |
1132 | | |
1133 | 27 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
1134 | 27 | AU.addRequired<DominatorTreeWrapperPass>(); |
1135 | 27 | AU.addRequired<PostDominatorTreeWrapperPass>(); |
1136 | 27 | AU.addRequired<AAResultsWrapperPass>(); |
1137 | 27 | AU.addRequired<MemoryDependenceWrapperPass>(); |
1138 | 27 | AU.addRequired<MemorySSAWrapperPass>(); |
1139 | 27 | AU.addPreserved<DominatorTreeWrapperPass>(); |
1140 | 27 | AU.addPreserved<MemorySSAWrapperPass>(); |
1141 | 27 | AU.addPreserved<GlobalsAAWrapperPass>(); |
1142 | 27 | } |
1143 | | }; |
1144 | | } // namespace llvm |
1145 | | |
1146 | 0 | PreservedAnalyses GVNHoistPass::run(Function &F, FunctionAnalysisManager &AM) { |
1147 | 0 | DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F); |
1148 | 0 | PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F); |
1149 | 0 | AliasAnalysis &AA = AM.getResult<AAManager>(F); |
1150 | 0 | MemoryDependenceResults &MD = AM.getResult<MemoryDependenceAnalysis>(F); |
1151 | 0 | MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA(); |
1152 | 0 | GVNHoist G(&DT, &PDT, &AA, &MD, &MSSA); |
1153 | 0 | if (!G.run(F)) |
1154 | 0 | return PreservedAnalyses::all(); |
1155 | 0 |
|
1156 | 0 | PreservedAnalyses PA; |
1157 | 0 | PA.preserve<DominatorTreeAnalysis>(); |
1158 | 0 | PA.preserve<MemorySSAAnalysis>(); |
1159 | 0 | PA.preserve<GlobalsAA>(); |
1160 | 0 | return PA; |
1161 | 0 | } |
1162 | | |
1163 | | char GVNHoistLegacyPass::ID = 0; |
1164 | 24.6k | INITIALIZE_PASS_BEGIN24.6k (GVNHoistLegacyPass, "gvn-hoist",
|
1165 | 24.6k | "Early GVN Hoisting of Expressions", false, false) |
1166 | 24.6k | INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass) |
1167 | 24.6k | INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) |
1168 | 24.6k | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) |
1169 | 24.6k | INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) |
1170 | 24.6k | INITIALIZE_PASS_END(GVNHoistLegacyPass, "gvn-hoist", |
1171 | | "Early GVN Hoisting of Expressions", false, false) |
1172 | | |
1173 | 1 | FunctionPass *llvm::createGVNHoistPass() { return new GVNHoistLegacyPass(); } |