/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- SpeculateAroundPHIs.cpp --------------------------------------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | |
9 | | #include "llvm/Transforms/Scalar/SpeculateAroundPHIs.h" |
10 | | #include "llvm/ADT/PostOrderIterator.h" |
11 | | #include "llvm/ADT/Sequence.h" |
12 | | #include "llvm/ADT/SetVector.h" |
13 | | #include "llvm/ADT/Statistic.h" |
14 | | #include "llvm/Analysis/TargetTransformInfo.h" |
15 | | #include "llvm/Analysis/ValueTracking.h" |
16 | | #include "llvm/IR/BasicBlock.h" |
17 | | #include "llvm/IR/IRBuilder.h" |
18 | | #include "llvm/IR/Instructions.h" |
19 | | #include "llvm/IR/IntrinsicInst.h" |
20 | | #include "llvm/Support/Debug.h" |
21 | | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
22 | | |
23 | | using namespace llvm; |
24 | | |
25 | | #define DEBUG_TYPE "spec-phis" |
26 | | |
27 | | STATISTIC(NumPHIsSpeculated, "Number of PHI nodes we speculated around"); |
28 | | STATISTIC(NumEdgesSplit, |
29 | | "Number of critical edges which were split for speculation"); |
30 | | STATISTIC(NumSpeculatedInstructions, |
31 | | "Number of instructions we speculated around the PHI nodes"); |
32 | | STATISTIC(NumNewRedundantInstructions, |
33 | | "Number of new, redundant instructions inserted"); |
34 | | |
35 | | /// Check whether speculating the users of a PHI node around the PHI |
36 | | /// will be safe. |
37 | | /// |
38 | | /// This checks both that all of the users are safe and also that all of their |
39 | | /// operands are either recursively safe or already available along an incoming |
40 | | /// edge to the PHI. |
41 | | /// |
42 | | /// This routine caches both all the safe nodes explored in `PotentialSpecSet` |
43 | | /// and the chain of nodes that definitively reach any unsafe node in |
44 | | /// `UnsafeSet`. By preserving these between repeated calls to this routine for |
45 | | /// PHIs in the same basic block, the exploration here can be reused. However, |
46 | | /// these caches must no be reused for PHIs in a different basic block as they |
47 | | /// reflect what is available along incoming edges. |
48 | | static bool |
49 | | isSafeToSpeculatePHIUsers(PHINode &PN, DominatorTree &DT, |
50 | | SmallPtrSetImpl<Instruction *> &PotentialSpecSet, |
51 | 51 | SmallPtrSetImpl<Instruction *> &UnsafeSet) { |
52 | 51 | auto *PhiBB = PN.getParent(); |
53 | 51 | SmallPtrSet<Instruction *, 4> Visited; |
54 | 51 | SmallVector<std::pair<Instruction *, User::value_op_iterator>, 16> DFSStack; |
55 | 51 | |
56 | 51 | // Walk each user of the PHI node. |
57 | 75 | for (Use &U : PN.uses()) { |
58 | 75 | auto *UI = cast<Instruction>(U.getUser()); |
59 | 75 | |
60 | 75 | // Ensure the use post-dominates the PHI node. This ensures that, in the |
61 | 75 | // absence of unwinding, the use will actually be reached. |
62 | 75 | // FIXME: We use a blunt hammer of requiring them to be in the same basic |
63 | 75 | // block. We should consider using actual post-dominance here in the |
64 | 75 | // future. |
65 | 75 | if (UI->getParent() != PhiBB) { |
66 | 3 | LLVM_DEBUG(dbgs() << " Unsafe: use in a different BB: " << *UI << "\n"); |
67 | 3 | return false; |
68 | 3 | } |
69 | 72 | |
70 | 72 | if (auto CS = ImmutableCallSite(UI)) { |
71 | 3 | if (CS.isConvergent() || CS.cannotDuplicate()2 ) { |
72 | 2 | LLVM_DEBUG(dbgs() << " Unsafe: convergent " |
73 | 2 | "callsite cannot de duplicated: " << *UI << '\n'); |
74 | 2 | return false; |
75 | 2 | } |
76 | 70 | } |
77 | 70 | |
78 | 70 | // FIXME: This check is much too conservative. We're not going to move these |
79 | 70 | // instructions onto new dynamic paths through the program unless there is |
80 | 70 | // a call instruction between the use and the PHI node. And memory isn't |
81 | 70 | // changing unless there is a store in that same sequence. We should |
82 | 70 | // probably change this to do at least a limited scan of the intervening |
83 | 70 | // instructions and allow handling stores in easily proven safe cases. |
84 | 70 | if (mayBeMemoryDependent(*UI)) { |
85 | 2 | LLVM_DEBUG(dbgs() << " Unsafe: can't speculate use: " << *UI << "\n"); |
86 | 2 | return false; |
87 | 2 | } |
88 | 68 | |
89 | 68 | // Now do a depth-first search of everything these users depend on to make |
90 | 68 | // sure they are transitively safe. This is a depth-first search, but we |
91 | 68 | // check nodes in preorder to minimize the amount of checking. |
92 | 68 | Visited.insert(UI); |
93 | 68 | DFSStack.push_back({UI, UI->value_op_begin()}); |
94 | 73 | do { |
95 | 73 | User::value_op_iterator OpIt; |
96 | 73 | std::tie(UI, OpIt) = DFSStack.pop_back_val(); |
97 | 73 | |
98 | 217 | while (OpIt != UI->value_op_end()) { |
99 | 144 | auto *OpI = dyn_cast<Instruction>(*OpIt); |
100 | 144 | // Increment to the next operand for whenever we continue. |
101 | 144 | ++OpIt; |
102 | 144 | // No need to visit non-instructions, which can't form dependencies. |
103 | 144 | if (!OpI) |
104 | 61 | continue; |
105 | 83 | |
106 | 83 | // Now do the main pre-order checks that this operand is a viable |
107 | 83 | // dependency of something we want to speculate. |
108 | 83 | |
109 | 83 | // First do a few checks for instructions that won't require |
110 | 83 | // speculation at all because they are trivially available on the |
111 | 83 | // incoming edge (either through dominance or through an incoming value |
112 | 83 | // to a PHI). |
113 | 83 | // |
114 | 83 | // The cases in the current block will be trivially dominated by the |
115 | 83 | // edge. |
116 | 83 | auto *ParentBB = OpI->getParent(); |
117 | 83 | if (ParentBB == PhiBB) { |
118 | 80 | if (isa<PHINode>(OpI)) { |
119 | 72 | // We can trivially map through phi nodes in the same block. |
120 | 72 | continue; |
121 | 72 | } |
122 | 3 | } else if (DT.dominates(ParentBB, PhiBB)) { |
123 | 3 | // Instructions from dominating blocks are already available. |
124 | 3 | continue; |
125 | 3 | } |
126 | 8 | |
127 | 8 | // Once we know that we're considering speculating the operand, check |
128 | 8 | // if we've already explored this subgraph and found it to be safe. |
129 | 8 | if (PotentialSpecSet.count(OpI)) |
130 | 3 | continue; |
131 | 5 | |
132 | 5 | // If we've already explored this subgraph and found it unsafe, bail. |
133 | 5 | // If when we directly test whether this is safe it fails, bail. |
134 | 5 | if (UnsafeSet.count(OpI) || ParentBB != PhiBB || |
135 | 5 | mayBeMemoryDependent(*OpI)) { |
136 | 0 | LLVM_DEBUG(dbgs() << " Unsafe: can't speculate transitive use: " |
137 | 0 | << *OpI << "\n"); |
138 | 0 | // Record the stack of instructions which reach this node as unsafe |
139 | 0 | // so we prune subsequent searches. |
140 | 0 | UnsafeSet.insert(OpI); |
141 | 0 | for (auto &StackPair : DFSStack) { |
142 | 0 | Instruction *I = StackPair.first; |
143 | 0 | UnsafeSet.insert(I); |
144 | 0 | } |
145 | 0 | return false; |
146 | 0 | } |
147 | 5 | |
148 | 5 | // Skip any operands we're already recursively checking. |
149 | 5 | if (!Visited.insert(OpI).second) |
150 | 0 | continue; |
151 | 5 | |
152 | 5 | // Push onto the stack and descend. We can directly continue this |
153 | 5 | // loop when ascending. |
154 | 5 | DFSStack.push_back({UI, OpIt}); |
155 | 5 | UI = OpI; |
156 | 5 | OpIt = OpI->value_op_begin(); |
157 | 5 | } |
158 | 73 | |
159 | 73 | // This node and all its operands are safe. Go ahead and cache that for |
160 | 73 | // reuse later. |
161 | 73 | PotentialSpecSet.insert(UI); |
162 | 73 | |
163 | 73 | // Continue with the next node on the stack. |
164 | 73 | } while (!DFSStack.empty()); |
165 | 68 | } |
166 | 51 | |
167 | | #ifndef NDEBUG |
168 | | // Every visited operand should have been marked as safe for speculation at |
169 | | // this point. Verify this and return success. |
170 | | for (auto *I : Visited) |
171 | | assert(PotentialSpecSet.count(I) && |
172 | | "Failed to mark a visited instruction as safe!"); |
173 | | #endif |
174 | 44 | return true; |
175 | 51 | } |
176 | | |
177 | | /// Check whether, in isolation, a given PHI node is both safe and profitable |
178 | | /// to speculate users around. |
179 | | /// |
180 | | /// This handles checking whether there are any constant operands to a PHI |
181 | | /// which could represent a useful speculation candidate, whether the users of |
182 | | /// the PHI are safe to speculate including all their transitive dependencies, |
183 | | /// and whether after speculation there will be some cost savings (profit) to |
184 | | /// folding the operands into the users of the PHI node. Returns true if both |
185 | | /// safe and profitable with relevant cost savings updated in the map and with |
186 | | /// an update to the `PotentialSpecSet`. Returns false if either safety or |
187 | | /// profitability are absent. Some new entries may be made to the |
188 | | /// `PotentialSpecSet` even when this routine returns false, but they remain |
189 | | /// conservatively correct. |
190 | | /// |
191 | | /// The profitability check here is a local one, but it checks this in an |
192 | | /// interesting way. Beyond checking that the total cost of materializing the |
193 | | /// constants will be less than the cost of folding them into their users, it |
194 | | /// also checks that no one incoming constant will have a higher cost when |
195 | | /// folded into its users rather than materialized. This higher cost could |
196 | | /// result in a dynamic *path* that is more expensive even when the total cost |
197 | | /// is lower. Currently, all of the interesting cases where this optimization |
198 | | /// should fire are ones where it is a no-loss operation in this sense. If we |
199 | | /// ever want to be more aggressive here, we would need to balance the |
200 | | /// different incoming edges' cost by looking at their respective |
201 | | /// probabilities. |
202 | | static bool isSafeAndProfitableToSpeculateAroundPHI( |
203 | | PHINode &PN, SmallDenseMap<PHINode *, int, 16> &CostSavingsMap, |
204 | | SmallPtrSetImpl<Instruction *> &PotentialSpecSet, |
205 | | SmallPtrSetImpl<Instruction *> &UnsafeSet, DominatorTree &DT, |
206 | 88 | TargetTransformInfo &TTI) { |
207 | 88 | // First see whether there is any cost savings to speculating around this |
208 | 88 | // PHI, and build up a map of the constant inputs to how many times they |
209 | 88 | // occur. |
210 | 88 | bool NonFreeMat = false; |
211 | 88 | struct CostsAndCount { |
212 | 88 | int MatCost = TargetTransformInfo::TCC_Free; |
213 | 88 | int FoldedCost = TargetTransformInfo::TCC_Free; |
214 | 88 | int Count = 0; |
215 | 88 | }; |
216 | 88 | SmallDenseMap<ConstantInt *, CostsAndCount, 16> CostsAndCounts; |
217 | 88 | SmallPtrSet<BasicBlock *, 16> IncomingConstantBlocks; |
218 | 179 | for (int i : llvm::seq<int>(0, PN.getNumIncomingValues())) { |
219 | 179 | auto *IncomingC = dyn_cast<ConstantInt>(PN.getIncomingValue(i)); |
220 | 179 | if (!IncomingC) |
221 | 85 | continue; |
222 | 94 | |
223 | 94 | // Only visit each incoming edge with a constant input once. |
224 | 94 | if (!IncomingConstantBlocks.insert(PN.getIncomingBlock(i)).second) |
225 | 0 | continue; |
226 | 94 | |
227 | 94 | auto InsertResult = CostsAndCounts.insert({IncomingC, {}}); |
228 | 94 | // Count how many edges share a given incoming costant. |
229 | 94 | ++InsertResult.first->second.Count; |
230 | 94 | // Only compute the cost the first time we see a particular constant. |
231 | 94 | if (!InsertResult.second) |
232 | 2 | continue; |
233 | 92 | |
234 | 92 | int &MatCost = InsertResult.first->second.MatCost; |
235 | 92 | MatCost = TTI.getIntImmCost(IncomingC->getValue(), IncomingC->getType()); |
236 | 92 | NonFreeMat |= MatCost != TTI.TCC_Free; |
237 | 92 | } |
238 | 88 | if (!NonFreeMat) { |
239 | 37 | LLVM_DEBUG(dbgs() << " Free: " << PN << "\n"); |
240 | 37 | // No profit in free materialization. |
241 | 37 | return false; |
242 | 37 | } |
243 | 51 | |
244 | 51 | // Now check that the uses of this PHI can actually be speculated, |
245 | 51 | // otherwise we'll still have to materialize the PHI value. |
246 | 51 | if (!isSafeToSpeculatePHIUsers(PN, DT, PotentialSpecSet, UnsafeSet)) { |
247 | 7 | LLVM_DEBUG(dbgs() << " Unsafe PHI: " << PN << "\n"); |
248 | 7 | return false; |
249 | 7 | } |
250 | 44 | |
251 | 44 | // Compute how much (if any) savings are available by speculating around this |
252 | 44 | // PHI. |
253 | 68 | for (Use &U : PN.uses())44 { |
254 | 68 | auto *UserI = cast<Instruction>(U.getUser()); |
255 | 68 | // Now check whether there is any savings to folding the incoming constants |
256 | 68 | // into this use. |
257 | 68 | unsigned Idx = U.getOperandNo(); |
258 | 68 | |
259 | 68 | // If we have a binary operator that is commutative, an actual constant |
260 | 68 | // operand would end up on the RHS, so pretend the use of the PHI is on the |
261 | 68 | // RHS. |
262 | 68 | // |
263 | 68 | // Technically, this is a bit weird if *both* operands are PHIs we're |
264 | 68 | // speculating. But if that is the case, giving an "optimistic" cost isn't |
265 | 68 | // a bad thing because after speculation it will constant fold. And |
266 | 68 | // moreover, such cases should likely have been constant folded already by |
267 | 68 | // some other pass, so we shouldn't worry about "modeling" them terribly |
268 | 68 | // accurately here. Similarly, if the other operand is a constant, it still |
269 | 68 | // seems fine to be "optimistic" in our cost modeling, because when the |
270 | 68 | // incoming operand from the PHI node is also a constant, we will end up |
271 | 68 | // constant folding. |
272 | 68 | if (UserI->isBinaryOp() && UserI->isCommutative()46 && Idx != 146 ) |
273 | 23 | // Assume we will commute the constant to the RHS to be canonical. |
274 | 23 | Idx = 1; |
275 | 68 | |
276 | 68 | // Get the intrinsic ID if this user is an intrinsic. |
277 | 68 | Intrinsic::ID IID = Intrinsic::not_intrinsic; |
278 | 68 | if (auto *UserII = dyn_cast<IntrinsicInst>(UserI)) |
279 | 1 | IID = UserII->getIntrinsicID(); |
280 | 68 | |
281 | 92 | for (auto &IncomingConstantAndCostsAndCount : CostsAndCounts) { |
282 | 92 | ConstantInt *IncomingC = IncomingConstantAndCostsAndCount.first; |
283 | 92 | int MatCost = IncomingConstantAndCostsAndCount.second.MatCost; |
284 | 92 | int &FoldedCost = IncomingConstantAndCostsAndCount.second.FoldedCost; |
285 | 92 | if (IID) |
286 | 0 | FoldedCost += TTI.getIntImmCost(IID, Idx, IncomingC->getValue(), |
287 | 0 | IncomingC->getType()); |
288 | 92 | else |
289 | 92 | FoldedCost += |
290 | 92 | TTI.getIntImmCost(UserI->getOpcode(), Idx, IncomingC->getValue(), |
291 | 92 | IncomingC->getType()); |
292 | 92 | |
293 | 92 | // If we accumulate more folded cost for this incoming constant than |
294 | 92 | // materialized cost, then we'll regress any edge with this constant so |
295 | 92 | // just bail. We're only interested in cases where folding the incoming |
296 | 92 | // constants is at least break-even on all paths. |
297 | 92 | if (FoldedCost > MatCost) { |
298 | 1 | LLVM_DEBUG(dbgs() << " Not profitable to fold imm: " << *IncomingC |
299 | 1 | << "\n" |
300 | 1 | " Materializing cost: " |
301 | 1 | << MatCost |
302 | 1 | << "\n" |
303 | 1 | " Accumulated folded cost: " |
304 | 1 | << FoldedCost << "\n"); |
305 | 1 | return false; |
306 | 1 | } |
307 | 92 | } |
308 | 68 | } |
309 | 44 | |
310 | 44 | // Compute the total cost savings afforded by this PHI node. |
311 | 44 | int TotalMatCost = TTI.TCC_Free, TotalFoldedCost = TTI.TCC_Free; |
312 | 63 | for (auto IncomingConstantAndCostsAndCount : CostsAndCounts) { |
313 | 63 | int MatCost = IncomingConstantAndCostsAndCount.second.MatCost; |
314 | 63 | int FoldedCost = IncomingConstantAndCostsAndCount.second.FoldedCost; |
315 | 63 | int Count = IncomingConstantAndCostsAndCount.second.Count; |
316 | 63 | |
317 | 63 | TotalMatCost += MatCost * Count; |
318 | 63 | TotalFoldedCost += FoldedCost * Count; |
319 | 63 | } |
320 | 43 | assert(TotalFoldedCost <= TotalMatCost && "If each constant's folded cost is " |
321 | 43 | "less that its materialized cost, " |
322 | 43 | "the sum must be as well."); |
323 | 43 | |
324 | 43 | LLVM_DEBUG(dbgs() << " Cost savings " << (TotalMatCost - TotalFoldedCost) |
325 | 43 | << ": " << PN << "\n"); |
326 | 43 | CostSavingsMap[&PN] = TotalMatCost - TotalFoldedCost; |
327 | 43 | return true; |
328 | 44 | } |
329 | | |
330 | | /// Simple helper to walk all the users of a list of phis depth first, and call |
331 | | /// a visit function on each one in post-order. |
332 | | /// |
333 | | /// All of the PHIs should be in the same basic block, and this is primarily |
334 | | /// used to make a single depth-first walk across their collective users |
335 | | /// without revisiting any subgraphs. Callers should provide a fast, idempotent |
336 | | /// callable to test whether a node has been visited and the more important |
337 | | /// callable to actually visit a particular node. |
338 | | /// |
339 | | /// Depth-first and postorder here refer to the *operand* graph -- we start |
340 | | /// from a collection of users of PHI nodes and walk "up" the operands |
341 | | /// depth-first. |
342 | | template <typename IsVisitedT, typename VisitT> |
343 | | static void visitPHIUsersAndDepsInPostOrder(ArrayRef<PHINode *> PNs, |
344 | | IsVisitedT IsVisited, |
345 | 48 | VisitT Visit) { |
346 | 48 | SmallVector<std::pair<Instruction *, User::value_op_iterator>, 16> DFSStack; |
347 | 48 | for (auto *PN : PNs) |
348 | 79 | for (Use &U : PN->uses())56 { |
349 | 79 | auto *UI = cast<Instruction>(U.getUser()); |
350 | 79 | if (IsVisited(UI)) |
351 | 2 | // Already visited this user, continue across the roots. |
352 | 2 | continue; |
353 | 77 | |
354 | 77 | // Otherwise, walk the operand graph depth-first and visit each |
355 | 77 | // dependency in postorder. |
356 | 77 | DFSStack.push_back({UI, UI->value_op_begin()}); |
357 | 83 | do { |
358 | 83 | User::value_op_iterator OpIt; |
359 | 83 | std::tie(UI, OpIt) = DFSStack.pop_back_val(); |
360 | 245 | while (OpIt != UI->value_op_end()) { |
361 | 162 | auto *OpI = dyn_cast<Instruction>(*OpIt); |
362 | 162 | // Increment to the next operand for whenever we continue. |
363 | 162 | ++OpIt; |
364 | 162 | // No need to visit non-instructions, which can't form dependencies, |
365 | 162 | // or instructions outside of our potential dependency set that we |
366 | 162 | // were given. Finally, if we've already visited the node, continue |
367 | 162 | // to the next. |
368 | 162 | if (!OpI || IsVisited(OpI)99 ) |
369 | 156 | continue; |
370 | 6 | |
371 | 6 | // Push onto the stack and descend. We can directly continue this |
372 | 6 | // loop when ascending. |
373 | 6 | DFSStack.push_back({UI, OpIt}); |
374 | 6 | UI = OpI; |
375 | 6 | OpIt = OpI->value_op_begin(); |
376 | 6 | } |
377 | 83 | |
378 | 83 | // Finished visiting children, visit this node. |
379 | 83 | assert(!IsVisited(UI) && "Should not have already visited a node!"); |
380 | 83 | Visit(UI); |
381 | 83 | } while (!DFSStack.empty()); |
382 | 77 | } |
383 | 48 | } SpeculateAroundPHIs.cpp:void visitPHIUsersAndDepsInPostOrder<findProfitablePHIs(llvm::ArrayRef<llvm::PHINode*>, llvm::SmallDenseMap<llvm::PHINode*, int, 16u, llvm::DenseMapInfo<llvm::PHINode*>, llvm::detail::DenseMapPair<llvm::PHINode*, int> > const&, llvm::SmallPtrSetImpl<llvm::Instruction*> const&, int, llvm::DominatorTree&, llvm::TargetTransformInfo&)::$_1, findProfitablePHIs(llvm::ArrayRef<llvm::PHINode*>, llvm::SmallDenseMap<llvm::PHINode*, int, 16u, llvm::DenseMapInfo<llvm::PHINode*>, llvm::detail::DenseMapPair<llvm::PHINode*, int> > const&, llvm::SmallPtrSetImpl<llvm::Instruction*> const&, int, llvm::DominatorTree&, llvm::TargetTransformInfo&)::$_0>(llvm::ArrayRef<llvm::PHINode*>, findProfitablePHIs(llvm::ArrayRef<llvm::PHINode*>, llvm::SmallDenseMap<llvm::PHINode*, int, 16u, llvm::DenseMapInfo<llvm::PHINode*>, llvm::detail::DenseMapPair<llvm::PHINode*, int> > const&, llvm::SmallPtrSetImpl<llvm::Instruction*> const&, int, llvm::DominatorTree&, llvm::TargetTransformInfo&)::$_1, findProfitablePHIs(llvm::ArrayRef<llvm::PHINode*>, llvm::SmallDenseMap<llvm::PHINode*, int, 16u, llvm::DenseMapInfo<llvm::PHINode*>, llvm::detail::DenseMapPair<llvm::PHINode*, int> > const&, llvm::SmallPtrSetImpl<llvm::Instruction*> const&, int, llvm::DominatorTree&, llvm::TargetTransformInfo&)::$_0) Line | Count | Source | 345 | 35 | VisitT Visit) { | 346 | 35 | SmallVector<std::pair<Instruction *, User::value_op_iterator>, 16> DFSStack; | 347 | 35 | for (auto *PN : PNs) | 348 | 62 | for (Use &U : PN->uses())39 { | 349 | 62 | auto *UI = cast<Instruction>(U.getUser()); | 350 | 62 | if (IsVisited(UI)) | 351 | 2 | // Already visited this user, continue across the roots. | 352 | 2 | continue; | 353 | 60 | | 354 | 60 | // Otherwise, walk the operand graph depth-first and visit each | 355 | 60 | // dependency in postorder. | 356 | 60 | DFSStack.push_back({UI, UI->value_op_begin()}); | 357 | 64 | do { | 358 | 64 | User::value_op_iterator OpIt; | 359 | 64 | std::tie(UI, OpIt) = DFSStack.pop_back_val(); | 360 | 190 | while (OpIt != UI->value_op_end()) { | 361 | 126 | auto *OpI = dyn_cast<Instruction>(*OpIt); | 362 | 126 | // Increment to the next operand for whenever we continue. | 363 | 126 | ++OpIt; | 364 | 126 | // No need to visit non-instructions, which can't form dependencies, | 365 | 126 | // or instructions outside of our potential dependency set that we | 366 | 126 | // were given. Finally, if we've already visited the node, continue | 367 | 126 | // to the next. | 368 | 126 | if (!OpI || IsVisited(OpI)72 ) | 369 | 122 | continue; | 370 | 4 | | 371 | 4 | // Push onto the stack and descend. We can directly continue this | 372 | 4 | // loop when ascending. | 373 | 4 | DFSStack.push_back({UI, OpIt}); | 374 | 4 | UI = OpI; | 375 | 4 | OpIt = OpI->value_op_begin(); | 376 | 4 | } | 377 | 64 | | 378 | 64 | // Finished visiting children, visit this node. | 379 | 64 | assert(!IsVisited(UI) && "Should not have already visited a node!"); | 380 | 64 | Visit(UI); | 381 | 64 | } while (!DFSStack.empty()); | 382 | 60 | } | 383 | 35 | } |
SpeculateAroundPHIs.cpp:void visitPHIUsersAndDepsInPostOrder<speculatePHIs(llvm::ArrayRef<llvm::PHINode*>, llvm::SmallPtrSetImpl<llvm::Instruction*>&, llvm::SmallSetVector<llvm::BasicBlock*, 16u>&, llvm::DominatorTree&)::$_2, speculatePHIs(llvm::ArrayRef<llvm::PHINode*>, llvm::SmallPtrSetImpl<llvm::Instruction*>&, llvm::SmallSetVector<llvm::BasicBlock*, 16u>&, llvm::DominatorTree&)::$_3>(llvm::ArrayRef<llvm::PHINode*>, speculatePHIs(llvm::ArrayRef<llvm::PHINode*>, llvm::SmallPtrSetImpl<llvm::Instruction*>&, llvm::SmallSetVector<llvm::BasicBlock*, 16u>&, llvm::DominatorTree&)::$_2, speculatePHIs(llvm::ArrayRef<llvm::PHINode*>, llvm::SmallPtrSetImpl<llvm::Instruction*>&, llvm::SmallSetVector<llvm::BasicBlock*, 16u>&, llvm::DominatorTree&)::$_3) Line | Count | Source | 345 | 13 | VisitT Visit) { | 346 | 13 | SmallVector<std::pair<Instruction *, User::value_op_iterator>, 16> DFSStack; | 347 | 13 | for (auto *PN : PNs) | 348 | 17 | for (Use &U : PN->uses()) { | 349 | 17 | auto *UI = cast<Instruction>(U.getUser()); | 350 | 17 | if (IsVisited(UI)) | 351 | 0 | // Already visited this user, continue across the roots. | 352 | 0 | continue; | 353 | 17 | | 354 | 17 | // Otherwise, walk the operand graph depth-first and visit each | 355 | 17 | // dependency in postorder. | 356 | 17 | DFSStack.push_back({UI, UI->value_op_begin()}); | 357 | 19 | do { | 358 | 19 | User::value_op_iterator OpIt; | 359 | 19 | std::tie(UI, OpIt) = DFSStack.pop_back_val(); | 360 | 55 | while (OpIt != UI->value_op_end()) { | 361 | 36 | auto *OpI = dyn_cast<Instruction>(*OpIt); | 362 | 36 | // Increment to the next operand for whenever we continue. | 363 | 36 | ++OpIt; | 364 | 36 | // No need to visit non-instructions, which can't form dependencies, | 365 | 36 | // or instructions outside of our potential dependency set that we | 366 | 36 | // were given. Finally, if we've already visited the node, continue | 367 | 36 | // to the next. | 368 | 36 | if (!OpI || IsVisited(OpI)27 ) | 369 | 34 | continue; | 370 | 2 | | 371 | 2 | // Push onto the stack and descend. We can directly continue this | 372 | 2 | // loop when ascending. | 373 | 2 | DFSStack.push_back({UI, OpIt}); | 374 | 2 | UI = OpI; | 375 | 2 | OpIt = OpI->value_op_begin(); | 376 | 2 | } | 377 | 19 | | 378 | 19 | // Finished visiting children, visit this node. | 379 | 19 | assert(!IsVisited(UI) && "Should not have already visited a node!"); | 380 | 19 | Visit(UI); | 381 | 19 | } while (!DFSStack.empty()); | 382 | 17 | } | 383 | 13 | } |
|
384 | | |
385 | | /// Find profitable PHIs to speculate. |
386 | | /// |
387 | | /// For a PHI node to be profitable, we need the cost of speculating its users |
388 | | /// (and their dependencies) to not exceed the savings of folding the PHI's |
389 | | /// constant operands into the speculated users. |
390 | | /// |
391 | | /// Computing this is surprisingly challenging. Because users of two different |
392 | | /// PHI nodes can depend on each other or on common other instructions, it may |
393 | | /// be profitable to speculate two PHI nodes together even though neither one |
394 | | /// in isolation is profitable. The straightforward way to find all the |
395 | | /// profitable PHIs would be to check each combination of PHIs' cost, but this |
396 | | /// is exponential in complexity. |
397 | | /// |
398 | | /// Even if we assume that we only care about cases where we can consider each |
399 | | /// PHI node in isolation (rather than considering cases where none are |
400 | | /// profitable in isolation but some subset are profitable as a set), we still |
401 | | /// have a challenge. The obvious way to find all individually profitable PHIs |
402 | | /// is to iterate until reaching a fixed point, but this will be quadratic in |
403 | | /// complexity. =/ |
404 | | /// |
405 | | /// This code currently uses a linear-to-compute order for a greedy approach. |
406 | | /// It won't find cases where a set of PHIs must be considered together, but it |
407 | | /// handles most cases of order dependence without quadratic iteration. The |
408 | | /// specific order used is the post-order across the operand DAG. When the last |
409 | | /// user of a PHI is visited in this postorder walk, we check it for |
410 | | /// profitability. |
411 | | /// |
412 | | /// There is an orthogonal extra complexity to all of this: computing the cost |
413 | | /// itself can easily become a linear computation making everything again (at |
414 | | /// best) quadratic. Using a postorder over the operand graph makes it |
415 | | /// particularly easy to avoid this through dynamic programming. As we do the |
416 | | /// postorder walk, we build the transitive cost of that subgraph. It is also |
417 | | /// straightforward to then update these costs when we mark a PHI for |
418 | | /// speculation so that subsequent PHIs don't re-pay the cost of already |
419 | | /// speculated instructions. |
420 | | static SmallVector<PHINode *, 16> |
421 | | findProfitablePHIs(ArrayRef<PHINode *> PNs, |
422 | | const SmallDenseMap<PHINode *, int, 16> &CostSavingsMap, |
423 | | const SmallPtrSetImpl<Instruction *> &PotentialSpecSet, |
424 | 35 | int NumPreds, DominatorTree &DT, TargetTransformInfo &TTI) { |
425 | 35 | SmallVector<PHINode *, 16> SpecPNs; |
426 | 35 | |
427 | 35 | // First, establish a reverse mapping from immediate users of the PHI nodes |
428 | 35 | // to the nodes themselves, and count how many users each PHI node has in |
429 | 35 | // a way we can update while processing them. |
430 | 35 | SmallDenseMap<Instruction *, TinyPtrVector<PHINode *>, 16> UserToPNMap; |
431 | 35 | SmallDenseMap<PHINode *, int, 16> PNUserCountMap; |
432 | 35 | SmallPtrSet<Instruction *, 16> UserSet; |
433 | 39 | for (auto *PN : PNs) { |
434 | 39 | assert(UserSet.empty() && "Must start with an empty user set!"); |
435 | 39 | for (Use &U : PN->uses()) |
436 | 62 | UserSet.insert(cast<Instruction>(U.getUser())); |
437 | 39 | PNUserCountMap[PN] = UserSet.size(); |
438 | 39 | for (auto *UI : UserSet) |
439 | 62 | UserToPNMap.insert({UI, {}}).first->second.push_back(PN); |
440 | 39 | UserSet.clear(); |
441 | 39 | } |
442 | 35 | |
443 | 35 | // Now do a DFS across the operand graph of the users, computing cost as we |
444 | 35 | // go and when all costs for a given PHI are known, checking that PHI for |
445 | 35 | // profitability. |
446 | 35 | SmallDenseMap<Instruction *, int, 16> SpecCostMap; |
447 | 35 | visitPHIUsersAndDepsInPostOrder( |
448 | 35 | PNs, |
449 | 35 | /*IsVisited*/ |
450 | 134 | [&](Instruction *I) { |
451 | 134 | // We consider anything that isn't potentially speculated to be |
452 | 134 | // "visited" as it is already handled. Similarly, anything that *is* |
453 | 134 | // potentially speculated but for which we have an entry in our cost |
454 | 134 | // map, we're done. |
455 | 134 | return !PotentialSpecSet.count(I) || SpecCostMap.count(I)68 ; |
456 | 134 | }, |
457 | 35 | /*Visit*/ |
458 | 64 | [&](Instruction *I) { |
459 | 64 | // We've fully visited the operands, so sum their cost with this node |
460 | 64 | // and update the cost map. |
461 | 64 | int Cost = TTI.TCC_Free; |
462 | 64 | for (Value *OpV : I->operand_values()) |
463 | 126 | if (auto *OpI = dyn_cast<Instruction>(OpV)) { |
464 | 72 | auto CostMapIt = SpecCostMap.find(OpI); |
465 | 72 | if (CostMapIt != SpecCostMap.end()) |
466 | 6 | Cost += CostMapIt->second; |
467 | 72 | } |
468 | 64 | Cost += TTI.getUserCost(I); |
469 | 64 | bool Inserted = SpecCostMap.insert({I, Cost}).second; |
470 | 64 | (void)Inserted; |
471 | 64 | assert(Inserted && "Must not re-insert a cost during the DFS!"); |
472 | 64 | |
473 | 64 | // Now check if this node had a corresponding PHI node using it. If so, |
474 | 64 | // we need to decrement the outstanding user count for it. |
475 | 64 | auto UserPNsIt = UserToPNMap.find(I); |
476 | 64 | if (UserPNsIt == UserToPNMap.end()) |
477 | 2 | return; |
478 | 62 | auto &UserPNs = UserPNsIt->second; |
479 | 62 | auto UserPNsSplitIt = std::stable_partition( |
480 | 62 | UserPNs.begin(), UserPNs.end(), [&](PHINode *UserPN) { |
481 | 62 | int &PNUserCount = PNUserCountMap.find(UserPN)->second; |
482 | 62 | assert( |
483 | 62 | PNUserCount > 0 && |
484 | 62 | "Should never re-visit a PN after its user count hits zero!"); |
485 | 62 | --PNUserCount; |
486 | 62 | return PNUserCount != 0; |
487 | 62 | }); |
488 | 62 | |
489 | 62 | // FIXME: Rather than one at a time, we should sum the savings as the |
490 | 62 | // cost will be completely shared. |
491 | 62 | SmallVector<Instruction *, 16> SpecWorklist; |
492 | 62 | for (auto *PN : llvm::make_range(UserPNsSplitIt, UserPNs.end())) { |
493 | 39 | int SpecCost = TTI.TCC_Free; |
494 | 39 | for (Use &U : PN->uses()) |
495 | 62 | SpecCost += |
496 | 62 | SpecCostMap.find(cast<Instruction>(U.getUser()))->second; |
497 | 39 | SpecCost *= (NumPreds - 1); |
498 | 39 | // When the user count of a PHI node hits zero, we should check its |
499 | 39 | // profitability. If profitable, we should mark it for speculation |
500 | 39 | // and zero out the cost of everything it depends on. |
501 | 39 | int CostSavings = CostSavingsMap.find(PN)->second; |
502 | 39 | if (SpecCost > CostSavings) { |
503 | 22 | LLVM_DEBUG(dbgs() << " Not profitable, speculation cost: " << *PN |
504 | 22 | << "\n" |
505 | 22 | " Cost savings: " |
506 | 22 | << CostSavings |
507 | 22 | << "\n" |
508 | 22 | " Speculation cost: " |
509 | 22 | << SpecCost << "\n"); |
510 | 22 | continue; |
511 | 22 | } |
512 | 17 | |
513 | 17 | // We're going to speculate this user-associated PHI. Copy it out and |
514 | 17 | // add its users to the worklist to update their cost. |
515 | 17 | SpecPNs.push_back(PN); |
516 | 17 | for (Use &U : PN->uses()) { |
517 | 17 | auto *UI = cast<Instruction>(U.getUser()); |
518 | 17 | auto CostMapIt = SpecCostMap.find(UI); |
519 | 17 | if (CostMapIt->second == 0) |
520 | 0 | continue; |
521 | 17 | // Zero out this cost entry to avoid duplicates. |
522 | 17 | CostMapIt->second = 0; |
523 | 17 | SpecWorklist.push_back(UI); |
524 | 17 | } |
525 | 17 | } |
526 | 62 | |
527 | 62 | // Now walk all the operands of the users in the worklist transitively |
528 | 62 | // to zero out all the memoized costs. |
529 | 79 | while (!SpecWorklist.empty()) { |
530 | 17 | Instruction *SpecI = SpecWorklist.pop_back_val(); |
531 | 17 | assert(SpecCostMap.find(SpecI)->second == 0 && |
532 | 17 | "Didn't zero out a cost!"); |
533 | 17 | |
534 | 17 | // Walk the operands recursively to zero out their cost as well. |
535 | 34 | for (auto *OpV : SpecI->operand_values()) { |
536 | 34 | auto *OpI = dyn_cast<Instruction>(OpV); |
537 | 34 | if (!OpI) |
538 | 8 | continue; |
539 | 26 | auto CostMapIt = SpecCostMap.find(OpI); |
540 | 26 | if (CostMapIt == SpecCostMap.end() || CostMapIt->second == 05 ) |
541 | 26 | continue; |
542 | 0 | CostMapIt->second = 0; |
543 | 0 | SpecWorklist.push_back(OpI); |
544 | 0 | } |
545 | 17 | } |
546 | 62 | }); |
547 | 35 | |
548 | 35 | return SpecPNs; |
549 | 35 | } |
550 | | |
551 | | /// Speculate users around a set of PHI nodes. |
552 | | /// |
553 | | /// This routine does the actual speculation around a set of PHI nodes where we |
554 | | /// have determined this to be both safe and profitable. |
555 | | /// |
556 | | /// This routine handles any spliting of critical edges necessary to create |
557 | | /// a safe block to speculate into as well as cloning the instructions and |
558 | | /// rewriting all uses. |
559 | | static void speculatePHIs(ArrayRef<PHINode *> SpecPNs, |
560 | | SmallPtrSetImpl<Instruction *> &PotentialSpecSet, |
561 | | SmallSetVector<BasicBlock *, 16> &PredSet, |
562 | 13 | DominatorTree &DT) { |
563 | 13 | LLVM_DEBUG(dbgs() << " Speculating around " << SpecPNs.size() << " PHIs!\n"); |
564 | 13 | NumPHIsSpeculated += SpecPNs.size(); |
565 | 13 | |
566 | 13 | // Split any critical edges so that we have a block to hoist into. |
567 | 13 | auto *ParentBB = SpecPNs[0]->getParent(); |
568 | 13 | SmallVector<BasicBlock *, 16> SpecPreds; |
569 | 13 | SpecPreds.reserve(PredSet.size()); |
570 | 26 | for (auto *PredBB : PredSet) { |
571 | 26 | auto *NewPredBB = SplitCriticalEdge( |
572 | 26 | PredBB, ParentBB, |
573 | 26 | CriticalEdgeSplittingOptions(&DT).setMergeIdenticalEdges()); |
574 | 26 | if (NewPredBB) { |
575 | 3 | ++NumEdgesSplit; |
576 | 3 | LLVM_DEBUG(dbgs() << " Split critical edge from: " << PredBB->getName() |
577 | 3 | << "\n"); |
578 | 3 | SpecPreds.push_back(NewPredBB); |
579 | 23 | } else { |
580 | 23 | assert(PredBB->getSingleSuccessor() == ParentBB && |
581 | 23 | "We need a non-critical predecessor to speculate into."); |
582 | 23 | assert(!isa<InvokeInst>(PredBB->getTerminator()) && |
583 | 23 | "Cannot have a non-critical invoke!"); |
584 | 23 | |
585 | 23 | // Already non-critical, use existing pred. |
586 | 23 | SpecPreds.push_back(PredBB); |
587 | 23 | } |
588 | 26 | } |
589 | 13 | |
590 | 13 | SmallPtrSet<Instruction *, 16> SpecSet; |
591 | 13 | SmallVector<Instruction *, 16> SpecList; |
592 | 13 | visitPHIUsersAndDepsInPostOrder(SpecPNs, |
593 | 13 | /*IsVisited*/ |
594 | 44 | [&](Instruction *I) { |
595 | 44 | // This is visited if we don't need to |
596 | 44 | // speculate it or we already have |
597 | 44 | // speculated it. |
598 | 44 | return !PotentialSpecSet.count(I) || |
599 | 44 | SpecSet.count(I)23 ; |
600 | 44 | }, |
601 | 13 | /*Visit*/ |
602 | 19 | [&](Instruction *I) { |
603 | 19 | // All operands scheduled, schedule this |
604 | 19 | // node. |
605 | 19 | SpecSet.insert(I); |
606 | 19 | SpecList.push_back(I); |
607 | 19 | }); |
608 | 13 | |
609 | 13 | int NumSpecInsts = SpecList.size() * SpecPreds.size(); |
610 | 13 | int NumRedundantInsts = NumSpecInsts - SpecList.size(); |
611 | 13 | LLVM_DEBUG(dbgs() << " Inserting " << NumSpecInsts |
612 | 13 | << " speculated instructions, " << NumRedundantInsts |
613 | 13 | << " redundancies\n"); |
614 | 13 | NumSpeculatedInstructions += NumSpecInsts; |
615 | 13 | NumNewRedundantInstructions += NumRedundantInsts; |
616 | 13 | |
617 | 13 | // Each predecessor is numbered by its index in `SpecPreds`, so for each |
618 | 13 | // instruction we speculate, the speculated instruction is stored in that |
619 | 13 | // index of the vector associated with the original instruction. We also |
620 | 13 | // store the incoming values for each predecessor from any PHIs used. |
621 | 13 | SmallDenseMap<Instruction *, SmallVector<Value *, 2>, 16> SpeculatedValueMap; |
622 | 13 | |
623 | 13 | // Inject the synthetic mappings to rewrite PHIs to the appropriate incoming |
624 | 13 | // value. This handles both the PHIs we are speculating around and any other |
625 | 13 | // PHIs that happen to be used. |
626 | 13 | for (auto *OrigI : SpecList) |
627 | 36 | for (auto *OpV : OrigI->operand_values())19 { |
628 | 36 | auto *OpPN = dyn_cast<PHINode>(OpV); |
629 | 36 | if (!OpPN || OpPN->getParent() != ParentBB19 ) |
630 | 18 | continue; |
631 | 18 | |
632 | 18 | auto InsertResult = SpeculatedValueMap.insert({OpPN, {}}); |
633 | 18 | if (!InsertResult.second) |
634 | 0 | continue; |
635 | 18 | |
636 | 18 | auto &SpeculatedVals = InsertResult.first->second; |
637 | 18 | |
638 | 18 | // Populating our structure for mapping is particularly annoying because |
639 | 18 | // finding an incoming value for a particular predecessor block in a PHI |
640 | 18 | // node is a linear time operation! To avoid quadratic behavior, we build |
641 | 18 | // a map for this PHI node's incoming values and then translate it into |
642 | 18 | // the more compact representation used below. |
643 | 18 | SmallDenseMap<BasicBlock *, Value *, 16> IncomingValueMap; |
644 | 18 | for (int i : llvm::seq<int>(0, OpPN->getNumIncomingValues())) |
645 | 36 | IncomingValueMap[OpPN->getIncomingBlock(i)] = OpPN->getIncomingValue(i); |
646 | 18 | |
647 | 18 | for (auto *PredBB : SpecPreds) |
648 | 36 | SpeculatedVals.push_back(IncomingValueMap.find(PredBB)->second); |
649 | 18 | } |
650 | 13 | |
651 | 13 | // Speculate into each predecessor. |
652 | 26 | for (int PredIdx : llvm::seq<int>(0, SpecPreds.size())) { |
653 | 26 | auto *PredBB = SpecPreds[PredIdx]; |
654 | 26 | assert(PredBB->getSingleSuccessor() == ParentBB && |
655 | 26 | "We need a non-critical predecessor to speculate into."); |
656 | 26 | |
657 | 38 | for (auto *OrigI : SpecList) { |
658 | 38 | auto *NewI = OrigI->clone(); |
659 | 38 | NewI->setName(Twine(OrigI->getName()) + "." + Twine(PredIdx)); |
660 | 38 | NewI->insertBefore(PredBB->getTerminator()); |
661 | 38 | |
662 | 38 | // Rewrite all the operands to the previously speculated instructions. |
663 | 38 | // Because we're walking in-order, the defs must precede the uses and we |
664 | 38 | // should already have these mappings. |
665 | 72 | for (Use &U : NewI->operands()) { |
666 | 72 | auto *OpI = dyn_cast<Instruction>(U.get()); |
667 | 72 | if (!OpI) |
668 | 18 | continue; |
669 | 54 | auto MapIt = SpeculatedValueMap.find(OpI); |
670 | 54 | if (MapIt == SpeculatedValueMap.end()) |
671 | 6 | continue; |
672 | 48 | const auto &SpeculatedVals = MapIt->second; |
673 | 48 | assert(SpeculatedVals[PredIdx] && |
674 | 48 | "Must have a speculated value for this predecessor!"); |
675 | 48 | assert(SpeculatedVals[PredIdx]->getType() == OpI->getType() && |
676 | 48 | "Speculated value has the wrong type!"); |
677 | 48 | |
678 | 48 | // Rewrite the use to this predecessor's speculated instruction. |
679 | 48 | U.set(SpeculatedVals[PredIdx]); |
680 | 48 | } |
681 | 38 | |
682 | 38 | // Commute instructions which now have a constant in the LHS but not the |
683 | 38 | // RHS. |
684 | 38 | if (NewI->isBinaryOp() && NewI->isCommutative()32 && |
685 | 38 | isa<Constant>(NewI->getOperand(0))32 && |
686 | 38 | !isa<Constant>(NewI->getOperand(1))3 ) |
687 | 2 | NewI->getOperandUse(0).swap(NewI->getOperandUse(1)); |
688 | 38 | |
689 | 38 | SpeculatedValueMap[OrigI].push_back(NewI); |
690 | 38 | assert(SpeculatedValueMap[OrigI][PredIdx] == NewI && |
691 | 38 | "Mismatched speculated instruction index!"); |
692 | 38 | } |
693 | 26 | } |
694 | 13 | |
695 | 13 | // Walk the speculated instruction list and if they have uses, insert a PHI |
696 | 13 | // for them from the speculated versions, and replace the uses with the PHI. |
697 | 13 | // Then erase the instructions as they have been fully speculated. The walk |
698 | 13 | // needs to be in reverse so that we don't think there are users when we'll |
699 | 13 | // actually eventually remove them later. |
700 | 13 | IRBuilder<> IRB(SpecPNs[0]); |
701 | 19 | for (auto *OrigI : llvm::reverse(SpecList)) { |
702 | 19 | // Check if we need a PHI for any remaining users and if so, insert it. |
703 | 19 | if (!OrigI->use_empty()) { |
704 | 13 | auto *SpecIPN = IRB.CreatePHI(OrigI->getType(), SpecPreds.size(), |
705 | 13 | Twine(OrigI->getName()) + ".phi"); |
706 | 13 | // Add the incoming values we speculated. |
707 | 13 | auto &SpeculatedVals = SpeculatedValueMap.find(OrigI)->second; |
708 | 13 | for (int PredIdx : llvm::seq<int>(0, SpecPreds.size())) |
709 | 26 | SpecIPN->addIncoming(SpeculatedVals[PredIdx], SpecPreds[PredIdx]); |
710 | 13 | |
711 | 13 | // And replace the uses with the PHI node. |
712 | 13 | OrigI->replaceAllUsesWith(SpecIPN); |
713 | 13 | } |
714 | 19 | |
715 | 19 | // It is important to immediately erase this so that it stops using other |
716 | 19 | // instructions. This avoids inserting needless PHIs of them. |
717 | 19 | OrigI->eraseFromParent(); |
718 | 19 | } |
719 | 13 | |
720 | 13 | // All of the uses of the speculated phi nodes should be removed at this |
721 | 13 | // point, so erase them. |
722 | 17 | for (auto *SpecPN : SpecPNs) { |
723 | 17 | assert(SpecPN->use_empty() && "All users should have been speculated!"); |
724 | 17 | SpecPN->eraseFromParent(); |
725 | 17 | } |
726 | 13 | } |
727 | | |
728 | | /// Try to speculate around a series of PHIs from a single basic block. |
729 | | /// |
730 | | /// This routine checks whether any of these PHIs are profitable to speculate |
731 | | /// users around. If safe and profitable, it does the speculation. It returns |
732 | | /// true when at least some speculation occurs. |
733 | | static bool tryToSpeculatePHIs(SmallVectorImpl<PHINode *> &PNs, |
734 | 75 | DominatorTree &DT, TargetTransformInfo &TTI) { |
735 | 75 | LLVM_DEBUG(dbgs() << "Evaluating phi nodes for speculation:\n"); |
736 | 75 | |
737 | 75 | // Savings in cost from speculating around a PHI node. |
738 | 75 | SmallDenseMap<PHINode *, int, 16> CostSavingsMap; |
739 | 75 | |
740 | 75 | // Remember the set of instructions that are candidates for speculation so |
741 | 75 | // that we can quickly walk things within that space. This prunes out |
742 | 75 | // instructions already available along edges, etc. |
743 | 75 | SmallPtrSet<Instruction *, 16> PotentialSpecSet; |
744 | 75 | |
745 | 75 | // Remember the set of instructions that are (transitively) unsafe to |
746 | 75 | // speculate into the incoming edges of this basic block. This avoids |
747 | 75 | // recomputing them for each PHI node we check. This set is specific to this |
748 | 75 | // block though as things are pruned out of it based on what is available |
749 | 75 | // along incoming edges. |
750 | 75 | SmallPtrSet<Instruction *, 16> UnsafeSet; |
751 | 75 | |
752 | 75 | // For each PHI node in this block, check whether there are immediate folding |
753 | 75 | // opportunities from speculation, and whether that speculation will be |
754 | 75 | // valid. This determise the set of safe PHIs to speculate. |
755 | 75 | PNs.erase(llvm::remove_if(PNs, |
756 | 88 | [&](PHINode *PN) { |
757 | 88 | return !isSafeAndProfitableToSpeculateAroundPHI( |
758 | 88 | *PN, CostSavingsMap, PotentialSpecSet, |
759 | 88 | UnsafeSet, DT, TTI); |
760 | 88 | }), |
761 | 75 | PNs.end()); |
762 | 75 | // If no PHIs were profitable, skip. |
763 | 75 | if (PNs.empty()) { |
764 | 36 | LLVM_DEBUG(dbgs() << " No safe and profitable PHIs found!\n"); |
765 | 36 | return false; |
766 | 36 | } |
767 | 39 | |
768 | 39 | // We need to know how much speculation will cost which is determined by how |
769 | 39 | // many incoming edges will need a copy of each speculated instruction. |
770 | 39 | SmallSetVector<BasicBlock *, 16> PredSet; |
771 | 74 | for (auto *PredBB : PNs[0]->blocks()) { |
772 | 74 | if (!PredSet.insert(PredBB)) |
773 | 0 | continue; |
774 | 74 | |
775 | 74 | // We cannot speculate when a predecessor is an indirect branch. |
776 | 74 | // FIXME: We also can't reliably create a non-critical edge block for |
777 | 74 | // speculation if the predecessor is an invoke. This doesn't seem |
778 | 74 | // fundamental and we should probably be splitting critical edges |
779 | 74 | // differently. |
780 | 74 | if (isa<IndirectBrInst>(PredBB->getTerminator()) || |
781 | 74 | isa<InvokeInst>(PredBB->getTerminator())73 ) { |
782 | 4 | LLVM_DEBUG(dbgs() << " Invalid: predecessor terminator: " |
783 | 4 | << PredBB->getName() << "\n"); |
784 | 4 | return false; |
785 | 4 | } |
786 | 74 | } |
787 | 39 | if (35 PredSet.size() < 235 ) { |
788 | 0 | LLVM_DEBUG(dbgs() << " Unimportant: phi with only one predecessor\n"); |
789 | 0 | return false; |
790 | 0 | } |
791 | 35 | |
792 | 35 | SmallVector<PHINode *, 16> SpecPNs = findProfitablePHIs( |
793 | 35 | PNs, CostSavingsMap, PotentialSpecSet, PredSet.size(), DT, TTI); |
794 | 35 | if (SpecPNs.empty()) |
795 | 22 | // Nothing to do. |
796 | 22 | return false; |
797 | 13 | |
798 | 13 | speculatePHIs(SpecPNs, PotentialSpecSet, PredSet, DT); |
799 | 13 | return true; |
800 | 13 | } |
801 | | |
802 | | PreservedAnalyses SpeculateAroundPHIsPass::run(Function &F, |
803 | 880 | FunctionAnalysisManager &AM) { |
804 | 880 | auto &DT = AM.getResult<DominatorTreeAnalysis>(F); |
805 | 880 | auto &TTI = AM.getResult<TargetIRAnalysis>(F); |
806 | 880 | |
807 | 880 | bool Changed = false; |
808 | 1.09k | for (auto *BB : ReversePostOrderTraversal<Function *>(&F)) { |
809 | 1.09k | SmallVector<PHINode *, 16> PNs; |
810 | 1.09k | auto BBI = BB->begin(); |
811 | 1.18k | while (auto *PN = dyn_cast<PHINode>(&*BBI)) { |
812 | 88 | PNs.push_back(PN); |
813 | 88 | ++BBI; |
814 | 88 | } |
815 | 1.09k | |
816 | 1.09k | if (PNs.empty()) |
817 | 1.01k | continue; |
818 | 75 | |
819 | 75 | Changed |= tryToSpeculatePHIs(PNs, DT, TTI); |
820 | 75 | } |
821 | 880 | |
822 | 880 | if (!Changed) |
823 | 867 | return PreservedAnalyses::all(); |
824 | 13 | |
825 | 13 | PreservedAnalyses PA; |
826 | 13 | return PA; |
827 | 13 | } |