/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Transforms/Scalar/LICM.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | // This pass performs loop invariant code motion, attempting to remove as much |
11 | | // code from the body of a loop as possible. It does this by either hoisting |
12 | | // code into the preheader block, or by sinking code to the exit blocks if it is |
13 | | // safe. This pass also promotes must-aliased memory locations in the loop to |
14 | | // live in registers, thus hoisting and sinking "invariant" loads and stores. |
15 | | // |
16 | | // This pass uses alias analysis for two purposes: |
17 | | // |
18 | | // 1. Moving loop invariant loads and calls out of loops. If we can determine |
19 | | // that a load or call inside of a loop never aliases anything stored to, |
20 | | // we can hoist it or sink it like any other instruction. |
21 | | // 2. Scalar Promotion of Memory - If there is a store instruction inside of |
22 | | // the loop, we try to move the store to happen AFTER the loop instead of |
23 | | // inside of the loop. This can only happen if a few conditions are true: |
24 | | // A. The pointer stored through is loop invariant |
25 | | // B. There are no stores or loads in the loop which _may_ alias the |
26 | | // pointer. There are no calls in the loop which mod/ref the pointer. |
27 | | // If these conditions are true, we can promote the loads and stores in the |
28 | | // loop of the pointer to use a temporary alloca'd variable. We then use |
29 | | // the SSAUpdater to construct the appropriate SSA form for the value. |
30 | | // |
31 | | //===----------------------------------------------------------------------===// |
32 | | |
33 | | #include "llvm/Transforms/Scalar/LICM.h" |
34 | | #include "llvm/ADT/Statistic.h" |
35 | | #include "llvm/Analysis/AliasAnalysis.h" |
36 | | #include "llvm/Analysis/AliasSetTracker.h" |
37 | | #include "llvm/Analysis/BasicAliasAnalysis.h" |
38 | | #include "llvm/Analysis/CaptureTracking.h" |
39 | | #include "llvm/Analysis/ConstantFolding.h" |
40 | | #include "llvm/Analysis/GlobalsModRef.h" |
41 | | #include "llvm/Analysis/Loads.h" |
42 | | #include "llvm/Analysis/LoopInfo.h" |
43 | | #include "llvm/Analysis/LoopPass.h" |
44 | | #include "llvm/Analysis/MemoryBuiltins.h" |
45 | | #include "llvm/Analysis/OptimizationDiagnosticInfo.h" |
46 | | #include "llvm/Analysis/ScalarEvolution.h" |
47 | | #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" |
48 | | #include "llvm/Analysis/TargetLibraryInfo.h" |
49 | | #include "llvm/Analysis/ValueTracking.h" |
50 | | #include "llvm/IR/CFG.h" |
51 | | #include "llvm/IR/Constants.h" |
52 | | #include "llvm/IR/DataLayout.h" |
53 | | #include "llvm/IR/DerivedTypes.h" |
54 | | #include "llvm/IR/Dominators.h" |
55 | | #include "llvm/IR/Instructions.h" |
56 | | #include "llvm/IR/IntrinsicInst.h" |
57 | | #include "llvm/IR/LLVMContext.h" |
58 | | #include "llvm/IR/Metadata.h" |
59 | | #include "llvm/IR/PredIteratorCache.h" |
60 | | #include "llvm/Support/CommandLine.h" |
61 | | #include "llvm/Support/Debug.h" |
62 | | #include "llvm/Support/raw_ostream.h" |
63 | | #include "llvm/Transforms/Scalar.h" |
64 | | #include "llvm/Transforms/Scalar/LoopPassManager.h" |
65 | | #include "llvm/Transforms/Utils/Local.h" |
66 | | #include "llvm/Transforms/Utils/LoopUtils.h" |
67 | | #include "llvm/Transforms/Utils/SSAUpdater.h" |
68 | | #include <algorithm> |
69 | | #include <utility> |
70 | | using namespace llvm; |
71 | | |
72 | 1.06M | #define DEBUG_TYPE "licm" |
73 | | |
74 | | STATISTIC(NumSunk, "Number of instructions sunk out of loop"); |
75 | | STATISTIC(NumHoisted, "Number of instructions hoisted out of loop"); |
76 | | STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk"); |
77 | | STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk"); |
78 | | STATISTIC(NumPromoted, "Number of memory locations promoted to registers"); |
79 | | |
80 | | /// Memory promotion is enabled by default. |
81 | | static cl::opt<bool> |
82 | | DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false), |
83 | | cl::desc("Disable memory promotion in LICM pass")); |
84 | | |
85 | | static cl::opt<uint32_t> MaxNumUsesTraversed( |
86 | | "licm-max-num-uses-traversed", cl::Hidden, cl::init(8), |
87 | | cl::desc("Max num uses visited for identifying load " |
88 | | "invariance in loop using invariant start (default = 8)")); |
89 | | |
90 | | static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI); |
91 | | static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop, |
92 | | const LoopSafetyInfo *SafetyInfo); |
93 | | static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, |
94 | | const LoopSafetyInfo *SafetyInfo, |
95 | | OptimizationRemarkEmitter *ORE); |
96 | | static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT, |
97 | | const Loop *CurLoop, AliasSetTracker *CurAST, |
98 | | const LoopSafetyInfo *SafetyInfo, |
99 | | OptimizationRemarkEmitter *ORE); |
100 | | static bool isSafeToExecuteUnconditionally(Instruction &Inst, |
101 | | const DominatorTree *DT, |
102 | | const Loop *CurLoop, |
103 | | const LoopSafetyInfo *SafetyInfo, |
104 | | OptimizationRemarkEmitter *ORE, |
105 | | const Instruction *CtxI = nullptr); |
106 | | static bool pointerInvalidatedByLoop(Value *V, uint64_t Size, |
107 | | const AAMDNodes &AAInfo, |
108 | | AliasSetTracker *CurAST); |
109 | | static Instruction * |
110 | | CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN, |
111 | | const LoopInfo *LI, |
112 | | const LoopSafetyInfo *SafetyInfo); |
113 | | |
114 | | namespace { |
115 | | struct LoopInvariantCodeMotion { |
116 | | bool runOnLoop(Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT, |
117 | | TargetLibraryInfo *TLI, ScalarEvolution *SE, |
118 | | OptimizationRemarkEmitter *ORE, bool DeleteAST); |
119 | | |
120 | 183k | DenseMap<Loop *, AliasSetTracker *> &getLoopToAliasSetMap() { |
121 | 183k | return LoopToAliasSetMap; |
122 | 183k | } |
123 | | |
124 | | private: |
125 | | DenseMap<Loop *, AliasSetTracker *> LoopToAliasSetMap; |
126 | | |
127 | | AliasSetTracker *collectAliasInfoForLoop(Loop *L, LoopInfo *LI, |
128 | | AliasAnalysis *AA); |
129 | | }; |
130 | | |
131 | | struct LegacyLICMPass : public LoopPass { |
132 | | static char ID; // Pass identification, replacement for typeid |
133 | 53.2k | LegacyLICMPass() : LoopPass(ID) { |
134 | 53.2k | initializeLegacyLICMPassPass(*PassRegistry::getPassRegistry()); |
135 | 53.2k | } |
136 | | |
137 | 1.02M | bool runOnLoop(Loop *L, LPPassManager &LPM) override { |
138 | 1.02M | if (skipLoop(L)1.02M ) { |
139 | 55 | // If we have run LICM on a previous loop but now we are skipping |
140 | 55 | // (because we've hit the opt-bisect limit), we need to clear the |
141 | 55 | // loop alias information. |
142 | 55 | for (auto <AS : LICM.getLoopToAliasSetMap()) |
143 | 1 | delete LTAS.second; |
144 | 55 | LICM.getLoopToAliasSetMap().clear(); |
145 | 55 | return false; |
146 | 55 | } |
147 | 1.02M | |
148 | 1.02M | auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>(); |
149 | 1.02M | // For the old PM, we can't use OptimizationRemarkEmitter as an analysis |
150 | 1.02M | // pass. Function analyses need to be preserved across loop transformations |
151 | 1.02M | // but ORE cannot be preserved (see comment before the pass definition). |
152 | 1.02M | OptimizationRemarkEmitter ORE(L->getHeader()->getParent()); |
153 | 1.02M | return LICM.runOnLoop(L, |
154 | 1.02M | &getAnalysis<AAResultsWrapperPass>().getAAResults(), |
155 | 1.02M | &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(), |
156 | 1.02M | &getAnalysis<DominatorTreeWrapperPass>().getDomTree(), |
157 | 1.02M | &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(), |
158 | 1.02M | SE ? &SE->getSE()1.02M : nullptr0 , &ORE, false); |
159 | 1.02M | } |
160 | | |
161 | | /// This transformation requires natural loop information & requires that |
162 | | /// loop preheaders be inserted into the CFG... |
163 | | /// |
164 | 53.2k | void getAnalysisUsage(AnalysisUsage &AU) const override { |
165 | 53.2k | AU.setPreservesCFG(); |
166 | 53.2k | AU.addRequired<TargetLibraryInfoWrapperPass>(); |
167 | 53.2k | getLoopAnalysisUsage(AU); |
168 | 53.2k | } |
169 | | |
170 | | using llvm::Pass::doFinalization; |
171 | | |
172 | 330k | bool doFinalization() override { |
173 | 330k | assert(LICM.getLoopToAliasSetMap().empty() && |
174 | 330k | "Didn't free loop alias sets"); |
175 | 330k | return false; |
176 | 330k | } |
177 | | |
178 | | private: |
179 | | LoopInvariantCodeMotion LICM; |
180 | | |
181 | | /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info. |
182 | | void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, |
183 | | Loop *L) override; |
184 | | |
185 | | /// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias |
186 | | /// set. |
187 | | void deleteAnalysisValue(Value *V, Loop *L) override; |
188 | | |
189 | | /// Simple Analysis hook. Delete loop L from alias set map. |
190 | | void deleteAnalysisLoop(Loop *L) override; |
191 | | }; |
192 | | } // namespace |
193 | | |
194 | | PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM, |
195 | 208 | LoopStandardAnalysisResults &AR, LPMUpdater &) { |
196 | 208 | const auto &FAM = |
197 | 208 | AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager(); |
198 | 208 | Function *F = L.getHeader()->getParent(); |
199 | 208 | |
200 | 208 | auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F); |
201 | 208 | // FIXME: This should probably be optional rather than required. |
202 | 208 | if (!ORE) |
203 | 0 | report_fatal_error("LICM: OptimizationRemarkEmitterAnalysis not " |
204 | 0 | "cached at a higher level"); |
205 | 208 | |
206 | 208 | LoopInvariantCodeMotion LICM; |
207 | 208 | if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.SE, ORE, true)) |
208 | 139 | return PreservedAnalyses::all(); |
209 | 69 | |
210 | 69 | auto PA = getLoopPassPreservedAnalyses(); |
211 | 69 | PA.preserveSet<CFGAnalyses>(); |
212 | 69 | return PA; |
213 | 69 | } |
214 | | |
215 | | char LegacyLICMPass::ID = 0; |
216 | 41.8k | INITIALIZE_PASS_BEGIN41.8k (LegacyLICMPass, "licm", "Loop Invariant Code Motion",
|
217 | 41.8k | false, false) |
218 | 41.8k | INITIALIZE_PASS_DEPENDENCY(LoopPass) |
219 | 41.8k | INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) |
220 | 41.8k | INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false, |
221 | | false) |
222 | | |
223 | 53.0k | Pass *llvm::createLICMPass() { return new LegacyLICMPass(); } |
224 | | |
225 | | /// Hoist expressions out of the specified loop. Note, alias info for inner |
226 | | /// loop is not preserved so it is not a good idea to run LICM multiple |
227 | | /// times on one loop. |
228 | | /// We should delete AST for inner loops in the new pass manager to avoid |
229 | | /// memory leak. |
230 | | /// |
231 | | bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AliasAnalysis *AA, |
232 | | LoopInfo *LI, DominatorTree *DT, |
233 | | TargetLibraryInfo *TLI, |
234 | | ScalarEvolution *SE, |
235 | | OptimizationRemarkEmitter *ORE, |
236 | 1.02M | bool DeleteAST) { |
237 | 1.02M | bool Changed = false; |
238 | 1.02M | |
239 | 1.02M | assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form."); |
240 | 1.02M | |
241 | 1.02M | AliasSetTracker *CurAST = collectAliasInfoForLoop(L, LI, AA); |
242 | 1.02M | |
243 | 1.02M | // Get the preheader block to move instructions into... |
244 | 1.02M | BasicBlock *Preheader = L->getLoopPreheader(); |
245 | 1.02M | |
246 | 1.02M | // Compute loop safety information. |
247 | 1.02M | LoopSafetyInfo SafetyInfo; |
248 | 1.02M | computeLoopSafetyInfo(&SafetyInfo, L); |
249 | 1.02M | |
250 | 1.02M | // We want to visit all of the instructions in this loop... that are not parts |
251 | 1.02M | // of our subloops (they have already had their invariants hoisted out of |
252 | 1.02M | // their loop, into this loop, so there is no need to process the BODIES of |
253 | 1.02M | // the subloops). |
254 | 1.02M | // |
255 | 1.02M | // Traverse the body of the loop in depth first order on the dominator tree so |
256 | 1.02M | // that we are guaranteed to see definitions before we see uses. This allows |
257 | 1.02M | // us to sink instructions in one pass, without iteration. After sinking |
258 | 1.02M | // instructions, we perform another pass to hoist them out of the loop. |
259 | 1.02M | // |
260 | 1.02M | if (L->hasDedicatedExits()) |
261 | 1.02M | Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L, |
262 | 1.02M | CurAST, &SafetyInfo, ORE); |
263 | 1.02M | if (Preheader) |
264 | 1.02M | Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L, |
265 | 1.02M | CurAST, &SafetyInfo, ORE); |
266 | 1.02M | |
267 | 1.02M | // Now that all loop invariants have been removed from the loop, promote any |
268 | 1.02M | // memory references to scalars that we can. |
269 | 1.02M | // Don't sink stores from loops without dedicated block exits. Exits |
270 | 1.02M | // containing indirect branches are not transformed by loop simplify, |
271 | 1.02M | // make sure we catch that. An additional load may be generated in the |
272 | 1.02M | // preheader for SSA updater, so also avoid sinking when no preheader |
273 | 1.02M | // is available. |
274 | 1.02M | if (!DisablePromotion && 1.02M Preheader1.02M && L->hasDedicatedExits()1.02M ) { |
275 | 1.02M | // Figure out the loop exits and their insertion points |
276 | 1.02M | SmallVector<BasicBlock *, 8> ExitBlocks; |
277 | 1.02M | L->getUniqueExitBlocks(ExitBlocks); |
278 | 1.02M | |
279 | 1.02M | // We can't insert into a catchswitch. |
280 | 1.31M | bool HasCatchSwitch = llvm::any_of(ExitBlocks, [](BasicBlock *Exit) { |
281 | 1.31M | return isa<CatchSwitchInst>(Exit->getTerminator()); |
282 | 1.31M | }); |
283 | 1.02M | |
284 | 1.02M | if (!HasCatchSwitch1.02M ) { |
285 | 1.02M | SmallVector<Instruction *, 8> InsertPts; |
286 | 1.02M | InsertPts.reserve(ExitBlocks.size()); |
287 | 1.02M | for (BasicBlock *ExitBlock : ExitBlocks) |
288 | 1.31M | InsertPts.push_back(&*ExitBlock->getFirstInsertionPt()); |
289 | 1.02M | |
290 | 1.02M | PredIteratorCache PIC; |
291 | 1.02M | |
292 | 1.02M | bool Promoted = false; |
293 | 1.02M | |
294 | 1.02M | // Loop over all of the alias sets in the tracker object. |
295 | 1.68M | for (AliasSet &AS : *CurAST) { |
296 | 1.68M | // We can promote this alias set if it has a store, if it is a "Must" |
297 | 1.68M | // alias set, if the pointer is loop invariant, and if we are not |
298 | 1.68M | // eliminating any volatile loads or stores. |
299 | 1.68M | if (AS.isForwardingAliasSet() || 1.68M !AS.isMod()1.57M || !AS.isMustAlias()1.20M || |
300 | 1.68M | AS.isVolatile()510k || !L->isLoopInvariant(AS.begin()->getValue())510k ) |
301 | 1.67M | continue; |
302 | 5.44k | |
303 | 1.68M | assert( |
304 | 5.44k | !AS.empty() && |
305 | 5.44k | "Must alias set should have at least one pointer element in it!"); |
306 | 5.44k | |
307 | 5.44k | SmallSetVector<Value *, 8> PointerMustAliases; |
308 | 5.44k | for (const auto &ASI : AS) |
309 | 5.72k | PointerMustAliases.insert(ASI.getValue()); |
310 | 1.68M | |
311 | 1.68M | Promoted |= promoteLoopAccessesToScalars(PointerMustAliases, ExitBlocks, |
312 | 1.68M | InsertPts, PIC, LI, DT, TLI, L, |
313 | 1.68M | CurAST, &SafetyInfo, ORE); |
314 | 1.68M | } |
315 | 1.02M | |
316 | 1.02M | // Once we have promoted values across the loop body we have to |
317 | 1.02M | // recursively reform LCSSA as any nested loop may now have values defined |
318 | 1.02M | // within the loop used in the outer loop. |
319 | 1.02M | // FIXME: This is really heavy handed. It would be a bit better to use an |
320 | 1.02M | // SSAUpdater strategy during promotion that was LCSSA aware and reformed |
321 | 1.02M | // it as it went. |
322 | 1.02M | if (Promoted) |
323 | 1.40k | formLCSSARecursively(*L, *DT, LI, SE); |
324 | 1.02M | |
325 | 1.02M | Changed |= Promoted; |
326 | 1.02M | } |
327 | 1.02M | } |
328 | 1.02M | |
329 | 1.02M | // Check that neither this loop nor its parent have had LCSSA broken. LICM is |
330 | 1.02M | // specifically moving instructions across the loop boundary and so it is |
331 | 1.02M | // especially in need of sanity checking here. |
332 | 1.02M | assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!"); |
333 | 1.02M | assert((!L->getParentLoop() || L->getParentLoop()->isLCSSAForm(*DT)) && |
334 | 1.02M | "Parent loop not left in LCSSA form after LICM!"); |
335 | 1.02M | |
336 | 1.02M | // If this loop is nested inside of another one, save the alias information |
337 | 1.02M | // for when we process the outer loop. |
338 | 1.02M | if (L->getParentLoop() && 1.02M !DeleteAST309k ) |
339 | 309k | LoopToAliasSetMap[L] = CurAST; |
340 | 1.02M | else |
341 | 720k | delete CurAST; |
342 | 1.02M | |
343 | 1.02M | if (Changed && 1.02M SE63.4k ) |
344 | 63.4k | SE->forgetLoopDispositions(L); |
345 | 1.02M | return Changed; |
346 | 1.02M | } |
347 | | |
348 | | /// Walk the specified region of the CFG (defined by all blocks dominated by |
349 | | /// the specified block, and that are in the current loop) in reverse depth |
350 | | /// first order w.r.t the DominatorTree. This allows us to visit uses before |
351 | | /// definitions, allowing us to sink a loop body in one pass without iteration. |
352 | | /// |
353 | | bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, |
354 | | DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop, |
355 | | AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo, |
356 | 1.02M | OptimizationRemarkEmitter *ORE) { |
357 | 1.02M | |
358 | 1.02M | // Verify inputs. |
359 | 1.02M | assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && |
360 | 1.02M | CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr && |
361 | 1.02M | "Unexpected input to sinkRegion"); |
362 | 1.02M | |
363 | 1.02M | // We want to visit children before parents. We will enque all the parents |
364 | 1.02M | // before their children in the worklist and process the worklist in reverse |
365 | 1.02M | // order. |
366 | 1.02M | SmallVector<DomTreeNode *, 16> Worklist = collectChildrenInLoop(N, CurLoop); |
367 | 1.02M | |
368 | 1.02M | bool Changed = false; |
369 | 4.77M | for (DomTreeNode *DTN : reverse(Worklist)) { |
370 | 4.77M | BasicBlock *BB = DTN->getBlock(); |
371 | 4.77M | // Only need to process the contents of this block if it is not part of a |
372 | 4.77M | // subloop (which would already have been processed). |
373 | 4.77M | if (inSubLoop(BB, CurLoop, LI)) |
374 | 1.34M | continue; |
375 | 3.42M | |
376 | 24.5M | for (BasicBlock::iterator II = BB->end(); 3.42M II != BB->begin()24.5M ;) { |
377 | 21.1M | Instruction &I = *--II; |
378 | 21.1M | |
379 | 21.1M | // If the instruction is dead, we would try to sink it because it isn't |
380 | 21.1M | // used in the loop, instead, just delete it. |
381 | 21.1M | if (isInstructionTriviallyDead(&I, TLI)21.1M ) { |
382 | 767 | DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n'); |
383 | 767 | ++II; |
384 | 767 | CurAST->deleteValue(&I); |
385 | 767 | I.eraseFromParent(); |
386 | 767 | Changed = true; |
387 | 767 | continue; |
388 | 767 | } |
389 | 21.1M | |
390 | 21.1M | // Check to see if we can sink this instruction to the exit blocks |
391 | 21.1M | // of the loop. We can do this if the all users of the instruction are |
392 | 21.1M | // outside of the loop. In this case, it doesn't even matter if the |
393 | 21.1M | // operands of the instruction are loop invariant. |
394 | 21.1M | // |
395 | 21.1M | if (21.1M isNotUsedInLoop(I, CurLoop, SafetyInfo) && |
396 | 21.1M | canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE)5.23M ) { |
397 | 1.03k | ++II; |
398 | 1.03k | Changed |= sink(I, LI, DT, CurLoop, CurAST, SafetyInfo, ORE); |
399 | 1.03k | } |
400 | 21.1M | } |
401 | 4.77M | } |
402 | 1.02M | return Changed; |
403 | 1.02M | } |
404 | | |
405 | | /// Walk the specified region of the CFG (defined by all blocks dominated by |
406 | | /// the specified block, and that are in the current loop) in depth first |
407 | | /// order w.r.t the DominatorTree. This allows us to visit definitions before |
408 | | /// uses, allowing us to hoist a loop body in one pass without iteration. |
409 | | /// |
410 | | bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, |
411 | | DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop, |
412 | | AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo, |
413 | 1.02M | OptimizationRemarkEmitter *ORE) { |
414 | 1.02M | // Verify inputs. |
415 | 1.02M | assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && |
416 | 1.02M | CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr && |
417 | 1.02M | "Unexpected input to hoistRegion"); |
418 | 1.02M | |
419 | 1.02M | // We want to visit parents before children. We will enque all the parents |
420 | 1.02M | // before their children in the worklist and process the worklist in order. |
421 | 1.02M | SmallVector<DomTreeNode *, 16> Worklist = collectChildrenInLoop(N, CurLoop); |
422 | 1.02M | |
423 | 1.02M | bool Changed = false; |
424 | 4.77M | for (DomTreeNode *DTN : Worklist) { |
425 | 4.77M | BasicBlock *BB = DTN->getBlock(); |
426 | 4.77M | // Only need to process the contents of this block if it is not part of a |
427 | 4.77M | // subloop (which would already have been processed). |
428 | 4.77M | if (!inSubLoop(BB, CurLoop, LI)) |
429 | 24.5M | for (BasicBlock::iterator II = BB->begin(), E = BB->end(); 3.42M II != E24.5M ;) { |
430 | 21.1M | Instruction &I = *II++; |
431 | 21.1M | // Try constant folding this instruction. If all the operands are |
432 | 21.1M | // constants, it is technically hoistable, but it would be better to |
433 | 21.1M | // just fold it. |
434 | 21.1M | if (Constant *C = ConstantFoldInstruction( |
435 | 432 | &I, I.getModule()->getDataLayout(), TLI)) { |
436 | 432 | DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n'); |
437 | 432 | CurAST->copyValue(&I, C); |
438 | 432 | I.replaceAllUsesWith(C); |
439 | 432 | if (isInstructionTriviallyDead(&I, TLI)432 ) { |
440 | 432 | CurAST->deleteValue(&I); |
441 | 432 | I.eraseFromParent(); |
442 | 432 | } |
443 | 432 | Changed = true; |
444 | 432 | continue; |
445 | 432 | } |
446 | 21.1M | |
447 | 21.1M | // Attempt to remove floating point division out of the loop by |
448 | 21.1M | // converting it to a reciprocal multiplication. |
449 | 21.1M | if (21.1M I.getOpcode() == Instruction::FDiv && |
450 | 113k | CurLoop->isLoopInvariant(I.getOperand(1)) && |
451 | 21.1M | I.hasAllowReciprocal()19.8k ) { |
452 | 2 | auto Divisor = I.getOperand(1); |
453 | 2 | auto One = llvm::ConstantFP::get(Divisor->getType(), 1.0); |
454 | 2 | auto ReciprocalDivisor = BinaryOperator::CreateFDiv(One, Divisor); |
455 | 2 | ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags()); |
456 | 2 | ReciprocalDivisor->insertBefore(&I); |
457 | 2 | |
458 | 2 | auto Product = |
459 | 2 | BinaryOperator::CreateFMul(I.getOperand(0), ReciprocalDivisor); |
460 | 2 | Product->setFastMathFlags(I.getFastMathFlags()); |
461 | 2 | Product->insertAfter(&I); |
462 | 2 | I.replaceAllUsesWith(Product); |
463 | 2 | I.eraseFromParent(); |
464 | 2 | |
465 | 2 | hoist(*ReciprocalDivisor, DT, CurLoop, SafetyInfo, ORE); |
466 | 2 | Changed = true; |
467 | 2 | continue; |
468 | 2 | } |
469 | 21.1M | |
470 | 21.1M | // Try hoisting the instruction out to the preheader. We can only do |
471 | 21.1M | // this if all of the operands of the instruction are loop invariant and |
472 | 21.1M | // if it is safe to hoist the instruction. |
473 | 21.1M | // |
474 | 21.1M | if (21.1M CurLoop->hasLoopInvariantOperands(&I) && |
475 | 2.77M | canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE) && |
476 | 238k | isSafeToExecuteUnconditionally( |
477 | 238k | I, DT, CurLoop, SafetyInfo, ORE, |
478 | 238k | CurLoop->getLoopPreheader()->getTerminator())) |
479 | 228k | Changed |= hoist(I, DT, CurLoop, SafetyInfo, ORE); |
480 | 3.42M | } |
481 | 4.77M | } |
482 | 1.02M | |
483 | 1.02M | return Changed; |
484 | 1.02M | } |
485 | | |
486 | | /// Computes loop safety information, checks loop body & header |
487 | | /// for the possibility of may throw exception. |
488 | | /// |
489 | 1.16M | void llvm::computeLoopSafetyInfo(LoopSafetyInfo *SafetyInfo, Loop *CurLoop) { |
490 | 1.16M | assert(CurLoop != nullptr && "CurLoop cant be null"); |
491 | 1.16M | BasicBlock *Header = CurLoop->getHeader(); |
492 | 1.16M | // Setting default safety values. |
493 | 1.16M | SafetyInfo->MayThrow = false; |
494 | 1.16M | SafetyInfo->HeaderMayThrow = false; |
495 | 1.16M | // Iterate over header and compute safety info. |
496 | 1.16M | for (BasicBlock::iterator I = Header->begin(), E = Header->end(); |
497 | 10.2M | (I != E) && 10.2M !SafetyInfo->HeaderMayThrow9.40M ; ++I9.10M ) |
498 | 9.10M | SafetyInfo->HeaderMayThrow |= |
499 | 9.10M | !isGuaranteedToTransferExecutionToSuccessor(&*I); |
500 | 1.16M | |
501 | 1.16M | SafetyInfo->MayThrow = SafetyInfo->HeaderMayThrow; |
502 | 1.16M | // Iterate over loop instructions and compute safety info. |
503 | 1.16M | // Skip header as it has been computed and stored in HeaderMayThrow. |
504 | 1.16M | // The first block in loopinfo.Blocks is guaranteed to be the header. |
505 | 1.16M | assert(Header == *CurLoop->getBlocks().begin() && |
506 | 1.16M | "First block must be header"); |
507 | 1.16M | for (Loop::block_iterator BB = std::next(CurLoop->block_begin()), |
508 | 1.16M | BBE = CurLoop->block_end(); |
509 | 2.60M | (BB != BBE) && 2.60M !SafetyInfo->MayThrow1.71M ; ++BB1.43M ) |
510 | 1.43M | for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); |
511 | 9.86M | (I != E) && 9.86M !SafetyInfo->MayThrow8.63M ; ++I8.43M ) |
512 | 8.43M | SafetyInfo->MayThrow |= !isGuaranteedToTransferExecutionToSuccessor(&*I); |
513 | 1.16M | |
514 | 1.16M | // Compute funclet colors if we might sink/hoist in a function with a funclet |
515 | 1.16M | // personality routine. |
516 | 1.16M | Function *Fn = CurLoop->getHeader()->getParent(); |
517 | 1.16M | if (Fn->hasPersonalityFn()) |
518 | 16.5k | if (Constant *16.5k PersonalityFn16.5k = Fn->getPersonalityFn()) |
519 | 16.5k | if (16.5k isFuncletEHPersonality(classifyEHPersonality(PersonalityFn))16.5k ) |
520 | 6 | SafetyInfo->BlockColors = colorEHFunclets(*Fn); |
521 | 1.16M | } |
522 | | |
523 | | // Return true if LI is invariant within scope of the loop. LI is invariant if |
524 | | // CurLoop is dominated by an invariant.start representing the same memory |
525 | | // location and size as the memory location LI loads from, and also the |
526 | | // invariant.start has no uses. |
527 | | static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT, |
528 | 853k | Loop *CurLoop) { |
529 | 853k | Value *Addr = LI->getOperand(0); |
530 | 853k | const DataLayout &DL = LI->getModule()->getDataLayout(); |
531 | 853k | const uint32_t LocSizeInBits = DL.getTypeSizeInBits( |
532 | 853k | cast<PointerType>(Addr->getType())->getElementType()); |
533 | 853k | |
534 | 853k | // if the type is i8 addrspace(x)*, we know this is the type of |
535 | 853k | // llvm.invariant.start operand |
536 | 853k | auto *PtrInt8Ty = PointerType::get(Type::getInt8Ty(LI->getContext()), |
537 | 853k | LI->getPointerAddressSpace()); |
538 | 853k | unsigned BitcastsVisited = 0; |
539 | 853k | // Look through bitcasts until we reach the i8* type (this is invariant.start |
540 | 853k | // operand type). |
541 | 947k | while (Addr->getType() != PtrInt8Ty947k ) { |
542 | 884k | auto *BC = dyn_cast<BitCastInst>(Addr); |
543 | 884k | // Avoid traversing high number of bitcast uses. |
544 | 884k | if (++BitcastsVisited > MaxNumUsesTraversed || 884k !BC884k ) |
545 | 790k | return false; |
546 | 94.1k | Addr = BC->getOperand(0); |
547 | 94.1k | } |
548 | 853k | |
549 | 63.6k | unsigned UsesVisited = 0; |
550 | 63.6k | // Traverse all uses of the load operand value, to see if invariant.start is |
551 | 63.6k | // one of the uses, and whether it dominates the load instruction. |
552 | 153k | for (auto *U : Addr->users()) { |
553 | 153k | // Avoid traversing for Load operand with high number of users. |
554 | 153k | if (++UsesVisited > MaxNumUsesTraversed) |
555 | 4.24k | return false; |
556 | 149k | IntrinsicInst *II = dyn_cast<IntrinsicInst>(U); |
557 | 149k | // If there are escaping uses of invariant.start instruction, the load maybe |
558 | 149k | // non-invariant. |
559 | 149k | if (!II || 149k II->getIntrinsicID() != Intrinsic::invariant_start2.76k || |
560 | 6 | !II->use_empty()) |
561 | 149k | continue; |
562 | 2 | unsigned InvariantSizeInBits = |
563 | 2 | cast<ConstantInt>(II->getArgOperand(0))->getSExtValue() * 8; |
564 | 2 | // Confirm the invariant.start location size contains the load operand size |
565 | 2 | // in bits. Also, the invariant.start should dominate the load, and we |
566 | 2 | // should not hoist the load out of a loop that contains this dominating |
567 | 2 | // invariant.start. |
568 | 2 | if (LocSizeInBits <= InvariantSizeInBits && |
569 | 2 | DT->properlyDominates(II->getParent(), CurLoop->getHeader())) |
570 | 2 | return true; |
571 | 59.4k | } |
572 | 59.4k | |
573 | 59.4k | return false; |
574 | 59.4k | } |
575 | | |
576 | | bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, |
577 | | Loop *CurLoop, AliasSetTracker *CurAST, |
578 | | LoopSafetyInfo *SafetyInfo, |
579 | 8.01M | OptimizationRemarkEmitter *ORE) { |
580 | 8.01M | // Loads have extra constraints we have to verify before we can hoist them. |
581 | 8.01M | if (LoadInst *LI8.01M = dyn_cast<LoadInst>(&I)) { |
582 | 856k | if (!LI->isUnordered()) |
583 | 1.36k | return false; // Don't hoist volatile/atomic loads! |
584 | 854k | |
585 | 854k | // Loads from constant memory are always safe to move, even if they end up |
586 | 854k | // in the same alias set as something that ends up being modified. |
587 | 854k | if (854k AA->pointsToConstantMemory(LI->getOperand(0))854k ) |
588 | 1.10k | return true; |
589 | 853k | if (853k LI->getMetadata(LLVMContext::MD_invariant_load)853k ) |
590 | 23 | return true; |
591 | 853k | |
592 | 853k | // This checks for an invariant.start dominating the load. |
593 | 853k | if (853k isLoadInvariantInLoop(LI, DT, CurLoop)853k ) |
594 | 2 | return true; |
595 | 853k | |
596 | 853k | // Don't hoist loads which have may-aliased stores in loop. |
597 | 853k | uint64_t Size = 0; |
598 | 853k | if (LI->getType()->isSized()) |
599 | 853k | Size = I.getModule()->getDataLayout().getTypeStoreSize(LI->getType()); |
600 | 853k | |
601 | 853k | AAMDNodes AAInfo; |
602 | 853k | LI->getAAMetadata(AAInfo); |
603 | 853k | |
604 | 853k | bool Invalidated = |
605 | 853k | pointerInvalidatedByLoop(LI->getOperand(0), Size, AAInfo, CurAST); |
606 | 853k | // Check loop-invariant address because this may also be a sinkable load |
607 | 853k | // whose address is not necessarily loop-invariant. |
608 | 853k | if (ORE && 853k Invalidated853k && CurLoop->isLoopInvariant(LI->getPointerOperand())819k ) |
609 | 819k | ORE->emit(OptimizationRemarkMissed( |
610 | 819k | DEBUG_TYPE, "LoadWithLoopInvariantAddressInvalidated", LI) |
611 | 819k | << "failed to move load with loop-invariant address " |
612 | 819k | "because the loop may invalidate its value"); |
613 | 856k | |
614 | 856k | return !Invalidated; |
615 | 7.15M | } else if (CallInst *7.15M CI7.15M = dyn_cast<CallInst>(&I)) { |
616 | 873k | // Don't sink or hoist dbg info; it's legal, but not useful. |
617 | 873k | if (isa<DbgInfoIntrinsic>(I)) |
618 | 28 | return false; |
619 | 873k | |
620 | 873k | // Don't sink calls which can throw. |
621 | 873k | if (873k CI->mayThrow()873k ) |
622 | 10.4k | return false; |
623 | 863k | |
624 | 863k | // Handle simple cases by querying alias analysis. |
625 | 863k | FunctionModRefBehavior Behavior = AA->getModRefBehavior(CI); |
626 | 863k | if (Behavior == FMRB_DoesNotAccessMemory) |
627 | 463 | return true; |
628 | 862k | if (862k AliasAnalysis::onlyReadsMemory(Behavior)862k ) { |
629 | 4.26k | // A readonly argmemonly function only reads from memory pointed to by |
630 | 4.26k | // it's arguments with arbitrary offsets. If we can prove there are no |
631 | 4.26k | // writes to this memory in the loop, we can hoist or sink. |
632 | 4.26k | if (AliasAnalysis::onlyAccessesArgPointees(Behavior)4.26k ) { |
633 | 1.28k | for (Value *Op : CI->arg_operands()) |
634 | 1.28k | if (1.28k Op->getType()->isPointerTy() && |
635 | 1.28k | pointerInvalidatedByLoop(Op, MemoryLocation::UnknownSize, |
636 | 1.28k | AAMDNodes(), CurAST)) |
637 | 1.22k | return false; |
638 | 63 | return true; |
639 | 63 | } |
640 | 2.97k | // If this call only reads from memory and there are no writes to memory |
641 | 2.97k | // in the loop, we can hoist or sink the call as appropriate. |
642 | 2.97k | bool FoundMod = false; |
643 | 3.15k | for (AliasSet &AS : *CurAST) { |
644 | 3.15k | if (!AS.isForwardingAliasSet() && 3.15k AS.isMod()2.97k ) { |
645 | 2.93k | FoundMod = true; |
646 | 2.93k | break; |
647 | 2.93k | } |
648 | 2.97k | } |
649 | 2.97k | if (!FoundMod) |
650 | 48 | return true; |
651 | 861k | } |
652 | 861k | |
653 | 861k | // FIXME: This should use mod/ref information to see if we can hoist or |
654 | 861k | // sink the call. |
655 | 861k | |
656 | 861k | return false; |
657 | 861k | } |
658 | 6.28M | |
659 | 6.28M | // Only these instructions are hoistable/sinkable. |
660 | 6.28M | if (6.28M !isa<BinaryOperator>(I) && 6.28M !isa<CastInst>(I)6.25M && !isa<SelectInst>(I)6.18M && |
661 | 6.28M | !isa<GetElementPtrInst>(I)6.18M && !isa<CmpInst>(I)6.10M && |
662 | 6.28M | !isa<InsertElementInst>(I)6.08M && !isa<ExtractElementInst>(I)6.08M && |
663 | 6.28M | !isa<ShuffleVectorInst>(I)6.08M && !isa<ExtractValueInst>(I)6.07M && |
664 | 6.07M | !isa<InsertValueInst>(I)) |
665 | 6.07M | return false; |
666 | 203k | |
667 | 203k | // SafetyInfo is nullptr if we are checking for sinking from preheader to |
668 | 203k | // loop body. It will be always safe as there is no speculative execution. |
669 | 203k | if (203k !SafetyInfo203k ) |
670 | 0 | return true; |
671 | 203k | |
672 | 203k | // TODO: Plumb the context instruction through to make hoisting and sinking |
673 | 203k | // more powerful. Hoisting of loads already works due to the special casing |
674 | 203k | // above. |
675 | 203k | return isSafeToExecuteUnconditionally(I, DT, CurLoop, SafetyInfo, nullptr); |
676 | 203k | } |
677 | | |
678 | | /// Returns true if a PHINode is a trivially replaceable with an |
679 | | /// Instruction. |
680 | | /// This is true when all incoming values are that instruction. |
681 | | /// This pattern occurs most often with LCSSA PHI nodes. |
682 | | /// |
683 | 2.32M | static bool isTriviallyReplacablePHI(const PHINode &PN, const Instruction &I) { |
684 | 2.32M | for (const Value *IncValue : PN.incoming_values()) |
685 | 3.21M | if (3.21M IncValue != &I3.21M ) |
686 | 1.69M | return false; |
687 | 620k | |
688 | 620k | return true; |
689 | 620k | } |
690 | | |
691 | | /// Return true if the only users of this instruction are outside of |
692 | | /// the loop. If this is true, we can sink the instruction to the exit |
693 | | /// blocks of the loop. |
694 | | /// |
695 | | static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop, |
696 | 21.1M | const LoopSafetyInfo *SafetyInfo) { |
697 | 21.1M | const auto &BlockColors = SafetyInfo->BlockColors; |
698 | 16.5M | for (const User *U : I.users()) { |
699 | 16.5M | const Instruction *UI = cast<Instruction>(U); |
700 | 16.5M | if (const PHINode *PN16.5M = dyn_cast<PHINode>(UI)) { |
701 | 2.32M | const BasicBlock *BB = PN->getParent(); |
702 | 2.32M | // We cannot sink uses in catchswitches. |
703 | 2.32M | if (isa<CatchSwitchInst>(BB->getTerminator())) |
704 | 2 | return false; |
705 | 2.32M | |
706 | 2.32M | // We need to sink a callsite to a unique funclet. Avoid sinking if the |
707 | 2.32M | // phi use is too muddled. |
708 | 2.32M | if (2.32M isa<CallInst>(I)2.32M ) |
709 | 296k | if (296k !BlockColors.empty() && |
710 | 2 | BlockColors.find(const_cast<BasicBlock *>(BB))->second.size() != 1) |
711 | 0 | return false; |
712 | 2.32M | |
713 | 2.32M | // A PHI node where all of the incoming values are this instruction are |
714 | 2.32M | // special -- they can just be RAUW'ed with the instruction and thus |
715 | 2.32M | // don't require a use in the predecessor. This is a particular important |
716 | 2.32M | // special case because it is the pattern found in LCSSA form. |
717 | 2.32M | if (2.32M isTriviallyReplacablePHI(*PN, I)2.32M ) { |
718 | 620k | if (CurLoop->contains(PN)) |
719 | 5.38k | return false; |
720 | 620k | else |
721 | 615k | continue; |
722 | 1.69M | } |
723 | 1.69M | |
724 | 1.69M | // Otherwise, PHI node uses occur in predecessor blocks if the incoming |
725 | 1.69M | // values. Check for such a use being inside the loop. |
726 | 3.60M | for (unsigned i = 0, e = PN->getNumIncomingValues(); 1.69M i != e3.60M ; ++i1.90M ) |
727 | 3.60M | if (3.60M PN->getIncomingValue(i) == &I3.60M ) |
728 | 1.69M | if (1.69M CurLoop->contains(PN->getIncomingBlock(i))1.69M ) |
729 | 1.69M | return false; |
730 | 1.69M | |
731 | 1 | continue; |
732 | 14.2M | } |
733 | 14.2M | |
734 | 14.2M | if (14.2M CurLoop->contains(UI)14.2M ) |
735 | 14.2M | return false; |
736 | 5.23M | } |
737 | 5.23M | return true; |
738 | 5.23M | } |
739 | | |
740 | | static Instruction * |
741 | | CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN, |
742 | | const LoopInfo *LI, |
743 | 1.20k | const LoopSafetyInfo *SafetyInfo) { |
744 | 1.20k | Instruction *New; |
745 | 1.20k | if (auto *CI1.20k = dyn_cast<CallInst>(&I)) { |
746 | 7 | const auto &BlockColors = SafetyInfo->BlockColors; |
747 | 7 | |
748 | 7 | // Sinking call-sites need to be handled differently from other |
749 | 7 | // instructions. The cloned call-site needs a funclet bundle operand |
750 | 7 | // appropriate for it's location in the CFG. |
751 | 7 | SmallVector<OperandBundleDef, 1> OpBundles; |
752 | 7 | for (unsigned BundleIdx = 0, BundleEnd = CI->getNumOperandBundles(); |
753 | 7 | BundleIdx != BundleEnd7 ; ++BundleIdx0 ) { |
754 | 0 | OperandBundleUse Bundle = CI->getOperandBundleAt(BundleIdx); |
755 | 0 | if (Bundle.getTagID() == LLVMContext::OB_funclet) |
756 | 0 | continue; |
757 | 0 |
|
758 | 0 | OpBundles.emplace_back(Bundle); |
759 | 0 | } |
760 | 7 | |
761 | 7 | if (!BlockColors.empty()7 ) { |
762 | 2 | const ColorVector &CV = BlockColors.find(&ExitBlock)->second; |
763 | 2 | assert(CV.size() == 1 && "non-unique color for exit block!"); |
764 | 2 | BasicBlock *BBColor = CV.front(); |
765 | 2 | Instruction *EHPad = BBColor->getFirstNonPHI(); |
766 | 2 | if (EHPad->isEHPad()) |
767 | 2 | OpBundles.emplace_back("funclet", EHPad); |
768 | 2 | } |
769 | 7 | |
770 | 7 | New = CallInst::Create(CI, OpBundles); |
771 | 1.20k | } else { |
772 | 1.19k | New = I.clone(); |
773 | 1.19k | } |
774 | 1.20k | |
775 | 1.20k | ExitBlock.getInstList().insert(ExitBlock.getFirstInsertionPt(), New); |
776 | 1.20k | if (!I.getName().empty()) |
777 | 59 | New->setName(I.getName() + ".le"); |
778 | 1.20k | |
779 | 1.20k | // Build LCSSA PHI nodes for any in-loop operands. Note that this is |
780 | 1.20k | // particularly cheap because we can rip off the PHI node that we're |
781 | 1.20k | // replacing for the number and blocks of the predecessors. |
782 | 1.20k | // OPT: If this shows up in a profile, we can instead finish sinking all |
783 | 1.20k | // invariant instructions, and then walk their operands to re-establish |
784 | 1.20k | // LCSSA. That will eliminate creating PHI nodes just to nuke them when |
785 | 1.20k | // sinking bottom-up. |
786 | 3.02k | for (User::op_iterator OI = New->op_begin(), OE = New->op_end(); OI != OE; |
787 | 1.81k | ++OI) |
788 | 1.81k | if (Instruction *1.81k OInst1.81k = dyn_cast<Instruction>(*OI)) |
789 | 1.26k | if (Loop *1.26k OLoop1.26k = LI->getLoopFor(OInst->getParent())) |
790 | 1.15k | if (1.15k !OLoop->contains(&PN)1.15k ) { |
791 | 1.12k | PHINode *OpPN = |
792 | 1.12k | PHINode::Create(OInst->getType(), PN.getNumIncomingValues(), |
793 | 1.12k | OInst->getName() + ".lcssa", &ExitBlock.front()); |
794 | 2.41k | for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e2.41k ; ++i1.28k ) |
795 | 1.28k | OpPN->addIncoming(OInst, PN.getIncomingBlock(i)); |
796 | 1.81k | *OI = OpPN; |
797 | 1.81k | } |
798 | 1.20k | return New; |
799 | 1.20k | } |
800 | | |
801 | | /// When an instruction is found to only be used outside of the loop, this |
802 | | /// function moves it to the exit blocks and patches up SSA form as needed. |
803 | | /// This method is guaranteed to remove the original instruction from its |
804 | | /// position, and may either delete it or move it to outside of the loop. |
805 | | /// |
806 | | static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT, |
807 | | const Loop *CurLoop, AliasSetTracker *CurAST, |
808 | | const LoopSafetyInfo *SafetyInfo, |
809 | 1.03k | OptimizationRemarkEmitter *ORE) { |
810 | 1.03k | DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n"); |
811 | 1.03k | ORE->emit(OptimizationRemark(DEBUG_TYPE, "InstSunk", &I) |
812 | 1.03k | << "sinking " << ore::NV("Inst", &I)); |
813 | 1.03k | bool Changed = false; |
814 | 1.03k | if (isa<LoadInst>(I)) |
815 | 108 | ++NumMovedLoads; |
816 | 928 | else if (928 isa<CallInst>(I)928 ) |
817 | 7 | ++NumMovedCalls; |
818 | 1.03k | ++NumSunk; |
819 | 1.03k | Changed = true; |
820 | 1.03k | |
821 | | #ifndef NDEBUG |
822 | | SmallVector<BasicBlock *, 32> ExitBlocks; |
823 | | CurLoop->getUniqueExitBlocks(ExitBlocks); |
824 | | SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(), |
825 | | ExitBlocks.end()); |
826 | | #endif |
827 | | |
828 | 1.03k | // Clones of this instruction. Don't create more than one per exit block! |
829 | 1.03k | SmallDenseMap<BasicBlock *, Instruction *, 32> SunkCopies; |
830 | 1.03k | |
831 | 1.03k | // If this instruction is only used outside of the loop, then all users are |
832 | 1.03k | // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of |
833 | 1.03k | // the instruction. |
834 | 2.28k | while (!I.use_empty()2.28k ) { |
835 | 1.24k | Value::user_iterator UI = I.user_begin(); |
836 | 1.24k | auto *User = cast<Instruction>(*UI); |
837 | 1.24k | if (!DT->isReachableFromEntry(User->getParent())1.24k ) { |
838 | 1 | User->replaceUsesOfWith(&I, UndefValue::get(I.getType())); |
839 | 1 | continue; |
840 | 1 | } |
841 | 1.24k | // The user must be a PHI node. |
842 | 1.24k | PHINode *PN = cast<PHINode>(User); |
843 | 1.24k | |
844 | 1.24k | // Surprisingly, instructions can be used outside of loops without any |
845 | 1.24k | // exits. This can only happen in PHI nodes if the incoming block is |
846 | 1.24k | // unreachable. |
847 | 1.24k | Use &U = UI.getUse(); |
848 | 1.24k | BasicBlock *BB = PN->getIncomingBlock(U); |
849 | 1.24k | if (!DT->isReachableFromEntry(BB)1.24k ) { |
850 | 1 | U = UndefValue::get(I.getType()); |
851 | 1 | continue; |
852 | 1 | } |
853 | 1.24k | |
854 | 1.24k | BasicBlock *ExitBlock = PN->getParent(); |
855 | 1.24k | assert(ExitBlockSet.count(ExitBlock) && |
856 | 1.24k | "The LCSSA PHI is not in an exit block!"); |
857 | 1.24k | |
858 | 1.24k | Instruction *New; |
859 | 1.24k | auto It = SunkCopies.find(ExitBlock); |
860 | 1.24k | if (It != SunkCopies.end()) |
861 | 42 | New = It->second; |
862 | 1.24k | else |
863 | 1.20k | New = SunkCopies[ExitBlock] = |
864 | 1.20k | CloneInstructionInExitBlock(I, *ExitBlock, *PN, LI, SafetyInfo); |
865 | 1.24k | |
866 | 1.24k | PN->replaceAllUsesWith(New); |
867 | 1.24k | PN->eraseFromParent(); |
868 | 1.24k | } |
869 | 1.03k | |
870 | 1.03k | CurAST->deleteValue(&I); |
871 | 1.03k | I.eraseFromParent(); |
872 | 1.03k | return Changed; |
873 | 1.03k | } |
874 | | |
875 | | /// When an instruction is found to only use loop invariant operands that |
876 | | /// is safe to hoist, this instruction is called to do the dirty work. |
877 | | /// |
878 | | static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, |
879 | | const LoopSafetyInfo *SafetyInfo, |
880 | 228k | OptimizationRemarkEmitter *ORE) { |
881 | 228k | auto *Preheader = CurLoop->getLoopPreheader(); |
882 | 228k | DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": " << I |
883 | 228k | << "\n"); |
884 | 228k | ORE->emit(OptimizationRemark(DEBUG_TYPE, "Hoisted", &I) |
885 | 228k | << "hoisting " << ore::NV("Inst", &I)); |
886 | 228k | |
887 | 228k | // Metadata can be dependent on conditions we are hoisting above. |
888 | 228k | // Conservatively strip all metadata on the instruction unless we were |
889 | 228k | // guaranteed to execute I if we entered the loop, in which case the metadata |
890 | 228k | // is valid in the loop preheader. |
891 | 228k | if (I.hasMetadataOtherThanDebugLoc() && |
892 | 228k | // The check on hasMetadataOtherThanDebugLoc is to prevent us from burning |
893 | 228k | // time in isGuaranteedToExecute if we don't actually have anything to |
894 | 228k | // drop. It is a compile time optimization, not required for correctness. |
895 | 22.8k | !isGuaranteedToExecute(I, DT, CurLoop, SafetyInfo)) |
896 | 2.87k | I.dropUnknownNonDebugMetadata(); |
897 | 228k | |
898 | 228k | // Move the new node to the Preheader, before its terminator. |
899 | 228k | I.moveBefore(Preheader->getTerminator()); |
900 | 228k | |
901 | 228k | // Do not retain debug locations when we are moving instructions to different |
902 | 228k | // basic blocks, because we want to avoid jumpy line tables. Calls, however, |
903 | 228k | // need to retain their debug locs because they may be inlined. |
904 | 228k | // FIXME: How do we retain source locations without causing poor debugging |
905 | 228k | // behavior? |
906 | 228k | if (!isa<CallInst>(I)) |
907 | 228k | I.setDebugLoc(DebugLoc()); |
908 | 228k | |
909 | 228k | if (isa<LoadInst>(I)) |
910 | 25.2k | ++NumMovedLoads; |
911 | 203k | else if (203k isa<CallInst>(I)203k ) |
912 | 289 | ++NumMovedCalls; |
913 | 228k | ++NumHoisted; |
914 | 228k | return true; |
915 | 228k | } |
916 | | |
917 | | /// Only sink or hoist an instruction if it is not a trapping instruction, |
918 | | /// or if the instruction is known not to trap when moved to the preheader. |
919 | | /// or if it is a trapping instruction and is guaranteed to execute. |
920 | | static bool isSafeToExecuteUnconditionally(Instruction &Inst, |
921 | | const DominatorTree *DT, |
922 | | const Loop *CurLoop, |
923 | | const LoopSafetyInfo *SafetyInfo, |
924 | | OptimizationRemarkEmitter *ORE, |
925 | 443k | const Instruction *CtxI) { |
926 | 443k | if (isSafeToSpeculativelyExecute(&Inst, CtxI, DT)) |
927 | 420k | return true; |
928 | 23.2k | |
929 | 23.2k | bool GuaranteedToExecute = |
930 | 23.2k | isGuaranteedToExecute(Inst, DT, CurLoop, SafetyInfo); |
931 | 23.2k | |
932 | 23.2k | if (!GuaranteedToExecute23.2k ) { |
933 | 10.8k | auto *LI = dyn_cast<LoadInst>(&Inst); |
934 | 10.8k | if (LI && 10.8k CurLoop->isLoopInvariant(LI->getPointerOperand())10.4k ) |
935 | 10.4k | ORE->emit(OptimizationRemarkMissed( |
936 | 10.4k | DEBUG_TYPE, "LoadWithLoopInvariantAddressCondExecuted", LI) |
937 | 10.4k | << "failed to hoist load with loop-invariant address " |
938 | 10.4k | "because load is conditionally executed"); |
939 | 10.8k | } |
940 | 443k | |
941 | 443k | return GuaranteedToExecute; |
942 | 443k | } |
943 | | |
944 | | namespace { |
945 | | class LoopPromoter : public LoadAndStorePromoter { |
946 | | Value *SomePtr; // Designated pointer to store to. |
947 | | const SmallSetVector<Value *, 8> &PointerMustAliases; |
948 | | SmallVectorImpl<BasicBlock *> &LoopExitBlocks; |
949 | | SmallVectorImpl<Instruction *> &LoopInsertPts; |
950 | | PredIteratorCache &PredCache; |
951 | | AliasSetTracker &AST; |
952 | | LoopInfo &LI; |
953 | | DebugLoc DL; |
954 | | int Alignment; |
955 | | bool UnorderedAtomic; |
956 | | AAMDNodes AATags; |
957 | | |
958 | 5.52k | Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const { |
959 | 5.52k | if (Instruction *I = dyn_cast<Instruction>(V)) |
960 | 4.13k | if (Loop *4.13k L4.13k = LI.getLoopFor(I->getParent())) |
961 | 2.97k | if (2.97k !L->contains(BB)2.97k ) { |
962 | 2.52k | // We need to create an LCSSA PHI node for the incoming value and |
963 | 2.52k | // store that. |
964 | 2.52k | PHINode *PN = PHINode::Create(I->getType(), PredCache.size(BB), |
965 | 2.52k | I->getName() + ".lcssa", &BB->front()); |
966 | 2.52k | for (BasicBlock *Pred : PredCache.get(BB)) |
967 | 2.57k | PN->addIncoming(I, Pred); |
968 | 4.13k | return PN; |
969 | 4.13k | } |
970 | 2.99k | return V; |
971 | 2.99k | } |
972 | | |
973 | | public: |
974 | | LoopPromoter(Value *SP, ArrayRef<const Instruction *> Insts, SSAUpdater &S, |
975 | | const SmallSetVector<Value *, 8> &PMA, |
976 | | SmallVectorImpl<BasicBlock *> &LEB, |
977 | | SmallVectorImpl<Instruction *> &LIP, PredIteratorCache &PIC, |
978 | | AliasSetTracker &ast, LoopInfo &li, DebugLoc dl, int alignment, |
979 | | bool UnorderedAtomic, const AAMDNodes &AATags) |
980 | | : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA), |
981 | | LoopExitBlocks(LEB), LoopInsertPts(LIP), PredCache(PIC), AST(ast), |
982 | | LI(li), DL(std::move(dl)), Alignment(alignment), |
983 | 2.50k | UnorderedAtomic(UnorderedAtomic), AATags(AATags) {} |
984 | | |
985 | | bool isInstInList(Instruction *I, |
986 | 14.6k | const SmallVectorImpl<Instruction *> &) const override { |
987 | 14.6k | Value *Ptr; |
988 | 14.6k | if (LoadInst *LI = dyn_cast<LoadInst>(I)) |
989 | 9.37k | Ptr = LI->getOperand(0); |
990 | 14.6k | else |
991 | 5.30k | Ptr = cast<StoreInst>(I)->getPointerOperand(); |
992 | 14.6k | return PointerMustAliases.count(Ptr); |
993 | 14.6k | } |
994 | | |
995 | 2.50k | void doExtraRewritesBeforeFinalDeletion() const override { |
996 | 2.50k | // Insert stores after in the loop exit blocks. Each exit block gets a |
997 | 2.50k | // store of the live-out values that feed them. Since we've already told |
998 | 2.50k | // the SSA updater about the defs in the loop and the preheader |
999 | 2.50k | // definition, it is all set and we can start using it. |
1000 | 5.26k | for (unsigned i = 0, e = LoopExitBlocks.size(); i != e5.26k ; ++i2.76k ) { |
1001 | 2.76k | BasicBlock *ExitBlock = LoopExitBlocks[i]; |
1002 | 2.76k | Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock); |
1003 | 2.76k | LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock); |
1004 | 2.76k | Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock); |
1005 | 2.76k | Instruction *InsertPos = LoopInsertPts[i]; |
1006 | 2.76k | StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos); |
1007 | 2.76k | if (UnorderedAtomic) |
1008 | 6 | NewSI->setOrdering(AtomicOrdering::Unordered); |
1009 | 2.76k | NewSI->setAlignment(Alignment); |
1010 | 2.76k | NewSI->setDebugLoc(DL); |
1011 | 2.76k | if (AATags) |
1012 | 2.55k | NewSI->setAAMetadata(AATags); |
1013 | 2.76k | } |
1014 | 2.50k | } |
1015 | | |
1016 | 1.53k | void replaceLoadWithValue(LoadInst *LI, Value *V) const override { |
1017 | 1.53k | // Update alias analysis. |
1018 | 1.53k | AST.copyValue(LI, V); |
1019 | 1.53k | } |
1020 | 4.80k | void instructionDeleted(Instruction *I) const override { AST.deleteValue(I); } |
1021 | | }; |
1022 | | } // namespace |
1023 | | |
1024 | | /// Try to promote memory values to scalars by sinking stores out of the |
1025 | | /// loop and moving loads to before the loop. We do this by looping over |
1026 | | /// the stores in the loop, looking for stores to Must pointers which are |
1027 | | /// loop invariant. |
1028 | | /// |
1029 | | bool llvm::promoteLoopAccessesToScalars( |
1030 | | const SmallSetVector<Value *, 8> &PointerMustAliases, |
1031 | | SmallVectorImpl<BasicBlock *> &ExitBlocks, |
1032 | | SmallVectorImpl<Instruction *> &InsertPts, PredIteratorCache &PIC, |
1033 | | LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, |
1034 | | Loop *CurLoop, AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo, |
1035 | 5.44k | OptimizationRemarkEmitter *ORE) { |
1036 | 5.44k | // Verify inputs. |
1037 | 5.44k | assert(LI != nullptr && DT != nullptr && CurLoop != nullptr && |
1038 | 5.44k | CurAST != nullptr && SafetyInfo != nullptr && |
1039 | 5.44k | "Unexpected Input to promoteLoopAccessesToScalars"); |
1040 | 5.44k | |
1041 | 5.44k | Value *SomePtr = *PointerMustAliases.begin(); |
1042 | 5.44k | BasicBlock *Preheader = CurLoop->getLoopPreheader(); |
1043 | 5.44k | |
1044 | 5.44k | // It isn't safe to promote a load/store from the loop if the load/store is |
1045 | 5.44k | // conditional. For example, turning: |
1046 | 5.44k | // |
1047 | 5.44k | // for () { if (c) *P += 1; } |
1048 | 5.44k | // |
1049 | 5.44k | // into: |
1050 | 5.44k | // |
1051 | 5.44k | // tmp = *P; for () { if (c) tmp +=1; } *P = tmp; |
1052 | 5.44k | // |
1053 | 5.44k | // is not safe, because *P may only be valid to access if 'c' is true. |
1054 | 5.44k | // |
1055 | 5.44k | // The safety property divides into two parts: |
1056 | 5.44k | // p1) The memory may not be dereferenceable on entry to the loop. In this |
1057 | 5.44k | // case, we can't insert the required load in the preheader. |
1058 | 5.44k | // p2) The memory model does not allow us to insert a store along any dynamic |
1059 | 5.44k | // path which did not originally have one. |
1060 | 5.44k | // |
1061 | 5.44k | // If at least one store is guaranteed to execute, both properties are |
1062 | 5.44k | // satisfied, and promotion is legal. |
1063 | 5.44k | // |
1064 | 5.44k | // This, however, is not a necessary condition. Even if no store/load is |
1065 | 5.44k | // guaranteed to execute, we can still establish these properties. |
1066 | 5.44k | // We can establish (p1) by proving that hoisting the load into the preheader |
1067 | 5.44k | // is safe (i.e. proving dereferenceability on all paths through the loop). We |
1068 | 5.44k | // can use any access within the alias set to prove dereferenceability, |
1069 | 5.44k | // since they're all must alias. |
1070 | 5.44k | // |
1071 | 5.44k | // There are two ways establish (p2): |
1072 | 5.44k | // a) Prove the location is thread-local. In this case the memory model |
1073 | 5.44k | // requirement does not apply, and stores are safe to insert. |
1074 | 5.44k | // b) Prove a store dominates every exit block. In this case, if an exit |
1075 | 5.44k | // blocks is reached, the original dynamic path would have taken us through |
1076 | 5.44k | // the store, so inserting a store into the exit block is safe. Note that this |
1077 | 5.44k | // is different from the store being guaranteed to execute. For instance, |
1078 | 5.44k | // if an exception is thrown on the first iteration of the loop, the original |
1079 | 5.44k | // store is never executed, but the exit blocks are not executed either. |
1080 | 5.44k | |
1081 | 5.44k | bool DereferenceableInPH = false; |
1082 | 5.44k | bool SafeToInsertStore = false; |
1083 | 5.44k | |
1084 | 5.44k | SmallVector<Instruction *, 64> LoopUses; |
1085 | 5.44k | |
1086 | 5.44k | // We start with an alignment of one and try to find instructions that allow |
1087 | 5.44k | // us to prove better alignment. |
1088 | 5.44k | unsigned Alignment = 1; |
1089 | 5.44k | // Keep track of which types of access we see |
1090 | 5.44k | bool SawUnorderedAtomic = false; |
1091 | 5.44k | bool SawNotAtomic = false; |
1092 | 5.44k | AAMDNodes AATags; |
1093 | 5.44k | |
1094 | 5.44k | const DataLayout &MDL = Preheader->getModule()->getDataLayout(); |
1095 | 5.44k | |
1096 | 5.44k | // Do we know this object does not escape ? |
1097 | 5.44k | bool IsKnownNonEscapingObject = false; |
1098 | 5.44k | if (SafetyInfo->MayThrow5.44k ) { |
1099 | 580 | // If a loop can throw, we have to insert a store along each unwind edge. |
1100 | 580 | // That said, we can't actually make the unwind edge explicit. Therefore, |
1101 | 580 | // we have to prove that the store is dead along the unwind edge. |
1102 | 580 | // |
1103 | 580 | // If the underlying object is not an alloca, nor a pointer that does not |
1104 | 580 | // escape, then we can not effectively prove that the store is dead along |
1105 | 580 | // the unwind edge. i.e. the caller of this function could have ways to |
1106 | 580 | // access the pointed object. |
1107 | 580 | Value *Object = GetUnderlyingObject(SomePtr, MDL); |
1108 | 580 | // If this is a base pointer we do not understand, simply bail. |
1109 | 580 | // We only handle alloca and return value from alloc-like fn right now. |
1110 | 580 | if (!isa<AllocaInst>(Object)580 ) { |
1111 | 305 | if (!isAllocLikeFn(Object, TLI)) |
1112 | 201 | return false; |
1113 | 104 | // If this is an alloc like fn. There are more constraints we need to |
1114 | 104 | // verify. More specifically, we must make sure that the pointer can not |
1115 | 104 | // escape. |
1116 | 104 | // |
1117 | 104 | // NOTE: PointerMayBeCaptured is not enough as the pointer may have |
1118 | 104 | // escaped even though its not captured by the enclosing function. |
1119 | 104 | // Standard allocation functions like malloc, calloc, and operator new |
1120 | 104 | // return values which can be assumed not to have previously escaped. |
1121 | 104 | if (104 PointerMayBeCaptured(Object, true, true)104 ) |
1122 | 2 | return false; |
1123 | 102 | IsKnownNonEscapingObject = true; |
1124 | 102 | } |
1125 | 580 | } |
1126 | 5.44k | |
1127 | 5.44k | // Check that all of the pointers in the alias set have the same type. We |
1128 | 5.44k | // cannot (yet) promote a memory location that is loaded and stored in |
1129 | 5.44k | // different sizes. While we are at it, collect alignment and AA info. |
1130 | 5.24k | for (Value *ASIV : PointerMustAliases) 5.24k { |
1131 | 5.47k | // Check that all of the pointers in the alias set have the same type. We |
1132 | 5.47k | // cannot (yet) promote a memory location that is loaded and stored in |
1133 | 5.47k | // different sizes. |
1134 | 5.47k | if (SomePtr->getType() != ASIV->getType()) |
1135 | 125 | return false; |
1136 | 5.35k | |
1137 | 5.35k | for (User *U : ASIV->users()) 5.35k { |
1138 | 108k | // Ignore instructions that are outside the loop. |
1139 | 108k | Instruction *UI = dyn_cast<Instruction>(U); |
1140 | 108k | if (!UI || 108k !CurLoop->contains(UI)108k ) |
1141 | 98.7k | continue; |
1142 | 9.84k | |
1143 | 9.84k | // If there is an non-load/store instruction in the loop, we can't promote |
1144 | 9.84k | // it. |
1145 | 9.84k | if (LoadInst *9.84k Load9.84k = dyn_cast<LoadInst>(UI)) { |
1146 | 3.07k | assert(!Load->isVolatile() && "AST broken"); |
1147 | 3.07k | if (!Load->isUnordered()) |
1148 | 0 | return false; |
1149 | 3.07k | |
1150 | 3.07k | SawUnorderedAtomic |= Load->isAtomic(); |
1151 | 3.07k | SawNotAtomic |= !Load->isAtomic(); |
1152 | 3.07k | |
1153 | 3.07k | if (!DereferenceableInPH) |
1154 | 965 | DereferenceableInPH = isSafeToExecuteUnconditionally( |
1155 | 965 | *Load, DT, CurLoop, SafetyInfo, ORE, Preheader->getTerminator()); |
1156 | 9.84k | } else if (const StoreInst *6.76k Store6.76k = dyn_cast<StoreInst>(UI)) { |
1157 | 6.71k | // Stores *of* the pointer are not interesting, only stores *to* the |
1158 | 6.71k | // pointer. |
1159 | 6.71k | if (UI->getOperand(1) != ASIV) |
1160 | 4 | continue; |
1161 | 6.71k | assert(!Store->isVolatile() && "AST broken"); |
1162 | 6.71k | if (!Store->isUnordered()) |
1163 | 4 | return false; |
1164 | 6.70k | |
1165 | 6.70k | SawUnorderedAtomic |= Store->isAtomic(); |
1166 | 6.70k | SawNotAtomic |= !Store->isAtomic(); |
1167 | 6.70k | |
1168 | 6.70k | // If the store is guaranteed to execute, both properties are satisfied. |
1169 | 6.70k | // We may want to check if a store is guaranteed to execute even if we |
1170 | 6.70k | // already know that promotion is safe, since it may have higher |
1171 | 6.70k | // alignment than any other guaranteed stores, in which case we can |
1172 | 6.70k | // raise the alignment on the promoted store. |
1173 | 6.70k | unsigned InstAlignment = Store->getAlignment(); |
1174 | 6.70k | if (!InstAlignment) |
1175 | 89 | InstAlignment = |
1176 | 89 | MDL.getABITypeAlignment(Store->getValueOperand()->getType()); |
1177 | 6.70k | |
1178 | 6.70k | if (!DereferenceableInPH || 6.70k !SafeToInsertStore1.91k || |
1179 | 6.70k | (InstAlignment > Alignment)633 ) { |
1180 | 6.08k | if (isGuaranteedToExecute(*UI, DT, CurLoop, SafetyInfo)6.08k ) { |
1181 | 2.24k | DereferenceableInPH = true; |
1182 | 2.24k | SafeToInsertStore = true; |
1183 | 2.24k | Alignment = std::max(Alignment, InstAlignment); |
1184 | 2.24k | } |
1185 | 6.08k | } |
1186 | 6.70k | |
1187 | 6.70k | // If a store dominates all exit blocks, it is safe to sink. |
1188 | 6.70k | // As explained above, if an exit block was executed, a dominating |
1189 | 6.70k | // store must have been been executed at least once, so we are not |
1190 | 6.70k | // introducing stores on paths that did not have them. |
1191 | 6.70k | // Note that this only looks at explicit exit blocks. If we ever |
1192 | 6.70k | // start sinking stores into unwind edges (see above), this will break. |
1193 | 6.70k | if (!SafeToInsertStore) |
1194 | 3.81k | SafeToInsertStore = llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) 3.81k { |
1195 | 3.90k | return DT->dominates(Store->getParent(), Exit); |
1196 | 3.81k | }); |
1197 | 6.70k | |
1198 | 6.70k | // If the store is not guaranteed to execute, we may still get |
1199 | 6.70k | // deref info through it. |
1200 | 6.70k | if (!DereferenceableInPH6.70k ) { |
1201 | 2.90k | DereferenceableInPH = isDereferenceableAndAlignedPointer( |
1202 | 2.90k | Store->getPointerOperand(), Store->getAlignment(), MDL, |
1203 | 2.90k | Preheader->getTerminator(), DT); |
1204 | 2.90k | } |
1205 | 6.71k | } else |
1206 | 49 | return false; // Not a load or store. |
1207 | 9.78k | |
1208 | 9.78k | // Merge the AA tags. |
1209 | 9.78k | if (9.78k LoopUses.empty()9.78k ) { |
1210 | 5.18k | // On the first load/store, just take its AA tags. |
1211 | 5.18k | UI->getAAMetadata(AATags); |
1212 | 9.78k | } else if (4.59k AATags4.59k ) { |
1213 | 4.15k | UI->getAAMetadata(AATags, /* Merge = */ true); |
1214 | 4.15k | } |
1215 | 108k | |
1216 | 108k | LoopUses.push_back(UI); |
1217 | 108k | } |
1218 | 5.47k | } |
1219 | 5.24k | |
1220 | 5.24k | // If we found both an unordered atomic instruction and a non-atomic memory |
1221 | 5.24k | // access, bail. We can't blindly promote non-atomic to atomic since we |
1222 | 5.24k | // might not be able to lower the result. We can't downgrade since that |
1223 | 5.24k | // would violate memory model. Also, align 0 is an error for atomics. |
1224 | 5.06k | if (5.06k SawUnorderedAtomic && 5.06k SawNotAtomic8 ) |
1225 | 2 | return false; |
1226 | 5.06k | |
1227 | 5.06k | // If we couldn't prove we can hoist the load, bail. |
1228 | 5.06k | if (5.06k !DereferenceableInPH5.06k ) |
1229 | 742 | return false; |
1230 | 4.31k | |
1231 | 4.31k | // We know we can hoist the load, but don't have a guaranteed store. |
1232 | 4.31k | // Check whether the location is thread-local. If it is, then we can insert |
1233 | 4.31k | // stores along paths which originally didn't have them without violating the |
1234 | 4.31k | // memory model. |
1235 | 4.31k | if (4.31k !SafeToInsertStore4.31k ) { |
1236 | 2.01k | // If this is a known non-escaping object, it is safe to insert the stores. |
1237 | 2.01k | if (IsKnownNonEscapingObject) |
1238 | 0 | SafeToInsertStore = true; |
1239 | 2.01k | else { |
1240 | 2.01k | Value *Object = GetUnderlyingObject(SomePtr, MDL); |
1241 | 2.01k | SafeToInsertStore = |
1242 | 2.00k | (isAllocLikeFn(Object, TLI) || isa<AllocaInst>(Object)) && |
1243 | 388 | !PointerMayBeCaptured(Object, true, true); |
1244 | 2.01k | } |
1245 | 2.01k | } |
1246 | 4.31k | |
1247 | 4.31k | // If we've still failed to prove we can sink the store, give up. |
1248 | 4.31k | if (!SafeToInsertStore) |
1249 | 1.81k | return false; |
1250 | 2.50k | |
1251 | 2.50k | // Otherwise, this is safe to promote, lets do it! |
1252 | 2.50k | DEBUG2.50k (dbgs() << "LICM: Promoting value stored to in loop: " << *SomePtr |
1253 | 2.50k | << '\n'); |
1254 | 2.50k | ORE->emit( |
1255 | 2.50k | OptimizationRemark(DEBUG_TYPE, "PromoteLoopAccessesToScalar", LoopUses[0]) |
1256 | 2.50k | << "Moving accesses to memory location out of the loop"); |
1257 | 2.50k | ++NumPromoted; |
1258 | 2.50k | |
1259 | 2.50k | // Grab a debug location for the inserted loads/stores; given that the |
1260 | 2.50k | // inserted loads/stores have little relation to the original loads/stores, |
1261 | 2.50k | // this code just arbitrarily picks a location from one, since any debug |
1262 | 2.50k | // location is better than none. |
1263 | 2.50k | DebugLoc DL = LoopUses[0]->getDebugLoc(); |
1264 | 2.50k | |
1265 | 2.50k | // We use the SSAUpdater interface to insert phi nodes as required. |
1266 | 2.50k | SmallVector<PHINode *, 16> NewPHIs; |
1267 | 2.50k | SSAUpdater SSA(&NewPHIs); |
1268 | 2.50k | LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks, |
1269 | 2.50k | InsertPts, PIC, *CurAST, *LI, DL, Alignment, |
1270 | 2.50k | SawUnorderedAtomic, AATags); |
1271 | 2.50k | |
1272 | 2.50k | // Set up the preheader to have a definition of the value. It is the live-out |
1273 | 2.50k | // value from the preheader that uses in the loop will use. |
1274 | 2.50k | LoadInst *PreheaderLoad = new LoadInst( |
1275 | 2.50k | SomePtr, SomePtr->getName() + ".promoted", Preheader->getTerminator()); |
1276 | 2.50k | if (SawUnorderedAtomic) |
1277 | 6 | PreheaderLoad->setOrdering(AtomicOrdering::Unordered); |
1278 | 2.50k | PreheaderLoad->setAlignment(Alignment); |
1279 | 2.50k | PreheaderLoad->setDebugLoc(DL); |
1280 | 2.50k | if (AATags) |
1281 | 2.29k | PreheaderLoad->setAAMetadata(AATags); |
1282 | 2.50k | SSA.AddAvailableValue(Preheader, PreheaderLoad); |
1283 | 2.50k | |
1284 | 2.50k | // Rewrite all the loads in the loop and remember all the definitions from |
1285 | 2.50k | // stores in the loop. |
1286 | 2.50k | Promoter.run(LoopUses); |
1287 | 2.50k | |
1288 | 2.50k | // If the SSAUpdater didn't use the load in the preheader, just zap it now. |
1289 | 2.50k | if (PreheaderLoad->use_empty()) |
1290 | 1.32k | PreheaderLoad->eraseFromParent(); |
1291 | 5.44k | |
1292 | 5.44k | return true; |
1293 | 5.44k | } |
1294 | | |
1295 | | /// Returns an owning pointer to an alias set which incorporates aliasing info |
1296 | | /// from L and all subloops of L. |
1297 | | /// FIXME: In new pass manager, there is no helper function to handle loop |
1298 | | /// analysis such as cloneBasicBlockAnalysis, so the AST needs to be recomputed |
1299 | | /// from scratch for every loop. Hook up with the helper functions when |
1300 | | /// available in the new pass manager to avoid redundant computation. |
1301 | | AliasSetTracker * |
1302 | | LoopInvariantCodeMotion::collectAliasInfoForLoop(Loop *L, LoopInfo *LI, |
1303 | 1.02M | AliasAnalysis *AA) { |
1304 | 1.02M | AliasSetTracker *CurAST = nullptr; |
1305 | 1.02M | SmallVector<Loop *, 4> RecomputeLoops; |
1306 | 309k | for (Loop *InnerL : L->getSubLoops()) { |
1307 | 309k | auto MapI = LoopToAliasSetMap.find(InnerL); |
1308 | 309k | // If the AST for this inner loop is missing it may have been merged into |
1309 | 309k | // some other loop's AST and then that loop unrolled, and so we need to |
1310 | 309k | // recompute it. |
1311 | 309k | if (MapI == LoopToAliasSetMap.end()309k ) { |
1312 | 12 | RecomputeLoops.push_back(InnerL); |
1313 | 12 | continue; |
1314 | 12 | } |
1315 | 309k | AliasSetTracker *InnerAST = MapI->second; |
1316 | 309k | |
1317 | 309k | if (CurAST != nullptr309k ) { |
1318 | 106k | // What if InnerLoop was modified by other passes ? |
1319 | 106k | CurAST->add(*InnerAST); |
1320 | 106k | |
1321 | 106k | // Once we've incorporated the inner loop's AST into ours, we don't need |
1322 | 106k | // the subloop's anymore. |
1323 | 106k | delete InnerAST; |
1324 | 309k | } else { |
1325 | 203k | CurAST = InnerAST; |
1326 | 203k | } |
1327 | 309k | LoopToAliasSetMap.erase(MapI); |
1328 | 309k | } |
1329 | 1.02M | if (CurAST == nullptr) |
1330 | 826k | CurAST = new AliasSetTracker(*AA); |
1331 | 1.02M | |
1332 | 1.02M | auto mergeLoop = [&](Loop *L) { |
1333 | 1.02M | // Loop over the body of this loop, looking for calls, invokes, and stores. |
1334 | 1.02M | for (BasicBlock *BB : L->blocks()) |
1335 | 4.77M | CurAST->add(*BB); // Incorporate the specified basic block |
1336 | 1.02M | }; |
1337 | 1.02M | |
1338 | 1.02M | // Add everything from the sub loops that are no longer directly available. |
1339 | 1.02M | for (Loop *InnerL : RecomputeLoops) |
1340 | 12 | mergeLoop(InnerL); |
1341 | 1.02M | |
1342 | 1.02M | // And merge in this loop. |
1343 | 1.02M | mergeLoop(L); |
1344 | 1.02M | |
1345 | 1.02M | return CurAST; |
1346 | 1.02M | } |
1347 | | |
1348 | | /// Simple analysis hook. Clone alias set info. |
1349 | | /// |
1350 | | void LegacyLICMPass::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, |
1351 | 170k | Loop *L) { |
1352 | 170k | AliasSetTracker *AST = LICM.getLoopToAliasSetMap().lookup(L); |
1353 | 170k | if (!AST) |
1354 | 113k | return; |
1355 | 57.2k | |
1356 | 57.2k | AST->copyValue(From, To); |
1357 | 57.2k | } |
1358 | | |
1359 | | /// Simple Analysis hook. Delete value V from alias set |
1360 | | /// |
1361 | 12.1k | void LegacyLICMPass::deleteAnalysisValue(Value *V, Loop *L) { |
1362 | 12.1k | AliasSetTracker *AST = LICM.getLoopToAliasSetMap().lookup(L); |
1363 | 12.1k | if (!AST) |
1364 | 6.96k | return; |
1365 | 5.15k | |
1366 | 5.15k | AST->deleteValue(V); |
1367 | 5.15k | } |
1368 | | |
/// Simple Analysis hook. Delete the loop L's entry (and its tracker) from the
/// alias set map.
1371 | 8 | void LegacyLICMPass::deleteAnalysisLoop(Loop *L) { |
1372 | 8 | AliasSetTracker *AST = LICM.getLoopToAliasSetMap().lookup(L); |
1373 | 8 | if (!AST) |
1374 | 4 | return; |
1375 | 4 | |
1376 | 4 | delete AST; |
1377 | 4 | LICM.getLoopToAliasSetMap().erase(L); |
1378 | 4 | } |
1379 | | |
1380 | | /// Return true if the body of this loop may store into the memory |
1381 | | /// location pointed to by V. |
1382 | | /// |
1383 | | static bool pointerInvalidatedByLoop(Value *V, uint64_t Size, |
1384 | | const AAMDNodes &AAInfo, |
1385 | 855k | AliasSetTracker *CurAST) { |
1386 | 855k | // Check to see if any of the basic blocks in CurLoop invalidate *V. |
1387 | 855k | return CurAST->getAliasSetForPointer(V, Size, AAInfo).isMod(); |
1388 | 855k | } |
1389 | | |
1390 | | /// Little predicate that returns true if the specified basic block is in |
1391 | | /// a subloop of the current one, not the current one itself. |
1392 | | /// |
1393 | 9.54M | static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI) { |
1394 | 9.54M | assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop"); |
1395 | 9.54M | return LI->getLoopFor(BB) != CurLoop; |
1396 | 9.54M | } |