Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Transforms/Scalar/LICM.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
// This pass performs loop invariant code motion, attempting to remove as much
11
// code from the body of a loop as possible.  It does this by either hoisting
12
// code into the preheader block, or by sinking code to the exit blocks if it is
13
// safe.  This pass also promotes must-aliased memory locations in the loop to
14
// live in registers, thus hoisting and sinking "invariant" loads and stores.
15
//
16
// This pass uses alias analysis for two purposes:
17
//
18
//  1. Moving loop invariant loads and calls out of loops.  If we can determine
19
//     that a load or call inside of a loop never aliases anything stored to,
20
//     we can hoist it or sink it like any other instruction.
21
//  2. Scalar Promotion of Memory - If there is a store instruction inside of
22
//     the loop, we try to move the store to happen AFTER the loop instead of
23
//     inside of the loop.  This can only happen if a few conditions are true:
24
//       A. The pointer stored through is loop invariant
25
//       B. There are no stores or loads in the loop which _may_ alias the
26
//          pointer.  There are no calls in the loop which mod/ref the pointer.
27
//     If these conditions are true, we can promote the loads and stores in the
28
//     loop of the pointer to use a temporary alloca'd variable.  We then use
29
//     the SSAUpdater to construct the appropriate SSA form for the value.
30
//
31
//===----------------------------------------------------------------------===//
32
33
#include "llvm/Transforms/Scalar/LICM.h"
34
#include "llvm/ADT/Statistic.h"
35
#include "llvm/Analysis/AliasAnalysis.h"
36
#include "llvm/Analysis/AliasSetTracker.h"
37
#include "llvm/Analysis/BasicAliasAnalysis.h"
38
#include "llvm/Analysis/CaptureTracking.h"
39
#include "llvm/Analysis/ConstantFolding.h"
40
#include "llvm/Analysis/GlobalsModRef.h"
41
#include "llvm/Analysis/Loads.h"
42
#include "llvm/Analysis/LoopInfo.h"
43
#include "llvm/Analysis/LoopPass.h"
44
#include "llvm/Analysis/MemoryBuiltins.h"
45
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
46
#include "llvm/Analysis/ScalarEvolution.h"
47
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
48
#include "llvm/Analysis/TargetLibraryInfo.h"
49
#include "llvm/Analysis/ValueTracking.h"
50
#include "llvm/IR/CFG.h"
51
#include "llvm/IR/Constants.h"
52
#include "llvm/IR/DataLayout.h"
53
#include "llvm/IR/DerivedTypes.h"
54
#include "llvm/IR/Dominators.h"
55
#include "llvm/IR/Instructions.h"
56
#include "llvm/IR/IntrinsicInst.h"
57
#include "llvm/IR/LLVMContext.h"
58
#include "llvm/IR/Metadata.h"
59
#include "llvm/IR/PredIteratorCache.h"
60
#include "llvm/Support/CommandLine.h"
61
#include "llvm/Support/Debug.h"
62
#include "llvm/Support/raw_ostream.h"
63
#include "llvm/Transforms/Scalar.h"
64
#include "llvm/Transforms/Scalar/LoopPassManager.h"
65
#include "llvm/Transforms/Utils/Local.h"
66
#include "llvm/Transforms/Utils/LoopUtils.h"
67
#include "llvm/Transforms/Utils/SSAUpdater.h"
68
#include <algorithm>
69
#include <utility>
70
using namespace llvm;
71
72
1.06M
#define DEBUG_TYPE "licm"
73
74
STATISTIC(NumSunk, "Number of instructions sunk out of loop");
75
STATISTIC(NumHoisted, "Number of instructions hoisted out of loop");
76
STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
77
STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk");
78
STATISTIC(NumPromoted, "Number of memory locations promoted to registers");
79
80
/// Memory promotion is enabled by default.
81
static cl::opt<bool>
82
    DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false),
83
                     cl::desc("Disable memory promotion in LICM pass"));
84
85
static cl::opt<uint32_t> MaxNumUsesTraversed(
86
    "licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
87
    cl::desc("Max num uses visited for identifying load "
88
             "invariance in loop using invariant start (default = 8)"));
89
90
static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
91
static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop,
92
                            const LoopSafetyInfo *SafetyInfo);
93
static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
94
                  const LoopSafetyInfo *SafetyInfo,
95
                  OptimizationRemarkEmitter *ORE);
96
static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT,
97
                 const Loop *CurLoop, AliasSetTracker *CurAST,
98
                 const LoopSafetyInfo *SafetyInfo,
99
                 OptimizationRemarkEmitter *ORE);
100
static bool isSafeToExecuteUnconditionally(Instruction &Inst,
101
                                           const DominatorTree *DT,
102
                                           const Loop *CurLoop,
103
                                           const LoopSafetyInfo *SafetyInfo,
104
                                           OptimizationRemarkEmitter *ORE,
105
                                           const Instruction *CtxI = nullptr);
106
static bool pointerInvalidatedByLoop(Value *V, uint64_t Size,
107
                                     const AAMDNodes &AAInfo,
108
                                     AliasSetTracker *CurAST);
109
static Instruction *
110
CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN,
111
                            const LoopInfo *LI,
112
                            const LoopSafetyInfo *SafetyInfo);
113
114
namespace {
115
struct LoopInvariantCodeMotion {
116
  bool runOnLoop(Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT,
117
                 TargetLibraryInfo *TLI, ScalarEvolution *SE,
118
                 OptimizationRemarkEmitter *ORE, bool DeleteAST);
119
120
183k
  DenseMap<Loop *, AliasSetTracker *> &getLoopToAliasSetMap() {
121
183k
    return LoopToAliasSetMap;
122
183k
  }
123
124
private:
125
  DenseMap<Loop *, AliasSetTracker *> LoopToAliasSetMap;
126
127
  AliasSetTracker *collectAliasInfoForLoop(Loop *L, LoopInfo *LI,
128
                                           AliasAnalysis *AA);
129
};
130
131
struct LegacyLICMPass : public LoopPass {
132
  static char ID; // Pass identification, replacement for typeid
133
53.2k
  LegacyLICMPass() : LoopPass(ID) {
134
53.2k
    initializeLegacyLICMPassPass(*PassRegistry::getPassRegistry());
135
53.2k
  }
136
137
1.02M
  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
138
1.02M
    if (
skipLoop(L)1.02M
) {
139
55
      // If we have run LICM on a previous loop but now we are skipping
140
55
      // (because we've hit the opt-bisect limit), we need to clear the
141
55
      // loop alias information.
142
55
      for (auto &LTAS : LICM.getLoopToAliasSetMap())
143
1
        delete LTAS.second;
144
55
      LICM.getLoopToAliasSetMap().clear();
145
55
      return false;
146
55
    }
147
1.02M
148
1.02M
    auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
149
1.02M
    // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
150
1.02M
    // pass.  Function analyses need to be preserved across loop transformations
151
1.02M
    // but ORE cannot be preserved (see comment before the pass definition).
152
1.02M
    OptimizationRemarkEmitter ORE(L->getHeader()->getParent());
153
1.02M
    return LICM.runOnLoop(L,
154
1.02M
                          &getAnalysis<AAResultsWrapperPass>().getAAResults(),
155
1.02M
                          &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
156
1.02M
                          &getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
157
1.02M
                          &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
158
1.02M
                          SE ? 
&SE->getSE()1.02M
:
nullptr0
, &ORE, false);
159
1.02M
  }
160
161
  /// This transformation requires natural loop information & requires that
162
  /// loop preheaders be inserted into the CFG...
163
  ///
164
53.2k
  void getAnalysisUsage(AnalysisUsage &AU) const override {
165
53.2k
    AU.setPreservesCFG();
166
53.2k
    AU.addRequired<TargetLibraryInfoWrapperPass>();
167
53.2k
    getLoopAnalysisUsage(AU);
168
53.2k
  }
169
170
  using llvm::Pass::doFinalization;
171
172
330k
  bool doFinalization() override {
173
330k
    assert(LICM.getLoopToAliasSetMap().empty() &&
174
330k
           "Didn't free loop alias sets");
175
330k
    return false;
176
330k
  }
177
178
private:
179
  LoopInvariantCodeMotion LICM;
180
181
  /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
182
  void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To,
183
                               Loop *L) override;
184
185
  /// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias
186
  /// set.
187
  void deleteAnalysisValue(Value *V, Loop *L) override;
188
189
  /// Simple Analysis hook. Delete loop L from alias set map.
190
  void deleteAnalysisLoop(Loop *L) override;
191
};
192
} // namespace
193
194
PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
195
208
                                LoopStandardAnalysisResults &AR, LPMUpdater &) {
196
208
  const auto &FAM =
197
208
      AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
198
208
  Function *F = L.getHeader()->getParent();
199
208
200
208
  auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F);
201
208
  // FIXME: This should probably be optional rather than required.
202
208
  if (!ORE)
203
0
    report_fatal_error("LICM: OptimizationRemarkEmitterAnalysis not "
204
0
                       "cached at a higher level");
205
208
206
208
  LoopInvariantCodeMotion LICM;
207
208
  if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.SE, ORE, true))
208
139
    return PreservedAnalyses::all();
209
69
210
69
  auto PA = getLoopPassPreservedAnalyses();
211
69
  PA.preserveSet<CFGAnalyses>();
212
69
  return PA;
213
69
}
214
215
char LegacyLICMPass::ID = 0;
216
41.8k
INITIALIZE_PASS_BEGIN41.8k
(LegacyLICMPass, "licm", "Loop Invariant Code Motion",
217
41.8k
                      false, false)
218
41.8k
INITIALIZE_PASS_DEPENDENCY(LoopPass)
219
41.8k
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
220
41.8k
INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,
221
                    false)
222
223
53.0k
Pass *llvm::createLICMPass() { return new LegacyLICMPass(); }
224
225
/// Hoist expressions out of the specified loop. Note, alias info for inner
226
/// loop is not preserved so it is not a good idea to run LICM multiple
227
/// times on one loop.
228
/// We should delete AST for inner loops in the new pass manager to avoid
229
/// memory leak.
230
///
231
bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AliasAnalysis *AA,
232
                                        LoopInfo *LI, DominatorTree *DT,
233
                                        TargetLibraryInfo *TLI,
234
                                        ScalarEvolution *SE,
235
                                        OptimizationRemarkEmitter *ORE,
236
1.02M
                                        bool DeleteAST) {
237
1.02M
  bool Changed = false;
238
1.02M
239
1.02M
  assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
240
1.02M
241
1.02M
  AliasSetTracker *CurAST = collectAliasInfoForLoop(L, LI, AA);
242
1.02M
243
1.02M
  // Get the preheader block to move instructions into...
244
1.02M
  BasicBlock *Preheader = L->getLoopPreheader();
245
1.02M
246
1.02M
  // Compute loop safety information.
247
1.02M
  LoopSafetyInfo SafetyInfo;
248
1.02M
  computeLoopSafetyInfo(&SafetyInfo, L);
249
1.02M
250
1.02M
  // We want to visit all of the instructions in this loop... that are not parts
251
1.02M
  // of our subloops (they have already had their invariants hoisted out of
252
1.02M
  // their loop, into this loop, so there is no need to process the BODIES of
253
1.02M
  // the subloops).
254
1.02M
  //
255
1.02M
  // Traverse the body of the loop in depth first order on the dominator tree so
256
1.02M
  // that we are guaranteed to see definitions before we see uses.  This allows
257
1.02M
  // us to sink instructions in one pass, without iteration.  After sinking
258
1.02M
  // instructions, we perform another pass to hoist them out of the loop.
259
1.02M
  //
260
1.02M
  if (L->hasDedicatedExits())
261
1.02M
    Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
262
1.02M
                          CurAST, &SafetyInfo, ORE);
263
1.02M
  if (Preheader)
264
1.02M
    Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
265
1.02M
                           CurAST, &SafetyInfo, ORE);
266
1.02M
267
1.02M
  // Now that all loop invariants have been removed from the loop, promote any
268
1.02M
  // memory references to scalars that we can.
269
1.02M
  // Don't sink stores from loops without dedicated block exits. Exits
270
1.02M
  // containing indirect branches are not transformed by loop simplify,
271
1.02M
  // make sure we catch that. An additional load may be generated in the
272
1.02M
  // preheader for SSA updater, so also avoid sinking when no preheader
273
1.02M
  // is available.
274
1.02M
  if (
!DisablePromotion && 1.02M
Preheader1.02M
&&
L->hasDedicatedExits()1.02M
) {
275
1.02M
    // Figure out the loop exits and their insertion points
276
1.02M
    SmallVector<BasicBlock *, 8> ExitBlocks;
277
1.02M
    L->getUniqueExitBlocks(ExitBlocks);
278
1.02M
279
1.02M
    // We can't insert into a catchswitch.
280
1.31M
    bool HasCatchSwitch = llvm::any_of(ExitBlocks, [](BasicBlock *Exit) {
281
1.31M
      return isa<CatchSwitchInst>(Exit->getTerminator());
282
1.31M
    });
283
1.02M
284
1.02M
    if (
!HasCatchSwitch1.02M
) {
285
1.02M
      SmallVector<Instruction *, 8> InsertPts;
286
1.02M
      InsertPts.reserve(ExitBlocks.size());
287
1.02M
      for (BasicBlock *ExitBlock : ExitBlocks)
288
1.31M
        InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
289
1.02M
290
1.02M
      PredIteratorCache PIC;
291
1.02M
292
1.02M
      bool Promoted = false;
293
1.02M
294
1.02M
      // Loop over all of the alias sets in the tracker object.
295
1.68M
      for (AliasSet &AS : *CurAST) {
296
1.68M
        // We can promote this alias set if it has a store, if it is a "Must"
297
1.68M
        // alias set, if the pointer is loop invariant, and if we are not
298
1.68M
        // eliminating any volatile loads or stores.
299
1.68M
        if (
AS.isForwardingAliasSet() || 1.68M
!AS.isMod()1.57M
||
!AS.isMustAlias()1.20M
||
300
1.68M
            
AS.isVolatile()510k
||
!L->isLoopInvariant(AS.begin()->getValue())510k
)
301
1.67M
          continue;
302
5.44k
303
1.68M
        assert(
304
5.44k
            !AS.empty() &&
305
5.44k
            "Must alias set should have at least one pointer element in it!");
306
5.44k
307
5.44k
        SmallSetVector<Value *, 8> PointerMustAliases;
308
5.44k
        for (const auto &ASI : AS)
309
5.72k
          PointerMustAliases.insert(ASI.getValue());
310
1.68M
311
1.68M
        Promoted |= promoteLoopAccessesToScalars(PointerMustAliases, ExitBlocks,
312
1.68M
                                                 InsertPts, PIC, LI, DT, TLI, L,
313
1.68M
                                                 CurAST, &SafetyInfo, ORE);
314
1.68M
      }
315
1.02M
316
1.02M
      // Once we have promoted values across the loop body we have to
317
1.02M
      // recursively reform LCSSA as any nested loop may now have values defined
318
1.02M
      // within the loop used in the outer loop.
319
1.02M
      // FIXME: This is really heavy handed. It would be a bit better to use an
320
1.02M
      // SSAUpdater strategy during promotion that was LCSSA aware and reformed
321
1.02M
      // it as it went.
322
1.02M
      if (Promoted)
323
1.40k
        formLCSSARecursively(*L, *DT, LI, SE);
324
1.02M
325
1.02M
      Changed |= Promoted;
326
1.02M
    }
327
1.02M
  }
328
1.02M
329
1.02M
  // Check that neither this loop nor its parent have had LCSSA broken. LICM is
330
1.02M
  // specifically moving instructions across the loop boundary and so it is
331
1.02M
  // especially in need of sanity checking here.
332
1.02M
  assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!");
333
1.02M
  assert((!L->getParentLoop() || L->getParentLoop()->isLCSSAForm(*DT)) &&
334
1.02M
         "Parent loop not left in LCSSA form after LICM!");
335
1.02M
336
1.02M
  // If this loop is nested inside of another one, save the alias information
337
1.02M
  // for when we process the outer loop.
338
1.02M
  if (
L->getParentLoop() && 1.02M
!DeleteAST309k
)
339
309k
    LoopToAliasSetMap[L] = CurAST;
340
1.02M
  else
341
720k
    delete CurAST;
342
1.02M
343
1.02M
  if (
Changed && 1.02M
SE63.4k
)
344
63.4k
    SE->forgetLoopDispositions(L);
345
1.02M
  return Changed;
346
1.02M
}
347
348
/// Walk the specified region of the CFG (defined by all blocks dominated by
349
/// the specified block, and that are in the current loop) in reverse depth
350
/// first order w.r.t the DominatorTree.  This allows us to visit uses before
351
/// definitions, allowing us to sink a loop body in one pass without iteration.
352
///
353
bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
354
                      DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
355
                      AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo,
356
1.02M
                      OptimizationRemarkEmitter *ORE) {
357
1.02M
358
1.02M
  // Verify inputs.
359
1.02M
  assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
360
1.02M
         CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr &&
361
1.02M
         "Unexpected input to sinkRegion");
362
1.02M
363
1.02M
  // We want to visit children before parents. We will enque all the parents
364
1.02M
  // before their children in the worklist and process the worklist in reverse
365
1.02M
  // order.
366
1.02M
  SmallVector<DomTreeNode *, 16> Worklist = collectChildrenInLoop(N, CurLoop);
367
1.02M
368
1.02M
  bool Changed = false;
369
4.77M
  for (DomTreeNode *DTN : reverse(Worklist)) {
370
4.77M
    BasicBlock *BB = DTN->getBlock();
371
4.77M
    // Only need to process the contents of this block if it is not part of a
372
4.77M
    // subloop (which would already have been processed).
373
4.77M
    if (inSubLoop(BB, CurLoop, LI))
374
1.34M
      continue;
375
3.42M
376
24.5M
    
for (BasicBlock::iterator II = BB->end(); 3.42M
II != BB->begin()24.5M
;) {
377
21.1M
      Instruction &I = *--II;
378
21.1M
379
21.1M
      // If the instruction is dead, we would try to sink it because it isn't
380
21.1M
      // used in the loop, instead, just delete it.
381
21.1M
      if (
isInstructionTriviallyDead(&I, TLI)21.1M
) {
382
767
        DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n');
383
767
        ++II;
384
767
        CurAST->deleteValue(&I);
385
767
        I.eraseFromParent();
386
767
        Changed = true;
387
767
        continue;
388
767
      }
389
21.1M
390
21.1M
      // Check to see if we can sink this instruction to the exit blocks
391
21.1M
      // of the loop.  We can do this if the all users of the instruction are
392
21.1M
      // outside of the loop.  In this case, it doesn't even matter if the
393
21.1M
      // operands of the instruction are loop invariant.
394
21.1M
      //
395
21.1M
      
if (21.1M
isNotUsedInLoop(I, CurLoop, SafetyInfo) &&
396
21.1M
          
canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE)5.23M
) {
397
1.03k
        ++II;
398
1.03k
        Changed |= sink(I, LI, DT, CurLoop, CurAST, SafetyInfo, ORE);
399
1.03k
      }
400
21.1M
    }
401
4.77M
  }
402
1.02M
  return Changed;
403
1.02M
}
404
405
/// Walk the specified region of the CFG (defined by all blocks dominated by
406
/// the specified block, and that are in the current loop) in depth first
407
/// order w.r.t the DominatorTree.  This allows us to visit definitions before
408
/// uses, allowing us to hoist a loop body in one pass without iteration.
409
///
410
bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
411
                       DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
412
                       AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo,
413
1.02M
                       OptimizationRemarkEmitter *ORE) {
414
1.02M
  // Verify inputs.
415
1.02M
  assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
416
1.02M
         CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr &&
417
1.02M
         "Unexpected input to hoistRegion");
418
1.02M
419
1.02M
  // We want to visit parents before children. We will enque all the parents
420
1.02M
  // before their children in the worklist and process the worklist in order.
421
1.02M
  SmallVector<DomTreeNode *, 16> Worklist = collectChildrenInLoop(N, CurLoop);
422
1.02M
423
1.02M
  bool Changed = false;
424
4.77M
  for (DomTreeNode *DTN : Worklist) {
425
4.77M
    BasicBlock *BB = DTN->getBlock();
426
4.77M
    // Only need to process the contents of this block if it is not part of a
427
4.77M
    // subloop (which would already have been processed).
428
4.77M
    if (!inSubLoop(BB, CurLoop, LI))
429
24.5M
      
for (BasicBlock::iterator II = BB->begin(), E = BB->end(); 3.42M
II != E24.5M
;) {
430
21.1M
        Instruction &I = *II++;
431
21.1M
        // Try constant folding this instruction.  If all the operands are
432
21.1M
        // constants, it is technically hoistable, but it would be better to
433
21.1M
        // just fold it.
434
21.1M
        if (Constant *C = ConstantFoldInstruction(
435
432
                &I, I.getModule()->getDataLayout(), TLI)) {
436
432
          DEBUG(dbgs() << "LICM folding inst: " << I << "  --> " << *C << '\n');
437
432
          CurAST->copyValue(&I, C);
438
432
          I.replaceAllUsesWith(C);
439
432
          if (
isInstructionTriviallyDead(&I, TLI)432
) {
440
432
            CurAST->deleteValue(&I);
441
432
            I.eraseFromParent();
442
432
          }
443
432
          Changed = true;
444
432
          continue;
445
432
        }
446
21.1M
447
21.1M
        // Attempt to remove floating point division out of the loop by
448
21.1M
        // converting it to a reciprocal multiplication.
449
21.1M
        
if (21.1M
I.getOpcode() == Instruction::FDiv &&
450
113k
            CurLoop->isLoopInvariant(I.getOperand(1)) &&
451
21.1M
            
I.hasAllowReciprocal()19.8k
) {
452
2
          auto Divisor = I.getOperand(1);
453
2
          auto One = llvm::ConstantFP::get(Divisor->getType(), 1.0);
454
2
          auto ReciprocalDivisor = BinaryOperator::CreateFDiv(One, Divisor);
455
2
          ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags());
456
2
          ReciprocalDivisor->insertBefore(&I);
457
2
458
2
          auto Product =
459
2
              BinaryOperator::CreateFMul(I.getOperand(0), ReciprocalDivisor);
460
2
          Product->setFastMathFlags(I.getFastMathFlags());
461
2
          Product->insertAfter(&I);
462
2
          I.replaceAllUsesWith(Product);
463
2
          I.eraseFromParent();
464
2
465
2
          hoist(*ReciprocalDivisor, DT, CurLoop, SafetyInfo, ORE);
466
2
          Changed = true;
467
2
          continue;
468
2
        }
469
21.1M
470
21.1M
        // Try hoisting the instruction out to the preheader.  We can only do
471
21.1M
        // this if all of the operands of the instruction are loop invariant and
472
21.1M
        // if it is safe to hoist the instruction.
473
21.1M
        //
474
21.1M
        
if (21.1M
CurLoop->hasLoopInvariantOperands(&I) &&
475
2.77M
            canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE) &&
476
238k
            isSafeToExecuteUnconditionally(
477
238k
                I, DT, CurLoop, SafetyInfo, ORE,
478
238k
                CurLoop->getLoopPreheader()->getTerminator()))
479
228k
          Changed |= hoist(I, DT, CurLoop, SafetyInfo, ORE);
480
3.42M
      }
481
4.77M
  }
482
1.02M
483
1.02M
  return Changed;
484
1.02M
}
485
486
/// Computes loop safety information, checks loop body & header
487
/// for the possibility of may throw exception.
488
///
489
1.16M
void llvm::computeLoopSafetyInfo(LoopSafetyInfo *SafetyInfo, Loop *CurLoop) {
490
1.16M
  assert(CurLoop != nullptr && "CurLoop cant be null");
491
1.16M
  BasicBlock *Header = CurLoop->getHeader();
492
1.16M
  // Setting default safety values.
493
1.16M
  SafetyInfo->MayThrow = false;
494
1.16M
  SafetyInfo->HeaderMayThrow = false;
495
1.16M
  // Iterate over header and compute safety info.
496
1.16M
  for (BasicBlock::iterator I = Header->begin(), E = Header->end();
497
10.2M
       
(I != E) && 10.2M
!SafetyInfo->HeaderMayThrow9.40M
;
++I9.10M
)
498
9.10M
    SafetyInfo->HeaderMayThrow |=
499
9.10M
        !isGuaranteedToTransferExecutionToSuccessor(&*I);
500
1.16M
501
1.16M
  SafetyInfo->MayThrow = SafetyInfo->HeaderMayThrow;
502
1.16M
  // Iterate over loop instructions and compute safety info.
503
1.16M
  // Skip header as it has been computed and stored in HeaderMayThrow.
504
1.16M
  // The first block in loopinfo.Blocks is guaranteed to be the header.
505
1.16M
  assert(Header == *CurLoop->getBlocks().begin() &&
506
1.16M
         "First block must be header");
507
1.16M
  for (Loop::block_iterator BB = std::next(CurLoop->block_begin()),
508
1.16M
                            BBE = CurLoop->block_end();
509
2.60M
       
(BB != BBE) && 2.60M
!SafetyInfo->MayThrow1.71M
;
++BB1.43M
)
510
1.43M
    for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end();
511
9.86M
         
(I != E) && 9.86M
!SafetyInfo->MayThrow8.63M
;
++I8.43M
)
512
8.43M
      SafetyInfo->MayThrow |= !isGuaranteedToTransferExecutionToSuccessor(&*I);
513
1.16M
514
1.16M
  // Compute funclet colors if we might sink/hoist in a function with a funclet
515
1.16M
  // personality routine.
516
1.16M
  Function *Fn = CurLoop->getHeader()->getParent();
517
1.16M
  if (Fn->hasPersonalityFn())
518
16.5k
    
if (Constant *16.5k
PersonalityFn16.5k
= Fn->getPersonalityFn())
519
16.5k
      
if (16.5k
isFuncletEHPersonality(classifyEHPersonality(PersonalityFn))16.5k
)
520
6
        SafetyInfo->BlockColors = colorEHFunclets(*Fn);
521
1.16M
}
522
523
// Return true if LI is invariant within scope of the loop. LI is invariant if
524
// CurLoop is dominated by an invariant.start representing the same memory
525
// location and size as the memory location LI loads from, and also the
526
// invariant.start has no uses.
527
static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,
528
853k
                                  Loop *CurLoop) {
529
853k
  Value *Addr = LI->getOperand(0);
530
853k
  const DataLayout &DL = LI->getModule()->getDataLayout();
531
853k
  const uint32_t LocSizeInBits = DL.getTypeSizeInBits(
532
853k
      cast<PointerType>(Addr->getType())->getElementType());
533
853k
534
853k
  // if the type is i8 addrspace(x)*, we know this is the type of
535
853k
  // llvm.invariant.start operand
536
853k
  auto *PtrInt8Ty = PointerType::get(Type::getInt8Ty(LI->getContext()),
537
853k
                                     LI->getPointerAddressSpace());
538
853k
  unsigned BitcastsVisited = 0;
539
853k
  // Look through bitcasts until we reach the i8* type (this is invariant.start
540
853k
  // operand type).
541
947k
  while (
Addr->getType() != PtrInt8Ty947k
) {
542
884k
    auto *BC = dyn_cast<BitCastInst>(Addr);
543
884k
    // Avoid traversing high number of bitcast uses.
544
884k
    if (
++BitcastsVisited > MaxNumUsesTraversed || 884k
!BC884k
)
545
790k
      return false;
546
94.1k
    Addr = BC->getOperand(0);
547
94.1k
  }
548
853k
549
63.6k
  unsigned UsesVisited = 0;
550
63.6k
  // Traverse all uses of the load operand value, to see if invariant.start is
551
63.6k
  // one of the uses, and whether it dominates the load instruction.
552
153k
  for (auto *U : Addr->users()) {
553
153k
    // Avoid traversing for Load operand with high number of users.
554
153k
    if (++UsesVisited > MaxNumUsesTraversed)
555
4.24k
      return false;
556
149k
    IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
557
149k
    // If there are escaping uses of invariant.start instruction, the load maybe
558
149k
    // non-invariant.
559
149k
    if (
!II || 149k
II->getIntrinsicID() != Intrinsic::invariant_start2.76k
||
560
6
        !II->use_empty())
561
149k
      continue;
562
2
    unsigned InvariantSizeInBits =
563
2
        cast<ConstantInt>(II->getArgOperand(0))->getSExtValue() * 8;
564
2
    // Confirm the invariant.start location size contains the load operand size
565
2
    // in bits. Also, the invariant.start should dominate the load, and we
566
2
    // should not hoist the load out of a loop that contains this dominating
567
2
    // invariant.start.
568
2
    if (LocSizeInBits <= InvariantSizeInBits &&
569
2
        DT->properlyDominates(II->getParent(), CurLoop->getHeader()))
570
2
      return true;
571
59.4k
  }
572
59.4k
573
59.4k
  return false;
574
59.4k
}
575
576
/// Determine whether it is legal to hoist \p I into the loop preheader or
/// sink it out of the loop \p CurLoop, consulting alias analysis for memory
/// operations.  Returns true if the instruction may be moved.  \p SafetyInfo
/// may be null when checking sinking from the preheader into the loop body
/// (no speculation happens in that direction).
bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
                              Loop *CurLoop, AliasSetTracker *CurAST,
                              LoopSafetyInfo *SafetyInfo,
                              OptimizationRemarkEmitter *ORE) {
  // Loads have extra constraints we have to verify before we can hoist them.
  if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
    if (!LI->isUnordered())
      return false; // Don't hoist volatile/atomic loads!

    // Loads from constant memory are always safe to move, even if they end up
    // in the same alias set as something that ends up being modified.
    if (AA->pointsToConstantMemory(LI->getOperand(0)))
      return true;
    // !invariant.load metadata guarantees the location never changes, so the
    // load is movable regardless of stores in the loop.
    if (LI->getMetadata(LLVMContext::MD_invariant_load))
      return true;

    // This checks for an invariant.start dominating the load.
    if (isLoadInvariantInLoop(LI, DT, CurLoop))
      return true;

    // Don't hoist loads which have may-aliased stores in loop.
    uint64_t Size = 0;
    if (LI->getType()->isSized())
      Size = I.getModule()->getDataLayout().getTypeStoreSize(LI->getType());

    AAMDNodes AAInfo;
    LI->getAAMetadata(AAInfo);

    bool Invalidated =
        pointerInvalidatedByLoop(LI->getOperand(0), Size, AAInfo, CurAST);
    // Check loop-invariant address because this may also be a sinkable load
    // whose address is not necessarily loop-invariant.
    if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand()))
      ORE->emit(OptimizationRemarkMissed(
                    DEBUG_TYPE, "LoadWithLoopInvariantAddressInvalidated", LI)
                << "failed to move load with loop-invariant address "
                   "because the loop may invalidate its value");

    return !Invalidated;
  } else if (CallInst *CI = dyn_cast<CallInst>(&I)) {
    // Don't sink or hoist dbg info; it's legal, but not useful.
    if (isa<DbgInfoIntrinsic>(I))
      return false;

    // Don't sink calls which can throw.
    if (CI->mayThrow())
      return false;

    // Handle simple cases by querying alias analysis.
    FunctionModRefBehavior Behavior = AA->getModRefBehavior(CI);
    if (Behavior == FMRB_DoesNotAccessMemory)
      return true;
    if (AliasAnalysis::onlyReadsMemory(Behavior)) {
      // A readonly argmemonly function only reads from memory pointed to by
      // it's arguments with arbitrary offsets.  If we can prove there are no
      // writes to this memory in the loop, we can hoist or sink.
      if (AliasAnalysis::onlyAccessesArgPointees(Behavior)) {
        for (Value *Op : CI->arg_operands())
          if (Op->getType()->isPointerTy() &&
              pointerInvalidatedByLoop(Op, MemoryLocation::UnknownSize,
                                       AAMDNodes(), CurAST))
            return false;
        return true;
      }
      // If this call only reads from memory and there are no writes to memory
      // in the loop, we can hoist or sink the call as appropriate.
      // Scan the tracker for any (non-forwarding) alias set that is modified.
      bool FoundMod = false;
      for (AliasSet &AS : *CurAST) {
        if (!AS.isForwardingAliasSet() && AS.isMod()) {
          FoundMod = true;
          break;
        }
      }
      if (!FoundMod)
        return true;
    }

    // FIXME: This should use mod/ref information to see if we can hoist or
    // sink the call.

    return false;
  }

  // Only these instructions are hoistable/sinkable.
  if (!isa<BinaryOperator>(I) && !isa<CastInst>(I) && !isa<SelectInst>(I) &&
      !isa<GetElementPtrInst>(I) && !isa<CmpInst>(I) &&
      !isa<InsertElementInst>(I) && !isa<ExtractElementInst>(I) &&
      !isa<ShuffleVectorInst>(I) && !isa<ExtractValueInst>(I) &&
      !isa<InsertValueInst>(I))
    return false;

  // SafetyInfo is nullptr if we are checking for sinking from preheader to
  // loop body. It will be always safe as there is no speculative execution.
  if (!SafetyInfo)
    return true;

  // TODO: Plumb the context instruction through to make hoisting and sinking
  // more powerful. Hoisting of loads already works due to the special casing
  // above.
  return isSafeToExecuteUnconditionally(I, DT, CurLoop, SafetyInfo, nullptr);
}
677
678
/// Returns true if a PHINode is a trivially replaceable with an
679
/// Instruction.
680
/// This is true when all incoming values are that instruction.
681
/// This pattern occurs most often with LCSSA PHI nodes.
682
///
683
2.32M
static bool isTriviallyReplacablePHI(const PHINode &PN, const Instruction &I) {
684
2.32M
  for (const Value *IncValue : PN.incoming_values())
685
3.21M
    
if (3.21M
IncValue != &I3.21M
)
686
1.69M
      return false;
687
620k
688
620k
  return true;
689
620k
}
690
691
/// Return true if the only users of this instruction are outside of
692
/// the loop. If this is true, we can sink the instruction to the exit
693
/// blocks of the loop.
694
///
695
static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop,
696
21.1M
                            const LoopSafetyInfo *SafetyInfo) {
697
21.1M
  const auto &BlockColors = SafetyInfo->BlockColors;
698
16.5M
  for (const User *U : I.users()) {
699
16.5M
    const Instruction *UI = cast<Instruction>(U);
700
16.5M
    if (const PHINode *
PN16.5M
= dyn_cast<PHINode>(UI)) {
701
2.32M
      const BasicBlock *BB = PN->getParent();
702
2.32M
      // We cannot sink uses in catchswitches.
703
2.32M
      if (isa<CatchSwitchInst>(BB->getTerminator()))
704
2
        return false;
705
2.32M
706
2.32M
      // We need to sink a callsite to a unique funclet.  Avoid sinking if the
707
2.32M
      // phi use is too muddled.
708
2.32M
      
if (2.32M
isa<CallInst>(I)2.32M
)
709
296k
        
if (296k
!BlockColors.empty() &&
710
2
            BlockColors.find(const_cast<BasicBlock *>(BB))->second.size() != 1)
711
0
          return false;
712
2.32M
713
2.32M
      // A PHI node where all of the incoming values are this instruction are
714
2.32M
      // special -- they can just be RAUW'ed with the instruction and thus
715
2.32M
      // don't require a use in the predecessor. This is a particular important
716
2.32M
      // special case because it is the pattern found in LCSSA form.
717
2.32M
      
if (2.32M
isTriviallyReplacablePHI(*PN, I)2.32M
) {
718
620k
        if (CurLoop->contains(PN))
719
5.38k
          return false;
720
620k
        else
721
615k
          continue;
722
1.69M
      }
723
1.69M
724
1.69M
      // Otherwise, PHI node uses occur in predecessor blocks if the incoming
725
1.69M
      // values. Check for such a use being inside the loop.
726
3.60M
      
for (unsigned i = 0, e = PN->getNumIncomingValues(); 1.69M
i != e3.60M
;
++i1.90M
)
727
3.60M
        
if (3.60M
PN->getIncomingValue(i) == &I3.60M
)
728
1.69M
          
if (1.69M
CurLoop->contains(PN->getIncomingBlock(i))1.69M
)
729
1.69M
            return false;
730
1.69M
731
1
      continue;
732
14.2M
    }
733
14.2M
734
14.2M
    
if (14.2M
CurLoop->contains(UI)14.2M
)
735
14.2M
      return false;
736
5.23M
  }
737
5.23M
  return true;
738
5.23M
}
739
740
/// Create a copy of \p I in exit block \p ExitBlock so a use reached through
/// the LCSSA PHI \p PN can be rewritten to the clone.  Call-sites get their
/// funclet operand bundle recomputed for the exit block; in-loop operands of
/// the clone get fresh LCSSA PHIs built from \p PN's predecessor list.
/// Returns the newly inserted clone.
static Instruction *
CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN,
                            const LoopInfo *LI,
                            const LoopSafetyInfo *SafetyInfo) {
  Instruction *New;
  if (auto *CI = dyn_cast<CallInst>(&I)) {
    const auto &BlockColors = SafetyInfo->BlockColors;

    // Sinking call-sites need to be handled differently from other
    // instructions.  The cloned call-site needs a funclet bundle operand
    // appropriate for it's location in the CFG.
    SmallVector<OperandBundleDef, 1> OpBundles;
    for (unsigned BundleIdx = 0, BundleEnd = CI->getNumOperandBundles();
         BundleIdx != BundleEnd; ++BundleIdx) {
      OperandBundleUse Bundle = CI->getOperandBundleAt(BundleIdx);
      // Drop the original funclet bundle; a correct one for the exit block
      // is re-added below.
      if (Bundle.getTagID() == LLVMContext::OB_funclet)
        continue;

      OpBundles.emplace_back(Bundle);
    }

    if (!BlockColors.empty()) {
      // The exit block's color identifies the funclet the clone lives in.
      const ColorVector &CV = BlockColors.find(&ExitBlock)->second;
      assert(CV.size() == 1 && "non-unique color for exit block!");
      BasicBlock *BBColor = CV.front();
      Instruction *EHPad = BBColor->getFirstNonPHI();
      if (EHPad->isEHPad())
        OpBundles.emplace_back("funclet", EHPad);
    }

    New = CallInst::Create(CI, OpBundles);
  } else {
    New = I.clone();
  }

  ExitBlock.getInstList().insert(ExitBlock.getFirstInsertionPt(), New);
  if (!I.getName().empty())
    New->setName(I.getName() + ".le");

  // Build LCSSA PHI nodes for any in-loop operands. Note that this is
  // particularly cheap because we can rip off the PHI node that we're
  // replacing for the number and blocks of the predecessors.
  // OPT: If this shows up in a profile, we can instead finish sinking all
  // invariant instructions, and then walk their operands to re-establish
  // LCSSA. That will eliminate creating PHI nodes just to nuke them when
  // sinking bottom-up.
  for (User::op_iterator OI = New->op_begin(), OE = New->op_end(); OI != OE;
       ++OI)
    if (Instruction *OInst = dyn_cast<Instruction>(*OI))
      if (Loop *OLoop = LI->getLoopFor(OInst->getParent()))
        if (!OLoop->contains(&PN)) {
          PHINode *OpPN =
              PHINode::Create(OInst->getType(), PN.getNumIncomingValues(),
                              OInst->getName() + ".lcssa", &ExitBlock.front());
          for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
            OpPN->addIncoming(OInst, PN.getIncomingBlock(i));
          *OI = OpPN;
        }
  return New;
}
800
801
/// When an instruction is found to only be used outside of the loop, this
/// function moves it to the exit blocks and patches up SSA form as needed.
/// This method is guaranteed to remove the original instruction from its
/// position, and may either delete it or move it to outside of the loop.
///
static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT,
                 const Loop *CurLoop, AliasSetTracker *CurAST,
                 const LoopSafetyInfo *SafetyInfo,
                 OptimizationRemarkEmitter *ORE) {
  DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
  ORE->emit(OptimizationRemark(DEBUG_TYPE, "InstSunk", &I)
            << "sinking " << ore::NV("Inst", &I));
  bool Changed = false;
  // Statistics bookkeeping.
  if (isa<LoadInst>(I))
    ++NumMovedLoads;
  else if (isa<CallInst>(I))
    ++NumMovedCalls;
  ++NumSunk;
  Changed = true;

#ifndef NDEBUG
  // Exit-block set used only to assert that every LCSSA PHI we rewrite
  // really lives in an exit block of this loop.
  SmallVector<BasicBlock *, 32> ExitBlocks;
  CurLoop->getUniqueExitBlocks(ExitBlocks);
  SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
                                             ExitBlocks.end());
#endif

  // Clones of this instruction. Don't create more than one per exit block!
  SmallDenseMap<BasicBlock *, Instruction *, 32> SunkCopies;

  // If this instruction is only used outside of the loop, then all users are
  // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of
  // the instruction.
  // NOTE: each iteration removes at least one use of I (replaceAllUsesWith /
  // operand rewrite), so the loop terminates.
  while (!I.use_empty()) {
    Value::user_iterator UI = I.user_begin();
    auto *User = cast<Instruction>(*UI);
    if (!DT->isReachableFromEntry(User->getParent())) {
      // Dead (unreachable) user: just sever the use with undef.
      User->replaceUsesOfWith(&I, UndefValue::get(I.getType()));
      continue;
    }
    // The user must be a PHI node.
    PHINode *PN = cast<PHINode>(User);

    // Surprisingly, instructions can be used outside of loops without any
    // exits.  This can only happen in PHI nodes if the incoming block is
    // unreachable.
    Use &U = UI.getUse();
    BasicBlock *BB = PN->getIncomingBlock(U);
    if (!DT->isReachableFromEntry(BB)) {
      U = UndefValue::get(I.getType());
      continue;
    }

    BasicBlock *ExitBlock = PN->getParent();
    assert(ExitBlockSet.count(ExitBlock) &&
           "The LCSSA PHI is not in an exit block!");

    // Reuse a previously created clone for this exit block if there is one.
    Instruction *New;
    auto It = SunkCopies.find(ExitBlock);
    if (It != SunkCopies.end())
      New = It->second;
    else
      New = SunkCopies[ExitBlock] =
          CloneInstructionInExitBlock(I, *ExitBlock, *PN, LI, SafetyInfo);

    // The LCSSA PHI is now redundant: all paths yield the clone.
    PN->replaceAllUsesWith(New);
    PN->eraseFromParent();
  }

  // Drop the original instruction from the alias tracker and the IR.
  CurAST->deleteValue(&I);
  I.eraseFromParent();
  return Changed;
}
874
875
/// When an instruction is found to only use loop invariant operands that
876
/// is safe to hoist, this instruction is called to do the dirty work.
877
///
878
static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
879
                  const LoopSafetyInfo *SafetyInfo,
880
228k
                  OptimizationRemarkEmitter *ORE) {
881
228k
  auto *Preheader = CurLoop->getLoopPreheader();
882
228k
  DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": " << I
883
228k
               << "\n");
884
228k
  ORE->emit(OptimizationRemark(DEBUG_TYPE, "Hoisted", &I)
885
228k
            << "hoisting " << ore::NV("Inst", &I));
886
228k
887
228k
  // Metadata can be dependent on conditions we are hoisting above.
888
228k
  // Conservatively strip all metadata on the instruction unless we were
889
228k
  // guaranteed to execute I if we entered the loop, in which case the metadata
890
228k
  // is valid in the loop preheader.
891
228k
  if (I.hasMetadataOtherThanDebugLoc() &&
892
228k
      // The check on hasMetadataOtherThanDebugLoc is to prevent us from burning
893
228k
      // time in isGuaranteedToExecute if we don't actually have anything to
894
228k
      // drop.  It is a compile time optimization, not required for correctness.
895
22.8k
      !isGuaranteedToExecute(I, DT, CurLoop, SafetyInfo))
896
2.87k
    I.dropUnknownNonDebugMetadata();
897
228k
898
228k
  // Move the new node to the Preheader, before its terminator.
899
228k
  I.moveBefore(Preheader->getTerminator());
900
228k
901
228k
  // Do not retain debug locations when we are moving instructions to different
902
228k
  // basic blocks, because we want to avoid jumpy line tables. Calls, however,
903
228k
  // need to retain their debug locs because they may be inlined.
904
228k
  // FIXME: How do we retain source locations without causing poor debugging
905
228k
  // behavior?
906
228k
  if (!isa<CallInst>(I))
907
228k
    I.setDebugLoc(DebugLoc());
908
228k
909
228k
  if (isa<LoadInst>(I))
910
25.2k
    ++NumMovedLoads;
911
203k
  else 
if (203k
isa<CallInst>(I)203k
)
912
289
    ++NumMovedCalls;
913
228k
  ++NumHoisted;
914
228k
  return true;
915
228k
}
916
917
/// Only sink or hoist an instruction if it is not a trapping instruction,
918
/// or if the instruction is known not to trap when moved to the preheader.
919
/// or if it is a trapping instruction and is guaranteed to execute.
920
static bool isSafeToExecuteUnconditionally(Instruction &Inst,
921
                                           const DominatorTree *DT,
922
                                           const Loop *CurLoop,
923
                                           const LoopSafetyInfo *SafetyInfo,
924
                                           OptimizationRemarkEmitter *ORE,
925
443k
                                           const Instruction *CtxI) {
926
443k
  if (isSafeToSpeculativelyExecute(&Inst, CtxI, DT))
927
420k
    return true;
928
23.2k
929
23.2k
  bool GuaranteedToExecute =
930
23.2k
      isGuaranteedToExecute(Inst, DT, CurLoop, SafetyInfo);
931
23.2k
932
23.2k
  if (
!GuaranteedToExecute23.2k
) {
933
10.8k
    auto *LI = dyn_cast<LoadInst>(&Inst);
934
10.8k
    if (
LI && 10.8k
CurLoop->isLoopInvariant(LI->getPointerOperand())10.4k
)
935
10.4k
      ORE->emit(OptimizationRemarkMissed(
936
10.4k
                    DEBUG_TYPE, "LoadWithLoopInvariantAddressCondExecuted", LI)
937
10.4k
                << "failed to hoist load with loop-invariant address "
938
10.4k
                   "because load is conditionally executed");
939
10.8k
  }
940
443k
941
443k
  return GuaranteedToExecute;
942
443k
}
943
944
namespace {
/// Helper driving scalar promotion: used with SSAUpdater to rewrite all
/// loads/stores of a promoted (must-alias) pointer inside the loop, and to
/// materialize the live-out value as stores in each loop exit block.
class LoopPromoter : public LoadAndStorePromoter {
  Value *SomePtr; // Designated pointer to store to.
  const SmallSetVector<Value *, 8> &PointerMustAliases;
  SmallVectorImpl<BasicBlock *> &LoopExitBlocks;
  SmallVectorImpl<Instruction *> &LoopInsertPts; // One insert point per exit.
  PredIteratorCache &PredCache;
  AliasSetTracker &AST;
  LoopInfo &LI;
  DebugLoc DL;        // Debug location applied to the inserted exit stores.
  int Alignment;      // Alignment for the inserted exit stores.
  bool UnorderedAtomic; // Emit the exit stores as unordered atomics.
  AAMDNodes AATags;   // AA metadata applied to the inserted exit stores.

  // If V is defined inside a loop that does not contain BB, wrap it in a
  // fresh LCSSA PHI in BB (one incoming per predecessor); otherwise return V
  // unchanged.
  Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const {
    if (Instruction *I = dyn_cast<Instruction>(V))
      if (Loop *L = LI.getLoopFor(I->getParent()))
        if (!L->contains(BB)) {
          // We need to create an LCSSA PHI node for the incoming value and
          // store that.
          PHINode *PN = PHINode::Create(I->getType(), PredCache.size(BB),
                                        I->getName() + ".lcssa", &BB->front());
          for (BasicBlock *Pred : PredCache.get(BB))
            PN->addIncoming(I, Pred);
          return PN;
        }
    return V;
  }

public:
  LoopPromoter(Value *SP, ArrayRef<const Instruction *> Insts, SSAUpdater &S,
               const SmallSetVector<Value *, 8> &PMA,
               SmallVectorImpl<BasicBlock *> &LEB,
               SmallVectorImpl<Instruction *> &LIP, PredIteratorCache &PIC,
               AliasSetTracker &ast, LoopInfo &li, DebugLoc dl, int alignment,
               bool UnorderedAtomic, const AAMDNodes &AATags)
      : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
        LoopExitBlocks(LEB), LoopInsertPts(LIP), PredCache(PIC), AST(ast),
        LI(li), DL(std::move(dl)), Alignment(alignment),
        UnorderedAtomic(UnorderedAtomic), AATags(AATags) {}

  // An instruction participates in this promotion iff it loads from or
  // stores to one of the must-alias pointers.
  bool isInstInList(Instruction *I,
                    const SmallVectorImpl<Instruction *> &) const override {
    Value *Ptr;
    if (LoadInst *LI = dyn_cast<LoadInst>(I))
      Ptr = LI->getOperand(0);
    else
      Ptr = cast<StoreInst>(I)->getPointerOperand();
    return PointerMustAliases.count(Ptr);
  }

  void doExtraRewritesBeforeFinalDeletion() const override {
    // Insert stores after in the loop exit blocks.  Each exit block gets a
    // store of the live-out values that feed them.  Since we've already told
    // the SSA updater about the defs in the loop and the preheader
    // definition, it is all set and we can start using it.
    for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) {
      BasicBlock *ExitBlock = LoopExitBlocks[i];
      Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
      // Both the stored value and the pointer may need LCSSA PHIs in the
      // exit block.
      LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock);
      Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock);
      Instruction *InsertPos = LoopInsertPts[i];
      StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
      if (UnorderedAtomic)
        NewSI->setOrdering(AtomicOrdering::Unordered);
      NewSI->setAlignment(Alignment);
      NewSI->setDebugLoc(DL);
      if (AATags)
        NewSI->setAAMetadata(AATags);
    }
  }

  void replaceLoadWithValue(LoadInst *LI, Value *V) const override {
    // Update alias analysis.
    AST.copyValue(LI, V);
  }
  // Keep the alias tracker in sync as promoted accesses are erased.
  void instructionDeleted(Instruction *I) const override { AST.deleteValue(I); }
};
} // namespace
1023
1024
/// Try to promote memory values to scalars by sinking stores out of the
1025
/// loop and moving loads to before the loop.  We do this by looping over
1026
/// the stores in the loop, looking for stores to Must pointers which are
1027
/// loop invariant.
1028
///
1029
bool llvm::promoteLoopAccessesToScalars(
1030
    const SmallSetVector<Value *, 8> &PointerMustAliases,
1031
    SmallVectorImpl<BasicBlock *> &ExitBlocks,
1032
    SmallVectorImpl<Instruction *> &InsertPts, PredIteratorCache &PIC,
1033
    LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
1034
    Loop *CurLoop, AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo,
1035
5.44k
    OptimizationRemarkEmitter *ORE) {
1036
5.44k
  // Verify inputs.
1037
5.44k
  assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
1038
5.44k
         CurAST != nullptr && SafetyInfo != nullptr &&
1039
5.44k
         "Unexpected Input to promoteLoopAccessesToScalars");
1040
5.44k
1041
5.44k
  Value *SomePtr = *PointerMustAliases.begin();
1042
5.44k
  BasicBlock *Preheader = CurLoop->getLoopPreheader();
1043
5.44k
1044
5.44k
  // It isn't safe to promote a load/store from the loop if the load/store is
1045
5.44k
  // conditional.  For example, turning:
1046
5.44k
  //
1047
5.44k
  //    for () { if (c) *P += 1; }
1048
5.44k
  //
1049
5.44k
  // into:
1050
5.44k
  //
1051
5.44k
  //    tmp = *P;  for () { if (c) tmp +=1; } *P = tmp;
1052
5.44k
  //
1053
5.44k
  // is not safe, because *P may only be valid to access if 'c' is true.
1054
5.44k
  //
1055
5.44k
  // The safety property divides into two parts:
1056
5.44k
  // p1) The memory may not be dereferenceable on entry to the loop.  In this
1057
5.44k
  //    case, we can't insert the required load in the preheader.
1058
5.44k
  // p2) The memory model does not allow us to insert a store along any dynamic
1059
5.44k
  //    path which did not originally have one.
1060
5.44k
  //
1061
5.44k
  // If at least one store is guaranteed to execute, both properties are
1062
5.44k
  // satisfied, and promotion is legal.
1063
5.44k
  //
1064
5.44k
  // This, however, is not a necessary condition. Even if no store/load is
1065
5.44k
  // guaranteed to execute, we can still establish these properties.
1066
5.44k
  // We can establish (p1) by proving that hoisting the load into the preheader
1067
5.44k
  // is safe (i.e. proving dereferenceability on all paths through the loop). We
1068
5.44k
  // can use any access within the alias set to prove dereferenceability,
1069
5.44k
  // since they're all must alias.
1070
5.44k
  //
1071
5.44k
  // There are two ways establish (p2):
1072
5.44k
  // a) Prove the location is thread-local. In this case the memory model
1073
5.44k
  // requirement does not apply, and stores are safe to insert.
1074
5.44k
  // b) Prove a store dominates every exit block. In this case, if an exit
1075
5.44k
  // blocks is reached, the original dynamic path would have taken us through
1076
5.44k
  // the store, so inserting a store into the exit block is safe. Note that this
1077
5.44k
  // is different from the store being guaranteed to execute. For instance,
1078
5.44k
  // if an exception is thrown on the first iteration of the loop, the original
1079
5.44k
  // store is never executed, but the exit blocks are not executed either.
1080
5.44k
1081
5.44k
  bool DereferenceableInPH = false;
1082
5.44k
  bool SafeToInsertStore = false;
1083
5.44k
1084
5.44k
  SmallVector<Instruction *, 64> LoopUses;
1085
5.44k
1086
5.44k
  // We start with an alignment of one and try to find instructions that allow
1087
5.44k
  // us to prove better alignment.
1088
5.44k
  unsigned Alignment = 1;
1089
5.44k
  // Keep track of which types of access we see
1090
5.44k
  bool SawUnorderedAtomic = false;
1091
5.44k
  bool SawNotAtomic = false;
1092
5.44k
  AAMDNodes AATags;
1093
5.44k
1094
5.44k
  const DataLayout &MDL = Preheader->getModule()->getDataLayout();
1095
5.44k
1096
5.44k
  // Do we know this object does not escape ?
1097
5.44k
  bool IsKnownNonEscapingObject = false;
1098
5.44k
  if (
SafetyInfo->MayThrow5.44k
) {
1099
580
    // If a loop can throw, we have to insert a store along each unwind edge.
1100
580
    // That said, we can't actually make the unwind edge explicit. Therefore,
1101
580
    // we have to prove that the store is dead along the unwind edge.
1102
580
    //
1103
580
    // If the underlying object is not an alloca, nor a pointer that does not
1104
580
    // escape, then we can not effectively prove that the store is dead along
1105
580
    // the unwind edge. i.e. the caller of this function could have ways to
1106
580
    // access the pointed object.
1107
580
    Value *Object = GetUnderlyingObject(SomePtr, MDL);
1108
580
    // If this is a base pointer we do not understand, simply bail.
1109
580
    // We only handle alloca and return value from alloc-like fn right now.
1110
580
    if (
!isa<AllocaInst>(Object)580
) {
1111
305
      if (!isAllocLikeFn(Object, TLI))
1112
201
        return false;
1113
104
      // If this is an alloc like fn. There are more constraints we need to
1114
104
      // verify. More specifically, we must make sure that the pointer can not
1115
104
      // escape.
1116
104
      //
1117
104
      // NOTE: PointerMayBeCaptured is not enough as the pointer may have
1118
104
      // escaped even though its not captured by the enclosing function.
1119
104
      // Standard allocation functions like malloc, calloc, and operator new
1120
104
      // return values which can be assumed not to have previously escaped.
1121
104
      
if (104
PointerMayBeCaptured(Object, true, true)104
)
1122
2
        return false;
1123
102
      IsKnownNonEscapingObject = true;
1124
102
    }
1125
580
  }
1126
5.44k
1127
5.44k
  // Check that all of the pointers in the alias set have the same type.  We
1128
5.44k
  // cannot (yet) promote a memory location that is loaded and stored in
1129
5.44k
  // different sizes.  While we are at it, collect alignment and AA info.
1130
5.24k
  
for (Value *ASIV : PointerMustAliases) 5.24k
{
1131
5.47k
    // Check that all of the pointers in the alias set have the same type.  We
1132
5.47k
    // cannot (yet) promote a memory location that is loaded and stored in
1133
5.47k
    // different sizes.
1134
5.47k
    if (SomePtr->getType() != ASIV->getType())
1135
125
      return false;
1136
5.35k
1137
5.35k
    
for (User *U : ASIV->users()) 5.35k
{
1138
108k
      // Ignore instructions that are outside the loop.
1139
108k
      Instruction *UI = dyn_cast<Instruction>(U);
1140
108k
      if (
!UI || 108k
!CurLoop->contains(UI)108k
)
1141
98.7k
        continue;
1142
9.84k
1143
9.84k
      // If there is an non-load/store instruction in the loop, we can't promote
1144
9.84k
      // it.
1145
9.84k
      
if (LoadInst *9.84k
Load9.84k
= dyn_cast<LoadInst>(UI)) {
1146
3.07k
        assert(!Load->isVolatile() && "AST broken");
1147
3.07k
        if (!Load->isUnordered())
1148
0
          return false;
1149
3.07k
1150
3.07k
        SawUnorderedAtomic |= Load->isAtomic();
1151
3.07k
        SawNotAtomic |= !Load->isAtomic();
1152
3.07k
1153
3.07k
        if (!DereferenceableInPH)
1154
965
          DereferenceableInPH = isSafeToExecuteUnconditionally(
1155
965
              *Load, DT, CurLoop, SafetyInfo, ORE, Preheader->getTerminator());
1156
9.84k
      } else 
if (const StoreInst *6.76k
Store6.76k
= dyn_cast<StoreInst>(UI)) {
1157
6.71k
        // Stores *of* the pointer are not interesting, only stores *to* the
1158
6.71k
        // pointer.
1159
6.71k
        if (UI->getOperand(1) != ASIV)
1160
4
          continue;
1161
6.71k
        assert(!Store->isVolatile() && "AST broken");
1162
6.71k
        if (!Store->isUnordered())
1163
4
          return false;
1164
6.70k
1165
6.70k
        SawUnorderedAtomic |= Store->isAtomic();
1166
6.70k
        SawNotAtomic |= !Store->isAtomic();
1167
6.70k
1168
6.70k
        // If the store is guaranteed to execute, both properties are satisfied.
1169
6.70k
        // We may want to check if a store is guaranteed to execute even if we
1170
6.70k
        // already know that promotion is safe, since it may have higher
1171
6.70k
        // alignment than any other guaranteed stores, in which case we can
1172
6.70k
        // raise the alignment on the promoted store.
1173
6.70k
        unsigned InstAlignment = Store->getAlignment();
1174
6.70k
        if (!InstAlignment)
1175
89
          InstAlignment =
1176
89
              MDL.getABITypeAlignment(Store->getValueOperand()->getType());
1177
6.70k
1178
6.70k
        if (
!DereferenceableInPH || 6.70k
!SafeToInsertStore1.91k
||
1179
6.70k
            
(InstAlignment > Alignment)633
) {
1180
6.08k
          if (
isGuaranteedToExecute(*UI, DT, CurLoop, SafetyInfo)6.08k
) {
1181
2.24k
            DereferenceableInPH = true;
1182
2.24k
            SafeToInsertStore = true;
1183
2.24k
            Alignment = std::max(Alignment, InstAlignment);
1184
2.24k
          }
1185
6.08k
        }
1186
6.70k
1187
6.70k
        // If a store dominates all exit blocks, it is safe to sink.
1188
6.70k
        // As explained above, if an exit block was executed, a dominating
1189
6.70k
        // store must have been been executed at least once, so we are not
1190
6.70k
        // introducing stores on paths that did not have them.
1191
6.70k
        // Note that this only looks at explicit exit blocks. If we ever
1192
6.70k
        // start sinking stores into unwind edges (see above), this will break.
1193
6.70k
        if (!SafeToInsertStore)
1194
3.81k
          
SafeToInsertStore = llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) 3.81k
{
1195
3.90k
            return DT->dominates(Store->getParent(), Exit);
1196
3.81k
          });
1197
6.70k
1198
6.70k
        // If the store is not guaranteed to execute, we may still get
1199
6.70k
        // deref info through it.
1200
6.70k
        if (
!DereferenceableInPH6.70k
) {
1201
2.90k
          DereferenceableInPH = isDereferenceableAndAlignedPointer(
1202
2.90k
              Store->getPointerOperand(), Store->getAlignment(), MDL,
1203
2.90k
              Preheader->getTerminator(), DT);
1204
2.90k
        }
1205
6.71k
      } else
1206
49
        return false; // Not a load or store.
1207
9.78k
1208
9.78k
      // Merge the AA tags.
1209
9.78k
      
if (9.78k
LoopUses.empty()9.78k
) {
1210
5.18k
        // On the first load/store, just take its AA tags.
1211
5.18k
        UI->getAAMetadata(AATags);
1212
9.78k
      } else 
if (4.59k
AATags4.59k
) {
1213
4.15k
        UI->getAAMetadata(AATags, /* Merge = */ true);
1214
4.15k
      }
1215
108k
1216
108k
      LoopUses.push_back(UI);
1217
108k
    }
1218
5.47k
  }
1219
5.24k
1220
5.24k
  // If we found both an unordered atomic instruction and a non-atomic memory
1221
5.24k
  // access, bail.  We can't blindly promote non-atomic to atomic since we
1222
5.24k
  // might not be able to lower the result.  We can't downgrade since that
1223
5.24k
  // would violate memory model.  Also, align 0 is an error for atomics.
1224
5.06k
  
if (5.06k
SawUnorderedAtomic && 5.06k
SawNotAtomic8
)
1225
2
    return false;
1226
5.06k
1227
5.06k
  // If we couldn't prove we can hoist the load, bail.
1228
5.06k
  
if (5.06k
!DereferenceableInPH5.06k
)
1229
742
    return false;
1230
4.31k
1231
4.31k
  // We know we can hoist the load, but don't have a guaranteed store.
1232
4.31k
  // Check whether the location is thread-local. If it is, then we can insert
1233
4.31k
  // stores along paths which originally didn't have them without violating the
1234
4.31k
  // memory model.
1235
4.31k
  
if (4.31k
!SafeToInsertStore4.31k
) {
1236
2.01k
    // If this is a known non-escaping object, it is safe to insert the stores.
1237
2.01k
    if (IsKnownNonEscapingObject)
1238
0
      SafeToInsertStore = true;
1239
2.01k
    else {
1240
2.01k
      Value *Object = GetUnderlyingObject(SomePtr, MDL);
1241
2.01k
      SafeToInsertStore =
1242
2.00k
          (isAllocLikeFn(Object, TLI) || isa<AllocaInst>(Object)) &&
1243
388
          !PointerMayBeCaptured(Object, true, true);
1244
2.01k
    }
1245
2.01k
  }
1246
4.31k
1247
4.31k
  // If we've still failed to prove we can sink the store, give up.
1248
4.31k
  if (!SafeToInsertStore)
1249
1.81k
    return false;
1250
2.50k
1251
2.50k
  // Otherwise, this is safe to promote, lets do it!
1252
2.50k
  
DEBUG2.50k
(dbgs() << "LICM: Promoting value stored to in loop: " << *SomePtr
1253
2.50k
               << '\n');
1254
2.50k
  ORE->emit(
1255
2.50k
      OptimizationRemark(DEBUG_TYPE, "PromoteLoopAccessesToScalar", LoopUses[0])
1256
2.50k
      << "Moving accesses to memory location out of the loop");
1257
2.50k
  ++NumPromoted;
1258
2.50k
1259
2.50k
  // Grab a debug location for the inserted loads/stores; given that the
1260
2.50k
  // inserted loads/stores have little relation to the original loads/stores,
1261
2.50k
  // this code just arbitrarily picks a location from one, since any debug
1262
2.50k
  // location is better than none.
1263
2.50k
  DebugLoc DL = LoopUses[0]->getDebugLoc();
1264
2.50k
1265
2.50k
  // We use the SSAUpdater interface to insert phi nodes as required.
1266
2.50k
  SmallVector<PHINode *, 16> NewPHIs;
1267
2.50k
  SSAUpdater SSA(&NewPHIs);
1268
2.50k
  LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
1269
2.50k
                        InsertPts, PIC, *CurAST, *LI, DL, Alignment,
1270
2.50k
                        SawUnorderedAtomic, AATags);
1271
2.50k
1272
2.50k
  // Set up the preheader to have a definition of the value.  It is the live-out
1273
2.50k
  // value from the preheader that uses in the loop will use.
1274
2.50k
  LoadInst *PreheaderLoad = new LoadInst(
1275
2.50k
      SomePtr, SomePtr->getName() + ".promoted", Preheader->getTerminator());
1276
2.50k
  if (SawUnorderedAtomic)
1277
6
    PreheaderLoad->setOrdering(AtomicOrdering::Unordered);
1278
2.50k
  PreheaderLoad->setAlignment(Alignment);
1279
2.50k
  PreheaderLoad->setDebugLoc(DL);
1280
2.50k
  if (AATags)
1281
2.29k
    PreheaderLoad->setAAMetadata(AATags);
1282
2.50k
  SSA.AddAvailableValue(Preheader, PreheaderLoad);
1283
2.50k
1284
2.50k
  // Rewrite all the loads in the loop and remember all the definitions from
1285
2.50k
  // stores in the loop.
1286
2.50k
  Promoter.run(LoopUses);
1287
2.50k
1288
2.50k
  // If the SSAUpdater didn't use the load in the preheader, just zap it now.
1289
2.50k
  if (PreheaderLoad->use_empty())
1290
1.32k
    PreheaderLoad->eraseFromParent();
1291
5.44k
1292
5.44k
  return true;
1293
5.44k
}
1294
1295
/// Returns an owning pointer to an alias set which incorporates aliasing info
1296
/// from L and all subloops of L.
1297
/// FIXME: In new pass manager, there is no helper function to handle loop
1298
/// analysis such as cloneBasicBlockAnalysis, so the AST needs to be recomputed
1299
/// from scratch for every loop. Hook up with the helper functions when
1300
/// available in the new pass manager to avoid redundant computation.
1301
AliasSetTracker *
1302
LoopInvariantCodeMotion::collectAliasInfoForLoop(Loop *L, LoopInfo *LI,
1303
1.02M
                                                 AliasAnalysis *AA) {
1304
1.02M
  AliasSetTracker *CurAST = nullptr;
1305
1.02M
  SmallVector<Loop *, 4> RecomputeLoops;
1306
309k
  for (Loop *InnerL : L->getSubLoops()) {
1307
309k
    auto MapI = LoopToAliasSetMap.find(InnerL);
1308
309k
    // If the AST for this inner loop is missing it may have been merged into
1309
309k
    // some other loop's AST and then that loop unrolled, and so we need to
1310
309k
    // recompute it.
1311
309k
    if (
MapI == LoopToAliasSetMap.end()309k
) {
1312
12
      RecomputeLoops.push_back(InnerL);
1313
12
      continue;
1314
12
    }
1315
309k
    AliasSetTracker *InnerAST = MapI->second;
1316
309k
1317
309k
    if (
CurAST != nullptr309k
) {
1318
106k
      // What if InnerLoop was modified by other passes ?
1319
106k
      CurAST->add(*InnerAST);
1320
106k
1321
106k
      // Once we've incorporated the inner loop's AST into ours, we don't need
1322
106k
      // the subloop's anymore.
1323
106k
      delete InnerAST;
1324
309k
    } else {
1325
203k
      CurAST = InnerAST;
1326
203k
    }
1327
309k
    LoopToAliasSetMap.erase(MapI);
1328
309k
  }
1329
1.02M
  if (CurAST == nullptr)
1330
826k
    CurAST = new AliasSetTracker(*AA);
1331
1.02M
1332
1.02M
  auto mergeLoop = [&](Loop *L) {
1333
1.02M
    // Loop over the body of this loop, looking for calls, invokes, and stores.
1334
1.02M
    for (BasicBlock *BB : L->blocks())
1335
4.77M
      CurAST->add(*BB); // Incorporate the specified basic block
1336
1.02M
  };
1337
1.02M
1338
1.02M
  // Add everything from the sub loops that are no longer directly available.
1339
1.02M
  for (Loop *InnerL : RecomputeLoops)
1340
12
    mergeLoop(InnerL);
1341
1.02M
1342
1.02M
  // And merge in this loop.
1343
1.02M
  mergeLoop(L);
1344
1.02M
1345
1.02M
  return CurAST;
1346
1.02M
}
1347
1348
/// Simple analysis hook. Clone alias set info.
1349
///
1350
void LegacyLICMPass::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To,
1351
170k
                                             Loop *L) {
1352
170k
  AliasSetTracker *AST = LICM.getLoopToAliasSetMap().lookup(L);
1353
170k
  if (!AST)
1354
113k
    return;
1355
57.2k
1356
57.2k
  AST->copyValue(From, To);
1357
57.2k
}
1358
1359
/// Simple Analysis hook. Delete value V from alias set
1360
///
1361
12.1k
void LegacyLICMPass::deleteAnalysisValue(Value *V, Loop *L) {
1362
12.1k
  AliasSetTracker *AST = LICM.getLoopToAliasSetMap().lookup(L);
1363
12.1k
  if (!AST)
1364
6.96k
    return;
1365
5.15k
1366
5.15k
  AST->deleteValue(V);
1367
5.15k
}
1368
1369
/// Simple Analysis hook. Delete value L from alias set map.
1370
///
1371
8
void LegacyLICMPass::deleteAnalysisLoop(Loop *L) {
1372
8
  AliasSetTracker *AST = LICM.getLoopToAliasSetMap().lookup(L);
1373
8
  if (!AST)
1374
4
    return;
1375
4
1376
4
  delete AST;
1377
4
  LICM.getLoopToAliasSetMap().erase(L);
1378
4
}
1379
1380
/// Return true if the body of this loop may store into the memory
1381
/// location pointed to by V.
1382
///
1383
static bool pointerInvalidatedByLoop(Value *V, uint64_t Size,
1384
                                     const AAMDNodes &AAInfo,
1385
855k
                                     AliasSetTracker *CurAST) {
1386
855k
  // Check to see if any of the basic blocks in CurLoop invalidate *V.
1387
855k
  return CurAST->getAliasSetForPointer(V, Size, AAInfo).isMod();
1388
855k
}
1389
1390
/// Little predicate that returns true if the specified basic block is in
1391
/// a subloop of the current one, not the current one itself.
1392
///
1393
9.54M
static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI) {
1394
9.54M
  assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop");
1395
9.54M
  return LI->getLoopFor(BB) != CurLoop;
1396
9.54M
}