Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- LoopLoadElimination.cpp - Loop Load Elimination Pass ---------------===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
// This file implements a loop-aware load elimination pass.
11
//
12
// It uses LoopAccessAnalysis to identify loop-carried dependences with a
13
// distance of one between stores and loads.  These form the candidates for the
14
// transformation.  The source value of each store is then propagated to the user
15
// of the corresponding load.  This makes the load dead.
16
//
17
// The pass can also version the loop and add memchecks in order to prove that
18
// may-aliasing stores can't change the value in memory before it's read by the
19
// load.
20
//
21
//===----------------------------------------------------------------------===//
22
23
#include "llvm/Transforms/Scalar/LoopLoadElimination.h"
24
#include "llvm/ADT/APInt.h"
25
#include "llvm/ADT/DenseMap.h"
26
#include "llvm/ADT/DepthFirstIterator.h"
27
#include "llvm/ADT/STLExtras.h"
28
#include "llvm/ADT/SmallSet.h"
29
#include "llvm/ADT/SmallVector.h"
30
#include "llvm/ADT/Statistic.h"
31
#include "llvm/Analysis/GlobalsModRef.h"
32
#include "llvm/Analysis/LoopAccessAnalysis.h"
33
#include "llvm/Analysis/LoopInfo.h"
34
#include "llvm/Analysis/ScalarEvolution.h"
35
#include "llvm/Analysis/ScalarEvolutionExpander.h"
36
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
37
#include "llvm/IR/DataLayout.h"
38
#include "llvm/IR/Dominators.h"
39
#include "llvm/IR/Instructions.h"
40
#include "llvm/IR/Module.h"
41
#include "llvm/IR/Type.h"
42
#include "llvm/IR/Value.h"
43
#include "llvm/Pass.h"
44
#include "llvm/Support/Casting.h"
45
#include "llvm/Support/CommandLine.h"
46
#include "llvm/Support/Debug.h"
47
#include "llvm/Transforms/Scalar.h"
48
#include "llvm/Transforms/Utils/LoopVersioning.h"
49
#include <algorithm>
50
#include <cassert>
51
#include <forward_list>
52
#include <set>
53
#include <tuple>
54
#include <utility>
55
56
#define LLE_OPTION "loop-load-elim"
57
#define DEBUG_TYPE LLE_OPTION
58
59
using namespace llvm;
60
61
static cl::opt<unsigned> CheckPerElim(
62
    "runtime-check-per-loop-load-elim", cl::Hidden,
63
    cl::desc("Max number of memchecks allowed per eliminated load on average"),
64
    cl::init(1));
65
66
static cl::opt<unsigned> LoadElimSCEVCheckThreshold(
67
    "loop-load-elimination-scev-check-threshold", cl::init(8), cl::Hidden,
68
    cl::desc("The maximum number of SCEV checks allowed for Loop "
69
             "Load Elimination"));
70
71
STATISTIC(NumLoopLoadEliminted, "Number of loads eliminated by LLE");
72
73
namespace {
74
75
/// \brief Represent a store-to-forwarding candidate.
76
struct StoreToLoadForwardingCandidate {
77
  LoadInst *Load;
78
  StoreInst *Store;
79
80
  StoreToLoadForwardingCandidate(LoadInst *Load, StoreInst *Store)
81
1.43k
      : Load(Load), Store(Store) {}
82
83
  /// \brief Return true if the dependence from the store to the load has a
84
  /// distance of one.  E.g. A[i+1] = A[i]
85
  bool isDependenceDistanceOfOne(PredicatedScalarEvolution &PSE,
86
643
                                 Loop *L) const {
87
643
    Value *LoadPtr = Load->getPointerOperand();
88
643
    Value *StorePtr = Store->getPointerOperand();
89
643
    Type *LoadPtrType = LoadPtr->getType();
90
643
    Type *LoadType = LoadPtrType->getPointerElementType();
91
643
92
643
    assert(LoadPtrType->getPointerAddressSpace() ==
93
643
               StorePtr->getType()->getPointerAddressSpace() &&
94
643
           LoadType == StorePtr->getType()->getPointerElementType() &&
95
643
           "Should be a known dependence");
96
643
97
643
    // Currently we only support accesses with unit stride.  FIXME: we should be
98
643
    // able to handle non-unit stride as well as long as the stride is equal to
99
643
    // the dependence distance.
100
643
    if (getPtrStride(PSE, LoadPtr, L) != 1 ||
101
506
        getPtrStride(PSE, StorePtr, L) != 1)
102
137
      return false;
103
506
104
506
    auto &DL = Load->getParent()->getModule()->getDataLayout();
105
506
    unsigned TypeByteSize = DL.getTypeAllocSize(const_cast<Type *>(LoadType));
106
506
107
506
    auto *LoadPtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(LoadPtr));
108
506
    auto *StorePtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(StorePtr));
109
506
110
506
    // We don't need to check non-wrapping here because forward/backward
111
506
    // dependence wouldn't be valid if these weren't monotonic accesses.
112
506
    auto *Dist = cast<SCEVConstant>(
113
506
        PSE.getSE()->getMinusSCEV(StorePtrSCEV, LoadPtrSCEV));
114
506
    const APInt &Val = Dist->getAPInt();
115
506
    return Val == TypeByteSize;
116
506
  }
117
118
38
  Value *getLoadPtr() const { return Load->getPointerOperand(); }
119
120
#ifndef NDEBUG
121
  friend raw_ostream &operator<<(raw_ostream &OS,
122
                                 const StoreToLoadForwardingCandidate &Cand) {
123
    OS << *Cand.Store << " -->\n";
124
    OS.indent(2) << *Cand.Load << "\n";
125
    return OS;
126
  }
127
#endif
128
};
129
130
/// \brief Check if the store dominates all latches, so as long as there is no
131
/// intervening store this value will be loaded in the next iteration.
132
bool doesStoreDominatesAllLatches(BasicBlock *StoreBlock, Loop *L,
133
540
                                  DominatorTree *DT) {
134
540
  SmallVector<BasicBlock *, 8> Latches;
135
540
  L->getLoopLatches(Latches);
136
540
  return llvm::all_of(Latches, [&](const BasicBlock *Latch) {
137
540
    return DT->dominates(StoreBlock, Latch);
138
540
  });
139
540
}
140
141
/// \brief Return true if the load is not executed on all paths in the loop.
142
470
static bool isLoadConditional(LoadInst *Load, Loop *L) {
143
470
  return Load->getParent() != L->getHeader();
144
470
}
145
146
/// \brief The per-loop class that does most of the work.
147
class LoadEliminationForLoop {
148
public:
149
  LoadEliminationForLoop(Loop *L, LoopInfo *LI, const LoopAccessInfo &LAI,
150
                         DominatorTree *DT)
151
276k
      : L(L), LI(LI), LAI(LAI), DT(DT), PSE(LAI.getPSE()) {}
152
153
  /// \brief Look through the loop-carried and loop-independent dependences in
154
  /// this loop and find store->load dependences.
155
  ///
156
  /// Note that no candidate is returned if LAA has failed to analyze the loop
157
  /// (e.g. if it's not bottom-tested, contains volatile memops, etc.)
158
  std::forward_list<StoreToLoadForwardingCandidate>
159
276k
  findStoreToLoadDependences(const LoopAccessInfo &LAI) {
160
276k
    std::forward_list<StoreToLoadForwardingCandidate> Candidates;
161
276k
162
276k
    const auto *Deps = LAI.getDepChecker().getDependences();
163
276k
    if (!Deps)
164
270
      return Candidates;
165
275k
166
275k
    // Find store->load dependences (consequently true dep).  Both lexically
167
275k
    // forward and backward dependences qualify.  Disqualify loads that have
168
275k
    // other unknown dependences.
169
275k
170
275k
    SmallSet<Instruction *, 4> LoadsWithUnknownDepedence;
171
275k
172
12.8k
    for (const auto &Dep : *Deps) {
173
12.8k
      Instruction *Source = Dep.getSource(LAI);
174
12.8k
      Instruction *Destination = Dep.getDestination(LAI);
175
12.8k
176
12.8k
      if (
Dep.Type == MemoryDepChecker::Dependence::Unknown12.8k
) {
177
3.69k
        if (isa<LoadInst>(Source))
178
1.87k
          LoadsWithUnknownDepedence.insert(Source);
179
3.69k
        if (isa<LoadInst>(Destination))
180
627
          LoadsWithUnknownDepedence.insert(Destination);
181
3.69k
        continue;
182
3.69k
      }
183
9.20k
184
9.20k
      
if (9.20k
Dep.isBackward()9.20k
)
185
9.20k
        // Note that the designations source and destination follow the program
186
9.20k
        // order, i.e. source is always first.  (The direction is given by the
187
9.20k
        // DepType.)
188
3.51k
        std::swap(Source, Destination);
189
9.20k
      else
190
9.20k
        assert(Dep.isForward() && "Needs to be a forward dependence");
191
9.20k
192
9.20k
      auto *Store = dyn_cast<StoreInst>(Source);
193
9.20k
      if (!Store)
194
4.93k
        continue;
195
4.26k
      auto *Load = dyn_cast<LoadInst>(Destination);
196
4.26k
      if (!Load)
197
2.82k
        continue;
198
1.43k
199
1.43k
      // Only propagate the value if they are of the same type.
200
1.43k
      
if (1.43k
Store->getPointerOperandType() != Load->getPointerOperandType()1.43k
)
201
5
        continue;
202
1.43k
203
1.43k
      Candidates.emplace_front(Load, Store);
204
1.43k
    }
205
275k
206
275k
    if (!LoadsWithUnknownDepedence.empty())
207
464
      
Candidates.remove_if([&](const StoreToLoadForwardingCandidate &C) 464
{
208
77
        return LoadsWithUnknownDepedence.count(C.Load);
209
464
      });
210
276k
211
276k
    return Candidates;
212
276k
  }
213
214
  /// \brief Return the index of the instruction according to program order.
215
86
  unsigned getInstrIndex(Instruction *Inst) {
216
86
    auto I = InstOrder.find(Inst);
217
86
    assert(I != InstOrder.end() && "No index for instruction");
218
86
    return I->second;
219
86
  }
220
221
  /// \brief If a load has multiple candidates associated (i.e. different
222
  /// stores), it means that it could be forwarding from multiple stores
223
  /// depending on control flow.  Remove these candidates.
224
  ///
225
  /// Here, we rely on LAA to include the relevant loop-independent dependences.
226
  /// LAA is known to omit these in the very simple case when the read and the
227
  /// write within an alias set always takes place using the *same* pointer.
228
  ///
229
  /// However, we know that this is not the case here, i.e. we can rely on LAA
230
  /// to provide us with loop-independent dependences for the cases we're
231
  /// interested.  Consider the case for example where a loop-independent
232
  /// dependence S1->S2 invalidates the forwarding S3->S2.
233
  ///
234
  ///         A[i]   = ...   (S1)
235
  ///         ...    = A[i]  (S2)
236
  ///         A[i+1] = ...   (S3)
237
  ///
238
  /// LAA will perform dependence analysis here because there are two
239
  /// *different* pointers involved in the same alias set (&A[i] and &A[i+1]).
240
  void removeDependencesFromMultipleStores(
241
409
      std::forward_list<StoreToLoadForwardingCandidate> &Candidates) {
242
409
    // If Store is nullptr it means that we have multiple stores forwarding to
243
409
    // this load.
244
409
    typedef DenseMap<LoadInst *, const StoreToLoadForwardingCandidate *>
245
409
        LoadToSingleCandT;
246
409
    LoadToSingleCandT LoadToSingleCand;
247
409
248
1.42k
    for (const auto &Cand : Candidates) {
249
1.42k
      bool NewElt;
250
1.42k
      LoadToSingleCandT::iterator Iter;
251
1.42k
252
1.42k
      std::tie(Iter, NewElt) =
253
1.42k
          LoadToSingleCand.insert(std::make_pair(Cand.Load, &Cand));
254
1.42k
      if (
!NewElt1.42k
) {
255
654
        const StoreToLoadForwardingCandidate *&OtherCand = Iter->second;
256
654
        // Already multiple stores forward to this load.
257
654
        if (OtherCand == nullptr)
258
418
          continue;
259
236
260
236
        // Handle the very basic case when the two stores are in the same block
261
236
        // so deciding which one forwards is easy.  The later one forwards as
262
236
        // long as they both have a dependence distance of one to the load.
263
236
        
if (236
Cand.Store->getParent() == OtherCand->Store->getParent() &&
264
188
            Cand.isDependenceDistanceOfOne(PSE, L) &&
265
236
            
OtherCand->isDependenceDistanceOfOne(PSE, L)2
) {
266
1
          // They are in the same block, the later one will forward to the load.
267
1
          if (getInstrIndex(OtherCand->Store) < getInstrIndex(Cand.Store))
268
0
            OtherCand = &Cand;
269
1
        } else
270
235
          OtherCand = nullptr;
271
654
      }
272
1.42k
    }
273
409
274
1.42k
    Candidates.remove_if([&](const StoreToLoadForwardingCandidate &Cand) {
275
1.42k
      if (
LoadToSingleCand[Cand.Load] != &Cand1.42k
) {
276
889
        DEBUG(dbgs() << "Removing from candidates: \n" << Cand
277
889
                     << "  The load may have multiple stores forwarding to "
278
889
                     << "it\n");
279
889
        return true;
280
889
      }
281
540
      return false;
282
540
    });
283
409
  }
284
285
  /// \brief Given two pointers operations by their RuntimePointerChecking
286
  /// indices, return true if they require an alias check.
287
  ///
288
  /// We need a check if one is a pointer for a candidate load and the other is
289
  /// a pointer for a possibly intervening store.
290
  bool needsChecking(unsigned PtrIdx1, unsigned PtrIdx2,
291
                     const SmallSet<Value *, 4> &PtrsWrittenOnFwdingPath,
292
64
                     const std::set<Value *> &CandLoadPtrs) {
293
64
    Value *Ptr1 =
294
64
        LAI.getRuntimePointerChecking()->getPointerInfo(PtrIdx1).PointerValue;
295
64
    Value *Ptr2 =
296
64
        LAI.getRuntimePointerChecking()->getPointerInfo(PtrIdx2).PointerValue;
297
22
    return ((PtrsWrittenOnFwdingPath.count(Ptr1) && CandLoadPtrs.count(Ptr2)) ||
298
64
            
(PtrsWrittenOnFwdingPath.count(Ptr2) && 64
CandLoadPtrs.count(Ptr1)13
));
299
64
  }
300
301
  /// \brief Return pointers that are possibly written to on the path from a
302
  /// forwarding store to a load.
303
  ///
304
  /// These pointers need to be alias-checked against the forwarding candidates.
305
  SmallSet<Value *, 4> findPointersWrittenOnForwardingPath(
306
34
      const SmallVectorImpl<StoreToLoadForwardingCandidate> &Candidates) {
307
34
    // From FirstStore to LastLoad neither of the elimination candidate loads
308
34
    // should overlap with any of the stores.
309
34
    //
310
34
    // E.g.:
311
34
    //
312
34
    // st1 C[i]
313
34
    // ld1 B[i] <-------,
314
34
    // ld0 A[i] <----,  |              * LastLoad
315
34
    // ...           |  |
316
34
    // st2 E[i]      |  |
317
34
    // st3 B[i+1] -- | -'              * FirstStore
318
34
    // st0 A[i+1] ---'
319
34
    // st4 D[i]
320
34
    //
321
34
    // st0 forwards to ld0 if the accesses in st4 and st1 don't overlap with
322
34
    // ld0.
323
34
324
34
    LoadInst *LastLoad =
325
34
        std::max_element(Candidates.begin(), Candidates.end(),
326
34
                         [&](const StoreToLoadForwardingCandidate &A,
327
4
                             const StoreToLoadForwardingCandidate &B) {
328
4
                           return getInstrIndex(A.Load) < getInstrIndex(B.Load);
329
4
                         })
330
34
            ->Load;
331
34
    StoreInst *FirstStore =
332
34
        std::min_element(Candidates.begin(), Candidates.end(),
333
34
                         [&](const StoreToLoadForwardingCandidate &A,
334
4
                             const StoreToLoadForwardingCandidate &B) {
335
4
                           return getInstrIndex(A.Store) <
336
4
                                  getInstrIndex(B.Store);
337
4
                         })
338
34
            ->Store;
339
34
340
34
    // We're looking for stores after the first forwarding store until the end
341
34
    // of the loop, then from the beginning of the loop until the last
342
34
    // forwarded-to load.  Collect the pointer for the stores.
343
34
    SmallSet<Value *, 4> PtrsWrittenOnFwdingPath;
344
34
345
54
    auto InsertStorePtr = [&](Instruction *I) {
346
54
      if (auto *S = dyn_cast<StoreInst>(I))
347
22
        PtrsWrittenOnFwdingPath.insert(S->getPointerOperand());
348
54
    };
349
34
    const auto &MemInstrs = LAI.getDepChecker().getMemoryInstructions();
350
34
    std::for_each(MemInstrs.begin() + getInstrIndex(FirstStore) + 1,
351
34
                  MemInstrs.end(), InsertStorePtr);
352
34
    std::for_each(MemInstrs.begin(), &MemInstrs[getInstrIndex(LastLoad)],
353
34
                  InsertStorePtr);
354
34
355
34
    return PtrsWrittenOnFwdingPath;
356
34
  }
357
358
  /// \brief Determine the pointer alias checks to prove that there are no
359
  /// intervening stores.
360
  SmallVector<RuntimePointerChecking::PointerCheck, 4> collectMemchecks(
361
34
      const SmallVectorImpl<StoreToLoadForwardingCandidate> &Candidates) {
362
34
363
34
    SmallSet<Value *, 4> PtrsWrittenOnFwdingPath =
364
34
        findPointersWrittenOnForwardingPath(Candidates);
365
34
366
34
    // Collect the pointers of the candidate loads.
367
34
    // FIXME: SmallSet does not work with std::inserter.
368
34
    std::set<Value *> CandLoadPtrs;
369
34
    transform(Candidates,
370
34
                   std::inserter(CandLoadPtrs, CandLoadPtrs.begin()),
371
34
                   std::mem_fn(&StoreToLoadForwardingCandidate::getLoadPtr));
372
34
373
34
    const auto &AllChecks = LAI.getRuntimePointerChecking()->getChecks();
374
34
    SmallVector<RuntimePointerChecking::PointerCheck, 4> Checks;
375
34
376
34
    copy_if(AllChecks, std::back_inserter(Checks),
377
38
            [&](const RuntimePointerChecking::PointerCheck &Check) {
378
38
              for (auto PtrIdx1 : Check.first->Members)
379
60
                for (auto PtrIdx2 : Check.second->Members)
380
64
                  
if (64
needsChecking(PtrIdx1, PtrIdx2, PtrsWrittenOnFwdingPath,
381
64
                                    CandLoadPtrs))
382
11
                    return true;
383
27
              return false;
384
27
            });
385
34
386
34
    DEBUG(dbgs() << "\nPointer Checks (count: " << Checks.size() << "):\n");
387
34
    DEBUG(LAI.getRuntimePointerChecking()->printChecks(dbgs(), Checks));
388
34
389
34
    return Checks;
390
34
  }
391
392
  /// \brief Perform the transformation for a candidate.
393
  void
394
  propagateStoredValueToLoadUsers(const StoreToLoadForwardingCandidate &Cand,
395
35
                                  SCEVExpander &SEE) {
396
35
    //
397
35
    // loop:
398
35
    //      %x = load %gep_i
399
35
    //         = ... %x
400
35
    //      store %y, %gep_i_plus_1
401
35
    //
402
35
    // =>
403
35
    //
404
35
    // ph:
405
35
    //      %x.initial = load %gep_0
406
35
    // loop:
407
35
    //      %x.storeforward = phi [%x.initial, %ph] [%y, %loop]
408
35
    //      %x = load %gep_i            <---- now dead
409
35
    //         = ... %x.storeforward
410
35
    //      store %y, %gep_i_plus_1
411
35
412
35
    Value *Ptr = Cand.Load->getPointerOperand();
413
35
    auto *PtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(Ptr));
414
35
    auto *PH = L->getLoopPreheader();
415
35
    Value *InitialPtr = SEE.expandCodeFor(PtrSCEV->getStart(), Ptr->getType(),
416
35
                                          PH->getTerminator());
417
35
    Value *Initial =
418
35
        new LoadInst(InitialPtr, "load_initial", /* isVolatile */ false,
419
35
                     Cand.Load->getAlignment(), PH->getTerminator());
420
35
421
35
    PHINode *PHI = PHINode::Create(Initial->getType(), 2, "store_forwarded",
422
35
                                   &L->getHeader()->front());
423
35
    PHI->addIncoming(Initial, PH);
424
35
    PHI->addIncoming(Cand.Store->getOperand(0), L->getLoopLatch());
425
35
426
35
    Cand.Load->replaceAllUsesWith(PHI);
427
35
  }
428
429
  /// \brief Top-level driver for each loop: find store->load forwarding
430
  /// candidates, add run-time checks and perform transformation.
431
276k
  bool processLoop() {
432
276k
    DEBUG(dbgs() << "\nIn \"" << L->getHeader()->getParent()->getName()
433
276k
                 << "\" checking " << *L << "\n");
434
276k
    // Look for store-to-load forwarding cases across the
435
276k
    // backedge. E.g.:
436
276k
    //
437
276k
    // loop:
438
276k
    //      %x = load %gep_i
439
276k
    //         = ... %x
440
276k
    //      store %y, %gep_i_plus_1
441
276k
    //
442
276k
    // =>
443
276k
    //
444
276k
    // ph:
445
276k
    //      %x.initial = load %gep_0
446
276k
    // loop:
447
276k
    //      %x.storeforward = phi [%x.initial, %ph] [%y, %loop]
448
276k
    //      %x = load %gep_i            <---- now dead
449
276k
    //         = ... %x.storeforward
450
276k
    //      store %y, %gep_i_plus_1
451
276k
452
276k
    // First start with store->load dependences.
453
276k
    auto StoreToLoadDependences = findStoreToLoadDependences(LAI);
454
276k
    if (StoreToLoadDependences.empty())
455
275k
      return false;
456
409
457
409
    // Generate an index for each load and store according to the original
458
409
    // program order.  This will be used later.
459
409
    InstOrder = LAI.getDepChecker().generateInstructionOrderMap();
460
409
461
409
    // To keep things simple for now, remove those where the load is potentially
462
409
    // fed by multiple stores.
463
409
    removeDependencesFromMultipleStores(StoreToLoadDependences);
464
409
    if (StoreToLoadDependences.empty())
465
9
      return false;
466
400
467
400
    // Filter the candidates further.
468
400
    SmallVector<StoreToLoadForwardingCandidate, 4> Candidates;
469
400
    unsigned NumForwarding = 0;
470
540
    for (const StoreToLoadForwardingCandidate Cand : StoreToLoadDependences) {
471
540
      DEBUG(dbgs() << "Candidate " << Cand);
472
540
473
540
      // Make sure that the stored value is available everywhere in the loop in
474
540
      // the next iteration.
475
540
      if (!doesStoreDominatesAllLatches(Cand.Store->getParent(), L, DT))
476
70
        continue;
477
470
478
470
      // If the load is conditional we can't hoist its 0-iteration instance to
479
470
      // the preheader because that would make it unconditional.  Thus we would
480
470
      // access a memory location that the original loop did not access.
481
470
      
if (470
isLoadConditional(Cand.Load, L)470
)
482
17
        continue;
483
453
484
453
      // Check whether the SCEV difference is the same as the induction step,
485
453
      // thus we load the value in the next iteration.
486
453
      
if (453
!Cand.isDependenceDistanceOfOne(PSE, L)453
)
487
415
        continue;
488
38
489
38
      ++NumForwarding;
490
38
      DEBUG(dbgs()
491
540
            << NumForwarding
492
540
            << ". Valid store-to-load forwarding across the loop backedge\n");
493
540
      Candidates.push_back(Cand);
494
540
    }
495
400
    if (Candidates.empty())
496
366
      return false;
497
34
498
34
    // Check intervening may-alias stores.  These need runtime checks for alias
499
34
    // disambiguation.
500
34
    SmallVector<RuntimePointerChecking::PointerCheck, 4> Checks =
501
34
        collectMemchecks(Candidates);
502
34
503
34
    // Too many checks are likely to outweigh the benefits of forwarding.
504
34
    if (
Checks.size() > Candidates.size() * CheckPerElim34
) {
505
1
      DEBUG(dbgs() << "Too many run-time checks needed.\n");
506
1
      return false;
507
1
    }
508
33
509
33
    
if (33
LAI.getPSE().getUnionPredicate().getComplexity() >
510
33
        LoadElimSCEVCheckThreshold) {
511
1
      DEBUG(dbgs() << "Too many SCEV run-time checks needed.\n");
512
1
      return false;
513
1
    }
514
32
515
32
    
if (32
!Checks.empty() || 32
!LAI.getPSE().getUnionPredicate().isAlwaysTrue()24
) {
516
11
      if (
L->getHeader()->getParent()->optForSize()11
) {
517
1
        DEBUG(dbgs() << "Versioning is needed but not allowed when optimizing "
518
1
                        "for size.\n");
519
1
        return false;
520
1
      }
521
10
522
10
      
if (10
!L->isLoopSimplifyForm()10
) {
523
0
        DEBUG(dbgs() << "Loop is not in loop-simplify form");
524
0
        return false;
525
0
      }
526
10
527
10
      // Point of no-return, start the transformation.  First, version the loop
528
10
      // if necessary.
529
10
530
10
      LoopVersioning LV(LAI, L, LI, DT, PSE.getSE(), false);
531
10
      LV.setAliasChecks(std::move(Checks));
532
10
      LV.setSCEVChecks(LAI.getPSE().getUnionPredicate());
533
10
      LV.versionLoop();
534
10
    }
535
32
536
32
    // Next, propagate the value stored by the store to the users of the load.
537
32
    // Also for the first iteration, generate the initial value of the load.
538
31
    SCEVExpander SEE(*PSE.getSE(), L->getHeader()->getModule()->getDataLayout(),
539
31
                     "storeforward");
540
31
    for (const auto &Cand : Candidates)
541
35
      propagateStoredValueToLoadUsers(Cand, SEE);
542
31
    NumLoopLoadEliminted += NumForwarding;
543
31
544
31
    return true;
545
276k
  }
546
547
private:
548
  Loop *L;
549
550
  /// \brief Maps the load/store instructions to their index according to
551
  /// program order.
552
  DenseMap<Instruction *, unsigned> InstOrder;
553
554
  // Analyses used.
555
  LoopInfo *LI;
556
  const LoopAccessInfo &LAI;
557
  DominatorTree *DT;
558
  PredicatedScalarEvolution PSE;
559
};
560
561
static bool
562
eliminateLoadsAcrossLoops(Function &F, LoopInfo &LI, DominatorTree &DT,
563
462k
                          function_ref<const LoopAccessInfo &(Loop &)> GetLAI) {
564
462k
  // Build up a worklist of inner-loops to transform to avoid iterator
565
462k
  // invalidation.
566
462k
  // FIXME: This logic comes from other passes that actually change the loop
567
462k
  // nest structure. It isn't clear this is necessary (or useful) for a pass
568
462k
  // which merely optimizes the use of loads in a loop.
569
462k
  SmallVector<Loop *, 8> Worklist;
570
462k
571
462k
  for (Loop *TopLevelLoop : LI)
572
237k
    for (Loop *L : depth_first(TopLevelLoop))
573
237k
      // We only handle inner-most loops.
574
333k
      
if (333k
L->empty()333k
)
575
276k
        Worklist.push_back(L);
576
462k
577
462k
  // Now walk the identified inner loops.
578
462k
  bool Changed = false;
579
276k
  for (Loop *L : Worklist) {
580
276k
    // The actual work is performed by LoadEliminationForLoop.
581
276k
    LoadEliminationForLoop LEL(L, &LI, GetLAI(*L), &DT);
582
276k
    Changed |= LEL.processLoop();
583
276k
  }
584
462k
  return Changed;
585
462k
}
586
587
/// \brief The pass.  Most of the work is delegated to the per-loop
588
/// LoadEliminationForLoop class.
589
class LoopLoadElimination : public FunctionPass {
590
public:
591
17.2k
  LoopLoadElimination() : FunctionPass(ID) {
592
17.2k
    initializeLoopLoadEliminationPass(*PassRegistry::getPassRegistry());
593
17.2k
  }
594
595
462k
  bool runOnFunction(Function &F) override {
596
462k
    if (skipFunction(F))
597
75
      return false;
598
462k
599
462k
    auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
600
462k
    auto &LAA = getAnalysis<LoopAccessLegacyAnalysis>();
601
462k
    auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
602
462k
603
462k
    // Process each loop nest in the function.
604
462k
    return eliminateLoadsAcrossLoops(
605
462k
        F, LI, DT,
606
276k
        [&LAA](Loop &L) -> const LoopAccessInfo & { return LAA.getInfo(&L); });
607
462k
  }
608
609
17.2k
  void getAnalysisUsage(AnalysisUsage &AU) const override {
610
17.2k
    AU.addRequiredID(LoopSimplifyID);
611
17.2k
    AU.addRequired<LoopInfoWrapperPass>();
612
17.2k
    AU.addPreserved<LoopInfoWrapperPass>();
613
17.2k
    AU.addRequired<LoopAccessLegacyAnalysis>();
614
17.2k
    AU.addRequired<ScalarEvolutionWrapperPass>();
615
17.2k
    AU.addRequired<DominatorTreeWrapperPass>();
616
17.2k
    AU.addPreserved<DominatorTreeWrapperPass>();
617
17.2k
    AU.addPreserved<GlobalsAAWrapperPass>();
618
17.2k
  }
619
620
  static char ID;
621
};
622
623
} // end anonymous namespace
624
625
char LoopLoadElimination::ID;
626
static const char LLE_name[] = "Loop Load Elimination";
627
628
41.6k
INITIALIZE_PASS_BEGIN41.6k
(LoopLoadElimination, LLE_OPTION, LLE_name, false, false)
629
41.6k
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
630
41.6k
INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis)
631
41.6k
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
632
41.6k
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
633
41.6k
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
634
41.6k
INITIALIZE_PASS_END(LoopLoadElimination, LLE_OPTION, LLE_name, false, false)
635
636
namespace llvm {
637
638
17.2k
FunctionPass *createLoopLoadEliminationPass() {
639
17.2k
  return new LoopLoadElimination();
640
17.2k
}
641
642
PreservedAnalyses LoopLoadEliminationPass::run(Function &F,
643
73
                                               FunctionAnalysisManager &AM) {
644
73
  auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
645
73
  auto &LI = AM.getResult<LoopAnalysis>(F);
646
73
  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
647
73
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
648
73
  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
649
73
  auto &AA = AM.getResult<AAManager>(F);
650
73
  auto &AC = AM.getResult<AssumptionAnalysis>(F);
651
73
652
73
  auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
653
73
  bool Changed = eliminateLoadsAcrossLoops(
654
26
      F, LI, DT, [&](Loop &L) -> const LoopAccessInfo & {
655
26
        LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI};
656
26
        return LAM.getResult<LoopAccessAnalysis>(L, AR);
657
26
      });
658
73
659
73
  if (!Changed)
660
71
    return PreservedAnalyses::all();
661
2
662
2
  PreservedAnalyses PA;
663
2
  return PA;
664
2
}
665
666
} // end namespace llvm