Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
//===- LoopDistribute.cpp - Loop Distribution Pass ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the Loop Distribution Pass.  Its main focus is to
// distribute loops that cannot be vectorized due to dependence cycles.  It
// tries to isolate the offending dependences into a new loop allowing
// vectorization of the remaining parts.
//
// For dependence analysis, the pass uses the LoopVectorizer's
// LoopAccessAnalysis.  Because this analysis presumes no change in the order of
// memory operations, special care is taken to preserve the lexical order of
// these operations.
//
// Similarly to the Vectorizer, the pass also supports loop versioning to
// run-time disambiguate potentially overlapping arrays.
//
//===----------------------------------------------------------------------===//
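
// Illustrative sketch (not part of the original file): given a loop where only
// part of the body participates in a dependence cycle,
//
//   for (i = 0; i < n; i++) {
//     A[i + 1] = A[i] * B[i];   // cycle: A[i+1] depends on A[i]
//     C[i] = D[i] * E[i];       // no cycle
//   }
//
// the pass splits it into a sequential loop that carries the cycle and a
// second loop that the vectorizer can then handle:
//
//   for (i = 0; i < n; i++)
//     A[i + 1] = A[i] * B[i];
//   for (i = 0; i < n; i++)
//     C[i] = D[i] * E[i];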

#include "llvm/Transforms/Scalar/LoopDistribute.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/LoopVersioning.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <cassert>
#include <functional>
#include <list>
#include <tuple>
#include <utility>

using namespace llvm;

#define LDIST_NAME "loop-distribute"
#define DEBUG_TYPE LDIST_NAME

/// @{
/// Metadata attribute names
static const char *const LLVMLoopDistributeFollowupAll =
    "llvm.loop.distribute.followup_all";
static const char *const LLVMLoopDistributeFollowupCoincident =
    "llvm.loop.distribute.followup_coincident";
static const char *const LLVMLoopDistributeFollowupSequential =
    "llvm.loop.distribute.followup_sequential";
static const char *const LLVMLoopDistributeFollowupFallback =
    "llvm.loop.distribute.followup_fallback";
/// @}
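
// These followup attributes let a front end steer later transforms: operands
// attached under one of the names above in the original loop ID are carried
// over to the corresponding new loop by makeFollowupLoopID() (see setNewLoopID
// below). A hypothetical IR example:
//
//   !0 = distinct !{!0, !1}
//   !1 = !{!"llvm.loop.distribute.followup_coincident", !2}
//   !2 = !{!"llvm.loop.vectorize.enable", i1 true}
//
// would request vectorization of the cycle-free (coincident) loop.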

static cl::opt<bool>
    LDistVerify("loop-distribute-verify", cl::Hidden,
                cl::desc("Turn on DominatorTree and LoopInfo verification "
                         "after Loop Distribution"),
                cl::init(false));

static cl::opt<bool> DistributeNonIfConvertible(
    "loop-distribute-non-if-convertible", cl::Hidden,
    cl::desc("Whether to distribute into a loop that may not be "
             "if-convertible by the loop vectorizer"),
    cl::init(false));

static cl::opt<unsigned> DistributeSCEVCheckThreshold(
    "loop-distribute-scev-check-threshold", cl::init(8), cl::Hidden,
    cl::desc("The maximum number of SCEV checks allowed for Loop "
             "Distribution"));

static cl::opt<unsigned> PragmaDistributeSCEVCheckThreshold(
    "loop-distribute-scev-check-threshold-with-pragma", cl::init(128),
    cl::Hidden,
    cl::desc(
        "The maximum number of SCEV checks allowed for Loop "
        "Distribution for loop marked with #pragma loop distribute(enable)"));

static cl::opt<bool> EnableLoopDistribute(
    "enable-loop-distribute", cl::Hidden,
    cl::desc("Enable the new, experimental LoopDistribution Pass"),
    cl::init(false));

STATISTIC(NumLoopsDistributed, "Number of loops distributed");

namespace {

/// Maintains the set of instructions of the loop for a partition before
/// cloning.  After cloning, it hosts the new loop.
class InstPartition {
  using InstructionSet = SmallPtrSet<Instruction *, 8>;

public:
  InstPartition(Instruction *I, Loop *L, bool DepCycle = false)
      : DepCycle(DepCycle), OrigLoop(L) {
    Set.insert(I);
  }

  /// Returns whether this partition contains a dependence cycle.
  bool hasDepCycle() const { return DepCycle; }

  /// Adds an instruction to this partition.
  void add(Instruction *I) { Set.insert(I); }

  /// Collection accessors.
  InstructionSet::iterator begin() { return Set.begin(); }
  InstructionSet::iterator end() { return Set.end(); }
  InstructionSet::const_iterator begin() const { return Set.begin(); }
  InstructionSet::const_iterator end() const { return Set.end(); }
  bool empty() const { return Set.empty(); }

  /// Moves this partition into \p Other.  This partition becomes empty
  /// after this.
  void moveTo(InstPartition &Other) {
    Other.Set.insert(Set.begin(), Set.end());
    Set.clear();
    Other.DepCycle |= DepCycle;
  }

  /// Populates the partition with a transitive closure of all the
  /// instructions that the seeded instructions depend on.
  void populateUsedSet() {
    // FIXME: We currently don't use control-dependence but simply include all
    // blocks (possibly empty at the end) and let simplifycfg mostly clean this
    // up.
    for (auto *B : OrigLoop->getBlocks())
      Set.insert(B->getTerminator());

    // Follow the use-def chains to form a transitive closure of all the
    // instructions that the originally seeded instructions depend on.
    SmallVector<Instruction *, 8> Worklist(Set.begin(), Set.end());
    while (!Worklist.empty()) {
      Instruction *I = Worklist.pop_back_val();
      // Insert instructions from the loop that we depend on.
      for (Value *V : I->operand_values()) {
        auto *I = dyn_cast<Instruction>(V);
        if (I && OrigLoop->contains(I->getParent()) && Set.insert(I).second)
          Worklist.push_back(I);
      }
    }
  }

  /// Clones the original loop.
  ///
  /// Updates LoopInfo and DominatorTree using the information that block \p
  /// LoopDomBB dominates the loop.
  Loop *cloneLoopWithPreheader(BasicBlock *InsertBefore, BasicBlock *LoopDomBB,
                               unsigned Index, LoopInfo *LI,
                               DominatorTree *DT) {
    ClonedLoop = ::cloneLoopWithPreheader(InsertBefore, LoopDomBB, OrigLoop,
                                          VMap, Twine(".ldist") + Twine(Index),
                                          LI, DT, ClonedLoopBlocks);
    return ClonedLoop;
  }

  /// The cloned loop.  If this partition is mapped to the original loop,
  /// this is null.
  const Loop *getClonedLoop() const { return ClonedLoop; }

  /// Returns the loop where this partition ends up after distribution.
  /// If this partition is mapped to the original loop then use the block from
  /// the loop.
  Loop *getDistributedLoop() const { return ClonedLoop ? ClonedLoop : OrigLoop; }

  /// The VMap that is populated by cloning and then used in
  /// remapInstructions to remap the cloned instructions.
  ValueToValueMapTy &getVMap() { return VMap; }

  /// Remaps the cloned instructions using VMap.
  void remapInstructions() {
    remapInstructionsInBlocks(ClonedLoopBlocks, VMap);
  }

  /// Based on the set of instructions selected for this partition,
  /// removes the unnecessary ones.
  void removeUnusedInsts() {
    SmallVector<Instruction *, 8> Unused;

    for (auto *Block : OrigLoop->getBlocks())
      for (auto &Inst : *Block)
        if (!Set.count(&Inst)) {
          Instruction *NewInst = &Inst;
          if (!VMap.empty())
            NewInst = cast<Instruction>(VMap[NewInst]);

          assert(!isa<BranchInst>(NewInst) &&
                 "Branches are marked used early on");
          Unused.push_back(NewInst);
        }

    // Delete the instructions backwards, as it has a reduced likelihood of
    // having to update as many def-use and use-def chains.
    for (auto *Inst : reverse(Unused)) {
      if (!Inst->use_empty())
        Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
      Inst->eraseFromParent();
    }
  }

  void print() const {
    if (DepCycle)
      dbgs() << "  (cycle)\n";
    for (auto *I : Set)
      // Prefix with the block name.
      dbgs() << "  " << I->getParent()->getName() << ":" << *I << "\n";
  }

  void printBlocks() const {
    for (auto *BB : getDistributedLoop()->getBlocks())
      dbgs() << *BB;
  }

private:
  /// Instructions from OrigLoop selected for this partition.
  InstructionSet Set;

  /// Whether this partition contains a dependence cycle.
  bool DepCycle;

  /// The original loop.
  Loop *OrigLoop;

  /// The cloned loop.  If this partition is mapped to the original loop,
  /// this is null.
  Loop *ClonedLoop = nullptr;

  /// The blocks of ClonedLoop including the preheader.  If this
  /// partition is mapped to the original loop, this is empty.
  SmallVector<BasicBlock *, 8> ClonedLoopBlocks;

  /// This gets populated once the set of instructions has been
  /// finalized.  If this partition is mapped to the original loop, it is not
  /// set.
  ValueToValueMapTy VMap;
};
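
// Lifecycle summary (derived from how the pass below drives InstPartition):
// seed instructions are added first; populateUsedSet() pulls in their
// transitive use-def closure plus every block terminator; for all partitions
// except the one reusing the original loop, cloneLoopWithPreheader() and
// remapInstructions() materialize a copy; removeUnusedInsts() finally strips
// the instructions a partition does not own.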

/// Holds the set of Partitions.  It populates them, merges them and then
/// clones the loops.
class InstPartitionContainer {
  using InstToPartitionIdT = DenseMap<Instruction *, int>;

public:
  InstPartitionContainer(Loop *L, LoopInfo *LI, DominatorTree *DT)
      : L(L), LI(LI), DT(DT) {}

  /// Returns the number of partitions.
  unsigned getSize() const { return PartitionContainer.size(); }

  /// Adds \p Inst into the current partition if that is marked to
  /// contain cycles.  Otherwise start a new partition for it.
  void addToCyclicPartition(Instruction *Inst) {
    // If the current partition is non-cyclic, start a new one.
    if (PartitionContainer.empty() || !PartitionContainer.back().hasDepCycle())
      PartitionContainer.emplace_back(Inst, L, /*DepCycle=*/true);
    else
      PartitionContainer.back().add(Inst);
  }

  /// Adds \p Inst into a partition that is not marked to contain
  /// dependence cycles.
  ///
  //  Initially we isolate memory instructions into as many partitions as
  //  possible, then later we may merge them back together.
  void addToNewNonCyclicPartition(Instruction *Inst) {
    PartitionContainer.emplace_back(Inst, L);
  }

  /// Merges adjacent non-cyclic partitions.
  ///
  /// The idea is that we currently only want to isolate the non-vectorizable
  /// partition.  We could later allow more distribution among these partitions
  /// too.
  void mergeAdjacentNonCyclic() {
    mergeAdjacentPartitionsIf(
        [](const InstPartition *P) { return !P->hasDepCycle(); });
  }

  /// If a partition contains only conditional stores, we won't vectorize
  /// it.  Try to merge it with a previous cyclic partition.
  void mergeNonIfConvertible() {
    mergeAdjacentPartitionsIf([&](const InstPartition *Partition) {
      if (Partition->hasDepCycle())
        return true;

      // Now, check if all stores are conditional in this partition.
      bool seenStore = false;

      for (auto *Inst : *Partition)
        if (isa<StoreInst>(Inst)) {
          seenStore = true;
          if (!LoopAccessInfo::blockNeedsPredication(Inst->getParent(), L, DT))
            return false;
        }
      return seenStore;
    });
  }

  /// Merges the partitions according to various heuristics.
  void mergeBeforePopulating() {
    mergeAdjacentNonCyclic();
    if (!DistributeNonIfConvertible)
      mergeNonIfConvertible();
  }

  /// Merges partitions in order to ensure that no loads are duplicated.
  ///
  /// We can't duplicate loads because that could potentially reorder them.
  /// LoopAccessAnalysis provides dependency information with the context that
  /// the order of memory operations is preserved.
  ///
  /// Returns true if any partitions were merged.
  bool mergeToAvoidDuplicatedLoads() {
    using LoadToPartitionT = DenseMap<Instruction *, InstPartition *>;
    using ToBeMergedT = EquivalenceClasses<InstPartition *>;

    LoadToPartitionT LoadToPartition;
    ToBeMergedT ToBeMerged;

    // Step through the partitions and create equivalence between partitions
    // that contain the same load.  Also put partitions in between them in the
    // same equivalence class to avoid reordering of memory operations.
    for (PartitionContainerT::iterator I = PartitionContainer.begin(),
                                       E = PartitionContainer.end();
         I != E; ++I) {
      auto *PartI = &*I;

      // If a load occurs in two partitions PartI and PartJ, merge all
      // partitions (PartI, PartJ] into PartI.
      for (Instruction *Inst : *PartI)
        if (isa<LoadInst>(Inst)) {
          bool NewElt;
          LoadToPartitionT::iterator LoadToPart;

          std::tie(LoadToPart, NewElt) =
              LoadToPartition.insert(std::make_pair(Inst, PartI));
          if (!NewElt) {
            LLVM_DEBUG(dbgs()
                       << "Merging partitions due to this load in multiple "
                       << "partitions: " << PartI << ", " << LoadToPart->second
                       << "\n"
                       << *Inst << "\n");

            auto PartJ = I;
            do {
              --PartJ;
              ToBeMerged.unionSets(PartI, &*PartJ);
            } while (&*PartJ != LoadToPart->second);
          }
        }
    }
    if (ToBeMerged.empty())
      return false;

    // Merge the members of an equivalence class into its class leader.  This
    // makes the members empty.
    for (ToBeMergedT::iterator I = ToBeMerged.begin(), E = ToBeMerged.end();
         I != E; ++I) {
      if (!I->isLeader())
        continue;

      auto PartI = I->getData();
      for (auto PartJ : make_range(std::next(ToBeMerged.member_begin(I)),
                                   ToBeMerged.member_end())) {
        PartJ->moveTo(*PartI);
      }
    }

    // Remove the empty partitions.
    PartitionContainer.remove_if(
        [](const InstPartition &P) { return P.empty(); });

    return true;
  }
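
  // Illustrative example (hypothetical, not from the source): with partitions
  // P0:{load %a, ...}, P1:{store %b, ...}, P2:{load %a, ...}, the load of %a
  // shared by P0 and P2 puts them in one equivalence class, and P1 is pulled
  // in too because it sits between them; all three collapse into a single
  // partition, so the load is never duplicated or reordered.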

  /// Sets up the mapping from instructions to partitions.  If the
  /// instruction is duplicated across multiple partitions, set the entry to -1.
  void setupPartitionIdOnInstructions() {
    int PartitionID = 0;
    for (const auto &Partition : PartitionContainer) {
      for (Instruction *Inst : Partition) {
        bool NewElt;
        InstToPartitionIdT::iterator Iter;

        std::tie(Iter, NewElt) =
            InstToPartitionId.insert(std::make_pair(Inst, PartitionID));
        if (!NewElt)
          Iter->second = -1;
      }
      ++PartitionID;
    }
  }

  /// Populates the partitions with everything that the seeding
  /// instructions require.
  void populateUsedSet() {
    for (auto &P : PartitionContainer)
      P.populateUsedSet();
  }

  /// This performs the main chunk of the work of cloning the loops for
  /// the partitions.
  void cloneLoops() {
    BasicBlock *OrigPH = L->getLoopPreheader();
    // At this point the predecessor of the preheader is either the memcheck
    // block or the top part of the original preheader.
    BasicBlock *Pred = OrigPH->getSinglePredecessor();
    assert(Pred && "Preheader does not have a single predecessor");
    BasicBlock *ExitBlock = L->getExitBlock();
    assert(ExitBlock && "No single exit block");
    Loop *NewLoop;

    assert(!PartitionContainer.empty() && "at least two partitions expected");
    // We're cloning the preheader along with the loop so we already made sure
    // it was empty.
    assert(&*OrigPH->begin() == OrigPH->getTerminator() &&
           "preheader not empty");

    // Preserve the original loop ID for use after the transformation.
    MDNode *OrigLoopID = L->getLoopID();

    // Create a loop for each partition except the last.  Clone the original
    // loop before PH along with adding a preheader for the cloned loop.  Then
    // update PH to point to the newly added preheader.
    BasicBlock *TopPH = OrigPH;
    unsigned Index = getSize() - 1;
    for (auto I = std::next(PartitionContainer.rbegin()),
              E = PartitionContainer.rend();
         I != E; ++I, --Index, TopPH = NewLoop->getLoopPreheader()) {
      auto *Part = &*I;

      NewLoop = Part->cloneLoopWithPreheader(TopPH, Pred, Index, LI, DT);

      Part->getVMap()[ExitBlock] = TopPH;
      Part->remapInstructions();
      setNewLoopID(OrigLoopID, Part);
    }
    Pred->getTerminator()->replaceUsesOfWith(OrigPH, TopPH);

    // Also set a new loop ID for the last loop.
    setNewLoopID(OrigLoopID, &PartitionContainer.back());

    // Now go in forward order and update the immediate dominator for the
    // preheaders with the exiting block of the previous loop.  Dominance
    // within the loop is updated in cloneLoopWithPreheader.
    for (auto Curr = PartitionContainer.cbegin(),
              Next = std::next(PartitionContainer.cbegin()),
              E = PartitionContainer.cend();
         Next != E; ++Curr, ++Next)
      DT->changeImmediateDominator(
          Next->getDistributedLoop()->getLoopPreheader(),
          Curr->getDistributedLoop()->getExitingBlock());
  }

  /// Removes the dead instructions from the cloned loops.
  void removeUnusedInsts() {
    for (auto &Partition : PartitionContainer)
      Partition.removeUnusedInsts();
  }

  /// For each memory pointer, it computes the partitionId the pointer is
  /// used in.
  ///
  /// This returns an array of int where the I-th entry corresponds to the I-th
  /// entry in LAI.getRuntimePointerCheck().  If the pointer is used in multiple
  /// partitions its entry is set to -1.
  SmallVector<int, 8>
  computePartitionSetForPointers(const LoopAccessInfo &LAI) {
    const RuntimePointerChecking *RtPtrCheck = LAI.getRuntimePointerChecking();

    unsigned N = RtPtrCheck->Pointers.size();
    SmallVector<int, 8> PtrToPartitions(N);
    for (unsigned I = 0; I < N; ++I) {
      Value *Ptr = RtPtrCheck->Pointers[I].PointerValue;
      auto Instructions =
          LAI.getInstructionsForAccess(Ptr, RtPtrCheck->Pointers[I].IsWritePtr);

      int &Partition = PtrToPartitions[I];
      // First set it to uninitialized.
      Partition = -2;
      for (Instruction *Inst : Instructions) {
        // Note that this could be -1 if Inst is duplicated across multiple
        // partitions.
        int ThisPartition = this->InstToPartitionId[Inst];
        if (Partition == -2)
          Partition = ThisPartition;
        // -1 means belonging to multiple partitions.
        else if (Partition == -1)
          break;
        else if (Partition != (int)ThisPartition)
          Partition = -1;
      }
      assert(Partition != -2 && "Pointer not belonging to any partition");
    }

    return PtrToPartitions;
  }

  void print(raw_ostream &OS) const {
    unsigned Index = 0;
    for (const auto &P : PartitionContainer) {
      OS << "Partition " << Index++ << " (" << &P << "):\n";
      P.print();
    }
  }

  void dump() const { print(dbgs()); }

#ifndef NDEBUG
  friend raw_ostream &operator<<(raw_ostream &OS,
                                 const InstPartitionContainer &Partitions) {
    Partitions.print(OS);
    return OS;
  }
#endif

  void printBlocks() const {
    unsigned Index = 0;
    for (const auto &P : PartitionContainer) {
      dbgs() << "\nPartition " << Index++ << " (" << &P << "):\n";
      P.printBlocks();
    }
  }

private:
  using PartitionContainerT = std::list<InstPartition>;

  /// List of partitions.
  PartitionContainerT PartitionContainer;

  /// Mapping from Instruction to partition Id.  If the instruction
  /// belongs to multiple partitions the entry contains -1.
  InstToPartitionIdT InstToPartitionId;

  Loop *L;
  LoopInfo *LI;
  DominatorTree *DT;

  /// The control structure to merge adjacent partitions if both satisfy
  /// the \p Predicate.
  template <class UnaryPredicate>
  void mergeAdjacentPartitionsIf(UnaryPredicate Predicate) {
    InstPartition *PrevMatch = nullptr;
    for (auto I = PartitionContainer.begin(); I != PartitionContainer.end();) {
      auto DoesMatch = Predicate(&*I);
      if (PrevMatch == nullptr && DoesMatch) {
        PrevMatch = &*I;
        ++I;
      } else if (PrevMatch != nullptr && DoesMatch) {
        I->moveTo(*PrevMatch);
        I = PartitionContainer.erase(I);
      } else {
        PrevMatch = nullptr;
        ++I;
      }
    }
  }

  /// Assign new LoopIDs for the partition's cloned loop.
  void setNewLoopID(MDNode *OrigLoopID, InstPartition *Part) {
    Optional<MDNode *> PartitionID = makeFollowupLoopID(
        OrigLoopID,
        {LLVMLoopDistributeFollowupAll,
         Part->hasDepCycle() ? LLVMLoopDistributeFollowupSequential
                             : LLVMLoopDistributeFollowupCoincident});
    if (PartitionID.hasValue()) {
      Loop *NewLoop = Part->getDistributedLoop();
      NewLoop->setLoopID(PartitionID.getValue());
    }
  }
};

/// For each memory instruction, this class maintains the difference between
/// the number of unsafe dependences that start at this instruction and the
/// number that end here.
///
/// By traversing the memory instructions in program order and accumulating this
/// number, we know whether any unsafe dependence crosses over a program point.
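///
/// An illustrative sketch (mirroring the Load1/Store3 table in processLoop
/// below): a single unsafe dependence Store3->Load1 contributes +1 at Load1
/// (the program-order source) and -1 at Store3, so the running sum is positive
/// exactly over the instructions across which the dependence is still "open",
/// including an unrelated Load2 sitting between them.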
class MemoryInstructionDependences {
  using Dependence = MemoryDepChecker::Dependence;

public:
  struct Entry {
    Instruction *Inst;
    unsigned NumUnsafeDependencesStartOrEnd = 0;

    Entry(Instruction *Inst) : Inst(Inst) {}
  };

  using AccessesType = SmallVector<Entry, 8>;

  AccessesType::const_iterator begin() const { return Accesses.begin(); }
  AccessesType::const_iterator end() const { return Accesses.end(); }

  MemoryInstructionDependences(
      const SmallVectorImpl<Instruction *> &Instructions,
      const SmallVectorImpl<Dependence> &Dependences) {
    Accesses.append(Instructions.begin(), Instructions.end());

    LLVM_DEBUG(dbgs() << "Backward dependences:\n");
    for (auto &Dep : Dependences)
      if (Dep.isPossiblyBackward()) {
        // Note that the designations source and destination follow the program
        // order, i.e. source is always first.  (The direction is given by the
        // DepType.)
        ++Accesses[Dep.Source].NumUnsafeDependencesStartOrEnd;
        --Accesses[Dep.Destination].NumUnsafeDependencesStartOrEnd;

        LLVM_DEBUG(Dep.print(dbgs(), 2, Instructions));
      }
  }

private:
  AccessesType Accesses;
};

/// The actual class performing the per-loop work.
class LoopDistributeForLoop {
public:
  LoopDistributeForLoop(Loop *L, Function *F, LoopInfo *LI, DominatorTree *DT,
                        ScalarEvolution *SE, OptimizationRemarkEmitter *ORE)
      : L(L), F(F), LI(LI), DT(DT), SE(SE), ORE(ORE) {
    setForced();
  }

  /// Try to distribute an inner-most loop.
  bool processLoop(std::function<const LoopAccessInfo &(Loop &)> &GetLAA) {
    assert(L->empty() && "Only process inner loops.");

    LLVM_DEBUG(dbgs() << "\nLDist: In \""
                      << L->getHeader()->getParent()->getName()
                      << "\" checking " << *L << "\n");

    if (!L->getExitBlock())
      return fail("MultipleExitBlocks", "multiple exit blocks");
    if (!L->isLoopSimplifyForm())
      return fail("NotLoopSimplifyForm",
                  "loop is not in loop-simplify form");

    BasicBlock *PH = L->getLoopPreheader();

    // LAA will check that we only have a single exiting block.
    LAI = &GetLAA(*L);

    // Currently, we only distribute to isolate the part of the loop with
    // dependence cycles to enable partial vectorization.
    if (LAI->canVectorizeMemory())
      return fail("MemOpsCanBeVectorized",
                  "memory operations are safe for vectorization");

    auto *Dependences = LAI->getDepChecker().getDependences();
    if (!Dependences || Dependences->empty())
      return fail("NoUnsafeDeps", "no unsafe dependences to isolate");

    InstPartitionContainer Partitions(L, LI, DT);

    // First, go through each memory operation and assign them to consecutive
    // partitions (the order of partitions follows program order).  Put those
    // with unsafe dependences into a "cyclic" partition, otherwise put each
    // store in its own "non-cyclic" partition (we'll merge these later).
    //
    // Note that a memory operation (e.g. Load2 below) at a program point that
    // has an unsafe dependence (Store3->Load1) spanning over it must be
    // included in the same cyclic partition as the dependent operations.  This
    // is to preserve the original program order after distribution.  E.g.:
    //
    //                NumUnsafeDependencesStartOrEnd  NumUnsafeDependencesActive
    //  Load1   -.                     1                       0->1
    //  Load2    | /Unsafe/            0                       1
    //  Store3  -'                    -1                       1->0
    //  Load4                          0                       0
    //
    // NumUnsafeDependencesActive > 0 indicates this situation and in this case
    // we just keep assigning to the same cyclic partition until
    // NumUnsafeDependencesActive reaches 0.
    const MemoryDepChecker &DepChecker = LAI->getDepChecker();
    MemoryInstructionDependences MID(DepChecker.getMemoryInstructions(),
                                     *Dependences);

    int NumUnsafeDependencesActive = 0;
    for (auto &InstDep : MID) {
      Instruction *I = InstDep.Inst;
      // We update NumUnsafeDependencesActive post-instruction, catch the
      // start of a dependence directly via NumUnsafeDependencesStartOrEnd.
      if (NumUnsafeDependencesActive ||
          InstDep.NumUnsafeDependencesStartOrEnd > 0)
        Partitions.addToCyclicPartition(I);
      else
        Partitions.addToNewNonCyclicPartition(I);
      NumUnsafeDependencesActive += InstDep.NumUnsafeDependencesStartOrEnd;
      assert(NumUnsafeDependencesActive >= 0 &&
             "Negative number of dependences active");
    }

    // Add partitions for values used outside.  These partitions can be out of
    // order from the original program order.  This is OK because if the
    // partition uses a load we will merge this partition with the original
    // partition of the load that we set up in the previous loop (see
    // mergeToAvoidDuplicatedLoads).
    auto DefsUsedOutside = findDefsUsedOutsideOfLoop(L);
    for (auto *Inst : DefsUsedOutside)
      Partitions.addToNewNonCyclicPartition(Inst);

    LLVM_DEBUG(dbgs() << "Seeded partitions:\n" << Partitions);
    if (Partitions.getSize() < 2)
      return fail("CantIsolateUnsafeDeps",
                  "cannot isolate unsafe dependencies");

    // Run the merge heuristics: Merge non-cyclic adjacent partitions since we
    // should be able to vectorize these together.
    Partitions.mergeBeforePopulating();
    LLVM_DEBUG(dbgs() << "\nMerged partitions:\n" << Partitions);
    if (Partitions.getSize() < 2)
      return fail("CantIsolateUnsafeDeps",
                  "cannot isolate unsafe dependencies");

    // Now, populate the partitions with non-memory operations.
    Partitions.populateUsedSet();
    LLVM_DEBUG(dbgs() << "\nPopulated partitions:\n" << Partitions);

    // In order to preserve original lexical order for loads, keep them in the
    // partition that we set up in the MemoryInstructionDependences loop.
    if (Partitions.mergeToAvoidDuplicatedLoads()) {
      LLVM_DEBUG(dbgs() << "\nPartitions merged to ensure unique loads:\n"
                        << Partitions);
      if (Partitions.getSize() < 2)
        return fail("CantIsolateUnsafeDeps",
                    "cannot isolate unsafe dependencies");
    }

    // Don't distribute the loop if we need too many SCEV run-time checks, or
    // any at all if run-time checks are illegal here.
    const SCEVUnionPredicate &Pred = LAI->getPSE().getUnionPredicate();
    if (LAI->hasConvergentOp() && !Pred.isAlwaysTrue()) {
      return fail("RuntimeCheckWithConvergent",
                  "may not insert runtime check with convergent operation");
    }

    if (Pred.getComplexity() > (IsForced.getValueOr(false)
                                    ? PragmaDistributeSCEVCheckThreshold
                                    : DistributeSCEVCheckThreshold))
      return fail("TooManySCEVRuntimeChecks",
                  "too many SCEV run-time checks needed.\n");

    if (!IsForced.getValueOr(false) && hasDisableAllTransformsHint(L))
      return fail("HeuristicDisabled", "distribution heuristic disabled");

    LLVM_DEBUG(dbgs() << "\nDistributing loop: " << *L << "\n");
    // We're done forming the partitions; set up the reverse mapping from
    // instructions to partitions.
    Partitions.setupPartitionIdOnInstructions();

    // To keep things simple have an empty preheader before we version or clone
    // the loop.  (Also split if this has no predecessor, i.e. entry, because we
    // rely on PH having a predecessor.)
    if (!PH->getSinglePredecessor() || &*PH->begin() != PH->getTerminator())
      SplitBlock(PH, PH->getTerminator(), DT, LI);

    // If we need run-time checks, version the loop now.
    auto PtrToPartition = Partitions.computePartitionSetForPointers(*LAI);
    const auto *RtPtrChecking = LAI->getRuntimePointerChecking();
    const auto &AllChecks = RtPtrChecking->getChecks();
    auto Checks = includeOnlyCrossPartitionChecks(AllChecks, PtrToPartition,
                                                  RtPtrChecking);

    if (LAI->hasConvergentOp() && !Checks.empty()) {
      return fail("RuntimeCheckWithConvergent",
                  "may not insert runtime check with convergent operation");
    }

    if (!Pred.isAlwaysTrue() || !Checks.empty()) {
      assert(!LAI->hasConvergentOp() && "inserting illegal loop versioning");

      MDNode *OrigLoopID = L->getLoopID();

      LLVM_DEBUG(dbgs() << "\nPointers:\n");
      LLVM_DEBUG(LAI->getRuntimePointerChecking()->printChecks(dbgs(), Checks));
      LoopVersioning LVer(*LAI, L, LI, DT, SE, false);
      LVer.setAliasChecks(std::move(Checks));
      LVer.setSCEVChecks(LAI->getPSE().getUnionPredicate());
      LVer.versionLoop(DefsUsedOutside);
      LVer.annotateLoopWithNoAlias();

      // The unversioned loop will not be changed, so we inherit all attributes
      // from the original loop, but remove the loop distribution metadata to
      // avoid distributing it again.
      MDNode *UnversionedLoopID =
          makeFollowupLoopID(OrigLoopID,
                             {LLVMLoopDistributeFollowupAll,
                              LLVMLoopDistributeFollowupFallback},
                             "llvm.loop.distribute.", true)
              .getValue();
      LVer.getNonVersionedLoop()->setLoopID(UnversionedLoopID);
    }

    // Create identical copies of the original loop for each partition and hook
    // them up sequentially.
    Partitions.cloneLoops();

    // Now, we remove the instructions from each loop that don't belong to that
    // partition.
    Partitions.removeUnusedInsts();
    LLVM_DEBUG(dbgs() << "\nAfter removing unused Instrs:\n");
    LLVM_DEBUG(Partitions.printBlocks());

    if (LDistVerify) {
      LI->verify(*DT);
      assert(DT->verify(DominatorTree::VerificationLevel::Fast));
    }

    ++NumLoopsDistributed;
    // Report the success.
    ORE->emit([&]() {
      return OptimizationRemark(LDIST_NAME, "Distribute", L->getStartLoc(),
                                L->getHeader())
             << "distributed loop";
    });
    return true;
  }

  /// Provide diagnostics, then \return false.
  bool fail(StringRef RemarkName, StringRef Message) {
    LLVMContext &Ctx = F->getContext();
    bool Forced = isForced().getValueOr(false);

    LLVM_DEBUG(dbgs() << "Skipping; " << Message << "\n");

    // With Rpass-missed report that distribution failed.
    ORE->emit([&]() {
      return OptimizationRemarkMissed(LDIST_NAME, "NotDistributed",
                                      L->getStartLoc(), L->getHeader())
             << "loop not distributed: use -Rpass-analysis=loop-distribute for "
                "more "
                "info";
    });

    // With Rpass-analysis report why.  This is on by default if distribution
    // was requested explicitly.
    ORE->emit(OptimizationRemarkAnalysis(
                  Forced ? OptimizationRemarkAnalysis::AlwaysPrint : LDIST_NAME,
                  RemarkName, L->getStartLoc(), L->getHeader())
              << "loop not distributed: " << Message);

    // Also issue a warning if distribution was requested explicitly but it
    // failed.
    if (Forced)
      Ctx.diagnose(DiagnosticInfoOptimizationFailure(
          *F, L->getStartLoc(), "loop not distributed: failed "
                                "explicitly specified loop distribution"));

    return false;
  }

  /// Returns whether distribution was forced to be enabled or disabled for
  /// the loop.
  ///
  /// If the optional has a value, it indicates whether distribution was forced
  /// to be enabled (true) or disabled (false).  If the optional has no value
  /// distribution was not forced either way.
  const Optional<bool> &isForced() const { return IsForced; }

private:
  /// Filter out checks between pointers from the same partition.
  ///
  /// \p PtrToPartition contains the partition number for pointers.  Partition
  /// number -1 means that the pointer is used in multiple partitions.  In this
  /// case we can't safely omit the check.
  SmallVector<RuntimePointerChecking::PointerCheck, 4>
  includeOnlyCrossPartitionChecks(
      const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &AllChecks,
      const SmallVectorImpl<int> &PtrToPartition,
      const RuntimePointerChecking *RtPtrChecking) {
    SmallVector<RuntimePointerChecking::PointerCheck, 4> Checks;

    copy_if(AllChecks, std::back_inserter(Checks),
            [&](const RuntimePointerChecking::PointerCheck &Check) {
              for (unsigned PtrIdx1 : Check.first->Members)
                for (unsigned PtrIdx2 : Check.second->Members)
                  // Only include this check if there is a pair of pointers
                  // that require checking and the pointers fall into
                  // separate partitions.
                  //
                  // (Note that we already know at this point that the two
                  // pointer groups need checking but it doesn't follow
                  // that each pair of pointers within the two groups needs
                  // checking as well.
                  //
                  // In other words we don't want to include a check just
                  // because there is a pair of pointers between the two
                  // pointer groups that require checks and a different
                  // pair whose pointers fall into different partitions.)
                  if (RtPtrChecking->needsChecking(PtrIdx1, PtrIdx2) &&
                      !RuntimePointerChecking::arePointersInSamePartition(
                          PtrToPartition, PtrIdx1, PtrIdx2))
                    return true;
              return false;
            });

    return Checks;
  }

  /// Check whether the loop metadata is forcing distribution to be
  /// enabled/disabled.
  void setForced() {
    Optional<const MDOperand *> Value =
        findStringMetadataForLoop(L, "llvm.loop.distribute.enable");
    if (!Value)
      return;

    const MDOperand *Op = *Value;
    assert(Op && mdconst::hasa<ConstantInt>(*Op) && "invalid metadata");
    IsForced = mdconst::extract<ConstantInt>(*Op)->getZExtValue();
  }
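
  // For reference (an assumption about the producer, not asserted by this
  // file): front ends typically encode #pragma clang loop distribute(enable)
  // as a loop-ID property of the form
  //   !{!"llvm.loop.distribute.enable", i1 true}
  // and the i1 operand extracted above is what lands in IsForced.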

  Loop *L;
  Function *F;

  // Analyses used.
  LoopInfo *LI;
  const LoopAccessInfo *LAI = nullptr;
  DominatorTree *DT;
  ScalarEvolution *SE;
  OptimizationRemarkEmitter *ORE;

  /// Indicates whether distribution is forced to be enabled/disabled for
  /// the loop.
  ///
  /// If the optional has a value, it indicates whether distribution was forced
  /// to be enabled (true) or disabled (false).  If the optional has no value
  /// distribution was not forced either way.
  Optional<bool> IsForced;
};

} // end anonymous namespace

/// Shared implementation between new and old PMs.
static bool runImpl(Function &F, LoopInfo *LI, DominatorTree *DT,
                    ScalarEvolution *SE, OptimizationRemarkEmitter *ORE,
                    std::function<const LoopAccessInfo &(Loop &)> &GetLAA) {
  // Build up a worklist of inner-most loops to distribute.  This is necessary
  // as the act of distributing a loop creates new loops and can invalidate
  // iterators across the loops.
  SmallVector<Loop *, 8> Worklist;

  for (Loop *TopLevelLoop : *LI)
    for (Loop *L : depth_first(TopLevelLoop))
      // We only handle inner-most loops.
      if (L->empty())
        Worklist.push_back(L);

  // Now walk the identified inner loops.
  bool Changed = false;
  for (Loop *L : Worklist) {
    LoopDistributeForLoop LDL(L, &F, LI, DT, SE, ORE);

    // If distribution was forced for the specific loop to be
    // enabled/disabled, follow that.  Otherwise use the global flag.
    if (LDL.isForced().getValueOr(EnableLoopDistribute))
      Changed |= LDL.processLoop(GetLAA);
  }

  // Process each loop nest in the function.
  return Changed;
}

namespace {

/// The pass class.
class LoopDistributeLegacy : public FunctionPass {
public:
  static char ID;

  LoopDistributeLegacy() : FunctionPass(ID) {
    // The default is set by the caller.
    initializeLoopDistributeLegacyPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;

    auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    auto *LAA = &getAnalysis<LoopAccessLegacyAnalysis>();
    auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
    auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
    std::function<const LoopAccessInfo &(Loop &)> GetLAA =
        [&](Loop &L) -> const LoopAccessInfo & { return LAA->getInfo(&L); };

    return runImpl(F, LI, DT, SE, ORE, GetLAA);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<ScalarEvolutionWrapperPass>();
    AU.addRequired<LoopInfoWrapperPass>();
    AU.addPreserved<LoopInfoWrapperPass>();
    AU.addRequired<LoopAccessLegacyAnalysis>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addPreserved<DominatorTreeWrapperPass>();
    AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
    AU.addPreserved<GlobalsAAWrapperPass>();
  }
};

} // end anonymous namespace

PreservedAnalyses LoopDistributePass::run(Function &F,
                                          FunctionAnalysisManager &AM) {
  auto &LI = AM.getResult<LoopAnalysis>(F);
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
  auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);

  // We don't directly need these analyses but they're required for loop
  // analyses so provide them below.
  auto &AA = AM.getResult<AAManager>(F);
  auto &AC = AM.getResult<AssumptionAnalysis>(F);
  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);

  auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
  std::function<const LoopAccessInfo &(Loop &)> GetLAA =
      [&](Loop &L) -> const LoopAccessInfo & {
    LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, nullptr};
    return LAM.getResult<LoopAccessAnalysis>(L, AR);
  };

  bool Changed = runImpl(F, &LI, &DT, &SE, &ORE, GetLAA);
  if (!Changed)
    return PreservedAnalyses::all();
  PreservedAnalyses PA;
  PA.preserve<LoopAnalysis>();
  PA.preserve<DominatorTreeAnalysis>();
  PA.preserve<GlobalsAA>();
  return PA;
}

char LoopDistributeLegacy::ID;

static const char ldist_name[] = "Loop Distribution";

INITIALIZE_PASS_BEGIN(LoopDistributeLegacy, LDIST_NAME, ldist_name, false,
                      false)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_END(LoopDistributeLegacy, LDIST_NAME, ldist_name, false, false)

FunctionPass *llvm::createLoopDistributePass() { return new LoopDistributeLegacy(); }
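
// Usage note (an assumption about driver wiring, not stated in this file): the
// pass is a per-loop no-op unless distribution is enabled, e.g. globally via
//   opt -passes=loop-distribute -enable-loop-distribute input.ll
// or per loop with #pragma clang loop distribute(enable), which emits the
// llvm.loop.distribute.enable metadata that setForced() reads.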