Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- LoopReroll.cpp - Loop rerolling pass -------------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This pass implements a simple loop reroller.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "llvm/ADT/APInt.h"
14
#include "llvm/ADT/BitVector.h"
15
#include "llvm/ADT/DenseMap.h"
16
#include "llvm/ADT/DenseSet.h"
17
#include "llvm/ADT/MapVector.h"
18
#include "llvm/ADT/STLExtras.h"
19
#include "llvm/ADT/SmallPtrSet.h"
20
#include "llvm/ADT/SmallVector.h"
21
#include "llvm/ADT/Statistic.h"
22
#include "llvm/Analysis/AliasAnalysis.h"
23
#include "llvm/Analysis/AliasSetTracker.h"
24
#include "llvm/Analysis/LoopInfo.h"
25
#include "llvm/Analysis/LoopPass.h"
26
#include "llvm/Analysis/ScalarEvolution.h"
27
#include "llvm/Analysis/ScalarEvolutionExpander.h"
28
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
29
#include "llvm/Analysis/TargetLibraryInfo.h"
30
#include "llvm/Transforms/Utils/Local.h"
31
#include "llvm/Analysis/ValueTracking.h"
32
#include "llvm/IR/BasicBlock.h"
33
#include "llvm/IR/Constants.h"
34
#include "llvm/IR/DataLayout.h"
35
#include "llvm/IR/DerivedTypes.h"
36
#include "llvm/IR/Dominators.h"
37
#include "llvm/IR/IRBuilder.h"
38
#include "llvm/IR/InstrTypes.h"
39
#include "llvm/IR/Instruction.h"
40
#include "llvm/IR/Instructions.h"
41
#include "llvm/IR/IntrinsicInst.h"
42
#include "llvm/IR/Intrinsics.h"
43
#include "llvm/IR/Module.h"
44
#include "llvm/IR/Type.h"
45
#include "llvm/IR/Use.h"
46
#include "llvm/IR/User.h"
47
#include "llvm/IR/Value.h"
48
#include "llvm/Pass.h"
49
#include "llvm/Support/Casting.h"
50
#include "llvm/Support/CommandLine.h"
51
#include "llvm/Support/Debug.h"
52
#include "llvm/Support/raw_ostream.h"
53
#include "llvm/Transforms/Scalar.h"
54
#include "llvm/Transforms/Utils.h"
55
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
56
#include "llvm/Transforms/Utils/LoopUtils.h"
57
#include <cassert>
58
#include <cstddef>
59
#include <cstdint>
60
#include <cstdlib>
61
#include <iterator>
62
#include <map>
63
#include <utility>
64
65
using namespace llvm;
66
67
#define DEBUG_TYPE "loop-reroll"
68
69
STATISTIC(NumRerolledLoops, "Number of rerolled loops");
70
71
static cl::opt<unsigned>
72
NumToleratedFailedMatches("reroll-num-tolerated-failed-matches", cl::init(400),
73
                          cl::Hidden,
74
                          cl::desc("The maximum number of failures to tolerate"
75
                                   " during fuzzy matching. (default: 400)"));
76
77
// This loop re-rolling transformation aims to transform loops like this:
78
//
79
// int foo(int a);
80
// void bar(int *x) {
81
//   for (int i = 0; i < 500; i += 3) {
82
//     foo(i);
83
//     foo(i+1);
84
//     foo(i+2);
85
//   }
86
// }
87
//
88
// into a loop like this:
89
//
90
// void bar(int *x) {
91
//   for (int i = 0; i < 500; ++i)
92
//     foo(i);
93
// }
94
//
95
// It does this by looking for loops that, besides the latch code, are composed
96
// of isomorphic DAGs of instructions, with each DAG rooted at some increment
97
// to the induction variable, and where each DAG is isomorphic to the DAG
98
// rooted at the induction variable (excepting the sub-DAGs which root the
99
// other induction-variable increments). In other words, we're looking for loop
100
// bodies of the form:
101
//
102
// %iv = phi [ (preheader, ...), (body, %iv.next) ]
103
// f(%iv)
104
// %iv.1 = add %iv, 1                <-- a root increment
105
// f(%iv.1)
106
// %iv.2 = add %iv, 2                <-- a root increment
107
// f(%iv.2)
108
// %iv.scale_m_1 = add %iv, scale-1  <-- a root increment
109
// f(%iv.scale_m_1)
110
// ...
111
// %iv.next = add %iv, scale
112
// %cmp = icmp(%iv, ...)
113
// br %cmp, header, exit
114
//
115
// where each f(i) is a set of instructions that, collectively, are a function
116
// only of i (and other loop-invariant values).
117
//
118
// As a special case, we can also reroll loops like this:
119
//
120
// int foo(int);
121
// void bar(int *x) {
122
//   for (int i = 0; i < 500; ++i) {
123
//     x[3*i] = foo(0);
124
//     x[3*i+1] = foo(0);
125
//     x[3*i+2] = foo(0);
126
//   }
127
// }
128
//
129
// into this:
130
//
131
// void bar(int *x) {
132
//   for (int i = 0; i < 1500; ++i)
133
//     x[i] = foo(0);
134
// }
135
//
136
// in which case, we're looking for inputs like this:
137
//
138
// %iv = phi [ (preheader, ...), (body, %iv.next) ]
139
// %scaled.iv = mul %iv, scale
140
// f(%scaled.iv)
141
// %scaled.iv.1 = add %scaled.iv, 1
142
// f(%scaled.iv.1)
143
// %scaled.iv.2 = add %scaled.iv, 2
144
// f(%scaled.iv.2)
145
// %scaled.iv.scale_m_1 = add %scaled.iv, scale-1
146
// f(%scaled.iv.scale_m_1)
147
// ...
148
// %iv.next = add %iv, 1
149
// %cmp = icmp(%iv, ...)
150
// br %cmp, header, exit
151
152
namespace {
153
154
  enum IterationLimits {
155
    /// The maximum number of iterations that we'll try and reroll.
156
    IL_MaxRerollIterations = 32,
157
    /// The bitvector index used by loop induction variables and other
158
    /// instructions that belong to all iterations.
159
    IL_All,
160
    IL_End
161
  };
162
163
  class LoopReroll : public LoopPass {
164
  public:
165
    static char ID; // Pass ID, replacement for typeid
166
167
9
    LoopReroll() : LoopPass(ID) {
168
9
      initializeLoopRerollPass(*PassRegistry::getPassRegistry());
169
9
    }
170
171
    bool runOnLoop(Loop *L, LPPassManager &LPM) override;
172
173
9
    void getAnalysisUsage(AnalysisUsage &AU) const override {
174
9
      AU.addRequired<TargetLibraryInfoWrapperPass>();
175
9
      getLoopAnalysisUsage(AU);
176
9
    }
177
178
  protected:
179
    AliasAnalysis *AA;
180
    LoopInfo *LI;
181
    ScalarEvolution *SE;
182
    TargetLibraryInfo *TLI;
183
    DominatorTree *DT;
184
    bool PreserveLCSSA;
185
186
    using SmallInstructionVector = SmallVector<Instruction *, 16>;
187
    using SmallInstructionSet = SmallPtrSet<Instruction *, 16>;
188
189
    // Map between induction variable and its increment
190
    DenseMap<Instruction *, int64_t> IVToIncMap;
191
192
    // For loop with multiple induction variable, remember the one used only to
193
    // control the loop.
194
    Instruction *LoopControlIV;
195
196
    // A chain of isomorphic instructions, identified by a single-use PHI
197
    // representing a reduction. Only the last value may be used outside the
198
    // loop.
199
    struct SimpleLoopReduction {
200
46
      SimpleLoopReduction(Instruction *P, Loop *L) : Instructions(1, P) {
201
46
        assert(isa<PHINode>(P) && "First reduction instruction must be a PHI");
202
46
        add(L);
203
46
      }
204
205
46
      bool valid() const {
206
46
        return Valid;
207
46
      }
208
209
24
      Instruction *getPHI() const {
210
24
        assert(Valid && "Using invalid reduction");
211
24
        return Instructions.front();
212
24
      }
213
214
32
      Instruction *getReducedValue() const {
215
32
        assert(Valid && "Using invalid reduction");
216
32
        return Instructions.back();
217
32
      }
218
219
32
      Instruction *get(size_t i) const {
220
32
        assert(Valid && "Using invalid reduction");
221
32
        return Instructions[i+1];
222
32
      }
223
224
32
      Instruction *operator [] (size_t i) const { return get(i); }
225
226
      // The size, ignoring the initial PHI.
227
16
      size_t size() const {
228
16
        assert(Valid && "Using invalid reduction");
229
16
        return Instructions.size()-1;
230
16
      }
231
232
      using iterator = SmallInstructionVector::iterator;
233
      using const_iterator = SmallInstructionVector::const_iterator;
234
235
16
      iterator begin() {
236
16
        assert(Valid && "Using invalid reduction");
237
16
        return std::next(Instructions.begin());
238
16
      }
239
240
0
      const_iterator begin() const {
241
0
        assert(Valid && "Using invalid reduction");
242
0
        return std::next(Instructions.begin());
243
0
      }
244
245
16
      iterator end() { return Instructions.end(); }
246
0
      const_iterator end() const { return Instructions.end(); }
247
248
    protected:
249
      bool Valid = false;
250
      SmallInstructionVector Instructions;
251
252
      void add(Loop *L);
253
    };
254
255
    // The set of all reductions, and state tracking of possible reductions
256
    // during loop instruction processing.
257
    struct ReductionTracker {
258
      using SmallReductionVector = SmallVector<SimpleLoopReduction, 16>;
259
260
      // Add a new possible reduction.
261
8
      void addSLR(SimpleLoopReduction &SLR) { PossibleReds.push_back(SLR); }
262
263
      // Setup to track possible reductions corresponding to the provided
264
      // rerolling scale. Only reductions with a number of non-PHI instructions
265
      // that is divisible by the scale are considered. Three instructions sets
266
      // are filled in:
267
      //   - A set of all possible instructions in eligible reductions.
268
      //   - A set of all PHIs in eligible reductions
269
      //   - A set of all reduced values (last instructions) in eligible
270
      //     reductions.
271
      void restrictToScale(uint64_t Scale,
272
                           SmallInstructionSet &PossibleRedSet,
273
                           SmallInstructionSet &PossibleRedPHISet,
274
31
                           SmallInstructionSet &PossibleRedLastSet) {
275
31
        PossibleRedIdx.clear();
276
31
        PossibleRedIter.clear();
277
31
        Reds.clear();
278
31
279
39
        for (unsigned i = 0, e = PossibleReds.size(); i != e; 
++i8
)
280
8
          if (PossibleReds[i].size() % Scale == 0) {
281
8
            PossibleRedLastSet.insert(PossibleReds[i].getReducedValue());
282
8
            PossibleRedPHISet.insert(PossibleReds[i].getPHI());
283
8
284
8
            PossibleRedSet.insert(PossibleReds[i].getPHI());
285
8
            PossibleRedIdx[PossibleReds[i].getPHI()] = i;
286
22
            for (Instruction *J : PossibleReds[i]) {
287
22
              PossibleRedSet.insert(J);
288
22
              PossibleRedIdx[J] = i;
289
22
            }
290
8
          }
291
31
      }
292
293
      // The functions below are used while processing the loop instructions.
294
295
      // Are the two instructions both from reductions, and furthermore, from
296
      // the same reduction?
297
461
      bool isPairInSame(Instruction *J1, Instruction *J2) {
298
461
        DenseMap<Instruction *, int>::iterator J1I = PossibleRedIdx.find(J1);
299
461
        if (J1I != PossibleRedIdx.end()) {
300
20
          DenseMap<Instruction *, int>::iterator J2I = PossibleRedIdx.find(J2);
301
20
          if (J2I != PossibleRedIdx.end() && 
J1I->second == J2I->second17
)
302
17
            return true;
303
444
        }
304
444
305
444
        return false;
306
444
      }
307
308
      // The two provided instructions, the first from the base iteration, and
309
      // the second from iteration i, form a matched pair. If these are part of
310
      // a reduction, record that fact.
311
455
      void recordPair(Instruction *J1, Instruction *J2, unsigned i) {
312
455
        if (PossibleRedIdx.count(J1)) {
313
14
          assert(PossibleRedIdx.count(J2) &&
314
14
                 "Recording reduction vs. non-reduction instruction?");
315
14
316
14
          PossibleRedIter[J1] = 0;
317
14
          PossibleRedIter[J2] = i;
318
14
319
14
          int Idx = PossibleRedIdx[J1];
320
14
          assert(Idx == PossibleRedIdx[J2] &&
321
14
                 "Recording pair from different reductions?");
322
14
          Reds.insert(Idx);
323
14
        }
324
455
      }
325
326
      // The functions below can be called after we've finished processing all
327
      // instructions in the loop, and we know which reductions were selected.
328
329
      bool validateSelected();
330
      void replaceSelected();
331
332
    protected:
333
      // The vector of all possible reductions (for any scale).
334
      SmallReductionVector PossibleReds;
335
336
      DenseMap<Instruction *, int> PossibleRedIdx;
337
      DenseMap<Instruction *, int> PossibleRedIter;
338
      DenseSet<int> Reds;
339
    };
340
341
    // A DAGRootSet models an induction variable being used in a rerollable
342
    // loop. For example,
343
    //
344
    //   x[i*3+0] = y1
345
    //   x[i*3+1] = y2
346
    //   x[i*3+2] = y3
347
    //
348
    //   Base instruction -> i*3
349
    //                    +---+----+
350
    //                   /    |     \
351
    //               ST[y1]  +1     +2  <-- Roots
352
    //                        |      |
353
    //                      ST[y2] ST[y3]
354
    //
355
    // There may be multiple DAGRoots, for example:
356
    //
357
    //   x[i*2+0] = ...   (1)
358
    //   x[i*2+1] = ...   (1)
359
    //   x[i*2+4] = ...   (2)
360
    //   x[i*2+5] = ...   (2)
361
    //   x[(i+1234)*2+5678] = ... (3)
362
    //   x[(i+1234)*2+5679] = ... (3)
363
    //
364
    // The loop will be rerolled by adding a new loop induction variable,
365
    // one for the Base instruction in each DAGRootSet.
366
    //
367
    struct DAGRootSet {
368
      Instruction *BaseInst;
369
      SmallInstructionVector Roots;
370
371
      // The instructions between IV and BaseInst (but not including BaseInst).
372
      SmallInstructionSet SubsumedInsts;
373
    };
374
375
    // The set of all DAG roots, and state tracking of all roots
376
    // for a particular induction variable.
377
    struct DAGRootTracker {
378
      DAGRootTracker(LoopReroll *Parent, Loop *L, Instruction *IV,
379
                     ScalarEvolution *SE, AliasAnalysis *AA,
380
                     TargetLibraryInfo *TLI, DominatorTree *DT, LoopInfo *LI,
381
                     bool PreserveLCSSA,
382
                     DenseMap<Instruction *, int64_t> &IncrMap,
383
                     Instruction *LoopCtrlIV)
384
          : Parent(Parent), L(L), SE(SE), AA(AA), TLI(TLI), DT(DT), LI(LI),
385
            PreserveLCSSA(PreserveLCSSA), IV(IV), IVToIncMap(IncrMap),
386
33
            LoopControlIV(LoopCtrlIV) {}
387
388
      /// Stage 1: Find all the DAG roots for the induction variable.
389
      bool findRoots();
390
391
      /// Stage 2: Validate if the found roots are valid.
392
      bool validate(ReductionTracker &Reductions);
393
394
      /// Stage 3: Assuming validate() returned true, perform the
395
      /// replacement.
396
      /// @param BackedgeTakenCount The backedge-taken count of L.
397
      void replace(const SCEV *BackedgeTakenCount);
398
399
    protected:
400
      using UsesTy = MapVector<Instruction *, BitVector>;
401
402
      void findRootsRecursive(Instruction *IVU,
403
                              SmallInstructionSet SubsumedInsts);
404
      bool findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts);
405
      bool collectPossibleRoots(Instruction *Base,
406
                                std::map<int64_t,Instruction*> &Roots);
407
      bool validateRootSet(DAGRootSet &DRS);
408
409
      bool collectUsedInstructions(SmallInstructionSet &PossibleRedSet);
410
      void collectInLoopUserSet(const SmallInstructionVector &Roots,
411
                                const SmallInstructionSet &Exclude,
412
                                const SmallInstructionSet &Final,
413
                                DenseSet<Instruction *> &Users);
414
      void collectInLoopUserSet(Instruction *Root,
415
                                const SmallInstructionSet &Exclude,
416
                                const SmallInstructionSet &Final,
417
                                DenseSet<Instruction *> &Users);
418
419
      UsesTy::iterator nextInstr(int Val, UsesTy &In,
420
                                 const SmallInstructionSet &Exclude,
421
                                 UsesTy::iterator *StartI=nullptr);
422
      bool isBaseInst(Instruction *I);
423
      bool isRootInst(Instruction *I);
424
      bool instrDependsOn(Instruction *I,
425
                          UsesTy::iterator Start,
426
                          UsesTy::iterator End);
427
      void replaceIV(DAGRootSet &DRS, const SCEV *Start, const SCEV *IncrExpr);
428
429
      LoopReroll *Parent;
430
431
      // Members of Parent, replicated here for brevity.
432
      Loop *L;
433
      ScalarEvolution *SE;
434
      AliasAnalysis *AA;
435
      TargetLibraryInfo *TLI;
436
      DominatorTree *DT;
437
      LoopInfo *LI;
438
      bool PreserveLCSSA;
439
440
      // The loop induction variable.
441
      Instruction *IV;
442
443
      // Loop step amount.
444
      int64_t Inc;
445
446
      // Loop reroll count; if Inc == 1, this records the scaling applied
447
      // to the indvar: a[i*2+0] = ...; a[i*2+1] = ... ;
448
      // If Inc is not 1, Scale = Inc.
449
      uint64_t Scale;
450
451
      // The roots themselves.
452
      SmallVector<DAGRootSet,16> RootSets;
453
454
      // All increment instructions for IV.
455
      SmallInstructionVector LoopIncs;
456
457
      // Map of all instructions in the loop (in order) to the iterations
458
      // they are used in (or specially, IL_All for instructions
459
      // used in the loop increment mechanism).
460
      UsesTy Uses;
461
462
      // Map between induction variable and its increment
463
      DenseMap<Instruction *, int64_t> &IVToIncMap;
464
465
      Instruction *LoopControlIV;
466
    };
467
468
    // Check if it is a compare-like instruction whose user is a branch
469
12
    bool isCompareUsedByBranch(Instruction *I) {
470
12
      auto *TI = I->getParent()->getTerminator();
471
12
      if (!isa<BranchInst>(TI) || !isa<CmpInst>(I))
472
8
        return false;
473
4
      return I->hasOneUse() && TI->getOperand(0) == I;
474
4
    };
475
476
    bool isLoopControlIV(Loop *L, Instruction *IV);
477
    void collectPossibleIVs(Loop *L, SmallInstructionVector &PossibleIVs);
478
    void collectPossibleReductions(Loop *L,
479
           ReductionTracker &Reductions);
480
    bool reroll(Instruction *IV, Loop *L, BasicBlock *Header,
481
                const SCEV *BackedgeTakenCount, ReductionTracker &Reductions);
482
  };
483
484
} // end anonymous namespace
485
486
char LoopReroll::ID = 0;
487
488
36.0k
INITIALIZE_PASS_BEGIN(LoopReroll, "loop-reroll", "Reroll loops", false, false)
489
36.0k
INITIALIZE_PASS_DEPENDENCY(LoopPass)
490
36.0k
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
491
36.0k
INITIALIZE_PASS_END(LoopReroll, "loop-reroll", "Reroll loops", false, false)
492
493
0
Pass *llvm::createLoopRerollPass() {
494
0
  return new LoopReroll;
495
0
}
496
497
// Returns true if the provided instruction is used outside the given loop.
498
// This operates like Instruction::isUsedOutsideOfBlock, but considers PHIs in
499
// non-loop blocks to be outside the loop.
500
902
static bool hasUsesOutsideLoop(Instruction *I, Loop *L) {
501
902
  for (User *U : I->users()) {
502
842
    if (!L->contains(cast<Instruction>(U)))
503
0
      return true;
504
842
  }
505
902
  return false;
506
902
}
507
508
// Check if an IV is only used to control the loop. There are two cases:
509
// 1. It only has one use which is loop increment, and the increment is only
510
// used by comparison and the PHI (could has sext with nsw in between), and the
511
// comparison is only used by branch.
512
// 2. It is used by loop increment and the comparison, the loop increment is
513
// only used by the PHI, and the comparison is used only by the branch.
514
37
bool LoopReroll::isLoopControlIV(Loop *L, Instruction *IV) {
515
37
  unsigned IVUses = IV->getNumUses();
516
37
  if (IVUses != 2 && 
IVUses != 133
)
517
29
    return false;
518
8
519
8
  for (auto *User : IV->users()) {
520
8
    int32_t IncOrCmpUses = User->getNumUses();
521
8
    bool IsCompInst = isCompareUsedByBranch(cast<Instruction>(User));
522
8
523
8
    // User can only have one or two uses.
524
8
    if (IncOrCmpUses != 2 && 
IncOrCmpUses != 10
)
525
0
      return false;
526
8
527
8
    // Case 1
528
8
    if (IVUses == 1) {
529
4
      // The only user must be the loop increment.
530
4
      // The loop increment must have two uses.
531
4
      if (IsCompInst || IncOrCmpUses != 2)
532
0
        return false;
533
8
    }
534
8
535
8
    // Case 2
536
8
    if (IVUses == 2 && 
IncOrCmpUses != 14
)
537
4
      return false;
538
4
539
4
    // The users of the IV must be a binary operation or a comparison
540
4
    if (auto *BO = dyn_cast<BinaryOperator>(User)) {
541
4
      if (BO->getOpcode() == Instruction::Add) {
542
4
        // Loop Increment
543
4
        // User of Loop Increment should be either PHI or CMP
544
8
        for (auto *UU : User->users()) {
545
8
          if (PHINode *PN = dyn_cast<PHINode>(UU)) {
546
4
            if (PN != IV)
547
0
              return false;
548
4
          }
549
4
          // Must be a CMP or an ext (of a value with nsw) then CMP
550
4
          else {
551
4
            Instruction *UUser = dyn_cast<Instruction>(UU);
552
4
            // Skip SExt if we are extending an nsw value
553
4
            // TODO: Allow ZExt too
554
4
            if (BO->hasNoSignedWrap() && UUser && UUser->hasOneUse() &&
555
4
                isa<SExtInst>(UUser))
556
1
              UUser = dyn_cast<Instruction>(*(UUser->user_begin()));
557
4
            if (!isCompareUsedByBranch(UUser))
558
0
              return false;
559
4
          }
560
8
        }
561
4
      } else
562
0
        return false;
563
0
      // Compare : can only have one use, and must be branch
564
0
    } else if (!IsCompInst)
565
0
      return false;
566
4
  }
567
8
  
return true4
;
568
8
}
569
570
// Collect the list of loop induction variables with respect to which it might
571
// be possible to reroll the loop.
572
void LoopReroll::collectPossibleIVs(Loop *L,
573
33
                                    SmallInstructionVector &PossibleIVs) {
574
33
  BasicBlock *Header = L->getHeader();
575
33
  for (BasicBlock::iterator I = Header->begin(),
576
79
       IE = Header->getFirstInsertionPt(); I != IE; 
++I46
) {
577
46
    if (!isa<PHINode>(I))
578
0
      continue;
579
46
    if (!I->getType()->isIntegerTy() && 
!I->getType()->isPointerTy()6
)
580
1
      continue;
581
45
582
45
    if (const SCEVAddRecExpr *PHISCEV =
583
37
            dyn_cast<SCEVAddRecExpr>(SE->getSCEV(&*I))) {
584
37
      if (PHISCEV->getLoop() != L)
585
0
        continue;
586
37
      if (!PHISCEV->isAffine())
587
0
        continue;
588
37
      auto IncSCEV = dyn_cast<SCEVConstant>(PHISCEV->getStepRecurrence(*SE));
589
37
      if (IncSCEV) {
590
37
        IVToIncMap[&*I] = IncSCEV->getValue()->getSExtValue();
591
37
        LLVM_DEBUG(dbgs() << "LRR: Possible IV: " << *I << " = " << *PHISCEV
592
37
                          << "\n");
593
37
594
37
        if (isLoopControlIV(L, &*I)) {
595
4
          assert(!LoopControlIV && "Found two loop control only IV");
596
4
          LoopControlIV = &(*I);
597
4
          LLVM_DEBUG(dbgs() << "LRR: Possible loop control only IV: " << *I
598
4
                            << " = " << *PHISCEV << "\n");
599
4
        } else
600
33
          PossibleIVs.push_back(&*I);
601
37
      }
602
37
    }
603
45
  }
604
33
}
605
606
// Add the remainder of the reduction-variable chain to the instruction vector
607
// (the initial PHINode has already been added). If successful, the object is
608
// marked as valid.
609
46
void LoopReroll::SimpleLoopReduction::add(Loop *L) {
610
46
  assert(!Valid && "Cannot add to an already-valid chain");
611
46
612
46
  // The reduction variable must be a chain of single-use instructions
613
46
  // (including the PHI), except for the last value (which is used by the PHI
614
46
  // and also outside the loop).
615
46
  Instruction *C = Instructions.front();
616
46
  if (C->user_empty())
617
1
    return;
618
45
619
60
  
do 45
{
620
60
    C = cast<Instruction>(*C->user_begin());
621
60
    if (C->hasOneUse()) {
622
22
      if (!C->isBinaryOp())
623
7
        return;
624
15
625
15
      if (!(isa<PHINode>(Instructions.back()) ||
626
15
            
C->isSameOperationAs(Instructions.back())6
))
627
0
        return;
628
15
629
15
      Instructions.push_back(C);
630
15
    }
631
60
  } while (
C->hasOneUse()53
);
632
45
633
45
  
if (38
Instructions.size() < 238
||
634
38
      
!C->isSameOperationAs(Instructions.back())9
||
635
38
      
C->use_empty()8
)
636
30
    return;
637
8
638
8
  // C is now the (potential) last instruction in the reduction chain.
639
16
  
for (User *U : C->users())8
{
640
16
    // The only in-loop user can be the initial PHI.
641
16
    if (L->contains(cast<Instruction>(U)))
642
8
      if (cast<Instruction>(U) != Instructions.front())
643
0
        return;
644
16
  }
645
8
646
8
  Instructions.push_back(C);
647
8
  Valid = true;
648
8
}
649
650
// Collect the vector of possible reduction variables.
651
void LoopReroll::collectPossibleReductions(Loop *L,
652
33
  ReductionTracker &Reductions) {
653
33
  BasicBlock *Header = L->getHeader();
654
33
  for (BasicBlock::iterator I = Header->begin(),
655
79
       IE = Header->getFirstInsertionPt(); I != IE; 
++I46
) {
656
46
    if (!isa<PHINode>(I))
657
0
      continue;
658
46
    if (!I->getType()->isSingleValueType())
659
0
      continue;
660
46
661
46
    SimpleLoopReduction SLR(&*I, L);
662
46
    if (!SLR.valid())
663
38
      continue;
664
8
665
8
    LLVM_DEBUG(dbgs() << "LRR: Possible reduction: " << *I << " (with "
666
8
                      << SLR.size() << " chained instructions)\n");
667
8
    Reductions.addSLR(SLR);
668
8
  }
669
33
}
670
671
// Collect the set of all users of the provided root instruction. This set of
672
// users contains not only the direct users of the root instruction, but also
673
// all users of those users, and so on. There are two exceptions:
674
//
675
//   1. Instructions in the set of excluded instructions are never added to the
676
//   use set (even if they are users). This is used, for example, to exclude
677
//   including root increments in the use set of the primary IV.
678
//
679
//   2. Instructions in the set of final instructions are added to the use set
680
//   if they are users, but their users are not added. This is used, for
681
//   example, to prevent a reduction update from forcing all later reduction
682
//   updates into the use set.
683
void LoopReroll::DAGRootTracker::collectInLoopUserSet(
684
  Instruction *Root, const SmallInstructionSet &Exclude,
685
  const SmallInstructionSet &Final,
686
159
  DenseSet<Instruction *> &Users) {
687
159
  SmallInstructionVector Queue(1, Root);
688
1.57k
  while (!Queue.empty()) {
689
1.41k
    Instruction *I = Queue.pop_back_val();
690
1.41k
    if (!Users.insert(I).second)
691
592
      continue;
692
824
693
824
    if (!Final.count(I))
694
950
      
for (Use &U : I->uses())802
{
695
950
        Instruction *User = cast<Instruction>(U.getUser());
696
950
        if (PHINode *PN = dyn_cast<PHINode>(User)) {
697
34
          // Ignore "wrap-around" uses to PHIs of this loop's header.
698
34
          if (PN->getIncomingBlock(U) == L->getHeader())
699
34
            continue;
700
916
        }
701
916
702
916
        if (L->contains(User) && !Exclude.count(User)) {
703
792
          Queue.push_back(User);
704
792
        }
705
916
      }
706
824
707
824
    // We also want to collect single-user "feeder" values.
708
824
    for (User::op_iterator OI = I->op_begin(),
709
2.31k
         OIE = I->op_end(); OI != OIE; 
++OI1.49k
) {
710
1.49k
      if (Instruction *Op = dyn_cast<Instruction>(*OI))
711
996
        if (Op->hasOneUse() && 
L->contains(Op)533
&&
!Exclude.count(Op)533
&&
712
996
            
!Final.count(Op)487
)
713
465
          Queue.push_back(Op);
714
1.49k
    }
715
824
  }
716
159
}
717
718
// Collect all of the users of all of the provided root instructions (combined
719
// into a single set).
720
void LoopReroll::DAGRootTracker::collectInLoopUserSet(
721
  const SmallInstructionVector &Roots,
722
  const SmallInstructionSet &Exclude,
723
  const SmallInstructionSet &Final,
724
30
  DenseSet<Instruction *> &Users) {
725
30
  for (Instruction *Root : Roots)
726
35
    collectInLoopUserSet(Root, Exclude, Final, Users);
727
30
}
728
729
455
static bool isUnorderedLoadStore(Instruction *I) {
730
455
  if (LoadInst *LI = dyn_cast<LoadInst>(I))
731
115
    return LI->isUnordered();
732
340
  if (StoreInst *SI = dyn_cast<StoreInst>(I))
733
0
    return SI->isUnordered();
734
340
  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
735
0
    return !MI->isVolatile();
736
340
  return false;
737
340
}
738
739
/// Return true if IVU is a "simple" arithmetic operation.
740
/// This is used for narrowing the search space for DAGRoots; only arithmetic
741
/// and GEPs can be part of a DAGRoot.
742
18
static bool isSimpleArithmeticOp(User *IVU) {
743
18
  if (Instruction *I = dyn_cast<Instruction>(IVU)) {
744
18
    switch (I->getOpcode()) {
745
18
    
default: return false3
;
746
18
    case Instruction::Add:
747
15
    case Instruction::Sub:
748
15
    case Instruction::Mul:
749
15
    case Instruction::Shl:
750
15
    case Instruction::AShr:
751
15
    case Instruction::LShr:
752
15
    case Instruction::GetElementPtr:
753
15
    case Instruction::Trunc:
754
15
    case Instruction::ZExt:
755
15
    case Instruction::SExt:
756
15
      return true;
757
0
    }
758
0
  }
759
0
  return false;
760
0
}
761
762
195
static bool isLoopIncrement(User *U, Instruction *IV) {
763
195
  BinaryOperator *BO = dyn_cast<BinaryOperator>(U);
764
195
765
195
  if ((BO && 
BO->getOpcode() != Instruction::Add130
) ||
766
195
      
(141
!BO141
&&
!isa<GetElementPtrInst>(U)65
))
767
66
    return false;
768
129
769
186
  
for (auto *UU : U->users())129
{
770
186
    PHINode *PN = dyn_cast<PHINode>(UU);
771
186
    if (PN && 
PN == IV33
)
772
33
      return true;
773
186
  }
774
129
  
return false96
;
775
129
}
776
777
bool LoopReroll::DAGRootTracker::
778
42
collectPossibleRoots(Instruction *Base, std::map<int64_t,Instruction*> &Roots) {
779
42
  SmallInstructionVector BaseUsers;
780
42
781
181
  for (auto *I : Base->users()) {
782
181
    ConstantInt *CI = nullptr;
783
181
784
181
    if (isLoopIncrement(I, IV)) {
785
27
      LoopIncs.push_back(cast<Instruction>(I));
786
27
      continue;
787
27
    }
788
154
789
154
    // The root nodes must be either GEPs, ORs or ADDs.
790
154
    if (auto *BO = dyn_cast<BinaryOperator>(I)) {
791
94
      if (BO->getOpcode() == Instruction::Add ||
792
94
          
BO->getOpcode() == Instruction::Or48
)
793
91
        CI = dyn_cast<ConstantInt>(BO->getOperand(1));
794
94
    } else 
if (auto *60
GEP60
= dyn_cast<GetElementPtrInst>(I)) {
795
48
      Value *LastOperand = GEP->getOperand(GEP->getNumOperands()-1);
796
48
      CI = dyn_cast<ConstantInt>(LastOperand);
797
48
    }
798
154
799
154
    if (!CI) {
800
52
      if (Instruction *II = dyn_cast<Instruction>(I)) {
801
52
        BaseUsers.push_back(II);
802
52
        continue;
803
52
      } else {
804
0
        LLVM_DEBUG(dbgs() << "LRR: Aborting due to non-instruction: " << *I
805
0
                          << "\n");
806
0
        return false;
807
0
      }
808
102
    }
809
102
810
102
    int64_t V = std::abs(CI->getValue().getSExtValue());
811
102
    if (Roots.find(V) != Roots.end())
812
0
      // No duplicates, please.
813
0
      return false;
814
102
815
102
    Roots[V] = cast<Instruction>(I);
816
102
  }
817
42
818
42
  // Make sure we have at least two roots.
819
42
  if (Roots.empty() || 
(36
Roots.size() == 136
&&
BaseUsers.empty()15
))
820
8
    return false;
821
34
822
34
  // If we found non-loop-inc, non-root users of Base, assume they are
823
34
  // for the zeroth root index. This is because "add %a, 0" gets optimized
824
34
  // away.
825
34
  if (BaseUsers.size()) {
826
32
    if (Roots.find(0) != Roots.end()) {
827
0
      LLVM_DEBUG(dbgs() << "LRR: Multiple roots found for base - aborting!\n");
828
0
      return false;
829
0
    }
830
32
    Roots[0] = Base;
831
32
  }
832
34
833
34
  // Calculate the number of users of the base, or lowest indexed, iteration.
834
34
  unsigned NumBaseUses = BaseUsers.size();
835
34
  if (NumBaseUses == 0)
836
2
    NumBaseUses = Roots.begin()->second->getNumUses();
837
34
838
34
  // Check that every node has the same number of users.
839
132
  for (auto &KV : Roots) {
840
132
    if (KV.first == 0)
841
33
      continue;
842
99
    if (!KV.second->hasNUses(NumBaseUses)) {
843
0
      LLVM_DEBUG(dbgs() << "LRR: Aborting - Root and Base #users not the same: "
844
0
                        << "#Base=" << NumBaseUses
845
0
                        << ", #Root=" << KV.second->getNumUses() << "\n");
846
0
      return false;
847
0
    }
848
99
  }
849
34
850
34
  return true;
851
34
}
852
853
void LoopReroll::DAGRootTracker::
854
21
findRootsRecursive(Instruction *I, SmallInstructionSet SubsumedInsts) {
855
21
  // Does the user look like it could be part of a root set?
856
21
  // All its users must be simple arithmetic ops.
857
21
  if (I->hasNUsesOrMore(IL_MaxRerollIterations + 1))
858
0
    return;
859
21
860
21
  if (I != IV && 
findRootsBase(I, SubsumedInsts)15
)
861
6
    return;
862
15
863
15
  SubsumedInsts.insert(I);
864
15
865
24
  for (User *V : I->users()) {
866
24
    Instruction *I = cast<Instruction>(V);
867
24
    if (is_contained(LoopIncs, I))
868
6
      continue;
869
18
870
18
    if (!isSimpleArithmeticOp(I))
871
3
      continue;
872
15
873
15
    // The recursive call makes a copy of SubsumedInsts.
874
15
    findRootsRecursive(I, SubsumedInsts);
875
15
  }
876
15
}
877
878
35
bool LoopReroll::DAGRootTracker::validateRootSet(DAGRootSet &DRS) {
879
35
  if (DRS.Roots.empty())
880
0
    return false;
881
35
882
35
  // Consider a DAGRootSet with N-1 roots (so N different values including
883
35
  //   BaseInst).
884
35
  // Define d = Roots[0] - BaseInst, which should be the same as
885
35
  //   Roots[I] - Roots[I-1] for all I in [1..N).
886
35
  // Define D = BaseInst@J - BaseInst@J-1, where "@J" means the value at the
887
35
  //   loop iteration J.
888
35
  //
889
35
  // Now, For the loop iterations to be consecutive:
890
35
  //   D = d * N
891
35
  const auto *ADR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(DRS.BaseInst));
892
35
  if (!ADR)
893
0
    return false;
894
35
895
35
  // Check that the first root is evenly spaced.
896
35
  unsigned N = DRS.Roots.size() + 1;
897
35
  const SCEV *StepSCEV = SE->getMinusSCEV(SE->getSCEV(DRS.Roots[0]), ADR);
898
35
  const SCEV *ScaleSCEV = SE->getConstant(StepSCEV->getType(), N);
899
35
  if (ADR->getStepRecurrence(*SE) != SE->getMulExpr(StepSCEV, ScaleSCEV))
900
1
    return false;
901
34
902
34
  // Check that the remainling roots are evenly spaced.
903
94
  
for (unsigned i = 1; 34
i < N - 1;
++i60
) {
904
61
    const SCEV *NewStepSCEV = SE->getMinusSCEV(SE->getSCEV(DRS.Roots[i]),
905
61
                                               SE->getSCEV(DRS.Roots[i-1]));
906
61
    if (NewStepSCEV != StepSCEV)
907
1
      return false;
908
61
  }
909
34
910
34
  
return true33
;
911
34
}
912
913
bool LoopReroll::DAGRootTracker::
914
42
findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts) {
915
42
  // The base of a RootSet must be an AddRec, so it can be erased.
916
42
  const auto *IVU_ADR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IVU));
917
42
  if (!IVU_ADR || IVU_ADR->getLoop() != L)
918
0
    return false;
919
42
920
42
  std::map<int64_t, Instruction*> V;
921
42
  if (!collectPossibleRoots(IVU, V))
922
8
    return false;
923
34
924
34
  // If we didn't get a root for index zero, then IVU must be
925
34
  // subsumed.
926
34
  if (V.find(0) == V.end())
927
1
    SubsumedInsts.insert(IVU);
928
34
929
34
  // Partition the vector into monotonically increasing indexes.
930
34
  DAGRootSet DRS;
931
34
  DRS.BaseInst = nullptr;
932
34
933
34
  SmallVector<DAGRootSet, 16> PotentialRootSets;
934
34
935
132
  for (auto &KV : V) {
936
132
    if (!DRS.BaseInst) {
937
34
      DRS.BaseInst = KV.second;
938
34
      DRS.SubsumedInsts = SubsumedInsts;
939
98
    } else if (DRS.Roots.empty()) {
940
35
      DRS.Roots.push_back(KV.second);
941
63
    } else if (V.find(KV.first - 1) != V.end()) {
942
62
      DRS.Roots.push_back(KV.second);
943
62
    } else {
944
1
      // Linear sequence terminated.
945
1
      if (!validateRootSet(DRS))
946
0
        return false;
947
1
948
1
      // Construct a new DAGRootSet with the next sequence.
949
1
      PotentialRootSets.push_back(DRS);
950
1
      DRS.BaseInst = KV.second;
951
1
      DRS.Roots.clear();
952
1
    }
953
132
  }
954
34
955
34
  if (!validateRootSet(DRS))
956
2
    return false;
957
32
958
32
  PotentialRootSets.push_back(DRS);
959
32
960
32
  RootSets.append(PotentialRootSets.begin(), PotentialRootSets.end());
961
32
962
32
  return true;
963
32
}
964
965
33
bool LoopReroll::DAGRootTracker::findRoots() {
966
33
  Inc = IVToIncMap[IV];
967
33
968
33
  assert(RootSets.empty() && "Unclean state!");
969
33
  if (std::abs(Inc) == 1) {
970
14
    for (auto *IVU : IV->users()) {
971
14
      if (isLoopIncrement(IVU, IV))
972
6
        LoopIncs.push_back(cast<Instruction>(IVU));
973
14
    }
974
6
    findRootsRecursive(IV, SmallInstructionSet());
975
6
    LoopIncs.push_back(IV);
976
27
  } else {
977
27
    if (!findRootsBase(IV, SmallInstructionSet()))
978
1
      return false;
979
32
  }
980
32
981
32
  // Ensure all sets have the same size.
982
32
  if (RootSets.empty()) {
983
1
    LLVM_DEBUG(dbgs() << "LRR: Aborting because no root sets found!\n");
984
1
    return false;
985
1
  }
986
33
  
for (auto &V : RootSets)31
{
987
33
    if (V.Roots.empty() || V.Roots.size() != RootSets[0].Roots.size()) {
988
0
      LLVM_DEBUG(
989
0
          dbgs()
990
0
          << "LRR: Aborting because not all root sets have the same size\n");
991
0
      return false;
992
0
    }
993
33
  }
994
31
995
31
  Scale = RootSets[0].Roots.size() + 1;
996
31
997
31
  if (Scale > IL_MaxRerollIterations) {
998
0
    LLVM_DEBUG(dbgs() << "LRR: Aborting - too many iterations found. "
999
0
                      << "#Found=" << Scale
1000
0
                      << ", #Max=" << IL_MaxRerollIterations << "\n");
1001
0
    return false;
1002
0
  }
1003
31
1004
31
  LLVM_DEBUG(dbgs() << "LRR: Successfully found roots: Scale=" << Scale
1005
31
                    << "\n");
1006
31
1007
31
  return true;
1008
31
}
1009
1010
31
bool LoopReroll::DAGRootTracker::collectUsedInstructions(SmallInstructionSet &PossibleRedSet) {
1011
31
  // Populate the MapVector with all instructions in the block, in order first,
1012
31
  // so we can iterate over the contents later in perfect order.
1013
851
  for (auto &I : *L->getHeader()) {
1014
851
    Uses[&I].resize(IL_End);
1015
851
  }
1016
31
1017
31
  SmallInstructionSet Exclude;
1018
33
  for (auto &DRS : RootSets) {
1019
33
    Exclude.insert(DRS.Roots.begin(), DRS.Roots.end());
1020
33
    Exclude.insert(DRS.SubsumedInsts.begin(), DRS.SubsumedInsts.end());
1021
33
    Exclude.insert(DRS.BaseInst);
1022
33
  }
1023
31
  Exclude.insert(LoopIncs.begin(), LoopIncs.end());
1024
31
1025
33
  for (auto &DRS : RootSets) {
1026
33
    DenseSet<Instruction*> VBase;
1027
33
    collectInLoopUserSet(DRS.BaseInst, Exclude, PossibleRedSet, VBase);
1028
169
    for (auto *I : VBase) {
1029
169
      Uses[I].set(0);
1030
169
    }
1031
33
1032
33
    unsigned Idx = 1;
1033
91
    for (auto *Root : DRS.Roots) {
1034
91
      DenseSet<Instruction*> V;
1035
91
      collectInLoopUserSet(Root, Exclude, PossibleRedSet, V);
1036
91
1037
91
      // While we're here, check the use sets are the same size.
1038
91
      if (V.size() != VBase.size()) {
1039
1
        LLVM_DEBUG(dbgs() << "LRR: Aborting - use sets are different sizes\n");
1040
1
        return false;
1041
1
      }
1042
90
1043
551
      
for (auto *I : V)90
{
1044
551
        Uses[I].set(Idx);
1045
551
      }
1046
90
      ++Idx;
1047
90
    }
1048
33
1049
33
    // Make sure our subsumed instructions are remembered too.
1050
33
    
for (auto *I : DRS.SubsumedInsts)32
{
1051
10
      Uses[I].set(IL_All);
1052
10
    }
1053
32
  }
1054
31
1055
31
  // Make sure the loop increments are also accounted for.
1056
31
1057
31
  Exclude.clear();
1058
32
  for (auto &DRS : RootSets) {
1059
32
    Exclude.insert(DRS.Roots.begin(), DRS.Roots.end());
1060
32
    Exclude.insert(DRS.SubsumedInsts.begin(), DRS.SubsumedInsts.end());
1061
32
    Exclude.insert(DRS.BaseInst);
1062
32
  }
1063
30
1064
30
  DenseSet<Instruction*> V;
1065
30
  collectInLoopUserSet(LoopIncs, Exclude, PossibleRedSet, V);
1066
93
  for (auto *I : V) {
1067
93
    Uses[I].set(IL_All);
1068
93
  }
1069
30
1070
30
  return true;
1071
31
}
1072
1073
/// Get the next instruction in "In" that is a member of set Val.
1074
/// Start searching from StartI, and do not return anything in Exclude.
1075
/// If StartI is not given, start from In.begin().
1076
LoopReroll::DAGRootTracker::UsesTy::iterator
1077
LoopReroll::DAGRootTracker::nextInstr(int Val, UsesTy &In,
1078
                                      const SmallInstructionSet &Exclude,
1079
1.26k
                                      UsesTy::iterator *StartI) {
1080
1.26k
  UsesTy::iterator I = StartI ? 
*StartI3
:
In.begin()1.25k
;
1081
59.1k
  while (I != In.end() && 
(58.9k
I->second.test(Val) == 058.9k
||
1082
58.9k
                           
Exclude.count(I->first) != 05.55k
))
1083
57.9k
    ++I;
1084
1.26k
  return I;
1085
1.26k
}
1086
1087
550
bool LoopReroll::DAGRootTracker::isBaseInst(Instruction *I) {
1088
572
  for (auto &DRS : RootSets) {
1089
572
    if (DRS.BaseInst == I)
1090
89
      return true;
1091
572
  }
1092
550
  
return false461
;
1093
550
}
1094
1095
550
bool LoopReroll::DAGRootTracker::isRootInst(Instruction *I) {
1096
572
  for (auto &DRS : RootSets) {
1097
572
    if (is_contained(DRS.Roots, I))
1098
89
      return true;
1099
572
  }
1100
550
  
return false461
;
1101
550
}
1102
1103
/// Return true if instruction I depends on any instruction between
1104
/// Start and End.
1105
bool LoopReroll::DAGRootTracker::instrDependsOn(Instruction *I,
1106
                                                UsesTy::iterator Start,
1107
2
                                                UsesTy::iterator End) {
1108
2
  for (auto *U : I->users()) {
1109
4
    for (auto It = Start; It != End; 
++It2
)
1110
2
      if (U == It->first)
1111
0
        return true;
1112
2
  }
1113
2
  return false;
1114
2
}
1115
1116
2
static bool isIgnorableInst(const Instruction *I) {
1117
2
  if (isa<DbgInfoIntrinsic>(I))
1118
1
    return true;
1119
1
  const IntrinsicInst* II = dyn_cast<IntrinsicInst>(I);
1120
1
  if (!II)
1121
1
    return false;
1122
0
  switch (II->getIntrinsicID()) {
1123
0
    default:
1124
0
      return false;
1125
0
    case Intrinsic::annotation:
1126
0
    case Intrinsic::ptr_annotation:
1127
0
    case Intrinsic::var_annotation:
1128
0
    // TODO: the following intrinsics may also be whitelisted:
1129
0
    //   lifetime_start, lifetime_end, invariant_start, invariant_end
1130
0
      return true;
1131
0
  }
1132
0
  return false;
1133
0
}
1134
1135
31
bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) {
1136
31
  // We now need to check for equivalence of the use graph of each root with
1137
31
  // that of the primary induction variable (excluding the roots). Our goal
1138
31
  // here is not to solve the full graph isomorphism problem, but rather to
1139
31
  // catch common cases without a lot of work. As a result, we will assume
1140
31
  // that the relative order of the instructions in each unrolled iteration
1141
31
  // is the same (although we will not make an assumption about how the
1142
31
  // different iterations are intermixed). Note that while the order must be
1143
31
  // the same, the instructions may not be in the same basic block.
1144
31
1145
31
  // An array of just the possible reductions for this scale factor. When we
1146
31
  // collect the set of all users of some root instructions, these reduction
1147
31
  // instructions are treated as 'final' (their uses are not considered).
1148
31
  // This is important because we don't want the root use set to search down
1149
31
  // the reduction chain.
1150
31
  SmallInstructionSet PossibleRedSet;
1151
31
  SmallInstructionSet PossibleRedLastSet;
1152
31
  SmallInstructionSet PossibleRedPHISet;
1153
31
  Reductions.restrictToScale(Scale, PossibleRedSet,
1154
31
                             PossibleRedPHISet, PossibleRedLastSet);
1155
31
1156
31
  // Populate "Uses" with where each instruction is used.
1157
31
  if (!collectUsedInstructions(PossibleRedSet))
1158
1
    return false;
1159
30
1160
30
  // Make sure we mark the reduction PHIs as used in all iterations.
1161
30
  for (auto *I : PossibleRedPHISet) {
1162
8
    Uses[I].set(IL_All);
1163
8
  }
1164
30
1165
30
  // Make sure we mark loop-control-only PHIs as used in all iterations. See
1166
30
  // comment above LoopReroll::isLoopControlIV for more information.
1167
30
  BasicBlock *Header = L->getHeader();
1168
30
  if (LoopControlIV && 
LoopControlIV != IV4
) {
1169
4
    for (auto *U : LoopControlIV->users()) {
1170
4
      Instruction *IVUser = dyn_cast<Instruction>(U);
1171
4
      // IVUser could be loop increment or compare
1172
4
      Uses[IVUser].set(IL_All);
1173
8
      for (auto *UU : IVUser->users()) {
1174
8
        Instruction *UUser = dyn_cast<Instruction>(UU);
1175
8
        // UUser could be compare, PHI or branch
1176
8
        Uses[UUser].set(IL_All);
1177
8
        // Skip SExt
1178
8
        if (isa<SExtInst>(UUser)) {
1179
1
          UUser = dyn_cast<Instruction>(*(UUser->user_begin()));
1180
1
          Uses[UUser].set(IL_All);
1181
1
        }
1182
8
        // Is UUser a compare instruction?
1183
8
        if (UU->hasOneUse()) {
1184
8
          Instruction *BI = dyn_cast<BranchInst>(*UUser->user_begin());
1185
8
          if (BI == cast<BranchInst>(Header->getTerminator()))
1186
4
            Uses[BI].set(IL_All);
1187
8
        }
1188
8
      }
1189
4
    }
1190
4
  }
1191
30
1192
30
  // Make sure all instructions in the loop are in one and only one
1193
30
  // set.
1194
826
  for (auto &KV : Uses) {
1195
826
    if (KV.second.count() != 1 && 
!isIgnorableInst(KV.first)2
) {
1196
1
      LLVM_DEBUG(
1197
1
          dbgs() << "LRR: Aborting - instruction is not used in 1 iteration: "
1198
1
                 << *KV.first << " (#uses=" << KV.second.count() << ")\n");
1199
1
      return false;
1200
1
    }
1201
826
  }
1202
30
1203
30
  
LLVM_DEBUG29
(for (auto &KV
1204
29
                  : Uses) {
1205
29
    dbgs() << "LRR: " << KV.second.find_first() << "\t" << *KV.first << "\n";
1206
29
  });
1207
29
1208
112
  for (unsigned Iter = 1; Iter < Scale; 
++Iter83
) {
1209
85
    // In addition to regular aliasing information, we need to look for
1210
85
    // instructions from later (future) iterations that have side effects
1211
85
    // preventing us from reordering them past other instructions with side
1212
85
    // effects.
1213
85
    bool FutureSideEffects = false;
1214
85
    AliasSetTracker AST(*AA);
1215
85
    // The map between instructions in f(%iv.(i+1)) and f(%iv).
1216
85
    DenseMap<Value *, Value *> BaseMap;
1217
85
1218
85
    // Compare iteration Iter to the base.
1219
85
    SmallInstructionSet Visited;
1220
85
    auto BaseIt = nextInstr(0, Uses, Visited);
1221
85
    auto RootIt = nextInstr(Iter, Uses, Visited);
1222
85
    auto LastRootIt = Uses.begin();
1223
85
1224
633
    while (BaseIt != Uses.end() && 
RootIt != Uses.end()550
) {
1225
550
      Instruction *BaseInst = BaseIt->first;
1226
550
      Instruction *RootInst = RootIt->first;
1227
550
1228
550
      // Skip over the IV or root instructions; only match their users.
1229
550
      bool Continue = false;
1230
550
      if (isBaseInst(BaseInst)) {
1231
89
        Visited.insert(BaseInst);
1232
89
        BaseIt = nextInstr(0, Uses, Visited);
1233
89
        Continue = true;
1234
89
      }
1235
550
      if (isRootInst(RootInst)) {
1236
89
        LastRootIt = RootIt;
1237
89
        Visited.insert(RootInst);
1238
89
        RootIt = nextInstr(Iter, Uses, Visited);
1239
89
        Continue = true;
1240
89
      }
1241
550
      if (Continue) 
continue93
;
1242
457
1243
457
      if (!BaseInst->isSameOperationAs(RootInst)) {
1244
3
        // Last chance saloon. We don't try and solve the full isomorphism
1245
3
        // problem, but try and at least catch the case where two instructions
1246
3
        // *of different types* are round the wrong way. We won't be able to
1247
3
        // efficiently tell, given two ADD instructions, which way around we
1248
3
        // should match them, but given an ADD and a SUB, we can at least infer
1249
3
        // which one is which.
1250
3
        //
1251
3
        // This should allow us to deal with a greater subset of the isomorphism
1252
3
        // problem. It does however change a linear algorithm into a quadratic
1253
3
        // one, so limit the number of probes we do.
1254
3
        auto TryIt = RootIt;
1255
3
        unsigned N = NumToleratedFailedMatches;
1256
6
        while (TryIt != Uses.end() &&
1257
6
               
!BaseInst->isSameOperationAs(TryIt->first)5
&&
1258
6
               
N--3
) {
1259
3
          ++TryIt;
1260
3
          TryIt = nextInstr(Iter, Uses, Visited, &TryIt);
1261
3
        }
1262
3
1263
3
        if (TryIt == Uses.end() || 
TryIt == RootIt2
||
1264
3
            
instrDependsOn(TryIt->first, RootIt, TryIt)2
) {
1265
1
          LLVM_DEBUG(dbgs() << "LRR: iteration root match failed at "
1266
1
                            << *BaseInst << " vs. " << *RootInst << "\n");
1267
1
          return false;
1268
1
        }
1269
2
1270
2
        RootIt = TryIt;
1271
2
        RootInst = TryIt->first;
1272
2
      }
1273
457
1274
457
      // All instructions between the last root and this root
1275
457
      // may belong to some other iteration. If they belong to a
1276
457
      // future iteration, then they're dangerous to alias with.
1277
457
      //
1278
457
      // Note that because we allow a limited amount of flexibility in the order
1279
457
      // that we visit nodes, LastRootIt might be *before* RootIt, in which
1280
457
      // case we've already checked this set of instructions so we shouldn't
1281
457
      // do anything.
1282
957
      
for (; 456
LastRootIt < RootIt;
++LastRootIt501
) {
1283
501
        Instruction *I = LastRootIt->first;
1284
501
        if (LastRootIt->second.find_first() < (int)Iter)
1285
44
          continue;
1286
457
        if (I->mayWriteToMemory())
1287
1
          AST.add(I);
1288
457
        // Note: This is specifically guarded by a check on isa<PHINode>,
1289
457
        // which while a valid (somewhat arbitrary) micro-optimization, is
1290
457
        // needed because otherwise isSafeToSpeculativelyExecute returns
1291
457
        // false on PHI nodes.
1292
457
        if (!isa<PHINode>(I) && 
!isUnorderedLoadStore(I)455
&&
1293
457
            
!isSafeToSpeculativelyExecute(I)341
)
1294
1
          // Intervening instructions cause side effects.
1295
1
          FutureSideEffects = true;
1296
457
      }
1297
456
1298
456
      // Make sure that this instruction, which is in the use set of this
1299
456
      // root instruction, does not also belong to the base set or the set of
1300
456
      // some other root instruction.
1301
456
      if (RootIt->second.count() > 1) {
1302
0
        LLVM_DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst
1303
0
                          << " vs. " << *RootInst << " (prev. case overlap)\n");
1304
0
        return false;
1305
0
      }
1306
456
1307
456
      // Make sure that we don't alias with any instruction in the alias set
1308
456
      // tracker. If we do, then we depend on a future iteration, and we
1309
456
      // can't reroll.
1310
456
      if (RootInst->mayReadFromMemory())
1311
125
        for (auto &K : AST) {
1312
1
          if (K.aliasesUnknownInst(RootInst, *AA)) {
1313
1
            LLVM_DEBUG(dbgs() << "LRR: iteration root match failed at "
1314
1
                              << *BaseInst << " vs. " << *RootInst
1315
1
                              << " (depends on future store)\n");
1316
1
            return false;
1317
1
          }
1318
1
        }
1319
456
1320
456
      // If we've past an instruction from a future iteration that may have
1321
456
      // side effects, and this instruction might also, then we can't reorder
1322
456
      // them, and this matching fails. As an exception, we allow the alias
1323
456
      // set tracker to handle regular (unordered) load/store dependencies.
1324
456
      
if (455
FutureSideEffects455
&&
(0
(0
!isUnorderedLoadStore(BaseInst)0
&&
1325
0
                                 !isSafeToSpeculativelyExecute(BaseInst)) ||
1326
0
                                (!isUnorderedLoadStore(RootInst) &&
1327
0
                                 !isSafeToSpeculativelyExecute(RootInst)))) {
1328
0
        LLVM_DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst
1329
0
                          << " vs. " << *RootInst
1330
0
                          << " (side effects prevent reordering)\n");
1331
0
        return false;
1332
0
      }
1333
455
1334
455
      // For instructions that are part of a reduction, if the operation is
1335
455
      // associative, then don't bother matching the operands (because we
1336
455
      // already know that the instructions are isomorphic, and the order
1337
455
      // within the iteration does not matter). For non-associative reductions,
1338
455
      // we do need to match the operands, because we need to reject
1339
455
      // out-of-order instructions within an iteration!
1340
455
      // For example (assume floating-point addition), we need to reject this:
1341
455
      //   x += a[i]; x += b[i];
1342
455
      //   x += a[i+1]; x += b[i+1];
1343
455
      //   x += b[i+2]; x += a[i+2];
1344
455
      bool InReduction = Reductions.isPairInSame(BaseInst, RootInst);
1345
455
1346
455
      if (!(InReduction && 
BaseInst->isAssociative()14
)) {
1347
444
        bool Swapped = false, SomeOpMatched = false;
1348
1.19k
        for (unsigned j = 0; j < BaseInst->getNumOperands(); 
++j755
) {
1349
755
          Value *Op2 = RootInst->getOperand(j);
1350
755
1351
755
          // If this is part of a reduction (and the operation is not
1352
755
          // associatve), then we match all operands, but not those that are
1353
755
          // part of the reduction.
1354
755
          if (InReduction)
1355
6
            if (Instruction *Op2I = dyn_cast<Instruction>(Op2))
1356
6
              if (Reductions.isPairInSame(RootInst, Op2I))
1357
3
                continue;
1358
752
1359
752
          DenseMap<Value *, Value *>::iterator BMI = BaseMap.find(Op2);
1360
752
          if (BMI != BaseMap.end()) {
1361
396
            Op2 = BMI->second;
1362
396
          } else {
1363
376
            for (auto &DRS : RootSets) {
1364
376
              if (DRS.Roots[Iter-1] == (Instruction*) Op2) {
1365
146
                Op2 = DRS.BaseInst;
1366
146
                break;
1367
146
              }
1368
376
            }
1369
356
          }
1370
752
1371
752
          if (BaseInst->getOperand(Swapped ? 
unsigned(!j)0
: j) != Op2) {
1372
3
            // If we've not already decided to swap the matched operands, and
1373
3
            // we've not already matched our first operand (note that we could
1374
3
            // have skipped matching the first operand because it is part of a
1375
3
            // reduction above), and the instruction is commutative, then try
1376
3
            // the swapped match.
1377
3
            if (!Swapped && BaseInst->isCommutative() && !SomeOpMatched &&
1378
3
                BaseInst->getOperand(!j) == Op2) {
1379
3
              Swapped = true;
1380
3
            } else {
1381
0
              LLVM_DEBUG(dbgs()
1382
0
                         << "LRR: iteration root match failed at " << *BaseInst
1383
0
                         << " vs. " << *RootInst << " (operand " << j << ")\n");
1384
0
              return false;
1385
0
            }
1386
752
          }
1387
752
1388
752
          SomeOpMatched = true;
1389
752
        }
1390
444
      }
1391
455
1392
455
      if ((!PossibleRedLastSet.count(BaseInst) &&
1393
455
           hasUsesOutsideLoop(BaseInst, L)) ||
1394
455
          (!PossibleRedLastSet.count(RootInst) &&
1395
455
           
hasUsesOutsideLoop(RootInst, L)447
)) {
1396
0
        LLVM_DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst
1397
0
                          << " vs. " << *RootInst << " (uses outside loop)\n");
1398
0
        return false;
1399
0
      }
1400
455
1401
455
      Reductions.recordPair(BaseInst, RootInst, Iter);
1402
455
      BaseMap.insert(std::make_pair(RootInst, BaseInst));
1403
455
1404
455
      LastRootIt = RootIt;
1405
455
      Visited.insert(BaseInst);
1406
455
      Visited.insert(RootInst);
1407
455
      BaseIt = nextInstr(0, Uses, Visited);
1408
455
      RootIt = nextInstr(Iter, Uses, Visited);
1409
455
    }
1410
85
    assert(BaseIt == Uses.end() && RootIt == Uses.end() &&
1411
83
           "Mismatched set sizes!");
1412
83
  }
1413
29
1414
29
  
LLVM_DEBUG27
(dbgs() << "LRR: Matched all iteration increments for " << *IV
1415
27
                    << "\n");
1416
27
1417
27
  return true;
1418
29
}
1419
1420
27
void LoopReroll::DAGRootTracker::replace(const SCEV *BackedgeTakenCount) {
1421
27
  BasicBlock *Header = L->getHeader();
1422
27
1423
27
  // Compute the start and increment for each BaseInst before we start erasing
1424
27
  // instructions.
1425
27
  SmallVector<const SCEV *, 8> StartExprs;
1426
27
  SmallVector<const SCEV *, 8> IncrExprs;
1427
29
  for (auto &DRS : RootSets) {
1428
29
    const SCEVAddRecExpr *IVSCEV =
1429
29
        cast<SCEVAddRecExpr>(SE->getSCEV(DRS.BaseInst));
1430
29
    StartExprs.push_back(IVSCEV->getStart());
1431
29
    IncrExprs.push_back(SE->getMinusSCEV(SE->getSCEV(DRS.Roots[0]), IVSCEV));
1432
29
  }
1433
27
1434
27
  // Remove instructions associated with non-base iterations.
1435
27
  for (BasicBlock::reverse_iterator J = Header->rbegin(), JE = Header->rend();
1436
817
       J != JE;) {
1437
790
    unsigned I = Uses[&*J].find_first();
1438
790
    if (I > 0 && 
I < IL_All649
) {
1439
536
      LLVM_DEBUG(dbgs() << "LRR: removing: " << *J << "\n");
1440
536
      J++->eraseFromParent();
1441
536
      continue;
1442
536
    }
1443
254
1444
254
    ++J;
1445
254
  }
1446
27
1447
27
  // Rewrite each BaseInst using SCEV.
1448
56
  for (size_t i = 0, e = RootSets.size(); i != e; 
++i29
)
1449
29
    // Insert the new induction variable.
1450
29
    replaceIV(RootSets[i], StartExprs[i], IncrExprs[i]);
1451
27
1452
27
  { // Limit the lifetime of SCEVExpander.
1453
27
    BranchInst *BI = cast<BranchInst>(Header->getTerminator());
1454
27
    const DataLayout &DL = Header->getModule()->getDataLayout();
1455
27
    SCEVExpander Expander(*SE, DL, "reroll");
1456
27
    auto Zero = SE->getZero(BackedgeTakenCount->getType());
1457
27
    auto One = SE->getOne(BackedgeTakenCount->getType());
1458
27
    auto NewIVSCEV = SE->getAddRecExpr(Zero, One, L, SCEV::FlagAnyWrap);
1459
27
    Value *NewIV =
1460
27
        Expander.expandCodeFor(NewIVSCEV, BackedgeTakenCount->getType(),
1461
27
                               Header->getFirstNonPHIOrDbg());
1462
27
    // FIXME: This arithmetic can overflow.
1463
27
    auto TripCount = SE->getAddExpr(BackedgeTakenCount, One);
1464
27
    auto ScaledTripCount = SE->getMulExpr(
1465
27
        TripCount, SE->getConstant(BackedgeTakenCount->getType(), Scale));
1466
27
    auto ScaledBECount = SE->getMinusSCEV(ScaledTripCount, One);
1467
27
    Value *TakenCount =
1468
27
        Expander.expandCodeFor(ScaledBECount, BackedgeTakenCount->getType(),
1469
27
                               Header->getFirstNonPHIOrDbg());
1470
27
    Value *Cond =
1471
27
        new ICmpInst(BI, CmpInst::ICMP_EQ, NewIV, TakenCount, "exitcond");
1472
27
    BI->setCondition(Cond);
1473
27
1474
27
    if (BI->getSuccessor(1) != Header)
1475
16
      BI->swapSuccessors();
1476
27
  }
1477
27
1478
27
  SimplifyInstructionsInBlock(Header, TLI);
1479
27
  DeleteDeadPHIs(Header, TLI);
1480
27
}
1481
1482
void LoopReroll::DAGRootTracker::replaceIV(DAGRootSet &DRS,
1483
                                           const SCEV *Start,
1484
29
                                           const SCEV *IncrExpr) {
1485
29
  BasicBlock *Header = L->getHeader();
1486
29
  Instruction *Inst = DRS.BaseInst;
1487
29
1488
29
  const SCEV *NewIVSCEV =
1489
29
      SE->getAddRecExpr(Start, IncrExpr, L, SCEV::FlagAnyWrap);
1490
29
1491
29
  { // Limit the lifetime of SCEVExpander.
1492
29
    const DataLayout &DL = Header->getModule()->getDataLayout();
1493
29
    SCEVExpander Expander(*SE, DL, "reroll");
1494
29
    Value *NewIV = Expander.expandCodeFor(NewIVSCEV, Inst->getType(),
1495
29
                                          Header->getFirstNonPHIOrDbg());
1496
29
1497
29
    for (auto &KV : Uses)
1498
835
      if (KV.second.find_first() == 0)
1499
153
        KV.first->replaceUsesOfWith(Inst, NewIV);
1500
29
  }
1501
29
}
1502
1503
// Validate the selected reductions. All iterations must have an isomorphic
1504
// part of the reduction chain and, for non-associative reductions, the chain
1505
// entries must appear in order.
1506
27
bool LoopReroll::ReductionTracker::validateSelected() {
1507
27
  // For a non-associative reduction, the chain entries must appear in order.
1508
27
  for (int i : Reds) {
1509
8
    int PrevIter = 0, BaseCount = 0, Count = 0;
1510
22
    for (Instruction *J : PossibleReds[i]) {
1511
22
      // Note that all instructions in the chain must have been found because
1512
22
      // all instructions in the function must have been assigned to some
1513
22
      // iteration.
1514
22
      int Iter = PossibleRedIter[J];
1515
22
      if (Iter != PrevIter && 
Iter != PrevIter + 114
&&
1516
22
          
!PossibleReds[i].getReducedValue()->isAssociative()0
) {
1517
0
        LLVM_DEBUG(dbgs() << "LRR: Out-of-order non-associative reduction: "
1518
0
                          << J << "\n");
1519
0
        return false;
1520
0
      }
1521
22
1522
22
      if (Iter != PrevIter) {
1523
14
        if (Count != BaseCount) {
1524
0
          LLVM_DEBUG(dbgs()
1525
0
                     << "LRR: Iteration " << PrevIter << " reduction use count "
1526
0
                     << Count << " is not equal to the base use count "
1527
0
                     << BaseCount << "\n");
1528
0
          return false;
1529
0
        }
1530
14
1531
14
        Count = 0;
1532
14
      }
1533
22
1534
22
      ++Count;
1535
22
      if (Iter == 0)
1536
8
        ++BaseCount;
1537
22
1538
22
      PrevIter = Iter;
1539
22
    }
1540
8
  }
1541
27
1542
27
  return true;
1543
27
}
1544
1545
// For all selected reductions, remove all parts except those in the first
1546
// iteration (and the PHI). Replace outside uses of the reduced value with uses
1547
// of the first-iteration reduced value (in other words, reroll the selected
1548
// reductions).
1549
27
void LoopReroll::ReductionTracker::replaceSelected() {
1550
27
  // Fixup reductions to refer to the last instruction associated with the
1551
27
  // first iteration (not the last).
1552
27
  for (int i : Reds) {
1553
8
    int j = 0;
1554
16
    for (int e = PossibleReds[i].size(); j != e; 
++j8
)
1555
16
      if (PossibleRedIter[PossibleReds[i][j]] != 0) {
1556
8
        --j;
1557
8
        break;
1558
8
      }
1559
8
1560
8
    // Replace users with the new end-of-chain value.
1561
8
    SmallInstructionVector Users;
1562
16
    for (User *U : PossibleReds[i].getReducedValue()->users()) {
1563
16
      Users.push_back(cast<Instruction>(U));
1564
16
    }
1565
8
1566
8
    for (Instruction *User : Users)
1567
16
      User->replaceUsesOfWith(PossibleReds[i].getReducedValue(),
1568
16
                              PossibleReds[i][j]);
1569
8
  }
1570
27
}
1571
1572
// Reroll the provided loop with respect to the provided induction variable.
1573
// Generally, we're looking for a loop like this:
1574
//
1575
// %iv = phi [ (preheader, ...), (body, %iv.next) ]
1576
// f(%iv)
1577
// %iv.1 = add %iv, 1                <-- a root increment
1578
// f(%iv.1)
1579
// %iv.2 = add %iv, 2                <-- a root increment
1580
// f(%iv.2)
1581
// %iv.scale_m_1 = add %iv, scale-1  <-- a root increment
1582
// f(%iv.scale_m_1)
1583
// ...
1584
// %iv.next = add %iv, scale
1585
// %cmp = icmp(%iv, ...)
1586
// br %cmp, header, exit
1587
//
1588
// Notably, we do not require that f(%iv), f(%iv.1), etc. be isolated groups of
1589
// instructions. In other words, the instructions in f(%iv), f(%iv.1), etc. can
1590
// be intermixed with eachother. The restriction imposed by this algorithm is
1591
// that the relative order of the isomorphic instructions in f(%iv), f(%iv.1),
1592
// etc. be the same.
1593
//
1594
// First, we collect the use set of %iv, excluding the other increment roots.
1595
// This gives us f(%iv). Then we iterate over the loop instructions (scale-1)
1596
// times, having collected the use set of f(%iv.(i+1)), during which we:
1597
//   - Ensure that the next unmatched instruction in f(%iv) is isomorphic to
1598
//     the next unmatched instruction in f(%iv.(i+1)).
1599
//   - Ensure that both matched instructions don't have any external users
1600
//     (with the exception of last-in-chain reduction instructions).
1601
//   - Track the (aliasing) write set, and other side effects, of all
1602
//     instructions that belong to future iterations that come before the matched
1603
//     instructions. If the matched instructions read from that write set, then
1604
//     f(%iv) or f(%iv.(i+1)) has some dependency on instructions in
1605
//     f(%iv.(j+1)) for some j > i, and we cannot reroll the loop. Similarly,
1606
//     if any of these future instructions had side effects (could not be
1607
//     speculatively executed), and so do the matched instructions, when we
1608
//     cannot reorder those side-effect-producing instructions, and rerolling
1609
//     fails.
1610
//
1611
// Finally, we make sure that all loop instructions are either loop increment
1612
// roots, belong to simple latch code, parts of validated reductions, part of
1613
// f(%iv) or part of some f(%iv.i). If all of that is true (and all reductions
1614
// have been validated), then we reroll the loop.
1615
bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
1616
                        const SCEV *BackedgeTakenCount,
1617
33
                        ReductionTracker &Reductions) {
1618
33
  DAGRootTracker DAGRoots(this, L, IV, SE, AA, TLI, DT, LI, PreserveLCSSA,
1619
33
                          IVToIncMap, LoopControlIV);
1620
33
1621
33
  if (!DAGRoots.findRoots())
1622
2
    return false;
1623
31
  LLVM_DEBUG(dbgs() << "LRR: Found all root induction increments for: " << *IV
1624
31
                    << "\n");
1625
31
1626
31
  if (!DAGRoots.validate(Reductions))
1627
4
    return false;
1628
27
  if (!Reductions.validateSelected())
1629
0
    return false;
1630
27
  // At this point, we've validated the rerolling, and we're committed to
1631
27
  // making changes!
1632
27
1633
27
  Reductions.replaceSelected();
1634
27
  DAGRoots.replace(BackedgeTakenCount);
1635
27
1636
27
  ++NumRerolledLoops;
1637
27
  return true;
1638
27
}
1639
1640
34
bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) {
1641
34
  if (skipLoop(L))
1642
0
    return false;
1643
34
1644
34
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
1645
34
  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1646
34
  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1647
34
  TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
1648
34
  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1649
34
  PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
1650
34
1651
34
  BasicBlock *Header = L->getHeader();
1652
34
  LLVM_DEBUG(dbgs() << "LRR: F[" << Header->getParent()->getName() << "] Loop %"
1653
34
                    << Header->getName() << " (" << L->getNumBlocks()
1654
34
                    << " block(s))\n");
1655
34
1656
34
  // For now, we'll handle only single BB loops.
1657
34
  if (L->getNumBlocks() > 1)
1658
0
    return false;
1659
34
1660
34
  if (!SE->hasLoopInvariantBackedgeTakenCount(L))
1661
1
    return false;
1662
33
1663
33
  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
1664
33
  LLVM_DEBUG(dbgs() << "\n Before Reroll:\n" << *(L->getHeader()) << "\n");
1665
33
  LLVM_DEBUG(dbgs() << "LRR: backedge-taken count = " << *BackedgeTakenCount
1666
33
               << "\n");
1667
33
1668
33
  // First, we need to find the induction variable with respect to which we can
1669
33
  // reroll (there may be several possible options).
1670
33
  SmallInstructionVector PossibleIVs;
1671
33
  IVToIncMap.clear();
1672
33
  LoopControlIV = nullptr;
1673
33
  collectPossibleIVs(L, PossibleIVs);
1674
33
1675
33
  if (PossibleIVs.empty()) {
1676
0
    LLVM_DEBUG(dbgs() << "LRR: No possible IVs found\n");
1677
0
    return false;
1678
0
  }
1679
33
1680
33
  ReductionTracker Reductions;
1681
33
  collectPossibleReductions(L, Reductions);
1682
33
  bool Changed = false;
1683
33
1684
33
  // For each possible IV, collect the associated possible set of 'root' nodes
1685
33
  // (i+1, i+2, etc.).
1686
33
  for (Instruction *PossibleIV : PossibleIVs)
1687
33
    if (reroll(PossibleIV, L, Header, BackedgeTakenCount, Reductions)) {
1688
27
      Changed = true;
1689
27
      break;
1690
27
    }
1691
33
  LLVM_DEBUG(dbgs() << "\n After Reroll:\n" << *(L->getHeader()) << "\n");
1692
33
1693
33
  // Trip count of L has changed so SE must be re-evaluated.
1694
33
  if (Changed)
1695
27
    SE->forgetLoop(L);
1696
33
1697
33
  return Changed;
1698
33
}