Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Transforms/Scalar/LoopRotation.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- LoopRotation.cpp - Loop Rotation Pass ------------------------------===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
// This file implements Loop Rotation Pass.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#include "llvm/Transforms/Scalar/LoopRotation.h"
15
#include "llvm/ADT/Statistic.h"
16
#include "llvm/Analysis/AliasAnalysis.h"
17
#include "llvm/Analysis/AssumptionCache.h"
18
#include "llvm/Analysis/BasicAliasAnalysis.h"
19
#include "llvm/Analysis/CodeMetrics.h"
20
#include "llvm/Analysis/GlobalsModRef.h"
21
#include "llvm/Analysis/InstructionSimplify.h"
22
#include "llvm/Analysis/LoopPass.h"
23
#include "llvm/Analysis/ScalarEvolution.h"
24
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
25
#include "llvm/Analysis/TargetTransformInfo.h"
26
#include "llvm/Analysis/ValueTracking.h"
27
#include "llvm/IR/CFG.h"
28
#include "llvm/IR/Dominators.h"
29
#include "llvm/IR/Function.h"
30
#include "llvm/IR/IntrinsicInst.h"
31
#include "llvm/IR/Module.h"
32
#include "llvm/Support/CommandLine.h"
33
#include "llvm/Support/Debug.h"
34
#include "llvm/Support/raw_ostream.h"
35
#include "llvm/Transforms/Scalar.h"
36
#include "llvm/Transforms/Scalar/LoopPassManager.h"
37
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
38
#include "llvm/Transforms/Utils/Local.h"
39
#include "llvm/Transforms/Utils/LoopUtils.h"
40
#include "llvm/Transforms/Utils/SSAUpdater.h"
41
#include "llvm/Transforms/Utils/ValueMapper.h"
42
using namespace llvm;
43
44
#define DEBUG_TYPE "loop-rotate"
45
46
static cl::opt<unsigned> DefaultRotationThreshold(
47
    "rotation-max-header-size", cl::init(16), cl::Hidden,
48
    cl::desc("The default maximum header size for automatic loop rotation"));
49
50
STATISTIC(NumRotated, "Number of loops rotated");
51
52
namespace {
53
/// A simple loop rotation transformation.
54
class LoopRotate {
55
  const unsigned MaxHeaderSize;
56
  LoopInfo *LI;
57
  const TargetTransformInfo *TTI;
58
  AssumptionCache *AC;
59
  DominatorTree *DT;
60
  ScalarEvolution *SE;
61
  const SimplifyQuery &SQ;
62
63
public:
64
  LoopRotate(unsigned MaxHeaderSize, LoopInfo *LI,
65
             const TargetTransformInfo *TTI, AssumptionCache *AC,
66
             DominatorTree *DT, ScalarEvolution *SE, const SimplifyQuery &SQ)
67
      : MaxHeaderSize(MaxHeaderSize), LI(LI), TTI(TTI), AC(AC), DT(DT), SE(SE),
68
700k
        SQ(SQ) {}
69
  bool processLoop(Loop *L);
70
71
private:
72
  bool rotateLoop(Loop *L, bool SimplifiedLatch);
73
  bool simplifyLoopLatch(Loop *L);
74
};
75
} // end anonymous namespace
76
77
/// RewriteUsesOfClonedInstructions - We just cloned the instructions from the
78
/// old header into the preheader.  If there were uses of the values produced by
79
/// these instruction that were outside of the loop, we have to insert PHI nodes
80
/// to merge the two values.  Do this now.
81
static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
82
                                            BasicBlock *OrigPreheader,
83
                                            ValueToValueMapTy &ValueMap,
84
51.1k
                                SmallVectorImpl<PHINode*> *InsertedPHIs) {
85
51.1k
  // Remove PHI node entries that are no longer live.
86
51.1k
  BasicBlock::iterator I, E = OrigHeader->end();
87
132k
  for (I = OrigHeader->begin(); PHINode *
PN132k
= dyn_cast<PHINode>(I);
++I81.0k
)
88
81.0k
    PN->removeIncomingValue(PN->getBasicBlockIndex(OrigPreheader));
89
51.1k
90
51.1k
  // Now fix up users of the instructions in OrigHeader, inserting PHI nodes
91
51.1k
  // as necessary.
92
51.1k
  SSAUpdater SSA(InsertedPHIs);
93
274k
  for (I = OrigHeader->begin(); 
I != E274k
;
++I223k
) {
94
223k
    Value *OrigHeaderVal = &*I;
95
223k
96
223k
    // If there are no uses of the value (e.g. because it returns void), there
97
223k
    // is nothing to rewrite.
98
223k
    if (OrigHeaderVal->use_empty())
99
53.0k
      continue;
100
170k
101
170k
    Value *OrigPreHeaderVal = ValueMap.lookup(OrigHeaderVal);
102
170k
103
170k
    // The value now exits in two versions: the initial value in the preheader
104
170k
    // and the loop "next" value in the original header.
105
170k
    SSA.Initialize(OrigHeaderVal->getType(), OrigHeaderVal->getName());
106
170k
    SSA.AddAvailableValue(OrigHeader, OrigHeaderVal);
107
170k
    SSA.AddAvailableValue(OrigPreheader, OrigPreHeaderVal);
108
170k
109
170k
    // Visit each use of the OrigHeader instruction.
110
170k
    for (Value::use_iterator UI = OrigHeaderVal->use_begin(),
111
170k
                             UE = OrigHeaderVal->use_end();
112
599k
         
UI != UE599k
;) {
113
428k
      // Grab the use before incrementing the iterator.
114
428k
      Use &U = *UI;
115
428k
116
428k
      // Increment the iterator before removing the use from the list.
117
428k
      ++UI;
118
428k
119
428k
      // SSAUpdater can't handle a non-PHI use in the same block as an
120
428k
      // earlier def. We can easily handle those cases manually.
121
428k
      Instruction *UserInst = cast<Instruction>(U.getUser());
122
428k
      if (
!isa<PHINode>(UserInst)428k
) {
123
315k
        BasicBlock *UserBB = UserInst->getParent();
124
315k
125
315k
        // The original users in the OrigHeader are already using the
126
315k
        // original definitions.
127
315k
        if (UserBB == OrigHeader)
128
142k
          continue;
129
172k
130
172k
        // Users in the OrigPreHeader need to use the value to which the
131
172k
        // original definitions are mapped.
132
172k
        
if (172k
UserBB == OrigPreheader172k
) {
133
0
          U = OrigPreHeaderVal;
134
0
          continue;
135
0
        }
136
286k
      }
137
286k
138
286k
      // Anything else can be handled by SSAUpdater.
139
286k
      SSA.RewriteUse(U);
140
286k
    }
141
170k
142
170k
    // Replace MetadataAsValue(ValueAsMetadata(OrigHeaderVal)) uses in debug
143
170k
    // intrinsics.
144
170k
    LLVMContext &C = OrigHeader->getContext();
145
170k
    if (auto *
VAM170k
= ValueAsMetadata::getIfExists(OrigHeaderVal)) {
146
9
      if (auto *
MAV9
= MetadataAsValue::getIfExists(C, VAM)) {
147
21
        for (auto UI = MAV->use_begin(), E = MAV->use_end(); 
UI != E21
;) {
148
12
          // Grab the use before incrementing the iterator. Otherwise, altering
149
12
          // the Use will invalidate the iterator.
150
12
          Use &U = *UI++;
151
12
          DbgInfoIntrinsic *UserInst = dyn_cast<DbgInfoIntrinsic>(U.getUser());
152
12
          if (!UserInst)
153
0
            continue;
154
12
155
12
          // The original users in the OrigHeader are already using the original
156
12
          // definitions.
157
12
          BasicBlock *UserBB = UserInst->getParent();
158
12
          if (UserBB == OrigHeader)
159
6
            continue;
160
6
161
6
          // Users in the OrigPreHeader need to use the value to which the
162
6
          // original definitions are mapped and anything else can be handled by
163
6
          // the SSAUpdater. To avoid adding PHINodes, check if the value is
164
6
          // available in UserBB, if not substitute undef.
165
6
          Value *NewVal;
166
6
          if (UserBB == OrigPreheader)
167
0
            NewVal = OrigPreHeaderVal;
168
6
          else 
if (6
SSA.HasValueForBlock(UserBB)6
)
169
3
            NewVal = SSA.GetValueInMiddleOfBlock(UserBB);
170
6
          else
171
3
            NewVal = UndefValue::get(OrigHeaderVal->getType());
172
12
          U = MetadataAsValue::get(C, ValueAsMetadata::get(NewVal));
173
12
        }
174
9
      }
175
9
    }
176
223k
  }
177
51.1k
}
178
179
/// Propagate dbg.value intrinsics through the newly inserted Phis.
180
static void insertDebugValues(BasicBlock *OrigHeader,
181
50.3k
                              SmallVectorImpl<PHINode*> &InsertedPHIs) {
182
50.3k
  ValueToValueMapTy DbgValueMap;
183
50.3k
184
50.3k
  // Map existing PHI nodes to their dbg.values.
185
220k
  for (auto &I : *OrigHeader) {
186
220k
    if (auto 
DbgII220k
= dyn_cast<DbgInfoIntrinsic>(&I)) {
187
6
      if (auto *Loc = dyn_cast_or_null<PHINode>(DbgII->getVariableLocation()))
188
6
        DbgValueMap.insert({Loc, DbgII});
189
6
    }
190
220k
  }
191
50.3k
192
50.3k
  // Then iterate through the new PHIs and look to see if they use one of the
193
50.3k
  // previously mapped PHIs. If so, insert a new dbg.value intrinsic that will
194
50.3k
  // propagate the info through the new PHI.
195
50.3k
  LLVMContext &C = OrigHeader->getContext();
196
89.3k
  for (auto PHI : InsertedPHIs) {
197
181k
    for (auto VI : PHI->operand_values()) {
198
181k
      auto V = DbgValueMap.find(VI);
199
181k
      if (
V != DbgValueMap.end()181k
) {
200
6
        auto *DbgII = cast<DbgInfoIntrinsic>(V->second);
201
6
        Instruction *NewDbgII = DbgII->clone();
202
6
        auto PhiMAV = MetadataAsValue::get(C, ValueAsMetadata::get(PHI));
203
6
        NewDbgII->setOperand(0, PhiMAV);
204
6
        BasicBlock *Parent = PHI->getParent();
205
6
        NewDbgII->insertBefore(Parent->getFirstNonPHIOrDbgOrLifetime());
206
6
      }
207
181k
    }
208
89.3k
  }
209
50.3k
}
210
211
/// Rotate loop LP. Return true if the loop is rotated.
212
///
213
/// \param SimplifiedLatch is true if the latch was just folded into the final
214
/// loop exit. In this case we may want to rotate even though the new latch is
215
/// now an exiting branch. This rotation would have happened had the latch not
216
/// been simplified. However, if SimplifiedLatch is false, then we avoid
217
/// rotating loops in which the latch exits to avoid excessive or endless
218
/// rotation. LoopRotate should be repeatable and converge to a canonical
219
/// form. This property is satisfied because simplifying the loop latch can only
220
/// happen once across multiple invocations of the LoopRotate pass.
221
700k
bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
222
700k
  // If the loop has only one block then there is not much to rotate.
223
700k
  if (L->getBlocks().size() == 1)
224
348k
    return false;
225
351k
226
351k
  BasicBlock *OrigHeader = L->getHeader();
227
351k
  BasicBlock *OrigLatch = L->getLoopLatch();
228
351k
229
351k
  BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
230
351k
  if (
!BI || 351k
BI->isUnconditional()346k
)
231
56.5k
    return false;
232
295k
233
295k
  // If the loop header is not one of the loop exiting blocks then
234
295k
  // either this loop is already rotated or it is not
235
295k
  // suitable for loop rotation transformations.
236
295k
  
if (295k
!L->isLoopExiting(OrigHeader)295k
)
237
160k
    return false;
238
135k
239
135k
  // If the loop latch already contains a branch that leaves the loop then the
240
135k
  // loop is already rotated.
241
135k
  
if (135k
!OrigLatch135k
)
242
0
    return false;
243
135k
244
135k
  // Rotate if either the loop latch does *not* exit the loop, or if the loop
245
135k
  // latch was just simplified.
246
135k
  
if (135k
L->isLoopExiting(OrigLatch) && 135k
!SimplifiedLatch86.1k
)
247
83.5k
    return false;
248
51.6k
249
51.6k
  // Check size of original header and reject loop if it is very big or we can't
250
51.6k
  // duplicate blocks inside it.
251
51.6k
  {
252
51.6k
    SmallPtrSet<const Value *, 32> EphValues;
253
51.6k
    CodeMetrics::collectEphemeralValues(L, AC, EphValues);
254
51.6k
255
51.6k
    CodeMetrics Metrics;
256
51.6k
    Metrics.analyzeBasicBlock(OrigHeader, *TTI, EphValues);
257
51.6k
    if (
Metrics.notDuplicatable51.6k
) {
258
2
      DEBUG(dbgs() << "LoopRotation: NOT rotating - contains non-duplicatable"
259
2
                   << " instructions: ";
260
2
            L->dump());
261
2
      return false;
262
2
    }
263
51.6k
    
if (51.6k
Metrics.convergent51.6k
) {
264
1
      DEBUG(dbgs() << "LoopRotation: NOT rotating - contains convergent "
265
1
                      "instructions: ";
266
1
            L->dump());
267
1
      return false;
268
1
    }
269
51.6k
    
if (51.6k
Metrics.NumInsts > MaxHeaderSize51.6k
)
270
498
      return false;
271
51.1k
  }
272
51.1k
273
51.1k
  // Now, this loop is suitable for rotation.
274
51.1k
  BasicBlock *OrigPreheader = L->getLoopPreheader();
275
51.1k
276
51.1k
  // If the loop could not be converted to canonical form, it must have an
277
51.1k
  // indirectbr in it, just give up.
278
51.1k
  if (!OrigPreheader)
279
0
    return false;
280
51.1k
281
51.1k
  // Anything ScalarEvolution may know about this loop or the PHI nodes
282
51.1k
  // in its header will soon be invalidated.
283
51.1k
  
if (51.1k
SE51.1k
)
284
51.1k
    SE->forgetLoop(L);
285
51.1k
286
51.1k
  DEBUG(dbgs() << "LoopRotation: rotating "; L->dump());
287
51.1k
288
51.1k
  // Find new Loop header. NewHeader is a Header's one and only successor
289
51.1k
  // that is inside loop.  Header's other successor is outside the
290
51.1k
  // loop.  Otherwise loop is not suitable for rotation.
291
51.1k
  BasicBlock *Exit = BI->getSuccessor(0);
292
51.1k
  BasicBlock *NewHeader = BI->getSuccessor(1);
293
51.1k
  if (L->contains(Exit))
294
36.8k
    std::swap(Exit, NewHeader);
295
51.1k
  assert(NewHeader && "Unable to determine new loop header");
296
51.1k
  assert(L->contains(NewHeader) && !L->contains(Exit) &&
297
51.1k
         "Unable to determine loop header and exit blocks");
298
51.1k
299
51.1k
  // This code assumes that the new header has exactly one predecessor.
300
51.1k
  // Remove any single-entry PHI nodes in it.
301
51.1k
  assert(NewHeader->getSinglePredecessor() &&
302
51.1k
         "New header doesn't have one pred!");
303
51.1k
  FoldSingleEntryPHINodes(NewHeader);
304
51.1k
305
51.1k
  // Begin by walking OrigHeader and populating ValueMap with an entry for
306
51.1k
  // each Instruction.
307
51.1k
  BasicBlock::iterator I = OrigHeader->begin(), E = OrigHeader->end();
308
51.1k
  ValueToValueMapTy ValueMap;
309
51.1k
310
51.1k
  // For PHI nodes, the value available in OldPreHeader is just the
311
51.1k
  // incoming value from OldPreHeader.
312
132k
  for (; PHINode *
PN132k
= dyn_cast<PHINode>(I);
++I81.0k
)
313
81.0k
    ValueMap[PN] = PN->getIncomingValueForBlock(OrigPreheader);
314
51.1k
315
51.1k
  // For the rest of the instructions, either hoist to the OrigPreheader if
316
51.1k
  // possible or create a clone in the OldPreHeader if not.
317
51.1k
  TerminatorInst *LoopEntryBranch = OrigPreheader->getTerminator();
318
201k
  while (
I != E201k
) {
319
150k
    Instruction *Inst = &*I++;
320
150k
321
150k
    // If the instruction's operands are invariant and it doesn't read or write
322
150k
    // memory, then it is safe to hoist.  Doing this doesn't change the order of
323
150k
    // execution in the preheader, but does prevent the instruction from
324
150k
    // executing in each iteration of the loop.  This means it is safe to hoist
325
150k
    // something that might trap, but isn't safe to hoist something that reads
326
150k
    // memory (without proving that the loop doesn't write).
327
150k
    if (
L->hasLoopInvariantOperands(Inst) && 150k
!Inst->mayReadFromMemory()22.2k
&&
328
150k
        
!Inst->mayWriteToMemory()8.22k
&&
!isa<TerminatorInst>(Inst)8.16k
&&
329
150k
        
!isa<DbgInfoIntrinsic>(Inst)8.06k
&&
!isa<AllocaInst>(Inst)8.06k
) {
330
8.05k
      Inst->moveBefore(LoopEntryBranch);
331
8.05k
      continue;
332
8.05k
    }
333
142k
334
142k
    // Otherwise, create a duplicate of the instruction.
335
142k
    Instruction *C = Inst->clone();
336
142k
337
142k
    // Eagerly remap the operands of the instruction.
338
142k
    RemapInstruction(C, ValueMap,
339
142k
                     RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
340
142k
341
142k
    // With the operands remapped, see if the instruction constant folds or is
342
142k
    // otherwise simplifyable.  This commonly occurs because the entry from PHI
343
142k
    // nodes allows icmps and other instructions to fold.
344
142k
    Value *V = SimplifyInstruction(C, SQ);
345
142k
    if (
V && 142k
LI->replacementPreservesLCSSAForm(C, V)17.0k
) {
346
16.6k
      // If so, then delete the temporary instruction and stick the folded value
347
16.6k
      // in the map.
348
16.6k
      ValueMap[Inst] = V;
349
16.6k
      if (
!C->mayHaveSideEffects()16.6k
) {
350
16.6k
        C->deleteValue();
351
16.6k
        C = nullptr;
352
16.6k
      }
353
142k
    } else {
354
125k
      ValueMap[Inst] = C;
355
125k
    }
356
142k
    if (
C142k
) {
357
125k
      // Otherwise, stick the new instruction into the new block!
358
125k
      C->setName(Inst->getName());
359
125k
      C->insertBefore(LoopEntryBranch);
360
125k
361
125k
      if (auto *II = dyn_cast<IntrinsicInst>(C))
362
135
        
if (135
II->getIntrinsicID() == Intrinsic::assume135
)
363
0
          AC->registerAssumption(II);
364
125k
    }
365
150k
  }
366
51.1k
367
51.1k
  // Along with all the other instructions, we just cloned OrigHeader's
368
51.1k
  // terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's
369
51.1k
  // successors by duplicating their incoming values for OrigHeader.
370
51.1k
  TerminatorInst *TI = OrigHeader->getTerminator();
371
51.1k
  for (BasicBlock *SuccBB : TI->successors())
372
102k
    for (BasicBlock::iterator BI = SuccBB->begin();
373
131k
         PHINode *
PN131k
= dyn_cast<PHINode>(BI);
++BI29.2k
)
374
29.2k
      PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreheader);
375
51.1k
376
51.1k
  // Now that OrigPreHeader has a clone of OrigHeader's terminator, remove
377
51.1k
  // OrigPreHeader's old terminator (the original branch into the loop), and
378
51.1k
  // remove the corresponding incoming values from the PHI nodes in OrigHeader.
379
51.1k
  LoopEntryBranch->eraseFromParent();
380
51.1k
381
51.1k
382
51.1k
  SmallVector<PHINode*, 2> InsertedPHIs;
383
51.1k
  // If there were any uses of instructions in the duplicated block outside the
384
51.1k
  // loop, update them, inserting PHI nodes as required
385
51.1k
  RewriteUsesOfClonedInstructions(OrigHeader, OrigPreheader, ValueMap,
386
51.1k
                                  &InsertedPHIs);
387
51.1k
388
51.1k
  // Attach dbg.value intrinsics to the new phis if that phi uses a value that
389
51.1k
  // previously had debug metadata attached. This keeps the debug info
390
51.1k
  // up-to-date in the loop body.
391
51.1k
  if (!InsertedPHIs.empty())
392
50.3k
    insertDebugValues(OrigHeader, InsertedPHIs);
393
51.1k
394
51.1k
  // NewHeader is now the header of the loop.
395
51.1k
  L->moveToHeader(NewHeader);
396
51.1k
  assert(L->getHeader() == NewHeader && "Latch block is our new header");
397
51.1k
398
51.1k
  // Inform DT about changes to the CFG.
399
51.1k
  if (
DT51.1k
) {
400
51.1k
    // The OrigPreheader branches to the NewHeader and Exit now. Then, inform
401
51.1k
    // the DT about the removed edge to the OrigHeader (that got removed).
402
51.1k
    SmallVector<DominatorTree::UpdateType, 3> Updates;
403
51.1k
    Updates.push_back({DominatorTree::Insert, OrigPreheader, Exit});
404
51.1k
    Updates.push_back({DominatorTree::Insert, OrigPreheader, NewHeader});
405
51.1k
    Updates.push_back({DominatorTree::Delete, OrigPreheader, OrigHeader});
406
51.1k
    DT->applyUpdates(Updates);
407
51.1k
  }
408
51.1k
409
51.1k
  // At this point, we've finished our major CFG changes.  As part of cloning
410
51.1k
  // the loop into the preheader we've simplified instructions and the
411
51.1k
  // duplicated conditional branch may now be branching on a constant.  If it is
412
51.1k
  // branching on a constant and if that constant means that we enter the loop,
413
51.1k
  // then we fold away the cond branch to an uncond branch.  This simplifies the
414
51.1k
  // loop in cases important for nested loops, and it also means we don't have
415
51.1k
  // to split as many edges.
416
51.1k
  BranchInst *PHBI = cast<BranchInst>(OrigPreheader->getTerminator());
417
51.1k
  assert(PHBI->isConditional() && "Should be clone of BI condbr!");
418
51.1k
  if (!isa<ConstantInt>(PHBI->getCondition()) ||
419
13.1k
      PHBI->getSuccessor(cast<ConstantInt>(PHBI->getCondition())->isZero()) !=
420
51.1k
          NewHeader) {
421
38.0k
    // The conditional branch can't be folded, handle the general case.
422
38.0k
    // Split edges as necessary to preserve LoopSimplify form.
423
38.0k
424
38.0k
    // Right now OrigPreHeader has two successors, NewHeader and ExitBlock, and
425
38.0k
    // thus is not a preheader anymore.
426
38.0k
    // Split the edge to form a real preheader.
427
38.0k
    BasicBlock *NewPH = SplitCriticalEdge(
428
38.0k
        OrigPreheader, NewHeader,
429
38.0k
        CriticalEdgeSplittingOptions(DT, LI).setPreserveLCSSA());
430
38.0k
    NewPH->setName(NewHeader->getName() + ".lr.ph");
431
38.0k
432
38.0k
    // Preserve canonical loop form, which means that 'Exit' should have only
433
38.0k
    // one predecessor. Note that Exit could be an exit block for multiple
434
38.0k
    // nested loops, causing both of the edges to now be critical and need to
435
38.0k
    // be split.
436
38.0k
    SmallVector<BasicBlock *, 4> ExitPreds(pred_begin(Exit), pred_end(Exit));
437
38.0k
    bool SplitLatchEdge = false;
438
81.1k
    for (BasicBlock *ExitPred : ExitPreds) {
439
81.1k
      // We only need to split loop exit edges.
440
81.1k
      Loop *PredLoop = LI->getLoopFor(ExitPred);
441
81.1k
      if (
!PredLoop || 81.1k
PredLoop->contains(Exit)55.4k
)
442
37.6k
        continue;
443
43.4k
      
if (43.4k
isa<IndirectBrInst>(ExitPred->getTerminator())43.4k
)
444
1
        continue;
445
43.4k
      SplitLatchEdge |= L->getLoopLatch() == ExitPred;
446
43.4k
      BasicBlock *ExitSplit = SplitCriticalEdge(
447
43.4k
          ExitPred, Exit,
448
43.4k
          CriticalEdgeSplittingOptions(DT, LI).setPreserveLCSSA());
449
43.4k
      ExitSplit->moveBefore(Exit);
450
43.4k
    }
451
38.0k
    assert(SplitLatchEdge &&
452
38.0k
           "Despite splitting all preds, failed to split latch exit?");
453
51.1k
  } else {
454
13.1k
    // We can fold the conditional branch in the preheader, this makes things
455
13.1k
    // simpler. The first step is to remove the extra edge to the Exit block.
456
13.1k
    Exit->removePredecessor(OrigPreheader, true /*preserve LCSSA*/);
457
13.1k
    BranchInst *NewBI = BranchInst::Create(NewHeader, PHBI);
458
13.1k
    NewBI->setDebugLoc(PHBI->getDebugLoc());
459
13.1k
    PHBI->eraseFromParent();
460
13.1k
461
13.1k
    // With our CFG finalized, update DomTree if it is available.
462
13.1k
    if (
DT13.1k
)
DT->deleteEdge(OrigPreheader, Exit)13.1k
;
463
13.1k
  }
464
51.1k
465
51.1k
  assert(L->getLoopPreheader() && "Invalid loop preheader after loop rotation");
466
51.1k
  assert(L->getLoopLatch() && "Invalid loop latch after loop rotation");
467
51.1k
468
51.1k
  // Now that the CFG and DomTree are in a consistent state again, try to merge
469
51.1k
  // the OrigHeader block into OrigLatch.  This will succeed if they are
470
51.1k
  // connected by an unconditional branch.  This is just a cleanup so the
471
51.1k
  // emitted code isn't too gross in this common case.
472
51.1k
  MergeBlockIntoPredecessor(OrigHeader, DT, LI);
473
51.1k
474
51.1k
  DEBUG(dbgs() << "LoopRotation: into "; L->dump());
475
700k
476
700k
  ++NumRotated;
477
700k
  return true;
478
700k
}
479
480
/// Determine whether the instructions in this range may be safely and cheaply
481
/// speculated. This is not an important enough situation to develop complex
482
/// heuristics. We handle a single arithmetic instruction along with any type
483
/// conversions.
484
static bool shouldSpeculateInstrs(BasicBlock::iterator Begin,
485
31.7k
                                  BasicBlock::iterator End, Loop *L) {
486
31.7k
  bool seenIncrement = false;
487
31.7k
  bool MultiExitLoop = false;
488
31.7k
489
31.7k
  if (!L->getExitingBlock())
490
6.83k
    MultiExitLoop = true;
491
31.7k
492
52.4k
  for (BasicBlock::iterator I = Begin; 
I != End52.4k
;
++I20.6k
) {
493
46.9k
494
46.9k
    if (!isSafeToSpeculativelyExecute(&*I))
495
6.75k
      return false;
496
40.2k
497
40.2k
    
if (40.2k
isa<DbgInfoIntrinsic>(I)40.2k
)
498
3
      continue;
499
40.2k
500
40.2k
    switch (I->getOpcode()) {
501
6.08k
    default:
502
6.08k
      return false;
503
16.5k
    case Instruction::GetElementPtr:
504
16.5k
      // GEPs are cheap if all indices are constant.
505
16.5k
      if (!cast<GEPOperator>(I)->hasAllConstantIndices())
506
9.78k
        return false;
507
6.73k
      // fall-thru to increment case
508
6.73k
      
LLVM_FALLTHROUGH6.73k
;
509
14.5k
    case Instruction::Add:
510
14.5k
    case Instruction::Sub:
511
14.5k
    case Instruction::And:
512
14.5k
    case Instruction::Or:
513
14.5k
    case Instruction::Xor:
514
14.5k
    case Instruction::Shl:
515
14.5k
    case Instruction::LShr:
516
14.5k
    case Instruction::AShr: {
517
14.5k
      Value *IVOpnd =
518
14.5k
          !isa<Constant>(I->getOperand(0))
519
14.3k
              ? I->getOperand(0)
520
220
              : 
!isa<Constant>(I->getOperand(1)) ? 220
I->getOperand(1)217
:
nullptr3
;
521
14.5k
      if (!IVOpnd)
522
3
        return false;
523
14.5k
524
14.5k
      // If increment operand is used outside of the loop, this speculation
525
14.5k
      // could cause extra live range interference.
526
14.5k
      
if (14.5k
MultiExitLoop14.5k
) {
527
9.24k
        for (User *UseI : IVOpnd->users()) {
528
9.24k
          auto *UserInst = cast<Instruction>(UseI);
529
9.24k
          if (!L->contains(UserInst))
530
1.90k
            return false;
531
12.6k
        }
532
4.32k
      }
533
12.6k
534
12.6k
      
if (12.6k
seenIncrement12.6k
)
535
1.82k
        return false;
536
10.7k
      seenIncrement = true;
537
10.7k
      break;
538
10.7k
    }
539
9.83k
    case Instruction::Trunc:
540
9.83k
    case Instruction::ZExt:
541
9.83k
    case Instruction::SExt:
542
9.83k
      // ignore type conversions
543
9.83k
      break;
544
46.9k
    }
545
46.9k
  }
546
5.43k
  return true;
547
31.7k
}
548
549
/// Fold the loop tail into the loop exit by speculating the loop tail
550
/// instructions. Typically, this is a single post-increment. In the case of a
551
/// simple 2-block loop, hoisting the increment can be much better than
552
/// duplicating the entire loop header. In the case of loops with early exits,
553
/// rotation will not work anyway, but simplifyLoopLatch will put the loop in
554
/// canonical form so downstream passes can handle it.
555
///
556
/// I don't believe this invalidates SCEV.
557
700k
bool LoopRotate::simplifyLoopLatch(Loop *L) {
558
700k
  BasicBlock *Latch = L->getLoopLatch();
559
700k
  if (
!Latch || 700k
Latch->hasAddressTaken()700k
)
560
13
    return false;
561
700k
562
700k
  BranchInst *Jmp = dyn_cast<BranchInst>(Latch->getTerminator());
563
700k
  if (
!Jmp || 700k
!Jmp->isUnconditional()700k
)
564
627k
    return false;
565
73.2k
566
73.2k
  BasicBlock *LastExit = Latch->getSinglePredecessor();
567
73.2k
  if (
!LastExit || 73.2k
!L->isLoopExiting(LastExit)38.1k
)
568
39.1k
    return false;
569
34.0k
570
34.0k
  BranchInst *BI = dyn_cast<BranchInst>(LastExit->getTerminator());
571
34.0k
  if (!BI)
572
2.30k
    return false;
573
31.7k
574
31.7k
  
if (31.7k
!shouldSpeculateInstrs(Latch->begin(), Jmp->getIterator(), L)31.7k
)
575
26.3k
    return false;
576
5.43k
577
5.43k
  
DEBUG5.43k
(dbgs() << "Folding loop latch " << Latch->getName() << " into "
578
5.43k
               << LastExit->getName() << "\n");
579
5.43k
580
5.43k
  // Hoist the instructions from Latch into LastExit.
581
5.43k
  LastExit->getInstList().splice(BI->getIterator(), Latch->getInstList(),
582
5.43k
                                 Latch->begin(), Jmp->getIterator());
583
5.43k
584
5.43k
  unsigned FallThruPath = BI->getSuccessor(0) == Latch ? 
02.60k
:
12.82k
;
585
5.43k
  BasicBlock *Header = Jmp->getSuccessor(0);
586
5.43k
  assert(Header == L->getHeader() && "expected a backward branch");
587
5.43k
588
5.43k
  // Remove Latch from the CFG so that LastExit becomes the new Latch.
589
5.43k
  BI->setSuccessor(FallThruPath, Header);
590
5.43k
  Latch->replaceSuccessorsPhiUsesWith(LastExit);
591
5.43k
  Jmp->eraseFromParent();
592
5.43k
593
5.43k
  // Nuke the Latch block.
594
5.43k
  assert(Latch->empty() && "unable to evacuate Latch");
595
5.43k
  LI->removeBlock(Latch);
596
5.43k
  if (DT)
597
5.43k
    DT->eraseNode(Latch);
598
700k
  Latch->eraseFromParent();
599
700k
  return true;
600
700k
}
601
602
/// Rotate \c L, and return true if any modification was made.
603
700k
bool LoopRotate::processLoop(Loop *L) {
604
700k
  // Save the loop metadata.
605
700k
  MDNode *LoopMD = L->getLoopID();
606
700k
607
700k
  // Simplify the loop latch before attempting to rotate the header
608
700k
  // upward. Rotation may not be needed if the loop tail can be folded into the
609
700k
  // loop exit.
610
700k
  bool SimplifiedLatch = simplifyLoopLatch(L);
611
700k
612
700k
  bool MadeChange = rotateLoop(L, SimplifiedLatch);
613
700k
  assert((!MadeChange || L->isLoopExiting(L->getLoopLatch())) &&
614
700k
         "Loop latch should be exiting after loop-rotate.");
615
700k
616
700k
  // Restore the loop metadata.
617
700k
  // NB! We presume LoopRotation DOESN'T ADD its own metadata.
618
700k
  if (
(MadeChange || 700k
SimplifiedLatch649k
) &&
LoopMD54.0k
)
619
2.91k
    L->setLoopID(LoopMD);
620
700k
621
700k
  return MadeChange;
622
700k
}
623
624
LoopRotatePass::LoopRotatePass(bool EnableHeaderDuplication)
625
95
    : EnableHeaderDuplication(EnableHeaderDuplication) {}
626
627
PreservedAnalyses LoopRotatePass::run(Loop &L, LoopAnalysisManager &AM,
628
                                      LoopStandardAnalysisResults &AR,
629
59
                                      LPMUpdater &) {
630
59
  int Threshold = EnableHeaderDuplication ? 
DefaultRotationThreshold56
:
03
;
631
59
  const DataLayout &DL = L.getHeader()->getModule()->getDataLayout();
632
59
  const SimplifyQuery SQ = getBestSimplifyQuery(AR, DL);
633
59
  LoopRotate LR(Threshold, &AR.LI, &AR.TTI, &AR.AC, &AR.DT, &AR.SE,
634
59
                SQ);
635
59
636
59
  bool Changed = LR.processLoop(&L);
637
59
  if (!Changed)
638
49
    return PreservedAnalyses::all();
639
10
640
10
  return getLoopPassPreservedAnalyses();
641
10
}
642
643
namespace {
644
645
class LoopRotateLegacyPass : public LoopPass {
646
  unsigned MaxHeaderSize;
647
648
public:
649
  static char ID; // Pass ID, replacement for typeid
650
34.5k
  LoopRotateLegacyPass(int SpecifiedMaxHeaderSize = -1) : LoopPass(ID) {
651
34.5k
    initializeLoopRotateLegacyPassPass(*PassRegistry::getPassRegistry());
652
34.5k
    if (SpecifiedMaxHeaderSize == -1)
653
32.0k
      MaxHeaderSize = DefaultRotationThreshold;
654
34.5k
    else
655
2.51k
      MaxHeaderSize = unsigned(SpecifiedMaxHeaderSize);
656
34.5k
  }
657
658
  // LCSSA form makes instruction renaming easier.
659
34.5k
  void getAnalysisUsage(AnalysisUsage &AU) const override {
660
34.5k
    AU.addRequired<AssumptionCacheTracker>();
661
34.5k
    AU.addRequired<TargetTransformInfoWrapperPass>();
662
34.5k
    getLoopAnalysisUsage(AU);
663
34.5k
  }
664
665
700k
  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
666
700k
    if (skipLoop(L))
667
40
      return false;
668
700k
    Function &F = *L->getHeader()->getParent();
669
700k
670
700k
    auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
671
700k
    const auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
672
700k
    auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
673
700k
    auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
674
700k
    auto *DT = DTWP ? 
&DTWP->getDomTree()700k
:
nullptr0
;
675
700k
    auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
676
700k
    auto *SE = SEWP ? 
&SEWP->getSE()700k
:
nullptr0
;
677
700k
    const SimplifyQuery SQ = getBestSimplifyQuery(*this, F);
678
700k
    LoopRotate LR(MaxHeaderSize, LI, TTI, AC, DT, SE, SQ);
679
700k
    return LR.processLoop(L);
680
700k
  }
681
};
682
}
683
684
char LoopRotateLegacyPass::ID = 0;
685
41.6k
INITIALIZE_PASS_BEGIN41.6k
(LoopRotateLegacyPass, "loop-rotate", "Rotate Loops",
686
41.6k
                      false, false)
687
41.6k
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
688
41.6k
INITIALIZE_PASS_DEPENDENCY(LoopPass)
689
41.6k
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
690
41.6k
INITIALIZE_PASS_END(LoopRotateLegacyPass, "loop-rotate", "Rotate Loops", false,
691
                    false)
692
693
34.5k
Pass *llvm::createLoopRotatePass(int MaxHeaderSize) {
694
34.5k
  return new LoopRotateLegacyPass(MaxHeaderSize);
695
34.5k
}