Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
Line
Count
Source (jump to first uncovered line)
1
//===----------------- LoopRotationUtils.cpp -----------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file provides utilities to convert a loop into a loop with bottom test.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "llvm/Transforms/Utils/LoopRotationUtils.h"
14
#include "llvm/ADT/Statistic.h"
15
#include "llvm/Analysis/AliasAnalysis.h"
16
#include "llvm/Analysis/AssumptionCache.h"
17
#include "llvm/Analysis/BasicAliasAnalysis.h"
18
#include "llvm/Analysis/CodeMetrics.h"
19
#include "llvm/Analysis/DomTreeUpdater.h"
20
#include "llvm/Analysis/GlobalsModRef.h"
21
#include "llvm/Analysis/InstructionSimplify.h"
22
#include "llvm/Analysis/LoopPass.h"
23
#include "llvm/Analysis/MemorySSA.h"
24
#include "llvm/Analysis/MemorySSAUpdater.h"
25
#include "llvm/Analysis/ScalarEvolution.h"
26
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
27
#include "llvm/Analysis/TargetTransformInfo.h"
28
#include "llvm/Analysis/ValueTracking.h"
29
#include "llvm/IR/CFG.h"
30
#include "llvm/IR/DebugInfoMetadata.h"
31
#include "llvm/IR/Dominators.h"
32
#include "llvm/IR/Function.h"
33
#include "llvm/IR/IntrinsicInst.h"
34
#include "llvm/IR/Module.h"
35
#include "llvm/Support/CommandLine.h"
36
#include "llvm/Support/Debug.h"
37
#include "llvm/Support/raw_ostream.h"
38
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
39
#include "llvm/Transforms/Utils/Local.h"
40
#include "llvm/Transforms/Utils/LoopUtils.h"
41
#include "llvm/Transforms/Utils/SSAUpdater.h"
42
#include "llvm/Transforms/Utils/ValueMapper.h"
43
using namespace llvm;
44
45
#define DEBUG_TYPE "loop-rotate"
46
47
STATISTIC(NumRotated, "Number of loops rotated");
48
49
namespace {
50
/// A simple loop rotation transformation.
51
class LoopRotate {
52
  const unsigned MaxHeaderSize;
53
  LoopInfo *LI;
54
  const TargetTransformInfo *TTI;
55
  AssumptionCache *AC;
56
  DominatorTree *DT;
57
  ScalarEvolution *SE;
58
  MemorySSAUpdater *MSSAU;
59
  const SimplifyQuery &SQ;
60
  bool RotationOnly;
61
  bool IsUtilMode;
62
63
public:
64
  LoopRotate(unsigned MaxHeaderSize, LoopInfo *LI,
65
             const TargetTransformInfo *TTI, AssumptionCache *AC,
66
             DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
67
             const SimplifyQuery &SQ, bool RotationOnly, bool IsUtilMode)
68
      : MaxHeaderSize(MaxHeaderSize), LI(LI), TTI(TTI), AC(AC), DT(DT), SE(SE),
69
        MSSAU(MSSAU), SQ(SQ), RotationOnly(RotationOnly),
70
402k
        IsUtilMode(IsUtilMode) {}
71
  bool processLoop(Loop *L);
72
73
private:
74
  bool rotateLoop(Loop *L, bool SimplifiedLatch);
75
  bool simplifyLoopLatch(Loop *L);
76
};
77
} // end anonymous namespace
78
79
/// RewriteUsesOfClonedInstructions - We just cloned the instructions from the
80
/// old header into the preheader.  If there were uses of the values produced by
81
/// these instruction that were outside of the loop, we have to insert PHI nodes
82
/// to merge the two values.  Do this now.
83
static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
84
                                            BasicBlock *OrigPreheader,
85
                                            ValueToValueMapTy &ValueMap,
86
40.0k
                                SmallVectorImpl<PHINode*> *InsertedPHIs) {
87
40.0k
  // Remove PHI node entries that are no longer live.
88
40.0k
  BasicBlock::iterator I, E = OrigHeader->end();
89
100k
  for (I = OrigHeader->begin(); PHINode *PN = dyn_cast<PHINode>(I); 
++I60.2k
)
90
60.2k
    PN->removeIncomingValue(PN->getBasicBlockIndex(OrigPreheader));
91
40.0k
92
40.0k
  // Now fix up users of the instructions in OrigHeader, inserting PHI nodes
93
40.0k
  // as necessary.
94
40.0k
  SSAUpdater SSA(InsertedPHIs);
95
212k
  for (I = OrigHeader->begin(); I != E; 
++I172k
) {
96
172k
    Value *OrigHeaderVal = &*I;
97
172k
98
172k
    // If there are no uses of the value (e.g. because it returns void), there
99
172k
    // is nothing to rewrite.
100
172k
    if (OrigHeaderVal->use_empty())
101
41.1k
      continue;
102
131k
103
131k
    Value *OrigPreHeaderVal = ValueMap.lookup(OrigHeaderVal);
104
131k
105
131k
    // The value now exits in two versions: the initial value in the preheader
106
131k
    // and the loop "next" value in the original header.
107
131k
    SSA.Initialize(OrigHeaderVal->getType(), OrigHeaderVal->getName());
108
131k
    SSA.AddAvailableValue(OrigHeader, OrigHeaderVal);
109
131k
    SSA.AddAvailableValue(OrigPreheader, OrigPreHeaderVal);
110
131k
111
131k
    // Visit each use of the OrigHeader instruction.
112
131k
    for (Value::use_iterator UI = OrigHeaderVal->use_begin(),
113
131k
                             UE = OrigHeaderVal->use_end();
114
452k
         UI != UE;) {
115
320k
      // Grab the use before incrementing the iterator.
116
320k
      Use &U = *UI;
117
320k
118
320k
      // Increment the iterator before removing the use from the list.
119
320k
      ++UI;
120
320k
121
320k
      // SSAUpdater can't handle a non-PHI use in the same block as an
122
320k
      // earlier def. We can easily handle those cases manually.
123
320k
      Instruction *UserInst = cast<Instruction>(U.getUser());
124
320k
      if (!isa<PHINode>(UserInst)) {
125
246k
        BasicBlock *UserBB = UserInst->getParent();
126
246k
127
246k
        // The original users in the OrigHeader are already using the
128
246k
        // original definitions.
129
246k
        if (UserBB == OrigHeader)
130
112k
          continue;
131
133k
132
133k
        // Users in the OrigPreHeader need to use the value to which the
133
133k
        // original definitions are mapped.
134
133k
        if (UserBB == OrigPreheader) {
135
0
          U = OrigPreHeaderVal;
136
0
          continue;
137
0
        }
138
208k
      }
139
208k
140
208k
      // Anything else can be handled by SSAUpdater.
141
208k
      SSA.RewriteUse(U);
142
208k
    }
143
131k
144
131k
    // Replace MetadataAsValue(ValueAsMetadata(OrigHeaderVal)) uses in debug
145
131k
    // intrinsics.
146
131k
    SmallVector<DbgValueInst *, 1> DbgValues;
147
131k
    llvm::findDbgValues(DbgValues, OrigHeaderVal);
148
131k
    for (auto &DbgValue : DbgValues) {
149
22
      // The original users in the OrigHeader are already using the original
150
22
      // definitions.
151
22
      BasicBlock *UserBB = DbgValue->getParent();
152
22
      if (UserBB == OrigHeader)
153
12
        continue;
154
10
155
10
      // Users in the OrigPreHeader need to use the value to which the
156
10
      // original definitions are mapped and anything else can be handled by
157
10
      // the SSAUpdater. To avoid adding PHINodes, check if the value is
158
10
      // available in UserBB, if not substitute undef.
159
10
      Value *NewVal;
160
10
      if (UserBB == OrigPreheader)
161
0
        NewVal = OrigPreHeaderVal;
162
10
      else if (SSA.HasValueForBlock(UserBB))
163
6
        NewVal = SSA.GetValueInMiddleOfBlock(UserBB);
164
4
      else
165
4
        NewVal = UndefValue::get(OrigHeaderVal->getType());
166
10
      DbgValue->setOperand(0,
167
10
                           MetadataAsValue::get(OrigHeaderVal->getContext(),
168
10
                                                ValueAsMetadata::get(NewVal)));
169
10
    }
170
131k
  }
171
40.0k
}
172
173
// Look for a phi which is only used outside the loop (via a LCSSA phi)
174
// in the exit from the header. This means that rotating the loop can
175
// remove the phi.
176
48.6k
static bool shouldRotateLoopExitingLatch(Loop *L) {
177
48.6k
  BasicBlock *Header = L->getHeader();
178
48.6k
  BasicBlock *HeaderExit = Header->getTerminator()->getSuccessor(0);
179
48.6k
  if (L->contains(HeaderExit))
180
19.4k
    HeaderExit = Header->getTerminator()->getSuccessor(1);
181
48.6k
182
62.1k
  for (auto &Phi : Header->phis()) {
183
62.1k
    // Look for uses of this phi in the loop/via exits other than the header.
184
77.9k
    if (
llvm::any_of(Phi.users(), [HeaderExit](const User *U) 62.1k
{
185
77.9k
          return cast<Instruction>(U)->getParent() != HeaderExit;
186
77.9k
        }))
187
61.8k
      continue;
188
248
    return true;
189
248
  }
190
48.6k
191
48.6k
  
return false48.4k
;
192
48.6k
}
193
194
/// Rotate loop LP. Return true if the loop is rotated.
195
///
196
/// \param SimplifiedLatch is true if the latch was just folded into the final
197
/// loop exit. In this case we may want to rotate even though the new latch is
198
/// now an exiting branch. This rotation would have happened had the latch not
199
/// been simplified. However, if SimplifiedLatch is false, then we avoid
200
/// rotating loops in which the latch exits to avoid excessive or endless
201
/// rotation. LoopRotate should be repeatable and converge to a canonical
202
/// form. This property is satisfied because simplifying the loop latch can only
203
/// happen once across multiple invocations of the LoopRotate pass.
204
402k
bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
205
402k
  // If the loop has only one block then there is not much to rotate.
206
402k
  if (L->getBlocks().size() == 1)
207
199k
    return false;
208
203k
209
203k
  BasicBlock *OrigHeader = L->getHeader();
210
203k
  BasicBlock *OrigLatch = L->getLoopLatch();
211
203k
212
203k
  BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
213
203k
  if (!BI || 
BI->isUnconditional()198k
)
214
30.6k
    return false;
215
172k
216
172k
  // If the loop header is not one of the loop exiting blocks then
217
172k
  // either this loop is already rotated or it is not
218
172k
  // suitable for loop rotation transformations.
219
172k
  if (!L->isLoopExiting(OrigHeader))
220
83.7k
    return false;
221
88.7k
222
88.7k
  // If the loop latch already contains a branch that leaves the loop then the
223
88.7k
  // loop is already rotated.
224
88.7k
  if (!OrigLatch)
225
0
    return false;
226
88.7k
227
88.7k
  // Rotate if either the loop latch does *not* exit the loop, or if the loop
228
88.7k
  // latch was just simplified. Or if we think it will be profitable.
229
88.7k
  if (L->isLoopExiting(OrigLatch) && 
!SimplifiedLatch49.9k
&&
IsUtilMode == false48.6k
&&
230
88.7k
      
!shouldRotateLoopExitingLatch(L)48.6k
)
231
48.4k
    return false;
232
40.3k
233
40.3k
  // Check size of original header and reject loop if it is very big or we can't
234
40.3k
  // duplicate blocks inside it.
235
40.3k
  {
236
40.3k
    SmallPtrSet<const Value *, 32> EphValues;
237
40.3k
    CodeMetrics::collectEphemeralValues(L, AC, EphValues);
238
40.3k
239
40.3k
    CodeMetrics Metrics;
240
40.3k
    Metrics.analyzeBasicBlock(OrigHeader, *TTI, EphValues);
241
40.3k
    if (Metrics.notDuplicatable) {
242
5
      LLVM_DEBUG(
243
5
          dbgs() << "LoopRotation: NOT rotating - contains non-duplicatable"
244
5
                 << " instructions: ";
245
5
          L->dump());
246
5
      return false;
247
5
    }
248
40.3k
    if (Metrics.convergent) {
249
2
      LLVM_DEBUG(dbgs() << "LoopRotation: NOT rotating - contains convergent "
250
2
                           "instructions: ";
251
2
                 L->dump());
252
2
      return false;
253
2
    }
254
40.3k
    if (Metrics.NumInsts > MaxHeaderSize)
255
363
      return false;
256
40.0k
  }
257
40.0k
258
40.0k
  // Now, this loop is suitable for rotation.
259
40.0k
  BasicBlock *OrigPreheader = L->getLoopPreheader();
260
40.0k
261
40.0k
  // If the loop could not be converted to canonical form, it must have an
262
40.0k
  // indirectbr in it, just give up.
263
40.0k
  if (!OrigPreheader || !L->hasDedicatedExits())
264
2
    return false;
265
40.0k
266
40.0k
  // Anything ScalarEvolution may know about this loop or the PHI nodes
267
40.0k
  // in its header will soon be invalidated. We should also invalidate
268
40.0k
  // all outer loops because insertion and deletion of blocks that happens
269
40.0k
  // during the rotation may violate invariants related to backedge taken
270
40.0k
  // infos in them.
271
40.0k
  if (SE)
272
40.0k
    SE->forgetTopmostLoop(L);
273
40.0k
274
40.0k
  LLVM_DEBUG(dbgs() << "LoopRotation: rotating "; L->dump());
275
40.0k
  if (MSSAU && 
VerifyMemorySSA35
)
276
33
    MSSAU->getMemorySSA()->verifyMemorySSA();
277
40.0k
278
40.0k
  // Find new Loop header. NewHeader is a Header's one and only successor
279
40.0k
  // that is inside loop.  Header's other successor is outside the
280
40.0k
  // loop.  Otherwise loop is not suitable for rotation.
281
40.0k
  BasicBlock *Exit = BI->getSuccessor(0);
282
40.0k
  BasicBlock *NewHeader = BI->getSuccessor(1);
283
40.0k
  if (L->contains(Exit))
284
28.2k
    std::swap(Exit, NewHeader);
285
40.0k
  assert(NewHeader && "Unable to determine new loop header");
286
40.0k
  assert(L->contains(NewHeader) && !L->contains(Exit) &&
287
40.0k
         "Unable to determine loop header and exit blocks");
288
40.0k
289
40.0k
  // This code assumes that the new header has exactly one predecessor.
290
40.0k
  // Remove any single-entry PHI nodes in it.
291
40.0k
  assert(NewHeader->getSinglePredecessor() &&
292
40.0k
         "New header doesn't have one pred!");
293
40.0k
  FoldSingleEntryPHINodes(NewHeader);
294
40.0k
295
40.0k
  // Begin by walking OrigHeader and populating ValueMap with an entry for
296
40.0k
  // each Instruction.
297
40.0k
  BasicBlock::iterator I = OrigHeader->begin(), E = OrigHeader->end();
298
40.0k
  ValueToValueMapTy ValueMap, ValueMapMSSA;
299
40.0k
300
40.0k
  // For PHI nodes, the value available in OldPreHeader is just the
301
40.0k
  // incoming value from OldPreHeader.
302
100k
  for (; PHINode *PN = dyn_cast<PHINode>(I); 
++I60.2k
)
303
60.2k
    ValueMap[PN] = PN->getIncomingValueForBlock(OrigPreheader);
304
40.0k
305
40.0k
  // For the rest of the instructions, either hoist to the OrigPreheader if
306
40.0k
  // possible or create a clone in the OldPreHeader if not.
307
40.0k
  Instruction *LoopEntryBranch = OrigPreheader->getTerminator();
308
40.0k
309
40.0k
  // Record all debug intrinsics preceding LoopEntryBranch to avoid duplication.
310
40.0k
  using DbgIntrinsicHash =
311
40.0k
      std::pair<std::pair<Value *, DILocalVariable *>, DIExpression *>;
312
40.0k
  auto makeHash = [](DbgVariableIntrinsic *D) -> DbgIntrinsicHash {
313
30
    return {{D->getVariableLocation(), D->getVariable()}, D->getExpression()};
314
30
  };
315
40.0k
  SmallDenseSet<DbgIntrinsicHash, 8> DbgIntrinsics;
316
40.0k
  for (auto I = std::next(OrigPreheader->rbegin()), E = OrigPreheader->rend();
317
40.0k
       I != E; 
++I18
) {
318
21.6k
    if (auto *DII = dyn_cast<DbgVariableIntrinsic>(&*I))
319
18
      DbgIntrinsics.insert(makeHash(DII));
320
21.5k
    else
321
21.5k
      break;
322
21.6k
  }
323
40.0k
324
160k
  while (I != E) {
325
120k
    Instruction *Inst = &*I++;
326
120k
327
120k
    // If the instruction's operands are invariant and it doesn't read or write
328
120k
    // memory, then it is safe to hoist.  Doing this doesn't change the order of
329
120k
    // execution in the preheader, but does prevent the instruction from
330
120k
    // executing in each iteration of the loop.  This means it is safe to hoist
331
120k
    // something that might trap, but isn't safe to hoist something that reads
332
120k
    // memory (without proving that the loop doesn't write).
333
120k
    if (L->hasLoopInvariantOperands(Inst) && 
!Inst->mayReadFromMemory()19.8k
&&
334
120k
        
!Inst->mayWriteToMemory()7.92k
&&
!Inst->isTerminator()7.86k
&&
335
120k
        
!isa<DbgInfoIntrinsic>(Inst)7.77k
&&
!isa<AllocaInst>(Inst)7.76k
) {
336
7.76k
      Inst->moveBefore(LoopEntryBranch);
337
7.76k
      continue;
338
7.76k
    }
339
112k
340
112k
    // Otherwise, create a duplicate of the instruction.
341
112k
    Instruction *C = Inst->clone();
342
112k
343
112k
    // Eagerly remap the operands of the instruction.
344
112k
    RemapInstruction(C, ValueMap,
345
112k
                     RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
346
112k
347
112k
    // Avoid inserting the same intrinsic twice.
348
112k
    if (auto *DII = dyn_cast<DbgVariableIntrinsic>(C))
349
12
      if (DbgIntrinsics.count(makeHash(DII))) {
350
6
        C->deleteValue();
351
6
        continue;
352
6
      }
353
112k
354
112k
    // With the operands remapped, see if the instruction constant folds or is
355
112k
    // otherwise simplifyable.  This commonly occurs because the entry from PHI
356
112k
    // nodes allows icmps and other instructions to fold.
357
112k
    Value *V = SimplifyInstruction(C, SQ);
358
112k
    if (V && 
LI->replacementPreservesLCSSAForm(C, V)11.9k
) {
359
11.7k
      // If so, then delete the temporary instruction and stick the folded value
360
11.7k
      // in the map.
361
11.7k
      ValueMap[Inst] = V;
362
11.7k
      if (!C->mayHaveSideEffects()) {
363
11.7k
        C->deleteValue();
364
11.7k
        C = nullptr;
365
11.7k
      }
366
100k
    } else {
367
100k
      ValueMap[Inst] = C;
368
100k
    }
369
112k
    if (C) {
370
100k
      // Otherwise, stick the new instruction into the new block!
371
100k
      C->setName(Inst->getName());
372
100k
      C->insertBefore(LoopEntryBranch);
373
100k
374
100k
      if (auto *II = dyn_cast<IntrinsicInst>(C))
375
152
        if (II->getIntrinsicID() == Intrinsic::assume)
376
0
          AC->registerAssumption(II);
377
100k
      // MemorySSA cares whether the cloned instruction was inserted or not, and
378
100k
      // not whether it can be remapped to a simplified value.
379
100k
      ValueMapMSSA[Inst] = C;
380
100k
    }
381
112k
  }
382
40.0k
383
40.0k
  // Along with all the other instructions, we just cloned OrigHeader's
384
40.0k
  // terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's
385
40.0k
  // successors by duplicating their incoming values for OrigHeader.
386
40.0k
  for (BasicBlock *SuccBB : successors(OrigHeader))
387
80.0k
    for (BasicBlock::iterator BI = SuccBB->begin();
388
100k
         PHINode *PN = dyn_cast<PHINode>(BI); 
++BI20.8k
)
389
20.8k
      PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreheader);
390
40.0k
391
40.0k
  // Now that OrigPreHeader has a clone of OrigHeader's terminator, remove
392
40.0k
  // OrigPreHeader's old terminator (the original branch into the loop), and
393
40.0k
  // remove the corresponding incoming values from the PHI nodes in OrigHeader.
394
40.0k
  LoopEntryBranch->eraseFromParent();
395
40.0k
396
40.0k
  // Update MemorySSA before the rewrite call below changes the 1:1
397
40.0k
  // instruction:cloned_instruction_or_value mapping.
398
40.0k
  if (MSSAU) {
399
35
    ValueMapMSSA[OrigHeader] = OrigPreheader;
400
35
    MSSAU->updateForClonedBlockIntoPred(OrigHeader, OrigPreheader,
401
35
                                        ValueMapMSSA);
402
35
  }
403
40.0k
404
40.0k
  SmallVector<PHINode*, 2> InsertedPHIs;
405
40.0k
  // If there were any uses of instructions in the duplicated block outside the
406
40.0k
  // loop, update them, inserting PHI nodes as required
407
40.0k
  RewriteUsesOfClonedInstructions(OrigHeader, OrigPreheader, ValueMap,
408
40.0k
                                  &InsertedPHIs);
409
40.0k
410
40.0k
  // Attach dbg.value intrinsics to the new phis if that phi uses a value that
411
40.0k
  // previously had debug metadata attached. This keeps the debug info
412
40.0k
  // up-to-date in the loop body.
413
40.0k
  if (!InsertedPHIs.empty())
414
39.2k
    insertDebugValuesForPHIs(OrigHeader, InsertedPHIs);
415
40.0k
416
40.0k
  // NewHeader is now the header of the loop.
417
40.0k
  L->moveToHeader(NewHeader);
418
40.0k
  assert(L->getHeader() == NewHeader && "Latch block is our new header");
419
40.0k
420
40.0k
  // Inform DT about changes to the CFG.
421
40.0k
  if (DT) {
422
40.0k
    // The OrigPreheader branches to the NewHeader and Exit now. Then, inform
423
40.0k
    // the DT about the removed edge to the OrigHeader (that got removed).
424
40.0k
    SmallVector<DominatorTree::UpdateType, 3> Updates;
425
40.0k
    Updates.push_back({DominatorTree::Insert, OrigPreheader, Exit});
426
40.0k
    Updates.push_back({DominatorTree::Insert, OrigPreheader, NewHeader});
427
40.0k
    Updates.push_back({DominatorTree::Delete, OrigPreheader, OrigHeader});
428
40.0k
    DT->applyUpdates(Updates);
429
40.0k
430
40.0k
    if (MSSAU) {
431
35
      MSSAU->applyUpdates(Updates, *DT);
432
35
      if (VerifyMemorySSA)
433
33
        MSSAU->getMemorySSA()->verifyMemorySSA();
434
35
    }
435
40.0k
  }
436
40.0k
437
40.0k
  // At this point, we've finished our major CFG changes.  As part of cloning
438
40.0k
  // the loop into the preheader we've simplified instructions and the
439
40.0k
  // duplicated conditional branch may now be branching on a constant.  If it is
440
40.0k
  // branching on a constant and if that constant means that we enter the loop,
441
40.0k
  // then we fold away the cond branch to an uncond branch.  This simplifies the
442
40.0k
  // loop in cases important for nested loops, and it also means we don't have
443
40.0k
  // to split as many edges.
444
40.0k
  BranchInst *PHBI = cast<BranchInst>(OrigPreheader->getTerminator());
445
40.0k
  assert(PHBI->isConditional() && "Should be clone of BI condbr!");
446
40.0k
  if (!isa<ConstantInt>(PHBI->getCondition()) ||
447
40.0k
      PHBI->getSuccessor(cast<ConstantInt>(PHBI->getCondition())->isZero()) !=
448
30.8k
          NewHeader) {
449
30.8k
    // The conditional branch can't be folded, handle the general case.
450
30.8k
    // Split edges as necessary to preserve LoopSimplify form.
451
30.8k
452
30.8k
    // Right now OrigPreHeader has two successors, NewHeader and ExitBlock, and
453
30.8k
    // thus is not a preheader anymore.
454
30.8k
    // Split the edge to form a real preheader.
455
30.8k
    BasicBlock *NewPH = SplitCriticalEdge(
456
30.8k
        OrigPreheader, NewHeader,
457
30.8k
        CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA());
458
30.8k
    NewPH->setName(NewHeader->getName() + ".lr.ph");
459
30.8k
460
30.8k
    // Preserve canonical loop form, which means that 'Exit' should have only
461
30.8k
    // one predecessor. Note that Exit could be an exit block for multiple
462
30.8k
    // nested loops, causing both of the edges to now be critical and need to
463
30.8k
    // be split.
464
30.8k
    SmallVector<BasicBlock *, 4> ExitPreds(pred_begin(Exit), pred_end(Exit));
465
30.8k
    bool SplitLatchEdge = false;
466
65.7k
    for (BasicBlock *ExitPred : ExitPreds) {
467
65.7k
      // We only need to split loop exit edges.
468
65.7k
      Loop *PredLoop = LI->getLoopFor(ExitPred);
469
65.7k
      if (!PredLoop || 
PredLoop->contains(Exit)43.1k
||
470
65.7k
          
ExitPred->getTerminator()->isIndirectTerminator()35.1k
)
471
30.5k
        continue;
472
35.1k
      SplitLatchEdge |= L->getLoopLatch() == ExitPred;
473
35.1k
      BasicBlock *ExitSplit = SplitCriticalEdge(
474
35.1k
          ExitPred, Exit,
475
35.1k
          CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA());
476
35.1k
      ExitSplit->moveBefore(Exit);
477
35.1k
    }
478
30.8k
    assert(SplitLatchEdge &&
479
30.8k
           "Despite splitting all preds, failed to split latch exit?");
480
30.8k
  } else {
481
9.17k
    // We can fold the conditional branch in the preheader, this makes things
482
9.17k
    // simpler. The first step is to remove the extra edge to the Exit block.
483
9.17k
    Exit->removePredecessor(OrigPreheader, true /*preserve LCSSA*/);
484
9.17k
    BranchInst *NewBI = BranchInst::Create(NewHeader, PHBI);
485
9.17k
    NewBI->setDebugLoc(PHBI->getDebugLoc());
486
9.17k
    PHBI->eraseFromParent();
487
9.17k
488
9.17k
    // With our CFG finalized, update DomTree if it is available.
489
9.17k
    if (DT) DT->deleteEdge(OrigPreheader, Exit);
490
9.17k
491
9.17k
    // Update MSSA too, if available.
492
9.17k
    if (MSSAU)
493
10
      MSSAU->removeEdge(OrigPreheader, Exit);
494
9.17k
  }
495
40.0k
496
40.0k
  assert(L->getLoopPreheader() && "Invalid loop preheader after loop rotation");
497
40.0k
  assert(L->getLoopLatch() && "Invalid loop latch after loop rotation");
498
40.0k
499
40.0k
  if (MSSAU && 
VerifyMemorySSA35
)
500
33
    MSSAU->getMemorySSA()->verifyMemorySSA();
501
40.0k
502
40.0k
  // Now that the CFG and DomTree are in a consistent state again, try to merge
503
40.0k
  // the OrigHeader block into OrigLatch.  This will succeed if they are
504
40.0k
  // connected by an unconditional branch.  This is just a cleanup so the
505
40.0k
  // emitted code isn't too gross in this common case.
506
40.0k
  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
507
40.0k
  MergeBlockIntoPredecessor(OrigHeader, &DTU, LI, MSSAU);
508
40.0k
509
40.0k
  if (MSSAU && 
VerifyMemorySSA35
)
510
33
    MSSAU->getMemorySSA()->verifyMemorySSA();
511
40.0k
512
40.0k
  LLVM_DEBUG(dbgs() << "LoopRotation: into "; L->dump());
513
40.0k
514
40.0k
  ++NumRotated;
515
40.0k
  return true;
516
40.0k
}
517
518
/// Determine whether the instructions in this range may be safely and cheaply
519
/// speculated. This is not an important enough situation to develop complex
520
/// heuristics. We handle a single arithmetic instruction along with any type
521
/// conversions.
522
static bool shouldSpeculateInstrs(BasicBlock::iterator Begin,
523
23.6k
                                  BasicBlock::iterator End, Loop *L) {
524
23.6k
  bool seenIncrement = false;
525
23.6k
  bool MultiExitLoop = false;
526
23.6k
527
23.6k
  if (!L->getExitingBlock())
528
5.17k
    MultiExitLoop = true;
529
23.6k
530
38.9k
  for (BasicBlock::iterator I = Begin; I != End; 
++I15.2k
) {
531
36.1k
532
36.1k
    if (!isSafeToSpeculativelyExecute(&*I))
533
5.91k
      return false;
534
30.2k
535
30.2k
    if (isa<DbgInfoIntrinsic>(I))
536
6
      continue;
537
30.2k
538
30.2k
    switch (I->getOpcode()) {
539
30.2k
    default:
540
4.97k
      return false;
541
30.2k
    case Instruction::GetElementPtr:
542
12.5k
      // GEPs are cheap if all indices are constant.
543
12.5k
      if (!cast<GEPOperator>(I)->hasAllConstantIndices())
544
7.00k
        return false;
545
5.54k
      // fall-thru to increment case
546
5.54k
      LLVM_FALLTHROUGH;
547
11.7k
    case Instruction::Add:
548
11.7k
    case Instruction::Sub:
549
11.7k
    case Instruction::And:
550
11.7k
    case Instruction::Or:
551
11.7k
    case Instruction::Xor:
552
11.7k
    case Instruction::Shl:
553
11.7k
    case Instruction::LShr:
554
11.7k
    case Instruction::AShr: {
555
11.7k
      Value *IVOpnd =
556
11.7k
          !isa<Constant>(I->getOperand(0))
557
11.7k
              ? 
I->getOperand(0)11.6k
558
11.7k
              : 
!isa<Constant>(I->getOperand(1)) 115
?
I->getOperand(1)110
:
nullptr5
;
559
11.7k
      if (!IVOpnd)
560
5
        return false;
561
11.7k
562
11.7k
      // If increment operand is used outside of the loop, this speculation
563
11.7k
      // could cause extra live range interference.
564
11.7k
      if (MultiExitLoop) {
565
7.98k
        for (User *UseI : IVOpnd->users()) {
566
7.98k
          auto *UserInst = cast<Instruction>(UseI);
567
7.98k
          if (!L->contains(UserInst))
568
1.71k
            return false;
569
7.98k
        }
570
3.82k
      }
571
11.7k
572
11.7k
      
if (9.99k
seenIncrement9.99k
)
573
1.27k
        return false;
574
8.71k
      seenIncrement = true;
575
8.71k
      break;
576
8.71k
    }
577
8.71k
    case Instruction::Trunc:
578
6.50k
    case Instruction::ZExt:
579
6.50k
    case Instruction::SExt:
580
6.50k
      // ignore type conversions
581
6.50k
      break;
582
30.2k
    }
583
30.2k
  }
584
23.6k
  
return true2.78k
;
585
23.6k
}
586
587
/// Fold the loop tail into the loop exit by speculating the loop tail
588
/// instructions. Typically, this is a single post-increment. In the case of a
589
/// simple 2-block loop, hoisting the increment can be much better than
590
/// duplicating the entire loop header. In the case of loops with early exits,
591
/// rotation will not work anyway, but simplifyLoopLatch will put the loop in
592
/// canonical form so downstream passes can handle it.
593
///
594
/// I don't believe this invalidates SCEV.
595
402k
bool LoopRotate::simplifyLoopLatch(Loop *L) {
596
402k
  BasicBlock *Latch = L->getLoopLatch();
597
402k
  if (!Latch || 
Latch->hasAddressTaken()402k
)
598
10
    return false;
599
402k
600
402k
  BranchInst *Jmp = dyn_cast<BranchInst>(Latch->getTerminator());
601
402k
  if (!Jmp || 
!Jmp->isUnconditional()401k
)
602
348k
    return false;
603
53.9k
604
53.9k
  BasicBlock *LastExit = Latch->getSinglePredecessor();
605
53.9k
  if (!LastExit || 
!L->isLoopExiting(LastExit)27.4k
)
606
29.1k
    return false;
607
24.8k
608
24.8k
  BranchInst *BI = dyn_cast<BranchInst>(LastExit->getTerminator());
609
24.8k
  if (!BI)
610
1.17k
    return false;
611
23.6k
612
23.6k
  if (!shouldSpeculateInstrs(Latch->begin(), Jmp->getIterator(), L))
613
20.9k
    return false;
614
2.78k
615
2.78k
  LLVM_DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into "
616
2.78k
                    << LastExit->getName() << "\n");
617
2.78k
618
2.78k
  // Hoist the instructions from Latch into LastExit.
619
2.78k
  Instruction *FirstLatchInst = &*(Latch->begin());
620
2.78k
  LastExit->getInstList().splice(BI->getIterator(), Latch->getInstList(),
621
2.78k
                                 Latch->begin(), Jmp->getIterator());
622
2.78k
623
2.78k
  // Update MemorySSA
624
2.78k
  if (MSSAU)
625
6
    MSSAU->moveAllAfterMergeBlocks(Latch, LastExit, FirstLatchInst);
626
2.78k
627
2.78k
  unsigned FallThruPath = BI->getSuccessor(0) == Latch ? 
01.24k
:
11.54k
;
628
2.78k
  BasicBlock *Header = Jmp->getSuccessor(0);
629
2.78k
  assert(Header == L->getHeader() && "expected a backward branch");
630
2.78k
631
2.78k
  // Remove Latch from the CFG so that LastExit becomes the new Latch.
632
2.78k
  BI->setSuccessor(FallThruPath, Header);
633
2.78k
  Latch->replaceSuccessorsPhiUsesWith(LastExit);
634
2.78k
  Jmp->eraseFromParent();
635
2.78k
636
2.78k
  // Nuke the Latch block.
637
2.78k
  assert(Latch->empty() && "unable to evacuate Latch");
638
2.78k
  LI->removeBlock(Latch);
639
2.78k
  if (DT)
640
2.78k
    DT->eraseNode(Latch);
641
2.78k
  Latch->eraseFromParent();
642
2.78k
643
2.78k
  if (MSSAU && 
VerifyMemorySSA6
)
644
6
    MSSAU->getMemorySSA()->verifyMemorySSA();
645
2.78k
646
2.78k
  return true;
647
2.78k
}
648
649
/// Rotate \c L, and return true if any modification was made.
650
402k
bool LoopRotate::processLoop(Loop *L) {
651
402k
  // Save the loop metadata.
652
402k
  MDNode *LoopMD = L->getLoopID();
653
402k
654
402k
  bool SimplifiedLatch = false;
655
402k
656
402k
  // Simplify the loop latch before attempting to rotate the header
657
402k
  // upward. Rotation may not be needed if the loop tail can be folded into the
658
402k
  // loop exit.
659
402k
  if (!RotationOnly)
660
402k
    SimplifiedLatch = simplifyLoopLatch(L);
661
402k
662
402k
  bool MadeChange = rotateLoop(L, SimplifiedLatch);
663
402k
  assert((!MadeChange || L->isLoopExiting(L->getLoopLatch())) &&
664
402k
         "Loop latch should be exiting after loop-rotate.");
665
402k
666
402k
  // Restore the loop metadata.
667
402k
  // NB! We presume LoopRotation DOESN'T ADD its own metadata.
668
402k
  if ((MadeChange || 
SimplifiedLatch362k
) &&
LoopMD41.5k
)
669
8.51k
    L->setLoopID(LoopMD);
670
402k
671
402k
  return MadeChange || 
SimplifiedLatch362k
;
672
402k
}
673
674
675
/// The utility to convert a loop into a loop with bottom test.
676
bool llvm::LoopRotation(Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI,
677
                        AssumptionCache *AC, DominatorTree *DT,
678
                        ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
679
                        const SimplifyQuery &SQ, bool RotationOnly = true,
680
                        unsigned Threshold = unsigned(-1),
681
402k
                        bool IsUtilMode = true) {
682
402k
  if (MSSAU && 
VerifyMemorySSA59
)
683
57
    MSSAU->getMemorySSA()->verifyMemorySSA();
684
402k
  LoopRotate LR(Threshold, LI, TTI, AC, DT, SE, MSSAU, SQ, RotationOnly,
685
402k
                IsUtilMode);
686
402k
  if (MSSAU && 
VerifyMemorySSA59
)
687
57
    MSSAU->getMemorySSA()->verifyMemorySSA();
688
402k
689
402k
  return LR.processLoop(L);
690
402k
}