Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
Line | Count | Source
1
//===- LoopUnrollPeel.cpp - Loop peeling utilities ------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file implements some loop unrolling utilities for peeling loops
10
// with dynamically inferred (from PGO) trip counts. See LoopUnroll.cpp for
11
// unrolling loops with compile-time constant trip counts.
12
//
13
//===----------------------------------------------------------------------===//
14
15
#include "llvm/ADT/DenseMap.h"
16
#include "llvm/ADT/Optional.h"
17
#include "llvm/ADT/SmallVector.h"
18
#include "llvm/ADT/Statistic.h"
19
#include "llvm/Analysis/LoopInfo.h"
20
#include "llvm/Analysis/LoopIterator.h"
21
#include "llvm/Analysis/ScalarEvolution.h"
22
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
23
#include "llvm/Analysis/TargetTransformInfo.h"
24
#include "llvm/IR/BasicBlock.h"
25
#include "llvm/IR/Dominators.h"
26
#include "llvm/IR/Function.h"
27
#include "llvm/IR/InstrTypes.h"
28
#include "llvm/IR/Instruction.h"
29
#include "llvm/IR/Instructions.h"
30
#include "llvm/IR/LLVMContext.h"
31
#include "llvm/IR/MDBuilder.h"
32
#include "llvm/IR/Metadata.h"
33
#include "llvm/IR/PatternMatch.h"
34
#include "llvm/Support/Casting.h"
35
#include "llvm/Support/CommandLine.h"
36
#include "llvm/Support/Debug.h"
37
#include "llvm/Support/raw_ostream.h"
38
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
39
#include "llvm/Transforms/Utils/Cloning.h"
40
#include "llvm/Transforms/Utils/LoopSimplify.h"
41
#include "llvm/Transforms/Utils/LoopUtils.h"
42
#include "llvm/Transforms/Utils/UnrollLoop.h"
43
#include "llvm/Transforms/Utils/ValueMapper.h"
44
#include <algorithm>
45
#include <cassert>
46
#include <cstdint>
47
#include <limits>
48
49
using namespace llvm;
50
using namespace llvm::PatternMatch;
51
52
#define DEBUG_TYPE "loop-unroll"
53
54
STATISTIC(NumPeeled, "Number of loops peeled");
55
56
static cl::opt<unsigned> UnrollPeelMaxCount(
57
    "unroll-peel-max-count", cl::init(7), cl::Hidden,
58
    cl::desc("Max average trip count which will cause loop peeling."));
59
60
static cl::opt<unsigned> UnrollForcePeelCount(
61
    "unroll-force-peel-count", cl::init(0), cl::Hidden,
62
    cl::desc("Force a peel count regardless of profiling information."));
63
64
static cl::opt<bool> UnrollPeelMultiDeoptExit(
65
    "unroll-peel-multi-deopt-exit", cl::init(true), cl::Hidden,
66
    cl::desc("Allow peeling of loops with multiple deopt exits."));
67
68
// Designates that a Phi is estimated to become invariant after an "infinite"
69
// number of loop iterations (i.e. only may become an invariant if the loop is
70
// fully unrolled).
71
static const unsigned InfiniteIterationsToInvariance =
72
    std::numeric_limits<unsigned>::max();
73
74
// Check whether we are capable of peeling this loop.
75
380k
bool llvm::canPeel(Loop *L) {
76
380k
  // Make sure the loop is in simplified form
77
380k
  if (!L->isLoopSimplifyForm())
78
0
    return false;
79
380k
80
380k
  if (UnrollPeelMultiDeoptExit) {
81
380k
    SmallVector<BasicBlock *, 4> Exits;
82
380k
    L->getUniqueNonLatchExitBlocks(Exits);
83
380k
84
380k
    if (!Exits.empty()) {
85
107k
      // Latch's terminator is a conditional branch, Latch is exiting, and
86
107k
      // all non-Latch exits end up with a deoptimize call.
87
107k
      const BasicBlock *Latch = L->getLoopLatch();
88
107k
      const BranchInst *T = dyn_cast<BranchInst>(Latch->getTerminator());
89
107k
      return T && T->isConditional() && L->isLoopExiting(Latch) &&
90
107k
             all_of(Exits, [](const BasicBlock *BB) {
91
94.8k
               return BB->getTerminatingDeoptimizeCall();
92
94.8k
             });
93
107k
    }
94
273k
  }
95
273k
96
273k
  // Only peel loops that contain a single exit
97
273k
  if (!L->getExitingBlock() || !L->getUniqueExitBlock())
98
521
    return false;
99
272k
100
272k
  // Don't try to peel loops where the latch is not the exiting block.
101
272k
  // This can be an indication of two different things:
102
272k
  // 1) The loop is not rotated.
103
272k
  // 2) The loop contains irreducible control flow that involves the latch.
104
272k
  if (L->getLoopLatch() != L->getExitingBlock())
105
0
    return false;
106
272k
107
272k
  return true;
108
272k
}
109
110
// This function calculates the number of iterations after which the given Phi
111
// becomes an invariant. The pre-calculated values are memoized in the map. The
112
// function (shortcut is I) is calculated according to the following definition:
113
// Given %x = phi <Inputs from above the loop>, ..., [%y, %back.edge].
114
//   If %y is a loop invariant, then I(%x) = 1.
115
//   If %y is a Phi from the loop header, I(%x) = I(%y) + 1.
116
//   Otherwise, I(%x) is infinite.
117
// TODO: Actually if %y is an expression that depends only on Phi %z and some
118
//       loop invariants, we can estimate I(%x) = I(%z) + 1. The example
119
//       looks like:
120
//         %x = phi(0, %a),  <-- becomes invariant starting from 3rd iteration.
121
//         %y = phi(0, 5),
122
//         %a = %y + 1.
123
static unsigned calculateIterationsToInvariance(
124
    PHINode *Phi, Loop *L, BasicBlock *BackEdge,
125
149k
    SmallDenseMap<PHINode *, unsigned> &IterationsToInvariance) {
126
149k
  assert(Phi->getParent() == L->getHeader() &&
127
149k
         "Non-loop Phi should not be checked for turning into invariant.");
128
149k
  assert(BackEdge == L->getLoopLatch() && "Wrong latch?");
129
149k
  // If we already know the answer, take it from the map.
130
149k
  auto I = IterationsToInvariance.find(Phi);
131
149k
  if (I != IterationsToInvariance.end())
132
284
    return I->second;
133
149k
134
149k
  // Otherwise we need to analyze the input from the back edge.
135
149k
  Value *Input = Phi->getIncomingValueForBlock(BackEdge);
136
149k
  // Place infinity to map to avoid infinite recursion for cycled Phis. Such
137
149k
  // cycles can never stop on an invariant.
138
149k
  IterationsToInvariance[Phi] = InfiniteIterationsToInvariance;
139
149k
  unsigned ToInvariance = InfiniteIterationsToInvariance;
140
149k
141
149k
  if (L->isLoopInvariant(Input))
142
116
    ToInvariance = 1u;
143
149k
  else if (PHINode *IncPhi = dyn_cast<PHINode>(Input)) {
144
6.11k
    // Only consider Phis in header block.
145
6.11k
    if (IncPhi->getParent() != L->getHeader())
146
5.83k
      return InfiniteIterationsToInvariance;
147
284
    // If the input becomes an invariant after X iterations, then our Phi
148
284
    // becomes an invariant after X + 1 iterations.
149
284
    unsigned InputToInvariance = calculateIterationsToInvariance(
150
284
        IncPhi, L, BackEdge, IterationsToInvariance);
151
284
    if (InputToInvariance != InfiniteIterationsToInvariance)
152
5
      ToInvariance = InputToInvariance + 1u;
153
284
  }
154
149k
155
149k
  // If we found that this Phi lies in an invariant chain, update the map.
156
149k
  if (ToInvariance != InfiniteIterationsToInvariance)
157
121
    IterationsToInvariance[Phi] = ToInvariance;
158
143k
  return ToInvariance;
159
149k
}
160
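
The definition of I(%x) above is easy to illustrate outside of LLVM. Below is a minimal standalone C++ sketch of the same memoized recursion over a toy phi representation; ToyPhi, its fields, and iterationsToInvariance are illustrative names, not LLVM APIs, and the memoization and cycle-breaking mirror the function above.

#include <iostream>
#include <limits>
#include <unordered_map>

// Toy stand-in for a loop-header phi: its back-edge input is either a loop
// invariant or another header phi (nullptr means "some other instruction").
struct ToyPhi {
  bool BackEdgeIsInvariant = false;
  const ToyPhi *BackEdgePhi = nullptr;
};

static const unsigned Infinite = std::numeric_limits<unsigned>::max();

// I(%x) from the comment above:
//   invariant back-edge input      -> 1
//   back-edge input is header phi  -> I(that phi) + 1
//   anything else                  -> infinite
static unsigned iterationsToInvariance(
    const ToyPhi *Phi, std::unordered_map<const ToyPhi *, unsigned> &Memo) {
  auto It = Memo.find(Phi);
  if (It != Memo.end())
    return It->second;
  Memo[Phi] = Infinite; // break cycles of phis feeding each other
  unsigned Result = Infinite;
  if (Phi->BackEdgeIsInvariant)
    Result = 1;
  else if (Phi->BackEdgePhi) {
    unsigned Input = iterationsToInvariance(Phi->BackEdgePhi, Memo);
    if (Input != Infinite)
      Result = Input + 1;
  }
  if (Result != Infinite)
    Memo[Phi] = Result;
  return Result;
}

int main() {
  ToyPhi Y{true, nullptr}; // %y takes a loop invariant on the back edge
  ToyPhi X{false, &Y};     // %x takes %y on the back edge
  std::unordered_map<const ToyPhi *, unsigned> Memo;
  std::cout << iterationsToInvariance(&X, Memo) << "\n"; // prints 2
  return 0;
}
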
161
// Return the number of iterations to peel off that make conditions in the
162
// body true/false. For example, if we peel 2 iterations off the loop below,
163
// the condition i < 2 can be evaluated at compile time.
164
//  for (i = 0; i < n; i++)
165
//    if (i < 2)
166
//      ..
167
//    else
168
//      ..
169
//   }
170
static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
171
111k
                                         ScalarEvolution &SE) {
172
111k
  assert(L.isLoopSimplifyForm() && "Loop needs to be in loop simplify form");
173
111k
  unsigned DesiredPeelCount = 0;
174
111k
175
194k
  for (auto *BB : L.blocks()) {
176
194k
    auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
177
194k
    if (!BI || BI->isUnconditional())
178
48.1k
      continue;
179
145k
180
145k
    // Ignore loop exit condition.
181
145k
    if (L.getLoopLatch() == BB)
182
111k
      continue;
183
34.3k
184
34.3k
    Value *Condition = BI->getCondition();
185
34.3k
    Value *LeftVal, *RightVal;
186
34.3k
    CmpInst::Predicate Pred;
187
34.3k
    if (!match(Condition, m_ICmp(Pred, m_Value(LeftVal), m_Value(RightVal))))
188
3.27k
      continue;
189
31.0k
190
31.0k
    const SCEV *LeftSCEV = SE.getSCEV(LeftVal);
191
31.0k
    const SCEV *RightSCEV = SE.getSCEV(RightVal);
192
31.0k
193
31.0k
    // Do not consider predicates that are known to be true or false
194
31.0k
    // independently of the loop iteration.
195
31.0k
    if (SE.isKnownPredicate(Pred, LeftSCEV, RightSCEV) ||
196
31.0k
        SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), LeftSCEV,
197
31.0k
                            RightSCEV))
198
11
      continue;
199
31.0k
200
31.0k
    // Check if we have a condition with one AddRec and one non AddRec
201
31.0k
    // expression. Normalize LeftSCEV to be the AddRec.
202
31.0k
    if (!isa<SCEVAddRecExpr>(LeftSCEV)) {
203
27.5k
      if (isa<SCEVAddRecExpr>(RightSCEV)) {
204
77
        std::swap(LeftSCEV, RightSCEV);
205
77
        Pred = ICmpInst::getSwappedPredicate(Pred);
206
77
      } else
207
27.4k
        continue;
208
3.64k
    }
209
3.64k
210
3.64k
    const SCEVAddRecExpr *LeftAR = cast<SCEVAddRecExpr>(LeftSCEV);
211
3.64k
212
3.64k
    // Avoid huge SCEV computations in the loop below, make sure we only
213
3.64k
    // consider AddRecs of the loop we are trying to peel and avoid
214
3.64k
    // non-monotonic predicates, as we will not be able to simplify the loop
215
3.64k
    // body.
216
3.64k
    // FIXME: For the non-monotonic predicates ICMP_EQ and ICMP_NE we can
217
3.64k
    //        simplify the loop, if we peel 1 additional iteration, if there
218
3.64k
    //        is no wrapping.
219
3.64k
    bool Increasing;
220
3.64k
    if (!LeftAR->isAffine() || LeftAR->getLoop() != &L ||
221
3.64k
        !SE.isMonotonicPredicate(LeftAR, Pred, Increasing))
222
3.39k
      continue;
223
252
    (void)Increasing;
224
252
225
252
    // Check if extending the current DesiredPeelCount lets us evaluate Pred
226
252
    // or !Pred in the loop body statically.
227
252
    unsigned NewPeelCount = DesiredPeelCount;
228
252
229
252
    const SCEV *IterVal = LeftAR->evaluateAtIteration(
230
252
        SE.getConstant(LeftSCEV->getType(), NewPeelCount), SE);
231
252
232
252
    // If the original condition is not known, get the negated predicate
233
252
    // (which holds on the else branch) and check if it is known. This allows
234
252
    // us to peel off iterations that make the original condition false.
235
252
    if (!SE.isKnownPredicate(Pred, IterVal, RightSCEV))
236
75
      Pred = ICmpInst::getInversePredicate(Pred);
237
252
238
252
    const SCEV *Step = LeftAR->getStepRecurrence(SE);
239
1.39k
    while (NewPeelCount < MaxPeelCount &&
240
1.39k
           SE.isKnownPredicate(Pred, IterVal, RightSCEV)) {
241
1.14k
      IterVal = SE.getAddExpr(IterVal, Step);
242
1.14k
      NewPeelCount++;
243
1.14k
    }
244
252
245
252
    // Only peel the loop if the monotonic predicate !Pred becomes known in the
246
252
    // first iteration of the loop body after peeling.
247
252
    if (NewPeelCount > DesiredPeelCount &&
248
252
        SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), IterVal,
249
196
                            RightSCEV))
250
13
      DesiredPeelCount = NewPeelCount;
251
252
  }
252
111k
253
111k
  return DesiredPeelCount;
254
111k
}
255
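
The loop above advances an affine induction variable one iteration at a time until the compare can no longer be folded. The same idea can be sketched with plain integers instead of SCEV expressions; this is a simplified model assuming a single {Start,+,Step} recurrence compared against a constant bound via signed less-than, and countToEliminateCompare is an illustrative name, not the actual SCEV-based routine.

#include <cstdint>
#include <iostream>

// Count how many leading iterations of the affine IV {Start,+,Step} satisfy
// "iv < Bound", capped at MaxPeelCount.  Peeling that many iterations lets
// the branch in each peeled copy fold to a constant.
static unsigned countToEliminateCompare(int64_t Start, int64_t Step,
                                        int64_t Bound, unsigned MaxPeelCount) {
  unsigned PeelCount = 0;
  int64_t IterVal = Start; // IV value in iteration number PeelCount
  while (PeelCount < MaxPeelCount && IterVal < Bound) {
    IterVal += Step;
    ++PeelCount;
  }
  // Only worth peeling if the condition is known false in the first
  // remaining iteration (mirrors the "!Pred becomes known" check above).
  return IterVal >= Bound ? PeelCount : 0;
}

int main() {
  // for (i = 0; i < n; i++) if (i < 2) ...  -> peel 2 iterations.
  std::cout << countToEliminateCompare(/*Start=*/0, /*Step=*/1, /*Bound=*/2,
                                       /*MaxPeelCount=*/7)
            << "\n"; // prints 2
  return 0;
}
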
256
// Compute the number of iterations we want to peel off.
257
void llvm::computePeelCount(Loop *L, unsigned LoopSize,
258
                            TargetTransformInfo::UnrollingPreferences &UP,
259
380k
                            unsigned &TripCount, ScalarEvolution &SE) {
260
380k
  assert(LoopSize > 0 && "Zero loop size is not allowed!");
261
380k
  // Save the UP.PeelCount value set by the target in
262
380k
  // TTI.getUnrollingPreferences or by the flag -unroll-peel-count.
263
380k
  unsigned TargetPeelCount = UP.PeelCount;
264
380k
  UP.PeelCount = 0;
265
380k
  if (!canPeel(L))
266
108k
    return;
267
272k
268
272k
  // Only try to peel innermost loops.
269
272k
  if (!L->empty())
270
41.2k
    return;
271
231k
272
231k
  // If the user provided a peel count, use that.
273
231k
  bool UserPeelCount = UnrollForcePeelCount.getNumOccurrences() > 0;
274
231k
  if (UserPeelCount) {
275
8
    LLVM_DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount
276
8
                      << " iterations.\n");
277
8
    UP.PeelCount = UnrollForcePeelCount;
278
8
    return;
279
8
  }
280
231k
281
231k
  // Skip peeling if it's disabled.
282
231k
  if (!UP.AllowPeeling)
283
118k
    return;
284
112k
285
112k
  // Here we try to get rid of Phis which become invariants after 1, 2, ..., N
286
112k
  // iterations of the loop. For this we compute the number of iterations after
287
112k
  // which every Phi is guaranteed to become an invariant, and try to peel the
288
112k
  // maximum number of iterations among these values, thus turning all those
289
112k
  // Phis into invariants.
290
112k
  // First, check that we can peel at least one iteration.
291
112k
  if (2 * LoopSize <= UP.Threshold && UnrollPeelMaxCount > 0) {
292
111k
    // Store the pre-calculated values here.
293
111k
    SmallDenseMap<PHINode *, unsigned> IterationsToInvariance;
294
111k
    // Now go through all Phis to calculate the number of iterations they
295
111k
    // need to become invariants.
296
111k
    // Start the max computation with the UP.PeelCount value set by the target
297
111k
    // in TTI.getUnrollingPreferences or by the flag -unroll-peel-count.
298
111k
    unsigned DesiredPeelCount = TargetPeelCount;
299
111k
    BasicBlock *BackEdge = L->getLoopLatch();
300
111k
    assert(BackEdge && "Loop is not in simplified form?");
301
261k
    for (auto BI = L->getHeader()->begin(); isa<PHINode>(&*BI); ++BI) {
302
149k
      PHINode *Phi = cast<PHINode>(&*BI);
303
149k
      unsigned ToInvariance = calculateIterationsToInvariance(
304
149k
          Phi, L, BackEdge, IterationsToInvariance);
305
149k
      if (ToInvariance != InfiniteIterationsToInvariance)
306
121
        DesiredPeelCount = std::max(DesiredPeelCount, ToInvariance);
307
149k
    }
308
111k
309
111k
    // Respect the limitations implied by loop size and the max peel count.
310
111k
    unsigned MaxPeelCount = UnrollPeelMaxCount;
311
111k
    MaxPeelCount = std::min(MaxPeelCount, UP.Threshold / LoopSize - 1);
312
111k
313
111k
    DesiredPeelCount = std::max(DesiredPeelCount,
314
111k
                                countToEliminateCompares(*L, MaxPeelCount, SE));
315
111k
316
111k
    if (DesiredPeelCount > 0) {
317
110
      DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount);
318
110
      // Consider max peel count limitation.
319
110
      assert(DesiredPeelCount > 0 && "Wrong loop size estimation?");
320
110
      LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount
321
110
                        << " iteration(s) to turn"
322
110
                        << " some Phis into invariants.\n");
323
110
      UP.PeelCount = DesiredPeelCount;
324
110
      return;
325
110
    }
326
112k
  }
327
112k
328
112k
  // Bail if we know the statically calculated trip count.
329
112k
  // In this case we prefer partial unrolling instead.
330
112k
  if (TripCount)
331
13.9k
    return;
332
98.4k
333
98.4k
  // If we don't know the trip count, but have reason to believe the average
334
98.4k
  // trip count is low, peeling should be beneficial, since we will usually
335
98.4k
  // hit the peeled section.
336
98.4k
  // We only do this in the presence of profile information, since otherwise
337
98.4k
  // our estimates of the trip count are not reliable enough.
338
98.4k
  if (L->getHeader()->getParent()->hasProfileData()) {
339
7
    Optional<unsigned> PeelCount = getLoopEstimatedTripCount(L);
340
7
    if (!PeelCount)
341
2
      return;
342
5
343
5
    LLVM_DEBUG(dbgs() << "Profile-based estimated trip count is " << *PeelCount
344
5
                      << "\n");
345
5
346
5
    if (*PeelCount) {
347
3
      if ((*PeelCount <= UnrollPeelMaxCount) &&
348
3
          (LoopSize * (*PeelCount + 1) <= UP.Threshold)) {
349
0
        LLVM_DEBUG(dbgs() << "Peeling first " << *PeelCount
350
0
                          << " iterations.\n");
351
0
        UP.PeelCount = *PeelCount;
352
0
        return;
353
0
      }
354
3
      LLVM_DEBUG(dbgs() << "Requested peel count: " << *PeelCount << "\n");
355
3
      LLVM_DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n");
356
3
      LLVM_DEBUG(dbgs() << "Peel cost: " << LoopSize * (*PeelCount + 1)
357
3
                        << "\n");
358
3
      LLVM_DEBUG(dbgs() << "Max peel cost: " << UP.Threshold << "\n");
359
3
    }
360
5
  }
361
98.4k
}
362
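
computePeelCount only considers peeling when two copies of the body fit under the unroll threshold (2 * LoopSize <= UP.Threshold), and it caps the peel count at min(UnrollPeelMaxCount, UP.Threshold / LoopSize - 1). A minimal standalone sketch of just that clamp follows; clampPeelCount and the sample numbers are made up for illustration, and only the default max count of 7 comes from the flag above.

#include <algorithm>
#include <iostream>

// Size-based clamp used by computePeelCount: the peeled iterations plus one
// remaining copy of the body must stay under the threshold.
static unsigned clampPeelCount(unsigned Desired, unsigned LoopSize,
                               unsigned Threshold,
                               unsigned UnrollPeelMaxCount = 7) {
  if (Desired == 0 || 2 * LoopSize > Threshold)
    return 0; // cannot even peel one iteration
  unsigned MaxPeelCount =
      std::min(UnrollPeelMaxCount, Threshold / LoopSize - 1);
  return std::min(Desired, MaxPeelCount);
}

int main() {
  // LoopSize 30, Threshold 150 -> at most min(7, 150 / 30 - 1) = 4 iterations.
  std::cout << clampPeelCount(/*Desired=*/6, /*LoopSize=*/30,
                              /*Threshold=*/150)
            << "\n"; // prints 4
  return 0;
}
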
363
/// Update the branch weights of the latch of a peeled-off loop
364
/// iteration.
365
/// This sets the branch weights for the latch of the recently peeled off loop
366
/// iteration correctly.
367
/// Let F be the weight of the edge from latch to header.
369
/// Let E be the weight of the edge from latch to exit.
370
/// F/(F+E) is the probability to go to the loop and E/(F+E) is the probability
371
/// to go to the exit.
372
/// Then, Estimated TripCount = F / E.
373
/// For the I-th (counting from 0) peeled-off iteration we set the weights for
374
/// the peeled latch as (TC - I, 1). This gives us a reasonable distribution:
375
/// the probability to go to the exit, 1/(TC-I), increases. At the same time
376
/// the estimated trip count of the remaining loop is reduced by I.
377
/// To avoid dealing with division rounding we can just multiply both parts
378
/// of the weights by E and use the weights (F - I * E, E).
378
///
379
/// \param Header The copy of the header block that belongs to next iteration.
380
/// \param LatchBR The copy of the latch branch that belongs to this iteration.
381
/// \param[in,out] FallThroughWeight The weight of the edge from latch to
382
/// header before peeling (in) and after peeling off one iteration (out).
383
static void updateBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
384
                                uint64_t ExitWeight,
385
153
                                uint64_t &FallThroughWeight) {
386
153
  // A FallThroughWeight of 0 means that there are no branch weights on the
387
153
  // original latch block or the estimated trip count is zero.
388
153
  if (!FallThroughWeight)
389
153
    return;
390
0
391
0
  unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
392
0
  MDBuilder MDB(LatchBR->getContext());
393
0
  MDNode *WeightNode =
394
0
      HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThroughWeight)
395
0
                : MDB.createBranchWeights(FallThroughWeight, ExitWeight);
396
0
  LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
397
0
  FallThroughWeight =
398
0
      FallThroughWeight > ExitWeight ? FallThroughWeight - ExitWeight : 1;
399
0
}
400
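
The bookkeeping documented before updateBranchWeights is plain integer arithmetic: every peeled latch is given the current (fall-through, exit) weight pair, and the fall-through weight is then reduced by the exit weight, so the estimated trip count F / E of the remaining loop drops by one per peeled iteration. A standalone sketch with illustrative weights:

#include <cstdint>
#include <iostream>

int main() {
  // Original latch weights: F (back edge to header) and E (to the exit),
  // i.e. an estimated trip count of F / E = 9.
  uint64_t FallThrough = 90, Exit = 10;
  for (unsigned Iter = 0; Iter < 3; ++Iter) {
    // The latch of peeled iteration Iter gets the weights (F - Iter * E, E),
    // which is exactly the current pair; afterwards F is reduced by E.
    std::cout << "peeled iteration " << Iter << ": (" << FallThrough << ", "
              << Exit << "), remaining trip count ~" << FallThrough / Exit
              << "\n";
    FallThrough = FallThrough > Exit ? FallThrough - Exit : 1;
  }
  // The loop's own latch keeps the final pair (see fixupBranchWeights below).
  std::cout << "loop latch: (" << FallThrough << ", " << Exit << ")\n";
  return 0;
}
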
401
/// Initialize the weights.
402
///
403
/// \param Header The header block.
404
/// \param LatchBR The latch branch.
405
/// \param[out] ExitWeight The weight of the edge from Latch to Exit.
406
/// \param[out] FallThroughWeight The weight of the edge from Latch to Header.
407
static void initBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
408
                              uint64_t &ExitWeight,
409
119
                              uint64_t &FallThroughWeight) {
410
119
  uint64_t TrueWeight, FalseWeight;
411
119
  if (!LatchBR->extractProfMetadata(TrueWeight, FalseWeight))
412
119
    return;
413
0
  unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1;
414
0
  ExitWeight = HeaderIdx ? TrueWeight : FalseWeight;
415
0
  FallThroughWeight = HeaderIdx ? FalseWeight : TrueWeight;
416
0
}
417
418
/// Update the weights of the original Latch block after peeling off all iterations.
419
///
420
/// \param Header The header block.
421
/// \param LatchBR The latch branch.
422
/// \param ExitWeight The weight of the edge from Latch to Exit.
423
/// \param FallThroughWeight The weight of the edge from Latch to Header.
424
static void fixupBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
425
                               uint64_t ExitWeight,
426
119
                               uint64_t FallThroughWeight) {
427
119
  // A FallThroughWeight of 0 means that there are no branch weights on the
428
119
  // original latch block or the estimated trip count is zero.
429
119
  if (!FallThroughWeight)
430
119
    return;
431
0
432
0
  // Sets the branch weights on the loop exit.
433
0
  MDBuilder MDB(LatchBR->getContext());
434
0
  unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1;
435
0
  MDNode *WeightNode =
436
0
      HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThroughWeight)
437
0
                : MDB.createBranchWeights(FallThroughWeight, ExitWeight);
438
0
  LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
439
0
}
440
441
/// Clones the body of the loop L, putting it between \p InsertTop and \p
442
/// InsertBot.
443
/// \param IterNumber The serial number of the iteration currently being
444
/// peeled off.
445
/// \param ExitEdges The exit edges of the original loop.
446
/// \param[out] NewBlocks A list of the blocks in the newly created clone
447
/// \param[out] VMap The value map between the loop and the new clone.
448
/// \param LoopBlocks A helper for DFS-traversal of the loop.
449
/// \param LVMap A value-map that maps instructions from the original loop to
450
/// instructions in the last peeled-off iteration.
451
static void cloneLoopBlocks(
452
    Loop *L, unsigned IterNumber, BasicBlock *InsertTop, BasicBlock *InsertBot,
453
    SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *> > &ExitEdges,
454
    SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
455
    ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT,
456
153
    LoopInfo *LI) {
457
153
  BasicBlock *Header = L->getHeader();
458
153
  BasicBlock *Latch = L->getLoopLatch();
459
153
  BasicBlock *PreHeader = L->getLoopPreheader();
460
153
461
153
  Function *F = Header->getParent();
462
153
  LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
463
153
  LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
464
153
  Loop *ParentLoop = L->getParentLoop();
465
153
466
153
  // For each block in the original loop, create a new copy,
467
153
  // and update the value map with the newly created values.
468
895
  for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; 
++BB742
) {
469
742
    BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".peel", F);
470
742
    NewBlocks.push_back(NewBB);
471
742
472
742
    if (ParentLoop)
473
294
      ParentLoop->addBasicBlockToLoop(NewBB, *LI);
474
742
475
742
    VMap[*BB] = NewBB;
476
742
477
742
    // If dominator tree is available, insert nodes to represent cloned blocks.
478
742
    if (DT) {
479
742
      if (Header == *BB)
480
153
        DT->addNewBlock(NewBB, InsertTop);
481
589
      else {
482
589
        DomTreeNode *IDom = DT->getNode(*BB)->getIDom();
483
589
        // VMap must contain entry for IDom, as the iteration order is RPO.
484
589
        DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDom->getBlock()]));
485
589
      }
486
742
    }
487
742
  }
488
153
489
153
  // Hook-up the control flow for the newly inserted blocks.
490
153
  // The new header is hooked up directly to the "top", which is either
491
153
  // the original loop preheader (for the first iteration) or the previous
492
153
  // iteration's exiting block (for every other iteration)
493
153
  InsertTop->getTerminator()->setSuccessor(0, cast<BasicBlock>(VMap[Header]));
494
153
495
153
  // Similarly, for the latch:
496
153
  // The original exiting edge is still hooked up to the loop exit.
497
153
  // The backedge now goes to the "bottom", which is either the loop's real
498
153
  // header (for the last peeled iteration) or the copied header of the next
499
153
  // iteration (for every other iteration)
500
153
  BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
501
153
  BranchInst *LatchBR = cast<BranchInst>(NewLatch->getTerminator());
502
261
  for (unsigned idx = 0, e = LatchBR->getNumSuccessors(); idx < e; ++idx)
503
261
    if (LatchBR->getSuccessor(idx) == Header) {
504
153
      LatchBR->setSuccessor(idx, InsertBot);
505
153
      break;
506
153
    }
507
153
  if (DT)
508
153
    DT->changeImmediateDominator(InsertBot, NewLatch);
509
153
510
153
  // The new copy of the loop body starts with a bunch of PHI nodes
511
153
  // that pick an incoming value from either the preheader, or the previous
512
153
  // loop iteration. Since this copy is no longer part of the loop, we
513
153
  // resolve this statically:
514
153
  // For the first iteration, we use the value from the preheader directly.
515
153
  // For any other iteration, we replace the phi with the value generated by
516
153
  // the immediately preceding clone of the loop body (which represents
517
153
  // the previous iteration).
518
577
  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); 
++I424
) {
519
424
    PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
520
424
    if (IterNumber == 0) {
521
352
      VMap[&*I] = NewPHI->getIncomingValueForBlock(PreHeader);
522
352
    } else {
523
72
      Value *LatchVal = NewPHI->getIncomingValueForBlock(Latch);
524
72
      Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
525
72
      if (LatchInst && L->contains(LatchInst))
526
68
        VMap[&*I] = LVMap[LatchInst];
527
4
      else
528
4
        VMap[&*I] = LatchVal;
529
72
    }
530
424
    cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
531
424
  }
532
153
533
153
  // Fix up the outgoing values - we need to add a value for the iteration
534
153
  // we've just created. Note that this must happen *after* the incoming
535
153
  // values are adjusted, since the value going out of the latch may also be
536
153
  // a value coming into the header.
537
153
  for (auto Edge : ExitEdges)
538
153
    for (PHINode &PHI : Edge.second->phis()) {
539
92
      Value *LatchVal = PHI.getIncomingValueForBlock(Edge.first);
540
92
      Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
541
92
      if (LatchInst && L->contains(LatchInst))
542
92
        LatchVal = VMap[LatchVal];
543
92
      PHI.addIncoming(LatchVal, cast<BasicBlock>(VMap[Edge.first]));
544
92
    }
545
153
546
153
  // LVMap is updated with the values from the current iteration's clone,
547
153
  // which are used the next time this function is called.
548
153
  for (const auto &KV : VMap)
549
5.54k
    LVMap[KV.first] = KV.second;
550
153
}
551
552
/// Peel off the first \p PeelCount iterations of loop \p L.
553
///
554
/// Note that this does not peel them off as a single straight-line block.
555
/// Rather, each iteration is peeled off separately, and needs to check the
556
/// exit condition.
557
/// For loops that dynamically execute \p PeelCount iterations or less
558
/// this provides a benefit, since the peeled off iterations, which account
559
/// for the bulk of dynamic execution, can be further simplified by scalar
560
/// optimizations.
561
bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
562
                    ScalarEvolution *SE, DominatorTree *DT,
563
119
                    AssumptionCache *AC, bool PreserveLCSSA) {
564
119
  assert(PeelCount > 0 && "Attempt to peel out zero iterations?");
565
119
  assert(canPeel(L) && "Attempt to peel a loop which is not peelable?");
566
119
567
119
  LoopBlocksDFS LoopBlocks(L);
568
119
  LoopBlocks.perform(LI);
569
119
570
119
  BasicBlock *Header = L->getHeader();
571
119
  BasicBlock *PreHeader = L->getLoopPreheader();
572
119
  BasicBlock *Latch = L->getLoopLatch();
573
119
  SmallVector<std::pair<BasicBlock *, BasicBlock *>, 4> ExitEdges;
574
119
  L->getExitEdges(ExitEdges);
575
119
576
119
  DenseMap<BasicBlock *, BasicBlock *> ExitIDom;
577
119
  if (DT) {
578
119
    assert(L->hasDedicatedExits() && "No dedicated exits?");
579
119
    for (auto Edge : ExitEdges) {
580
119
      if (ExitIDom.count(Edge.second))
581
0
        continue;
582
119
      BasicBlock *BB = DT->getNode(Edge.second)->getIDom()->getBlock();
583
119
      assert(L->contains(BB) && "IDom is not in a loop");
584
119
      ExitIDom[Edge.second] = BB;
585
119
    }
586
119
  }
587
119
588
119
  Function *F = Header->getParent();
589
119
590
119
  // Set up all the necessary basic blocks. It is convenient to split the
591
119
  // preheader into 3 parts - two blocks to anchor the peeled copy of the loop
592
119
  // body, and a new preheader for the "real" loop.
593
119
594
119
  // Peeling the first iteration transforms.
595
119
  //
596
119
  // PreHeader:
597
119
  // ...
598
119
  // Header:
599
119
  //   LoopBody
600
119
  //   If (cond) goto Header
601
119
  // Exit:
602
119
  //
603
119
  // into
604
119
  //
605
119
  // InsertTop:
606
119
  //   LoopBody
607
119
  //   If (!cond) goto Exit
608
119
  // InsertBot:
609
119
  // NewPreHeader:
610
119
  // ...
611
119
  // Header:
612
119
  //  LoopBody
613
119
  //  If (cond) goto Header
614
119
  // Exit:
615
119
  //
616
119
  // Each following iteration will split the current bottom anchor in two,
617
119
  // and put the new copy of the loop body between these two blocks. That is,
618
119
  // after peeling another iteration from the example above, we'll split
619
119
  // InsertBot, and get:
620
119
  //
621
119
  // InsertTop:
622
119
  //   LoopBody
623
119
  //   If (!cond) goto Exit
624
119
  // InsertBot:
625
119
  //   LoopBody
626
119
  //   If (!cond) goto Exit
627
119
  // InsertBot.next:
628
119
  // NewPreHeader:
629
119
  // ...
630
119
  // Header:
631
119
  //  LoopBody
632
119
  //  If (cond) goto Header
633
119
  // Exit:
634
119
635
119
  BasicBlock *InsertTop = SplitEdge(PreHeader, Header, DT, LI);
636
119
  BasicBlock *InsertBot =
637
119
      SplitBlock(InsertTop, InsertTop->getTerminator(), DT, LI);
638
119
  BasicBlock *NewPreHeader =
639
119
      SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI);
640
119
641
119
  InsertTop->setName(Header->getName() + ".peel.begin");
642
119
  InsertBot->setName(Header->getName() + ".peel.next");
643
119
  NewPreHeader->setName(PreHeader->getName() + ".peel.newph");
644
119
645
119
  ValueToValueMapTy LVMap;
646
119
647
119
  // If we have branch weight information, we'll want to update it for the
648
119
  // newly created branches.
649
119
  BranchInst *LatchBR =
650
119
      cast<BranchInst>(cast<BasicBlock>(Latch)->getTerminator());
651
119
  uint64_t ExitWeight = 0, FallThroughWeight = 0;
652
119
  initBranchWeights(Header, LatchBR, ExitWeight, FallThroughWeight);
653
119
654
119
  // For each peeled-off iteration, make a copy of the loop.
655
272
  for (unsigned Iter = 0; Iter < PeelCount; 
++Iter153
) {
656
153
    SmallVector<BasicBlock *, 8> NewBlocks;
657
153
    ValueToValueMapTy VMap;
658
153
659
153
    cloneLoopBlocks(L, Iter, InsertTop, InsertBot, ExitEdges, NewBlocks,
660
153
                    LoopBlocks, VMap, LVMap, DT, LI);
661
153
662
153
    // Remap to use values from the current iteration instead of the
663
153
    // previous one.
664
153
    remapInstructionsInBlocks(NewBlocks, VMap);
665
153
666
153
    if (DT) {
667
153
      // Latches of the cloned iterations dominate the loop exits, so the idom
668
153
      // of each exit is the first cloned loop body, just as the original
669
153
      // PreHeader dominates the original loop body.
670
153
      if (Iter == 0)
671
119
        for (auto Exit : ExitIDom)
672
119
          DT->changeImmediateDominator(Exit.first,
673
119
                                       cast<BasicBlock>(LVMap[Exit.second]));
674
#ifdef EXPENSIVE_CHECKS
675
      assert(DT->verify(DominatorTree::VerificationLevel::Fast));
676
#endif
677
    }
678
153
679
153
    auto *LatchBRCopy = cast<BranchInst>(VMap[LatchBR]);
680
153
    updateBranchWeights(InsertBot, LatchBRCopy, ExitWeight, FallThroughWeight);
681
153
    // Remove Loop metadata from the latch branch instruction
682
153
    // because it is not the Loop's latch branch anymore.
683
153
    LatchBRCopy->setMetadata(LLVMContext::MD_loop, nullptr);
684
153
685
153
    InsertTop = InsertBot;
686
153
    InsertBot = SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI);
687
153
    InsertBot->setName(Header->getName() + ".peel.next");
688
153
689
153
    F->getBasicBlockList().splice(InsertTop->getIterator(),
690
153
                                  F->getBasicBlockList(),
691
153
                                  NewBlocks[0]->getIterator(), F->end());
692
153
  }
693
119
694
119
  // Now adjust the phi nodes in the loop header to get their initial values
695
119
  // from the last peeled-off iteration instead of the preheader.
696
471
  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); 
++I352
) {
697
352
    PHINode *PHI = cast<PHINode>(I);
698
352
    Value *NewVal = PHI->getIncomingValueForBlock(Latch);
699
352
    Instruction *LatchInst = dyn_cast<Instruction>(NewVal);
700
352
    if (LatchInst && L->contains(LatchInst))
701
235
      NewVal = LVMap[LatchInst];
702
352
703
352
    PHI->setIncomingValueForBlock(NewPreHeader, NewVal);
704
352
  }
705
119
706
119
  fixupBranchWeights(Header, LatchBR, ExitWeight, FallThroughWeight);
707
119
708
119
  if (Loop *ParentLoop = L->getParentLoop())
709
43
    L = ParentLoop;
710
119
711
119
  // We modified the loop, update SE.
712
119
  SE->forgetTopmostLoop(L);
713
119
714
119
  // Finally, the DomTree must be correct.
715
119
  assert(DT->verify(DominatorTree::VerificationLevel::Fast));
716
119
717
119
  // FIXME: Incrementally update loop-simplify
718
119
  simplifyLoop(L, DT, LI, SE, AC, nullptr, PreserveLCSSA);
719
119
720
119
  NumPeeled++;
721
119
722
119
  return true;
723
119
}