Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp

//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value.  This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable.  If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
//
//===----------------------------------------------------------------------===//

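// Illustrative sketch of transformation #2 (not from the upstream sources):
// for a loop such as
//   for (i = 0; i != n; ++i) {}
//   use(i);
// the outside use is rewritten to 'use(n)', after which the loop itself is
// dead and can be removed by a later pass.
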
#include "llvm/Transforms/Scalar/IndVarSimplify.h"
27
#include "llvm/ADT/APFloat.h"
28
#include "llvm/ADT/APInt.h"
29
#include "llvm/ADT/ArrayRef.h"
30
#include "llvm/ADT/DenseMap.h"
31
#include "llvm/ADT/None.h"
32
#include "llvm/ADT/Optional.h"
33
#include "llvm/ADT/STLExtras.h"
34
#include "llvm/ADT/SmallSet.h"
35
#include "llvm/ADT/SmallPtrSet.h"
36
#include "llvm/ADT/SmallVector.h"
37
#include "llvm/ADT/Statistic.h"
38
#include "llvm/ADT/iterator_range.h"
39
#include "llvm/Analysis/LoopInfo.h"
40
#include "llvm/Analysis/LoopPass.h"
41
#include "llvm/Analysis/ScalarEvolution.h"
42
#include "llvm/Analysis/ScalarEvolutionExpander.h"
43
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
44
#include "llvm/Analysis/TargetLibraryInfo.h"
45
#include "llvm/Analysis/TargetTransformInfo.h"
46
#include "llvm/Analysis/ValueTracking.h"
47
#include "llvm/Transforms/Utils/Local.h"
48
#include "llvm/IR/BasicBlock.h"
49
#include "llvm/IR/Constant.h"
50
#include "llvm/IR/ConstantRange.h"
51
#include "llvm/IR/Constants.h"
52
#include "llvm/IR/DataLayout.h"
53
#include "llvm/IR/DerivedTypes.h"
54
#include "llvm/IR/Dominators.h"
55
#include "llvm/IR/Function.h"
56
#include "llvm/IR/IRBuilder.h"
57
#include "llvm/IR/InstrTypes.h"
58
#include "llvm/IR/Instruction.h"
59
#include "llvm/IR/Instructions.h"
60
#include "llvm/IR/IntrinsicInst.h"
61
#include "llvm/IR/Intrinsics.h"
62
#include "llvm/IR/Module.h"
63
#include "llvm/IR/Operator.h"
64
#include "llvm/IR/PassManager.h"
65
#include "llvm/IR/PatternMatch.h"
66
#include "llvm/IR/Type.h"
67
#include "llvm/IR/Use.h"
68
#include "llvm/IR/User.h"
69
#include "llvm/IR/Value.h"
70
#include "llvm/IR/ValueHandle.h"
71
#include "llvm/Pass.h"
72
#include "llvm/Support/Casting.h"
73
#include "llvm/Support/CommandLine.h"
74
#include "llvm/Support/Compiler.h"
75
#include "llvm/Support/Debug.h"
76
#include "llvm/Support/ErrorHandling.h"
77
#include "llvm/Support/MathExtras.h"
78
#include "llvm/Support/raw_ostream.h"
79
#include "llvm/Transforms/Scalar.h"
80
#include "llvm/Transforms/Scalar/LoopPassManager.h"
81
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
82
#include "llvm/Transforms/Utils/LoopUtils.h"
83
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
84
#include <cassert>
85
#include <cstdint>
86
#include <utility>
87
88
using namespace llvm;

#define DEBUG_TYPE "indvars"

STATISTIC(NumWidened     , "Number of indvars widened");
STATISTIC(NumReplaced    , "Number of exit values replaced");
STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");

// Trip count verification can be enabled by default under NDEBUG if we
// implement a strong expression equivalence checker in SCEV. Until then, we
// use the verify-indvars flag, which may assert in some cases.
static cl::opt<bool> VerifyIndvars(
  "verify-indvars", cl::Hidden,
  cl::desc("Verify the ScalarEvolution result after running indvars"));

enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, NoHardUse, AlwaysRepl };

static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
               clEnumValN(OnlyCheapRepl, "cheap",
                          "only replace exit value when the cost is cheap"),
               clEnumValN(NoHardUse, "noharduse",
                          "only replace exit values when loop def likely dead"),
               clEnumValN(AlwaysRepl, "always",
                          "always replace exit value whenever possible")));

static cl::opt<bool> UsePostIncrementRanges(
  "indvars-post-increment-ranges", cl::Hidden,
  cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
  cl::init(true));

static cl::opt<bool>
DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
            cl::desc("Disable Linear Function Test Replace optimization"));

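// Illustrative usage of the flags above (a sketch assuming a stand-alone
// 'opt' invocation; not part of the upstream sources):
//   opt -indvars -replexitval=never -disable-lftr input.ll -S
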
namespace {

struct RewritePhi;

class IndVarSimplify {
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;

  SmallVector<WeakTrackingVH, 16> DeadInsts;

  bool isValidRewrite(Value *FromVal, Value *ToVal);

  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  bool rewriteNonIntegerIVs(Loop *L);

  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
  bool optimizeLoopExits(Loop *L);

  bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
  bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
  bool rewriteFirstIterationLoopExitValues(Loop *L);
  bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const;

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {}

  bool run(Loop *L);
};

} // end anonymous namespace

/// Return true if the SCEV expansion generated by the rewriter can replace the
/// original value. SCEV guarantees that it produces the same value, but the way
/// it is produced may be illegal IR.  Ideally, this function will only be
/// called for verification.
bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
  // If an SCEV expression subsumed multiple pointers, its expansion could
  // reassociate the GEP changing the base pointer. This is illegal because the
  // final address produced by a GEP chain must be inbounds relative to its
  // underlying object. Otherwise basic alias analysis, among other things,
  // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
  // producing an expression involving multiple pointers. Until then, we must
  // bail out here.
  //
  // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
  // because it understands lcssa phis while SCEV does not.
  Value *FromPtr = FromVal;
  Value *ToPtr = ToVal;
  if (auto *GEP = dyn_cast<GEPOperator>(FromVal)) {
    FromPtr = GEP->getPointerOperand();
  }
  if (auto *GEP = dyn_cast<GEPOperator>(ToVal)) {
    ToPtr = GEP->getPointerOperand();
  }
  if (FromPtr != FromVal || ToPtr != ToVal) {
    // Quickly check the common case
    if (FromPtr == ToPtr)
      return true;

    // SCEV may have rewritten an expression that produces the GEP's pointer
    // operand. That's ok as long as the pointer operand has the same base
    // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
    // base of a recurrence. This handles the case in which SCEV expansion
    // converts a pointer type recurrence into a nonrecurrent pointer base
    // indexed by an integer recurrence.

    // If the GEP base pointer is a vector of pointers, abort.
    if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
      return false;

    const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
    const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
    if (FromBase == ToBase)
      return true;

    LLVM_DEBUG(dbgs() << "INDVARS: GEP rewrite bail out " << *FromBase
                      << " != " << *ToBase << "\n");

    return false;
  }
  return true;
}

/// Determine the insertion point for this user. By default, insert immediately
/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
/// common dominator for the incoming blocks. A nullptr can be returned if no
/// viable location is found: it may happen if User is a PHI and Def only comes
/// to this PHI from unreachable blocks.
static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
                                          DominatorTree *DT, LoopInfo *LI) {
  PHINode *PHI = dyn_cast<PHINode>(User);
  if (!PHI)
    return User;

  Instruction *InsertPt = nullptr;
  for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
    if (PHI->getIncomingValue(i) != Def)
      continue;

    BasicBlock *InsertBB = PHI->getIncomingBlock(i);

    if (!DT->isReachableFromEntry(InsertBB))
      continue;

    if (!InsertPt) {
      InsertPt = InsertBB->getTerminator();
      continue;
    }
    InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
    InsertPt = InsertBB->getTerminator();
  }

  // If we have skipped all inputs, it means that Def only comes to Phi from
  // unreachable blocks.
  if (!InsertPt)
    return nullptr;

  auto *DefI = dyn_cast<Instruction>(Def);
  if (!DefI)
    return InsertPt;

  assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");

  auto *L = LI->getLoopFor(DefI->getParent());
  assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));

  for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
    if (LI->getLoopFor(DTN->getBlock()) == L)
      return DTN->getBlock()->getTerminator();

  llvm_unreachable("DefI dominates InsertPt!");
}

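// Illustrative sketch (not from the upstream sources): if Def reaches a PHI
// 'User' through two incoming blocks BB1 and BB2, the code above settles on
// the terminator of the nearest common dominator of BB1 and BB2, then walks
// up the dominator tree until it reaches a block at Def's loop level.
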
//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t
  uint64_t UIntVal;
  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}

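// Illustrative behavior (not from the upstream sources): an APFloat holding
// 3.0 converts exactly and yields IntVal == 3, whereas 3.5 rounds toward zero
// inexactly (isExact == false), so the helper returns false for it.
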
/// If the loop has a floating-point induction variable, then insert a
/// corresponding integer induction variable if possible.
/// For example,
/// for(double i = 0; i < 10000; ++i)
///   bar(i)
/// is converted into
/// for(int i = 0; i < 10000; ++i)
///   bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge     = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check IV increment. Reject this PN if the increment operation is not
  // an add or if the increment value cannot be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd)
    return false;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
  // only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop.  If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find new predicate for integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false; // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can.  This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations.  Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return false;
  }

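  // Worked example of the checks above (illustrative, not from the upstream
  // sources): for init = 0, exit = 10, stride = 3 with ICMP_SLT, Range = 10
  // and Leftover = 1, so the integer IV lands on 12 rather than 10; the
  // transform is still safe because int32_t(10 + 3) does not wrap below the
  // exit value.
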
  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
    BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                              Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison.  The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI);

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI);

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value.  In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
  }
  return true;
}

bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step.  Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs;
  for (PHINode &PN : Header->phis())
    PHIs.push_back(&PN);

  bool Changed = false;
  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      Changed |= handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // re-writing, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}

namespace {

// Collect information about PHI nodes which can be transformed in
// rewriteLoopExitValues.
struct RewritePhi {
  PHINode *PN;

  // Index of the incoming value being rewritten.
  unsigned Ith;

  // Exit value after expansion.
  Value *Val;

  // True if expanding the exit value is high cost.
  bool HighCost;

  RewritePhi(PHINode *P, unsigned I, Value *V, bool H)
      : PN(P), Ith(I), Val(V), HighCost(H) {}
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// rewriteLoopExitValues - Optimize IV users outside the loop.
// As a side effect, reduces the amount of IV processing within the loop.
//===----------------------------------------------------------------------===//

bool IndVarSimplify::hasHardUserWithinLoop(const Loop *L,
                                           const Instruction *I) const {
  SmallPtrSet<const Instruction *, 8> Visited;
  SmallVector<const Instruction *, 8> WorkList;
  Visited.insert(I);
  WorkList.push_back(I);
  while (!WorkList.empty()) {
    const Instruction *Curr = WorkList.pop_back_val();
    // This use is outside the loop, nothing to do.
    if (!L->contains(Curr))
      continue;
    // Do we assume it is a "hard" use which will not be eliminated easily?
    if (Curr->mayHaveSideEffects())
      return true;
    // Otherwise, add all its users to worklist.
    for (auto U : Curr->users()) {
      auto *UI = cast<Instruction>(U);
      if (Visited.insert(UI).second)
        WorkList.push_back(UI);
    }
  }
  return false;
}

/// Check to see if this loop has a computable loop-invariant execution count.
/// If so, this means that we can compute the final value of any expressions
/// that are recurrent in the loop, and substitute the exit values from the loop
/// into any instructions outside of the loop that use the final values of the
/// current expressions.
///
/// This is mostly redundant with the regular IndVarSimplify activities that
/// happen later, except that it's more powerful in some cases, because it's
/// able to brute-force evaluate arbitrary instructions as long as they have
/// constant operands at the beginning of the loop.
bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
  // Check a pre-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  SmallVector<BasicBlock*, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  SmallVector<RewritePhi, 8> RewritePhiSet;
  // Find all values that are computed inside the loop, but used outside of it.
  // Because of LCSSA, these values will only occur in LCSSA PHI Nodes.  Scan
  // the exit blocks of the loop to find them.
  for (BasicBlock *ExitBB : ExitBlocks) {
    // If there are no PHI nodes in this exit block, then no values defined
    // inside the loop are used on this path, skip it.
    PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
    if (!PN) continue;

    unsigned NumPreds = PN->getNumIncomingValues();

    // Iterate over all of the PHI nodes.
    BasicBlock::iterator BBI = ExitBB->begin();
    while ((PN = dyn_cast<PHINode>(BBI++))) {
      if (PN->use_empty())
        continue; // dead use, don't replace it

      if (!SE->isSCEVable(PN->getType()))
        continue;

      // It's necessary to tell ScalarEvolution about this explicitly so that
      // it can walk the def-use list and forget all SCEVs, as it may not be
      // watching the PHI itself. Once the new exit value is in place, there
      // may not be a def-use connection between the loop and every instruction
      // which got a SCEVAddRecExpr for that loop.
      SE->forgetValue(PN);

      // Iterate over all of the values in all the PHI nodes.
      for (unsigned i = 0; i != NumPreds; ++i) {
        // If the value being merged in is not integer or is not defined
        // in the loop, skip it.
        Value *InVal = PN->getIncomingValue(i);
        if (!isa<Instruction>(InVal))
          continue;

        // If this pred is for a subloop, not L itself, skip it.
        if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
          continue; // The Block is in a subloop, skip it.

        // Check that InVal is defined in the loop.
        Instruction *Inst = cast<Instruction>(InVal);
        if (!L->contains(Inst))
          continue;

        // Okay, this instruction has a user outside of the current loop
        // and varies predictably *inside* the loop.  Evaluate the value it
        // contains when the loop exits, if possible.
        const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
        if (!SE->isLoopInvariant(ExitValue, L) ||
            !isSafeToExpand(ExitValue, *SE))
          continue;

        // Computing the value outside of the loop brings no benefit if it is
        // definitely used inside the loop in a way which can not be optimized
        // away.  Avoid doing so unless we know we have a value which computes
        // the ExitValue already.  TODO: This should be merged into SCEV
        // expander to leverage its knowledge of existing expressions.
        if (ReplaceExitValue != AlwaysRepl &&
            !isa<SCEVConstant>(ExitValue) && !isa<SCEVUnknown>(ExitValue) &&
            hasHardUserWithinLoop(L, Inst))
          continue;

        bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
        Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);

        LLVM_DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal
                          << '\n'
                          << "  LoopVal = " << *Inst << "\n");

        if (!isValidRewrite(Inst, ExitVal)) {
          DeadInsts.push_back(ExitVal);
          continue;
        }

#ifndef NDEBUG
        // If we reuse an instruction from a loop which is neither L nor one of
        // its containing loops, we end up breaking LCSSA form for this loop by
        // creating a new use of its instruction.
        if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
          if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
            if (EVL != L)
              assert(EVL->contains(L) && "LCSSA breach detected!");
#endif

        // Collect all the candidate PHINodes to be rewritten.
        RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost);
      }
    }
  }

  bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);

  bool Changed = false;
  // Transformation.
  for (const RewritePhi &Phi : RewritePhiSet) {
    PHINode *PN = Phi.PN;
    Value *ExitVal = Phi.Val;

    // Only do the rewrite when the ExitValue can be expanded cheaply.
    // If LoopCanBeDel is true, rewrite exit value aggressively.
    if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
      DeadInsts.push_back(ExitVal);
      continue;
    }

    Changed = true;
    ++NumReplaced;
    Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
    PN->setIncomingValue(Phi.Ith, ExitVal);

    // If this instruction is dead now, delete it. Don't do it now to avoid
    // invalidating iterators.
    if (isInstructionTriviallyDead(Inst, TLI))
      DeadInsts.push_back(Inst);

    // Replace PN with ExitVal if that is legal and does not break LCSSA.
    if (PN->getNumIncomingValues() == 1 &&
        LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
      PN->replaceAllUsesWith(ExitVal);
      PN->eraseFromParent();
    }
  }

  // The insertion point instruction may have been deleted; clear it out
  // so that the rewriter doesn't trip over it later.
  Rewriter.clearInsertPoint();
  return Changed;
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// the loop header.
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all?  (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.)  Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
        }
      }
    }
  }
  return MadeAnyChanges;
}

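// Illustrative sketch (not from the upstream sources): given
//   for (i = Start; ...; ++i)
//     if (loop_invariant_cond) break;
// an exit PHI that merges the header PHI for 'i' along the break edge can be
// rewritten to use 'Start', since that exit can only be taken on the first
// iteration.
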
/// Check whether it is possible to delete the loop after rewriting exit
/// value. If it is possible, ignore ReplaceExitValue and do rewriting
/// aggressively.
bool IndVarSimplify::canLoopBeDeleted(
    Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
  BasicBlock *Preheader = L->getLoopPreheader();
  // If there is no preheader, the loop will not be deleted.
  if (!Preheader)
    return false;

  // In the LoopDeletion pass, a loop can be deleted even when
  // ExitingBlocks.size() > 1; we sidestep the multiple-ExitingBlocks case
  // here for simplicity.
  // TODO: If we see a test case where a loop with multiple exiting blocks can
  // be deleted after exit value rewriting, we can enhance the logic here.
  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);
  if (ExitBlocks.size() > 1 || ExitingBlocks.size() > 1)
    return false;

  BasicBlock *ExitBlock = ExitBlocks[0];
  BasicBlock::iterator BI = ExitBlock->begin();
  while (PHINode *P = dyn_cast<PHINode>(BI)) {
    Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);

    // If the Incoming value of P is found in RewritePhiSet, we know it
    // could be rewritten to use a loop invariant value in transformation
    // phase later. Skip it in the loop invariant check below.
    bool found = false;
    for (const RewritePhi &Phi : RewritePhiSet) {
      unsigned i = Phi.Ith;
      if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
        found = true;
        break;
      }
    }

    Instruction *I;
    if (!found && (I = dyn_cast<Instruction>(Incoming)))
      if (!L->hasLoopInvariantOperands(I))
        return false;

    ++BI;
  }

  for (auto *BB : L->blocks())
    if (llvm::any_of(*BB, [](Instruction &I) {
          return I.mayHaveSideEffects();
        }))
      return false;

  return true;
}

//===----------------------------------------------------------------------===//
//  IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

namespace {

// Collect information about induction variables that are used by sign/zero
// extend operations. This information is recorded by CollectExtend and
// provides the input to WidenIV.
struct WideIVInfo {
  PHINode *NarrowIV = nullptr;

  // Widest integer type created by a [sz]ext.
  Type *WidestNativeType = nullptr;

  // Was a sext user seen before a zext?
  bool IsSigned = false;
};

} // end anonymous namespace

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later).  This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetics on the wider indvar are more
  // expensive than those on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable. We
  // could compute more comprehensively the cost of all instructions on the
  // induction variable when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its first user, arbitrarily.
  if (WI.IsSigned != IsSigned)
    return;

  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
}

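// Illustrative example (not from the upstream sources): for an i32 IV whose
// users include both 'sext i32 -> i64' and 'zext i32 -> i64' casts, the first
// cast visited fixes WI.IsSigned, and WI.WidestNativeType becomes i64 if the
// target's DataLayout declares 64-bit integers legal.
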
namespace {

/// Record a link in the Narrow IV def-use chain along with the WideIV that
/// computes the same value as the Narrow IV def.  This avoids caching Use*
/// pointers.
struct NarrowIVDefUse {
  Instruction *NarrowDef = nullptr;
  Instruction *NarrowUse = nullptr;
  Instruction *WideDef = nullptr;

  // True if the narrow def is never negative.  Tracking this information lets
  // us use a sign extension instead of a zero extension or vice versa, when
  // profitable and legal.
  bool NeverNegative = false;

  NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
                 bool NeverNegative)
      : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
        NeverNegative(NeverNegative) {}
};

/// The goal of this transform is to remove sign and zero extends without
/// creating any new induction variables. To do this, it creates a new phi of
/// the wider type and redirects all users, either removing extends or inserting
/// truncs whenever we stop propagating the type.
class WidenIV {
  // Parameters
  PHINode *OrigPhi;
  Type *WideType;

  // Context
  LoopInfo        *LI;
  Loop            *L;
  ScalarEvolution *SE;
  DominatorTree   *DT;

  // Does the module have any calls to the llvm.experimental.guard intrinsic
  // at all? If not we can avoid scanning instructions looking for guards.
  bool HasGuards;

  // Result
  PHINode *WidePhi = nullptr;
  Instruction *WideInc = nullptr;
  const SCEV *WideIncExpr = nullptr;
  SmallVectorImpl<WeakTrackingVH> &DeadInsts;

  SmallPtrSet<Instruction *,16> Widened;
  SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;

  enum ExtendKind { ZeroExtended, SignExtended, Unknown };

  // A map tracking the kind of extension used to widen each narrow IV
  // and narrow IV user.
  // Key: pointer to a narrow IV or IV user.
  // Value: the kind of extension used to widen this Instruction.
  DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;

  using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;

  // A map with control-dependent ranges for post increment IV uses. The key is
  // a pair of IV def and a use of this def denoting the context. The value is
  // a ConstantRange representing possible values of the def at the given
  // context.
  DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;

  Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
                                              Instruction *UseI) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    return It == PostIncRangeInfos.end()
               ? Optional<ConstantRange>(None)
               : Optional<ConstantRange>(It->second);
  }

  void calculatePostIncRanges(PHINode *OrigPhi);
  void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);

  void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    if (It == PostIncRangeInfos.end())
      PostIncRangeInfos.insert({Key, R});
    else
      It->second = R.intersectWith(It->second);
  }

public:
  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
          DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
          bool HasGuards)
      : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
        L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
        HasGuards(HasGuards), DeadInsts(DI) {
    assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
    ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
  }

  PHINode *createWideIV(SCEVExpander &Rewriter);

protected:
  Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
                          Instruction *Use);

  Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
  Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
                                     const SCEVAddRecExpr *WideAR);
  Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);

  ExtendKind getExtendKind(Instruction *I);

  using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;

  WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);

  WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);

  const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                              unsigned OpCode) const;

  Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);

  bool widenLoopCompare(NarrowIVDefUse DU);
  bool widenWithVariantLoadUse(NarrowIVDefUse DU);
  void widenWithVariantLoadUseCodegen(NarrowIVDefUse DU);

  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};

} // end anonymous namespace

Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
                                 bool IsSigned, Instruction *Use) {
  // Set the debug location and conservative insertion point.
  IRBuilder<> Builder(Use);
  // Hoist the insertion point into loop preheaders as far as possible.
  for (const Loop *L = LI->getLoopFor(Use->getParent());
       L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
       L = L->getParentLoop())
    Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());

  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
                    Builder.CreateZExt(NarrowOper, WideType);
}

/// Instantiate a wide operation to replace a narrow operation. This only needs
/// to handle operations that can evaluate to SCEVAddRec. It can safely return
/// 0 for any operation we decide not to clone.
Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
                                  const SCEVAddRecExpr *WideAR) {
  unsigned Opcode = DU.NarrowUse->getOpcode();
  switch (Opcode) {
  default:
    return nullptr;
  case Instruction::Add:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::Sub:
    return cloneArithmeticIVUser(DU, WideAR);

  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return cloneBitwiseIVUser(DU);
  }
}

Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");

  // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
  // about the narrow operand yet so must insert a [sz]ext. It is probably loop
  // invariant and will be folded or hoisted. If it actually comes from a
  // widened IV, it should be removed during a future call to widenIVUse.
  bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      IsSigned, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      IsSigned, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());
  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
                                            const SCEVAddRecExpr *WideAR) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;

  // We're trying to find X such that
  //
  //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
  //
  // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
  // and check using SCEV if any of them are correct.

  // Returns true if extending NonIVNarrowDef according to `SignExt` is a
  // correct solution to X.
  auto GuessNonIVOperand = [&](bool SignExt) {
    const SCEV *WideLHS;
    const SCEV *WideRHS;

    auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
      if (SignExt)
        return SE->getSignExtendExpr(S, Ty);
      return SE->getZeroExtendExpr(S, Ty);
    };

    if (IVOpIdx == 0) {
      WideLHS = SE->getSCEV(WideDef);
      const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
      WideRHS = GetExtend(NarrowRHS, WideType);
    } else {
      const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
      WideLHS = GetExtend(NarrowLHS, WideType);
      WideRHS = SE->getSCEV(WideDef);
    }

    // WideUse is "WideDef `op.wide` X" as described in the comment.
    const SCEV *WideUse = nullptr;

    switch (NarrowUse->getOpcode()) {
    default:
      llvm_unreachable("No other possibility!");

    case Instruction::Add:
      WideUse = SE->getAddExpr(WideLHS, WideRHS);
      break;

    case Instruction::Mul:
      WideUse = SE->getMulExpr(WideLHS, WideRHS);
      break;

    case Instruction::UDiv:
      WideUse = SE->getUDivExpr(WideLHS, WideRHS);
      break;

    case Instruction::Sub:
      WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
      break;
    }

    return WideUse == WideAR;
  };

  bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
  if (!GuessNonIVOperand(SignExtend)) {
    SignExtend = !SignExtend;
    if (!GuessNonIVOperand(SignExtend))
      return nullptr;
  }

  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      SignExtend, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      SignExtend, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());

  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

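// Illustrative example (not from the upstream sources): when widening
//   %use = add nsw i32 %narrow.iv, %x
// GuessNonIVOperand first tries X = sext(%x), matching the extension kind of
// the narrow IV; if the resulting wide SCEV is not the expected AddRec, it
// retries with X = zext(%x) before giving up.
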
WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
  auto It = ExtendKindMap.find(I);
  assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
  return It->second;
}

const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                                     unsigned OpCode) const {
  if (OpCode == Instruction::Add)
    return SE->getAddExpr(LHS, RHS);
  if (OpCode == Instruction::Sub)
    return SE->getMinusSCEV(LHS, RHS);
  if (OpCode == Instruction::Mul)
    return SE->getMulExpr(LHS, RHS);

  llvm_unreachable("Unsupported opcode.");
}

/// No-wrap operations can transfer sign extension of their result to their
1224
/// operands. Generate the SCEV value for the widened operation without
1225
/// actually modifying the IR yet. If the expression after extending the
1226
/// operands is an AddRec for this loop, return the AddRec and the kind of
1227
/// extension used.
1228
59.1k
WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
1229
59.1k
  // Handle the common case of add<nsw/nuw>
1230
59.1k
  const unsigned OpCode = DU.NarrowUse->getOpcode();
1231
59.1k
  // Only Add/Sub/Mul instructions supported yet.
1232
59.1k
  if (OpCode != Instruction::Add && 
OpCode != Instruction::Sub30.5k
&&
1233
59.1k
      
OpCode != Instruction::Mul29.7k
)
1234
29.1k
    return {nullptr, Unknown};
1235
30.0k
1236
30.0k
  // One operand (NarrowDef) has already been extended to WideDef. Now determine
1237
30.0k
  // if extending the other will lead to a recurrence.
1238
30.0k
  const unsigned ExtendOperIdx =
1239
30.0k
      DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 
128.5k
:
01.45k
;
1240
30.0k
  assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");
1241
30.0k
1242
30.0k
  const SCEV *ExtendOperExpr = nullptr;
1243
30.0k
  const OverflowingBinaryOperator *OBO =
1244
30.0k
    cast<OverflowingBinaryOperator>(DU.NarrowUse);
1245
30.0k
  ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
1246
30.0k
  if (ExtKind == SignExtended && 
OBO->hasNoSignedWrap()8.37k
)
1247
7.98k
    ExtendOperExpr = SE->getSignExtendExpr(
1248
7.98k
      SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
1249
22.0k
  else if(ExtKind == ZeroExtended && 
OBO->hasNoUnsignedWrap()21.6k
)
1250
20.3k
    ExtendOperExpr = SE->getZeroExtendExpr(
1251
20.3k
      SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
1252
1.72k
  else
1253
1.72k
    return {nullptr, Unknown};
1254
28.3k
1255
28.3k
  // When creating this SCEV expr, don't apply the current operations NSW or NUW
1256
28.3k
  // flags. This instruction may be guarded by control flow that the no-wrap
1257
28.3k
  // behavior depends on. Non-control-equivalent instructions can be mapped to
1258
28.3k
  // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
1259
28.3k
  // semantics to those operations.
1260
28.3k
  const SCEV *lhs = SE->getSCEV(DU.WideDef);
1261
28.3k
  const SCEV *rhs = ExtendOperExpr;
1262
28.3k
1263
28.3k
  // Let's swap operands to the initial order for the case of non-commutative
1264
28.3k
  // operations, like SUB. See PR21014.
1265
28.3k
  if (ExtendOperIdx == 0)
1266
804
    std::swap(lhs, rhs);
1267
28.3k
  const SCEVAddRecExpr *AddRec =
1268
28.3k
      dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));
1269
28.3k
1270
28.3k
  if (!AddRec || 
AddRec->getLoop() != L28.0k
)
1271
244
    return {nullptr, Unknown};
1272
28.0k
1273
28.0k
  return {AddRec, ExtKind};
1274
28.0k
}
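
// For illustration only (names invented): with a sign-extended i32 IV whose
// wide SCEV is {0,+,1} in i64 and a narrow use
//
//   %add = add nsw i32 %iv, %n    ; %n loop-invariant
//
// the nsw flag lets sext distribute over the add, so getSCEVByOpCode on
// {0,+,1} and sext(%n) should fold to the i64 AddRec {sext(%n),+,1}, and the
// function returns that AddRec together with SignExtended.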

/// Is this instruction potentially interesting for further simplification
/// after widening its type? In other words, can the extend be safely hoisted
/// out of the loop with SCEV reducing the value to a recurrence on the same
/// loop? If so, return the extended recurrence and the kind of extension used.
/// Otherwise return {nullptr, Unknown}.
WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
  if (!SE->isSCEVable(DU.NarrowUse->getType()))
    return {nullptr, Unknown};

  const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
  if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
      SE->getTypeSizeInBits(WideType)) {
    // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
    // index. So don't follow this use.
    return {nullptr, Unknown};
  }

  const SCEV *WideExpr;
  ExtendKind ExtKind;
  if (DU.NeverNegative) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    if (isa<SCEVAddRecExpr>(WideExpr))
      ExtKind = SignExtended;
    else {
      WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
      ExtKind = ZeroExtended;
    }
  } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    ExtKind = SignExtended;
  } else {
    WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
    ExtKind = ZeroExtended;
  }
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};
  return {AddRec, ExtKind};
}
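
// For illustration only (names invented): if %iv is a zero-extended i32 IV
// and the narrow use is
//
//   %mul = mul nuw i32 %iv, 4
//
// then zext(%mul) to i64 may itself be provable as the AddRec {0,+,4} for
// this loop, in which case {AddRec, ZeroExtended} is returned and the use
// can be widened rather than truncated.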

/// This IV user cannot be widened. Replace this use of the original narrow IV
/// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
  if (!InsertPt)
    return;
  LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
                    << *DU.NarrowUse << "\n");
  IRBuilder<> Builder(InsertPt);
  Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
}
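
// For illustration only (names invented): a use that does not widen to a
// recurrence, e.g.
//
//   %r = urem i32 %iv, 7
//
// is rewritten by the helper above as
//
//   %iv.trunc = trunc i64 %iv.wide to i32
//   %r = urem i32 %iv.trunc, 7
//
// so the narrow IV loses one more use and can eventually be deleted.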

/// If the narrow use is a compare instruction, then widen the compare
/// (and possibly the other operand).  The extend operation is hoisted into the
/// loop preheader as far as possible.
bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
  ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
  if (!Cmp)
    return false;

  // We can legally widen the comparison in the following two cases:
  //
  //  - The signedness of the IV extension and comparison match
  //
  //  - The narrow IV is always positive (and thus its sign extension is equal
  //    to its zero extension).  For instance, let's say we're zero extending
  //    %narrow for the following use
  //
  //      icmp slt i32 %narrow, %val   ... (A)
  //
  //    and %narrow is always positive.  Then
  //
  //      (A) == icmp slt i32 sext(%narrow), sext(%val)
  //          == icmp slt i32 zext(%narrow), sext(%val)
  bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
  if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
    return false;

  Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
  unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
  unsigned IVWidth = SE->getTypeSizeInBits(WideType);
  assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");

  // Widen the compare instruction.
  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
  if (!InsertPt)
    return false;
  IRBuilder<> Builder(InsertPt);
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);

  // Widen the other operand of the compare, if necessary.
  if (CastWidth < IVWidth) {
    Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
    DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
  }
  return true;
}
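
// For illustration only (names invented): with a zero-extended i32 IV that
// is known never negative, a signed exit test
//
//   %cmp = icmp slt i32 %iv, %limit
//
// can be rewritten by the code above as
//
//   %limit.ext = sext i32 %limit to i64
//   %cmp = icmp slt i64 %iv.wide, %limit.ext
//
// Note that the other operand is extended with the signedness of the compare,
// which is exactly what the NeverNegative reasoning above makes legal.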

/// If the narrow use is an instruction whose two operands are the defining
/// instruction of DU and a load instruction, then we have the following:
/// if the load is hoisted outside the loop, we never reach this function,
/// because scalar evolution analysis works fine in widenIVUse with variables
/// hoisted outside the loop and efficient code is subsequently generated
/// without emitting truncate instructions. But when the load is not hoisted
/// (whether due to a limitation of alias analysis or to a true legality
/// constraint), scalar evolution cannot proceed with loop-variant values and
/// inefficient code is generated. This function handles the non-hoisted load
/// special case by making the optimization generate the same kind of code for
/// hoisted and non-hoisted loads (widen the use and eliminate the sign extend
/// instruction). This special case is important especially when the induction
/// variables are affecting the addressing mode in code generation.
bool WidenIV::widenWithVariantLoadUse(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  // Handle the common case of add<nsw/nuw>
  const unsigned OpCode = NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions are supported.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return false;

  // The operand that is not defined by NarrowDef of DU. Let's call it the
  // other operand.
  unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef &&
         "bad DU");

  const SCEV *ExtendOperExpr = nullptr;
  const OverflowingBinaryOperator *OBO =
    cast<OverflowingBinaryOperator>(NarrowUse);
  ExtendKind ExtKind = getExtendKind(NarrowDef);
  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
    ExtendOperExpr = SE->getSignExtendExpr(
      SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
    ExtendOperExpr = SE->getZeroExtendExpr(
      SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else
    return false;

  // We are interested in the other operand being a load instruction.
  // But, we should look into relaxing this restriction later on.
  auto *I = dyn_cast<Instruction>(NarrowUse->getOperand(ExtendOperIdx));
  if (I && I->getOpcode() != Instruction::Load)
    return false;

  // Verify that the defining operand is an AddRec.
  const SCEV *Op1 = SE->getSCEV(WideDef);
  const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
  if (!AddRecOp1 || AddRecOp1->getLoop() != L)
    return false;
  // Verify that the other operand is an extend.
  if (ExtKind == SignExtended) {
    if (!isa<SCEVSignExtendExpr>(ExtendOperExpr))
      return false;
  } else {
    if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr))
      return false;
  }

  if (ExtKind == SignExtended) {
    for (Use &U : NarrowUse->uses()) {
      SExtInst *User = dyn_cast<SExtInst>(U.getUser());
      if (!User || User->getType() != WideType)
        return false;
    }
  } else { // ExtKind == ZeroExtended
    for (Use &U : NarrowUse->uses()) {
      ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
      if (!User || User->getType() != WideType)
        return false;
    }
  }

  return true;
}
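
// For illustration only (names invented), the shape this recognizes is:
//
//   %val = load i32, i32* %p          ; load that could not be hoisted
//   %add = add nsw i32 %iv, %val
//   %s   = sext i32 %add to i64       ; every user of %add is such a sext
//
// For this pattern the codegen routine below emits
//
//   %val.ext  = sext i32 %val to i64
//   %add.wide = add nsw i64 %iv.wide, %val.ext
//
// and replaces each %s with %add.wide, eliminating the sign extends.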

/// Special Case for widening with variant Loads (see
/// WidenIV::widenWithVariantLoadUse). This is the code generation part.
void WidenIV::widenWithVariantLoadUseCodegen(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  ExtendKind ExtKind = getExtendKind(NarrowDef);

  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  // Generating a widening use instruction.
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      ExtKind, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      ExtKind, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());
  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);

  if (ExtKind == SignExtended)
    ExtendKindMap[NarrowUse] = SignExtended;
  else
    ExtendKindMap[NarrowUse] = ZeroExtended;

  // Update the Use.
  if (ExtKind == SignExtended) {
    for (Use &U : NarrowUse->uses()) {
      SExtInst *User = dyn_cast<SExtInst>(U.getUser());
      if (User && User->getType() == WideType) {
        LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
                          << *WideBO << "\n");
        ++NumElimExt;
        User->replaceAllUsesWith(WideBO);
        DeadInsts.emplace_back(User);
      }
    }
  } else { // ExtKind == ZeroExtended
    for (Use &U : NarrowUse->uses()) {
      ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
      if (User && User->getType() == WideType) {
        LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
                          << *WideBO << "\n");
        ++NumElimExt;
        User->replaceAllUsesWith(WideBO);
        DeadInsts.emplace_back(User);
      }
    }
  }
}

/// Determine whether an individual user of the narrow IV can be widened. If so,
/// return the wide clone of the user.
Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
  assert(ExtendKindMap.count(DU.NarrowDef) &&
         "Should already know the kind of extension used to widen NarrowDef");

  // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
  if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
    if (LI->getLoopFor(UsePhi->getParent()) != L) {
      // For LCSSA phis, sink the truncate outside the loop.
      // After SimplifyCFG most loop exit targets have a single predecessor.
      // Otherwise fall back to a truncate within the loop.
      if (UsePhi->getNumOperands() != 1)
        truncateIVUse(DU, DT, LI);
      else {
        // Widening the PHI requires us to insert a trunc.  The logical place
        // for this trunc is in the same BB as the PHI.  This is not possible if
        // the BB is terminated by a catchswitch.
        if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
          return nullptr;

        PHINode *WidePhi =
          PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
                          UsePhi);
        WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
        IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
        Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
        UsePhi->replaceAllUsesWith(Trunc);
        DeadInsts.emplace_back(UsePhi);
        LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
                          << *WidePhi << "\n");
      }
      return nullptr;
    }
  }

  // This narrow use can be widened by a sext if it's non-negative or its
  // narrow def was widened by a sext. Same for zext.
  auto canWidenBySExt = [&]() {
    return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
  };
  auto canWidenByZExt = [&]() {
    return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
  };

  // Our raison d'etre! Eliminate sign and zero extension.
  if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
      (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
    Value *NewDef = DU.WideDef;
    if (DU.NarrowUse->getType() != WideType) {
      unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
      unsigned IVWidth = SE->getTypeSizeInBits(WideType);
      if (CastWidth < IVWidth) {
        // The cast isn't as wide as the IV, so insert a Trunc.
        IRBuilder<> Builder(DU.NarrowUse);
        NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
      }
      else {
        // A wider extend was hidden behind a narrower one. This may induce
        // another round of IV widening in which the intermediate IV becomes
        // dead. It should be very rare.
        LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
                          << " not wide enough to subsume " << *DU.NarrowUse
                          << "\n");
        DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
        NewDef = DU.NarrowUse;
      }
    }
    if (NewDef != DU.NarrowUse) {
      LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
                        << " replaced by " << *DU.WideDef << "\n");
      ++NumElimExt;
      DU.NarrowUse->replaceAllUsesWith(NewDef);
      DeadInsts.emplace_back(DU.NarrowUse);
    }
    // Now that the extend is gone, we want to expose its uses for potential
    // further simplification. We don't need to directly inform SimplifyIVUsers
    // of the new users, because their parent IV will be processed later as a
    // new loop phi. If we preserved IVUsers analysis, we would also want to
    // push the uses of WideDef here.

    // No further widening is needed. The deceased [sz]ext had done it for us.
    return nullptr;
  }

  // Does this user itself evaluate to a recurrence after widening?
  WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
  if (!WideAddRec.first)
    WideAddRec = getWideRecurrence(DU);

  assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
  if (!WideAddRec.first) {
    // If use is a loop condition, try to promote the condition instead of
    // truncating the IV first.
    if (widenLoopCompare(DU))
      return nullptr;

    // We are here about to generate a truncate instruction that may hurt
    // performance because the scalar evolution expression computed earlier
    // in WideAddRec.first does not indicate a polynomial induction expression.
    // In that case, look at the operands of the use instruction to determine
    // if we can still widen the use instead of truncating its operand.
    if (widenWithVariantLoadUse(DU)) {
      widenWithVariantLoadUseCodegen(DU);
      return nullptr;
    }

    // This user does not evaluate to a recurrence after widening, so don't
    // follow it. Instead insert a Trunc to kill off the original use,
    // eventually isolating the original narrow IV so it can be removed.
    truncateIVUse(DU, DT, LI);
    return nullptr;
  }
  // Assume block terminators cannot evaluate to a recurrence. We can't insert
  // a Trunc after a terminator if there happens to be a critical edge.
  assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
         "SCEV is not expected to evaluate a block terminator");

  // Reuse the IV increment that SCEVExpander created as long as it dominates
  // NarrowUse.
  Instruction *WideUse = nullptr;
  if (WideAddRec.first == WideIncExpr &&
      Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
    WideUse = WideInc;
  else {
    WideUse = cloneIVUser(DU, WideAddRec.first);
    if (!WideUse)
      return nullptr;
  }
  // Evaluation of WideAddRec ensured that the narrow expression could be
  // extended outside the loop without overflow. This suggests that the wide use
  // evaluates to the same expression as the extended narrow use, but doesn't
  // absolutely guarantee it. Hence the following failsafe check. In rare cases
  // where it fails, we simply throw away the newly created wide use.
  if (WideAddRec.first != SE->getSCEV(WideUse)) {
    LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
                      << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
                      << "\n");
    DeadInsts.emplace_back(WideUse);
    return nullptr;
  }

  ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
  // Returning WideUse pushes it on the worklist.
  return WideUse;
}

/// Add eligible users of NarrowDef to NarrowIVUsers.
void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
  const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
  bool NonNegativeDef =
      SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
                           SE->getConstant(NarrowSCEV->getType(), 0));
  for (User *U : NarrowDef->users()) {
    Instruction *NarrowUser = cast<Instruction>(U);

    // Handle data flow merges and bizarre phi cycles.
    if (!Widened.insert(NarrowUser).second)
      continue;

    bool NonNegativeUse = false;
    if (!NonNegativeDef) {
      // We might have control-dependent range information for this context.
      if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
        NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
    }

    NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
                               NonNegativeDef || NonNegativeUse);
  }
}

/// Process a single induction variable. First use the SCEVExpander to create a
/// wide induction variable that evaluates to the same recurrence as the
/// original narrow IV. Then use a worklist to forward traverse the narrow IV's
/// def-use chain. After widenIVUse has processed all interesting IV users, the
/// narrow IV will be isolated for removal by DeleteDeadPHIs.
///
/// It would be simpler to delete uses as they are processed, but we must avoid
/// invalidating SCEV expressions.
PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
  // Is this phi an induction variable?
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
  if (!AddRec)
    return nullptr;

  // Widen the induction variable expression.
  const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
                               ? SE->getSignExtendExpr(AddRec, WideType)
                               : SE->getZeroExtendExpr(AddRec, WideType);

  assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
         "Expect the new IV expression to preserve its type");

  // Can the IV be extended outside the loop without overflow?
  AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return nullptr;

  // An AddRec must have loop-invariant operands. Since this AddRec is
  // materialized by a loop header phi, the expression cannot have any post-loop
  // operands, so they must dominate the loop header.
  assert(
      SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
      SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
      "Loop header phi recurrence inputs do not dominate the loop");

  // Iterate over IV uses (including transitive ones) looking for IV increments
  // of the form 'add nsw %iv, <const>'. For each increment and each use of
  // the increment calculate control-dependent range information based on
  // dominating conditions inside of the loop (e.g. a range check inside of the
  // loop). Calculated ranges are stored in the PostIncRangeInfos map.
  //
  // Control-dependent range information is later used to prove that a narrow
  // definition is not negative (see pushNarrowIVUsers). It's difficult to do
  // this on demand because when pushNarrowIVUsers needs this information some
  // of the dominating conditions might already be widened.
  if (UsePostIncrementRanges)
    calculatePostIncRanges(OrigPhi);

  // The rewriter provides a value for the desired IV expression. This may
  // either find an existing phi or materialize a new one. Either way, we
  // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
  // of the phi-SCC dominates the loop entry.
  Instruction *InsertPt = &L->getHeader()->front();
  WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));

  // Remembering the WideIV increment generated by SCEVExpander allows
  // widenIVUse to reuse it when widening the narrow IV's increment. We don't
  // employ a general reuse mechanism because the call above is the only call to
  // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
  if (BasicBlock *LatchBlock = L->getLoopLatch()) {
    WideInc =
      cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
    WideIncExpr = SE->getSCEV(WideInc);
    // Propagate the debug location associated with the original loop increment
    // to the new (widened) increment.
    auto *OrigInc =
      cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
    WideInc->setDebugLoc(OrigInc->getDebugLoc());
  }

  LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
  ++NumWidened;

  // Traverse the def-use chain using a worklist starting at the original IV.
  assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state");

  Widened.insert(OrigPhi);
  pushNarrowIVUsers(OrigPhi, WidePhi);

  while (!NarrowIVUsers.empty()) {
    NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();

    // Process a def-use edge. This may replace the use, so don't hold a
    // use_iterator across it.
    Instruction *WideUse = widenIVUse(DU, Rewriter);

    // Follow all def-use edges from the previous narrow use.
    if (WideUse)
      pushNarrowIVUsers(DU.NarrowUse, WideUse);

    // widenIVUse may have removed the def-use edge.
    if (DU.NarrowDef->use_empty())
      DeadInsts.emplace_back(DU.NarrowDef);
  }

  // Attach any debug information to the new PHI. Since OrigPhi and WidePHI
  // evaluate the same recurrence, we can just copy the debug info over.
  SmallVector<DbgValueInst *, 1> DbgValues;
  llvm::findDbgValues(DbgValues, OrigPhi);
  auto *MDPhi = MetadataAsValue::get(WidePhi->getContext(),
                                     ValueAsMetadata::get(WidePhi));
  for (auto &DbgValue : DbgValues)
    DbgValue->setOperand(0, MDPhi);
  return WidePhi;
}
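
// For illustration, the net effect of createWideIV and the worklist above on
// a typical narrow counter used as a wide index (names invented):
//
//   loop:
//     %iv      = phi i32 [ 0, %ph ], [ %iv.next, %loop ]
//     %iv.next = add nuw nsw i32 %iv, 1
//     %idx     = sext i32 %iv to i64
//
// becomes, conceptually,
//
//   loop:
//     %iv.wide      = phi i64 [ 0, %ph ], [ %iv.wide.next, %loop ]
//     %iv.wide.next = add nuw nsw i64 %iv.wide, 1
//
// with %idx replaced by %iv.wide and the narrow phi left isolated for
// DeleteDeadPHIs to remove.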

/// Calculates the control-dependent range for the given def at the given
/// context by looking at dominating conditions inside of the loop.
void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
                                    Instruction *NarrowUser) {
  using namespace llvm::PatternMatch;

  Value *NarrowDefLHS;
  const APInt *NarrowDefRHS;
  if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
                                 m_APInt(NarrowDefRHS))) ||
      !NarrowDefRHS->isNonNegative())
    return;

  auto UpdateRangeFromCondition = [&] (Value *Condition,
                                       bool TrueDest) {
    CmpInst::Predicate Pred;
    Value *CmpRHS;
    if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
                                 m_Value(CmpRHS))))
      return;

    CmpInst::Predicate P =
            TrueDest ? Pred : CmpInst::getInversePredicate(Pred);

    auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
    auto CmpConstrainedLHSRange =
            ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
    auto NarrowDefRange =
            CmpConstrainedLHSRange.addWithNoSignedWrap(*NarrowDefRHS);

    updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
  };

  auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
    if (!HasGuards)
      return;

    for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
                                     Ctx->getParent()->rend())) {
      Value *C = nullptr;
      if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
        UpdateRangeFromCondition(C, /*TrueDest=*/true);
    }
  };

  UpdateRangeFromGuards(NarrowUser);

  BasicBlock *NarrowUserBB = NarrowUser->getParent();
  // If NarrowUserBB is statically unreachable asking dominator queries may
  // yield surprising results. (e.g. the block may not have a dom tree node)
  if (!DT->isReachableFromEntry(NarrowUserBB))
    return;

  for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
       L->contains(DTB->getBlock());
       DTB = DTB->getIDom()) {
    auto *BB = DTB->getBlock();
    auto *TI = BB->getTerminator();
    UpdateRangeFromGuards(TI);

    auto *BI = dyn_cast<BranchInst>(TI);
    if (!BI || !BI->isConditional())
      continue;

    auto *TrueSuccessor = BI->getSuccessor(0);
    auto *FalseSuccessor = BI->getSuccessor(1);

    auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
      return BBE.isSingleEdge() &&
             DT->dominates(BBE, NarrowUser->getParent());
    };

    if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
      UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);

    if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
      UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
  }
}
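
// For illustration only (names invented): for the post-increment value
//
//   %inc = add nsw i32 %i, 1
//
// used under the true edge of a dominating 'icmp sgt i32 %i, -1', the
// allowed region for %i is [0, INT_MAX], and addWithNoSignedWrap(1) gives
// %inc a signed minimum of 1 in that context. pushNarrowIVUsers can then
// treat this particular use as non-negative even though %inc is not
// globally known to be so.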

/// Calculates the PostIncRangeInfos map for the given IV.
void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
  SmallPtrSet<Instruction *, 16> Visited;
  SmallVector<Instruction *, 6> Worklist;
  Worklist.push_back(OrigPhi);
  Visited.insert(OrigPhi);

  while (!Worklist.empty()) {
    Instruction *NarrowDef = Worklist.pop_back_val();

    for (Use &U : NarrowDef->uses()) {
      auto *NarrowUser = cast<Instruction>(U.getUser());

      // Don't go looking outside the current loop.
      auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
      if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
        continue;

      if (!Visited.insert(NarrowUser).second)
        continue;

      Worklist.push_back(NarrowUser);

      calculatePostIncRange(NarrowDef, NarrowUser);
    }
  }
}

//===----------------------------------------------------------------------===//
//  Live IV Reduction - Minimize IVs live across the loop.
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
//  Simplification of IV users based on SCEV evaluation.
//===----------------------------------------------------------------------===//

namespace {

class IndVarSimplifyVisitor : public IVVisitor {
  ScalarEvolution *SE;
  const TargetTransformInfo *TTI;
  PHINode *IVPhi;

public:
  WideIVInfo WI;

  IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
                        const TargetTransformInfo *TTI,
                        const DominatorTree *DTree)
    : SE(SCEV), TTI(TTI), IVPhi(IV) {
    DT = DTree;
    WI.NarrowIV = IVPhi;
  }

  // Implement the interface used by simplifyUsersOfIV.
  void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
};

} // end anonymous namespace

/// Iteratively perform simplification on a worklist of IV users. Each
/// successive simplification may push more users which may themselves be
/// candidates for simplification.
///
/// Sign/Zero extend elimination is interleaved with IV simplification.
bool IndVarSimplify::simplifyAndExtend(Loop *L,
                                       SCEVExpander &Rewriter,
                                       LoopInfo *LI) {
  SmallVector<WideIVInfo, 8> WideIVs;

  auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
          Intrinsic::getName(Intrinsic::experimental_guard));
  bool HasGuards = GuardDecl && !GuardDecl->use_empty();

  SmallVector<PHINode*, 8> LoopPhis;
  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    LoopPhis.push_back(cast<PHINode>(I));
  }
  // Each round of simplification iterates through the SimplifyIVUsers worklist
  // for all current phis, then determines whether any IVs can be
  // widened. Widening adds new phis to LoopPhis, inducing another round of
  // simplification on the wide IVs.
  bool Changed = false;
  while (!LoopPhis.empty()) {
    // Evaluate as many IV expressions as possible before widening any IVs. This
    // forces SCEV to set no-wrap flags before evaluating sign/zero
    // extension. The first time SCEV attempts to normalize sign/zero extension,
    // the result becomes final. So for the most predictable results, we delay
    // evaluation of sign/zero extension until needed, and avoid running other
    // SCEV based analysis prior to simplifyAndExtend.
    do {
      PHINode *CurrIV = LoopPhis.pop_back_val();

      // Information about sign/zero extensions of CurrIV.
      IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);

      Changed |=
          simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, Rewriter, &Visitor);

      if (Visitor.WI.WidestNativeType) {
        WideIVs.push_back(Visitor.WI);
      }
    } while (!LoopPhis.empty());

    for (; !WideIVs.empty(); WideIVs.pop_back()) {
      WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
      if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
        Changed = true;
        LoopPhis.push_back(WidePhi);
      }
    }
  }
  return Changed;
}

//===----------------------------------------------------------------------===//
//  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
//===----------------------------------------------------------------------===//

/// Given a Value which is hoped to be part of an add recurrence in the given
/// loop, return the associated Phi node if so.  Otherwise, return null.  Note
/// that this is less general than SCEV's AddRec checking.
static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
  Instruction *IncI = dyn_cast<Instruction>(IncV);
  if (!IncI)
    return nullptr;

  switch (IncI->getOpcode()) {
  case Instruction::Add:
  case Instruction::Sub:
    break;
  case Instruction::GetElementPtr:
    // An IV counter must preserve its type.
    if (IncI->getNumOperands() == 2)
      break;
    LLVM_FALLTHROUGH;
  default:
    return nullptr;
  }

  PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(1)))
      return Phi;
    return nullptr;
  }
  if (IncI->getOpcode() == Instruction::GetElementPtr)
    return nullptr;

  // Allow add/sub to be commuted.
  Phi = dyn_cast<PHINode>(IncI->getOperand(1));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(0)))
      return Phi;
  }
  return nullptr;
}

/// Whether the current loop exit test is based on this value.  Currently this
/// is limited to a direct use in the loop condition.
static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
  // TODO: Allow non-icmp loop test.
  if (!ICmp)
    return false;

  // TODO: Allow indirect use.
  return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
}

/// linearFunctionTestReplace policy. Return true unless we can show that the
/// current exit test is already sufficiently canonical.
static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
  assert(L->getLoopLatch() && "Must be in simplified form");

  // Avoid converting a constant or loop invariant test back to a runtime
  // test.  This is critical for when SCEV's cached ExitCount is less precise
  // than the current IR (such as after we've proven a particular exit is
  // actually dead and thus the BE count never reaches our ExitCount.)
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  if (L->isLoopInvariant(BI->getCondition()))
    return false;

  // Do LFTR to simplify the exit condition to an ICMP.
  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
  if (!Cond)
    return true;

  // Do LFTR to simplify the exit ICMP to EQ/NE
  ICmpInst::Predicate Pred = Cond->getPredicate();
  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
    return true;

  // Look for a loop invariant RHS
  Value *LHS = Cond->getOperand(0);
  Value *RHS = Cond->getOperand(1);
  if (!L->isLoopInvariant(RHS)) {
    if (!L->isLoopInvariant(LHS))
      return true;
    std::swap(LHS, RHS);
  }
  // Look for a simple IV counter LHS
  PHINode *Phi = dyn_cast<PHINode>(LHS);
  if (!Phi)
    Phi = getLoopPhiForCounter(LHS, L);

  if (!Phi)
    return true;

  // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
  int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
  if (Idx < 0)
    return true;

  // Do LFTR if the exit condition's IV is *not* a simple counter.
  Value *IncV = Phi->getIncomingValue(Idx);
  return Phi != getLoopPhiForCounter(IncV, L);
}
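
// For illustration (hypothetical IR, names invented): a latch exit on
// 'icmp ne i32 %iv.next, %n' with loop-invariant %n, where %iv.next
// increments a simple counter phi, is already canonical and needsLFTR
// returns false; an inequality test such as 'icmp slt i32 %iv, %n' makes it
// return true so LFTR will rewrite it.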

/// Return true if undefined behavior would provably be executed on the path to
/// OnPathTo if Root produced a poison result.  Note that this doesn't say
/// anything about whether OnPathTo is actually executed or whether Root is
/// actually poison.  This can be used to assess whether a new use of Root can
/// be added at a location which is control equivalent with OnPathTo (such as
/// immediately before it) without introducing UB which didn't previously
/// exist.  Note that a false result conveys no information.
static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
                                          Instruction *OnPathTo,
                                          DominatorTree *DT) {
  // Basic approach is to assume Root is poison, propagate poison forward
  // through all users we can easily track, and then check whether any of those
  // users are provably UB and must execute before our exiting block might
  // exit.

  // The set of all recursive users we've visited (which are assumed to all be
  // poison because of said visit)
  SmallSet<const Value *, 16> KnownPoison;
  SmallVector<const Instruction*, 16> Worklist;
  Worklist.push_back(Root);
  while (!Worklist.empty()) {
    const Instruction *I = Worklist.pop_back_val();

    // If we know this must trigger UB on a path leading to our target.
    if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
      return true;

    // If we can't analyze propagation through this instruction, just skip it
    // and transitive users.  Safe as false is a conservative result.
    if (!propagatesFullPoison(I) && I != Root)
      continue;

    if (KnownPoison.insert(I).second)
      for (const User *User : I->users())
        Worklist.push_back(cast<Instruction>(User));
  }

  // Might be non-UB, or might have a path we couldn't prove must execute on
  // the way to the exiting bb.
  return false;
}
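
// For illustration only (names invented): if Root is %p and the loop body
// unconditionally executes, before the exiting block's terminator,
//
//   %q = getelementptr inbounds i8, i8* %p, i64 4
//   store i8 0, i8* %q
//
// then poison propagates from %p to %q, and storing through a poison
// pointer is immediate UB, so the walk above can return true for %p.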

/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
/// down to checking that all operands are constant and listing instructions
/// that may hide undef.
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
                               unsigned Depth) {
  if (isa<Constant>(V))
    return !isa<UndefValue>(V);

  if (Depth >= 6)
    return false;

  // Conservatively handle non-constant non-instructions. For example, Arguments
  // may be undef.
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I)
    return false;

  // Load and return values may be undef.
  if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
    return false;

  // Optimistically handle other instructions.
  for (Value *Op : I->operands()) {
    if (!Visited.insert(Op).second)
      continue;
    if (!hasConcreteDefImpl(Op, Visited, Depth+1))
      return false;
  }
  return true;
}

/// Return true if the given value is concrete. We must prove that undef can
/// never reach it.
///
/// TODO: If we decide that this is a good approach to checking for undef, we
/// may factor it into a common location.
static bool hasConcreteDef(Value *V) {
  SmallPtrSet<Value*, 8> Visited;
  Visited.insert(V);
  return hasConcreteDefImpl(V, Visited, 0);
}
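
// For illustration (names invented): a phi like
//
//   %x = phi i32 [ undef, %preheader ], [ %x.inc, %loop ]
//
// fails hasConcreteDef because one operand is undef, while a phi of two
// non-undef constants passes; loads and calls are rejected conservatively
// since their results may be undef.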

/// Return true if the only users of this IV are the (soon to be rewritten)
/// loop exit test and the IV increment itself, i.e. the IV is almost dead.
static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
  int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
  Value *IncV = Phi->getIncomingValue(LatchIdx);

  for (User *U : Phi->users())
    if (U != Cond && U != IncV) return false;

  for (User *U : IncV->users())
    if (U != Cond && U != Phi) return false;
  return true;
}

/// Return true if the given phi is a "counter" in L.  A counter is an
/// add recurrence (of integer or pointer type) with an arbitrary start, and a
/// step of 1.  Note that L must have exactly one latch.
static bool isLoopCounter(PHINode* Phi, Loop *L,
                          ScalarEvolution *SE) {
  assert(Phi->getParent() == L->getHeader());
  assert(L->getLoopLatch());

  if (!SE->isSCEVable(Phi->getType()))
    return false;

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
  if (!AR || AR->getLoop() != L || !AR->isAffine())
    return false;

  const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
  if (!Step || !Step->isOne())
    return false;

  int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
  Value *IncV = Phi->getIncomingValue(LatchIdx);
  return (getLoopPhiForCounter(IncV, L) == Phi);
}

/// Search the loop header for a loop counter (an add rec w/ step of one)
/// suitable for use by LFTR.  If multiple counters are available, select the
/// "best" one based on profitability heuristics.
///
/// BECount may be an i8* pointer type. The pointer difference is already
/// valid count without scaling the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
                                const SCEV *BECount,
                                ScalarEvolution *SE, DominatorTree *DT) {
  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());

  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();

  // Loop over all of the PHI nodes, looking for a simple counter.
  PHINode *BestPhi = nullptr;
  const SCEV *BestInit = nullptr;
  BasicBlock *LatchBlock = L->getLoopLatch();
  assert(LatchBlock && "Must be in simplified form");
  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    PHINode *Phi = cast<PHINode>(I);
    if (!isLoopCounter(Phi, L, SE))
      continue;

    // Avoid comparing an integer IV against a pointer Limit.
    if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
      continue;

    const auto *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));

    // AR may be a pointer type, while BECount is an integer type.
    // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
    // AR may not be a narrower type, or we may never exit.
    uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
    if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
      continue;

    // Avoid reusing a potentially undef value to compute other values that may
    // have originally had a concrete definition.
    if (!hasConcreteDef(Phi)) {
      // We explicitly allow unknown phis as long as they are already used by
      // the loop exit test.  This is legal since performing LFTR could not
      // increase the number of undef users.
      Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
      if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
          !isLoopExitTestBasedOn(IncPhi, ExitingBB))
        continue;
    }

    // Avoid introducing undefined behavior due to poison which didn't exist in
    // the original program.  (Annoyingly, the rules for poison and undef
    // propagation are distinct, so this does NOT cover the undef case above.)
    // We have to ensure that we don't introduce UB by introducing a use on an
    // iteration where said IV produces poison.  Our strategy here differs for
    // pointers and integer IVs.  For integers, we strip and reinfer as needed,
    // see code in linearFunctionTestReplace.  For pointers, we restrict
    // transforms as there is no good way to reinfer inbounds once lost.
    if (!Phi->getType()->isIntegerTy() &&
        !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
      continue;

    const SCEV *Init = AR->getStart();

    if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
      // Don't force a live loop counter if another IV can be used.
      if (AlmostDeadIV(Phi, LatchBlock, Cond))
        continue;

      // Prefer to count-from-zero. This is a more "canonical" counter form. It
      // also prefers integer to pointer IVs.
      if (BestInit->isZero() != Init->isZero()) {
        if (BestInit->isZero())
          continue;
      }
      // If two IVs both count from zero or both count from nonzero then the
      // narrower is likely a dead phi that has been widened. Use the wider phi
      // to allow the other to be eliminated.
      else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
        continue;
    }
    BestPhi = Phi;
    BestInit = Init;
  }
  return BestPhi;
}

/// Insert an IR expression which computes the value held by the IV IndVar
/// (which must be a loop counter w/ unit stride) after the backedge of loop L
/// is taken ExitCount times.
static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
                           const SCEV *ExitCount, bool UsePostInc, Loop *L,
                           SCEVExpander &Rewriter, ScalarEvolution *SE) {
  assert(isLoopCounter(IndVar, L, SE));
  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
  const SCEV *IVInit = AR->getStart();

  // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
  // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
  // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
  // the existing GEPs whenever possible.
  if (IndVar->getType()->isPointerTy() &&
      !ExitCount->getType()->isPointerTy()) {
    // IVOffset will be the new GEP offset that is interpreted by GEP as a
    // signed value. ExitCount on the other hand represents the loop trip count,
    // which is an unsigned value. FindLoopCounter only allows induction
    // variables that have a positive unit stride of one. This means we don't
    // have to handle the case of negative offsets (yet) and just need to zero
    // extend ExitCount.
    Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
    const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
    if (UsePostInc)
      IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVOffset, L) &&
           "Computed iteration count is not loop invariant!");

    // We could handle pointer IVs other than i8*, but we need to compensate for
    // gep index scaling.
    assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
                             cast<PointerType>(IndVar->getType())
                                 ->getElementType())->isOne() &&
           "unit stride pointer IV must be i8*");

    const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
  } else {
    // In any other case, convert both IVInit and ExitCount to integers before
    // comparing. This may result in SCEV expansion of pointers, but in practice
    // SCEV will fold the pointer arithmetic away as such:
    // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
    //
    // Valid Cases: (1) both integers is most common; (2) both may be pointers
    // for simple memset-style loops.
    //
    // IVInit integer and ExitCount pointer would only occur if a canonical IV
    // were generated on top of case #2, which is not expected.

    assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
    // For unit stride, IVCount = Start + ExitCount with 2's complement
    // overflow.

    // For integer IVs, truncate the IV before computing IVInit + BECount,
    // unless we know a priori that the limit must be a constant when evaluated
    // in the bitwidth of the IV.  We prefer (potentially) keeping a truncate
    // of the IV in the loop over a (potentially) expensive expansion of the
    // widened exit count add(zext(add)) expression.
    if (SE->getTypeSizeInBits(IVInit->getType())
        > SE->getTypeSizeInBits(ExitCount->getType())) {
      if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
        ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
      else
        IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
    }

    const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);

    if (UsePostInc)
      IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVLimit, L) &&
           "Computed iteration count is not loop invariant!");
    // Ensure that we generate the same type as IndVar, or a smaller integer
    // type. In the presence of null pointer values, we have an integer type
    // SCEV expression (IVInit) for a pointer type IV value (IndVar).
    Type *LimitTy = ExitCount->getType()->isPointerTy() ?
      IndVar->getType() : ExitCount->getType();
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
  }
}
2390
2391
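// Illustrative sketch (not from the source): for a unit-stride integer IV
// with IVInit = 0 and backedge-taken count ExitCount = %n - 1, genLoopLimit
// reduces to plain SCEV arithmetic:
//
//   pre-inc  (compare %i):      IVLimit = 0 + (%n - 1)     = %n - 1
//   post-inc (compare %i.next): IVLimit = 0 + (%n - 1) + 1 = %n
//
// so in the common post-inc case the rewritten exit test is
// 'icmp ne %i.next, %n'.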
/// This method rewrites the exit condition of the loop to be a canonical !=
/// comparison against the incremented loop induction variable.  This pass is
/// able to rewrite the exit tests of any loop where the SCEV analysis can
/// determine a loop-invariant trip count of the loop, which is actually a much
/// broader range than just linear tests.
bool IndVarSimplify::
linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                          const SCEV *ExitCount,
                          PHINode *IndVar, SCEVExpander &Rewriter) {
  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
  assert(isLoopCounter(IndVar, L, SE));
  Instruction * const IncVar =
    cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));

  // Initialize CmpIndVar to the pre-incremented IV.
  Value *CmpIndVar = IndVar;
  bool UsePostInc = false;

  // If the exiting block is the same as the backedge block, we prefer to
  // compare against the post-incremented value, otherwise we must compare
  // against the pre-incremented value.
  if (ExitingBB == L->getLoopLatch()) {
    // For pointer IVs, we chose not to strip inbounds, which requires that we
    // not add a potentially UB-introducing use.  We need to either a) show
    // the loop test we're modifying is already in post-inc form, or b) show
    // that adding a use must not introduce UB.
    bool SafeToPostInc =
        IndVar->getType()->isIntegerTy() ||
        isLoopExitTestBasedOn(IncVar, ExitingBB) ||
        mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
    if (SafeToPostInc) {
      UsePostInc = true;
      CmpIndVar = IncVar;
    }
  }
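  // For example (illustrative only): in a latch-exiting loop with a pointer
  // IV such as
  //   %i      = phi i8* [ %base, %ph ], [ %i.next, %latch ]
  //   %i.next = getelementptr inbounds i8, i8* %i, i64 1
  //   %c      = icmp ne i8* %i.next, %end
  // the exit test is already based on %i.next, so case a) applies and
  // switching LFTR to the post-inc value adds no new potentially-poison use.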
  // It may be necessary to drop nowrap flags on the incrementing instruction
  // if either LFTR moves from a pre-inc check to a post-inc check (in which
  // case the increment might have previously been poison on the last
  // iteration only) or if LFTR switches to a different IV that was previously
  // dynamically dead (and as such may be arbitrarily poison). We remove any
  // nowrap flags that SCEV didn't infer for the post-inc addrec (even if we
  // use a pre-inc check), because the pre-inc addrec flags may be adopted
  // from the original instruction, while SCEV has to explicitly prove the
  // post-inc nowrap flags.
  // TODO: This handling is inaccurate for one case: If we switch to a
  // dynamically dead IV that wraps on the first loop iteration only, which is
  // not covered by the post-inc addrec. (If the new IV was not dynamically
  // dead, it could not be poison on the first iteration in the first place.)
  if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
    if (BO->hasNoUnsignedWrap())
      BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
    if (BO->hasNoSignedWrap())
      BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
  }
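  // Sketch of the effect: if SCEV proved only <nuw> for the post-inc addrec
  // but the increment instruction carried both flags (inherited from the
  // original pre-inc context), the unproven flag is dropped:
  //   %i.next = add nuw nsw i32 %i, 1   becomes   %i.next = add nuw i32 %i, 1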
  Value *ExitCnt = genLoopLimit(
      IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
  assert(ExitCnt->getType()->isPointerTy() ==
             IndVar->getType()->isPointerTy() &&
         "genLoopLimit missed a cast");

  // Insert a new icmp_ne or icmp_eq instruction before the branch.
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst::Predicate P;
  if (L->contains(BI->getSuccessor(0)))
    P = ICmpInst::ICMP_NE;
  else
    P = ICmpInst::ICMP_EQ;

  IRBuilder<> Builder(BI);

  // The new loop exit condition should reuse the debug location of the
  // original loop exit condition.
  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
    Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
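  // Polarity example (illustrative): given 'br i1 %c, label %body, label
  // %exit' where %body is inside the loop, we keep looping while the IV has
  // not yet reached the limit, so the test is ICMP_NE; if successor 0 leaves
  // the loop instead, the sense flips to ICMP_EQ.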
  // For integer IVs, if we evaluated the limit in the narrower bitwidth to
  // avoid the expensive expansion of the limit expression in the wider type,
  // emit a truncate to narrow the IV to the ExitCount type.  This is safe
  // since we know (from the exit count bitwidth) that we can't self-wrap in
  // the narrower type.
  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
  if (CmpIndVarSize > ExitCntSize) {
    assert(!CmpIndVar->getType()->isPointerTy() &&
           !ExitCnt->getType()->isPointerTy());

    // Before resorting to actually inserting the truncate, use the same
    // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
    // the other side of the comparison instead.  We still evaluate the limit
    // in the narrower bitwidth; we just prefer a zext/sext outside the loop to
    // a truncate within it.
    bool Extended = false;
    const SCEV *IV = SE->getSCEV(CmpIndVar);
    const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
                                                  ExitCnt->getType());
    const SCEV *ZExtTrunc =
      SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());

    if (ZExtTrunc == IV) {
      Extended = true;
      ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
                                   "wide.trip.count");
    } else {
      const SCEV *SExtTrunc =
        SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
      if (SExtTrunc == IV) {
        Extended = true;
        ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
                                     "wide.trip.count");
      }
    }

    if (Extended) {
      bool Discard;
      L->makeLoopInvariant(ExitCnt, Discard);
    } else
      CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
                                      "lftr.wideiv");
  }
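  // For instance (sketch): with an i64 IV and an i32 exit count %n, if SCEV
  // can show zext(trunc(IV)) == IV, a single extension is hoisted out:
  //   %wide.trip.count = zext i32 %n to i64      ; loop-invariant
  //   %exitcond = icmp ne i64 %i.next, %wide.trip.count
  // instead of truncating the IV on every iteration:
  //   %lftr.wideiv = trunc i64 %i.next to i32
  //   %exitcond = icmp ne i32 %lftr.wideiv, %n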
  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
                    << "      LHS:" << *CmpIndVar << '\n'
                    << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
                    << "\n"
                    << "      RHS:\t" << *ExitCnt << "\n"
                    << "ExitCount:\t" << *ExitCount << "\n"
                    << "  was: " << *BI->getCondition() << "\n");

  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
  Value *OrigCond = BI->getCondition();
  // It's tempting to use replaceAllUsesWith here to fully replace the old
  // comparison, but that's not immediately safe, since users of the old
  // comparison may not be dominated by the new comparison. Instead, just
  // update the branch to use the new comparison; in the common case this
  // will make the old comparison dead.
  BI->setCondition(Cond);
  DeadInsts.push_back(OrigCond);

  ++NumLFTR;
  return true;
}
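// End-to-end illustration of LFTR (hypothetical IR; %limit stands for the
// value expanded by genLoopLimit): a latch test such as
//   %c = icmp slt i32 %i, %n
//   br i1 %c, label %body, label %exit
// becomes the canonical form
//   %exitcond = icmp ne i32 %i.next, %limit
//   br i1 %exitcond, label %body, label %exit
// and the old compare is pushed onto DeadInsts for later cleanup.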
//===----------------------------------------------------------------------===//
//  sinkUnusedInvariants. A late subpass to clean up loop preheaders.
//===----------------------------------------------------------------------===//

/// If there's a single exit block, sink any loop-invariant values that
/// were defined in the preheader but not used inside the loop into the
/// exit block to reduce register pressure in the loop.
bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
  BasicBlock *ExitBlock = L->getExitBlock();
  if (!ExitBlock) return false;

  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) return false;

  bool MadeAnyChanges = false;
  BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
  BasicBlock::iterator I(Preheader->getTerminator());
  while (I != Preheader->begin()) {
    --I;
    // New instructions were inserted at the end of the preheader.
    if (isa<PHINode>(I))
      break;

    // Don't move instructions which might have side effects, since the side
    // effects need to complete before instructions inside the loop.  Also
    // don't move instructions which might read memory, since the loop may
    // modify memory. Note that it's okay if the instruction might have
    // undefined behavior: LoopSimplify guarantees that the preheader
    // dominates the exit block.
    if (I->mayHaveSideEffects() || I->mayReadFromMemory())
      continue;

    // Skip debug info intrinsics.
    if (isa<DbgInfoIntrinsic>(I))
      continue;

    // Skip eh pad instructions.
    if (I->isEHPad())
      continue;

    // Don't sink alloca: we never want to sink static allocas out of the
    // entry block, and correctly sinking dynamic allocas requires
    // checks for stacksave/stackrestore intrinsics.
    // FIXME: Refactor this check somehow?
    if (isa<AllocaInst>(I))
      continue;

    // Determine if there is a use in or before the loop (direct or
    // otherwise).
    bool UsedInLoop = false;
    for (Use &U : I->uses()) {
      Instruction *User = cast<Instruction>(U.getUser());
      BasicBlock *UseBB = User->getParent();
      if (PHINode *P = dyn_cast<PHINode>(User)) {
        unsigned i =
          PHINode::getIncomingValueNumForOperand(U.getOperandNo());
        UseBB = P->getIncomingBlock(i);
      }
      if (UseBB == Preheader || L->contains(UseBB)) {
        UsedInLoop = true;
        break;
      }
    }

    // If there is, the def must remain in the preheader.
    if (UsedInLoop)
      continue;

    // Otherwise, sink it to the exit block.
    Instruction *ToMove = &*I;
    bool Done = false;

    if (I != Preheader->begin()) {
      // Skip debug info intrinsics.
      do {
        --I;
      } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());

      if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
        Done = true;
    } else {
      Done = true;
    }

    MadeAnyChanges = true;
    ToMove->moveBefore(*ExitBlock, InsertPt);
    if (Done) break;
    InsertPt = ToMove->getIterator();
  }

  return MadeAnyChanges;
}
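// Example of the effect (illustrative IR, assumed names):
//
//   before:  preheader:  %inv = mul i32 %a, %b   ; live across the loop
//            exit:       call void @use(i32 %inv)
//
//   after:   preheader:  (empty)
//            exit:       %inv = mul i32 %a, %b   ; sunk; shorter live range
//                        call void @use(i32 %inv)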
bool IndVarSimplify::optimizeLoopExits(Loop *L) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Form an expression for the maximum exit count possible for this loop. We
  // merge the max and exact information to approximate a version of
  // getMaxBackedgeTakenInfo which isn't restricted to just constants.
  // TODO: factor this out as a version of getMaxBackedgeTakenCount which
  // isn't guaranteed to return a constant.
  SmallVector<const SCEV*, 4> ExitCounts;
  const SCEV *MaxConstEC = SE->getMaxBackedgeTakenCount(L);
  if (!isa<SCEVCouldNotCompute>(MaxConstEC))
    ExitCounts.push_back(MaxConstEC);
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    if (!isa<SCEVCouldNotCompute>(ExitCount)) {
      assert(DT->dominates(ExitingBB, L->getLoopLatch()) &&
             "We should only have known counts for exiting blocks that "
             "dominate latch!");
      ExitCounts.push_back(ExitCount);
    }
  }
  if (ExitCounts.empty())
    return false;
  const SCEV *MaxExitCount = SE->getUMinFromMismatchedTypes(ExitCounts);
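  // Sketch: for a loop with two computable exits taking at most %n and 100
  // backedges respectively, MaxExitCount becomes umin(%n, 100) (operands
  // zero-extended to a common width); this loop-invariant bound is used
  // below to prove that some exits can never be taken.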
  bool Changed = false;
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one.  Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
    if (LI->getLoopFor(ExitingBB) != L)
      continue;

    // Can't rewrite non-branch yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;

    // If already constant, nothing to do.
    if (isa<Constant>(BI->getCondition()))
      continue;

    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    if (isa<SCEVCouldNotCompute>(ExitCount))
      continue;

    // If we know we'd exit on the first iteration, rewrite the exit to
    // reflect this.  This does not imply the loop must exit through this
    // exit; there may be an earlier one taken on the first iteration.
    // TODO: Given we know the backedge can't be taken, we should go ahead
    // and break it.  Or at least, kill all the header phis and simplify.
    if (ExitCount->isZero()) {
      bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
      auto *OldCond = BI->getCondition();
      auto *NewCond = ExitIfTrue ? ConstantInt::getTrue(OldCond->getType()) :
        ConstantInt::getFalse(OldCond->getType());
      BI->setCondition(NewCond);
      if (OldCond->use_empty())
        DeadInsts.push_back(OldCond);
      Changed = true;
      continue;
    }

    // If we end up with a pointer exit count, bail.  Note that we can end up
    // with a pointer exit count for one exiting block, and not for another in
    // the same loop.
    if (!ExitCount->getType()->isIntegerTy() ||
        !MaxExitCount->getType()->isIntegerTy())
      continue;

    Type *WiderType =
      SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
    ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
    MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
    assert(MaxExitCount->getType() == ExitCount->getType());

    // Can we prove that some other exit must be taken strictly before this
    // one?  TODO: handle cases where ule is known, and equality is covered
    // by a dominating exit.
    if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
                                     MaxExitCount, ExitCount)) {
      bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
      auto *OldCond = BI->getCondition();
      auto *NewCond = ExitIfTrue ? ConstantInt::getFalse(OldCond->getType()) :
        ConstantInt::getTrue(OldCond->getType());
      BI->setCondition(NewCond);
      if (OldCond->use_empty())
        DeadInsts.push_back(OldCond);
      Changed = true;
      continue;
    }

    // TODO: If we can prove that the exiting iteration is equal to the exit
    // count for this exit and that no previous exit opportunities exist
    // within the loop, then we can discharge all other exits.  (May fall out
    // of the previous TODO.)

    // TODO: If we can't prove any relation between our exit count and the
    // loop's exit count, but taking this exit doesn't require actually
    // running the loop (i.e. no side effects, no computed values used in
    // exit), then we can replace the exit test with a loop-invariant test
    // which exits on the first iteration.
  }
  return Changed;
}
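// Worked example for the ULT rewrite above (hypothetical): suppose the loop
// has an early exit whose count is %n and a later exit whose count is %m,
// and the loop entry is guarded by '%n u< %m'. Then MaxExitCount u< %m is
// provable, the second exit can never be reached, and its branch condition
// is folded to a constant; the old compare is queued on DeadInsts.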
//===----------------------------------------------------------------------===//
//  IndVarSimplify driver. Manage several subpasses of IV simplification.
//===----------------------------------------------------------------------===//

bool IndVarSimplify::run(Loop *L) {
  // We need (and expect!) the incoming loop to be in LCSSA.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "LCSSA required to run indvars!");
  bool Changed = false;

  // If LoopSimplify form is not available, stay out of trouble. Some notes:
  //  - LSR currently only supports LoopSimplify-form loops. Indvars'
  //    canonicalization can be a pessimization without LSR to "clean up"
  //    afterwards.
  //  - We depend on having a preheader; in particular,
  //    Loop::getCanonicalInductionVariable only supports loops with
  //    preheaders, and we're in trouble if we can't find the induction
  //    variable even when we've manually inserted one.
  //  - LFTR relies on having a single backedge.
  if (!L->isLoopSimplifyForm())
    return false;

  // If there are any floating-point recurrences, attempt to
  // transform them to use integer recurrences.
  Changed |= rewriteNonIntegerIVs(L);

  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);

  // Create a rewriter object which we'll use to transform the code with.
  SCEVExpander Rewriter(*SE, DL, "indvars");
#ifndef NDEBUG
  Rewriter.setDebugType(DEBUG_TYPE);
#endif
  // Eliminate redundant IV users.
  //
  // Simplification works best when run before other consumers of SCEV. We
  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
  // other expressions involving loop IVs have been evaluated. This helps SCEV
  // set no-wrap flags before normalizing sign/zero extension.
  Rewriter.disableCanonicalMode();
  Changed |= simplifyAndExtend(L, Rewriter, LI);

  // Check to see if this loop has a computable loop-invariant execution
  // count. If so, this means that we can compute the final value of any
  // expressions that are recurrent in the loop, and substitute the exit
  // values from the loop into any instructions outside of the loop that use
  // the final values of the current expressions.
  if (ReplaceExitValue != NeverRepl &&
      !isa<SCEVCouldNotCompute>(BackedgeTakenCount))
    Changed |= rewriteLoopExitValues(L, Rewriter);

  // Eliminate redundant IV cycles.
  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);

  Changed |= optimizeLoopExits(L);

  // If we have a trip count expression, rewrite the loop's exit condition
  // using it.
  if (!DisableLFTR) {
    SmallVector<BasicBlock*, 16> ExitingBlocks;
    L->getExitingBlocks(ExitingBlocks);
    for (BasicBlock *ExitingBB : ExitingBlocks) {
      // Can't rewrite non-branch yet.
      if (!isa<BranchInst>(ExitingBB->getTerminator()))
        continue;

      // If our exiting block exits multiple loops, we can only rewrite the
      // innermost one.  Otherwise, we're changing how many times the
      // innermost loop runs before it exits.
      if (LI->getLoopFor(ExitingBB) != L)
        continue;

      if (!needsLFTR(L, ExitingBB))
        continue;

      const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
      if (isa<SCEVCouldNotCompute>(ExitCount))
        continue;

      // This was handled above, but as we form SCEVs, we can sometimes refine
      // existing ones; this allows exit counts to be folded to zero which
      // weren't when optimizeLoopExits saw them.  Arguably, we should iterate
      // until stable to handle cases like this better.
      if (ExitCount->isZero())
        continue;

      PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
      if (!IndVar)
        continue;

      // Avoid high cost expansions.  Note: This heuristic is questionable in
      // that our definition of "high cost" is not exactly principled.
      if (Rewriter.isHighCostExpansion(ExitCount, L))
        continue;

      // Check preconditions for proper SCEVExpander operation. SCEV does not
      // express SCEVExpander's dependencies, such as LoopSimplify. Instead
      // any pass that uses the SCEVExpander must do it. This does not work
      // well for loop passes because SCEVExpander makes assumptions about
      // all loops, while LoopPassManager only forces the current loop to be
      // simplified.
      //
      // FIXME: SCEV expansion has no way to bail out, so the caller must
      // explicitly check any assumptions made by SCEV. Brittle.
      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
      if (!AR || AR->getLoop()->getLoopPreheader())
        Changed |= linearFunctionTestReplace(L, ExitingBB,
                                             ExitCount, IndVar,
                                             Rewriter);
    }
  }
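  // Note (illustrative): "high cost" above is a heuristic over the SCEV
  // expression tree. An affine count such as (%n - 1) expands to cheap
  // preheader arithmetic, whereas a count containing a division, e.g.
  // ((%n - 1) /u 7), may be rejected because materializing the udiv can
  // outweigh the benefit of the canonical compare.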
  // Clear the rewriter cache, because values that are in the rewriter's cache
  // can be deleted in the loop below, causing the AssertingVH in the cache to
  // trigger.
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty())
    if (Instruction *Inst =
            dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
      Changed |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);

  // The Rewriter may not be used from this point on.

  // Loop-invariant instructions in the preheader that aren't used in the
  // loop may be sunk below the loop to reduce register pressure.
  Changed |= sinkUnusedInvariants(L);

  // rewriteFirstIterationLoopExitValues does not rely on the computation of
  // trip count and therefore can further simplify exit values in addition to
  // rewriteLoopExitValues.
  Changed |= rewriteFirstIterationLoopExitValues(L);

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI);

  // Check a post-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  // Verify that LFTR and any other changes have not interfered with SCEV's
  // ability to compute trip count.
#ifndef NDEBUG
  if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
    SE->forgetLoop(L);
    const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
    if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
        SE->getTypeSizeInBits(NewBECount->getType()))
      NewBECount = SE->getTruncateOrNoop(NewBECount,
                                         BackedgeTakenCount->getType());
    else
      BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
                                                 NewBECount->getType());
    assert(BackedgeTakenCount == NewBECount && "indvars must preserve SCEV");
  }
#endif

  return Changed;
}
PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
                                          LoopStandardAnalysisResults &AR,
                                          LPMUpdater &) {
  Function *F = L.getHeader()->getParent();
  const DataLayout &DL = F->getParent()->getDataLayout();

  IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI);
  if (!IVS.run(&L))
    return PreservedAnalyses::all();

  auto PA = getLoopPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>();
  return PA;
}
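// Usage sketch (assumes a standard opt build of this vintage; flags may
// differ in other releases): the pass is reachable through either pass
// manager, e.g.
//   opt -passes=indvars -S input.ll   ; new pass manager
//   opt -indvars -S input.ll          ; legacy pass manager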
namespace {

struct IndVarSimplifyLegacyPass : public LoopPass {
  static char ID; // Pass identification, replacement for typeid

  IndVarSimplifyLegacyPass() : LoopPass(ID) {
    initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
    if (skipLoop(L))
      return false;

    auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
    auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
    auto *TLI = TLIP ? &TLIP->getTLI() : nullptr;
    auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
    auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
    const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

    IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI);
    return IVS.run(L);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    getLoopAnalysisUsage(AU);
  }
};

} // end anonymous namespace

char IndVarSimplifyLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
                      "Induction Variable Simplification", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
                    "Induction Variable Simplification", false, false)

Pass *llvm::createIndVarSimplifyPass() {
  return new IndVarSimplifyLegacyPass();
}