Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Analysis/ScalarEvolution.cpp
//===- ScalarEvolution.cpp - Scalar Evolution Analysis --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the implementation of the scalar evolution analysis
// engine, which is used primarily to analyze expressions involving induction
// variables in loops.
//
// There are several aspects to this library.  First is the representation of
// scalar expressions, which are represented as subclasses of the SCEV class.
// These classes are used to represent certain types of subexpressions that we
// can handle. We only create one SCEV of a particular shape, so
// pointer-comparisons for equality are legal.
//
// One important aspect of the SCEV objects is that they are never cyclic, even
// if there is a cycle in the dataflow for an expression (i.e., a PHI node).  If
// the PHI node is one of the idioms that we can represent (e.g., a polynomial
// recurrence) then we represent it directly as a recurrence node, otherwise we
// represent it as a SCEVUnknown node.
//
// In addition to being able to represent expressions of various types, we also
// have folders that are used to build the *canonical* representation for a
// particular expression.  These folders are capable of using a variety of
// rewrite rules to simplify the expressions.
//
// Once the folders are defined, we can implement the more interesting
// higher-level code, such as the code that recognizes PHI nodes of various
// types, computes the execution count of a loop, etc.
//
// TODO: We should use these routines and value representations to implement
// dependence analysis!
//
//===----------------------------------------------------------------------===//
//
// There are several good references for the techniques used in this analysis.
//
//  Chains of recurrences -- a method to expedite the evaluation
//  of closed-form functions
//  Olaf Bachmann, Paul S. Wang, Eugene V. Zima
//
//  On computational properties of chains of recurrences
//  Eugene V. Zima
//
//  Symbolic Evaluation of Chains of Recurrences for Loop Optimization
//  Robert A. van Engelen
//
//  Efficient Symbolic Analysis for Optimizing Compilers
//  Robert A. van Engelen
//
//  Using the chains of recurrences algebra for data dependence testing and
//  induction variable substitution
//  MS Thesis, Johnie Birch
//
//===----------------------------------------------------------------------===//
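
// An illustrative aside (editorial sketch, not part of the upstream file):
// because expressions are uniqued, structurally identical SCEVs are
// pointer-identical, and commutative operands are canonically ordered, so
// given a ScalarEvolution &SE and two SCEVs X and Y:
//
//   const SCEV *A = SE.getAddExpr(X, Y);
//   const SCEV *B = SE.getAddExpr(Y, X); // operands are re-sorted
//   assert(A == B && "same shape => same object");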

#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/SaveAndRestore.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <climits>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <map>
#include <memory>
#include <tuple>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "scalar-evolution"

STATISTIC(NumArrayLenItCounts,
          "Number of trip counts computed with array length");
STATISTIC(NumTripCountsComputed,
          "Number of loops with predictable loop counts");
STATISTIC(NumTripCountsNotComputed,
          "Number of loops without predictable loop counts");
STATISTIC(NumBruteForceTripCountsComputed,
          "Number of loops with trip counts computed by force");

static cl::opt<unsigned>
MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden,
                        cl::desc("Maximum number of iterations SCEV will "
                                 "symbolically execute a constant "
                                 "derived loop"),
                        cl::init(100));

// FIXME: Enable this with EXPENSIVE_CHECKS when the test suite is clean.
static cl::opt<bool> VerifySCEV(
    "verify-scev", cl::Hidden,
    cl::desc("Verify ScalarEvolution's backedge taken counts (slow)"));
static cl::opt<bool>
    VerifySCEVMap("verify-scev-maps", cl::Hidden,
                  cl::desc("Verify no dangling value in ScalarEvolution's "
                           "ExprValueMap (slow)"));

static cl::opt<bool> VerifyIR(
    "scev-verify-ir", cl::Hidden,
    cl::desc("Verify IR correctness when making sensitive SCEV queries (slow)"),
    cl::init(false));

static cl::opt<unsigned> MulOpsInlineThreshold(
    "scev-mulops-inline-threshold", cl::Hidden,
    cl::desc("Threshold for inlining multiplication operands into a SCEV"),
    cl::init(32));

static cl::opt<unsigned> AddOpsInlineThreshold(
    "scev-addops-inline-threshold", cl::Hidden,
    cl::desc("Threshold for inlining addition operands into a SCEV"),
    cl::init(500));

static cl::opt<unsigned> MaxSCEVCompareDepth(
    "scalar-evolution-max-scev-compare-depth", cl::Hidden,
    cl::desc("Maximum depth of recursive SCEV complexity comparisons"),
    cl::init(32));

static cl::opt<unsigned> MaxSCEVOperationsImplicationDepth(
    "scalar-evolution-max-scev-operations-implication-depth", cl::Hidden,
    cl::desc("Maximum depth of recursive SCEV operations implication analysis"),
    cl::init(2));

static cl::opt<unsigned> MaxValueCompareDepth(
    "scalar-evolution-max-value-compare-depth", cl::Hidden,
    cl::desc("Maximum depth of recursive value complexity comparisons"),
    cl::init(2));

static cl::opt<unsigned>
    MaxArithDepth("scalar-evolution-max-arith-depth", cl::Hidden,
                  cl::desc("Maximum depth of recursive arithmetics"),
                  cl::init(32));

static cl::opt<unsigned> MaxConstantEvolvingDepth(
    "scalar-evolution-max-constant-evolving-depth", cl::Hidden,
    cl::desc("Maximum depth of recursive constant evolving"), cl::init(32));

static cl::opt<unsigned>
    MaxCastDepth("scalar-evolution-max-cast-depth", cl::Hidden,
                 cl::desc("Maximum depth of recursive SExt/ZExt/Trunc"),
                 cl::init(8));

static cl::opt<unsigned>
    MaxAddRecSize("scalar-evolution-max-add-rec-size", cl::Hidden,
                  cl::desc("Max coefficients in AddRec during evolving"),
                  cl::init(8));

static cl::opt<unsigned>
    HugeExprThreshold("scalar-evolution-huge-expr-threshold", cl::Hidden,
                  cl::desc("Size of the expression which is considered huge"),
                  cl::init(4096));
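
// Illustrative usage (editorial note; the exact command line is an assumption,
// not taken from this file): since the thresholds above are ordinary cl::opt
// flags, they can be overridden when running the legacy 'opt' tool, e.g.:
//
//   opt -analyze -scalar-evolution -scalar-evolution-max-iterations=200 in.ll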

//===----------------------------------------------------------------------===//
//                           SCEV class definitions
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Implementation of the SCEV class.
//

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void SCEV::dump() const {
  print(dbgs());
  dbgs() << '\n';
}
#endif

void SCEV::print(raw_ostream &OS) const {
  switch (static_cast<SCEVTypes>(getSCEVType())) {
  case scConstant:
    cast<SCEVConstant>(this)->getValue()->printAsOperand(OS, false);
    return;
  case scTruncate: {
    const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(this);
    const SCEV *Op = Trunc->getOperand();
    OS << "(trunc " << *Op->getType() << " " << *Op << " to "
       << *Trunc->getType() << ")";
    return;
  }
  case scZeroExtend: {
    const SCEVZeroExtendExpr *ZExt = cast<SCEVZeroExtendExpr>(this);
    const SCEV *Op = ZExt->getOperand();
    OS << "(zext " << *Op->getType() << " " << *Op << " to "
       << *ZExt->getType() << ")";
    return;
  }
  case scSignExtend: {
    const SCEVSignExtendExpr *SExt = cast<SCEVSignExtendExpr>(this);
    const SCEV *Op = SExt->getOperand();
    OS << "(sext " << *Op->getType() << " " << *Op << " to "
       << *SExt->getType() << ")";
    return;
  }
  case scAddRecExpr: {
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(this);
    OS << "{" << *AR->getOperand(0);
    for (unsigned i = 1, e = AR->getNumOperands(); i != e; ++i)
      OS << ",+," << *AR->getOperand(i);
    OS << "}<";
    if (AR->hasNoUnsignedWrap())
      OS << "nuw><";
    if (AR->hasNoSignedWrap())
      OS << "nsw><";
    if (AR->hasNoSelfWrap() &&
        !AR->getNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW)))
      OS << "nw><";
    AR->getLoop()->getHeader()->printAsOperand(OS, /*PrintType=*/false);
    OS << ">";
    return;
  }
  case scAddExpr:
  case scMulExpr:
  case scUMaxExpr:
  case scSMaxExpr:
  case scUMinExpr:
  case scSMinExpr: {
    const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(this);
    const char *OpStr = nullptr;
    switch (NAry->getSCEVType()) {
    case scAddExpr: OpStr = " + "; break;
    case scMulExpr: OpStr = " * "; break;
    case scUMaxExpr: OpStr = " umax "; break;
    case scSMaxExpr: OpStr = " smax "; break;
    case scUMinExpr:
      OpStr = " umin ";
      break;
    case scSMinExpr:
      OpStr = " smin ";
      break;
    }
    OS << "(";
    for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
         I != E; ++I) {
      OS << **I;
      if (std::next(I) != E)
        OS << OpStr;
    }
    OS << ")";
    switch (NAry->getSCEVType()) {
    case scAddExpr:
    case scMulExpr:
      if (NAry->hasNoUnsignedWrap())
        OS << "<nuw>";
      if (NAry->hasNoSignedWrap())
        OS << "<nsw>";
    }
    return;
  }
  case scUDivExpr: {
    const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(this);
    OS << "(" << *UDiv->getLHS() << " /u " << *UDiv->getRHS() << ")";
    return;
  }
  case scUnknown: {
    const SCEVUnknown *U = cast<SCEVUnknown>(this);
    Type *AllocTy;
    if (U->isSizeOf(AllocTy)) {
      OS << "sizeof(" << *AllocTy << ")";
      return;
    }
    if (U->isAlignOf(AllocTy)) {
      OS << "alignof(" << *AllocTy << ")";
      return;
    }

    Type *CTy;
    Constant *FieldNo;
    if (U->isOffsetOf(CTy, FieldNo)) {
      OS << "offsetof(" << *CTy << ", ";
      FieldNo->printAsOperand(OS, false);
      OS << ")";
      return;
    }

    // Otherwise just print it normally.
    U->getValue()->printAsOperand(OS, false);
    return;
  }
  case scCouldNotCompute:
    OS << "***COULDNOTCOMPUTE***";
    return;
  }
  llvm_unreachable("Unknown SCEV kind!");
}

Type *SCEV::getType() const {
  switch (static_cast<SCEVTypes>(getSCEVType())) {
  case scConstant:
    return cast<SCEVConstant>(this)->getType();
  case scTruncate:
  case scZeroExtend:
  case scSignExtend:
    return cast<SCEVCastExpr>(this)->getType();
  case scAddRecExpr:
  case scMulExpr:
  case scUMaxExpr:
  case scSMaxExpr:
  case scUMinExpr:
  case scSMinExpr:
    return cast<SCEVNAryExpr>(this)->getType();
  case scAddExpr:
    return cast<SCEVAddExpr>(this)->getType();
  case scUDivExpr:
    return cast<SCEVUDivExpr>(this)->getType();
  case scUnknown:
    return cast<SCEVUnknown>(this)->getType();
  case scCouldNotCompute:
    llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
  }
  llvm_unreachable("Unknown SCEV kind!");
}

bool SCEV::isZero() const {
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
    return SC->getValue()->isZero();
  return false;
}

bool SCEV::isOne() const {
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
    return SC->getValue()->isOne();
  return false;
}

bool SCEV::isAllOnesValue() const {
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
    return SC->getValue()->isMinusOne();
  return false;
}

bool SCEV::isNonConstantNegative() const {
  const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(this);
  if (!Mul) return false;

  // If there is a constant factor, it will be first.
  const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
  if (!SC) return false;

  // Return true if the value is negative, this matches things like (-42 * V).
  return SC->getAPInt().isNegative();
}

SCEVCouldNotCompute::SCEVCouldNotCompute() :
  SCEV(FoldingSetNodeIDRef(), scCouldNotCompute, 0) {}

bool SCEVCouldNotCompute::classof(const SCEV *S) {
  return S->getSCEVType() == scCouldNotCompute;
}

const SCEV *ScalarEvolution::getConstant(ConstantInt *V) {
  FoldingSetNodeID ID;
  ID.AddInteger(scConstant);
  ID.AddPointer(V);
  void *IP = nullptr;
  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
  SCEV *S = new (SCEVAllocator) SCEVConstant(ID.Intern(SCEVAllocator), V);
  UniqueSCEVs.InsertNode(S, IP);
  return S;
}

const SCEV *ScalarEvolution::getConstant(const APInt &Val) {
  return getConstant(ConstantInt::get(getContext(), Val));
}

const SCEV *
ScalarEvolution::getConstant(Type *Ty, uint64_t V, bool isSigned) {
  IntegerType *ITy = cast<IntegerType>(getEffectiveSCEVType(Ty));
  return getConstant(ConstantInt::get(ITy, V, isSigned));
}

SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID,
                           unsigned SCEVTy, const SCEV *op, Type *ty)
  : SCEV(ID, SCEVTy, computeExpressionSize(op)), Op(op), Ty(ty) {}

SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID,
                                   const SCEV *op, Type *ty)
  : SCEVCastExpr(ID, scTruncate, op, ty) {
  assert(Op->getType()->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
         "Cannot truncate non-integer value!");
}

SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID,
                                       const SCEV *op, Type *ty)
  : SCEVCastExpr(ID, scZeroExtend, op, ty) {
  assert(Op->getType()->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
         "Cannot zero extend non-integer value!");
}

SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeIDRef ID,
                                       const SCEV *op, Type *ty)
  : SCEVCastExpr(ID, scSignExtend, op, ty) {
  assert(Op->getType()->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
         "Cannot sign extend non-integer value!");
}

void SCEVUnknown::deleted() {
  // Clear this SCEVUnknown from various maps.
  SE->forgetMemoizedResults(this);

  // Remove this SCEVUnknown from the uniquing map.
  SE->UniqueSCEVs.RemoveNode(this);

  // Release the value.
  setValPtr(nullptr);
}

void SCEVUnknown::allUsesReplacedWith(Value *New) {
  // Remove this SCEVUnknown from the uniquing map.
  SE->UniqueSCEVs.RemoveNode(this);

  // Update this SCEVUnknown to point to the new value. This is needed
  // because there may still be outstanding SCEVs which still point to
  // this SCEVUnknown.
  setValPtr(New);
}

bool SCEVUnknown::isSizeOf(Type *&AllocTy) const {
  if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
    if (VCE->getOpcode() == Instruction::PtrToInt)
      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
        if (CE->getOpcode() == Instruction::GetElementPtr &&
            CE->getOperand(0)->isNullValue() &&
            CE->getNumOperands() == 2)
          if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(1)))
            if (CI->isOne()) {
              AllocTy = cast<PointerType>(CE->getOperand(0)->getType())
                                 ->getElementType();
              return true;
            }

  return false;
}

bool SCEVUnknown::isAlignOf(Type *&AllocTy) const {
  if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
    if (VCE->getOpcode() == Instruction::PtrToInt)
      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
        if (CE->getOpcode() == Instruction::GetElementPtr &&
            CE->getOperand(0)->isNullValue()) {
          Type *Ty =
            cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
          if (StructType *STy = dyn_cast<StructType>(Ty))
            if (!STy->isPacked() &&
                CE->getNumOperands() == 3 &&
                CE->getOperand(1)->isNullValue()) {
              if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(2)))
                if (CI->isOne() &&
                    STy->getNumElements() == 2 &&
                    STy->getElementType(0)->isIntegerTy(1)) {
                  AllocTy = STy->getElementType(1);
                  return true;
                }
            }
        }

  return false;
}

bool SCEVUnknown::isOffsetOf(Type *&CTy, Constant *&FieldNo) const {
  if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
    if (VCE->getOpcode() == Instruction::PtrToInt)
      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
        if (CE->getOpcode() == Instruction::GetElementPtr &&
            CE->getNumOperands() == 3 &&
            CE->getOperand(0)->isNullValue() &&
            CE->getOperand(1)->isNullValue()) {
          Type *Ty =
            cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
          // Ignore vector types here so that ScalarEvolutionExpander doesn't
          // emit getelementptrs that index into vectors.
          if (Ty->isStructTy() || Ty->isArrayTy()) {
            CTy = Ty;
            FieldNo = CE->getOperand(2);
            return true;
          }
        }

  return false;
}
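
// Editorial illustration (not in the upstream file): the three predicates
// above recognize the classic null-GEP constant-expression idioms,
// schematically:
//
//   ptrtoint (%T* getelementptr (%T, %T* null, i32 1) to i64)
//
// is matched by isSizeOf (a ptrtoint of a two-operand GEP off null with
// index one) and is printed by SCEV::print as sizeof(%T).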

//===----------------------------------------------------------------------===//
//                               SCEV Utilities
//===----------------------------------------------------------------------===//

/// Compare the two values \p LV and \p RV in terms of their "complexity" where
/// "complexity" is a partial (and somewhat ad-hoc) relation used to order
/// operands in SCEV expressions.  \p EqCache is a set of pairs of values that
/// have been previously deemed to be "equally complex" by this routine.  It is
/// intended to avoid exponential time complexity in cases like:
///
///   %a = f(%x, %y)
///   %b = f(%a, %a)
///   %c = f(%b, %b)
///
///   %d = f(%x, %y)
///   %e = f(%d, %d)
///   %f = f(%e, %e)
///
///   CompareValueComplexity(%f, %c)
///
/// Since we do not continue running this routine on expression trees once we
/// have seen unequal values, there is no need to track them in the cache.
static int
CompareValueComplexity(EquivalenceClasses<const Value *> &EqCacheValue,
                       const LoopInfo *const LI, Value *LV, Value *RV,
                       unsigned Depth) {
  if (Depth > MaxValueCompareDepth || EqCacheValue.isEquivalent(LV, RV))
    return 0;

  // Order pointer values after integer values. This helps SCEVExpander form
  // GEPs.
  bool LIsPointer = LV->getType()->isPointerTy(),
       RIsPointer = RV->getType()->isPointerTy();
  if (LIsPointer != RIsPointer)
    return (int)LIsPointer - (int)RIsPointer;

  // Compare getValueID values.
  unsigned LID = LV->getValueID(), RID = RV->getValueID();
  if (LID != RID)
    return (int)LID - (int)RID;

  // Sort arguments by their position.
  if (const auto *LA = dyn_cast<Argument>(LV)) {
    const auto *RA = cast<Argument>(RV);
    unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo();
    return (int)LArgNo - (int)RArgNo;
  }

  if (const auto *LGV = dyn_cast<GlobalValue>(LV)) {
    const auto *RGV = cast<GlobalValue>(RV);

    const auto IsGVNameSemantic = [&](const GlobalValue *GV) {
      auto LT = GV->getLinkage();
      return !(GlobalValue::isPrivateLinkage(LT) ||
               GlobalValue::isInternalLinkage(LT));
    };

    // Use the names to distinguish the two values, but only if the
    // names are semantically important.
    if (IsGVNameSemantic(LGV) && IsGVNameSemantic(RGV))
      return LGV->getName().compare(RGV->getName());
  }

  // For instructions, compare their loop depth, and their operand count.  This
  // is pretty loose.
  if (const auto *LInst = dyn_cast<Instruction>(LV)) {
    const auto *RInst = cast<Instruction>(RV);

    // Compare loop depths.
    const BasicBlock *LParent = LInst->getParent(),
                     *RParent = RInst->getParent();
    if (LParent != RParent) {
      unsigned LDepth = LI->getLoopDepth(LParent),
               RDepth = LI->getLoopDepth(RParent);
      if (LDepth != RDepth)
        return (int)LDepth - (int)RDepth;
    }

    // Compare the number of operands.
    unsigned LNumOps = LInst->getNumOperands(),
             RNumOps = RInst->getNumOperands();
    if (LNumOps != RNumOps)
      return (int)LNumOps - (int)RNumOps;

    for (unsigned Idx : seq(0u, LNumOps)) {
      int Result =
          CompareValueComplexity(EqCacheValue, LI, LInst->getOperand(Idx),
                                 RInst->getOperand(Idx), Depth + 1);
      if (Result != 0)
        return Result;
    }
  }

  EqCacheValue.unionSets(LV, RV);
  return 0;
}

// Return negative, zero, or positive, if LHS is less than, equal to, or greater
// than RHS, respectively. A three-way result allows recursive comparisons to be
// more efficient.
static int CompareSCEVComplexity(
    EquivalenceClasses<const SCEV *> &EqCacheSCEV,
    EquivalenceClasses<const Value *> &EqCacheValue,
    const LoopInfo *const LI, const SCEV *LHS, const SCEV *RHS,
    DominatorTree &DT, unsigned Depth = 0) {
  // Fast-path: SCEVs are uniqued so we can do a quick equality check.
  if (LHS == RHS)
    return 0;

  // Primarily, sort the SCEVs by their getSCEVType().
  unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType();
  if (LType != RType)
    return (int)LType - (int)RType;

  if (Depth > MaxSCEVCompareDepth || EqCacheSCEV.isEquivalent(LHS, RHS))
    return 0;
  // Aside from the getSCEVType() ordering, the particular ordering
  // isn't very important except that it's beneficial to be consistent,
  // so that (a + b) and (b + a) don't end up as different expressions.
  switch (static_cast<SCEVTypes>(LType)) {
  case scUnknown: {
    const SCEVUnknown *LU = cast<SCEVUnknown>(LHS);
    const SCEVUnknown *RU = cast<SCEVUnknown>(RHS);

    int X = CompareValueComplexity(EqCacheValue, LI, LU->getValue(),
                                   RU->getValue(), Depth + 1);
    if (X == 0)
      EqCacheSCEV.unionSets(LHS, RHS);
    return X;
  }

  case scConstant: {
    const SCEVConstant *LC = cast<SCEVConstant>(LHS);
    const SCEVConstant *RC = cast<SCEVConstant>(RHS);

    // Compare constant values.
    const APInt &LA = LC->getAPInt();
    const APInt &RA = RC->getAPInt();
    unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth();
    if (LBitWidth != RBitWidth)
      return (int)LBitWidth - (int)RBitWidth;
    return LA.ult(RA) ? -1 : 1;
  }

  case scAddRecExpr: {
    const SCEVAddRecExpr *LA = cast<SCEVAddRecExpr>(LHS);
    const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS);

    // There is always a dominance between two recs that are used by one SCEV,
    // so we can safely sort recs by loop header dominance. We require such
    // order in getAddExpr.
    const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop();
    if (LLoop != RLoop) {
      const BasicBlock *LHead = LLoop->getHeader(), *RHead = RLoop->getHeader();
      assert(LHead != RHead && "Two loops share the same header?");
      if (DT.dominates(LHead, RHead))
        return 1;
      else
        assert(DT.dominates(RHead, LHead) &&
               "No dominance between recurrences used by one SCEV?");
      return -1;
    }

    // Addrec complexity grows with operand count.
    unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands();
    if (LNumOps != RNumOps)
      return (int)LNumOps - (int)RNumOps;

    // Lexicographically compare.
    for (unsigned i = 0; i != LNumOps; ++i) {
      int X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI,
                                    LA->getOperand(i), RA->getOperand(i), DT,
                                    Depth + 1);
      if (X != 0)
        return X;
    }
    EqCacheSCEV.unionSets(LHS, RHS);
    return 0;
  }

  case scAddExpr:
  case scMulExpr:
  case scSMaxExpr:
  case scUMaxExpr:
  case scSMinExpr:
  case scUMinExpr: {
    const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS);
    const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS);

    // Lexicographically compare n-ary expressions.
    unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands();
    if (LNumOps != RNumOps)
      return (int)LNumOps - (int)RNumOps;

    for (unsigned i = 0; i != LNumOps; ++i) {
      int X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI,
                                    LC->getOperand(i), RC->getOperand(i), DT,
                                    Depth + 1);
      if (X != 0)
        return X;
    }
    EqCacheSCEV.unionSets(LHS, RHS);
    return 0;
  }

  case scUDivExpr: {
    const SCEVUDivExpr *LC = cast<SCEVUDivExpr>(LHS);
    const SCEVUDivExpr *RC = cast<SCEVUDivExpr>(RHS);

    // Lexicographically compare udiv expressions.
    int X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, LC->getLHS(),
                                  RC->getLHS(), DT, Depth + 1);
    if (X != 0)
      return X;
    X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, LC->getRHS(),
                              RC->getRHS(), DT, Depth + 1);
    if (X == 0)
      EqCacheSCEV.unionSets(LHS, RHS);
    return X;
  }

  case scTruncate:
  case scZeroExtend:
  case scSignExtend: {
    const SCEVCastExpr *LC = cast<SCEVCastExpr>(LHS);
    const SCEVCastExpr *RC = cast<SCEVCastExpr>(RHS);

    // Compare cast expressions by operand.
    int X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI,
                                  LC->getOperand(), RC->getOperand(), DT,
                                  Depth + 1);
    if (X == 0)
      EqCacheSCEV.unionSets(LHS, RHS);
    return X;
  }

  case scCouldNotCompute:
    llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
  }
  llvm_unreachable("Unknown SCEV kind!");
}

/// Given a list of SCEV objects, order them by their complexity, and group
/// objects of the same complexity together by value.  When this routine is
/// finished, we know that any duplicates in the vector are consecutive and that
/// complexity is monotonically increasing.
///
/// Note that we take special precautions to ensure that we get deterministic
/// results from this routine.  In other words, we don't want the results of
/// this to depend on where the addresses of various SCEV objects happened to
/// land in memory.
static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops,
                              LoopInfo *LI, DominatorTree &DT) {
  if (Ops.size() < 2) return; // Noop

  EquivalenceClasses<const SCEV *> EqCacheSCEV;
  EquivalenceClasses<const Value *> EqCacheValue;
  if (Ops.size() == 2) {
    // This is the common case, which also happens to be trivially simple.
    // Special case it.
    const SCEV *&LHS = Ops[0], *&RHS = Ops[1];
    if (CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, RHS, LHS, DT) < 0)
      std::swap(LHS, RHS);
    return;
  }

  // Do the rough sort by complexity.
  llvm::stable_sort(Ops, [&](const SCEV *LHS, const SCEV *RHS) {
    return CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, LHS, RHS, DT) <
           0;
  });

  // Now that we are sorted by complexity, group elements of the same
  // complexity.  Note that this is, at worst, N^2, but the vector is likely to
  // be extremely short in practice.  Note that we take this approach because we
  // do not want to depend on the addresses of the objects we are grouping.
  for (unsigned i = 0, e = Ops.size(); i != e-2; ++i) {
    const SCEV *S = Ops[i];
    unsigned Complexity = S->getSCEVType();

    // If there are any objects of the same complexity and same value as this
    // one, group them.
    for (unsigned j = i+1; j != e && Ops[j]->getSCEVType() == Complexity; ++j) {
      if (Ops[j] == S) { // Found a duplicate.
        // Move it to immediately after i'th element.
        std::swap(Ops[i+1], Ops[j]);
        ++i;   // no need to rescan it.
        if (i == e-2) return; // Done!
      }
    }
  }
}

// Returns the size of the SCEV S.
static inline int sizeOfSCEV(const SCEV *S) {
  struct FindSCEVSize {
    int Size = 0;

    FindSCEVSize() = default;

    bool follow(const SCEV *S) {
      ++Size;
      // Keep looking at all operands of S.
      return true;
    }

    bool isDone() const {
      return false;
    }
  };

  FindSCEVSize F;
  SCEVTraversal<FindSCEVSize> ST(F);
  ST.visitAll(S);
  return F.Size;
}

/// Returns true if the subtree of \p S contains at least HugeExprThreshold
/// nodes.
static bool isHugeExpression(const SCEV *S) {
  return S->getExpressionSize() >= HugeExprThreshold;
}

/// Returns true if \p Ops contains a huge SCEV (see definition above).
static bool hasHugeExpression(ArrayRef<const SCEV *> Ops) {
  return any_of(Ops, isHugeExpression);
}

namespace {

struct SCEVDivision : public SCEVVisitor<SCEVDivision, void> {
public:
  // Computes the Quotient and Remainder of the division of Numerator by
  // Denominator.
  static void divide(ScalarEvolution &SE, const SCEV *Numerator,
                     const SCEV *Denominator, const SCEV **Quotient,
                     const SCEV **Remainder) {
    assert(Numerator && Denominator && "Uninitialized SCEV");

    SCEVDivision D(SE, Numerator, Denominator);

    // Check for the trivial case here to avoid having to check for it in the
    // rest of the code.
    if (Numerator == Denominator) {
      *Quotient = D.One;
      *Remainder = D.Zero;
      return;
    }

    if (Numerator->isZero()) {
      *Quotient = D.Zero;
      *Remainder = D.Zero;
      return;
    }

    // A simple case when N/1. The quotient is N.
    if (Denominator->isOne()) {
      *Quotient = Numerator;
      *Remainder = D.Zero;
      return;
    }

    // Split the Denominator when it is a product.
    if (const SCEVMulExpr *T = dyn_cast<SCEVMulExpr>(Denominator)) {
      const SCEV *Q, *R;
      *Quotient = Numerator;
      for (const SCEV *Op : T->operands()) {
        divide(SE, *Quotient, Op, &Q, &R);
        *Quotient = Q;

        // Bail out when the Numerator is not divisible by one of the terms of
        // the Denominator.
        if (!R->isZero()) {
          *Quotient = D.Zero;
          *Remainder = Numerator;
          return;
        }
      }
      *Remainder = D.Zero;
      return;
    }

    D.visit(Numerator);
    *Quotient = D.Quotient;
    *Remainder = D.Remainder;
  }

  // Except in the trivial case described above, we do not know how to divide
  // Expr by Denominator for the following functions with empty implementation.
  void visitTruncateExpr(const SCEVTruncateExpr *Numerator) {}
  void visitZeroExtendExpr(const SCEVZeroExtendExpr *Numerator) {}
  void visitSignExtendExpr(const SCEVSignExtendExpr *Numerator) {}
  void visitUDivExpr(const SCEVUDivExpr *Numerator) {}
  void visitSMaxExpr(const SCEVSMaxExpr *Numerator) {}
  void visitUMaxExpr(const SCEVUMaxExpr *Numerator) {}
  void visitSMinExpr(const SCEVSMinExpr *Numerator) {}
  void visitUMinExpr(const SCEVUMinExpr *Numerator) {}
  void visitUnknown(const SCEVUnknown *Numerator) {}
  void visitCouldNotCompute(const SCEVCouldNotCompute *Numerator) {}

  void visitConstant(const SCEVConstant *Numerator) {
    if (const SCEVConstant *D = dyn_cast<SCEVConstant>(Denominator)) {
      APInt NumeratorVal = Numerator->getAPInt();
      APInt DenominatorVal = D->getAPInt();
      uint32_t NumeratorBW = NumeratorVal.getBitWidth();
      uint32_t DenominatorBW = DenominatorVal.getBitWidth();

      if (NumeratorBW > DenominatorBW)
        DenominatorVal = DenominatorVal.sext(NumeratorBW);
      else if (NumeratorBW < DenominatorBW)
        NumeratorVal = NumeratorVal.sext(DenominatorBW);

      APInt QuotientVal(NumeratorVal.getBitWidth(), 0);
      APInt RemainderVal(NumeratorVal.getBitWidth(), 0);
      APInt::sdivrem(NumeratorVal, DenominatorVal, QuotientVal, RemainderVal);
      Quotient = SE.getConstant(QuotientVal);
      Remainder = SE.getConstant(RemainderVal);
      return;
    }
  }

  void visitAddRecExpr(const SCEVAddRecExpr *Numerator) {
    const SCEV *StartQ, *StartR, *StepQ, *StepR;
    if (!Numerator->isAffine())
      return cannotDivide(Numerator);
    divide(SE, Numerator->getStart(), Denominator, &StartQ, &StartR);
    divide(SE, Numerator->getStepRecurrence(SE), Denominator, &StepQ, &StepR);
    // Bail out if the types do not match.
    Type *Ty = Denominator->getType();
    if (Ty != StartQ->getType() || Ty != StartR->getType() ||
        Ty != StepQ->getType() || Ty != StepR->getType())
      return cannotDivide(Numerator);
    Quotient = SE.getAddRecExpr(StartQ, StepQ, Numerator->getLoop(),
                                Numerator->getNoWrapFlags());
    Remainder = SE.getAddRecExpr(StartR, StepR, Numerator->getLoop(),
                                 Numerator->getNoWrapFlags());
  }

  void visitAddExpr(const SCEVAddExpr *Numerator) {
    SmallVector<const SCEV *, 2> Qs, Rs;
    Type *Ty = Denominator->getType();

    for (const SCEV *Op : Numerator->operands()) {
      const SCEV *Q, *R;
      divide(SE, Op, Denominator, &Q, &R);

      // Bail out if types do not match.
      if (Ty != Q->getType() || Ty != R->getType())
        return cannotDivide(Numerator);

      Qs.push_back(Q);
      Rs.push_back(R);
    }

    if (Qs.size() == 1) {
      Quotient = Qs[0];
      Remainder = Rs[0];
      return;
    }

    Quotient = SE.getAddExpr(Qs);
    Remainder = SE.getAddExpr(Rs);
  }

  void visitMulExpr(const SCEVMulExpr *Numerator) {
    SmallVector<const SCEV *, 2> Qs;
    Type *Ty = Denominator->getType();

    bool FoundDenominatorTerm = false;
    for (const SCEV *Op : Numerator->operands()) {
      // Bail out if types do not match.
      if (Ty != Op->getType())
        return cannotDivide(Numerator);

      if (FoundDenominatorTerm) {
        Qs.push_back(Op);
        continue;
      }

      // Check whether Denominator divides one of the product operands.
      const SCEV *Q, *R;
      divide(SE, Op, Denominator, &Q, &R);
      if (!R->isZero()) {
        Qs.push_back(Op);
        continue;
      }

      // Bail out if types do not match.
      if (Ty != Q->getType())
        return cannotDivide(Numerator);

      FoundDenominatorTerm = true;
      Qs.push_back(Q);
    }

    if (FoundDenominatorTerm) {
      Remainder = Zero;
      if (Qs.size() == 1)
        Quotient = Qs[0];
      else
        Quotient = SE.getMulExpr(Qs);
      return;
    }

    if (!isa<SCEVUnknown>(Denominator))
      return cannotDivide(Numerator);

    // The Remainder is obtained by replacing Denominator by 0 in Numerator.
    ValueToValueMap RewriteMap;
    RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] =
        cast<SCEVConstant>(Zero)->getValue();
    Remainder = SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true);

    if (Remainder->isZero()) {
      // The Quotient is obtained by replacing Denominator by 1 in Numerator.
      RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] =
          cast<SCEVConstant>(One)->getValue();
      Quotient =
          SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true);
      return;
    }

    // Quotient is (Numerator - Remainder) divided by Denominator.
    const SCEV *Q, *R;
    const SCEV *Diff = SE.getMinusSCEV(Numerator, Remainder);
    // This SCEV does not seem to simplify: fail the division here.
    if (sizeOfSCEV(Diff) > sizeOfSCEV(Numerator))
      return cannotDivide(Numerator);
    divide(SE, Diff, Denominator, &Q, &R);
    if (R != Zero)
      return cannotDivide(Numerator);
    Quotient = Q;
  }

private:
  SCEVDivision(ScalarEvolution &S, const SCEV *Numerator,
               const SCEV *Denominator)
      : SE(S), Denominator(Denominator) {
    Zero = SE.getZero(Denominator->getType());
    One = SE.getOne(Denominator->getType());

    // We generally do not know how to divide Expr by Denominator. We
    // initialize the division to a "cannot divide" state to simplify the rest
    // of the code.
    cannotDivide(Numerator);
  }

  // Convenience function for giving up on the division. We set the quotient to
  // be equal to zero and the remainder to be equal to the numerator.
  void cannotDivide(const SCEV *Numerator) {
    Quotient = Zero;
    Remainder = Numerator;
  }

  ScalarEvolution &SE;
  const SCEV *Denominator, *Quotient, *Remainder, *Zero, *One;
};

} // end anonymous namespace
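
// Usage sketch (editorial, not in the upstream file): given SCEVs N and D,
//
//   const SCEV *Q, *R;
//   SCEVDivision::divide(SE, N, D, &Q, &R); // N == Q * D + R on success
//
// and when the division gives up, the "cannot divide" state yields Q == 0
// and R == N.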

//===----------------------------------------------------------------------===//
//                      Simple SCEV method implementations
//===----------------------------------------------------------------------===//

/// Compute BC(It, K).  The result has width W.  Assume K > 0.
static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K,
                                       ScalarEvolution &SE,
                                       Type *ResultTy) {
  // Handle the simplest case efficiently.
  if (K == 1)
    return SE.getTruncateOrZeroExtend(It, ResultTy);

  // We are using the following formula for BC(It, K):
  //
  //   BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / K!
  //
  // Suppose W is the bitwidth of the return value.  We must be prepared for
  // overflow.  Hence, we must assure that the result of our computation is
  // equal to the accurate one modulo 2^W.  Unfortunately, division isn't
  // safe in modular arithmetic.
  //
  // However, this code doesn't use exactly that formula; the formula it uses
  // is something like the following, where T is the number of factors of 2 in
  // K! (i.e. trailing zeros in the binary representation of K!), and ^ is
  // exponentiation:
  //
  //   BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / 2^T / (K! / 2^T)
  //
  // This formula is trivially equivalent to the previous formula.  However,
  // this formula can be implemented much more efficiently.  The trick is that
  // K! / 2^T is odd, and exact division by an odd number *is* safe in modular
  // arithmetic.  To do exact division in modular arithmetic, all we have
  // to do is multiply by the inverse.  Therefore, this step can be done at
  // width W.
  //
  // The next issue is how to safely do the division by 2^T.  The way this
  // is done is by doing the multiplication step at a width of at least W + T
  // bits.  This way, the bottom W+T bits of the product are accurate. Then,
  // when we perform the division by 2^T (which is equivalent to a right shift
  // by T), the bottom W bits are accurate.  Extra bits are okay; they'll get
  // truncated out after the division by 2^T.
  //
  // In comparison to just directly using the first formula, this technique
  // is much more efficient; using the first formula requires W * K bits,
  // but this formula requires less than W + K bits. Also, the first formula
  // requires a division step, whereas this formula only requires multiplies
  // and shifts.
  //
  // It doesn't matter whether the subtraction step is done in the calculation
  // width or the input iteration count's width; if the subtraction overflows,
  // the result must be zero anyway.  We prefer here to do it in the width of
  // the induction variable because it helps a lot for certain cases; CodeGen
  // isn't smart enough to ignore the overflow, which leads to much less
  // efficient code if the width of the subtraction is wider than the native
  // register width.
  //
  // (It's possible to not widen at all by pulling out factors of 2 before
  // the multiplication; for example, K=2 can be calculated as
  // It/2*(It+(It*INT_MIN/INT_MIN)+-1). However, it requires
  // extra arithmetic, so it's not an obvious win, and it gets
  // much more complicated for K > 3.)
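  //
  // Worked example (editorial, not part of the upstream comment): for K = 4
  // at W = 32, K! = 24 = 2^3 * 3, so T = 3 and K!/2^T = 3.  The product
  // It*(It-1)*(It-2)*(It-3) is computed at W + T = 35 bits, shifted right
  // by T = 3, truncated back to 32 bits, and multiplied by the inverse of 3
  // mod 2^32 (0xAAAAAAAB, since 3 * 0xAAAAAAAB = 2^33 + 1).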

  // Protection from insane SCEVs; this bound is conservative,
  // but it probably doesn't matter.
  if (K > 1000)
    return SE.getCouldNotCompute();

  unsigned W = SE.getTypeSizeInBits(ResultTy);

  // Calculate K! / 2^T and T; we divide out the factors of two before
  // multiplying for calculating K! / 2^T to avoid overflow.
  // Other overflow doesn't matter because we only care about the bottom
  // W bits of the result.
  APInt OddFactorial(W, 1);
  unsigned T = 1;
  for (unsigned i = 3; i <= K; ++i) {
    APInt Mult(W, i);
    unsigned TwoFactors = Mult.countTrailingZeros();
    T += TwoFactors;
    Mult.lshrInPlace(TwoFactors);
    OddFactorial *= Mult;
  }

  // We need at least W + T bits for the multiplication step
  unsigned CalculationBits = W + T;

  // Calculate 2^T, at width T+W.
  APInt DivFactor = APInt::getOneBitSet(CalculationBits, T);

  // Calculate the multiplicative inverse of K! / 2^T;
  // this multiplication factor will perform the exact division by
  // K! / 2^T.
  APInt Mod = APInt::getSignedMinValue(W+1);
  APInt MultiplyFactor = OddFactorial.zext(W+1);
  MultiplyFactor = MultiplyFactor.multiplicativeInverse(Mod);
  MultiplyFactor = MultiplyFactor.trunc(W);

  // Calculate the product, at width T+W
  IntegerType *CalculationTy = IntegerType::get(SE.getContext(),
                                                      CalculationBits);
  const SCEV *Dividend = SE.getTruncateOrZeroExtend(It, CalculationTy);
  for (unsigned i = 1; i != K; ++i) {
    const SCEV *S = SE.getMinusSCEV(It, SE.getConstant(It->getType(), i));
    Dividend = SE.getMulExpr(Dividend,
                             SE.getTruncateOrZeroExtend(S, CalculationTy));
  }

  // Divide by 2^T
  const SCEV *DivResult = SE.getUDivExpr(Dividend, SE.getConstant(DivFactor));

  // Truncate the result, and divide by K! / 2^T.

  return SE.getMulExpr(SE.getConstant(MultiplyFactor),
                       SE.getTruncateOrZeroExtend(DivResult, ResultTy));
}

/// Return the value of this chain of recurrences at the specified iteration
/// number.  We can evaluate this recurrence by multiplying each element in the
/// chain by the binomial coefficient corresponding to it.  In other words, we
/// can evaluate {A,+,B,+,C,+,D} as:
///
///   A*BC(It, 0) + B*BC(It, 1) + C*BC(It, 2) + D*BC(It, 3)
///
/// where BC(It, k) stands for binomial coefficient.
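///
/// For instance (an illustrative aside): the recurrence {0,+,1,+,1} evaluates
/// at iteration It to 0*BC(It,0) + 1*BC(It,1) + 1*BC(It,2)
/// = It + It*(It-1)/2.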
1230
const SCEV *SCEVAddRecExpr::evaluateAtIteration(const SCEV *It,
1231
357k
                                                ScalarEvolution &SE) const {
1232
357k
  const SCEV *Result = getStart();
1233
716k
  for (unsigned i = 1, e = getNumOperands(); i != e; 
++i359k
) {
1234
359k
    // The computation is correct in the face of overflow provided that the
1235
359k
    // multiplication is performed _after_ the evaluation of the binomial
1236
359k
    // coefficient.
1237
359k
    const SCEV *Coeff = BinomialCoefficient(It, i, SE, getType());
1238
359k
    if (isa<SCEVCouldNotCompute>(Coeff))
1239
0
      return Coeff;
1240
359k
1241
359k
    Result = SE.getAddExpr(Result, SE.getMulExpr(getOperand(i), Coeff));
1242
359k
  }
1243
357k
  return Result;
1244
357k
}
1245
1246
//===----------------------------------------------------------------------===//
1247
//                    SCEV Expression folder implementations
1248
//===----------------------------------------------------------------------===//
1249
1250

const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, Type *Ty,
                                             unsigned Depth) {
  assert(getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) &&
         "This is not a truncating conversion!");
  assert(isSCEVable(Ty) &&
         "This is not a conversion to a SCEVable type!");
  Ty = getEffectiveSCEVType(Ty);

  FoldingSetNodeID ID;
  ID.AddInteger(scTruncate);
  ID.AddPointer(Op);
  ID.AddPointer(Ty);
  void *IP = nullptr;
  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;

  // Fold if the operand is constant.
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
    return getConstant(
      cast<ConstantInt>(ConstantExpr::getTrunc(SC->getValue(), Ty)));

  // trunc(trunc(x)) --> trunc(x)
  if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op))
    return getTruncateExpr(ST->getOperand(), Ty, Depth + 1);

  // trunc(sext(x)) --> sext(x) if widening or trunc(x) if narrowing
  if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
    return getTruncateOrSignExtend(SS->getOperand(), Ty, Depth + 1);

  // trunc(zext(x)) --> zext(x) if widening or trunc(x) if narrowing
  if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
    return getTruncateOrZeroExtend(SZ->getOperand(), Ty, Depth + 1);

  if (Depth > MaxCastDepth) {
    SCEV *S =
        new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator), Op, Ty);
    UniqueSCEVs.InsertNode(S, IP);
    addToLoopUseLists(S);
    return S;
  }

  // trunc(x1 + ... + xN) --> trunc(x1) + ... + trunc(xN) and
  // trunc(x1 * ... * xN) --> trunc(x1) * ... * trunc(xN),
  // if after transforming we have at most one truncate, not counting truncates
  // that replace other casts.
  if (isa<SCEVAddExpr>(Op) || isa<SCEVMulExpr>(Op)) {
    auto *CommOp = cast<SCEVCommutativeExpr>(Op);
    SmallVector<const SCEV *, 4> Operands;
    unsigned numTruncs = 0;
    for (unsigned i = 0, e = CommOp->getNumOperands(); i != e && numTruncs < 2;
         ++i) {
      const SCEV *S = getTruncateExpr(CommOp->getOperand(i), Ty, Depth + 1);
      if (!isa<SCEVCastExpr>(CommOp->getOperand(i)) && isa<SCEVTruncateExpr>(S))
        numTruncs++;
      Operands.push_back(S);
    }
    if (numTruncs < 2) {
      if (isa<SCEVAddExpr>(Op))
        return getAddExpr(Operands);
      else if (isa<SCEVMulExpr>(Op))
        return getMulExpr(Operands);
      else
        llvm_unreachable("Unexpected SCEV type for Op.");
    }
    // Although we checked at the beginning that ID is not in the cache, it is
    // possible that during the recursion and other modifications an entry for
    // ID was inserted into the cache. So if we find it, just return it.
    if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP))
      return S;
  }

  // If the input value is a chrec scev, truncate the chrec's operands.
  if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
    SmallVector<const SCEV *, 4> Operands;
    for (const SCEV *Op : AddRec->operands())
      Operands.push_back(getTruncateExpr(Op, Ty, Depth + 1));
    return getAddRecExpr(Operands, AddRec->getLoop(), SCEV::FlagAnyWrap);
  }

  // The cast wasn't folded; create an explicit cast node. We can reuse
  // the existing insert position since if we get here, we won't have
  // made any changes which would invalidate it.
  SCEV *S = new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator),
                                                 Op, Ty);
  UniqueSCEVs.InsertNode(S, IP);
  addToLoopUseLists(S);
  return S;
}
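
// For illustration of the distribution rule above: truncating
// ((zext i8 %x to i64) * 4) from i64 to i32 distributes over the multiply,
// because trunc(zext i8 %x to i64) to i32 folds to (zext i8 %x to i32) --
// a truncate that merely replaces another cast is not counted -- so the
// result is ((zext i8 %x to i32) * 4) with no SCEVTruncateExpr left behind.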

// Get the limit of a recurrence such that incrementing by Step cannot cause
// signed overflow as long as the value of the recurrence within the
// loop does not exceed this limit before incrementing.
static const SCEV *getSignedOverflowLimitForStep(const SCEV *Step,
                                                 ICmpInst::Predicate *Pred,
                                                 ScalarEvolution *SE) {
  unsigned BitWidth = SE->getTypeSizeInBits(Step->getType());
  if (SE->isKnownPositive(Step)) {
    *Pred = ICmpInst::ICMP_SLT;
    return SE->getConstant(APInt::getSignedMinValue(BitWidth) -
                           SE->getSignedRangeMax(Step));
  }
  if (SE->isKnownNegative(Step)) {
    *Pred = ICmpInst::ICMP_SGT;
    return SE->getConstant(APInt::getSignedMaxValue(BitWidth) -
                           SE->getSignedRangeMin(Step));
  }
  return nullptr;
}
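
// For example, for an i8 recurrence whose step is known positive with
// getSignedRangeMax(Step) == 4, the limit is (-128 - 4), which wraps to 124,
// and Pred is ICMP_SLT: any value v with v <s 124 satisfies v + 4 <= 127, so
// the increment cannot sign-overflow.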

// Get the limit of a recurrence such that incrementing by Step cannot cause
// unsigned overflow as long as the value of the recurrence within the loop does
// not exceed this limit before incrementing.
static const SCEV *getUnsignedOverflowLimitForStep(const SCEV *Step,
                                                   ICmpInst::Predicate *Pred,
                                                   ScalarEvolution *SE) {
  unsigned BitWidth = SE->getTypeSizeInBits(Step->getType());
  *Pred = ICmpInst::ICMP_ULT;

  return SE->getConstant(APInt::getMinValue(BitWidth) -
                         SE->getUnsignedRangeMax(Step));
}
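
// Analogously, for an i8 step with getUnsignedRangeMax(Step) == 4, the limit
// is (0 - 4), which wraps to 252, and Pred is ICMP_ULT: any value v with
// v <u 252 satisfies v + 4 <= 255, so the increment cannot unsigned-overflow.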

namespace {

struct ExtendOpTraitsBase {
  typedef const SCEV *(ScalarEvolution::*GetExtendExprTy)(const SCEV *, Type *,
                                                          unsigned);
};

// Used to make code generic over signed and unsigned overflow.
template <typename ExtendOp> struct ExtendOpTraits {
  // Members present:
  //
  // static const SCEV::NoWrapFlags WrapType;
  //
  // static const ExtendOpTraitsBase::GetExtendExprTy GetExtendExpr;
  //
  // static const SCEV *getOverflowLimitForStep(const SCEV *Step,
  //                                            ICmpInst::Predicate *Pred,
  //                                            ScalarEvolution *SE);
};

template <>
struct ExtendOpTraits<SCEVSignExtendExpr> : public ExtendOpTraitsBase {
  static const SCEV::NoWrapFlags WrapType = SCEV::FlagNSW;

  static const GetExtendExprTy GetExtendExpr;

  static const SCEV *getOverflowLimitForStep(const SCEV *Step,
                                             ICmpInst::Predicate *Pred,
                                             ScalarEvolution *SE) {
    return getSignedOverflowLimitForStep(Step, Pred, SE);
  }
};

const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits<
    SCEVSignExtendExpr>::GetExtendExpr = &ScalarEvolution::getSignExtendExpr;

template <>
struct ExtendOpTraits<SCEVZeroExtendExpr> : public ExtendOpTraitsBase {
  static const SCEV::NoWrapFlags WrapType = SCEV::FlagNUW;

  static const GetExtendExprTy GetExtendExpr;

  static const SCEV *getOverflowLimitForStep(const SCEV *Step,
                                             ICmpInst::Predicate *Pred,
                                             ScalarEvolution *SE) {
    return getUnsignedOverflowLimitForStep(Step, Pred, SE);
  }
};

const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits<
    SCEVZeroExtendExpr>::GetExtendExpr = &ScalarEvolution::getZeroExtendExpr;

} // end anonymous namespace
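
// A caller generic over the extension kind invokes the traits through the
// member-function pointer; a minimal sketch (`extend` is a hypothetical
// helper for illustration, not part of this file):
//
//   template <typename ExtendOpTy>
//   static const SCEV *extend(ScalarEvolution *SE, const SCEV *S, Type *Ty) {
//     auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr;
//     // Calls getZeroExtendExpr or getSignExtendExpr depending on ExtendOpTy.
//     return (SE->*GetExtendExpr)(S, Ty, 0);
//   }
//
// The real consumers are getPreStartForExtend and getExtendAddRecStart below.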

// The recurrence AR has been shown to have no signed/unsigned wrap or something
// close to it. Typically, if we can prove NSW/NUW for AR, then we can just as
// easily prove NSW/NUW for its preincrement or postincrement sibling. This
// allows normalizing a sign/zero extended AddRec as such:
// {sext/zext(Step + Start),+,Step} => {Step + sext/zext(Start),+,Step}
// As a result, the expression "Step + sext/zext(PreIncAR)" is congruent with
// "sext/zext(PostIncAR)".
template <typename ExtendOpTy>
static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty,
                                        ScalarEvolution *SE, unsigned Depth) {
  auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType;
  auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr;

  const Loop *L = AR->getLoop();
  const SCEV *Start = AR->getStart();
  const SCEV *Step = AR->getStepRecurrence(*SE);

  // Check for a simple looking step prior to loop entry.
  const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start);
  if (!SA)
    return nullptr;

  // Create an AddExpr for "PreStart" after subtracting Step. Full SCEV
  // subtraction is expensive. For this purpose, perform a quick and dirty
  // difference, by checking for Step in the operand list.
  SmallVector<const SCEV *, 4> DiffOps;
  for (const SCEV *Op : SA->operands())
    if (Op != Step)
      DiffOps.push_back(Op);

  if (DiffOps.size() == SA->getNumOperands())
    return nullptr;

  // Try to prove `WrapType` (SCEV::FlagNSW or SCEV::FlagNUW) on `PreStart` +
  // `Step`:

  // 1. NSW/NUW flags on the step increment.
  auto PreStartFlags =
    ScalarEvolution::maskFlags(SA->getNoWrapFlags(), SCEV::FlagNUW);
  const SCEV *PreStart = SE->getAddExpr(DiffOps, PreStartFlags);
  const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>(
      SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap));

  // "{S,+,X} is <nsw>/<nuw>" and "the backedge is taken at least once" implies
  // "S+X does not sign/unsign-overflow".
  //

  const SCEV *BECount = SE->getBackedgeTakenCount(L);
  if (PreAR && PreAR->getNoWrapFlags(WrapType) &&
      !isa<SCEVCouldNotCompute>(BECount) && SE->isKnownPositive(BECount))
    return PreStart;

  // 2. Direct overflow check on the step operation's expression.
  unsigned BitWidth = SE->getTypeSizeInBits(AR->getType());
  Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2);
  const SCEV *OperandExtendedStart =
      SE->getAddExpr((SE->*GetExtendExpr)(PreStart, WideTy, Depth),
                     (SE->*GetExtendExpr)(Step, WideTy, Depth));
  if ((SE->*GetExtendExpr)(Start, WideTy, Depth) == OperandExtendedStart) {
    if (PreAR && AR->getNoWrapFlags(WrapType)) {
      // If we know `AR` == {`PreStart`+`Step`,+,`Step`} is `WrapType` (FlagNSW
      // or FlagNUW) and that `PreStart` + `Step` is `WrapType` too, then
      // `PreAR` == {`PreStart`,+,`Step`} is also `WrapType`.  Cache this fact.
      const_cast<SCEVAddRecExpr *>(PreAR)->setNoWrapFlags(WrapType);
    }
    return PreStart;
  }

  // 3. Loop precondition.
  ICmpInst::Predicate Pred;
  const SCEV *OverflowLimit =
      ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep(Step, &Pred, SE);

  if (OverflowLimit &&
      SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit))
    return PreStart;

  return nullptr;
}
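
// For example, if AR = {%x + %step,+,%step} for loop L, the quick difference
// above yields PreStart = %x.  If {%x,+,%step} can be shown <nuw> (or <nsw>)
// by any of the three checks, the caller may extend the start as
// ext(%step) + ext(%x) instead of ext(%x + %step), which is what
// getExtendAddRecStart below does.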

// Get the normalized zero or sign extended expression for this AddRec's Start.
template <typename ExtendOpTy>
static const SCEV *getExtendAddRecStart(const SCEVAddRecExpr *AR, Type *Ty,
                                        ScalarEvolution *SE,
                                        unsigned Depth) {
  auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr;

  const SCEV *PreStart = getPreStartForExtend<ExtendOpTy>(AR, Ty, SE, Depth);
  if (!PreStart)
    return (SE->*GetExtendExpr)(AR->getStart(), Ty, Depth);

  return SE->getAddExpr((SE->*GetExtendExpr)(AR->getStepRecurrence(*SE), Ty,
                                             Depth),
                        (SE->*GetExtendExpr)(PreStart, Ty, Depth));
}
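
// For instance, extending {%x + 4,+,4} with a provable pre-start of %x
// produces the start zext(4) + zext(%x) = 4 + zext(%x) in the wide type, so
// the whole recurrence normalizes to {4 + zext(%x),+,4} rather than
// {zext(%x + 4),+,4}, letting the pre- and post-increment siblings share one
// SCEV form.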

// Try to prove away overflow by looking at "nearby" add recurrences.  A
// motivating example for this rule: if we know `{0,+,4}` is `ult` `-1` and it
// does not itself wrap then we can conclude that `{1,+,4}` is `nuw`.
//
// Formally:
//
//     {S,+,X} == {S-T,+,X} + T
//  => Ext({S,+,X}) == Ext({S-T,+,X} + T)
//
// If ({S-T,+,X} + T) does not overflow  ... (1)
//
//  RHS == Ext({S-T,+,X} + T) == Ext({S-T,+,X}) + Ext(T)
//
// If {S-T,+,X} does not overflow  ... (2)
//
//  RHS == Ext({S-T,+,X}) + Ext(T) == {Ext(S-T),+,Ext(X)} + Ext(T)
//      == {Ext(S-T)+Ext(T),+,Ext(X)}
//
// If (S-T)+T does not overflow  ... (3)
//
//  RHS == {Ext(S-T)+Ext(T),+,Ext(X)} == {Ext(S-T+T),+,Ext(X)}
//      == {Ext(S),+,Ext(X)} == LHS
//
// Thus, if (1), (2) and (3) are true for some T, then
//   Ext({S,+,X}) == {Ext(S),+,Ext(X)}
//
// (3) is implied by (1) -- "(S-T)+T does not overflow" is simply "({S-T,+,X}+T)
// does not overflow" restricted to the 0th iteration.  Therefore we only need
// to check for (1) and (2).
//
// In the current context, S is `Start`, X is `Step`, Ext is `ExtendOpTy` and T
// is `Delta` (defined below).
template <typename ExtendOpTy>
bool ScalarEvolution::proveNoWrapByVaryingStart(const SCEV *Start,
                                                const SCEV *Step,
                                                const Loop *L) {
  auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType;

  // We restrict `Start` to a constant to prevent SCEV from spending too much
  // time here.  It is correct (but more expensive) to continue with a
  // non-constant `Start` and do a general SCEV subtraction to compute
  // `PreStart` below.
  const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start);
  if (!StartC)
    return false;

  APInt StartAI = StartC->getAPInt();

  for (unsigned Delta : {-2, -1, 1, 2}) {
    const SCEV *PreStart = getConstant(StartAI - Delta);

    FoldingSetNodeID ID;
    ID.AddInteger(scAddRecExpr);
    ID.AddPointer(PreStart);
    ID.AddPointer(Step);
    ID.AddPointer(L);
    void *IP = nullptr;
    const auto *PreAR =
      static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));

    // Give up if we don't already have the add recurrence we need because
    // actually constructing an add recurrence is relatively expensive.
    if (PreAR && PreAR->getNoWrapFlags(WrapType)) { // proves (2)
      const SCEV *DeltaS = getConstant(StartC->getType(), Delta);
      ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
      const SCEV *Limit = ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep(
          DeltaS, &Pred, this);
      if (Limit && isKnownPredicate(Pred, PreAR, Limit))  // proves (1)
        return true;
    }
  }

  return false;
}
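
// Worked instance of the rule: for Start = 1 and Delta = 1, PreStart is the
// constant 0.  If {0,+,Step} already exists with the required no-wrap flag
// (proving (2)), the overflow-limit query for the constant 1 asks, in the
// zext case, whether {0,+,Step} u< -1; a successful isKnownPredicate call
// then proves (1), so {1,+,Step} is concluded to be <nuw> -- exactly the
// motivating example in the comment above.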

// Finds an integer D for an expression (C + x + y + ...) such that the top
// level addition in (D + (C - D + x + y + ...)) would not wrap (signed or
// unsigned) and the number of trailing zeros of (C - D + x + y + ...) is
// maximized, where C is the \p ConstantTerm, x, y, ... are arbitrary SCEVs, and
// the (C + x + y + ...) expression is \p WholeAddExpr.
static APInt extractConstantWithoutWrapping(ScalarEvolution &SE,
                                            const SCEVConstant *ConstantTerm,
                                            const SCEVAddExpr *WholeAddExpr) {
  const APInt C = ConstantTerm->getAPInt();
  const unsigned BitWidth = C.getBitWidth();
  // Find number of trailing zeros of (x + y + ...) w/o the C first:
  uint32_t TZ = BitWidth;
  for (unsigned I = 1, E = WholeAddExpr->getNumOperands(); I < E && TZ; ++I)
    TZ = std::min(TZ, SE.GetMinTrailingZeros(WholeAddExpr->getOperand(I)));
  if (TZ) {
    // Set D to be as many least significant bits of C as possible while still
    // guaranteeing that adding D to (C - D + x + y + ...) won't cause a wrap:
    return TZ < BitWidth ? C.trunc(TZ).zext(BitWidth) : C;
  }
  return APInt(BitWidth, 0);
}
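
// For example, with WholeAddExpr = (5 + 20 * %x + 24 * %y): both non-constant
// operands have at least two trailing zero bits (20 = 4*5, 24 = 8*3), so
// TZ = 2 and D = 5 mod 4 = 1.  The residual (4 + 20 * %x + 24 * %y) is a
// multiple of 4, and adding D = 1 to it cannot wrap because the low two bits
// of the residual are zero.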

// Finds an integer D for an affine AddRec expression {C,+,x} such that the top
// level addition in (D + {C-D,+,x}) would not wrap (signed or unsigned) and the
// number of trailing zeros of (C - D + x * n) is maximized, where C is the \p
// ConstantStart, x is an arbitrary \p Step, and n is the loop trip count.
static APInt extractConstantWithoutWrapping(ScalarEvolution &SE,
                                            const APInt &ConstantStart,
                                            const SCEV *Step) {
  const unsigned BitWidth = ConstantStart.getBitWidth();
  const uint32_t TZ = SE.GetMinTrailingZeros(Step);
  if (TZ)
    return TZ < BitWidth ? ConstantStart.trunc(TZ).zext(BitWidth)
                         : ConstantStart;
  return APInt(BitWidth, 0);
}
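
// For example, for the AddRec {5,+,8}: GetMinTrailingZeros(8) = 3, so D is
// the low three bits of 5, i.e. D = 5, and the caller rewrites ext({5,+,8})
// as ext(5) + ext({0,+,8}); since every value of {0,+,8} is a multiple of 8,
// adding the split-off D = 5 back can never wrap the top-level addition.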

const SCEV *
ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
  assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
         "This is not an extending conversion!");
  assert(isSCEVable(Ty) &&
         "This is not a conversion to a SCEVable type!");
  Ty = getEffectiveSCEVType(Ty);

  // Fold if the operand is constant.
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
    return getConstant(
      cast<ConstantInt>(ConstantExpr::getZExt(SC->getValue(), Ty)));

  // zext(zext(x)) --> zext(x)
  if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
    return getZeroExtendExpr(SZ->getOperand(), Ty, Depth + 1);

  // Before doing any expensive analysis, check to see if we've already
  // computed a SCEV for this Op and Ty.
  FoldingSetNodeID ID;
  ID.AddInteger(scZeroExtend);
  ID.AddPointer(Op);
  ID.AddPointer(Ty);
  void *IP = nullptr;
  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
  if (Depth > MaxCastDepth) {
    SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator),
                                                     Op, Ty);
    UniqueSCEVs.InsertNode(S, IP);
    addToLoopUseLists(S);
    return S;
  }

  // zext(trunc(x)) --> zext(x) or x or trunc(x)
  if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) {
    // It's possible the bits taken off by the truncate were all zero bits. If
    // so, we should be able to simplify this further.
    const SCEV *X = ST->getOperand();
    ConstantRange CR = getUnsignedRange(X);
    unsigned TruncBits = getTypeSizeInBits(ST->getType());
    unsigned NewBits = getTypeSizeInBits(Ty);
    if (CR.truncate(TruncBits).zeroExtend(NewBits).contains(
            CR.zextOrTrunc(NewBits)))
      return getTruncateOrZeroExtend(X, Ty, Depth);
  }

  // If the input value is a chrec scev, and we can prove that the value
  // did not overflow the old, smaller, value, we can zero extend all of the
  // operands (often constants).  This allows analysis of something like
  // this:  for (unsigned char X = 0; X < 100; ++X) { int Y = X; }
  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
    if (AR->isAffine()) {
      const SCEV *Start = AR->getStart();
      const SCEV *Step = AR->getStepRecurrence(*this);
      unsigned BitWidth = getTypeSizeInBits(AR->getType());
      const Loop *L = AR->getLoop();

      if (!AR->hasNoUnsignedWrap()) {
        auto NewFlags = proveNoWrapViaConstantRanges(AR);
        const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(NewFlags);
      }

      // If we have special knowledge that this addrec won't overflow,
      // we don't need to do any further analysis.
      if (AR->hasNoUnsignedWrap())
        return getAddRecExpr(
            getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Depth + 1),
            getZeroExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags());

      // Check whether the backedge-taken count is SCEVCouldNotCompute.
      // Note that this serves two purposes: It filters out loops that are
      // simply not analyzable, and it covers the case where this code is
      // being called from within backedge-taken count analysis, such that
      // attempting to ask for the backedge-taken count would likely result
      // in infinite recursion. In the latter case, the analysis code will
      // cope with a conservative value, and it will take care to purge
      // that value once it has finished.
      const SCEV *MaxBECount = getMaxBackedgeTakenCount(L);
      if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
        // Manually compute the final value for AR, checking for
        // overflow.

        // Check whether the backedge-taken count can be losslessly cast to
        // the addrec's type. The count is always unsigned.
        const SCEV *CastedMaxBECount =
            getTruncateOrZeroExtend(MaxBECount, Start->getType(), Depth);
        const SCEV *RecastedMaxBECount = getTruncateOrZeroExtend(
            CastedMaxBECount, MaxBECount->getType(), Depth);
        if (MaxBECount == RecastedMaxBECount) {
          Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
          // Check whether Start+Step*MaxBECount has no unsigned overflow.
          const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step,
                                        SCEV::FlagAnyWrap, Depth + 1);
          const SCEV *ZAdd = getZeroExtendExpr(getAddExpr(Start, ZMul,
                                                          SCEV::FlagAnyWrap,
                                                          Depth + 1),
                                               WideTy, Depth + 1);
          const SCEV *WideStart = getZeroExtendExpr(Start, WideTy, Depth + 1);
          const SCEV *WideMaxBECount =
            getZeroExtendExpr(CastedMaxBECount, WideTy, Depth + 1);
          const SCEV *OperandExtendedAdd =
            getAddExpr(WideStart,
                       getMulExpr(WideMaxBECount,
                                  getZeroExtendExpr(Step, WideTy, Depth + 1),
                                  SCEV::FlagAnyWrap, Depth + 1),
                       SCEV::FlagAnyWrap, Depth + 1);
          if (ZAdd == OperandExtendedAdd) {
            // Cache knowledge of AR NUW, which is propagated to this AddRec.
            const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
            // Return the expression with the addrec on the outside.
            return getAddRecExpr(
                getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
                                                         Depth + 1),
                getZeroExtendExpr(Step, Ty, Depth + 1), L,
                AR->getNoWrapFlags());
          }
          // Similar to above, only this time treat the step value as signed.
          // This covers loops that count down.
          OperandExtendedAdd =
            getAddExpr(WideStart,
                       getMulExpr(WideMaxBECount,
                                  getSignExtendExpr(Step, WideTy, Depth + 1),
                                  SCEV::FlagAnyWrap, Depth + 1),
                       SCEV::FlagAnyWrap, Depth + 1);
          if (ZAdd == OperandExtendedAdd) {
            // Cache knowledge of AR NW, which is propagated to this AddRec.
            // Negative step causes unsigned wrap, but it still can't self-wrap.
            const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
            // Return the expression with the addrec on the outside.
            return getAddRecExpr(
                getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
                                                         Depth + 1),
                getSignExtendExpr(Step, Ty, Depth + 1), L,
                AR->getNoWrapFlags());
          }
        }
      }

      // Normally, in the cases we can prove no-overflow via a
      // backedge guarding condition, we can also compute a backedge
      // taken count for the loop.  The exceptions are assumptions and
      // guards present in the loop -- SCEV is not great at exploiting
      // these to compute max backedge taken counts, but can still use
      // these to prove lack of overflow.  Use this fact to avoid
      // doing extra work that may not pay off.
      if (!isa<SCEVCouldNotCompute>(MaxBECount) || HasGuards ||
          !AC.assumptions().empty()) {
        // If the backedge is guarded by a comparison with the pre-inc
        // value the addrec is safe. Also, if the entry is guarded by
        // a comparison with the start value and the backedge is
        // guarded by a comparison with the post-inc value, the addrec
        // is safe.
        if (isKnownPositive(Step)) {
          const SCEV *N = getConstant(APInt::getMinValue(BitWidth) -
                                      getUnsignedRangeMax(Step));
          if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) ||
              isKnownOnEveryIteration(ICmpInst::ICMP_ULT, AR, N)) {
            // Cache knowledge of AR NUW, which is propagated to this
            // AddRec.
            const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
            // Return the expression with the addrec on the outside.
            return getAddRecExpr(
                getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
                                                         Depth + 1),
                getZeroExtendExpr(Step, Ty, Depth + 1), L,
                AR->getNoWrapFlags());
          }
        } else if (isKnownNegative(Step)) {
          const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) -
                                      getSignedRangeMin(Step));
          if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) ||
              isKnownOnEveryIteration(ICmpInst::ICMP_UGT, AR, N)) {
            // Cache knowledge of AR NW, which is propagated to this
            // AddRec.  Negative step causes unsigned wrap, but it
            // still can't self-wrap.
            const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
            // Return the expression with the addrec on the outside.
            return getAddRecExpr(
                getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
                                                         Depth + 1),
                getSignExtendExpr(Step, Ty, Depth + 1), L,
                AR->getNoWrapFlags());
          }
        }
      }

      // zext({C,+,Step}) --> (zext(D) + zext({C-D,+,Step}))<nuw><nsw>
      // if D + (C - D + Step * n) could be proven to not unsigned wrap
      // where D maximizes the number of trailing zeros of (C - D + Step * n)
      if (const auto *SC = dyn_cast<SCEVConstant>(Start)) {
        const APInt &C = SC->getAPInt();
        const APInt &D = extractConstantWithoutWrapping(*this, C, Step);
        if (D != 0) {
          const SCEV *SZExtD = getZeroExtendExpr(getConstant(D), Ty, Depth);
          const SCEV *SResidual =
              getAddRecExpr(getConstant(C - D), Step, L, AR->getNoWrapFlags());
          const SCEV *SZExtR = getZeroExtendExpr(SResidual, Ty, Depth + 1);
          return getAddExpr(SZExtD, SZExtR,
                            (SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNUW),
                            Depth + 1);
        }
      }

      if (proveNoWrapByVaryingStart<SCEVZeroExtendExpr>(Start, Step, L)) {
        const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
        return getAddRecExpr(
            getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Depth + 1),
            getZeroExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags());
      }
    }

  // zext(A % B) --> zext(A) % zext(B)
  {
    const SCEV *LHS;
    const SCEV *RHS;
    if (matchURem(Op, LHS, RHS))
      return getURemExpr(getZeroExtendExpr(LHS, Ty, Depth + 1),
                         getZeroExtendExpr(RHS, Ty, Depth + 1));
  }

  // zext(A / B) --> zext(A) / zext(B).
  if (auto *Div = dyn_cast<SCEVUDivExpr>(Op))
    return getUDivExpr(getZeroExtendExpr(Div->getLHS(), Ty, Depth + 1),
                       getZeroExtendExpr(Div->getRHS(), Ty, Depth + 1));

  if (auto *SA = dyn_cast<SCEVAddExpr>(Op)) {
    // zext((A + B + ...)<nuw>) --> (zext(A) + zext(B) + ...)<nuw>
    if (SA->hasNoUnsignedWrap()) {
      // If the addition does not unsigned-overflow then we can, by definition,
      // commute the zero extension with the addition operation.
      SmallVector<const SCEV *, 4> Ops;
      for (const auto *Op : SA->operands())
        Ops.push_back(getZeroExtendExpr(Op, Ty, Depth + 1));
      return getAddExpr(Ops, SCEV::FlagNUW, Depth + 1);
    }

    // zext(C + x + y + ...) --> (zext(D) + zext((C - D) + x + y + ...))
    // if D + (C - D + x + y + ...) could be proven to not unsigned wrap
    // where D maximizes the number of trailing zeros of (C - D + x + y + ...)
    //
    // Address arithmetic often contains expressions like
    // (zext (add (shl X, C1), C2)), for instance, (zext (5 + (4 * X))).
    // This transformation is useful while proving that such expressions are
    // equal or differ by a small constant amount, see LoadStoreVectorizer pass.
    if (const auto *SC = dyn_cast<SCEVConstant>(SA->getOperand(0))) {
      const APInt &D = extractConstantWithoutWrapping(*this, SC, SA);
      if (D != 0) {
        const SCEV *SZExtD = getZeroExtendExpr(getConstant(D), Ty, Depth);
        const SCEV *SResidual =
            getAddExpr(getConstant(-D), SA, SCEV::FlagAnyWrap, Depth);
        const SCEV *SZExtR = getZeroExtendExpr(SResidual, Ty, Depth + 1);
        return getAddExpr(SZExtD, SZExtR,
                          (SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNUW),
                          Depth + 1);
      }
    }
  }

  if (auto *SM = dyn_cast<SCEVMulExpr>(Op)) {
    // zext((A * B * ...)<nuw>) --> (zext(A) * zext(B) * ...)<nuw>
    if (SM->hasNoUnsignedWrap()) {
      // If the multiply does not unsigned-overflow then we can, by definition,
      // commute the zero extension with the multiply operation.
      SmallVector<const SCEV *, 4> Ops;
      for (const auto *Op : SM->operands())
        Ops.push_back(getZeroExtendExpr(Op, Ty, Depth + 1));
      return getMulExpr(Ops, SCEV::FlagNUW, Depth + 1);
    }

    // zext(2^K * (trunc X to iN)) to iM ->
    // 2^K * (zext(trunc X to i{N-K}) to iM)<nuw>
    //
    // Proof:
    //
    //     zext(2^K * (trunc X to iN)) to iM
    //   = zext((trunc X to iN) << K) to iM
    //   = zext((trunc X to i{N-K}) << K)<nuw> to iM
    //     (because shl removes the top K bits)
    //   = zext((2^K * (trunc X to i{N-K}))<nuw>) to iM
    //   = (2^K * (zext(trunc X to i{N-K}) to iM))<nuw>.
    //
    if (SM->getNumOperands() == 2)
      if (auto *MulLHS = dyn_cast<SCEVConstant>(SM->getOperand(0)))
        if (MulLHS->getAPInt().isPowerOf2())
          if (auto *TruncRHS = dyn_cast<SCEVTruncateExpr>(SM->getOperand(1))) {
            int NewTruncBits = getTypeSizeInBits(TruncRHS->getType()) -
                               MulLHS->getAPInt().logBase2();
            Type *NewTruncTy = IntegerType::get(getContext(), NewTruncBits);
            return getMulExpr(
                getZeroExtendExpr(MulLHS, Ty),
                getZeroExtendExpr(
                    getTruncateExpr(TruncRHS->getOperand(), NewTruncTy), Ty),
                SCEV::FlagNUW, Depth + 1);
          }
  }

  // The cast wasn't folded; create an explicit cast node.
  // Recompute the insert position, as it may have been invalidated.
  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
  SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator),
                                                   Op, Ty);
  UniqueSCEVs.InsertNode(S, IP);
  addToLoopUseLists(S);
  return S;
}
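
// Tying the pieces together with the loop from the comment above,
// for (unsigned char X = 0; X < 100; ++X) { int Y = X; }: X is {0,+,1}<nuw>
// in i8, so zext({0,+,1}<nuw> to i32) takes the no-unsigned-wrap fast path
// and folds directly to {0,+,1}<nuw> over i32; Y's SCEV stays an analyzable
// add recurrence instead of an opaque zext.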

const SCEV *
ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
  assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
         "This is not an extending conversion!");
  assert(isSCEVable(Ty) &&
         "This is not a conversion to a SCEVable type!");
  Ty = getEffectiveSCEVType(Ty);

  // Fold if the operand is constant.
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
    return getConstant(
      cast<ConstantInt>(ConstantExpr::getSExt(SC->getValue(), Ty)));

  // sext(sext(x)) --> sext(x)
  if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
    return getSignExtendExpr(SS->getOperand(), Ty, Depth + 1);

  // sext(zext(x)) --> zext(x)
  if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
    return getZeroExtendExpr(SZ->getOperand(), Ty, Depth + 1);

  // Before doing any expensive analysis, check to see if we've already
  // computed a SCEV for this Op and Ty.
  FoldingSetNodeID ID;
  ID.AddInteger(scSignExtend);
  ID.AddPointer(Op);
  ID.AddPointer(Ty);
  void *IP = nullptr;
  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
  // Limit recursion depth.
  if (Depth > MaxCastDepth) {
    SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator),
                                                     Op, Ty);
    UniqueSCEVs.InsertNode(S, IP);
    addToLoopUseLists(S);
    return S;
  }

  // sext(trunc(x)) --> sext(x) or x or trunc(x)
  if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) {
    // It's possible the bits taken off by the truncate were all sign bits. If
    // so, we should be able to simplify this further.
    const SCEV *X = ST->getOperand();
    ConstantRange CR = getSignedRange(X);
    unsigned TruncBits = getTypeSizeInBits(ST->getType());
    unsigned NewBits = getTypeSizeInBits(Ty);
    if (CR.truncate(TruncBits).signExtend(NewBits).contains(
            CR.sextOrTrunc(NewBits)))
      return getTruncateOrSignExtend(X, Ty, Depth);
  }

  if (auto *SA = dyn_cast<SCEVAddExpr>(Op)) {
    // sext((A + B + ...)<nsw>) --> (sext(A) + sext(B) + ...)<nsw>
    if (SA->hasNoSignedWrap()) {
      // If the addition does not sign-overflow then we can, by definition,
      // commute the sign extension with the addition operation.
      SmallVector<const SCEV *, 4> Ops;
      for (const auto *Op : SA->operands())
        Ops.push_back(getSignExtendExpr(Op, Ty, Depth + 1));
      return getAddExpr(Ops, SCEV::FlagNSW, Depth + 1);
    }

    // sext(C + x + y + ...) --> (sext(D) + sext((C - D) + x + y + ...))
    // if D + (C - D + x + y + ...) could be proven to not signed wrap
    // where D maximizes the number of trailing zeros of (C - D + x + y + ...)
    //
    // For instance, this will bring two seemingly different expressions:
    //     1 + sext(5 + 20 * %x + 24 * %y)  and
    //         sext(6 + 20 * %x + 24 * %y)
    // to the same form:
    //     2 + sext(4 + 20 * %x + 24 * %y)
    if (const auto *SC = dyn_cast<SCEVConstant>(SA->getOperand(0))) {
      const APInt &D = extractConstantWithoutWrapping(*this, SC, SA);
      if (D != 0) {
        const SCEV *SSExtD = getSignExtendExpr(getConstant(D), Ty, Depth);
        const SCEV *SResidual =
            getAddExpr(getConstant(-D), SA, SCEV::FlagAnyWrap, Depth);
        const SCEV *SSExtR = getSignExtendExpr(SResidual, Ty, Depth + 1);
        return getAddExpr(SSExtD, SSExtR,
                          (SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNUW),
                          Depth + 1);
      }
    }
  }
  // If the input value is a chrec scev, and we can prove that the value
  // did not overflow the old, smaller, value, we can sign extend all of the
  // operands (often constants).  This allows analysis of something like
  // this:  for (signed char X = 0; X < 100; ++X) { int Y = X; }
  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
    if (AR->isAffine()) {
      const SCEV *Start = AR->getStart();
      const SCEV *Step = AR->getStepRecurrence(*this);
      unsigned BitWidth = getTypeSizeInBits(AR->getType());
      const Loop *L = AR->getLoop();

      if (!AR->hasNoSignedWrap()) {
        auto NewFlags = proveNoWrapViaConstantRanges(AR);
        const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(NewFlags);
      }

      // If we have special knowledge that this addrec won't overflow,
      // we don't need to do any further analysis.
      if (AR->hasNoSignedWrap())
        return getAddRecExpr(
            getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Depth + 1),
            getSignExtendExpr(Step, Ty, Depth + 1), L, SCEV::FlagNSW);

      // Check whether the backedge-taken count is SCEVCouldNotCompute.
      // Note that this serves two purposes: It filters out loops that are
      // simply not analyzable, and it covers the case where this code is
      // being called from within backedge-taken count analysis, such that
      // attempting to ask for the backedge-taken count would likely result
      // in infinite recursion. In the latter case, the analysis code will
      // cope with a conservative value, and it will take care to purge
      // that value once it has finished.
      const SCEV *MaxBECount = getMaxBackedgeTakenCount(L);
      if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
        // Manually compute the final value for AR, checking for
        // overflow.

        // Check whether the backedge-taken count can be losslessly cast to
        // the addrec's type. The count is always unsigned.
        const SCEV *CastedMaxBECount =
            getTruncateOrZeroExtend(MaxBECount, Start->getType(), Depth);
        const SCEV *RecastedMaxBECount = getTruncateOrZeroExtend(
            CastedMaxBECount, MaxBECount->getType(), Depth);
        if (MaxBECount == RecastedMaxBECount) {
          Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
          // Check whether Start+Step*MaxBECount has no signed overflow.
          const SCEV *SMul = getMulExpr(CastedMaxBECount, Step,
                                        SCEV::FlagAnyWrap, Depth + 1);
          const SCEV *SAdd = getSignExtendExpr(getAddExpr(Start, SMul,
                                                          SCEV::FlagAnyWrap,
                                                          Depth + 1),
                                               WideTy, Depth + 1);
          const SCEV *WideStart = getSignExtendExpr(Start, WideTy, Depth + 1);
          const SCEV *WideMaxBECount =
            getZeroExtendExpr(CastedMaxBECount, WideTy, Depth + 1);
          const SCEV *OperandExtendedAdd =
            getAddExpr(WideStart,
                       getMulExpr(WideMaxBECount,
                                  getSignExtendExpr(Step, WideTy, Depth + 1),
                                  SCEV::FlagAnyWrap, Depth + 1),
                       SCEV::FlagAnyWrap, Depth + 1);
          if (SAdd == OperandExtendedAdd) {
            // Cache knowledge of AR NSW, which is propagated to this AddRec.
            const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
            // Return the expression with the addrec on the outside.
            return getAddRecExpr(
                getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this,
                                                         Depth + 1),
                getSignExtendExpr(Step, Ty, Depth + 1), L,
                AR->getNoWrapFlags());
          }
          // Similar to above, only this time treat the step value as unsigned.
          // This covers loops that count up with an unsigned step.
          OperandExtendedAdd =
            getAddExpr(WideStart,
                       getMulExpr(WideMaxBECount,
                                  getZeroExtendExpr(Step, WideTy, Depth + 1),
                                  SCEV::FlagAnyWrap, Depth + 1),
                       SCEV::FlagAnyWrap, Depth + 1);
          if (SAdd == OperandExtendedAdd) {
            // If AR wraps around then
            //
            //    abs(Step) * MaxBECount > unsigned-max(AR->getType())
            // => SAdd != OperandExtendedAdd
            //
            // Thus (AR is not NW => SAdd != OperandExtendedAdd) <=>
            // (SAdd == OperandExtendedAdd => AR is NW)

            const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);

            // Return the expression with the addrec on the outside.
            return getAddRecExpr(
                getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this,
                                                         Depth + 1),
                getZeroExtendExpr(Step, Ty, Depth + 1), L,
                AR->getNoWrapFlags());
          }
        }
      }

      // Normally, in the cases we can prove no-overflow via a
      // backedge guarding condition, we can also compute a backedge
      // taken count for the loop.  The exceptions are assumptions and
      // guards present in the loop -- SCEV is not great at exploiting
      // these to compute max backedge taken counts, but can still use
      // these to prove lack of overflow.  Use this fact to avoid
      // doing extra work that may not pay off.

      if (!isa<SCEVCouldNotCompute>(MaxBECount) || HasGuards ||
          !AC.assumptions().empty()) {
        // If the backedge is guarded by a comparison with the pre-inc
        // value the addrec is safe. Also, if the entry is guarded by
        // a comparison with the start value and the backedge is
        // guarded by a comparison with the post-inc value, the addrec
        // is safe.
        ICmpInst::Predicate Pred;
        const SCEV *OverflowLimit =
            getSignedOverflowLimitForStep(Step, &Pred, this);
        if (OverflowLimit &&
            (isLoopBackedgeGuardedByCond(L, Pred, AR, OverflowLimit) ||
             isKnownOnEveryIteration(Pred, AR, OverflowLimit))) {
          // Cache knowledge of AR NSW, then propagate NSW to the wide AddRec.
          const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
          return getAddRecExpr(
              getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Depth + 1),
              getSignExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags());
        }
      }

      // sext({C,+,Step}) --> (sext(D) + sext({C-D,+,Step}))<nuw><nsw>
      // if D + (C - D + Step * n) could be proven to not signed wrap
      // where D maximizes the number of trailing zeros of (C - D + Step * n)
      if (const auto *SC = dyn_cast<SCEVConstant>(Start)) {
        const APInt &C = SC->getAPInt();
        const APInt &D = extractConstantWithoutWrapping(*this, C, Step);
        if (D != 0) {
          const SCEV *SSExtD = getSignExtendExpr(getConstant(D), Ty, Depth);
          const SCEV *SResidual =
              getAddRecExpr(getConstant(C - D), Step, L, AR->getNoWrapFlags());
  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
2028
598k
    if (AR->isAffine()) {
2029
598k
      const SCEV *Start = AR->getStart();
2030
598k
      const SCEV *Step = AR->getStepRecurrence(*this);
2031
598k
      unsigned BitWidth = getTypeSizeInBits(AR->getType());
2032
598k
      const Loop *L = AR->getLoop();
2033
598k
2034
598k
      if (!AR->hasNoSignedWrap()) {
2035
464k
        auto NewFlags = proveNoWrapViaConstantRanges(AR);
2036
464k
        const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(NewFlags);
2037
464k
      }
2038
598k
2039
598k
      // If we have special knowledge that this addrec won't overflow,
2040
598k
      // we don't need to do any further analysis.
2041
598k
      if (AR->hasNoSignedWrap())
2042
215k
        return getAddRecExpr(
2043
215k
            getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Depth + 1),
2044
215k
            getSignExtendExpr(Step, Ty, Depth + 1), L, SCEV::FlagNSW);
2045
383k
2046
383k
      // Check whether the backedge-taken count is SCEVCouldNotCompute.
2047
383k
      // Note that this serves two purposes: It filters out loops that are
2048
383k
      // simply not analyzable, and it covers the case where this code is
      // being called from within backedge-taken count analysis, such that
      // attempting to ask for the backedge-taken count would likely result
      // in infinite recursion. In the latter case, the analysis code will
      // cope with a conservative value, and it will take care to purge
      // that value once it has finished.
      const SCEV *MaxBECount = getMaxBackedgeTakenCount(L);
      if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
        // Manually compute the final value for AR, checking for
        // overflow.

        // Check whether the backedge-taken count can be losslessly casted to
        // the addrec's type. The count is always unsigned.
        const SCEV *CastedMaxBECount =
            getTruncateOrZeroExtend(MaxBECount, Start->getType(), Depth);
        const SCEV *RecastedMaxBECount = getTruncateOrZeroExtend(
            CastedMaxBECount, MaxBECount->getType(), Depth);
        if (MaxBECount == RecastedMaxBECount) {
          Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
          // Check whether Start+Step*MaxBECount has no signed overflow.
          const SCEV *SMul = getMulExpr(CastedMaxBECount, Step,
                                        SCEV::FlagAnyWrap, Depth + 1);
          const SCEV *SAdd = getSignExtendExpr(getAddExpr(Start, SMul,
                                                          SCEV::FlagAnyWrap,
                                                          Depth + 1),
                                               WideTy, Depth + 1);
          const SCEV *WideStart = getSignExtendExpr(Start, WideTy, Depth + 1);
          const SCEV *WideMaxBECount =
            getZeroExtendExpr(CastedMaxBECount, WideTy, Depth + 1);
          const SCEV *OperandExtendedAdd =
            getAddExpr(WideStart,
                       getMulExpr(WideMaxBECount,
                                  getSignExtendExpr(Step, WideTy, Depth + 1),
                                  SCEV::FlagAnyWrap, Depth + 1),
                       SCEV::FlagAnyWrap, Depth + 1);
          if (SAdd == OperandExtendedAdd) {
            // Cache knowledge of AR NSW, which is propagated to this AddRec.
            const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
            // Return the expression with the addrec on the outside.
            return getAddRecExpr(
                getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this,
                                                         Depth + 1),
                getSignExtendExpr(Step, Ty, Depth + 1), L,
                AR->getNoWrapFlags());
          }
          // Similar to above, only this time treat the step value as unsigned.
          // This covers loops that count up with an unsigned step.
          OperandExtendedAdd =
            getAddExpr(WideStart,
                       getMulExpr(WideMaxBECount,
                                  getZeroExtendExpr(Step, WideTy, Depth + 1),
                                  SCEV::FlagAnyWrap, Depth + 1),
                       SCEV::FlagAnyWrap, Depth + 1);
          if (SAdd == OperandExtendedAdd) {
            // If AR wraps around then
            //
            //    abs(Step) * MaxBECount > unsigned-max(AR->getType())
            // => SAdd != OperandExtendedAdd
            //
            // Thus (AR is not NW => SAdd != OperandExtendedAdd) <=>
            // (SAdd == OperandExtendedAdd => AR is NW)

            const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);

            // Return the expression with the addrec on the outside.
            return getAddRecExpr(
                getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this,
                                                         Depth + 1),
                getZeroExtendExpr(Step, Ty, Depth + 1), L,
                AR->getNoWrapFlags());
          }
        }
      }

      // Normally, in the cases we can prove no-overflow via a
      // backedge guarding condition, we can also compute a backedge
      // taken count for the loop.  The exceptions are assumptions and
      // guards present in the loop -- SCEV is not great at exploiting
      // these to compute max backedge taken counts, but can still use
      // these to prove lack of overflow.  Use this fact to avoid
      // doing extra work that may not pay off.

      if (!isa<SCEVCouldNotCompute>(MaxBECount) || HasGuards ||
          !AC.assumptions().empty()) {
        // If the backedge is guarded by a comparison with the pre-inc
        // value the addrec is safe. Also, if the entry is guarded by
        // a comparison with the start value and the backedge is
        // guarded by a comparison with the post-inc value, the addrec
        // is safe.
        ICmpInst::Predicate Pred;
        const SCEV *OverflowLimit =
            getSignedOverflowLimitForStep(Step, &Pred, this);
        if (OverflowLimit &&
            (isLoopBackedgeGuardedByCond(L, Pred, AR, OverflowLimit) ||
             isKnownOnEveryIteration(Pred, AR, OverflowLimit))) {
          // Cache knowledge of AR NSW, then propagate NSW to the wide AddRec.
          const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
          return getAddRecExpr(
              getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Depth + 1),
              getSignExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags());
        }
      }

      // sext({C,+,Step}) --> (sext(D) + sext({C-D,+,Step}))<nuw><nsw>
      // if D + (C - D + Step * n) could be proven to not signed wrap
      // where D maximizes the number of trailing zeros of (C - D + Step * n)
      if (const auto *SC = dyn_cast<SCEVConstant>(Start)) {
        const APInt &C = SC->getAPInt();
        const APInt &D = extractConstantWithoutWrapping(*this, C, Step);
        if (D != 0) {
          const SCEV *SSExtD = getSignExtendExpr(getConstant(D), Ty, Depth);
          const SCEV *SResidual =
              getAddRecExpr(getConstant(C - D), Step, L, AR->getNoWrapFlags());
          const SCEV *SSExtR = getSignExtendExpr(SResidual, Ty, Depth + 1);
          return getAddExpr(SSExtD, SSExtR,
                            (SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNUW),
                            Depth + 1);
        }
      }

      if (proveNoWrapByVaryingStart<SCEVSignExtendExpr>(Start, Step, L)) {
        const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
        return getAddRecExpr(
            getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Depth + 1),
            getSignExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags());
      }
    }

  // If the input value is provably positive and we could not simplify
  // away the sext build a zext instead.
  if (isKnownNonNegative(Op))
    return getZeroExtendExpr(Op, Ty, Depth + 1);

  // The cast wasn't folded; create an explicit cast node.
  // Recompute the insert position, as it may have been invalidated.
  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP))
    return S;
  SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator),
                                                   Op, Ty);
  UniqueSCEVs.InsertNode(S, IP);
  addToLoopUseLists(S);
  return S;
}
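
// The widening check above can be mimicked on plain integers: redo the
// arithmetic in a type twice as wide and ask whether the wide result
// round-trips through the narrow type. A standalone sketch of that idea
// (hypothetical helper, not part of ScalarEvolution; relies on C++20's
// well-defined modular conversion back to a signed type):

#include <cstdint>

// True iff a + b*n computed in int32_t would not sign-overflow, decided
// by computing in int64_t -- the analogue of WideTy above -- and
// comparing against the value truncated back to 32 bits.
static bool addMulFitsInInt32(int32_t a, int32_t b, uint32_t n) {
  int64_t Wide = (int64_t)a + (int64_t)b * (int64_t)n;
  return Wide == (int64_t)(int32_t)Wide;
}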

/// getAnyExtendExpr - Return a SCEV for the given operand extended with
/// unspecified bits out to the given type.
const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op,
                                              Type *Ty) {
  assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
         "This is not an extending conversion!");
  assert(isSCEVable(Ty) &&
         "This is not a conversion to a SCEVable type!");
  Ty = getEffectiveSCEVType(Ty);

  // Sign-extend negative constants.
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
    if (SC->getAPInt().isNegative())
      return getSignExtendExpr(Op, Ty);

  // Peel off a truncate cast.
  if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Op)) {
    const SCEV *NewOp = T->getOperand();
    if (getTypeSizeInBits(NewOp->getType()) < getTypeSizeInBits(Ty))
      return getAnyExtendExpr(NewOp, Ty);
    return getTruncateOrNoop(NewOp, Ty);
  }

  // Next try a zext cast. If the cast is folded, use it.
  const SCEV *ZExt = getZeroExtendExpr(Op, Ty);
  if (!isa<SCEVZeroExtendExpr>(ZExt))
    return ZExt;

  // Next try a sext cast. If the cast is folded, use it.
  const SCEV *SExt = getSignExtendExpr(Op, Ty);
  if (!isa<SCEVSignExtendExpr>(SExt))
    return SExt;

  // Force the cast to be folded into the operands of an addrec.
  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op)) {
    SmallVector<const SCEV *, 4> Ops;
    for (const SCEV *Op : AR->operands())
      Ops.push_back(getAnyExtendExpr(Op, Ty));
    return getAddRecExpr(Ops, AR->getLoop(), SCEV::FlagNW);
  }

  // If the expression is obviously signed, use the sext cast value.
  if (isa<SCEVSMaxExpr>(Op))
    return SExt;

  // Absent any other information, use the zext cast value.
  return ZExt;
}
2239
2240
/// Process the given Ops list, which is a list of operands to be added under
2241
/// the given scale, update the given map. This is a helper function for
2242
/// getAddRecExpr. As an example of what it does, given a sequence of operands
2243
/// that would form an add expression like this:
2244
///
2245
///    m + n + 13 + (A * (o + p + (B * (q + m + 29)))) + r + (-1 * r)
2246
///
2247
/// where A and B are constants, update the map with these values:
2248
///
2249
///    (m, 1+A*B), (n, 1), (o, A), (p, A), (q, A*B), (r, 0)
2250
///
2251
/// and add 13 + A*B*29 to AccumulatedConstant.
2252
/// This will allow getAddRecExpr to produce this:
2253
///
2254
///    13+A*B*29 + n + (m * (1+A*B)) + ((o + p) * A) + (q * A*B)
2255
///
2256
/// This form often exposes folding opportunities that are hidden in
2257
/// the original operand list.
2258
///
2259
/// Return true iff it appears that any interesting folding opportunities
2260
/// may be exposed. This helps getAddRecExpr short-circuit extra work in
2261
/// the common case where no interesting opportunities are present, and
2262
/// is also used as a check to avoid infinite recursion.
2263
static bool
2264
CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M,
2265
                             SmallVectorImpl<const SCEV *> &NewOps,
2266
                             APInt &AccumulatedConstant,
2267
                             const SCEV *const *Ops, size_t NumOperands,
2268
                             const APInt &Scale,
2269
10.1M
                             ScalarEvolution &SE) {
2270
10.1M
  bool Interesting = false;
2271
10.1M
2272
10.1M
  // Iterate over the add operands. They are sorted, with constants first.
2273
10.1M
  unsigned i = 0;
2274
17.5M
  while (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) {
2275
7.43M
    ++i;
2276
7.43M
    // Pull a buried constant out to the outside.
2277
7.43M
    if (Scale != 1 || 
AccumulatedConstant != 07.42M
||
C->getValue()->isZero()7.42M
)
2278
9.00k
      Interesting = true;
2279
7.43M
    AccumulatedConstant += Scale * C->getAPInt();
2280
7.43M
  }
2281
10.1M
2282
10.1M
  // Next comes everything else. We're especially interested in multiplies
2283
10.1M
  // here, but they're in the middle, so just visit the rest with one loop.
2284
27.9M
  for (; i != NumOperands; 
++i17.8M
) {
2285
17.8M
    const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[i]);
2286
17.8M
    if (Mul && 
isa<SCEVConstant>(Mul->getOperand(0))11.6M
) {
2287
11.4M
      APInt NewScale =
2288
11.4M
          Scale * cast<SCEVConstant>(Mul->getOperand(0))->getAPInt();
2289
11.4M
      if (Mul->getNumOperands() == 2 && 
isa<SCEVAddExpr>(Mul->getOperand(1))11.3M
) {
2290
191k
        // A multiplication of a constant with another add; recurse.
2291
191k
        const SCEVAddExpr *Add = cast<SCEVAddExpr>(Mul->getOperand(1));
2292
191k
        Interesting |=
2293
191k
          CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
2294
191k
                                       Add->op_begin(), Add->getNumOperands(),
2295
191k
                                       NewScale, SE);
2296
11.2M
      } else {
2297
11.2M
        // A multiplication of a constant with some other value. Update
2298
11.2M
        // the map.
2299
11.2M
        SmallVector<const SCEV *, 4> MulOps(Mul->op_begin()+1, Mul->op_end());
2300
11.2M
        const SCEV *Key = SE.getMulExpr(MulOps);
2301
11.2M
        auto Pair = M.insert({Key, NewScale});
2302
11.2M
        if (Pair.second) {
2303
11.0M
          NewOps.push_back(Pair.first->first);
2304
11.0M
        } else {
2305
229k
          Pair.first->second += NewScale;
2306
229k
          // The map already had an entry for this value, which may indicate
2307
229k
          // a folding opportunity.
2308
229k
          Interesting = true;
2309
229k
        }
2310
11.2M
      }
2311
11.4M
    } else {
2312
6.40M
      // An ordinary operand. Update the map.
2313
6.40M
      std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair =
2314
6.40M
          M.insert({Ops[i], Scale});
2315
6.40M
      if (Pair.second) {
2316
5.27M
        NewOps.push_back(Pair.first->first);
2317
5.27M
      } else {
2318
1.12M
        Pair.first->second += Scale;
2319
1.12M
        // The map already had an entry for this value, which may indicate
2320
1.12M
        // a folding opportunity.
2321
1.12M
        Interesting = true;
2322
1.12M
      }
2323
6.40M
    }
2324
17.8M
  }
2325
10.1M
2326
10.1M
  return Interesting;
2327
10.1M
}
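
// The bookkeeping above reduces to a small core: multiply scales through
// nested products and merge coefficients per distinct term, where a repeated
// key is exactly the "Interesting" case. A standalone sketch using
// std::string keys in place of uniqued SCEV pointers (names here are
// illustrative, not LLVM API):

#include <cstdint>
#include <map>
#include <string>

// Accumulate Scale * Term into the coefficient map. Returns true when the
// term was already present and the coefficients were merged -- e.g. the
// r and (-1 * r) of the doc comment collapsing to (r, 0).
static bool addScaledTerm(std::map<std::string, int64_t> &M,
                          const std::string &Term, int64_t Scale) {
  auto Ins = M.insert({Term, Scale});
  if (Ins.second)
    return false;              // first occurrence of this term
  Ins.first->second += Scale;  // folding opportunity: merge coefficients
  return true;
}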

// We're trying to construct a SCEV of type `Type' with `Ops' as operands and
// `OldFlags' as can't-wrap behavior.  Infer a more aggressive set of
// can't-overflow flags for the operation if possible.
static SCEV::NoWrapFlags
StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type,
                      const ArrayRef<const SCEV *> Ops,
                      SCEV::NoWrapFlags Flags) {
  using namespace std::placeholders;

  using OBO = OverflowingBinaryOperator;

  bool CanAnalyze =
      Type == scAddExpr || Type == scAddRecExpr || Type == scMulExpr;
  (void)CanAnalyze;
  assert(CanAnalyze && "don't call from other places!");

  int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW;
  SCEV::NoWrapFlags SignOrUnsignWrap =
      ScalarEvolution::maskFlags(Flags, SignOrUnsignMask);

  // If FlagNSW is true and all the operands are non-negative, infer FlagNUW.
  auto IsKnownNonNegative = [&](const SCEV *S) {
    return SE->isKnownNonNegative(S);
  };

  if (SignOrUnsignWrap == SCEV::FlagNSW && all_of(Ops, IsKnownNonNegative))
    Flags =
        ScalarEvolution::setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask);

  SignOrUnsignWrap = ScalarEvolution::maskFlags(Flags, SignOrUnsignMask);

  if (SignOrUnsignWrap != SignOrUnsignMask &&
      (Type == scAddExpr || Type == scMulExpr) && Ops.size() == 2 &&
      isa<SCEVConstant>(Ops[0])) {

    auto Opcode = [&] {
      switch (Type) {
      case scAddExpr:
        return Instruction::Add;
      case scMulExpr:
        return Instruction::Mul;
      default:
        llvm_unreachable("Unexpected SCEV op.");
      }
    }();

    const APInt &C = cast<SCEVConstant>(Ops[0])->getAPInt();

    // (A <opcode> C) --> (A <opcode> C)<nsw> if the op doesn't sign overflow.
    if (!(SignOrUnsignWrap & SCEV::FlagNSW)) {
      auto NSWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
          Opcode, C, OBO::NoSignedWrap);
      if (NSWRegion.contains(SE->getSignedRange(Ops[1])))
        Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW);
    }

    // (A <opcode> C) --> (A <opcode> C)<nuw> if the op doesn't unsign overflow.
    if (!(SignOrUnsignWrap & SCEV::FlagNUW)) {
      auto NUWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
          Opcode, C, OBO::NoUnsignedWrap);
      if (NUWRegion.contains(SE->getUnsignedRange(Ops[1])))
        Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW);
    }
  }

  return Flags;
}
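
// For signed addition of a constant C, the "guaranteed no-wrap region"
// consulted above is just an interval of the other operand. A standalone
// sketch for int32_t (hand-rolled stand-in for the ConstantRange query):

#include <cstdint>
#include <limits>

// x + c is free of signed wrap exactly when x lies in
// [INT32_MIN, INT32_MAX - c] for non-negative c, and in
// [INT32_MIN - c, INT32_MAX] for negative c.
static bool addHasNoSignedWrap(int32_t x, int32_t c) {
  if (c >= 0)
    return x <= std::numeric_limits<int32_t>::max() - c;
  return x >= std::numeric_limits<int32_t>::min() - c;
}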

bool ScalarEvolution::isAvailableAtLoopEntry(const SCEV *S, const Loop *L) {
  return isLoopInvariant(S, L) && properlyDominates(S, L->getHeader());
}

/// Get a canonical add expression, or something simpler if possible.
const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
                                        SCEV::NoWrapFlags Flags,
                                        unsigned Depth) {
  assert(!(Flags & ~(SCEV::FlagNUW | SCEV::FlagNSW)) &&
         "only nuw or nsw allowed");
  assert(!Ops.empty() && "Cannot get empty add!");
  if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
  Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
  for (unsigned i = 1, e = Ops.size(); i != e; ++i)
    assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
           "SCEVAddExpr operand types don't match!");
#endif

  // Sort by complexity, this groups all similar expression types together.
  GroupByComplexity(Ops, &LI, DT);

  Flags = StrengthenNoWrapFlags(this, scAddExpr, Ops, Flags);

  // If there are any constants, fold them together.
  unsigned Idx = 0;
  if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
    ++Idx;
    assert(Idx < Ops.size());
    while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
      // We found two constants, fold them together!
      Ops[0] = getConstant(LHSC->getAPInt() + RHSC->getAPInt());
      if (Ops.size() == 2) return Ops[0];
      Ops.erase(Ops.begin()+1);  // Erase the folded element
      LHSC = cast<SCEVConstant>(Ops[0]);
    }

    // If we are left with a constant zero being added, strip it off.
    if (LHSC->getValue()->isZero()) {
      Ops.erase(Ops.begin());
      --Idx;
    }

    if (Ops.size() == 1) return Ops[0];
  }

  // Limit recursion calls depth.
  if (Depth > MaxArithDepth || hasHugeExpression(Ops))
    return getOrCreateAddExpr(Ops, Flags);

  // Okay, check to see if the same value occurs in the operand list more than
  // once.  If so, merge them together into a multiply expression.  Since we
  // sorted the list, these values are required to be adjacent.
  Type *Ty = Ops[0]->getType();
  bool FoundMatch = false;
  for (unsigned i = 0, e = Ops.size(); i != e-1; ++i)
    if (Ops[i] == Ops[i+1]) {      //  X + Y + Y  -->  X + Y*2
      // Scan ahead to count how many equal operands there are.
      unsigned Count = 2;
      while (i+Count != e && Ops[i+Count] == Ops[i])
        ++Count;
      // Merge the values into a multiply.
      const SCEV *Scale = getConstant(Ty, Count);
      const SCEV *Mul = getMulExpr(Scale, Ops[i], SCEV::FlagAnyWrap, Depth + 1);
      if (Ops.size() == Count)
        return Mul;
      Ops[i] = Mul;
      Ops.erase(Ops.begin()+i+1, Ops.begin()+i+Count);
      --i; e -= Count - 1;
      FoundMatch = true;
    }
  if (FoundMatch)
    return getAddExpr(Ops, Flags, Depth + 1);

  // Check for truncates. If all the operands are truncated from the same
  // type, see if factoring out the truncate would permit the result to be
  // folded. e.g., n*trunc(x) + m*trunc(y) --> trunc(trunc(n)*x + trunc(m)*y)
  // if the contents of the resulting outer trunc fold to something simple.
  auto FindTruncSrcType = [&]() -> Type * {
    // We're ultimately looking to fold an addrec of truncs and muls of only
    // constants and truncs, so if we find any other types of SCEV
    // as operands of the addrec then we bail and return nullptr here.
    // Otherwise, we return the type of the operand of a trunc that we find.
    if (auto *T = dyn_cast<SCEVTruncateExpr>(Ops[Idx]))
      return T->getOperand()->getType();
    if (const auto *Mul = dyn_cast<SCEVMulExpr>(Ops[Idx])) {
      const auto *LastOp = Mul->getOperand(Mul->getNumOperands() - 1);
      if (const auto *T = dyn_cast<SCEVTruncateExpr>(LastOp))
        return T->getOperand()->getType();
    }
    return nullptr;
  };
  if (auto *SrcType = FindTruncSrcType()) {
    SmallVector<const SCEV *, 8> LargeOps;
    bool Ok = true;
    // Check all the operands to see if they can be represented in the
    // source type of the truncate.
    for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
      if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Ops[i])) {
        if (T->getOperand()->getType() != SrcType) {
          Ok = false;
          break;
        }
        LargeOps.push_back(T->getOperand());
      } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) {
        LargeOps.push_back(getAnyExtendExpr(C, SrcType));
      } else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Ops[i])) {
        SmallVector<const SCEV *, 8> LargeMulOps;
        for (unsigned j = 0, f = M->getNumOperands(); j != f && Ok; ++j) {
          if (const SCEVTruncateExpr *T =
                dyn_cast<SCEVTruncateExpr>(M->getOperand(j))) {
            if (T->getOperand()->getType() != SrcType) {
              Ok = false;
              break;
            }
            LargeMulOps.push_back(T->getOperand());
          } else if (const auto *C = dyn_cast<SCEVConstant>(M->getOperand(j))) {
            LargeMulOps.push_back(getAnyExtendExpr(C, SrcType));
          } else {
            Ok = false;
            break;
          }
        }
        if (Ok)
          LargeOps.push_back(getMulExpr(LargeMulOps, SCEV::FlagAnyWrap, Depth + 1));
      } else {
        Ok = false;
        break;
      }
    }
    if (Ok) {
      // Evaluate the expression in the larger type.
      const SCEV *Fold = getAddExpr(LargeOps, SCEV::FlagAnyWrap, Depth + 1);
      // If it folds to something simple, use it. Otherwise, don't.
      if (isa<SCEVConstant>(Fold) || isa<SCEVUnknown>(Fold))
        return getTruncateExpr(Fold, Ty);
    }
  }

  // Skip past any other cast SCEVs.
  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddExpr)
    ++Idx;

  // If there are add operands they would be next.
  if (Idx < Ops.size()) {
    bool DeletedAdd = false;
    while (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[Idx])) {
      if (Ops.size() > AddOpsInlineThreshold ||
          Add->getNumOperands() > AddOpsInlineThreshold)
        break;
      // If we have an add, expand the add operands onto the end of the operands
      // list.
      Ops.erase(Ops.begin()+Idx);
      Ops.append(Add->op_begin(), Add->op_end());
      DeletedAdd = true;
    }

    // If we deleted at least one add, we added operands to the end of the list,
    // and they are not necessarily sorted.  Recurse to resort and resimplify
    // any operands we just acquired.
    if (DeletedAdd)
      return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
  }

  // Skip over the add expression until we get to a multiply.
  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
    ++Idx;

  // Check to see if there are any folding opportunities present with
  // operands multiplied by constant values.
  if (Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx])) {
    uint64_t BitWidth = getTypeSizeInBits(Ty);
    DenseMap<const SCEV *, APInt> M;
    SmallVector<const SCEV *, 8> NewOps;
    APInt AccumulatedConstant(BitWidth, 0);
    if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
                                     Ops.data(), Ops.size(),
                                     APInt(BitWidth, 1), *this)) {
      struct APIntCompare {
        bool operator()(const APInt &LHS, const APInt &RHS) const {
          return LHS.ult(RHS);
        }
      };

      // Some interesting folding opportunity is present, so it's worthwhile to
      // re-generate the operands list. Group the operands by constant scale,
      // to avoid multiplying by the same constant scale multiple times.
      std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare> MulOpLists;
      for (const SCEV *NewOp : NewOps)
        MulOpLists[M.find(NewOp)->second].push_back(NewOp);
      // Re-generate the operands list.
      Ops.clear();
      if (AccumulatedConstant != 0)
        Ops.push_back(getConstant(AccumulatedConstant));
      for (auto &MulOp : MulOpLists)
        if (MulOp.first != 0)
          Ops.push_back(getMulExpr(
              getConstant(MulOp.first),
              getAddExpr(MulOp.second, SCEV::FlagAnyWrap, Depth + 1),
              SCEV::FlagAnyWrap, Depth + 1));
      if (Ops.empty())
        return getZero(Ty);
      if (Ops.size() == 1)
        return Ops[0];
      return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
    }
  }

  // If we are adding something to a multiply expression, make sure the
  // something is not already an operand of the multiply.  If so, merge it into
  // the multiply.
  for (; Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx]); ++Idx) {
    const SCEVMulExpr *Mul = cast<SCEVMulExpr>(Ops[Idx]);
    for (unsigned MulOp = 0, e = Mul->getNumOperands(); MulOp != e; ++MulOp) {
      const SCEV *MulOpSCEV = Mul->getOperand(MulOp);
      if (isa<SCEVConstant>(MulOpSCEV))
        continue;
      for (unsigned AddOp = 0, e = Ops.size(); AddOp != e; ++AddOp)
        if (MulOpSCEV == Ops[AddOp]) {
          // Fold W + X + (X * Y * Z)  -->  W + (X * ((Y*Z)+1))
          const SCEV *InnerMul = Mul->getOperand(MulOp == 0);
          if (Mul->getNumOperands() != 2) {
            // If the multiply has more than two operands, we must get the
            // Y*Z term.
            SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
                                                Mul->op_begin()+MulOp);
            MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
            InnerMul = getMulExpr(MulOps, SCEV::FlagAnyWrap, Depth + 1);
          }
          SmallVector<const SCEV *, 2> TwoOps = {getOne(Ty), InnerMul};
          const SCEV *AddOne = getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1);
          const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV,
                                            SCEV::FlagAnyWrap, Depth + 1);
          if (Ops.size() == 2) return OuterMul;
          if (AddOp < Idx) {
            Ops.erase(Ops.begin()+AddOp);
            Ops.erase(Ops.begin()+Idx-1);
          } else {
            Ops.erase(Ops.begin()+Idx);
            Ops.erase(Ops.begin()+AddOp-1);
          }
          Ops.push_back(OuterMul);
          return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
        }

      // Check this multiply against other multiplies being added together.
      for (unsigned OtherMulIdx = Idx+1;
           OtherMulIdx < Ops.size() && isa<SCEVMulExpr>(Ops[OtherMulIdx]);
           ++OtherMulIdx) {
        const SCEVMulExpr *OtherMul = cast<SCEVMulExpr>(Ops[OtherMulIdx]);
        // If MulOp occurs in OtherMul, we can fold the two multiplies
        // together.
        for (unsigned OMulOp = 0, e = OtherMul->getNumOperands();
             OMulOp != e; ++OMulOp)
          if (OtherMul->getOperand(OMulOp) == MulOpSCEV) {
            // Fold X + (A*B*C) + (A*D*E) --> X + (A*(B*C+D*E))
            const SCEV *InnerMul1 = Mul->getOperand(MulOp == 0);
            if (Mul->getNumOperands() != 2) {
              SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
                                                  Mul->op_begin()+MulOp);
              MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
              InnerMul1 = getMulExpr(MulOps, SCEV::FlagAnyWrap, Depth + 1);
            }
            const SCEV *InnerMul2 = OtherMul->getOperand(OMulOp == 0);
            if (OtherMul->getNumOperands() != 2) {
              SmallVector<const SCEV *, 4> MulOps(OtherMul->op_begin(),
                                                  OtherMul->op_begin()+OMulOp);
              MulOps.append(OtherMul->op_begin()+OMulOp+1, OtherMul->op_end());
              InnerMul2 = getMulExpr(MulOps, SCEV::FlagAnyWrap, Depth + 1);
            }
            SmallVector<const SCEV *, 2> TwoOps = {InnerMul1, InnerMul2};
            const SCEV *InnerMulSum =
                getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1);
            const SCEV *OuterMul = getMulExpr(MulOpSCEV, InnerMulSum,
                                              SCEV::FlagAnyWrap, Depth + 1);
            if (Ops.size() == 2) return OuterMul;
            Ops.erase(Ops.begin()+Idx);
            Ops.erase(Ops.begin()+OtherMulIdx-1);
            Ops.push_back(OuterMul);
            return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
          }
      }
    }
  }

  // If there are any add recurrences in the operands list, see if any other
  // added values are loop invariant.  If so, we can fold them into the
  // recurrence.
  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr)
    ++Idx;

  // Scan over all recurrences, trying to fold loop invariants into them.
  for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
    // Scan all of the other operands to this add and add them to the vector if
    // they are loop invariant w.r.t. the recurrence.
    SmallVector<const SCEV *, 8> LIOps;
    const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
    const Loop *AddRecLoop = AddRec->getLoop();
    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
      if (isAvailableAtLoopEntry(Ops[i], AddRecLoop)) {
        LIOps.push_back(Ops[i]);
        Ops.erase(Ops.begin()+i);
        --i; --e;
      }

    // If we found some loop invariants, fold them into the recurrence.
    if (!LIOps.empty()) {
      //  NLI + LI + {Start,+,Step}  -->  NLI + {LI+Start,+,Step}
      LIOps.push_back(AddRec->getStart());

      SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
                                             AddRec->op_end());
      // This follows from the fact that the no-wrap flags on the outer add
      // expression are applicable on the 0th iteration, when the add recurrence
      // will be equal to its start value.
      AddRecOps[0] = getAddExpr(LIOps, Flags, Depth + 1);

      // Build the new addrec. Propagate the NUW and NSW flags if both the
      // outer add and the inner addrec are guaranteed to have no overflow.
      // Always propagate NW.
      Flags = AddRec->getNoWrapFlags(setFlags(Flags, SCEV::FlagNW));
      const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRecLoop, Flags);

      // If all of the other operands were loop invariant, we are done.
      if (Ops.size() == 1) return NewRec;

      // Otherwise, add the folded AddRec by the non-invariant parts.
      for (unsigned i = 0;; ++i)
        if (Ops[i] == AddRec) {
          Ops[i] = NewRec;
          break;
        }
      return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
    }

    // Okay, if there weren't any loop invariants to be folded, check to see if
    // there are multiple AddRec's with the same loop induction variable being
    // added together.  If so, we can fold them.
    for (unsigned OtherIdx = Idx+1;
         OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
         ++OtherIdx) {
      // We expect the AddRecExpr's to be sorted in reverse dominance order,
      // so that the 1st found AddRecExpr is dominated by all others.
      assert(DT.dominates(
           cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()->getHeader(),
           AddRec->getLoop()->getHeader()) &&
        "AddRecExprs are not sorted in reverse dominance order?");
      if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) {
        // Other + {A,+,B}<L> + {C,+,D}<L>  -->  Other + {A+C,+,B+D}<L>
        SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
                                               AddRec->op_end());
        for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
             ++OtherIdx) {
          const auto *OtherAddRec = cast<SCEVAddRecExpr>(Ops[OtherIdx]);
          if (OtherAddRec->getLoop() == AddRecLoop) {
            for (unsigned i = 0, e = OtherAddRec->getNumOperands();
                 i != e; ++i) {
              if (i >= AddRecOps.size()) {
                AddRecOps.append(OtherAddRec->op_begin()+i,
                                 OtherAddRec->op_end());
                break;
              }
              SmallVector<const SCEV *, 2> TwoOps = {
                  AddRecOps[i], OtherAddRec->getOperand(i)};
              AddRecOps[i] = getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1);
            }
            Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
          }
        }
        // Step size has changed, so we cannot guarantee no self-wraparound.
        Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop, SCEV::FlagAnyWrap);
        return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
      }
    }

    // Otherwise couldn't fold anything into this recurrence.  Move onto the
    // next one.
  }

  // Okay, it looks like we really DO need an add expr.  Check to see if we
  // already have one, otherwise create a new one.
  return getOrCreateAddExpr(Ops, Flags);
}
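
// The X + Y + Y --> X + Y*2 step above depends only on equal operands being
// adjacent after sorting; one linear scan then finds every run. A standalone
// sketch of that scan over symbolic terms (illustrative types, not the SCEV
// classes):

#include <algorithm>
#include <string>
#include <utility>
#include <vector>

// Collapse runs of equal terms in a list into (term, multiplicity) pairs,
// the scalar analogue of rewriting X + Y + Y as X + 2*Y.
static std::vector<std::pair<std::string, unsigned>>
collapseRepeatedTerms(std::vector<std::string> Ops) {
  std::sort(Ops.begin(), Ops.end()); // plays the role of GroupByComplexity
  std::vector<std::pair<std::string, unsigned>> Out;
  for (size_t i = 0; i < Ops.size();) {
    size_t j = i + 1;
    while (j < Ops.size() && Ops[j] == Ops[i])
      ++j;                           // scan ahead, as the loop above does
    Out.push_back({Ops[i], unsigned(j - i)});
    i = j;
  }
  return Out;
}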

const SCEV *
ScalarEvolution::getOrCreateAddExpr(ArrayRef<const SCEV *> Ops,
                                    SCEV::NoWrapFlags Flags) {
  FoldingSetNodeID ID;
  ID.AddInteger(scAddExpr);
  for (const SCEV *Op : Ops)
    ID.AddPointer(Op);
  void *IP = nullptr;
  SCEVAddExpr *S =
      static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
  if (!S) {
    const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
    std::uninitialized_copy(Ops.begin(), Ops.end(), O);
    S = new (SCEVAllocator)
        SCEVAddExpr(ID.Intern(SCEVAllocator), O, Ops.size());
    UniqueSCEVs.InsertNode(S, IP);
    addToLoopUseLists(S);
  }
  S->setNoWrapFlags(Flags);
  return S;
}
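
// getOrCreateAddExpr and its AddRec/Mul siblings below all follow the same
// hash-consing pattern: key on the opcode plus operand pointers, allocate
// only on a miss, so structurally equal expressions end up pointer-equal.
// A toy version with std::map standing in for FoldingSet (a sketch of the
// pattern only, not the LLVM data structure):

#include <map>
#include <memory>
#include <utility>
#include <vector>

struct Expr {
  int Opcode;
  std::vector<const Expr *> Ops;
};

using ExprKey = std::pair<int, std::vector<const Expr *>>;

// One node per distinct (opcode, operands) key; repeated requests hand back
// the cached pointer, making equality a pointer comparison.
static const Expr *getOrCreate(std::map<ExprKey, std::unique_ptr<Expr>> &Tab,
                               int Opcode, std::vector<const Expr *> Ops) {
  ExprKey Key(Opcode, Ops);
  auto It = Tab.find(Key);
  if (It != Tab.end())
    return It->second.get();                  // cache hit: reuse the node
  auto Node = std::make_unique<Expr>(Expr{Opcode, std::move(Ops)});
  const Expr *Raw = Node.get();
  Tab.emplace(std::move(Key), std::move(Node));
  return Raw;
}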

const SCEV *
ScalarEvolution::getOrCreateAddRecExpr(ArrayRef<const SCEV *> Ops,
                                       const Loop *L, SCEV::NoWrapFlags Flags) {
  FoldingSetNodeID ID;
  ID.AddInteger(scAddRecExpr);
  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
    ID.AddPointer(Ops[i]);
  ID.AddPointer(L);
  void *IP = nullptr;
  SCEVAddRecExpr *S =
      static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
  if (!S) {
    const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
    std::uninitialized_copy(Ops.begin(), Ops.end(), O);
    S = new (SCEVAllocator)
        SCEVAddRecExpr(ID.Intern(SCEVAllocator), O, Ops.size(), L);
    UniqueSCEVs.InsertNode(S, IP);
    addToLoopUseLists(S);
  }
  S->setNoWrapFlags(Flags);
  return S;
}

const SCEV *
ScalarEvolution::getOrCreateMulExpr(ArrayRef<const SCEV *> Ops,
                                    SCEV::NoWrapFlags Flags) {
  FoldingSetNodeID ID;
  ID.AddInteger(scMulExpr);
  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
    ID.AddPointer(Ops[i]);
  void *IP = nullptr;
  SCEVMulExpr *S =
    static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
  if (!S) {
    const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
    std::uninitialized_copy(Ops.begin(), Ops.end(), O);
    S = new (SCEVAllocator) SCEVMulExpr(ID.Intern(SCEVAllocator),
                                        O, Ops.size());
    UniqueSCEVs.InsertNode(S, IP);
    addToLoopUseLists(S);
  }
  S->setNoWrapFlags(Flags);
  return S;
}

static uint64_t umul_ov(uint64_t i, uint64_t j, bool &Overflow) {
  uint64_t k = i*j;
  if (j > 1 && k / j != i) Overflow = true;
  return k;
}
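
// The division test in umul_ov (k / j != i) is the portable way to spot that
// i*j wrapped. On Clang and GCC the same check can be spelled with a builtin;
// a sketch of the equivalent form (assumes one of those compilers):

#include <cstdint>

// Like umul_ov above, this only ever sets Overflow to true, never clears it.
static uint64_t umul_ov_builtin(uint64_t i, uint64_t j, bool &Overflow) {
  uint64_t k;
  if (__builtin_mul_overflow(i, j, &k))
    Overflow = true;
  return k;
}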

/// Compute the result of "n choose k", the binomial coefficient.  If an
/// intermediate computation overflows, Overflow will be set and the return will
/// be garbage. Overflow is not cleared on absence of overflow.
static uint64_t Choose(uint64_t n, uint64_t k, bool &Overflow) {
  // We use the multiplicative formula:
  //     n(n-1)(n-2)...(n-(k-1)) / k(k-1)(k-2)...1 .
  // At each iteration, we take the n-th term of the numerator and divide by the
  // (k-n)th term of the denominator.  This division will always produce an
  // integral result, and helps reduce the chance of overflow in the
  // intermediate computations. However, we can still overflow even when the
  // final result would fit.

  if (n == 0 || n == k) return 1;
  if (k > n) return 0;

  if (k > n/2)
    k = n-k;

  uint64_t r = 1;
  for (uint64_t i = 1; i <= k; ++i) {
    r = umul_ov(r, n-(i-1), Overflow);
    r /= i;
  }
  return r;
}
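
// Concretely, Choose(5, 2) takes two iterations of the loop above:
// r = (1*5)/1 = 5, then r = (5*4)/2 = 10, and each division is exact
// because any i consecutive integers contain a multiple of i. A standalone
// sketch repeating that trace:

#include <cassert>
#include <cstdint>

static uint64_t chooseFiveTwo() {
  uint64_t n = 5, k = 2, r = 1;
  for (uint64_t i = 1; i <= k; ++i) {
    r *= n - (i - 1); // multiply in the next numerator term
    r /= i;           // exact division: 5/1 = 5, then 20/2 = 10
  }
  assert(r == 10);
  return r;
}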

/// Determine if any of the operands in this SCEV are a constant or if
/// any of the add or multiply expressions in this SCEV contain a constant.
static bool containsConstantInAddMulChain(const SCEV *StartExpr) {
  struct FindConstantInAddMulChain {
    bool FoundConstant = false;

    bool follow(const SCEV *S) {
      FoundConstant |= isa<SCEVConstant>(S);
      return isa<SCEVAddExpr>(S) || isa<SCEVMulExpr>(S);
    }

    bool isDone() const {
      return FoundConstant;
    }
  };

  FindConstantInAddMulChain F;
  SCEVTraversal<FindConstantInAddMulChain> ST(F);
  ST.visitAll(StartExpr);
  return F.FoundConstant;
}

/// Get a canonical multiply expression, or something simpler if possible.
const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
                                        SCEV::NoWrapFlags Flags,
                                        unsigned Depth) {
  assert(Flags == maskFlags(Flags, SCEV::FlagNUW | SCEV::FlagNSW) &&
         "only nuw or nsw allowed");
  assert(!Ops.empty() && "Cannot get empty mul!");
  if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
  Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
  for (unsigned i = 1, e = Ops.size(); i != e; ++i)
    assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
           "SCEVMulExpr operand types don't match!");
#endif

  // Sort by complexity, this groups all similar expression types together.
  GroupByComplexity(Ops, &LI, DT);

  Flags = StrengthenNoWrapFlags(this, scMulExpr, Ops, Flags);

  // Limit recursion calls depth.
  if (Depth > MaxArithDepth || hasHugeExpression(Ops))
    return getOrCreateMulExpr(Ops, Flags);

  // If there are any constants, fold them together.
  unsigned Idx = 0;
  if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {

    if (Ops.size() == 2)
      // C1*(C2+V) -> C1*C2 + C1*V
      if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1]))
        // If any of Add's ops are Adds or Muls with a constant, apply this
        // transformation as well.
        //
        // TODO: There are some cases where this transformation is not
        // profitable; for example, Add = (C0 + X) * Y + Z.  Maybe the scope of
        // this transformation should be narrowed down.
        if (Add->getNumOperands() == 2 && containsConstantInAddMulChain(Add))
          return getAddExpr(getMulExpr(LHSC, Add->getOperand(0),
                                       SCEV::FlagAnyWrap, Depth + 1),
                            getMulExpr(LHSC, Add->getOperand(1),
                                       SCEV::FlagAnyWrap, Depth + 1),
                            SCEV::FlagAnyWrap, Depth + 1);

    ++Idx;
    while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
      // We found two constants, fold them together!
      ConstantInt *Fold =
          ConstantInt::get(getContext(), LHSC->getAPInt() * RHSC->getAPInt());
      Ops[0] = getConstant(Fold);
      Ops.erase(Ops.begin()+1);  // Erase the folded element
      if (Ops.size() == 1) return Ops[0];
      LHSC = cast<SCEVConstant>(Ops[0]);
    }

    // If we are left with a constant one being multiplied, strip it off.
    if (cast<SCEVConstant>(Ops[0])->getValue()->isOne()) {
      Ops.erase(Ops.begin());
      --Idx;
    } else if (cast<SCEVConstant>(Ops[0])->getValue()->isZero()) {
      // If we have a multiply of zero, it will always be zero.
      return Ops[0];
    } else if (Ops[0]->isAllOnesValue()) {
      // If we have a mul by -1 of an add, try distributing the -1 among the
      // add operands.
      if (Ops.size() == 2) {
        if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1])) {
          SmallVector<const SCEV *, 4> NewOps;
          bool AnyFolded = false;
          for (const SCEV *AddOp : Add->operands()) {
            const SCEV *Mul = getMulExpr(Ops[0], AddOp, SCEV::FlagAnyWrap,
                                         Depth + 1);
            if (!isa<SCEVMulExpr>(Mul)) AnyFolded = true;
            NewOps.push_back(Mul);
          }
          if (AnyFolded)
            return getAddExpr(NewOps, SCEV::FlagAnyWrap, Depth + 1);
        } else if (const auto *AddRec = dyn_cast<SCEVAddRecExpr>(Ops[1])) {
          // Negation preserves a recurrence's no self-wrap property.
          SmallVector<const SCEV *, 4> Operands;
          for (const SCEV *AddRecOp : AddRec->operands())
            Operands.push_back(getMulExpr(Ops[0], AddRecOp, SCEV::FlagAnyWrap,
                                          Depth + 1));

          return getAddRecExpr(Operands, AddRec->getLoop(),
                               AddRec->getNoWrapFlags(SCEV::FlagNW));
        }
      }
    }

    if (Ops.size() == 1)
      return Ops[0];
  }

  // Skip over the add expression until we get to a multiply.
  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
    ++Idx;

  // If there are mul operands inline them all into this expression.
  if (Idx < Ops.size()) {
    bool DeletedMul = false;
    while (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[Idx])) {
      if (Ops.size() > MulOpsInlineThreshold)
        break;
      // If we have a mul, expand the mul operands onto the end of the
      // operands list.
      Ops.erase(Ops.begin()+Idx);
      Ops.append(Mul->op_begin(), Mul->op_end());
      DeletedMul = true;
    }

    // If we deleted at least one mul, we added operands to the end of the
    // list, and they are not necessarily sorted.  Recurse to resort and
    // resimplify any operands we just acquired.
    if (DeletedMul)
      return getMulExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
  }

  // If there are any add recurrences in the operands list, see if any other
  // added values are loop invariant.  If so, we can fold them into the
  // recurrence.
  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr)
    ++Idx;

  // Scan over all recurrences, trying to fold loop invariants into them.
  for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
    // Scan all of the other operands to this mul and add them to the vector
    // if they are loop invariant w.r.t. the recurrence.
    SmallVector<const SCEV *, 8> LIOps;
    const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
    const Loop *AddRecLoop = AddRec->getLoop();
    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
      if (isAvailableAtLoopEntry(Ops[i], AddRecLoop)) {
        LIOps.push_back(Ops[i]);
        Ops.erase(Ops.begin()+i);
        --i; --e;
      }

    // If we found some loop invariants, fold them into the recurrence.
    if (!LIOps.empty()) {
      //  NLI * LI * {Start,+,Step}  -->  NLI * {LI*Start,+,LI*Step}
      SmallVector<const SCEV *, 4> NewOps;
      NewOps.reserve(AddRec->getNumOperands());
      const SCEV *Scale = getMulExpr(LIOps, SCEV::FlagAnyWrap, Depth + 1);
      for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i)
        NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i),
                                    SCEV::FlagAnyWrap, Depth + 1));

      // Build the new addrec. Propagate the NUW and NSW flags if both the
      // outer mul and the inner addrec are guaranteed to have no overflow.
      //
      // No self-wrap cannot be guaranteed after changing the step size, but
      // will be inferred if either NUW or NSW is true.
      Flags = AddRec->getNoWrapFlags(clearFlags(Flags, SCEV::FlagNW));
      const SCEV *NewRec = getAddRecExpr(NewOps, AddRecLoop, Flags);

      // If all of the other operands were loop invariant, we are done.
      if (Ops.size() == 1) return NewRec;

      // Otherwise, multiply the folded AddRec by the non-invariant parts.
      for (unsigned i = 0;; ++i)
        if (Ops[i] == AddRec) {
          Ops[i] = NewRec;
          break;
        }
      return getMulExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
    }

    // Okay, if there weren't any loop invariants to be folded, check to see
    // if there are multiple AddRec's with the same loop induction variable
    // being multiplied together.  If so, we can fold them.

    // {A1,+,A2,+,...,+,An}<L> * {B1,+,B2,+,...,+,Bn}<L>
    // = {x=1 in [ sum y=x..2x [ sum z=max(y-x, y-n)..min(x,n) [
    //       choose(x, 2x)*choose(2x-y, x-z)*A_{y-z}*B_z
    //   ]]],+,...up to x=2n}.
    // Note that the arguments to choose() are always integers with values
    // known at compile time, never SCEV objects.
    //
    // The implementation avoids pointless extra computations when the two
    // addrec's are of different length (mathematically, it's equivalent to
    // an infinite stream of zeros on the right).
    bool OpsModified = false;
    for (unsigned OtherIdx = Idx+1;
         OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
         ++OtherIdx) {
      const SCEVAddRecExpr *OtherAddRec =
        dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]);
      if (!OtherAddRec || OtherAddRec->getLoop() != AddRecLoop)
        continue;

      // Limit max number of arguments to avoid creation of unreasonably big
      // SCEVAddRecs with very complex operands.
      if (AddRec->getNumOperands() + OtherAddRec->getNumOperands() - 1 >
          MaxAddRecSize || isHugeExpression(AddRec) ||
          isHugeExpression(OtherAddRec))
        continue;

      bool Overflow = false;
      Type *Ty = AddRec->getType();
      bool LargerThan64Bits = getTypeSizeInBits(Ty) > 64;
      SmallVector<const SCEV *, 7> AddRecOps;
      for (int x = 0, xe = AddRec->getNumOperands() +
             OtherAddRec->getNumOperands() - 1; x != xe && !Overflow; ++x) {
        SmallVector<const SCEV *, 7> SumOps;
        for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) {
          uint64_t Coeff1 = Choose(x, 2*x - y, Overflow);
          for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1),
                 ze = std::min(x+1, (int)OtherAddRec->getNumOperands());
               z < ze && !Overflow; ++z) {
            uint64_t Coeff2 = Choose(2*x - y, x-z, Overflow);
            uint64_t Coeff;
            if (LargerThan64Bits)
              Coeff = umul_ov(Coeff1, Coeff2, Overflow);
            else
              Coeff = Coeff1*Coeff2;
            const SCEV *CoeffTerm = getConstant(Ty, Coeff);
            const SCEV *Term1 = AddRec->getOperand(y-z);
            const SCEV *Term2 = OtherAddRec->getOperand(z);
            SumOps.push_back(getMulExpr(CoeffTerm, Term1, Term2,
                                        SCEV::FlagAnyWrap, Depth + 1));
          }
        }
        if (SumOps.empty())
          SumOps.push_back(getZero(Ty));
        AddRecOps.push_back(getAddExpr(SumOps, SCEV::FlagAnyWrap, Depth + 1));
      }
      if (!Overflow) {
        const SCEV *NewAddRec = getAddRecExpr(AddRecOps, AddRecLoop,
                                              SCEV::FlagAnyWrap);
        if (Ops.size() == 2) return NewAddRec;
        Ops[Idx] = NewAddRec;
        Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
        OpsModified = true;
        AddRec = dyn_cast<SCEVAddRecExpr>(NewAddRec);
        if (!AddRec)
          break;
      }
    }
    if (OpsModified)
      return getMulExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);

    // Otherwise couldn't fold anything into this recurrence.  Move onto the
    // next one.
  }

  // Okay, it looks like we really DO need a mul expr.  Check to see if we
  // already have one, otherwise create a new one.
  return getOrCreateMulExpr(Ops, Flags);
}
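
// A worked affine special case of the product formula above: with
// X_n = A + n*B and Y_n = C + n*D in the same loop,
//
//    X_n * Y_n = A*C + n*(A*D + B*C) + n^2*B*D,
//
// and taking successive differences gives
//
//    {A,+,B}<L> * {C,+,D}<L> = {A*C,+,A*D + B*C + B*D,+,2*B*D}<L>,
//
// since Delta_n = X_{n+1}*Y_{n+1} - X_n*Y_n = A*D + B*C + B*D*(2*n+1)
// starts at A*D + B*C + B*D and itself steps by 2*B*D. This can be
// checked directly by expanding the first few iterations by hand.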

/// Represents an unsigned remainder expression based on unsigned division.
const SCEV *ScalarEvolution::getURemExpr(const SCEV *LHS,
                                         const SCEV *RHS) {
  assert(getEffectiveSCEVType(LHS->getType()) ==
         getEffectiveSCEVType(RHS->getType()) &&
         "SCEVURemExpr operand types don't match!");

  // Short-circuit easy cases
  if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
    // If constant is one, the result is trivial
    if (RHSC->getValue()->isOne())
      return getZero(LHS->getType()); // X urem 1 --> 0

    // If constant is a power of two, fold into a zext(trunc(LHS)).
    if (RHSC->getAPInt().isPowerOf2()) {
      Type *FullTy = LHS->getType();
      Type *TruncTy =
          IntegerType::get(getContext(), RHSC->getAPInt().logBase2());
      return getZeroExtendExpr(getTruncateExpr(LHS, TruncTy), FullTy);
    }
  }

  // Fallback to %a == %x urem %y == %x -<nuw> ((%x udiv %y) *<nuw> %y)
  const SCEV *UDiv = getUDivExpr(LHS, RHS);
  const SCEV *Mult = getMulExpr(UDiv, RHS, SCEV::FlagNUW);
  return getMinusSCEV(LHS, Mult, SCEV::FlagNUW);
}
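
// The power-of-two branch above is plain mask arithmetic: x urem 2^k keeps
// the low k bits, which is exactly what zext(trunc(x)) expresses, and the
// fallback is the usual x - (x udiv y)*y identity. A standalone scalar
// check of both (sketch):

#include <cassert>
#include <cstdint>

static void uremIdentities(uint32_t x, uint32_t y) {
  assert(y != 0);
  // x urem 8 == low three bits of x, i.e. zext(trunc x to i3).
  assert(x % 8 == (x & 0x7));
  // General fallback: x urem y == x - (x udiv y) * y.
  assert(x % y == x - (x / y) * y);
}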

/// Get a canonical unsigned division expression, or something simpler if
/// possible.
const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
                                         const SCEV *RHS) {
  assert(getEffectiveSCEVType(LHS->getType()) ==
         getEffectiveSCEVType(RHS->getType()) &&
         "SCEVUDivExpr operand types don't match!");

  if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
    if (RHSC->getValue()->isOne())
      return LHS;                               // X udiv 1 --> X
    // If the denominator is zero, the result of the udiv is undefined. Don't
    // try to analyze it, because the resolution chosen here may differ from
    // the resolution chosen in other parts of the compiler.
    if (!RHSC->getValue()->isZero()) {
      // Determine if the division can be folded into the operands of the LHS.
      // TODO: Generalize this to non-constants by using known-bits information.
      Type *Ty = LHS->getType();
      unsigned LZ = RHSC->getAPInt().countLeadingZeros();
      unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ - 1;
      // For non-power-of-two values, effectively round the value up to the
      // nearest power of two.
      if (!RHSC->getAPInt().isPowerOf2())
        ++MaxShiftAmt;
      IntegerType *ExtTy =
        IntegerType::get(getContext(), getTypeSizeInBits(Ty) + MaxShiftAmt);
      if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS))
        if (const SCEVConstant *Step =
            dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this))) {
          // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded.
          const APInt &StepInt = Step->getAPInt();
          const APInt &DivInt = RHSC->getAPInt();
          if (!StepInt.urem(DivInt) &&
              getZeroExtendExpr(AR, ExtTy) ==
              getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
                            getZeroExtendExpr(Step, ExtTy),
                            AR->getLoop(), SCEV::FlagAnyWrap)) {
            SmallVector<const SCEV *, 4> Operands;
            for (const SCEV *Op : AR->operands())
              Operands.push_back(getUDivExpr(Op, RHS));
            return getAddRecExpr(Operands, AR->getLoop(), SCEV::FlagNW);
          }
          /// Get a canonical UDivExpr for a recurrence.
          /// {X,+,N}/C => {Y,+,N}/C where Y=X-(X%N). Safe when C%N=0.
          // We can currently only fold X%N if X is constant.
          const SCEVConstant *StartC = dyn_cast<SCEVConstant>(AR->getStart());
          if (StartC && !DivInt.urem(StepInt) &&
              getZeroExtendExpr(AR, ExtTy) ==
              getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
                            getZeroExtendExpr(Step, ExtTy),
                            AR->getLoop(), SCEV::FlagAnyWrap)) {
            const APInt &StartInt = StartC->getAPInt();
            const APInt &StartRem = StartInt.urem(StepInt);
            if (StartRem != 0)
              LHS = getAddRecExpr(getConstant(StartInt - StartRem), Step,
                                  AR->getLoop(), SCEV::FlagNW);
          }
        }
      // (A*B)/C --> A*(B/C) if safe and B/C can be folded.
      if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) {
        SmallVector<const SCEV *, 4> Operands;
        for (const SCEV *Op : M->operands())
          Operands.push_back(getZeroExtendExpr(Op, ExtTy));
        if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands))
          // Find an operand that's safely divisible.
          for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
            const SCEV *Op = M->getOperand(i);
            const SCEV *Div = getUDivExpr(Op, RHSC);
            if (!isa<SCEVUDivExpr>(Div) && getMulExpr(Div, RHSC) == Op) {
              Operands = SmallVector<const SCEV *, 4>(M->op_begin(),
                                                      M->op_end());
              Operands[i] = Div;
              return getMulExpr(Operands);
            }
          }
      }

      // (A/B)/C --> A/(B*C) if safe and B*C can be folded.
      if (const SCEVUDivExpr *OtherDiv = dyn_cast<SCEVUDivExpr>(LHS)) {
        if (auto *DivisorConstant =
                dyn_cast<SCEVConstant>(OtherDiv->getRHS())) {
          bool Overflow = false;
          APInt NewRHS =
              DivisorConstant->getAPInt().umul_ov(RHSC->getAPInt(), Overflow);
          if (Overflow) {
            return getConstant(RHSC->getType(), 0, false);
          }
          return getUDivExpr(OtherDiv->getLHS(), getConstant(NewRHS));
        }
      }

      // (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded.
      if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(LHS)) {
        SmallVector<const SCEV *, 4> Operands;
        for (const SCEV *Op : A->operands())
          Operands.push_back(getZeroExtendExpr(Op, ExtTy));
        if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) {
          Operands.clear();
          for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) {
            const SCEV *Op = getUDivExpr(A->getOperand(i), RHS);
            if (isa<SCEVUDivExpr>(Op) ||
                getMulExpr(Op, RHS) != A->getOperand(i))
              break;
            Operands.push_back(Op);
          }
          if (Operands.size() == A->getNumOperands())
            return getAddExpr(Operands);
        }
      }

      // Fold if both operands are constant.
      if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
        Constant *LHSCV = LHSC->getValue();
        Constant *RHSCV = RHSC->getValue();
        return getConstant(cast<ConstantInt>(ConstantExpr::getUDiv(LHSCV,
                                                                   RHSCV)));
      }
    }
  }

  FoldingSetNodeID ID;
  ID.AddInteger(scUDivExpr);
  ID.AddPointer(LHS);
  ID.AddPointer(RHS);
  void *IP = nullptr;
  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
  SCEV *S = new (SCEVAllocator) SCEVUDivExpr(ID.Intern(SCEVAllocator),
                                             LHS, RHS);
  UniqueSCEVs.InsertNode(S, IP);
  addToLoopUseLists(S);
  return S;
}
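
// Illustration (not part of ScalarEvolution.cpp): the (A/B)/C --> A/(B*C)
// fold above is only sound while B*C does not overflow; when it does, the
// combined divisor exceeds any dividend of that bit width, so the quotient
// is zero. A standalone sketch over uint64_t, assuming a compiler with the
// GCC/Clang overflow builtins; foldedDoubleUDiv is a hypothetical name:
#include <cstdint>
static uint64_t foldedDoubleUDiv(uint64_t A, uint64_t B, uint64_t C) {
  // Mirror APInt::umul_ov: detect unsigned overflow of B * C.
  uint64_t NewRHS;
  if (__builtin_mul_overflow(B, C, &NewRHS))
    return 0;          // divisor wrapped: (A / B) / C is 0 for any 64-bit A
  return A / NewRHS;   // otherwise (A / B) / C == A / (B * C)
}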

static const APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) {
  APInt A = C1->getAPInt().abs();
  APInt B = C2->getAPInt().abs();
  uint32_t ABW = A.getBitWidth();
  uint32_t BBW = B.getBitWidth();

  if (ABW > BBW)
    B = B.zext(ABW);
  else if (ABW < BBW)
    A = A.zext(BBW);

  return APIntOps::GreatestCommonDivisor(std::move(A), std::move(B));
}

/// Get a canonical unsigned division expression, or something simpler if
/// possible. There is no representation for an exact udiv in SCEV IR, but we
/// can attempt to remove factors from the LHS and RHS.  We can't do this when
/// it's not exact because the udiv may be clearing bits.
const SCEV *ScalarEvolution::getUDivExactExpr(const SCEV *LHS,
                                              const SCEV *RHS) {
  // TODO: we could try to find factors in all sorts of things, but for now we
  // just deal with u/exact (multiply, constant). See SCEVDivision towards the
  // end of this file for inspiration.

  const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS);
  if (!Mul || !Mul->hasNoUnsignedWrap())
    return getUDivExpr(LHS, RHS);

  if (const SCEVConstant *RHSCst = dyn_cast<SCEVConstant>(RHS)) {
    // If the mulexpr multiplies by a constant, then that constant must be the
    // first element of the mulexpr.
    if (const auto *LHSCst = dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
      if (LHSCst == RHSCst) {
        SmallVector<const SCEV *, 2> Operands;
        Operands.append(Mul->op_begin() + 1, Mul->op_end());
        return getMulExpr(Operands);
      }

      // We can't just assume that LHSCst divides RHSCst cleanly, it could be
      // that there's a factor provided by one of the other terms. We need to
      // check.
      APInt Factor = gcd(LHSCst, RHSCst);
      if (!Factor.isIntN(1)) {
        LHSCst =
            cast<SCEVConstant>(getConstant(LHSCst->getAPInt().udiv(Factor)));
        RHSCst =
            cast<SCEVConstant>(getConstant(RHSCst->getAPInt().udiv(Factor)));
        SmallVector<const SCEV *, 2> Operands;
        Operands.push_back(LHSCst);
        Operands.append(Mul->op_begin() + 1, Mul->op_end());
        LHS = getMulExpr(Operands);
        RHS = RHSCst;
        Mul = dyn_cast<SCEVMulExpr>(LHS);
        if (!Mul)
          return getUDivExactExpr(LHS, RHS);
      }
    }
  }

  for (int i = 0, e = Mul->getNumOperands(); i != e; ++i) {
    if (Mul->getOperand(i) == RHS) {
      SmallVector<const SCEV *, 2> Operands;
      Operands.append(Mul->op_begin(), Mul->op_begin() + i);
      Operands.append(Mul->op_begin() + i + 1, Mul->op_end());
      return getMulExpr(Operands);
    }
  }

  return getUDivExpr(LHS, RHS);
}
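
// Illustration (not part of ScalarEvolution.cpp): for an exact division,
// common factors can be cancelled between the two sides, which is what the
// gcd-based rewrite above does for the leading constants. A standalone
// sketch using std::gcd (C++17); udivExact is a hypothetical name:
#include <cstdint>
#include <numeric>
static uint64_t udivExact(uint64_t LHS, uint64_t RHS) {
  // Assumes RHS divides LHS exactly, as the "exact" flag promises.
  uint64_t Factor = std::gcd(LHS, RHS);
  return (LHS / Factor) / (RHS / Factor); // same quotient, smaller operands
}
// E.g. udivExact(210, 10): gcd(210, 10) == 10, so the divide reduces to
// 21 / 1 == 21, matching 210 / 10.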

/// Get an add recurrence expression for the specified loop.  Simplify the
/// expression as much as possible.
const SCEV *ScalarEvolution::getAddRecExpr(const SCEV *Start, const SCEV *Step,
                                           const Loop *L,
                                           SCEV::NoWrapFlags Flags) {
  SmallVector<const SCEV *, 4> Operands;
  Operands.push_back(Start);
  if (const SCEVAddRecExpr *StepChrec = dyn_cast<SCEVAddRecExpr>(Step))
    if (StepChrec->getLoop() == L) {
      Operands.append(StepChrec->op_begin(), StepChrec->op_end());
      return getAddRecExpr(Operands, L, maskFlags(Flags, SCEV::FlagNW));
    }

  Operands.push_back(Step);
  return getAddRecExpr(Operands, L, Flags);
}

/// Get an add recurrence expression for the specified loop.  Simplify the
/// expression as much as possible.
const SCEV *
ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
                               const Loop *L, SCEV::NoWrapFlags Flags) {
  if (Operands.size() == 1) return Operands[0];
#ifndef NDEBUG
  Type *ETy = getEffectiveSCEVType(Operands[0]->getType());
  for (unsigned i = 1, e = Operands.size(); i != e; ++i)
    assert(getEffectiveSCEVType(Operands[i]->getType()) == ETy &&
           "SCEVAddRecExpr operand types don't match!");
  for (unsigned i = 0, e = Operands.size(); i != e; ++i)
    assert(isLoopInvariant(Operands[i], L) &&
           "SCEVAddRecExpr operand is not loop-invariant!");
#endif

  if (Operands.back()->isZero()) {
    Operands.pop_back();
    return getAddRecExpr(Operands, L, SCEV::FlagAnyWrap); // {X,+,0}  -->  X
  }

  // It's tempting to want to call getMaxBackedgeTakenCount here and
  // use that information to infer NUW and NSW flags. However, computing a
  // BE count requires calling getAddRecExpr, so we may not yet have a
  // meaningful BE count at this point (and if we don't, we'd be stuck
  // with a SCEVCouldNotCompute as the cached BE count).

  Flags = StrengthenNoWrapFlags(this, scAddRecExpr, Operands, Flags);

  // Canonicalize nested AddRecs by nesting them in order of loop depth.
  if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) {
    const Loop *NestedLoop = NestedAR->getLoop();
    if (L->contains(NestedLoop)
            ? (L->getLoopDepth() < NestedLoop->getLoopDepth())
            : (!NestedLoop->contains(L) &&
               DT.dominates(L->getHeader(), NestedLoop->getHeader()))) {
      SmallVector<const SCEV *, 4> NestedOperands(NestedAR->op_begin(),
                                                  NestedAR->op_end());
      Operands[0] = NestedAR->getStart();
      // AddRecs require their operands be loop-invariant with respect to their
      // loops. Don't perform this transformation if it would break this
      // requirement.
      bool AllInvariant = all_of(
          Operands, [&](const SCEV *Op) { return isLoopInvariant(Op, L); });

      if (AllInvariant) {
        // Create a recurrence for the outer loop with the same step size.
        //
        // The outer recurrence keeps its NW flag but only keeps NUW/NSW if the
        // inner recurrence has the same property.
        SCEV::NoWrapFlags OuterFlags =
          maskFlags(Flags, SCEV::FlagNW | NestedAR->getNoWrapFlags());

        NestedOperands[0] = getAddRecExpr(Operands, L, OuterFlags);
        AllInvariant = all_of(NestedOperands, [&](const SCEV *Op) {
          return isLoopInvariant(Op, NestedLoop);
        });

        if (AllInvariant) {
          // Ok, both add recurrences are valid after the transformation.
          //
          // The inner recurrence keeps its NW flag but only keeps NUW/NSW if
          // the outer recurrence has the same property.
          SCEV::NoWrapFlags InnerFlags =
            maskFlags(NestedAR->getNoWrapFlags(), SCEV::FlagNW | Flags);
          return getAddRecExpr(NestedOperands, NestedLoop, InnerFlags);
        }
      }
      // Reset Operands to its original state.
      Operands[0] = NestedAR;
    }
  }

  // Okay, it looks like we really DO need an addrec expr.  Check to see if we
  // already have one, otherwise create a new one.
  return getOrCreateAddRecExpr(Operands, L, Flags);
}

const SCEV *
ScalarEvolution::getGEPExpr(GEPOperator *GEP,
                            const SmallVectorImpl<const SCEV *> &IndexExprs) {
  const SCEV *BaseExpr = getSCEV(GEP->getPointerOperand());
  // getSCEV(Base)->getType() has the same address space as Base->getType()
  // because SCEV::getType() preserves the address space.
  Type *IntPtrTy = getEffectiveSCEVType(BaseExpr->getType());
  // FIXME(PR23527): Don't blindly transfer the inbounds flag from the GEP
  // instruction to its SCEV, because the Instruction may be guarded by control
  // flow and the no-overflow bits may not be valid for the expression in any
  // context. This can be fixed similarly to how these flags are handled for
  // adds.
  SCEV::NoWrapFlags Wrap = GEP->isInBounds() ? SCEV::FlagNSW
                                             : SCEV::FlagAnyWrap;

  const SCEV *TotalOffset = getZero(IntPtrTy);
  // The array size is unimportant. The first thing we do on CurTy is getting
  // its element type.
  Type *CurTy = ArrayType::get(GEP->getSourceElementType(), 0);
  for (const SCEV *IndexExpr : IndexExprs) {
    // Compute the (potentially symbolic) offset in bytes for this index.
    if (StructType *STy = dyn_cast<StructType>(CurTy)) {
      // For a struct, add the member offset.
      ConstantInt *Index = cast<SCEVConstant>(IndexExpr)->getValue();
      unsigned FieldNo = Index->getZExtValue();
      const SCEV *FieldOffset = getOffsetOfExpr(IntPtrTy, STy, FieldNo);

      // Add the field offset to the running total offset.
      TotalOffset = getAddExpr(TotalOffset, FieldOffset);

      // Update CurTy to the type of the field at Index.
      CurTy = STy->getTypeAtIndex(Index);
    } else {
      // Update CurTy to its element type.
      CurTy = cast<SequentialType>(CurTy)->getElementType();
      // For an array, add the element offset, explicitly scaled.
      const SCEV *ElementSize = getSizeOfExpr(IntPtrTy, CurTy);
      // Getelementptr indices are signed.
      IndexExpr = getTruncateOrSignExtend(IndexExpr, IntPtrTy);

      // Multiply the index by the element size to compute the element offset.
      const SCEV *LocalOffset = getMulExpr(IndexExpr, ElementSize, Wrap);

      // Add the element offset to the running total offset.
      TotalOffset = getAddExpr(TotalOffset, LocalOffset);
    }
  }

  // Add the total offset from all the GEP indices to the base.
  return getAddExpr(BaseExpr, TotalOffset, Wrap);
}
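
// Illustration (not part of ScalarEvolution.cpp): getGEPExpr reduces a GEP
// to base + a sum of byte offsets, adding a struct field offset for struct
// indices and index * sizeof(element) for array indices. A standalone
// sketch of the same bookkeeping; Elem and gepByteOffset are hypothetical:
#include <cstddef>
struct Elem { int A; double B; };   // hypothetical pointee type
static std::size_t gepByteOffset(std::size_t ArrayIdx) {
  // For a GEP like "getelementptr %Elem, %Elem* %p, i64 ArrayIdx, i32 1":
  // scale the array index by the element size, then add the byte offset of
  // field 1 within Elem, as the struct/array branches above do.
  return ArrayIdx * sizeof(Elem) + offsetof(Elem, B);
}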

std::tuple<const SCEV *, FoldingSetNodeID, void *>
ScalarEvolution::findExistingSCEVInCache(int SCEVType,
                                         ArrayRef<const SCEV *> Ops) {
  FoldingSetNodeID ID;
  void *IP = nullptr;
  ID.AddInteger(SCEVType);
  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
    ID.AddPointer(Ops[i]);
  return std::tuple<const SCEV *, FoldingSetNodeID, void *>(
      UniqueSCEVs.FindNodeOrInsertPos(ID, IP), std::move(ID), IP);
}

const SCEV *ScalarEvolution::getMinMaxExpr(unsigned Kind,
                                           SmallVectorImpl<const SCEV *> &Ops) {
  assert(!Ops.empty() && "Cannot get empty (u|s)(min|max)!");
  if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
  Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
  for (unsigned i = 1, e = Ops.size(); i != e; ++i)
    assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
           "Operand types don't match!");
#endif

  bool IsSigned = Kind == scSMaxExpr || Kind == scSMinExpr;
  bool IsMax = Kind == scSMaxExpr || Kind == scUMaxExpr;

  // Sort by complexity, this groups all similar expression types together.
  GroupByComplexity(Ops, &LI, DT);

  // Check if we have created the same expression before.
  if (const SCEV *S = std::get<0>(findExistingSCEVInCache(Kind, Ops))) {
    return S;
  }

  // If there are any constants, fold them together.
  unsigned Idx = 0;
  if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
    ++Idx;
    assert(Idx < Ops.size());
    auto FoldOp = [&](const APInt &LHS, const APInt &RHS) {
      if (Kind == scSMaxExpr)
        return APIntOps::smax(LHS, RHS);
      else if (Kind == scSMinExpr)
        return APIntOps::smin(LHS, RHS);
      else if (Kind == scUMaxExpr)
        return APIntOps::umax(LHS, RHS);
      else if (Kind == scUMinExpr)
        return APIntOps::umin(LHS, RHS);
      llvm_unreachable("Unknown SCEV min/max opcode");
    };

    while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
      // We found two constants, fold them together!
      ConstantInt *Fold = ConstantInt::get(
          getContext(), FoldOp(LHSC->getAPInt(), RHSC->getAPInt()));
      Ops[0] = getConstant(Fold);
      Ops.erase(Ops.begin()+1);  // Erase the folded element
      if (Ops.size() == 1) return Ops[0];
      LHSC = cast<SCEVConstant>(Ops[0]);
    }

    bool IsMinV = LHSC->getValue()->isMinValue(IsSigned);
    bool IsMaxV = LHSC->getValue()->isMaxValue(IsSigned);

    if (IsMax ? IsMinV : IsMaxV) {
      // If we are left with a constant minimum(/maximum)-int, strip it off.
      Ops.erase(Ops.begin());
      --Idx;
    } else if (IsMax ? IsMaxV : IsMinV) {
      // If we have a max(/min) with a constant maximum(/minimum)-int,
      // it will always be the extremum.
      return LHSC;
    }

    if (Ops.size() == 1) return Ops[0];
  }

  // Find the first operation of the same kind
  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < Kind)
    ++Idx;

  // Check to see if one of the operands is of the same kind. If so, expand its
  // operands onto our operand list, and recurse to simplify.
  if (Idx < Ops.size()) {
    bool DeletedAny = false;
    while (Ops[Idx]->getSCEVType() == Kind) {
      const SCEVMinMaxExpr *SMME = cast<SCEVMinMaxExpr>(Ops[Idx]);
      Ops.erase(Ops.begin()+Idx);
      Ops.append(SMME->op_begin(), SMME->op_end());
      DeletedAny = true;
    }

    if (DeletedAny)
      return getMinMaxExpr(Kind, Ops);
  }

  // Okay, check to see if the same value occurs in the operand list twice.  If
  // so, delete one.  Since we sorted the list, these values are required to
  // be adjacent.
  llvm::CmpInst::Predicate GEPred =
      IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
  llvm::CmpInst::Predicate LEPred =
      IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
  llvm::CmpInst::Predicate FirstPred = IsMax ? GEPred : LEPred;
  llvm::CmpInst::Predicate SecondPred = IsMax ? LEPred : GEPred;
  for (unsigned i = 0, e = Ops.size() - 1; i != e; ++i) {
    if (Ops[i] == Ops[i + 1] ||
        isKnownViaNonRecursiveReasoning(FirstPred, Ops[i], Ops[i + 1])) {
      //  X op Y op Y  -->  X op Y
      //  X op Y       -->  X, if we know X, Y are ordered appropriately
      Ops.erase(Ops.begin() + i + 1, Ops.begin() + i + 2);
      --i;
      --e;
    } else if (isKnownViaNonRecursiveReasoning(SecondPred, Ops[i],
                                               Ops[i + 1])) {
      //  X op Y       -->  Y, if we know X, Y are ordered appropriately
      Ops.erase(Ops.begin() + i, Ops.begin() + i + 1);
      --i;
      --e;
    }
  }

  if (Ops.size() == 1) return Ops[0];

  assert(!Ops.empty() && "Reduced smax down to nothing!");

  // Okay, it looks like we really DO need an expr.  Check to see if we
  // already have one, otherwise create a new one.
  const SCEV *ExistingSCEV;
  FoldingSetNodeID ID;
  void *IP;
  std::tie(ExistingSCEV, ID, IP) = findExistingSCEVInCache(Kind, Ops);
  if (ExistingSCEV)
    return ExistingSCEV;
  const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
  std::uninitialized_copy(Ops.begin(), Ops.end(), O);
  SCEV *S = new (SCEVAllocator) SCEVMinMaxExpr(
      ID.Intern(SCEVAllocator), static_cast<SCEVTypes>(Kind), O, Ops.size());

  UniqueSCEVs.InsertNode(S, IP);
  addToLoopUseLists(S);
  return S;
}
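
// Illustration (not part of ScalarEvolution.cpp): the constant handling
// above folds adjacent constants with the matching operation and then
// applies two identities: the operation's identity element is stripped, and
// its absorbing element dominates the whole expression. For umin over
// uint64_t (uminFoldConst is a hypothetical name):
#include <algorithm>
#include <cstdint>
static uint64_t uminFoldConst(uint64_t X, uint64_t C) {
  if (C == UINT64_MAX)
    return X;              // maximum-int is stripped: umin(X, ~0u) == X
  if (C == 0)
    return 0;              // minimum-int is the extremum: umin(X, 0) == 0
  return std::min(X, C);   // otherwise fold like APIntOps::umin above
}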

const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS, const SCEV *RHS) {
  SmallVector<const SCEV *, 2> Ops = {LHS, RHS};
  return getSMaxExpr(Ops);
}

const SCEV *ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
  return getMinMaxExpr(scSMaxExpr, Ops);
}

const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS, const SCEV *RHS) {
  SmallVector<const SCEV *, 2> Ops = {LHS, RHS};
  return getUMaxExpr(Ops);
}

const SCEV *ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
  return getMinMaxExpr(scUMaxExpr, Ops);
}

const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS,
                                         const SCEV *RHS) {
  SmallVector<const SCEV *, 2> Ops = { LHS, RHS };
  return getSMinExpr(Ops);
}

const SCEV *ScalarEvolution::getSMinExpr(SmallVectorImpl<const SCEV *> &Ops) {
  return getMinMaxExpr(scSMinExpr, Ops);
}

const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
                                         const SCEV *RHS) {
  SmallVector<const SCEV *, 2> Ops = { LHS, RHS };
  return getUMinExpr(Ops);
}

const SCEV *ScalarEvolution::getUMinExpr(SmallVectorImpl<const SCEV *> &Ops) {
  return getMinMaxExpr(scUMinExpr, Ops);
}

const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) {
  // We can bypass creating a target-independent
  // constant expression and then folding it back into a ConstantInt.
  // This is just a compile-time optimization.
  return getConstant(IntTy, getDataLayout().getTypeAllocSize(AllocTy));
}

const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy,
                                             StructType *STy,
                                             unsigned FieldNo) {
  // We can bypass creating a target-independent
  // constant expression and then folding it back into a ConstantInt.
  // This is just a compile-time optimization.
  return getConstant(
      IntTy, getDataLayout().getStructLayout(STy)->getElementOffset(FieldNo));
}

const SCEV *ScalarEvolution::getUnknown(Value *V) {
  // Don't attempt to do anything other than create a SCEVUnknown object
  // here.  createSCEV only calls getUnknown after checking for all other
  // interesting possibilities, and any other code that calls getUnknown
  // is doing so in order to hide a value from SCEV canonicalization.

  FoldingSetNodeID ID;
  ID.AddInteger(scUnknown);
  ID.AddPointer(V);
  void *IP = nullptr;
  if (SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) {
    assert(cast<SCEVUnknown>(S)->getValue() == V &&
           "Stale SCEVUnknown in uniquing map!");
    return S;
  }
  SCEV *S = new (SCEVAllocator) SCEVUnknown(ID.Intern(SCEVAllocator), V, this,
                                            FirstUnknown);
  FirstUnknown = cast<SCEVUnknown>(S);
  UniqueSCEVs.InsertNode(S, IP);
  return S;
}

//===----------------------------------------------------------------------===//
//            Basic SCEV Analysis and PHI Idiom Recognition Code
//

/// Test if values of the given type are analyzable within the SCEV
/// framework. This primarily includes integer types, and it can optionally
/// include pointer types if the ScalarEvolution class has access to
/// target-specific information.
bool ScalarEvolution::isSCEVable(Type *Ty) const {
  // Integers and pointers are always SCEVable.
  return Ty->isIntOrPtrTy();
}

/// Return the size in bits of the specified type, for which isSCEVable must
/// return true.
uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const {
  assert(isSCEVable(Ty) && "Type is not SCEVable!");
  if (Ty->isPointerTy())
    return getDataLayout().getIndexTypeSizeInBits(Ty);
  return getDataLayout().getTypeSizeInBits(Ty);
}

/// Return a type with the same bitwidth as the given type and which represents
/// how SCEV will treat the given type, for which isSCEVable must return
/// true. For pointer types, this is the pointer-sized integer type.
Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const {
  assert(isSCEVable(Ty) && "Type is not SCEVable!");

  if (Ty->isIntegerTy())
    return Ty;

  // The only other supported type is pointer.
  assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!");
  return getDataLayout().getIntPtrType(Ty);
}

Type *ScalarEvolution::getWiderType(Type *T1, Type *T2) const {
  return getTypeSizeInBits(T1) >= getTypeSizeInBits(T2) ? T1 : T2;
}

const SCEV *ScalarEvolution::getCouldNotCompute() {
  return CouldNotCompute.get();
}

bool ScalarEvolution::checkValidity(const SCEV *S) const {
  bool ContainsNulls = SCEVExprContains(S, [](const SCEV *S) {
    auto *SU = dyn_cast<SCEVUnknown>(S);
    return SU && SU->getValue() == nullptr;
  });

  return !ContainsNulls;
}

bool ScalarEvolution::containsAddRecurrence(const SCEV *S) {
  HasRecMapType::iterator I = HasRecMap.find(S);
  if (I != HasRecMap.end())
    return I->second;

  bool FoundAddRec = SCEVExprContains(S, isa<SCEVAddRecExpr, const SCEV *>);
  HasRecMap.insert({S, FoundAddRec});
  return FoundAddRec;
}

/// Try to split a SCEVAddExpr into a pair of {SCEV, ConstantInt}.
/// If \p S is a SCEVAddExpr and is composed of a sub SCEV S' and an
/// offset I, then return {S', I}, else return {\p S, nullptr}.
static std::pair<const SCEV *, ConstantInt *> splitAddExpr(const SCEV *S) {
  const auto *Add = dyn_cast<SCEVAddExpr>(S);
  if (!Add)
    return {S, nullptr};

  if (Add->getNumOperands() != 2)
    return {S, nullptr};

  auto *ConstOp = dyn_cast<SCEVConstant>(Add->getOperand(0));
  if (!ConstOp)
    return {S, nullptr};

  return {Add->getOperand(1), ConstOp->getValue()};
}
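
// Illustration (not part of ScalarEvolution.cpp): splitAddExpr peels a
// leading constant off a two-operand add so that an expression such as
// (42 + %x) can also be cached under %x with offset 42. A standalone sketch
// over a toy term; Term and splitConstantOffset are hypothetical:
#include <cstdint>
#include <utility>
struct Term { bool IsTwoOpAdd; int64_t LeadingConst; int64_t Rest; };
static std::pair<int64_t, int64_t> splitConstantOffset(const Term &S) {
  if (!S.IsTwoOpAdd)
    return {S.Rest, 0};              // no leading constant: {S, nullptr}
  return {S.Rest, S.LeadingConst};   // {S', I} with S == S' + I
}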

/// Return the ValueOffsetPair set for \p S. \p S can be represented
/// by the value and offset from any ValueOffsetPair in the set.
SetVector<ScalarEvolution::ValueOffsetPair> *
ScalarEvolution::getSCEVValues(const SCEV *S) {
  ExprValueMapType::iterator SI = ExprValueMap.find_as(S);
  if (SI == ExprValueMap.end())
    return nullptr;
#ifndef NDEBUG
  if (VerifySCEVMap) {
    // Check there is no dangling Value in the set returned.
    for (const auto &VE : SI->second)
      assert(ValueExprMap.count(VE.first));
  }
#endif
  return &SI->second;
}

/// Erase Value from ValueExprMap and ExprValueMap. ValueExprMap.erase(V)
/// cannot be used separately. eraseValueFromMap should be used to remove
/// V from ValueExprMap and ExprValueMap at the same time.
void ScalarEvolution::eraseValueFromMap(Value *V) {
  ValueExprMapType::iterator I = ValueExprMap.find_as(V);
  if (I != ValueExprMap.end()) {
    const SCEV *S = I->second;
    // Remove {V, 0} from the set of ExprValueMap[S]
    if (SetVector<ValueOffsetPair> *SV = getSCEVValues(S))
      SV->remove({V, nullptr});

    // Remove {V, Offset} from the set of ExprValueMap[Stripped]
    const SCEV *Stripped;
    ConstantInt *Offset;
    std::tie(Stripped, Offset) = splitAddExpr(S);
    if (Offset != nullptr) {
      if (SetVector<ValueOffsetPair> *SV = getSCEVValues(Stripped))
        SV->remove({V, Offset});
    }
    ValueExprMap.erase(V);
  }
}

/// Check whether value has nuw/nsw/exact set but SCEV does not.
/// TODO: In reality it is better to check the poison recursively
/// but this is better than nothing.
static bool SCEVLostPoisonFlags(const SCEV *S, const Value *V) {
  if (auto *I = dyn_cast<Instruction>(V)) {
    if (isa<OverflowingBinaryOperator>(I)) {
      if (auto *NS = dyn_cast<SCEVNAryExpr>(S)) {
        if (I->hasNoSignedWrap() && !NS->hasNoSignedWrap())
          return true;
        if (I->hasNoUnsignedWrap() && !NS->hasNoUnsignedWrap())
          return true;
      }
    } else if (isa<PossiblyExactOperator>(I) && I->isExact())
      return true;
  }
  return false;
}

/// Return an existing SCEV if it exists, otherwise analyze the expression and
/// create a new one.
const SCEV *ScalarEvolution::getSCEV(Value *V) {
  assert(isSCEVable(V->getType()) && "Value is not SCEVable!");

  const SCEV *S = getExistingSCEV(V);
  if (S == nullptr) {
    S = createSCEV(V);
    // During PHI resolution, it is possible to create two SCEVs for the same
    // V, so we need to double-check whether V->S was inserted into
    // ValueExprMap before inserting S->{V, 0} into ExprValueMap.
    std::pair<ValueExprMapType::iterator, bool> Pair =
        ValueExprMap.insert({SCEVCallbackVH(V, this), S});
    if (Pair.second && !SCEVLostPoisonFlags(S, V)) {
      ExprValueMap[S].insert({V, nullptr});

      // If S == Stripped + Offset, add Stripped -> {V, Offset} into
      // ExprValueMap.
      const SCEV *Stripped = S;
      ConstantInt *Offset = nullptr;
      std::tie(Stripped, Offset) = splitAddExpr(S);
      // If stripped is SCEVUnknown, don't bother to save
      // Stripped -> {V, offset}. It doesn't simplify anything and sometimes
      // even increases the complexity of the expansion code.
      // If V is GetElementPtrInst, don't save Stripped -> {V, offset}
      // because it may generate add/sub instead of GEP in SCEV expansion.
      if (Offset != nullptr && !isa<SCEVUnknown>(Stripped) &&
          !isa<GetElementPtrInst>(V))
        ExprValueMap[Stripped].insert({V, Offset});
    }
  }
  return S;
}

const SCEV *ScalarEvolution::getExistingSCEV(Value *V) {
  assert(isSCEVable(V->getType()) && "Value is not SCEVable!");

  ValueExprMapType::iterator I = ValueExprMap.find_as(V);
  if (I != ValueExprMap.end()) {
    const SCEV *S = I->second;
    if (checkValidity(S))
      return S;
    eraseValueFromMap(V);
    forgetMemoizedResults(S);
  }
  return nullptr;
}

/// Return a SCEV corresponding to -V = -1*V
const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V,
                                             SCEV::NoWrapFlags Flags) {
  if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
    return getConstant(
               cast<ConstantInt>(ConstantExpr::getNeg(VC->getValue())));

  Type *Ty = V->getType();
  Ty = getEffectiveSCEVType(Ty);
  return getMulExpr(
      V, getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))), Flags);
}

/// If Expr computes ~A, return A else return nullptr
static const SCEV *MatchNotExpr(const SCEV *Expr) {
  const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Expr);
  if (!Add || Add->getNumOperands() != 2 ||
      !Add->getOperand(0)->isAllOnesValue())
    return nullptr;

  const SCEVMulExpr *AddRHS = dyn_cast<SCEVMulExpr>(Add->getOperand(1));
  if (!AddRHS || AddRHS->getNumOperands() != 2 ||
      !AddRHS->getOperand(0)->isAllOnesValue())
    return nullptr;

  return AddRHS->getOperand(1);
}

/// Return a SCEV corresponding to ~V = -1-V
const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) {
  if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
    return getConstant(
                cast<ConstantInt>(ConstantExpr::getNot(VC->getValue())));

  // Fold ~(u|s)(min|max)(~x, ~y) to (u|s)(max|min)(x, y)
  if (const SCEVMinMaxExpr *MME = dyn_cast<SCEVMinMaxExpr>(V)) {
    auto MatchMinMaxNegation = [&](const SCEVMinMaxExpr *MME) {
      SmallVector<const SCEV *, 2> MatchedOperands;
      for (const SCEV *Operand : MME->operands()) {
        const SCEV *Matched = MatchNotExpr(Operand);
        if (!Matched)
          return (const SCEV *)nullptr;
        MatchedOperands.push_back(Matched);
      }
      return getMinMaxExpr(
          SCEVMinMaxExpr::negate(static_cast<SCEVTypes>(MME->getSCEVType())),
          MatchedOperands);
    };
    if (const SCEV *Replaced = MatchMinMaxNegation(MME))
      return Replaced;
  }

  Type *Ty = V->getType();
  Ty = getEffectiveSCEVType(Ty);
  const SCEV *AllOnes =
                   getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty)));
  return getMinusSCEV(AllOnes, V);
}
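
// Illustration (not part of ScalarEvolution.cpp): on two's complement
// integers ~V == -1 - V, and complementing both sides of a min turns it
// into a max: ~min(~x, ~y) == max(x, y). A standalone check over int64_t;
// notViaSub is a hypothetical name:
#include <algorithm>
#include <cstdint>
static int64_t notViaSub(int64_t V) { return -1 - V; } // equals ~V
// For any x, y: notViaSub(std::min(notViaSub(x), notViaSub(y))) equals
// std::max(x, y), which is the fold getNotSCEV applies to SCEVMinMaxExpr
// operands above.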

const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
                                          SCEV::NoWrapFlags Flags,
                                          unsigned Depth) {
  // Fast path: X - X --> 0.
  if (LHS == RHS)
    return getZero(LHS->getType());

  // We represent LHS - RHS as LHS + (-1)*RHS. This transformation
  // makes it so that we cannot make much use of NUW.
  auto AddFlags = SCEV::FlagAnyWrap;
  const bool RHSIsNotMinSigned =
      !getSignedRangeMin(RHS).isMinSignedValue();
  if (maskFlags(Flags, SCEV::FlagNSW) == SCEV::FlagNSW) {
    // Let M be the minimum representable signed value. Then (-1)*RHS
    // signed-wraps if and only if RHS is M. That can happen even for
    // a NSW subtraction because e.g. (-1)*M signed-wraps even though
    // -1 - M does not. So to transfer NSW from LHS - RHS to LHS +
    // (-1)*RHS, we need to prove that RHS != M.
    //
    // If LHS is non-negative and we know that LHS - RHS does not
    // signed-wrap, then RHS cannot be M. So we can rule out signed-wrap
    // either by proving that RHS > M or that LHS >= 0.
    if (RHSIsNotMinSigned || isKnownNonNegative(LHS)) {
      AddFlags = SCEV::FlagNSW;
    }
  }

  // FIXME: Find a correct way to transfer NSW to (-1)*M when LHS -
  // RHS is NSW and LHS >= 0.
  //
  // The difficulty here is that the NSW flag may have been proven
  // relative to a loop that is to be found in a recurrence in LHS and
  // not in RHS. Applying NSW to (-1)*M may then let the NSW have a
  // larger scope than intended.
  auto NegFlags = RHSIsNotMinSigned ? SCEV::FlagNSW : SCEV::FlagAnyWrap;

  return getAddExpr(LHS, getNegativeSCEV(RHS, NegFlags), AddFlags, Depth);
}
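
// Illustration (not part of ScalarEvolution.cpp): the comment above notes
// that (-1)*M signed-wraps when M is the minimum signed value even though
// -1 - M does not. Concretely, for int64_t, -1 - INT64_MIN == INT64_MAX is
// representable, but (-1) * INT64_MIN would be 2^63, which is not. A
// standalone sketch of the subtraction-as-addition rewrite over uint64_t,
// where wrapping is well defined; subViaNegAdd is a hypothetical name:
#include <cstdint>
static uint64_t subViaNegAdd(uint64_t LHS, uint64_t RHS) {
  // LHS - RHS is represented as LHS + (-1)*RHS, as getMinusSCEV does;
  // ~uint64_t{0} is the all-ones value standing in for -1.
  return LHS + (~uint64_t{0}) * RHS;
}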

const SCEV *ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, Type *Ty,
                                                     unsigned Depth) {
  Type *SrcTy = V->getType();
  assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
         "Cannot truncate or zero extend with non-integer arguments!");
  if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
    return V;  // No conversion
  if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
    return getTruncateExpr(V, Ty, Depth);
  return getZeroExtendExpr(V, Ty, Depth);
}

const SCEV *ScalarEvolution::getTruncateOrSignExtend(const SCEV *V, Type *Ty,
                                                     unsigned Depth) {
  Type *SrcTy = V->getType();
  assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
         "Cannot truncate or sign extend with non-integer arguments!");
  if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
    return V;  // No conversion
  if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
    return getTruncateExpr(V, Ty, Depth);
  return getSignExtendExpr(V, Ty, Depth);
}

const SCEV *
ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, Type *Ty) {
  Type *SrcTy = V->getType();
  assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
         "Cannot noop or zero extend with non-integer arguments!");
  assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
         "getNoopOrZeroExtend cannot truncate!");
  if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
    return V;  // No conversion
  return getZeroExtendExpr(V, Ty);
}

const SCEV *
ScalarEvolution::getNoopOrSignExtend(const SCEV *V, Type *Ty) {
  Type *SrcTy = V->getType();
  assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
         "Cannot noop or sign extend with non-integer arguments!");
  assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
         "getNoopOrSignExtend cannot truncate!");
  if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
    return V;  // No conversion
  return getSignExtendExpr(V, Ty);
}

const SCEV *
ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, Type *Ty) {
  Type *SrcTy = V->getType();
  assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
         "Cannot noop or any extend with non-integer arguments!");
  assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
         "getNoopOrAnyExtend cannot truncate!");
  if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
    return V;  // No conversion
  return getAnyExtendExpr(V, Ty);
}

const SCEV *
ScalarEvolution::getTruncateOrNoop(const SCEV *V, Type *Ty) {
  Type *SrcTy = V->getType();
  assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
         "Cannot truncate or noop with non-integer arguments!");
  assert(getTypeSizeInBits(SrcTy) >= getTypeSizeInBits(Ty) &&
         "getTruncateOrNoop cannot extend!");
  if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
    return V;  // No conversion
  return getTruncateExpr(V, Ty);
}

const SCEV *ScalarEvolution::getUMaxFromMismatchedTypes(const SCEV *LHS,
                                                        const SCEV *RHS) {
  const SCEV *PromotedLHS = LHS;
  const SCEV *PromotedRHS = RHS;

  if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType()))
    PromotedRHS = getZeroExtendExpr(RHS, LHS->getType());
  else
    PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType());

  return getUMaxExpr(PromotedLHS, PromotedRHS);
}

const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(const SCEV *LHS,
                                                        const SCEV *RHS) {
  SmallVector<const SCEV *, 2> Ops = { LHS, RHS };
  return getUMinFromMismatchedTypes(Ops);
}

const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(
    SmallVectorImpl<const SCEV *> &Ops) {
  assert(!Ops.empty() && "At least one operand must be!");
  // Trivial case.
  if (Ops.size() == 1)
    return Ops[0];

  // Find the max type first.
  Type *MaxType = nullptr;
  for (auto *S : Ops)
    if (MaxType)
      MaxType = getWiderType(MaxType, S->getType());
    else
      MaxType = S->getType();

  // Extend all ops to max type.
  SmallVector<const SCEV *, 2> PromotedOps;
  for (auto *S : Ops)
    PromotedOps.push_back(getNoopOrZeroExtend(S, MaxType));

  // Generate umin.
  return getUMinExpr(PromotedOps);
}
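
// Illustration (not part of ScalarEvolution.cpp): with operands of
// different widths, the helper above first zero-extends everything to the
// widest type and only then takes the unsigned minimum. A standalone
// sketch; uminMixed is a hypothetical name:
#include <algorithm>
#include <cstdint>
static uint64_t uminMixed(uint32_t A, uint64_t B) {
  // Zero-extend the narrower operand to the wider type, then umin.
  return std::min(static_cast<uint64_t>(A), B);
}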

const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) {
  // A pointer operand may evaluate to a nonpointer expression, such as null.
  if (!V->getType()->isPointerTy())
    return V;

  if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(V)) {
    return getPointerBase(Cast->getOperand());
  } else if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(V)) {
    const SCEV *PtrOp = nullptr;
    for (const SCEV *NAryOp : NAry->operands()) {
      if (NAryOp->getType()->isPointerTy()) {
        // Cannot find the base of an expression with multiple pointer operands.
        if (PtrOp)
          return V;
        PtrOp = NAryOp;
      }
    }
    if (!PtrOp)
      return V;
    return getPointerBase(PtrOp);
  }
  return V;
}

/// Push users of the given Instruction onto the given Worklist.
static void
PushDefUseChildren(Instruction *I,
                   SmallVectorImpl<Instruction *> &Worklist) {
  // Push the def-use children onto the Worklist stack.
  for (User *U : I->users())
    Worklist.push_back(cast<Instruction>(U));
}

void ScalarEvolution::forgetSymbolicName(Instruction *PN, const SCEV *SymName) {
  SmallVector<Instruction *, 16> Worklist;
  PushDefUseChildren(PN, Worklist);

  SmallPtrSet<Instruction *, 8> Visited;
  Visited.insert(PN);
  while (!Worklist.empty()) {
    Instruction *I = Worklist.pop_back_val();
    if (!Visited.insert(I).second)
      continue;

    auto It = ValueExprMap.find_as(static_cast<Value *>(I));
    if (It != ValueExprMap.end()) {
      const SCEV *Old = It->second;

      // Short-circuit the def-use traversal if the symbolic name
      // ceases to appear in expressions.
      if (Old != SymName && !hasOperand(Old, SymName))
        continue;

      // SCEVUnknown for a PHI either means that it has an unrecognized
      // structure, it's a PHI that's in the process of being computed
      // by createNodeForPHI, or it's a single-value PHI. In the first case,
      // additional loop trip count information isn't going to change anything.
      // In the second case, createNodeForPHI will perform the necessary
      // updates on its own when it gets to that point. In the third, we do
      // want to forget the SCEVUnknown.
      if (!isa<PHINode>(I) ||
          !isa<SCEVUnknown>(Old) ||
          (I != PN && Old == SymName)) {
        eraseValueFromMap(It->first);
        forgetMemoizedResults(Old);
      }
    }

    PushDefUseChildren(I, Worklist);
  }
}

namespace {

/// Takes SCEV S and Loop L. For each AddRec sub-expression, use its start
/// expression in case its Loop is L. If it is not L, then if IgnoreOtherLoops
/// is true the AddRec itself is used; otherwise the rewrite cannot be done.
/// If the SCEV contains a non-invariant unknown SCEV, the rewrite cannot be
/// done.
class SCEVInitRewriter : public SCEVRewriteVisitor<SCEVInitRewriter> {
public:
  static const SCEV *rewrite(const SCEV *S, const Loop *L, ScalarEvolution &SE,
                             bool IgnoreOtherLoops = true) {
    SCEVInitRewriter Rewriter(L, SE);
    const SCEV *Result = Rewriter.visit(S);
    if (Rewriter.hasSeenLoopVariantSCEVUnknown())
      return SE.getCouldNotCompute();
    return Rewriter.hasSeenOtherLoops() && !IgnoreOtherLoops
               ? SE.getCouldNotCompute()
               : Result;
  }

  const SCEV *visitUnknown(const SCEVUnknown *Expr) {
    if (!SE.isLoopInvariant(Expr, L))
      SeenLoopVariantSCEVUnknown = true;
    return Expr;
  }

  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
    // Only re-write AddRecExprs for this loop.
    if (Expr->getLoop() == L)
      return Expr->getStart();
    SeenOtherLoops = true;
    return Expr;
  }

  bool hasSeenLoopVariantSCEVUnknown() { return SeenLoopVariantSCEVUnknown; }

  bool hasSeenOtherLoops() { return SeenOtherLoops; }

private:
  explicit SCEVInitRewriter(const Loop *L, ScalarEvolution &SE)
      : SCEVRewriteVisitor(SE), L(L) {}

  const Loop *L;
  bool SeenLoopVariantSCEVUnknown = false;
  bool SeenOtherLoops = false;
};

/// Takes SCEV S and Loop L. For each AddRec sub-expression, use its post
/// increment expression in case its Loop is L. If it is not L, the AddRec
/// itself is used.
/// If the SCEV contains a non-invariant unknown SCEV, the rewrite cannot be
/// done.
class SCEVPostIncRewriter : public SCEVRewriteVisitor<SCEVPostIncRewriter> {
public:
  static const SCEV *rewrite(const SCEV *S, const Loop *L, ScalarEvolution &SE) {
    SCEVPostIncRewriter Rewriter(L, SE);
    const SCEV *Result = Rewriter.visit(S);
    return Rewriter.hasSeenLoopVariantSCEVUnknown()
        ? SE.getCouldNotCompute()
        : Result;
  }

  const SCEV *visitUnknown(const SCEVUnknown *Expr) {
    if (!SE.isLoopInvariant(Expr, L))
      SeenLoopVariantSCEVUnknown = true;
    return Expr;
  }

  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
    // Only re-write AddRecExprs for this loop.
    if (Expr->getLoop() == L)
      return Expr->getPostIncExpr(SE);
    SeenOtherLoops = true;
    return Expr;
  }

  bool hasSeenLoopVariantSCEVUnknown() { return SeenLoopVariantSCEVUnknown; }

  bool hasSeenOtherLoops() { return SeenOtherLoops; }

private:
  explicit SCEVPostIncRewriter(const Loop *L, ScalarEvolution &SE)
      : SCEVRewriteVisitor(SE), L(L) {}

  const Loop *L;
  bool SeenLoopVariantSCEVUnknown = false;
  bool SeenOtherLoops = false;
};
/// This class evaluates the compare condition by matching it against the
4315
/// condition of loop latch. If there is a match we assume a true value
4316
/// for the condition while building SCEV nodes.
4317
class SCEVBackedgeConditionFolder
4318
    : public SCEVRewriteVisitor<SCEVBackedgeConditionFolder> {
4319
public:
4320
  static const SCEV *rewrite(const SCEV *S, const Loop *L,
4321
236k
                             ScalarEvolution &SE) {
4322
236k
    bool IsPosBECond = false;
4323
236k
    Value *BECond = nullptr;
4324
236k
    if (BasicBlock *Latch = L->getLoopLatch()) {
4325
236k
      BranchInst *BI = dyn_cast<BranchInst>(Latch->getTerminator());
4326
236k
      if (BI && 
BI->isConditional()236k
) {
4327
233k
        assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
4328
233k
               "Both outgoing branches should not target same header!");
4329
233k
        BECond = BI->getCondition();
4330
233k
        IsPosBECond = BI->getSuccessor(0) == L->getHeader();
4331
233k
      } else {
4332
3.20k
        return S;
4333
3.20k
      }
4334
233k
    }
4335
233k
    SCEVBackedgeConditionFolder Rewriter(L, BECond, IsPosBECond, SE);
4336
233k
    return Rewriter.visit(S);
4337
233k
  }
4338
4339
56.5k
  const SCEV *visitUnknown(const SCEVUnknown *Expr) {
4340
56.5k
    const SCEV *Result = Expr;
4341
56.5k
    bool InvariantF = SE.isLoopInvariant(Expr, L);
4342
56.5k
4343
56.5k
    if (!InvariantF) {
4344
30.0k
      Instruction *I = cast<Instruction>(Expr->getValue());
4345
30.0k
      switch (I->getOpcode()) {
4346
30.0k
      case Instruction::Select: {
4347
723
        SelectInst *SI = cast<SelectInst>(I);
4348
723
        Optional<const SCEV *> Res =
4349
723
            compareWithBackedgeCondition(SI->getCondition());
4350
723
        if (Res.hasValue()) {
4351
10
          bool IsOne = cast<SCEVConstant>(Res.getValue())->getValue()->isOne();
4352
10
          Result = SE.getSCEV(IsOne ? 
SI->getTrueValue()8
:
SI->getFalseValue()2
);
4353
10
        }
4354
723
        break;
4355
30.0k
      }
4356
30.0k
      default: {
4357
29.3k
        Optional<const SCEV *> Res = compareWithBackedgeCondition(I);
4358
29.3k
        if (Res.hasValue())
4359
9
          Result = Res.getValue();
4360
29.3k
        break;
4361
56.5k
      }
4362
56.5k
      }
4363
56.5k
    }
4364
56.5k
    return Result;
4365
56.5k
  }
4366
4367
private:
4368
  explicit SCEVBackedgeConditionFolder(const Loop *L, Value *BECond,
4369
                                       bool IsPosBECond, ScalarEvolution &SE)
4370
      : SCEVRewriteVisitor(SE), L(L), BackedgeCond(BECond),
4371
233k
        IsPositiveBECond(IsPosBECond) {}
4372
4373
  Optional<const SCEV *> compareWithBackedgeCondition(Value *IC);
4374
4375
  const Loop *L;
4376
  /// Loop back condition.
4377
  Value *BackedgeCond = nullptr;
4378
  /// Set to true if loop back is on positive branch condition.
4379
  bool IsPositiveBECond;
4380
};

Optional<const SCEV *>
SCEVBackedgeConditionFolder::compareWithBackedgeCondition(Value *IC) {

  // If the value matches the backedge condition of the loop latch, return a
  // constant evolution node based on the loopback branch taken.
  if (BackedgeCond == IC)
    return IsPositiveBECond ? SE.getOne(Type::getInt1Ty(SE.getContext()))
                            : SE.getZero(Type::getInt1Ty(SE.getContext()));
  return None;
}
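
// For illustration, consider a hand-written loop (the names below are made
// up) whose latch branches back to the header on %cond. While rewriting a
// SCEV inside this loop, a select guarded by the very same %cond may assume
// the condition holds, since the backedge was taken:
//
//   loop:
//     %iv      = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
//     %iv.next = add nsw i32 %iv, 1
//     %cond    = icmp slt i32 %iv.next, %n
//     %sel     = select i1 %cond, i32 %a, i32 %b
//     br i1 %cond, label %loop, label %exit
//
// Here BackedgeCond == %cond and IsPositiveBECond is true, so visitUnknown
// above folds the SCEV of %sel to the SCEV of %a.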

class SCEVShiftRewriter : public SCEVRewriteVisitor<SCEVShiftRewriter> {
public:
  static const SCEV *rewrite(const SCEV *S, const Loop *L,
                             ScalarEvolution &SE) {
    SCEVShiftRewriter Rewriter(L, SE);
    const SCEV *Result = Rewriter.visit(S);
    return Rewriter.isValid() ? Result : SE.getCouldNotCompute();
  }

  const SCEV *visitUnknown(const SCEVUnknown *Expr) {
    // Only allow AddRecExprs for this loop.
    if (!SE.isLoopInvariant(Expr, L))
      Valid = false;
    return Expr;
  }

  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
    if (Expr->getLoop() == L && Expr->isAffine())
      return SE.getMinusSCEV(Expr, Expr->getStepRecurrence(SE));
    Valid = false;
    return Expr;
  }

  bool isValid() { return Valid; }

private:
  explicit SCEVShiftRewriter(const Loop *L, ScalarEvolution &SE)
      : SCEVRewriteVisitor(SE), L(L) {}

  const Loop *L;
  bool Valid = true;
};
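
// A small worked example of the shift rewrite (values chosen purely for
// illustration): for an affine addrec {A,+,S}<L>, visitAddRecExpr returns
// {A,+,S} - S = {A-S,+,S}<L>, the same recurrence shifted back by one
// iteration. So for %iv = {0,+,1}<L> the rewriter produces {-1,+,1}<L>,
// which at iteration i evaluates to i-1 -- the value %iv held on the
// previous trip. Any SCEVUnknown that is not invariant in L invalidates the
// result, and rewrite() then returns CouldNotCompute.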
} // end anonymous namespace

SCEV::NoWrapFlags
ScalarEvolution::proveNoWrapViaConstantRanges(const SCEVAddRecExpr *AR) {
  if (!AR->isAffine())
    return SCEV::FlagAnyWrap;

  using OBO = OverflowingBinaryOperator;

  SCEV::NoWrapFlags Result = SCEV::FlagAnyWrap;

  if (!AR->hasNoSignedWrap()) {
    ConstantRange AddRecRange = getSignedRange(AR);
    ConstantRange IncRange = getSignedRange(AR->getStepRecurrence(*this));

    auto NSWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
        Instruction::Add, IncRange, OBO::NoSignedWrap);
    if (NSWRegion.contains(AddRecRange))
      Result = ScalarEvolution::setFlags(Result, SCEV::FlagNSW);
  }

  if (!AR->hasNoUnsignedWrap()) {
    ConstantRange AddRecRange = getUnsignedRange(AR);
    ConstantRange IncRange = getUnsignedRange(AR->getStepRecurrence(*this));

    auto NUWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
        Instruction::Add, IncRange, OBO::NoUnsignedWrap);
    if (NUWRegion.contains(AddRecRange))
      Result = ScalarEvolution::setFlags(Result, SCEV::FlagNUW);
  }

  return Result;
}
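
// A concrete instance of the range reasoning above (numbers chosen purely
// for illustration): take an i8 addrec {0,+,1} whose trip count is known to
// be at most 100. Then getSignedRange(AR) is contained in [0, 100) and the
// step's signed range is [1, 2). makeGuaranteedNoWrapRegion(Add, [1, 2),
// NoSignedWrap) is [-128, 127), i.e. every LHS that can be incremented by 1
// without signed overflow. Since [0, 100) lies inside [-128, 127), the
// addrec can be marked <nsw> even if the IR carried no nsw flag. The same
// check, as a standalone sketch (kept under '#if 0' because it is an
// editorial illustration, not part of this file):
#if 0
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Operator.h"

static bool rangeProvesNSW(const llvm::ConstantRange &AddRecRange,
                           const llvm::ConstantRange &IncRange) {
  using namespace llvm;
  // Region of LHS values for which 'LHS + Inc' cannot wrap in the signed
  // sense, for any Inc drawn from IncRange.
  ConstantRange NSWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
      Instruction::Add, IncRange, OverflowingBinaryOperator::NoSignedWrap);
  return NSWRegion.contains(AddRecRange);
}
#endif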

namespace {

/// Represents an abstract binary operation.  This may exist as a
/// normal instruction or constant expression, or may have been
/// derived from an expression tree.
struct BinaryOp {
  unsigned Opcode;
  Value *LHS;
  Value *RHS;
  bool IsNSW = false;
  bool IsNUW = false;

  /// Op is set if this BinaryOp corresponds to a concrete LLVM instruction or
  /// constant expression.
  Operator *Op = nullptr;

  explicit BinaryOp(Operator *Op)
      : Opcode(Op->getOpcode()), LHS(Op->getOperand(0)), RHS(Op->getOperand(1)),
        Op(Op) {
    if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(Op)) {
      IsNSW = OBO->hasNoSignedWrap();
      IsNUW = OBO->hasNoUnsignedWrap();
    }
  }

  explicit BinaryOp(unsigned Opcode, Value *LHS, Value *RHS, bool IsNSW = false,
                    bool IsNUW = false)
      : Opcode(Opcode), LHS(LHS), RHS(RHS), IsNSW(IsNSW), IsNUW(IsNUW) {}
};
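
// For illustration (hypothetical values): given the IR instruction
// '%a = add nsw i32 %x, %y', BinaryOp(cast<Operator>(%a)) records
// Opcode = Instruction::Add, LHS = %x, RHS = %y, IsNSW = true, and keeps Op
// pointing at the instruction. The second constructor is used when a
// BinaryOp is synthesized from a pattern rather than taken verbatim from
// the IR (e.g. treating 'xor %x, signmask' as an add in MatchBinaryOp
// below); in that case Op stays null, because no single concrete Operator
// corresponds to it.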
} // end anonymous namespace

/// Try to map \p V into a BinaryOp, and return \c None on failure.
static Optional<BinaryOp> MatchBinaryOp(Value *V, DominatorTree &DT) {
  auto *Op = dyn_cast<Operator>(V);
  if (!Op)
    return None;

  // Implementation detail: all the cleverness here should happen without
  // creating new SCEV expressions -- our caller knows tricks to avoid creating
  // SCEV expressions when possible, and we should not break that.

  switch (Op->getOpcode()) {
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::URem:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::AShr:
  case Instruction::Shl:
    return BinaryOp(Op);

  case Instruction::Xor:
    if (auto *RHSC = dyn_cast<ConstantInt>(Op->getOperand(1)))
      // If the RHS of the xor is a signmask, then this is just an add.
      // Instcombine turns add of signmask into xor as a strength reduction
      // step.
      if (RHSC->getValue().isSignMask())
        return BinaryOp(Instruction::Add, Op->getOperand(0), Op->getOperand(1));
    return BinaryOp(Op);

  case Instruction::LShr:
    // Turn logical shift right of a constant into an unsigned divide.
    if (ConstantInt *SA = dyn_cast<ConstantInt>(Op->getOperand(1))) {
      uint32_t BitWidth = cast<IntegerType>(Op->getType())->getBitWidth();

      // If the shift count is not less than the bitwidth, the result of
      // the shift is undefined. Don't try to analyze it, because the
      // resolution chosen here may differ from the resolution chosen in
      // other parts of the compiler.
      if (SA->getValue().ult(BitWidth)) {
        Constant *X =
            ConstantInt::get(SA->getContext(),
                             APInt::getOneBitSet(BitWidth, SA->getZExtValue()));
        return BinaryOp(Instruction::UDiv, Op->getOperand(0), X);
      }
    }
    return BinaryOp(Op);

  case Instruction::ExtractValue: {
    auto *EVI = cast<ExtractValueInst>(Op);
    if (EVI->getNumIndices() != 1 || EVI->getIndices()[0] != 0)
      break;

    auto *WO = dyn_cast<WithOverflowInst>(EVI->getAggregateOperand());
    if (!WO)
      break;

    Instruction::BinaryOps BinOp = WO->getBinaryOp();
    bool Signed = WO->isSigned();
    // TODO: Should add nuw/nsw flags for mul as well.
    if (BinOp == Instruction::Mul || !isOverflowIntrinsicNoWrap(WO, DT))
      return BinaryOp(BinOp, WO->getLHS(), WO->getRHS());

    // Now that we know that all uses of the arithmetic-result component of
    // CI are guarded by the overflow check, we can go ahead and pretend
    // that the arithmetic is non-overflowing.
    return BinaryOp(BinOp, WO->getLHS(), WO->getRHS(),
                    /* IsNSW = */ Signed, /* IsNUW = */ !Signed);
  }

  default:
    break;
  }

  return None;
}
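
// Two worked examples of the normalizations above (the IR lines are
// illustrative, not taken from any particular test):
//
//   %d = lshr i32 %x, 3    ; becomes BinaryOp(UDiv, %x, 8): shifting right
//                          ; by 3 is division by 2^3 = 8
//   %a = xor i8 %x, -128   ; -128 (0x80) is the i8 sign mask, so this
//                          ; becomes BinaryOp(Add, %x, -128): adding the
//                          ; sign mask and xor'ing with it both just flip
//                          ; the top bit
//
// The ExtractValue case recognizes, e.g., the value component of
// llvm.sadd.with.overflow and, when isOverflowIntrinsicNoWrap proves that
// all uses are guarded by the overflow bit, tags the BinaryOp as
// non-wrapping.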

/// Helper function to createAddRecFromPHIWithCasts. We have a phi
/// node whose symbolic (unknown) SCEV is \p SymbolicPHI, which is updated via
/// the loop backedge by a SCEVAddExpr, possibly also with a few casts on the
/// way. This function checks if \p Op, an operand of this SCEVAddExpr,
/// follows one of the following patterns:
/// Op == (SExt ix (Trunc iy (%SymbolicPHI) to ix) to iy)
/// Op == (ZExt ix (Trunc iy (%SymbolicPHI) to ix) to iy)
/// If the SCEV expression of \p Op conforms with one of the expected patterns
/// we return the type of the truncation operation, and indicate whether the
/// truncated type should be treated as signed/unsigned by setting
/// \p Signed to true/false, respectively.
static Type *isSimpleCastedPHI(const SCEV *Op, const SCEVUnknown *SymbolicPHI,
                               bool &Signed, ScalarEvolution &SE) {
  // The case where Op == SymbolicPHI (that is, with no type conversions on
  // the way) is handled by the regular add recurrence creating logic and
  // would have already been triggered in createAddRecForPHI. Reaching it here
  // means that createAddRecFromPHI had failed for this PHI before (e.g.,
  // because one of the other operands of the SCEVAddExpr updating this PHI is
  // not invariant).
  //
  // Here we look for the case where Op = (ext(trunc(SymbolicPHI))), and in
  // this case predicates that allow us to prove that Op == SymbolicPHI will
  // be added.
  if (Op == SymbolicPHI)
    return nullptr;

  unsigned SourceBits = SE.getTypeSizeInBits(SymbolicPHI->getType());
  unsigned NewBits = SE.getTypeSizeInBits(Op->getType());
  if (SourceBits != NewBits)
    return nullptr;

  const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(Op);
  const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(Op);
  if (!SExt && !ZExt)
    return nullptr;
  const SCEVTruncateExpr *Trunc =
      SExt ? dyn_cast<SCEVTruncateExpr>(SExt->getOperand())
           : dyn_cast<SCEVTruncateExpr>(ZExt->getOperand());
  if (!Trunc)
    return nullptr;
  const SCEV *X = Trunc->getOperand();
  if (X != SymbolicPHI)
    return nullptr;
  Signed = SExt != nullptr;
  return Trunc->getType();
}
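
// For illustration, the SCEV pattern above typically arises from IR such as
// this hand-written example:
//
//   loop:
//     %x       = phi i64 [ %start, %entry ], [ %bevalue, %loop ]
//     %trunc   = trunc i64 %x to i32
//     %sext    = sext i32 %trunc to i64
//     %bevalue = add i64 %sext, %step
//     ...
//
// Here the backedge value's SCEV is
// ((sext i32 (trunc i64 %x to i32) to i64) + %step), so isSimpleCastedPHI
// returns i32 for the sext operand and sets Signed to true.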

static const Loop *isIntegerLoopHeaderPHI(const PHINode *PN, LoopInfo &LI) {
  if (!PN->getType()->isIntegerTy())
    return nullptr;
  const Loop *L = LI.getLoopFor(PN->getParent());
  if (!L || L->getHeader() != PN->getParent())
    return nullptr;
  return L;
}
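
// For example (reusing the IR sketch above): called on the %x phi, this
// returns the loop whose header contains %x; it returns nullptr for a
// floating-point phi or for a phi that does not sit in a loop header, which
// gates the cast-aware addrec analysis below.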

// Analyze \p SymbolicPHI, a SCEV expression of a phi node, and check if the
// computation that updates the phi follows the following pattern:
//   (SExt/ZExt ix (Trunc iy (%SymbolicPHI) to ix) to iy) + InvariantAccum
// which corresponds to a phi->trunc->sext/zext->add->phi update chain.
// If so, try to see if it can be rewritten as an AddRecExpr under some
// Predicates. If successful, return them as a pair. Also cache the results
// of the analysis.
//
// Example usage scenario:
//    Say the Rewriter is called for the following SCEV:
//         8 * ((sext i32 (trunc i64 %X to i32) to i64) + %Step)
//    where:
//         %X = phi i64 (%Start, %BEValue)
//    It will visitMul->visitAdd->visitSExt->visitTrunc->visitUnknown(%X),
//    and call this function with %SymbolicPHI = %X.
//
//    The analysis will find that the value coming around the backedge has
//    the following SCEV:
//         BEValue = ((sext i32 (trunc i64 %X to i32) to i64) + %Step)
//    Upon concluding that this matches the desired pattern, the function
//    will return the pair {NewAddRec, SmallPredsVec} where:
//         NewAddRec = {%Start,+,%Step}
//         SmallPredsVec = {P1, P2, P3} as follows:
//           P1(WrapPred): AR: {trunc(%Start),+,(trunc %Step)}<nsw> Flags: <nssw>
//           P2(EqualPred): %Start == (sext i32 (trunc i64 %Start to i32) to i64)
//           P3(EqualPred): %Step == (sext i32 (trunc i64 %Step to i32) to i64)
//    The returned pair means that SymbolicPHI can be rewritten into NewAddRec
//    under the predicates {P1,P2,P3}.
//    This predicated rewrite will be cached in PredicatedSCEVRewrites:
//         PredicatedSCEVRewrites[{%X,L}] = {NewAddRec, {P1,P2,P3}}
//
// TODO's:
//
// 1) Extend the Induction descriptor to also support inductions that involve
//    casts: When needed (namely, when we are called in the context of the
//    vectorizer induction analysis), a Set of cast instructions will be
//    populated by this method, and provided back to isInductionPHI. This is
//    needed to allow the vectorizer to properly record them to be ignored by
//    the cost model and to avoid vectorizing them (otherwise these casts,
//    which are redundant under the runtime overflow checks, will be
//    vectorized, which can be costly).
//
// 2) Support additional induction/PHISCEV patterns: We also want to support
//    inductions where the sext-trunc / zext-trunc operations (partly) occur
//    after the induction update operation (the induction increment):
//
//      (Trunc iy (SExt/ZExt ix (%SymbolicPHI + InvariantAccum) to iy) to ix)
//    which corresponds to a phi->add->trunc->sext/zext->phi update chain.
//
//      (Trunc iy ((SExt/ZExt ix (%SymbolicPhi) to iy) + InvariantAccum) to ix)
//    which corresponds to a phi->trunc->add->sext/zext->phi update chain.
//
// 3) Outline common code with createAddRecFromPHI to avoid duplication.
Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
ScalarEvolution::createAddRecFromPHIWithCastsImpl(const SCEVUnknown *SymbolicPHI) {
  SmallVector<const SCEVPredicate *, 3> Predicates;

  // *** Part1: Analyze if we have a phi-with-cast pattern for which we can
  // return an AddRec expression under some predicate.

  auto *PN = cast<PHINode>(SymbolicPHI->getValue());
  const Loop *L = isIntegerLoopHeaderPHI(PN, LI);
  assert(L && "Expecting an integer loop header phi");

  // The loop may have multiple entrances or multiple exits; we can analyze
  // this phi as an addrec if it has a unique entry value and a unique
  // backedge value.
  Value *BEValueV = nullptr, *StartValueV = nullptr;
  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
    Value *V = PN->getIncomingValue(i);
    if (L->contains(PN->getIncomingBlock(i))) {
      if (!BEValueV) {
        BEValueV = V;
      } else if (BEValueV != V) {
        BEValueV = nullptr;
        break;
      }
    } else if (!StartValueV) {
      StartValueV = V;
    } else if (StartValueV != V) {
      StartValueV = nullptr;
      break;
    }
  }
  if (!BEValueV || !StartValueV)
    return None;

  const SCEV *BEValue = getSCEV(BEValueV);

  // If the value coming around the backedge is an add with the symbolic
  // value we just inserted, possibly with casts that we can ignore under
  // an appropriate runtime guard, then we found a simple induction variable!
  const auto *Add = dyn_cast<SCEVAddExpr>(BEValue);
  if (!Add)
    return None;

  // If there is a single occurrence of the symbolic value, possibly
  // casted, replace it with a recurrence.
  unsigned FoundIndex = Add->getNumOperands();
  Type *TruncTy = nullptr;
  bool Signed;
  for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
    if ((TruncTy =
             isSimpleCastedPHI(Add->getOperand(i), SymbolicPHI, Signed, *this)))
      if (FoundIndex == e) {
        FoundIndex = i;
        break;
      }

  if (FoundIndex == Add->getNumOperands())
    return None;

  // Create an add with everything but the specified operand.
  SmallVector<const SCEV *, 8> Ops;
  for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
    if (i != FoundIndex)
      Ops.push_back(Add->getOperand(i));
  const SCEV *Accum = getAddExpr(Ops);

  // The runtime checks will not be valid if the step amount is
  // varying inside the loop.
  if (!isLoopInvariant(Accum, L))
    return None;

  // *** Part2: Create the predicates

  // Analysis was successful: we have a phi-with-cast pattern for which we
  // can return an AddRec expression under the following predicates:
  //
  // P1: A Wrap predicate that guarantees that Trunc(Start) + i*Trunc(Accum)
  //     fits within the truncated type (does not overflow) for i = 0 to n-1.
  // P2: An Equal predicate that guarantees that
  //     Start = (Ext ix (Trunc iy (Start) to ix) to iy)
  // P3: An Equal predicate that guarantees that
  //     Accum = (Ext ix (Trunc iy (Accum) to ix) to iy)
  //
  // As we next prove, the above predicates guarantee that:
  //     Start + i*Accum = (Ext ix (Trunc iy ( Start + i*Accum ) to ix) to iy)
  //
  //
  // More formally, we want to prove that:
  //     Expr(i+1) = Start + (i+1) * Accum
  //               = (Ext ix (Trunc iy (Expr(i)) to ix) to iy) + Accum
  //
  // Given that:
  // 1) Expr(0) = Start
  // 2) Expr(1) = Start + Accum
  //            = (Ext ix (Trunc iy (Start) to ix) to iy) + Accum :: from P2
  // 3) Induction hypothesis (step i):
  //    Expr(i) = (Ext ix (Trunc iy (Expr(i-1)) to ix) to iy) + Accum
  //
  // Proof:
  //  Expr(i+1) =
  //   = Start + (i+1)*Accum
  //   = (Start + i*Accum) + Accum
  //   = Expr(i) + Accum
  //   = (Ext ix (Trunc iy (Expr(i-1)) to ix) to iy) + Accum + Accum
  //                                                             :: from step i
  //
  //   = (Ext ix (Trunc iy (Start + (i-1)*Accum) to ix) to iy) + Accum + Accum
  //
  //   = (Ext ix (Trunc iy (Start + (i-1)*Accum) to ix) to iy)
  //     + (Ext ix (Trunc iy (Accum) to ix) to iy)
  //     + Accum                                                     :: from P3
  //
  //   = (Ext ix (Trunc iy ((Start + (i-1)*Accum) + Accum) to ix) to iy)
  //     + Accum                            :: from P1: Ext(x)+Ext(y)=>Ext(x+y)
  //
  //   = (Ext ix (Trunc iy (Start + i*Accum) to ix) to iy) + Accum
  //   = (Ext ix (Trunc iy (Expr(i)) to ix) to iy) + Accum
  //
  // By induction, the same applies to all iterations 1<=i<n:
  //
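  // As a concrete (illustrative) instance of these predicates: with iy = i64,
  // ix = i32, Start = 0 and Accum = 1, P1 requires that {0,+,1} not overflow
  // in i32 over the loop's iterations (e.g. a trip count below 2^31 in the
  // signed case), while P2 and P3 degenerate to the trivially true facts
  // 0 == (sext i32 (trunc i64 0 to i32) to i64) and likewise for 1.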

  // Create a truncated addrec for which we will add a no overflow check (P1).
  const SCEV *StartVal = getSCEV(StartValueV);