/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Analysis/ScalarEvolution.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- ScalarEvolution.cpp - Scalar Evolution Analysis --------------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This file contains the implementation of the scalar evolution analysis |
10 | | // engine, which is used primarily to analyze expressions involving induction |
11 | | // variables in loops. |
12 | | // |
13 | | // There are several aspects to this library. First is the representation of |
14 | | // scalar expressions, which are represented as subclasses of the SCEV class. |
15 | | // These classes are used to represent certain types of subexpressions that we |
16 | | // can handle. We only create one SCEV of a particular shape, so |
17 | | // pointer-comparisons for equality are legal. |
18 | | // |
19 | | // One important aspect of the SCEV objects is that they are never cyclic, even |
20 | | // if there is a cycle in the dataflow for an expression (ie, a PHI node). If |
21 | | // the PHI node is one of the idioms that we can represent (e.g., a polynomial |
22 | | // recurrence) then we represent it directly as a recurrence node, otherwise we |
23 | | // represent it as a SCEVUnknown node. |
24 | | // |
25 | | // In addition to being able to represent expressions of various types, we also |
26 | | // have folders that are used to build the *canonical* representation for a |
27 | | // particular expression. These folders are capable of using a variety of |
28 | | // rewrite rules to simplify the expressions. |
29 | | // |
30 | | // Once the folders are defined, we can implement the more interesting |
31 | | // higher-level code, such as the code that recognizes PHI nodes of various |
32 | | // types, computes the execution count of a loop, etc. |
33 | | // |
34 | | // TODO: We should use these routines and value representations to implement |
35 | | // dependence analysis! |
36 | | // |
37 | | //===----------------------------------------------------------------------===// |
38 | | // |
39 | | // There are several good references for the techniques used in this analysis. |
40 | | // |
41 | | // Chains of recurrences -- a method to expedite the evaluation |
42 | | // of closed-form functions |
43 | | // Olaf Bachmann, Paul S. Wang, Eugene V. Zima |
44 | | // |
45 | | // On computational properties of chains of recurrences |
46 | | // Eugene V. Zima |
47 | | // |
48 | | // Symbolic Evaluation of Chains of Recurrences for Loop Optimization |
49 | | // Robert A. van Engelen |
50 | | // |
51 | | // Efficient Symbolic Analysis for Optimizing Compilers |
52 | | // Robert A. van Engelen |
53 | | // |
54 | | // Using the chains of recurrences algebra for data dependence testing and |
55 | | // induction variable substitution |
56 | | // MS Thesis, Johnie Birch |
57 | | // |
58 | | //===----------------------------------------------------------------------===// |
59 | | |
60 | | #include "llvm/Analysis/ScalarEvolution.h" |
61 | | #include "llvm/ADT/APInt.h" |
62 | | #include "llvm/ADT/ArrayRef.h" |
63 | | #include "llvm/ADT/DenseMap.h" |
64 | | #include "llvm/ADT/DepthFirstIterator.h" |
65 | | #include "llvm/ADT/EquivalenceClasses.h" |
66 | | #include "llvm/ADT/FoldingSet.h" |
67 | | #include "llvm/ADT/None.h" |
68 | | #include "llvm/ADT/Optional.h" |
69 | | #include "llvm/ADT/STLExtras.h" |
70 | | #include "llvm/ADT/ScopeExit.h" |
71 | | #include "llvm/ADT/Sequence.h" |
72 | | #include "llvm/ADT/SetVector.h" |
73 | | #include "llvm/ADT/SmallPtrSet.h" |
74 | | #include "llvm/ADT/SmallSet.h" |
75 | | #include "llvm/ADT/SmallVector.h" |
76 | | #include "llvm/ADT/Statistic.h" |
77 | | #include "llvm/ADT/StringRef.h" |
78 | | #include "llvm/Analysis/AssumptionCache.h" |
79 | | #include "llvm/Analysis/ConstantFolding.h" |
80 | | #include "llvm/Analysis/InstructionSimplify.h" |
81 | | #include "llvm/Analysis/LoopInfo.h" |
82 | | #include "llvm/Analysis/ScalarEvolutionExpressions.h" |
83 | | #include "llvm/Analysis/TargetLibraryInfo.h" |
84 | | #include "llvm/Analysis/ValueTracking.h" |
85 | | #include "llvm/Config/llvm-config.h" |
86 | | #include "llvm/IR/Argument.h" |
87 | | #include "llvm/IR/BasicBlock.h" |
88 | | #include "llvm/IR/CFG.h" |
89 | | #include "llvm/IR/CallSite.h" |
90 | | #include "llvm/IR/Constant.h" |
91 | | #include "llvm/IR/ConstantRange.h" |
92 | | #include "llvm/IR/Constants.h" |
93 | | #include "llvm/IR/DataLayout.h" |
94 | | #include "llvm/IR/DerivedTypes.h" |
95 | | #include "llvm/IR/Dominators.h" |
96 | | #include "llvm/IR/Function.h" |
97 | | #include "llvm/IR/GlobalAlias.h" |
98 | | #include "llvm/IR/GlobalValue.h" |
99 | | #include "llvm/IR/GlobalVariable.h" |
100 | | #include "llvm/IR/InstIterator.h" |
101 | | #include "llvm/IR/InstrTypes.h" |
102 | | #include "llvm/IR/Instruction.h" |
103 | | #include "llvm/IR/Instructions.h" |
104 | | #include "llvm/IR/IntrinsicInst.h" |
105 | | #include "llvm/IR/Intrinsics.h" |
106 | | #include "llvm/IR/LLVMContext.h" |
107 | | #include "llvm/IR/Metadata.h" |
108 | | #include "llvm/IR/Operator.h" |
109 | | #include "llvm/IR/PatternMatch.h" |
110 | | #include "llvm/IR/Type.h" |
111 | | #include "llvm/IR/Use.h" |
112 | | #include "llvm/IR/User.h" |
113 | | #include "llvm/IR/Value.h" |
114 | | #include "llvm/IR/Verifier.h" |
115 | | #include "llvm/Pass.h" |
116 | | #include "llvm/Support/Casting.h" |
117 | | #include "llvm/Support/CommandLine.h" |
118 | | #include "llvm/Support/Compiler.h" |
119 | | #include "llvm/Support/Debug.h" |
120 | | #include "llvm/Support/ErrorHandling.h" |
121 | | #include "llvm/Support/KnownBits.h" |
122 | | #include "llvm/Support/SaveAndRestore.h" |
123 | | #include "llvm/Support/raw_ostream.h" |
124 | | #include <algorithm> |
125 | | #include <cassert> |
126 | | #include <climits> |
127 | | #include <cstddef> |
128 | | #include <cstdint> |
129 | | #include <cstdlib> |
130 | | #include <map> |
131 | | #include <memory> |
132 | | #include <tuple> |
133 | | #include <utility> |
134 | | #include <vector> |
135 | | |
136 | | using namespace llvm; |
137 | | |
138 | | #define DEBUG_TYPE "scalar-evolution" |
139 | | |
140 | | STATISTIC(NumArrayLenItCounts, |
141 | | "Number of trip counts computed with array length"); |
142 | | STATISTIC(NumTripCountsComputed, |
143 | | "Number of loops with predictable loop counts"); |
144 | | STATISTIC(NumTripCountsNotComputed, |
145 | | "Number of loops without predictable loop counts"); |
146 | | STATISTIC(NumBruteForceTripCountsComputed, |
147 | | "Number of loops with trip counts computed by force"); |
148 | | |
149 | | static cl::opt<unsigned> |
150 | | MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden, |
151 | | cl::desc("Maximum number of iterations SCEV will " |
152 | | "symbolically execute a constant " |
153 | | "derived loop"), |
154 | | cl::init(100)); |
155 | | |
156 | | // FIXME: Enable this with EXPENSIVE_CHECKS when the test suite is clean. |
157 | | static cl::opt<bool> VerifySCEV( |
158 | | "verify-scev", cl::Hidden, |
159 | | cl::desc("Verify ScalarEvolution's backedge taken counts (slow)")); |
160 | | static cl::opt<bool> |
161 | | VerifySCEVMap("verify-scev-maps", cl::Hidden, |
162 | | cl::desc("Verify no dangling value in ScalarEvolution's " |
163 | | "ExprValueMap (slow)")); |
164 | | |
165 | | static cl::opt<bool> VerifyIR( |
166 | | "scev-verify-ir", cl::Hidden, |
167 | | cl::desc("Verify IR correctness when making sensitive SCEV queries (slow)"), |
168 | | cl::init(false)); |
169 | | |
170 | | static cl::opt<unsigned> MulOpsInlineThreshold( |
171 | | "scev-mulops-inline-threshold", cl::Hidden, |
172 | | cl::desc("Threshold for inlining multiplication operands into a SCEV"), |
173 | | cl::init(32)); |
174 | | |
175 | | static cl::opt<unsigned> AddOpsInlineThreshold( |
176 | | "scev-addops-inline-threshold", cl::Hidden, |
177 | | cl::desc("Threshold for inlining addition operands into a SCEV"), |
178 | | cl::init(500)); |
179 | | |
180 | | static cl::opt<unsigned> MaxSCEVCompareDepth( |
181 | | "scalar-evolution-max-scev-compare-depth", cl::Hidden, |
182 | | cl::desc("Maximum depth of recursive SCEV complexity comparisons"), |
183 | | cl::init(32)); |
184 | | |
185 | | static cl::opt<unsigned> MaxSCEVOperationsImplicationDepth( |
186 | | "scalar-evolution-max-scev-operations-implication-depth", cl::Hidden, |
187 | | cl::desc("Maximum depth of recursive SCEV operations implication analysis"), |
188 | | cl::init(2)); |
189 | | |
190 | | static cl::opt<unsigned> MaxValueCompareDepth( |
191 | | "scalar-evolution-max-value-compare-depth", cl::Hidden, |
192 | | cl::desc("Maximum depth of recursive value complexity comparisons"), |
193 | | cl::init(2)); |
194 | | |
195 | | static cl::opt<unsigned> |
196 | | MaxArithDepth("scalar-evolution-max-arith-depth", cl::Hidden, |
197 | | cl::desc("Maximum depth of recursive arithmetics"), |
198 | | cl::init(32)); |
199 | | |
200 | | static cl::opt<unsigned> MaxConstantEvolvingDepth( |
201 | | "scalar-evolution-max-constant-evolving-depth", cl::Hidden, |
202 | | cl::desc("Maximum depth of recursive constant evolving"), cl::init(32)); |
203 | | |
204 | | static cl::opt<unsigned> |
205 | | MaxCastDepth("scalar-evolution-max-cast-depth", cl::Hidden, |
206 | | cl::desc("Maximum depth of recursive SExt/ZExt/Trunc"), |
207 | | cl::init(8)); |
208 | | |
209 | | static cl::opt<unsigned> |
210 | | MaxAddRecSize("scalar-evolution-max-add-rec-size", cl::Hidden, |
211 | | cl::desc("Max coefficients in AddRec during evolving"), |
212 | | cl::init(8)); |
213 | | |
214 | | static cl::opt<unsigned> |
215 | | HugeExprThreshold("scalar-evolution-huge-expr-threshold", cl::Hidden, |
216 | | cl::desc("Size of the expression which is considered huge"), |
217 | | cl::init(4096)); |
218 | | |
219 | | //===----------------------------------------------------------------------===// |
220 | | // SCEV class definitions |
221 | | //===----------------------------------------------------------------------===// |
222 | | |
223 | | //===----------------------------------------------------------------------===// |
224 | | // Implementation of the SCEV class. |
225 | | // |
226 | | |
227 | | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
228 | | LLVM_DUMP_METHOD void SCEV::dump() const { |
229 | | print(dbgs()); |
230 | | dbgs() << '\n'; |
231 | | } |
232 | | #endif |
233 | | |
234 | 57.2k | void SCEV::print(raw_ostream &OS) const { |
235 | 57.2k | switch (static_cast<SCEVTypes>(getSCEVType())) { |
236 | 57.2k | case scConstant: |
237 | 19.4k | cast<SCEVConstant>(this)->getValue()->printAsOperand(OS, false); |
238 | 19.4k | return; |
239 | 57.2k | case scTruncate: { |
240 | 1.26k | const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(this); |
241 | 1.26k | const SCEV *Op = Trunc->getOperand(); |
242 | 1.26k | OS << "(trunc " << *Op->getType() << " " << *Op << " to " |
243 | 1.26k | << *Trunc->getType() << ")"; |
244 | 1.26k | return; |
245 | 57.2k | } |
246 | 57.2k | case scZeroExtend: { |
247 | 2.56k | const SCEVZeroExtendExpr *ZExt = cast<SCEVZeroExtendExpr>(this); |
248 | 2.56k | const SCEV *Op = ZExt->getOperand(); |
249 | 2.56k | OS << "(zext " << *Op->getType() << " " << *Op << " to " |
250 | 2.56k | << *ZExt->getType() << ")"; |
251 | 2.56k | return; |
252 | 57.2k | } |
253 | 57.2k | case scSignExtend: { |
254 | 450 | const SCEVSignExtendExpr *SExt = cast<SCEVSignExtendExpr>(this); |
255 | 450 | const SCEV *Op = SExt->getOperand(); |
256 | 450 | OS << "(sext " << *Op->getType() << " " << *Op << " to " |
257 | 450 | << *SExt->getType() << ")"; |
258 | 450 | return; |
259 | 57.2k | } |
260 | 57.2k | case scAddRecExpr: { |
261 | 3.88k | const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(this); |
262 | 3.88k | OS << "{" << *AR->getOperand(0); |
263 | 8.28k | for (unsigned i = 1, e = AR->getNumOperands(); i != e; ++i4.40k ) |
264 | 4.40k | OS << ",+," << *AR->getOperand(i); |
265 | 3.88k | OS << "}<"; |
266 | 3.88k | if (AR->hasNoUnsignedWrap()) |
267 | 470 | OS << "nuw><"; |
268 | 3.88k | if (AR->hasNoSignedWrap()) |
269 | 689 | OS << "nsw><"; |
270 | 3.88k | if (AR->hasNoSelfWrap() && |
271 | 3.88k | !AR->getNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW))965 ) |
272 | 172 | OS << "nw><"; |
273 | 3.88k | AR->getLoop()->getHeader()->printAsOperand(OS, /*PrintType=*/false); |
274 | 3.88k | OS << ">"; |
275 | 3.88k | return; |
276 | 57.2k | } |
277 | 57.2k | case scAddExpr: |
278 | 16.5k | case scMulExpr: |
279 | 16.5k | case scUMaxExpr: |
280 | 16.5k | case scSMaxExpr: |
281 | 16.5k | case scUMinExpr: |
282 | 16.5k | case scSMinExpr: { |
283 | 16.5k | const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(this); |
284 | 16.5k | const char *OpStr = nullptr; |
285 | 16.5k | switch (NAry->getSCEVType()) { |
286 | 16.5k | case scAddExpr: OpStr = " + "; break7.51k ; |
287 | 16.5k | case scMulExpr: OpStr = " * "; break5.68k ; |
288 | 16.5k | case scUMaxExpr: OpStr = " umax "; break1.60k ; |
289 | 16.5k | case scSMaxExpr: OpStr = " smax "; break328 ; |
290 | 16.5k | case scUMinExpr: |
291 | 1.23k | OpStr = " umin "; |
292 | 1.23k | break; |
293 | 16.5k | case scSMinExpr: |
294 | 128 | OpStr = " smin "; |
295 | 128 | break; |
296 | 16.5k | } |
297 | 16.5k | OS << "("; |
298 | 16.5k | for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); |
299 | 53.4k | I != E; ++I36.9k ) { |
300 | 36.9k | OS << **I; |
301 | 36.9k | if (std::next(I) != E) |
302 | 20.4k | OS << OpStr; |
303 | 36.9k | } |
304 | 16.5k | OS << ")"; |
305 | 16.5k | switch (NAry->getSCEVType()) { |
306 | 16.5k | case scAddExpr: |
307 | 13.2k | case scMulExpr: |
308 | 13.2k | if (NAry->hasNoUnsignedWrap()) |
309 | 472 | OS << "<nuw>"; |
310 | 13.2k | if (NAry->hasNoSignedWrap()) |
311 | 2.06k | OS << "<nsw>"; |
312 | 16.5k | } |
313 | 16.5k | return; |
314 | 16.5k | } |
315 | 16.5k | case scUDivExpr: { |
316 | 467 | const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(this); |
317 | 467 | OS << "(" << *UDiv->getLHS() << " /u " << *UDiv->getRHS() << ")"; |
318 | 467 | return; |
319 | 16.5k | } |
320 | 16.5k | case scUnknown: { |
321 | 12.5k | const SCEVUnknown *U = cast<SCEVUnknown>(this); |
322 | 12.5k | Type *AllocTy; |
323 | 12.5k | if (U->isSizeOf(AllocTy)) { |
324 | 4 | OS << "sizeof(" << *AllocTy << ")"; |
325 | 4 | return; |
326 | 4 | } |
327 | 12.5k | if (U->isAlignOf(AllocTy)) { |
328 | 3 | OS << "alignof(" << *AllocTy << ")"; |
329 | 3 | return; |
330 | 3 | } |
331 | 12.5k | |
332 | 12.5k | Type *CTy; |
333 | 12.5k | Constant *FieldNo; |
334 | 12.5k | if (U->isOffsetOf(CTy, FieldNo)) { |
335 | 1 | OS << "offsetof(" << *CTy << ", "; |
336 | 1 | FieldNo->printAsOperand(OS, false); |
337 | 1 | OS << ")"; |
338 | 1 | return; |
339 | 1 | } |
340 | 12.5k | |
341 | 12.5k | // Otherwise just print it normally. |
342 | 12.5k | U->getValue()->printAsOperand(OS, false); |
343 | 12.5k | return; |
344 | 12.5k | } |
345 | 12.5k | case scCouldNotCompute: |
346 | 76 | OS << "***COULDNOTCOMPUTE***"; |
347 | 76 | return; |
348 | 0 | } |
349 | 0 | llvm_unreachable("Unknown SCEV kind!"); |
350 | 0 | } |
351 | | |
352 | 251M | Type *SCEV::getType() const { |
353 | 251M | switch (static_cast<SCEVTypes>(getSCEVType())) { |
354 | 251M | case scConstant: |
355 | 105M | return cast<SCEVConstant>(this)->getType(); |
356 | 251M | case scTruncate: |
357 | 11.4M | case scZeroExtend: |
358 | 11.4M | case scSignExtend: |
359 | 11.4M | return cast<SCEVCastExpr>(this)->getType(); |
360 | 49.5M | case scAddRecExpr: |
361 | 49.5M | case scMulExpr: |
362 | 49.5M | case scUMaxExpr: |
363 | 49.5M | case scSMaxExpr: |
364 | 49.5M | case scUMinExpr: |
365 | 49.5M | case scSMinExpr: |
366 | 49.5M | return cast<SCEVNAryExpr>(this)->getType(); |
367 | 49.5M | case scAddExpr: |
368 | 30.3M | return cast<SCEVAddExpr>(this)->getType(); |
369 | 49.5M | case scUDivExpr: |
370 | 1.28M | return cast<SCEVUDivExpr>(this)->getType(); |
371 | 52.9M | case scUnknown: |
372 | 52.9M | return cast<SCEVUnknown>(this)->getType(); |
373 | 49.5M | case scCouldNotCompute: |
374 | 0 | llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); |
375 | 0 | } |
376 | 0 | llvm_unreachable("Unknown SCEV kind!"); |
377 | 0 | } |
378 | | |
379 | 54.9M | bool SCEV::isZero() const { |
380 | 54.9M | if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this)) |
381 | 27.7M | return SC->getValue()->isZero(); |
382 | 27.2M | return false; |
383 | 27.2M | } |
384 | | |
385 | 832k | bool SCEV::isOne() const { |
386 | 832k | if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this)) |
387 | 821k | return SC->getValue()->isOne(); |
388 | 10.3k | return false; |
389 | 10.3k | } |
390 | | |
391 | 12.1M | bool SCEV::isAllOnesValue() const { |
392 | 12.1M | if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this)) |
393 | 12.1M | return SC->getValue()->isMinusOne(); |
394 | 19.1k | return false; |
395 | 19.1k | } |
396 | | |
397 | 244k | bool SCEV::isNonConstantNegative() const { |
398 | 244k | const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(this); |
399 | 244k | if (!Mul) return false206k ; |
400 | 38.1k | |
401 | 38.1k | // If there is a constant factor, it will be first. |
402 | 38.1k | const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0)); |
403 | 38.1k | if (!SC) return false2.80k ; |
404 | 35.3k | |
405 | 35.3k | // Return true if the value is negative, this matches things like (-42 * V). |
406 | 35.3k | return SC->getAPInt().isNegative(); |
407 | 35.3k | } |
408 | | |
409 | | SCEVCouldNotCompute::SCEVCouldNotCompute() : |
410 | 4.01M | SCEV(FoldingSetNodeIDRef(), scCouldNotCompute, 0) {} |
411 | | |
412 | 14.1M | bool SCEVCouldNotCompute::classof(const SCEV *S) { |
413 | 14.1M | return S->getSCEVType() == scCouldNotCompute; |
414 | 14.1M | } |
415 | | |
416 | 95.5M | const SCEV *ScalarEvolution::getConstant(ConstantInt *V) { |
417 | 95.5M | FoldingSetNodeID ID; |
418 | 95.5M | ID.AddInteger(scConstant); |
419 | 95.5M | ID.AddPointer(V); |
420 | 95.5M | void *IP = nullptr; |
421 | 95.5M | if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S89.9M ; |
422 | 5.55M | SCEV *S = new (SCEVAllocator) SCEVConstant(ID.Intern(SCEVAllocator), V); |
423 | 5.55M | UniqueSCEVs.InsertNode(S, IP); |
424 | 5.55M | return S; |
425 | 5.55M | } |
426 | | |
427 | 24.3M | const SCEV *ScalarEvolution::getConstant(const APInt &Val) { |
428 | 24.3M | return getConstant(ConstantInt::get(getContext(), Val)); |
429 | 24.3M | } |
430 | | |
431 | | const SCEV * |
432 | 28.2M | ScalarEvolution::getConstant(Type *Ty, uint64_t V, bool isSigned) { |
433 | 28.2M | IntegerType *ITy = cast<IntegerType>(getEffectiveSCEVType(Ty)); |
434 | 28.2M | return getConstant(ConstantInt::get(ITy, V, isSigned)); |
435 | 28.2M | } |
436 | | |
437 | | SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID, |
438 | | unsigned SCEVTy, const SCEV *op, Type *ty) |
439 | 1.81M | : SCEV(ID, SCEVTy, computeExpressionSize(op)), Op(op), Ty(ty) {} |
440 | | |
441 | | SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID, |
442 | | const SCEV *op, Type *ty) |
443 | 121k | : SCEVCastExpr(ID, scTruncate, op, ty) { |
444 | 121k | assert(Op->getType()->isIntOrPtrTy() && Ty->isIntOrPtrTy() && |
445 | 121k | "Cannot truncate non-integer value!"); |
446 | 121k | } |
447 | | |
448 | | SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID, |
449 | | const SCEV *op, Type *ty) |
450 | 892k | : SCEVCastExpr(ID, scZeroExtend, op, ty) { |
451 | 892k | assert(Op->getType()->isIntOrPtrTy() && Ty->isIntOrPtrTy() && |
452 | 892k | "Cannot zero extend non-integer value!"); |
453 | 892k | } |
454 | | |
455 | | SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeIDRef ID, |
456 | | const SCEV *op, Type *ty) |
457 | 797k | : SCEVCastExpr(ID, scSignExtend, op, ty) { |
458 | 797k | assert(Op->getType()->isIntOrPtrTy() && Ty->isIntOrPtrTy() && |
459 | 797k | "Cannot sign extend non-integer value!"); |
460 | 797k | } |
461 | | |
462 | 50.9k | void SCEVUnknown::deleted() { |
463 | 50.9k | // Clear this SCEVUnknown from various maps. |
464 | 50.9k | SE->forgetMemoizedResults(this); |
465 | 50.9k | |
466 | 50.9k | // Remove this SCEVUnknown from the uniquing map. |
467 | 50.9k | SE->UniqueSCEVs.RemoveNode(this); |
468 | 50.9k | |
469 | 50.9k | // Release the value. |
470 | 50.9k | setValPtr(nullptr); |
471 | 50.9k | } |
472 | | |
473 | 20.7k | void SCEVUnknown::allUsesReplacedWith(Value *New) { |
474 | 20.7k | // Remove this SCEVUnknown from the uniquing map. |
475 | 20.7k | SE->UniqueSCEVs.RemoveNode(this); |
476 | 20.7k | |
477 | 20.7k | // Update this SCEVUnknown to point to the new value. This is needed |
478 | 20.7k | // because there may still be outstanding SCEVs which still point to |
479 | 20.7k | // this SCEVUnknown. |
480 | 20.7k | setValPtr(New); |
481 | 20.7k | } |
482 | | |
483 | 12.5k | bool SCEVUnknown::isSizeOf(Type *&AllocTy) const { |
484 | 12.5k | if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue())) |
485 | 13 | if (VCE->getOpcode() == Instruction::PtrToInt) |
486 | 8 | if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0))) |
487 | 8 | if (CE->getOpcode() == Instruction::GetElementPtr && |
488 | 8 | CE->getOperand(0)->isNullValue() && |
489 | 8 | CE->getNumOperands() == 2) |
490 | 4 | if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(1))) |
491 | 4 | if (CI->isOne()) { |
492 | 4 | AllocTy = cast<PointerType>(CE->getOperand(0)->getType()) |
493 | 4 | ->getElementType(); |
494 | 4 | return true; |
495 | 4 | } |
496 | 12.5k | |
497 | 12.5k | return false; |
498 | 12.5k | } |
499 | | |
500 | 12.5k | bool SCEVUnknown::isAlignOf(Type *&AllocTy) const { |
501 | 12.5k | if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue())) |
502 | 9 | if (VCE->getOpcode() == Instruction::PtrToInt) |
503 | 4 | if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0))) |
504 | 4 | if (CE->getOpcode() == Instruction::GetElementPtr && |
505 | 4 | CE->getOperand(0)->isNullValue()) { |
506 | 4 | Type *Ty = |
507 | 4 | cast<PointerType>(CE->getOperand(0)->getType())->getElementType(); |
508 | 4 | if (StructType *STy = dyn_cast<StructType>(Ty)) |
509 | 4 | if (!STy->isPacked() && |
510 | 4 | CE->getNumOperands() == 3 && |
511 | 4 | CE->getOperand(1)->isNullValue()) { |
512 | 4 | if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(2))) |
513 | 4 | if (CI->isOne() && |
514 | 4 | STy->getNumElements() == 23 && |
515 | 4 | STy->getElementType(0)->isIntegerTy(1)3 ) { |
516 | 3 | AllocTy = STy->getElementType(1); |
517 | 3 | return true; |
518 | 3 | } |
519 | 12.5k | } |
520 | 4 | } |
521 | 12.5k | |
522 | 12.5k | return false; |
523 | 12.5k | } |
524 | | |
525 | 12.5k | bool SCEVUnknown::isOffsetOf(Type *&CTy, Constant *&FieldNo) const { |
526 | 12.5k | if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue())) |
527 | 6 | if (VCE->getOpcode() == Instruction::PtrToInt) |
528 | 1 | if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0))) |
529 | 1 | if (CE->getOpcode() == Instruction::GetElementPtr && |
530 | 1 | CE->getNumOperands() == 3 && |
531 | 1 | CE->getOperand(0)->isNullValue() && |
532 | 1 | CE->getOperand(1)->isNullValue()) { |
533 | 1 | Type *Ty = |
534 | 1 | cast<PointerType>(CE->getOperand(0)->getType())->getElementType(); |
535 | 1 | // Ignore vector types here so that ScalarEvolutionExpander doesn't |
536 | 1 | // emit getelementptrs that index into vectors. |
537 | 1 | if (Ty->isStructTy() || Ty->isArrayTy()0 ) { |
538 | 1 | CTy = Ty; |
539 | 1 | FieldNo = CE->getOperand(2); |
540 | 1 | return true; |
541 | 1 | } |
542 | 12.5k | } |
543 | 12.5k | |
544 | 12.5k | return false; |
545 | 12.5k | } |
546 | | |
547 | | //===----------------------------------------------------------------------===// |
548 | | // SCEV Utilities |
549 | | //===----------------------------------------------------------------------===// |
550 | | |
551 | | /// Compare the two values \p LV and \p RV in terms of their "complexity" where |
552 | | /// "complexity" is a partial (and somewhat ad-hoc) relation used to order |
553 | | /// operands in SCEV expressions. \p EqCache is a set of pairs of values that |
554 | | /// have been previously deemed to be "equally complex" by this routine. It is |
555 | | /// intended to avoid exponential time complexity in cases like: |
556 | | /// |
557 | | /// %a = f(%x, %y) |
558 | | /// %b = f(%a, %a) |
559 | | /// %c = f(%b, %b) |
560 | | /// |
561 | | /// %d = f(%x, %y) |
562 | | /// %e = f(%d, %d) |
563 | | /// %f = f(%e, %e) |
564 | | /// |
565 | | /// CompareValueComplexity(%f, %c) |
566 | | /// |
567 | | /// Since we do not continue running this routine on expression trees once we |
568 | | /// have seen unequal values, there is no need to track them in the cache. |
569 | | static int |
570 | | CompareValueComplexity(EquivalenceClasses<const Value *> &EqCacheValue, |
571 | | const LoopInfo *const LI, Value *LV, Value *RV, |
572 | 9.48M | unsigned Depth) { |
573 | 9.48M | if (Depth > MaxValueCompareDepth || EqCacheValue.isEquivalent(LV, RV)4.33M ) |
574 | 5.23M | return 0; |
575 | 4.24M | |
576 | 4.24M | // Order pointer values after integer values. This helps SCEVExpander form |
577 | 4.24M | // GEPs. |
578 | 4.24M | bool LIsPointer = LV->getType()->isPointerTy(), |
579 | 4.24M | RIsPointer = RV->getType()->isPointerTy(); |
580 | 4.24M | if (LIsPointer != RIsPointer) |
581 | 390k | return (int)LIsPointer - (int)RIsPointer; |
582 | 3.85M | |
583 | 3.85M | // Compare getValueID values. |
584 | 3.85M | unsigned LID = LV->getValueID(), RID = RV->getValueID(); |
585 | 3.85M | if (LID != RID) |
586 | 284k | return (int)LID - (int)RID; |
587 | 3.57M | |
588 | 3.57M | // Sort arguments by their position. |
589 | 3.57M | if (const auto *LA = dyn_cast<Argument>(LV)) { |
590 | 63.1k | const auto *RA = cast<Argument>(RV); |
591 | 63.1k | unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo(); |
592 | 63.1k | return (int)LArgNo - (int)RArgNo; |
593 | 63.1k | } |
594 | 3.51M | |
595 | 3.51M | if (const auto *LGV = dyn_cast<GlobalValue>(LV)) { |
596 | 3.69k | const auto *RGV = cast<GlobalValue>(RV); |
597 | 3.69k | |
598 | 7.36k | const auto IsGVNameSemantic = [&](const GlobalValue *GV) { |
599 | 7.36k | auto LT = GV->getLinkage(); |
600 | 7.36k | return !(GlobalValue::isPrivateLinkage(LT) || |
601 | 7.36k | GlobalValue::isInternalLinkage(LT)); |
602 | 7.36k | }; |
603 | 3.69k | |
604 | 3.69k | // Use the names to distinguish the two values, but only if the |
605 | 3.69k | // names are semantically important. |
606 | 3.69k | if (IsGVNameSemantic(LGV) && IsGVNameSemantic(RGV)3.66k ) |
607 | 3.64k | return LGV->getName().compare(RGV->getName()); |
608 | 3.50M | } |
609 | 3.50M | |
610 | 3.50M | // For instructions, compare their loop depth, and their operand count. This |
611 | 3.50M | // is pretty loose. |
612 | 3.50M | if (const auto *LInst = dyn_cast<Instruction>(LV)) { |
613 | 3.49M | const auto *RInst = cast<Instruction>(RV); |
614 | 3.49M | |
615 | 3.49M | // Compare loop depths. |
616 | 3.49M | const BasicBlock *LParent = LInst->getParent(), |
617 | 3.49M | *RParent = RInst->getParent(); |
618 | 3.49M | if (LParent != RParent) { |
619 | 729k | unsigned LDepth = LI->getLoopDepth(LParent), |
620 | 729k | RDepth = LI->getLoopDepth(RParent); |
621 | 729k | if (LDepth != RDepth) |
622 | 7.53k | return (int)LDepth - (int)RDepth; |
623 | 3.49M | } |
624 | 3.49M | |
625 | 3.49M | // Compare the number of operands. |
626 | 3.49M | unsigned LNumOps = LInst->getNumOperands(), |
627 | 3.49M | RNumOps = RInst->getNumOperands(); |
628 | 3.49M | if (LNumOps != RNumOps) |
629 | 11.2k | return (int)LNumOps - (int)RNumOps; |
630 | 3.47M | |
631 | 7.14M | for (unsigned Idx : seq(0u, LNumOps))3.47M { |
632 | 7.14M | int Result = |
633 | 7.14M | CompareValueComplexity(EqCacheValue, LI, LInst->getOperand(Idx), |
634 | 7.14M | RInst->getOperand(Idx), Depth + 1); |
635 | 7.14M | if (Result != 0) |
636 | 59.0k | return Result; |
637 | 7.14M | } |
638 | 3.47M | } |
639 | 3.50M | |
640 | 3.50M | EqCacheValue.unionSets(LV, RV); |
641 | 3.42M | return 0; |
642 | 3.50M | } |
643 | | |
644 | | // Return negative, zero, or positive, if LHS is less than, equal to, or greater |
645 | | // than RHS, respectively. A three-way result allows recursive comparisons to be |
646 | | // more efficient. |
647 | | static int CompareSCEVComplexity( |
648 | | EquivalenceClasses<const SCEV *> &EqCacheSCEV, |
649 | | EquivalenceClasses<const Value *> &EqCacheValue, |
650 | | const LoopInfo *const LI, const SCEV *LHS, const SCEV *RHS, |
651 | 92.1M | DominatorTree &DT, unsigned Depth = 0) { |
652 | 92.1M | // Fast-path: SCEVs are uniqued so we can do a quick equality check. |
653 | 92.1M | if (LHS == RHS) |
654 | 4.97M | return 0; |
655 | 87.1M | |
656 | 87.1M | // Primarily, sort the SCEVs by their getSCEVType(). |
657 | 87.1M | unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType(); |
658 | 87.1M | if (LType != RType) |
659 | 57.2M | return (int)LType - (int)RType; |
660 | 29.9M | |
661 | 29.9M | if (Depth > MaxSCEVCompareDepth || EqCacheSCEV.isEquivalent(LHS, RHS)) |
662 | 9.55k | return 0; |
663 | 29.9M | // Aside from the getSCEVType() ordering, the particular ordering |
664 | 29.9M | // isn't very important except that it's beneficial to be consistent, |
665 | 29.9M | // so that (a + b) and (b + a) don't end up as different expressions. |
666 | 29.9M | switch (static_cast<SCEVTypes>(LType)) { |
667 | 29.9M | case scUnknown: { |
668 | 2.33M | const SCEVUnknown *LU = cast<SCEVUnknown>(LHS); |
669 | 2.33M | const SCEVUnknown *RU = cast<SCEVUnknown>(RHS); |
670 | 2.33M | |
671 | 2.33M | int X = CompareValueComplexity(EqCacheValue, LI, LU->getValue(), |
672 | 2.33M | RU->getValue(), Depth + 1); |
673 | 2.33M | if (X == 0) |
674 | 1.57M | EqCacheSCEV.unionSets(LHS, RHS); |
675 | 2.33M | return X; |
676 | 29.9M | } |
677 | 29.9M | |
678 | 29.9M | case scConstant: { |
679 | 23.4M | const SCEVConstant *LC = cast<SCEVConstant>(LHS); |
680 | 23.4M | const SCEVConstant *RC = cast<SCEVConstant>(RHS); |
681 | 23.4M | |
682 | 23.4M | // Compare constant values. |
683 | 23.4M | const APInt &LA = LC->getAPInt(); |
684 | 23.4M | const APInt &RA = RC->getAPInt(); |
685 | 23.4M | unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth(); |
686 | 23.4M | if (LBitWidth != RBitWidth) |
687 | 188 | return (int)LBitWidth - (int)RBitWidth; |
688 | 23.4M | return LA.ult(RA) ? -112.9M : 110.4M ; |
689 | 23.4M | } |
690 | 23.4M | |
691 | 23.4M | case scAddRecExpr: { |
692 | 714k | const SCEVAddRecExpr *LA = cast<SCEVAddRecExpr>(LHS); |
693 | 714k | const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS); |
694 | 714k | |
695 | 714k | // There is always a dominance between two recs that are used by one SCEV, |
696 | 714k | // so we can safely sort recs by loop header dominance. We require such |
697 | 714k | // order in getAddExpr. |
698 | 714k | const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop(); |
699 | 714k | if (LLoop != RLoop) { |
700 | 153k | const BasicBlock *LHead = LLoop->getHeader(), *RHead = RLoop->getHeader(); |
701 | 153k | assert(LHead != RHead && "Two loops share the same header?"); |
702 | 153k | if (DT.dominates(LHead, RHead)) |
703 | 18.5k | return 1; |
704 | 153k | else |
705 | 153k | assert(DT.dominates(RHead, LHead) && |
706 | 153k | "No dominance between recurrences used by one SCEV?"); |
707 | 153k | return -1134k ; |
708 | 561k | } |
709 | 561k | |
710 | 561k | // Addrec complexity grows with operand count. |
711 | 561k | unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands(); |
712 | 561k | if (LNumOps != RNumOps) |
713 | 681 | return (int)LNumOps - (int)RNumOps; |
714 | 560k | |
715 | 560k | // Lexicographically compare. |
716 | 562k | for (unsigned i = 0; 560k i != LNumOps; ++i1.61k ) { |
717 | 562k | int X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, |
718 | 562k | LA->getOperand(i), RA->getOperand(i), DT, |
719 | 562k | Depth + 1); |
720 | 562k | if (X != 0) |
721 | 560k | return X; |
722 | 562k | } |
723 | 560k | EqCacheSCEV.unionSets(LHS, RHS); |
724 | 16 | return 0; |
725 | 560k | } |
726 | 560k | |
727 | 3.07M | case scAddExpr: |
728 | 3.07M | case scMulExpr: |
729 | 3.07M | case scSMaxExpr: |
730 | 3.07M | case scUMaxExpr: |
731 | 3.07M | case scSMinExpr: |
732 | 3.07M | case scUMinExpr: { |
733 | 3.07M | const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS); |
734 | 3.07M | const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS); |
735 | 3.07M | |
736 | 3.07M | // Lexicographically compare n-ary expressions. |
737 | 3.07M | unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands(); |
738 | 3.07M | if (LNumOps != RNumOps) |
739 | 139k | return (int)LNumOps - (int)RNumOps; |
740 | 2.93M | |
741 | 4.35M | for (unsigned i = 0; 2.93M i != LNumOps; ++i1.42M ) { |
742 | 3.74M | int X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, |
743 | 3.74M | LC->getOperand(i), RC->getOperand(i), DT, |
744 | 3.74M | Depth + 1); |
745 | 3.74M | if (X != 0) |
746 | 2.32M | return X; |
747 | 3.74M | } |
748 | 2.93M | EqCacheSCEV.unionSets(LHS, RHS); |
749 | 612k | return 0; |
750 | 2.93M | } |
751 | 2.93M | |
752 | 2.93M | case scUDivExpr: { |
753 | 6.86k | const SCEVUDivExpr *LC = cast<SCEVUDivExpr>(LHS); |
754 | 6.86k | const SCEVUDivExpr *RC = cast<SCEVUDivExpr>(RHS); |
755 | 6.86k | |
756 | 6.86k | // Lexicographically compare udiv expressions. |
757 | 6.86k | int X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, LC->getLHS(), |
758 | 6.86k | RC->getLHS(), DT, Depth + 1); |
759 | 6.86k | if (X != 0) |
760 | 4.46k | return X; |
761 | 2.39k | X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, LC->getRHS(), |
762 | 2.39k | RC->getRHS(), DT, Depth + 1); |
763 | 2.39k | if (X == 0) |
764 | 482 | EqCacheSCEV.unionSets(LHS, RHS); |
765 | 2.39k | return X; |
766 | 2.39k | } |
767 | 2.39k | |
768 | 362k | case scTruncate: |
769 | 362k | case scZeroExtend: |
770 | 362k | case scSignExtend: { |
771 | 362k | const SCEVCastExpr *LC = cast<SCEVCastExpr>(LHS); |
772 | 362k | const SCEVCastExpr *RC = cast<SCEVCastExpr>(RHS); |
773 | 362k | |
774 | 362k | // Compare cast expressions by operand. |
775 | 362k | int X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, |
776 | 362k | LC->getOperand(), RC->getOperand(), DT, |
777 | 362k | Depth + 1); |
778 | 362k | if (X == 0) |
779 | 179k | EqCacheSCEV.unionSets(LHS, RHS); |
780 | 362k | return X; |
781 | 362k | } |
782 | 362k | |
783 | 362k | case scCouldNotCompute: |
784 | 0 | llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); |
785 | 0 | } |
786 | 0 | llvm_unreachable("Unknown SCEV kind!"); |
787 | 0 | } |
788 | | |
789 | | /// Given a list of SCEV objects, order them by their complexity, and group |
790 | | /// objects of the same complexity together by value. When this routine is |
791 | | /// finished, we know that any duplicates in the vector are consecutive and that |
792 | | /// complexity is monotonically increasing. |
793 | | /// |
794 | | /// Note that we go take special precautions to ensure that we get deterministic |
795 | | /// results from this routine. In other words, we don't want the results of |
796 | | /// this to depend on where the addresses of various SCEV objects happened to |
797 | | /// land in memory. |
798 | | static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops, |
799 | 72.4M | LoopInfo *LI, DominatorTree &DT) { |
800 | 72.4M | if (Ops.size() < 2) return0 ; // Noop |
801 | 72.4M | |
802 | 72.4M | EquivalenceClasses<const SCEV *> EqCacheSCEV; |
803 | 72.4M | EquivalenceClasses<const Value *> EqCacheValue; |
804 | 72.4M | if (Ops.size() == 2) { |
805 | 64.6M | // This is the common case, which also happens to be trivially simple. |
806 | 64.6M | // Special case it. |
807 | 64.6M | const SCEV *&LHS = Ops[0], *&RHS = Ops[1]; |
808 | 64.6M | if (CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, RHS, LHS, DT) < 0) |
809 | 28.4M | std::swap(LHS, RHS); |
810 | 64.6M | return; |
811 | 64.6M | } |
812 | 7.87M | |
813 | 7.87M | // Do the rough sort by complexity. |
814 | 22.8M | llvm::stable_sort(Ops, [&](const SCEV *LHS, const SCEV *RHS) 7.87M { |
815 | 22.8M | return CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, LHS, RHS, DT) < |
816 | 22.8M | 0; |
817 | 22.8M | }); |
818 | 7.87M | |
819 | 7.87M | // Now that we are sorted by complexity, group elements of the same |
820 | 7.87M | // complexity. Note that this is, at worst, N^2, but the vector is likely to |
821 | 7.87M | // be extremely short in practice. Note that we take this approach because we |
822 | 7.87M | // do not want to depend on the addresses of the objects we are grouping. |
823 | 18.2M | for (unsigned i = 0, e = Ops.size(); i != e-2; ++i10.4M ) { |
824 | 11.3M | const SCEV *S = Ops[i]; |
825 | 11.3M | unsigned Complexity = S->getSCEVType(); |
826 | 11.3M | |
827 | 11.3M | // If there are any objects of the same complexity and same value as this |
828 | 11.3M | // one, group them. |
829 | 44.7M | for (unsigned j = i+1; j != e && Ops[j]->getSCEVType() == Complexity43.8M ; ++j33.4M ) { |
830 | 34.3M | if (Ops[j] == S) { // Found a duplicate. |
831 | 994k | // Move it to immediately after i'th element. |
832 | 994k | std::swap(Ops[i+1], Ops[j]); |
833 | 994k | ++i; // no need to rescan it. |
834 | 994k | if (i == e-2) return908k ; // Done! |
835 | 994k | } |
836 | 34.3M | } |
837 | 11.3M | } |
838 | 7.87M | } |
839 | | |
840 | | // Returns the size of the SCEV S. |
841 | 64 | static inline int sizeOfSCEV(const SCEV *S) { |
842 | 64 | struct FindSCEVSize { |
843 | 64 | int Size = 0; |
844 | 64 | |
845 | 64 | FindSCEVSize() = default; |
846 | 64 | |
847 | 175 | bool follow(const SCEV *S) { |
848 | 175 | ++Size; |
849 | 175 | // Keep looking at all operands of S. |
850 | 175 | return true; |
851 | 175 | } |
852 | 64 | |
853 | 175 | bool isDone() const { |
854 | 175 | return false; |
855 | 175 | } |
856 | 64 | }; |
857 | 64 | |
858 | 64 | FindSCEVSize F; |
859 | 64 | SCEVTraversal<FindSCEVSize> ST(F); |
860 | 64 | ST.visitAll(S); |
861 | 64 | return F.Size; |
862 | 64 | } |
863 | | |
864 | | /// Returns true if the subtree of \p S contains at least HugeExprThreshold |
865 | | /// nodes. |
866 | 112M | static bool isHugeExpression(const SCEV *S) { |
867 | 112M | return S->getExpressionSize() >= HugeExprThreshold; |
868 | 112M | } |
869 | | |
870 | | /// Returns true of \p Ops contains a huge SCEV (see definition above). |
871 | 52.6M | static bool hasHugeExpression(ArrayRef<const SCEV *> Ops) { |
872 | 52.6M | return any_of(Ops, isHugeExpression); |
873 | 52.6M | } |
874 | | |
875 | | namespace { |
876 | | |
877 | | struct SCEVDivision : public SCEVVisitor<SCEVDivision, void> { |
878 | | public: |
879 | | // Computes the Quotient and Remainder of the division of Numerator by |
880 | | // Denominator. |
881 | | static void divide(ScalarEvolution &SE, const SCEV *Numerator, |
882 | | const SCEV *Denominator, const SCEV **Quotient, |
883 | 24.1k | const SCEV **Remainder) { |
884 | 24.1k | assert(Numerator && Denominator && "Uninitialized SCEV"); |
885 | 24.1k | |
886 | 24.1k | SCEVDivision D(SE, Numerator, Denominator); |
887 | 24.1k | |
888 | 24.1k | // Check for the trivial case here to avoid having to check for it in the |
889 | 24.1k | // rest of the code. |
890 | 24.1k | if (Numerator == Denominator) { |
891 | 7.00k | *Quotient = D.One; |
892 | 7.00k | *Remainder = D.Zero; |
893 | 7.00k | return; |
894 | 7.00k | } |
895 | 17.1k | |
896 | 17.1k | if (Numerator->isZero()) { |
897 | 1.54k | *Quotient = D.Zero; |
898 | 1.54k | *Remainder = D.Zero; |
899 | 1.54k | return; |
900 | 1.54k | } |
901 | 15.6k | |
902 | 15.6k | // A simple case when N/1. The quotient is N. |
903 | 15.6k | if (Denominator->isOne()) { |
904 | 26 | *Quotient = Numerator; |
905 | 26 | *Remainder = D.Zero; |
906 | 26 | return; |
907 | 26 | } |
908 | 15.6k | |
909 | 15.6k | // Split the Denominator when it is a product. |
910 | 15.6k | if (const SCEVMulExpr *T = dyn_cast<SCEVMulExpr>(Denominator)) { |
911 | 12 | const SCEV *Q, *R; |
912 | 12 | *Quotient = Numerator; |
913 | 16 | for (const SCEV *Op : T->operands()) { |
914 | 16 | divide(SE, *Quotient, Op, &Q, &R); |
915 | 16 | *Quotient = Q; |
916 | 16 | |
917 | 16 | // Bail out when the Numerator is not divisible by one of the terms of |
918 | 16 | // the Denominator. |
919 | 16 | if (!R->isZero()) { |
920 | 8 | *Quotient = D.Zero; |
921 | 8 | *Remainder = Numerator; |
922 | 8 | return; |
923 | 8 | } |
924 | 16 | } |
925 | 12 | *Remainder = D.Zero; |
926 | 4 | return; |
927 | 15.5k | } |
928 | 15.5k | |
929 | 15.5k | D.visit(Numerator); |
930 | 15.5k | *Quotient = D.Quotient; |
931 | 15.5k | *Remainder = D.Remainder; |
932 | 15.5k | } |
933 | | |
934 | | // Except in the trivial case described above, we do not know how to divide |
935 | | // Expr by Denominator for the following functions with empty implementation. |
936 | 0 | void visitTruncateExpr(const SCEVTruncateExpr *Numerator) {} |
937 | 152 | void visitZeroExtendExpr(const SCEVZeroExtendExpr *Numerator) {} |
938 | 62 | void visitSignExtendExpr(const SCEVSignExtendExpr *Numerator) {} |
939 | 6 | void visitUDivExpr(const SCEVUDivExpr *Numerator) {} |
940 | 0 | void visitSMaxExpr(const SCEVSMaxExpr *Numerator) {} |
941 | 0 | void visitUMaxExpr(const SCEVUMaxExpr *Numerator) {} |
942 | 0 | void visitSMinExpr(const SCEVSMinExpr *Numerator) {} |
943 | 0 | void visitUMinExpr(const SCEVUMinExpr *Numerator) {} |
944 | 1.63k | void visitUnknown(const SCEVUnknown *Numerator) {} |
945 | 0 | void visitCouldNotCompute(const SCEVCouldNotCompute *Numerator) {} |
946 | | |
947 | 2.55k | void visitConstant(const SCEVConstant *Numerator) { |
948 | 2.55k | if (const SCEVConstant *D = dyn_cast<SCEVConstant>(Denominator)) { |
949 | 428 | APInt NumeratorVal = Numerator->getAPInt(); |
950 | 428 | APInt DenominatorVal = D->getAPInt(); |
951 | 428 | uint32_t NumeratorBW = NumeratorVal.getBitWidth(); |
952 | 428 | uint32_t DenominatorBW = DenominatorVal.getBitWidth(); |
953 | 428 | |
954 | 428 | if (NumeratorBW > DenominatorBW) |
955 | 0 | DenominatorVal = DenominatorVal.sext(NumeratorBW); |
956 | 428 | else if (NumeratorBW < DenominatorBW) |
957 | 1 | NumeratorVal = NumeratorVal.sext(DenominatorBW); |
958 | 428 | |
959 | 428 | APInt QuotientVal(NumeratorVal.getBitWidth(), 0); |
960 | 428 | APInt RemainderVal(NumeratorVal.getBitWidth(), 0); |
961 | 428 | APInt::sdivrem(NumeratorVal, DenominatorVal, QuotientVal, RemainderVal); |
962 | 428 | Quotient = SE.getConstant(QuotientVal); |
963 | 428 | Remainder = SE.getConstant(RemainderVal); |
964 | 428 | return; |
965 | 428 | } |
966 | 2.55k | } |
967 | | |
968 | 5.95k | void visitAddRecExpr(const SCEVAddRecExpr *Numerator) { |
969 | 5.95k | const SCEV *StartQ, *StartR, *StepQ, *StepR; |
970 | 5.95k | if (!Numerator->isAffine()) |
971 | 2 | return cannotDivide(Numerator); |
972 | 5.95k | divide(SE, Numerator->getStart(), Denominator, &StartQ, &StartR); |
973 | 5.95k | divide(SE, Numerator->getStepRecurrence(SE), Denominator, &StepQ, &StepR); |
974 | 5.95k | // Bail out if the types do not match. |
975 | 5.95k | Type *Ty = Denominator->getType(); |
976 | 5.95k | if (Ty != StartQ->getType() || Ty != StartR->getType() || |
977 | 5.95k | Ty != StepQ->getType()5.94k || Ty != StepR->getType()5.94k ) |
978 | 13 | return cannotDivide(Numerator); |
979 | 5.94k | Quotient = SE.getAddRecExpr(StartQ, StepQ, Numerator->getLoop(), |
980 | 5.94k | Numerator->getNoWrapFlags()); |
981 | 5.94k | Remainder = SE.getAddRecExpr(StartR, StepR, Numerator->getLoop(), |
982 | 5.94k | Numerator->getNoWrapFlags()); |
983 | 5.94k | } |
984 | | |
985 | 663 | void visitAddExpr(const SCEVAddExpr *Numerator) { |
986 | 663 | SmallVector<const SCEV *, 2> Qs, Rs; |
987 | 663 | Type *Ty = Denominator->getType(); |
988 | 663 | |
989 | 1.35k | for (const SCEV *Op : Numerator->operands()) { |
990 | 1.35k | const SCEV *Q, *R; |
991 | 1.35k | divide(SE, Op, Denominator, &Q, &R); |
992 | 1.35k | |
993 | 1.35k | // Bail out if types do not match. |
994 | 1.35k | if (Ty != Q->getType() || Ty != R->getType()) |
995 | 0 | return cannotDivide(Numerator); |
996 | 1.35k | |
997 | 1.35k | Qs.push_back(Q); |
998 | 1.35k | Rs.push_back(R); |
999 | 1.35k | } |
1000 | 663 | |
1001 | 663 | if (Qs.size() == 1) { |
1002 | 0 | Quotient = Qs[0]; |
1003 | 0 | Remainder = Rs[0]; |
1004 | 0 | return; |
1005 | 0 | } |
1006 | 663 | |
1007 | 663 | Quotient = SE.getAddExpr(Qs); |
1008 | 663 | Remainder = SE.getAddExpr(Rs); |
1009 | 663 | } |
1010 | | |
1011 | 4.56k | void visitMulExpr(const SCEVMulExpr *Numerator) { |
1012 | 4.56k | SmallVector<const SCEV *, 2> Qs; |
1013 | 4.56k | Type *Ty = Denominator->getType(); |
1014 | 4.56k | |
1015 | 4.56k | bool FoundDenominatorTerm = false; |
1016 | 11.1k | for (const SCEV *Op : Numerator->operands()) { |
1017 | 11.1k | // Bail out if types do not match. |
1018 | 11.1k | if (Ty != Op->getType()) |
1019 | 0 | return cannotDivide(Numerator); |
1020 | 11.1k | |
1021 | 11.1k | if (FoundDenominatorTerm) { |
1022 | 4.68k | Qs.push_back(Op); |
1023 | 4.68k | continue; |
1024 | 4.68k | } |
1025 | 6.49k | |
1026 | 6.49k | // Check whether Denominator divides one of the product operands. |
1027 | 6.49k | const SCEV *Q, *R; |
1028 | 6.49k | divide(SE, Op, Denominator, &Q, &R); |
1029 | 6.49k | if (!R->isZero()) { |
1030 | 1.97k | Qs.push_back(Op); |
1031 | 1.97k | continue; |
1032 | 1.97k | } |
1033 | 4.51k | |
1034 | 4.51k | // Bail out if types do not match. |
1035 | 4.51k | if (Ty != Q->getType()) |
1036 | 0 | return cannotDivide(Numerator); |
1037 | 4.51k | |
1038 | 4.51k | FoundDenominatorTerm = true; |
1039 | 4.51k | Qs.push_back(Q); |
1040 | 4.51k | } |
1041 | 4.56k | |
1042 | 4.56k | if (FoundDenominatorTerm) { |
1043 | 4.51k | Remainder = Zero; |
1044 | 4.51k | if (Qs.size() == 1) |
1045 | 0 | Quotient = Qs[0]; |
1046 | 4.51k | else |
1047 | 4.51k | Quotient = SE.getMulExpr(Qs); |
1048 | 4.51k | return; |
1049 | 4.51k | } |
1050 | 46 | |
1051 | 46 | if (!isa<SCEVUnknown>(Denominator)) |
1052 | 14 | return cannotDivide(Numerator); |
1053 | 32 | |
1054 | 32 | // The Remainder is obtained by replacing Denominator by 0 in Numerator. |
1055 | 32 | ValueToValueMap RewriteMap; |
1056 | 32 | RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] = |
1057 | 32 | cast<SCEVConstant>(Zero)->getValue(); |
1058 | 32 | Remainder = SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true); |
1059 | 32 | |
1060 | 32 | if (Remainder->isZero()) { |
1061 | 0 | // The Quotient is obtained by replacing Denominator by 1 in Numerator. |
1062 | 0 | RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] = |
1063 | 0 | cast<SCEVConstant>(One)->getValue(); |
1064 | 0 | Quotient = |
1065 | 0 | SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true); |
1066 | 0 | return; |
1067 | 0 | } |
1068 | 32 | |
1069 | 32 | // Quotient is (Numerator - Remainder) divided by Denominator. |
1070 | 32 | const SCEV *Q, *R; |
1071 | 32 | const SCEV *Diff = SE.getMinusSCEV(Numerator, Remainder); |
1072 | 32 | // This SCEV does not seem to simplify: fail the division here. |
1073 | 32 | if (sizeOfSCEV(Diff) > sizeOfSCEV(Numerator)) |
1074 | 0 | return cannotDivide(Numerator); |
1075 | 32 | divide(SE, Diff, Denominator, &Q, &R); |
1076 | 32 | if (R != Zero) |
1077 | 0 | return cannotDivide(Numerator); |
1078 | 32 | Quotient = Q; |
1079 | 32 | } |
1080 | | |
1081 | | private: |
1082 | | SCEVDivision(ScalarEvolution &S, const SCEV *Numerator, |
1083 | | const SCEV *Denominator) |
1084 | 24.1k | : SE(S), Denominator(Denominator) { |
1085 | 24.1k | Zero = SE.getZero(Denominator->getType()); |
1086 | 24.1k | One = SE.getOne(Denominator->getType()); |
1087 | 24.1k | |
1088 | 24.1k | // We generally do not know how to divide Expr by Denominator. We |
1089 | 24.1k | // initialize the division to a "cannot divide" state to simplify the rest |
1090 | 24.1k | // of the code. |
1091 | 24.1k | cannotDivide(Numerator); |
1092 | 24.1k | } |
1093 | | |
1094 | | // Convenience function for giving up on the division. We set the quotient to |
1095 | | // be equal to zero and the remainder to be equal to the numerator. |
1096 | 24.1k | void cannotDivide(const SCEV *Numerator) { |
1097 | 24.1k | Quotient = Zero; |
1098 | 24.1k | Remainder = Numerator; |
1099 | 24.1k | } |
1100 | | |
1101 | | ScalarEvolution &SE; |
1102 | | const SCEV *Denominator, *Quotient, *Remainder, *Zero, *One; |
1103 | | }; |
1104 | | |
1105 | | } // end anonymous namespace |
1106 | | |
1107 | | //===----------------------------------------------------------------------===// |
1108 | | // Simple SCEV method implementations |
1109 | | //===----------------------------------------------------------------------===// |
1110 | | |
1111 | | /// Compute BC(It, K). The result has width W. Assume, K > 0. |
1112 | | static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K, |
1113 | | ScalarEvolution &SE, |
1114 | 359k | Type *ResultTy) { |
1115 | 359k | // Handle the simplest case efficiently. |
1116 | 359k | if (K == 1) |
1117 | 357k | return SE.getTruncateOrZeroExtend(It, ResultTy); |
1118 | 1.51k | |
1119 | 1.51k | // We are using the following formula for BC(It, K): |
1120 | 1.51k | // |
1121 | 1.51k | // BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / K! |
1122 | 1.51k | // |
1123 | 1.51k | // Suppose, W is the bitwidth of the return value. We must be prepared for |
1124 | 1.51k | // overflow. Hence, we must assure that the result of our computation is |
1125 | 1.51k | // equal to the accurate one modulo 2^W. Unfortunately, division isn't |
1126 | 1.51k | // safe in modular arithmetic. |
1127 | 1.51k | // |
1128 | 1.51k | // However, this code doesn't use exactly that formula; the formula it uses |
1129 | 1.51k | // is something like the following, where T is the number of factors of 2 in |
1130 | 1.51k | // K! (i.e. trailing zeros in the binary representation of K!), and ^ is |
1131 | 1.51k | // exponentiation: |
1132 | 1.51k | // |
1133 | 1.51k | // BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / 2^T / (K! / 2^T) |
1134 | 1.51k | // |
1135 | 1.51k | // This formula is trivially equivalent to the previous formula. However, |
1136 | 1.51k | // this formula can be implemented much more efficiently. The trick is that |
1137 | 1.51k | // K! / 2^T is odd, and exact division by an odd number *is* safe in modular |
1138 | 1.51k | // arithmetic. To do exact division in modular arithmetic, all we have |
1139 | 1.51k | // to do is multiply by the inverse. Therefore, this step can be done at |
1140 | 1.51k | // width W. |
1141 | 1.51k | // |
1142 | 1.51k | // The next issue is how to safely do the division by 2^T. The way this |
1143 | 1.51k | // is done is by doing the multiplication step at a width of at least W + T |
1144 | 1.51k | // bits. This way, the bottom W+T bits of the product are accurate. Then, |
1145 | 1.51k | // when we perform the division by 2^T (which is equivalent to a right shift |
1146 | 1.51k | // by T), the bottom W bits are accurate. Extra bits are okay; they'll get |
1147 | 1.51k | // truncated out after the division by 2^T. |
1148 | 1.51k | // |
1149 | 1.51k | // In comparison to just directly using the first formula, this technique |
1150 | 1.51k | // is much more efficient; using the first formula requires W * K bits, |
1151 | 1.51k | // but this formula less than W + K bits. Also, the first formula requires |
1152 | 1.51k | // a division step, whereas this formula only requires multiplies and shifts. |
1153 | 1.51k | // |
1154 | 1.51k | // It doesn't matter whether the subtraction step is done in the calculation |
1155 | 1.51k | // width or the input iteration count's width; if the subtraction overflows, |
1156 | 1.51k | // the result must be zero anyway. We prefer here to do it in the width of |
1157 | 1.51k | // the induction variable because it helps a lot for certain cases; CodeGen |
1158 | 1.51k | // isn't smart enough to ignore the overflow, which leads to much less |
1159 | 1.51k | // efficient code if the width of the subtraction is wider than the native |
1160 | 1.51k | // register width. |
1161 | 1.51k | // |
1162 | 1.51k | // (It's possible to not widen at all by pulling out factors of 2 before |
1163 | 1.51k | // the multiplication; for example, K=2 can be calculated as |
1164 | 1.51k | // It/2*(It+(It*INT_MIN/INT_MIN)+-1). However, it requires |
1165 | 1.51k | // extra arithmetic, so it's not an obvious win, and it gets |
1166 | 1.51k | // much more complicated for K > 3.) |
1167 | 1.51k | |
1168 | 1.51k | // Protection from insane SCEVs; this bound is conservative, |
1169 | 1.51k | // but it probably doesn't matter. |
1170 | 1.51k | if (K > 1000) |
1171 | 0 | return SE.getCouldNotCompute(); |
1172 | 1.51k | |
1173 | 1.51k | unsigned W = SE.getTypeSizeInBits(ResultTy); |
1174 | 1.51k | |
1175 | 1.51k | // Calculate K! / 2^T and T; we divide out the factors of two before |
1176 | 1.51k | // multiplying for calculating K! / 2^T to avoid overflow. |
1177 | 1.51k | // Other overflow doesn't matter because we only care about the bottom |
1178 | 1.51k | // W bits of the result. |
1179 | 1.51k | APInt OddFactorial(W, 1); |
1180 | 1.51k | unsigned T = 1; |
1181 | 2.97k | for (unsigned i = 3; i <= K; ++i1.45k ) { |
1182 | 1.45k | APInt Mult(W, i); |
1183 | 1.45k | unsigned TwoFactors = Mult.countTrailingZeros(); |
1184 | 1.45k | T += TwoFactors; |
1185 | 1.45k | Mult.lshrInPlace(TwoFactors); |
1186 | 1.45k | OddFactorial *= Mult; |
1187 | 1.45k | } |
1188 | 1.51k | |
1189 | 1.51k | // We need at least W + T bits for the multiplication step |
1190 | 1.51k | unsigned CalculationBits = W + T; |
1191 | 1.51k | |
1192 | 1.51k | // Calculate 2^T, at width T+W. |
1193 | 1.51k | APInt DivFactor = APInt::getOneBitSet(CalculationBits, T); |
1194 | 1.51k | |
1195 | 1.51k | // Calculate the multiplicative inverse of K! / 2^T; |
1196 | 1.51k | // this multiplication factor will perform the exact division by |
1197 | 1.51k | // K! / 2^T. |
1198 | 1.51k | APInt Mod = APInt::getSignedMinValue(W+1); |
1199 | 1.51k | APInt MultiplyFactor = OddFactorial.zext(W+1); |
1200 | 1.51k | MultiplyFactor = MultiplyFactor.multiplicativeInverse(Mod); |
1201 | 1.51k | MultiplyFactor = MultiplyFactor.trunc(W); |
1202 | 1.51k | |
1203 | 1.51k | // Calculate the product, at width T+W |
1204 | 1.51k | IntegerType *CalculationTy = IntegerType::get(SE.getContext(), |
1205 | 1.51k | CalculationBits); |
1206 | 1.51k | const SCEV *Dividend = SE.getTruncateOrZeroExtend(It, CalculationTy); |
1207 | 4.49k | for (unsigned i = 1; i != K; ++i2.97k ) { |
1208 | 2.97k | const SCEV *S = SE.getMinusSCEV(It, SE.getConstant(It->getType(), i)); |
1209 | 2.97k | Dividend = SE.getMulExpr(Dividend, |
1210 | 2.97k | SE.getTruncateOrZeroExtend(S, CalculationTy)); |
1211 | 2.97k | } |
1212 | 1.51k | |
1213 | 1.51k | // Divide by 2^T |
1214 | 1.51k | const SCEV *DivResult = SE.getUDivExpr(Dividend, SE.getConstant(DivFactor)); |
1215 | 1.51k | |
1216 | 1.51k | // Truncate the result, and divide by K! / 2^T. |
1217 | 1.51k | |
1218 | 1.51k | return SE.getMulExpr(SE.getConstant(MultiplyFactor), |
1219 | 1.51k | SE.getTruncateOrZeroExtend(DivResult, ResultTy)); |
1220 | 1.51k | } |
1221 | | |
1222 | | /// Return the value of this chain of recurrences at the specified iteration |
1223 | | /// number. We can evaluate this recurrence by multiplying each element in the |
1224 | | /// chain by the binomial coefficient corresponding to it. In other words, we |
1225 | | /// can evaluate {A,+,B,+,C,+,D} as: |
1226 | | /// |
1227 | | /// A*BC(It, 0) + B*BC(It, 1) + C*BC(It, 2) + D*BC(It, 3) |
1228 | | /// |
1229 | | /// where BC(It, k) stands for binomial coefficient. |
1230 | | const SCEV *SCEVAddRecExpr::evaluateAtIteration(const SCEV *It, |
1231 | 357k | ScalarEvolution &SE) const { |
1232 | 357k | const SCEV *Result = getStart(); |
1233 | 716k | for (unsigned i = 1, e = getNumOperands(); i != e; ++i359k ) { |
1234 | 359k | // The computation is correct in the face of overflow provided that the |
1235 | 359k | // multiplication is performed _after_ the evaluation of the binomial |
1236 | 359k | // coefficient. |
1237 | 359k | const SCEV *Coeff = BinomialCoefficient(It, i, SE, getType()); |
1238 | 359k | if (isa<SCEVCouldNotCompute>(Coeff)) |
1239 | 0 | return Coeff; |
1240 | 359k | |
1241 | 359k | Result = SE.getAddExpr(Result, SE.getMulExpr(getOperand(i), Coeff)); |
1242 | 359k | } |
1243 | 357k | return Result; |
1244 | 357k | } |
1245 | | |
1246 | | //===----------------------------------------------------------------------===// |
1247 | | // SCEV Expression folder implementations |
1248 | | //===----------------------------------------------------------------------===// |
1249 | | |
1250 | | const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, Type *Ty, |
1251 | 748k | unsigned Depth) { |
1252 | 748k | assert(getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) && |
1253 | 748k | "This is not a truncating conversion!"); |
1254 | 748k | assert(isSCEVable(Ty) && |
1255 | 748k | "This is not a conversion to a SCEVable type!"); |
1256 | 748k | Ty = getEffectiveSCEVType(Ty); |
1257 | 748k | |
1258 | 748k | FoldingSetNodeID ID; |
1259 | 748k | ID.AddInteger(scTruncate); |
1260 | 748k | ID.AddPointer(Op); |
1261 | 748k | ID.AddPointer(Ty); |
1262 | 748k | void *IP = nullptr; |
1263 | 748k | if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S19.8k ; |
1264 | 728k | |
1265 | 728k | // Fold if the operand is constant. |
1266 | 728k | if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op)) |
1267 | 442k | return getConstant( |
1268 | 442k | cast<ConstantInt>(ConstantExpr::getTrunc(SC->getValue(), Ty))); |
1269 | 286k | |
1270 | 286k | // trunc(trunc(x)) --> trunc(x) |
1271 | 286k | if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) |
1272 | 5.38k | return getTruncateExpr(ST->getOperand(), Ty, Depth + 1); |
1273 | 281k | |
1274 | 281k | // trunc(sext(x)) --> sext(x) if widening or trunc(x) if narrowing |
1275 | 281k | if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op)) |
1276 | 9.96k | return getTruncateOrSignExtend(SS->getOperand(), Ty, Depth + 1); |
1277 | 271k | |
1278 | 271k | // trunc(zext(x)) --> zext(x) if widening or trunc(x) if narrowing |
1279 | 271k | if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op)) |
1280 | 15.8k | return getTruncateOrZeroExtend(SZ->getOperand(), Ty, Depth + 1); |
1281 | 255k | |
1282 | 255k | if (Depth > MaxCastDepth) { |
1283 | 11 | SCEV *S = |
1284 | 11 | new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator), Op, Ty); |
1285 | 11 | UniqueSCEVs.InsertNode(S, IP); |
1286 | 11 | addToLoopUseLists(S); |
1287 | 11 | return S; |
1288 | 11 | } |
1289 | 255k | |
1290 | 255k | // trunc(x1 + ... + xN) --> trunc(x1) + ... + trunc(xN) and |
1291 | 255k | // trunc(x1 * ... * xN) --> trunc(x1) * ... * trunc(xN), |
1292 | 255k | // if after transforming we have at most one truncate, not counting truncates |
1293 | 255k | // that replace other casts. |
1294 | 255k | if (isa<SCEVAddExpr>(Op) || isa<SCEVMulExpr>(Op)235k ) { |
1295 | 29.3k | auto *CommOp = cast<SCEVCommutativeExpr>(Op); |
1296 | 29.3k | SmallVector<const SCEV *, 4> Operands; |
1297 | 29.3k | unsigned numTruncs = 0; |
1298 | 90.7k | for (unsigned i = 0, e = CommOp->getNumOperands(); i != e && numTruncs < 261.4k ; |
1299 | 61.3k | ++i) { |
1300 | 61.3k | const SCEV *S = getTruncateExpr(CommOp->getOperand(i), Ty, Depth + 1); |
1301 | 61.3k | if (!isa<SCEVCastExpr>(CommOp->getOperand(i)) && isa<SCEVTruncateExpr>(S)45.8k ) |
1302 | 16.7k | numTruncs++; |
1303 | 61.3k | Operands.push_back(S); |
1304 | 61.3k | } |
1305 | 29.3k | if (numTruncs < 2) { |
1306 | 28.1k | if (isa<SCEVAddExpr>(Op)) |
1307 | 19.3k | return getAddExpr(Operands); |
1308 | 8.79k | else if (isa<SCEVMulExpr>(Op)) |
1309 | 8.79k | return getMulExpr(Operands); |
1310 | 8.79k | else |
1311 | 8.79k | llvm_unreachable("Unexpected SCEV type for Op."); |
1312 | 28.1k | } |
1313 | 29.3k | // Although we checked in the beginning that ID is not in the cache, it is |
1314 | 29.3k | // possible that during recursion and different modification ID was inserted |
1315 | 29.3k | // into the cache. So if we find it, just return it. |
1316 | 29.3k | if (const SCEV *1.15k S1.15k = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) |
1317 | 0 | return S; |
1318 | 227k | } |
1319 | 227k | |
1320 | 227k | // If the input value is a chrec scev, truncate the chrec's operands. |
1321 | 227k | if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) { |
1322 | 105k | SmallVector<const SCEV *, 4> Operands; |
1323 | 105k | for (const SCEV *Op : AddRec->operands()) |
1324 | 211k | Operands.push_back(getTruncateExpr(Op, Ty, Depth + 1)); |
1325 | 105k | return getAddRecExpr(Operands, AddRec->getLoop(), SCEV::FlagAnyWrap); |
1326 | 105k | } |
1327 | 121k | |
1328 | 121k | // The cast wasn't folded; create an explicit cast node. We can reuse |
1329 | 121k | // the existing insert position since if we get here, we won't have |
1330 | 121k | // made any changes which would invalidate it. |
1331 | 121k | SCEV *S = new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator), |
1332 | 121k | Op, Ty); |
1333 | 121k | UniqueSCEVs.InsertNode(S, IP); |
1334 | 121k | addToLoopUseLists(S); |
1335 | 121k | return S; |
1336 | 121k | } |
1337 | | |
1338 | | // Get the limit of a recurrence such that incrementing by Step cannot cause |
1339 | | // signed overflow as long as the value of the recurrence within the |
1340 | | // loop does not exceed this limit before incrementing. |
1341 | | static const SCEV *getSignedOverflowLimitForStep(const SCEV *Step, |
1342 | | ICmpInst::Predicate *Pred, |
1343 | 324k | ScalarEvolution *SE) { |
1344 | 324k | unsigned BitWidth = SE->getTypeSizeInBits(Step->getType()); |
1345 | 324k | if (SE->isKnownPositive(Step)) { |
1346 | 172k | *Pred = ICmpInst::ICMP_SLT; |
1347 | 172k | return SE->getConstant(APInt::getSignedMinValue(BitWidth) - |
1348 | 172k | SE->getSignedRangeMax(Step)); |
1349 | 172k | } |
1350 | 151k | if (SE->isKnownNegative(Step)) { |
1351 | 142k | *Pred = ICmpInst::ICMP_SGT; |
1352 | 142k | return SE->getConstant(APInt::getSignedMaxValue(BitWidth) - |
1353 | 142k | SE->getSignedRangeMin(Step)); |
1354 | 142k | } |
1355 | 9.58k | return nullptr; |
1356 | 9.58k | } |
1357 | | |
1358 | | // Get the limit of a recurrence such that incrementing by Step cannot cause |
1359 | | // unsigned overflow as long as the value of the recurrence within the loop does |
1360 | | // not exceed this limit before incrementing. |
1361 | | static const SCEV *getUnsignedOverflowLimitForStep(const SCEV *Step, |
1362 | | ICmpInst::Predicate *Pred, |
1363 | 12.8k | ScalarEvolution *SE) { |
1364 | 12.8k | unsigned BitWidth = SE->getTypeSizeInBits(Step->getType()); |
1365 | 12.8k | *Pred = ICmpInst::ICMP_ULT; |
1366 | 12.8k | |
1367 | 12.8k | return SE->getConstant(APInt::getMinValue(BitWidth) - |
1368 | 12.8k | SE->getUnsignedRangeMax(Step)); |
1369 | 12.8k | } |
1370 | | |
1371 | | namespace { |
1372 | | |
1373 | | struct ExtendOpTraitsBase { |
1374 | | typedef const SCEV *(ScalarEvolution::*GetExtendExprTy)(const SCEV *, Type *, |
1375 | | unsigned); |
1376 | | }; |
1377 | | |
1378 | | // Used to make code generic over signed and unsigned overflow. |
1379 | | template <typename ExtendOp> struct ExtendOpTraits { |
1380 | | // Members present: |
1381 | | // |
1382 | | // static const SCEV::NoWrapFlags WrapType; |
1383 | | // |
1384 | | // static const ExtendOpTraitsBase::GetExtendExprTy GetExtendExpr; |
1385 | | // |
1386 | | // static const SCEV *getOverflowLimitForStep(const SCEV *Step, |
1387 | | // ICmpInst::Predicate *Pred, |
1388 | | // ScalarEvolution *SE); |
1389 | | }; |
1390 | | |
1391 | | template <> |
1392 | | struct ExtendOpTraits<SCEVSignExtendExpr> : public ExtendOpTraitsBase { |
1393 | | static const SCEV::NoWrapFlags WrapType = SCEV::FlagNSW; |
1394 | | |
1395 | | static const GetExtendExprTy GetExtendExpr; |
1396 | | |
1397 | | static const SCEV *getOverflowLimitForStep(const SCEV *Step, |
1398 | | ICmpInst::Predicate *Pred, |
1399 | 7.92k | ScalarEvolution *SE) { |
1400 | 7.92k | return getSignedOverflowLimitForStep(Step, Pred, SE); |
1401 | 7.92k | } |
1402 | | }; |
1403 | | |
1404 | | const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits< |
1405 | | SCEVSignExtendExpr>::GetExtendExpr = &ScalarEvolution::getSignExtendExpr; |
1406 | | |
1407 | | template <> |
1408 | | struct ExtendOpTraits<SCEVZeroExtendExpr> : public ExtendOpTraitsBase { |
1409 | | static const SCEV::NoWrapFlags WrapType = SCEV::FlagNUW; |
1410 | | |
1411 | | static const GetExtendExprTy GetExtendExpr; |
1412 | | |
1413 | | static const SCEV *getOverflowLimitForStep(const SCEV *Step, |
1414 | | ICmpInst::Predicate *Pred, |
1415 | 12.8k | ScalarEvolution *SE) { |
1416 | 12.8k | return getUnsignedOverflowLimitForStep(Step, Pred, SE); |
1417 | 12.8k | } |
1418 | | }; |
1419 | | |
1420 | | const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits< |
1421 | | SCEVZeroExtendExpr>::GetExtendExpr = &ScalarEvolution::getZeroExtendExpr; |
1422 | | |
1423 | | } // end anonymous namespace |
1424 | | |
1425 | | // The recurrence AR has been shown to have no signed/unsigned wrap or something |
1426 | | // close to it. Typically, if we can prove NSW/NUW for AR, then we can just as |
1427 | | // easily prove NSW/NUW for its preincrement or postincrement sibling. This |
1428 | | // allows normalizing a sign/zero extended AddRec as such: {sext/zext(Step + |
1429 | | // Start),+,Step} => {(Step + sext/zext(Start),+,Step} As a result, the |
1430 | | // expression "Step + sext/zext(PreIncAR)" is congruent with |
1431 | | // "sext/zext(PostIncAR)" |
1432 | | template <typename ExtendOpTy> |
1433 | | static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty, |
1434 | 549k | ScalarEvolution *SE, unsigned Depth) { |
1435 | 549k | auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType; |
1436 | 549k | auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr; |
1437 | 549k | |
1438 | 549k | const Loop *L = AR->getLoop(); |
1439 | 549k | const SCEV *Start = AR->getStart(); |
1440 | 549k | const SCEV *Step = AR->getStepRecurrence(*SE); |
1441 | 549k | |
1442 | 549k | // Check for a simple looking step prior to loop entry. |
1443 | 549k | const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start); |
1444 | 549k | if (!SA) |
1445 | 525k | return nullptr; |
1446 | 24.3k | |
1447 | 24.3k | // Create an AddExpr for "PreStart" after subtracting Step. Full SCEV |
1448 | 24.3k | // subtraction is expensive. For this purpose, perform a quick and dirty |
1449 | 24.3k | // difference, by checking for Step in the operand list. |
1450 | 24.3k | SmallVector<const SCEV *, 4> DiffOps; |
1451 | 24.3k | for (const SCEV *Op : SA->operands()) |
1452 | 49.6k | if (Op != Step) |
1453 | 30.5k | DiffOps.push_back(Op); |
1454 | 24.3k | |
1455 | 24.3k | if (DiffOps.size() == SA->getNumOperands()) |
1456 | 5.34k | return nullptr; |
1457 | 19.0k | |
1458 | 19.0k | // Try to prove `WrapType` (SCEV::FlagNSW or SCEV::FlagNUW) on `PreStart` + |
1459 | 19.0k | // `Step`: |
1460 | 19.0k | |
1461 | 19.0k | // 1. NSW/NUW flags on the step increment. |
1462 | 19.0k | auto PreStartFlags = |
1463 | 19.0k | ScalarEvolution::maskFlags(SA->getNoWrapFlags(), SCEV::FlagNUW); |
1464 | 19.0k | const SCEV *PreStart = SE->getAddExpr(DiffOps, PreStartFlags); |
1465 | 19.0k | const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>( |
1466 | 19.0k | SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap)); |
1467 | 19.0k | |
1468 | 19.0k | // "{S,+,X} is <nsw>/<nuw>" and "the backedge is taken at least once" implies |
1469 | 19.0k | // "S+X does not sign/unsign-overflow". |
1470 | 19.0k | // |
1471 | 19.0k | |
1472 | 19.0k | const SCEV *BECount = SE->getBackedgeTakenCount(L); |
1473 | 19.0k | if (PreAR && PreAR->getNoWrapFlags(WrapType) && |
1474 | 19.0k | !isa<SCEVCouldNotCompute>(BECount)10.3k && SE->isKnownPositive(BECount)8.35k ) |
1475 | 523 | return PreStart; |
1476 | 18.5k | |
1477 | 18.5k | // 2. Direct overflow check on the step operation's expression. |
1478 | 18.5k | unsigned BitWidth = SE->getTypeSizeInBits(AR->getType()); |
1479 | 18.5k | Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2); |
1480 | 18.5k | const SCEV *OperandExtendedStart = |
1481 | 18.5k | SE->getAddExpr((SE->*GetExtendExpr)(PreStart, WideTy, Depth), |
1482 | 18.5k | (SE->*GetExtendExpr)(Step, WideTy, Depth)); |
1483 | 18.5k | if ((SE->*GetExtendExpr)(Start, WideTy, Depth) == OperandExtendedStart) { |
1484 | 9.48k | if (PreAR && AR->getNoWrapFlags(WrapType)) { |
1485 | 9.47k | // If we know `AR` == {`PreStart`+`Step`,+,`Step`} is `WrapType` (FlagNSW |
1486 | 9.47k | // or FlagNUW) and that `PreStart` + `Step` is `WrapType` too, then |
1487 | 9.47k | // `PreAR` == {`PreStart`,+,`Step`} is also `WrapType`. Cache this fact. |
1488 | 9.47k | const_cast<SCEVAddRecExpr *>(PreAR)->setNoWrapFlags(WrapType); |
1489 | 9.47k | } |
1490 | 9.48k | return PreStart; |
1491 | 9.48k | } |
1492 | 9.02k | |
1493 | 9.02k | // 3. Loop precondition. |
1494 | 9.02k | ICmpInst::Predicate Pred; |
1495 | 9.02k | const SCEV *OverflowLimit = |
1496 | 9.02k | ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep(Step, &Pred, SE); |
1497 | 9.02k | |
1498 | 9.02k | if (OverflowLimit && |
1499 | 9.02k | SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)8.68k ) |
1500 | 673 | return PreStart; |
1501 | 8.35k | |
1502 | 8.35k | return nullptr; |
1503 | 8.35k | } ScalarEvolution.cpp:llvm::SCEV const* getPreStartForExtend<llvm::SCEVZeroExtendExpr>(llvm::SCEVAddRecExpr const*, llvm::Type*, llvm::ScalarEvolution*, unsigned int) Line | Count | Source | 1434 | 315k | ScalarEvolution *SE, unsigned Depth) { | 1435 | 315k | auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType; | 1436 | 315k | auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr; | 1437 | 315k | | 1438 | 315k | const Loop *L = AR->getLoop(); | 1439 | 315k | const SCEV *Start = AR->getStart(); | 1440 | 315k | const SCEV *Step = AR->getStepRecurrence(*SE); | 1441 | 315k | | 1442 | 315k | // Check for a simple looking step prior to loop entry. | 1443 | 315k | const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start); | 1444 | 315k | if (!SA) | 1445 | 305k | return nullptr; | 1446 | 10.4k | | 1447 | 10.4k | // Create an AddExpr for "PreStart" after subtracting Step. Full SCEV | 1448 | 10.4k | // subtraction is expensive. For this purpose, perform a quick and dirty | 1449 | 10.4k | // difference, by checking for Step in the operand list. | 1450 | 10.4k | SmallVector<const SCEV *, 4> DiffOps; | 1451 | 10.4k | for (const SCEV *Op : SA->operands()) | 1452 | 21.2k | if (Op != Step) | 1453 | 12.8k | DiffOps.push_back(Op); | 1454 | 10.4k | | 1455 | 10.4k | if (DiffOps.size() == SA->getNumOperands()) | 1456 | 1.96k | return nullptr; | 1457 | 8.45k | | 1458 | 8.45k | // Try to prove `WrapType` (SCEV::FlagNSW or SCEV::FlagNUW) on `PreStart` + | 1459 | 8.45k | // `Step`: | 1460 | 8.45k | | 1461 | 8.45k | // 1. NSW/NUW flags on the step increment. | 1462 | 8.45k | auto PreStartFlags = | 1463 | 8.45k | ScalarEvolution::maskFlags(SA->getNoWrapFlags(), SCEV::FlagNUW); | 1464 | 8.45k | const SCEV *PreStart = SE->getAddExpr(DiffOps, PreStartFlags); | 1465 | 8.45k | const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>( | 1466 | 8.45k | SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap)); | 1467 | 8.45k | | 1468 | 8.45k | // "{S,+,X} is <nsw>/<nuw>" and "the backedge is taken at least once" implies | 1469 | 8.45k | // "S+X does not sign/unsign-overflow". | 1470 | 8.45k | // | 1471 | 8.45k | | 1472 | 8.45k | const SCEV *BECount = SE->getBackedgeTakenCount(L); | 1473 | 8.45k | if (PreAR && PreAR->getNoWrapFlags(WrapType) && | 1474 | 8.45k | !isa<SCEVCouldNotCompute>(BECount)1.40k && SE->isKnownPositive(BECount)1.20k ) | 1475 | 255 | return PreStart; | 1476 | 8.19k | | 1477 | 8.19k | // 2. Direct overflow check on the step operation's expression. | 1478 | 8.19k | unsigned BitWidth = SE->getTypeSizeInBits(AR->getType()); | 1479 | 8.19k | Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2); | 1480 | 8.19k | const SCEV *OperandExtendedStart = | 1481 | 8.19k | SE->getAddExpr((SE->*GetExtendExpr)(PreStart, WideTy, Depth), | 1482 | 8.19k | (SE->*GetExtendExpr)(Step, WideTy, Depth)); | 1483 | 8.19k | if ((SE->*GetExtendExpr)(Start, WideTy, Depth) == OperandExtendedStart) { | 1484 | 1.09k | if (PreAR && AR->getNoWrapFlags(WrapType)) { | 1485 | 1.09k | // If we know `AR` == {`PreStart`+`Step`,+,`Step`} is `WrapType` (FlagNSW | 1486 | 1.09k | // or FlagNUW) and that `PreStart` + `Step` is `WrapType` too, then | 1487 | 1.09k | // `PreAR` == {`PreStart`,+,`Step`} is also `WrapType`. Cache this fact. | 1488 | 1.09k | const_cast<SCEVAddRecExpr *>(PreAR)->setNoWrapFlags(WrapType); | 1489 | 1.09k | } | 1490 | 1.09k | return PreStart; | 1491 | 1.09k | } | 1492 | 7.10k | | 1493 | 7.10k | // 3. Loop precondition. | 1494 | 7.10k | ICmpInst::Predicate Pred; | 1495 | 7.10k | const SCEV *OverflowLimit = | 1496 | 7.10k | ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep(Step, &Pred, SE); | 1497 | 7.10k | | 1498 | 7.10k | if (OverflowLimit && | 1499 | 7.10k | SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)) | 1500 | 162 | return PreStart; | 1501 | 6.93k | | 1502 | 6.93k | return nullptr; | 1503 | 6.93k | } |
ScalarEvolution.cpp:llvm::SCEV const* getPreStartForExtend<llvm::SCEVSignExtendExpr>(llvm::SCEVAddRecExpr const*, llvm::Type*, llvm::ScalarEvolution*, unsigned int) Line | Count | Source | 1434 | 233k | ScalarEvolution *SE, unsigned Depth) { | 1435 | 233k | auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType; | 1436 | 233k | auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr; | 1437 | 233k | | 1438 | 233k | const Loop *L = AR->getLoop(); | 1439 | 233k | const SCEV *Start = AR->getStart(); | 1440 | 233k | const SCEV *Step = AR->getStepRecurrence(*SE); | 1441 | 233k | | 1442 | 233k | // Check for a simple looking step prior to loop entry. | 1443 | 233k | const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start); | 1444 | 233k | if (!SA) | 1445 | 219k | return nullptr; | 1446 | 13.9k | | 1447 | 13.9k | // Create an AddExpr for "PreStart" after subtracting Step. Full SCEV | 1448 | 13.9k | // subtraction is expensive. For this purpose, perform a quick and dirty | 1449 | 13.9k | // difference, by checking for Step in the operand list. | 1450 | 13.9k | SmallVector<const SCEV *, 4> DiffOps; | 1451 | 13.9k | for (const SCEV *Op : SA->operands()) | 1452 | 28.3k | if (Op != Step) | 1453 | 17.7k | DiffOps.push_back(Op); | 1454 | 13.9k | | 1455 | 13.9k | if (DiffOps.size() == SA->getNumOperands()) | 1456 | 3.38k | return nullptr; | 1457 | 10.5k | | 1458 | 10.5k | // Try to prove `WrapType` (SCEV::FlagNSW or SCEV::FlagNUW) on `PreStart` + | 1459 | 10.5k | // `Step`: | 1460 | 10.5k | | 1461 | 10.5k | // 1. NSW/NUW flags on the step increment. | 1462 | 10.5k | auto PreStartFlags = | 1463 | 10.5k | ScalarEvolution::maskFlags(SA->getNoWrapFlags(), SCEV::FlagNUW); | 1464 | 10.5k | const SCEV *PreStart = SE->getAddExpr(DiffOps, PreStartFlags); | 1465 | 10.5k | const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>( | 1466 | 10.5k | SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap)); | 1467 | 10.5k | | 1468 | 10.5k | // "{S,+,X} is <nsw>/<nuw>" and "the backedge is taken at least once" implies | 1469 | 10.5k | // "S+X does not sign/unsign-overflow". | 1470 | 10.5k | // | 1471 | 10.5k | | 1472 | 10.5k | const SCEV *BECount = SE->getBackedgeTakenCount(L); | 1473 | 10.5k | if (PreAR && PreAR->getNoWrapFlags(WrapType) && | 1474 | 10.5k | !isa<SCEVCouldNotCompute>(BECount)8.91k && SE->isKnownPositive(BECount)7.14k ) | 1475 | 268 | return PreStart; | 1476 | 10.3k | | 1477 | 10.3k | // 2. Direct overflow check on the step operation's expression. | 1478 | 10.3k | unsigned BitWidth = SE->getTypeSizeInBits(AR->getType()); | 1479 | 10.3k | Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2); | 1480 | 10.3k | const SCEV *OperandExtendedStart = | 1481 | 10.3k | SE->getAddExpr((SE->*GetExtendExpr)(PreStart, WideTy, Depth), | 1482 | 10.3k | (SE->*GetExtendExpr)(Step, WideTy, Depth)); | 1483 | 10.3k | if ((SE->*GetExtendExpr)(Start, WideTy, Depth) == OperandExtendedStart) { | 1484 | 8.38k | if (PreAR && AR->getNoWrapFlags(WrapType)) { | 1485 | 8.38k | // If we know `AR` == {`PreStart`+`Step`,+,`Step`} is `WrapType` (FlagNSW | 1486 | 8.38k | // or FlagNUW) and that `PreStart` + `Step` is `WrapType` too, then | 1487 | 8.38k | // `PreAR` == {`PreStart`,+,`Step`} is also `WrapType`. Cache this fact. | 1488 | 8.38k | const_cast<SCEVAddRecExpr *>(PreAR)->setNoWrapFlags(WrapType); | 1489 | 8.38k | } | 1490 | 8.38k | return PreStart; | 1491 | 8.38k | } | 1492 | 1.92k | | 1493 | 1.92k | // 3. Loop precondition. | 1494 | 1.92k | ICmpInst::Predicate Pred; | 1495 | 1.92k | const SCEV *OverflowLimit = | 1496 | 1.92k | ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep(Step, &Pred, SE); | 1497 | 1.92k | | 1498 | 1.92k | if (OverflowLimit && | 1499 | 1.92k | SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)1.58k ) | 1500 | 511 | return PreStart; | 1501 | 1.41k | | 1502 | 1.41k | return nullptr; | 1503 | 1.41k | } |
|
1504 | | |
1505 | | // Get the normalized zero or sign extended expression for this AddRec's Start. |
1506 | | template <typename ExtendOpTy> |
1507 | | static const SCEV *getExtendAddRecStart(const SCEVAddRecExpr *AR, Type *Ty, |
1508 | | ScalarEvolution *SE, |
1509 | 549k | unsigned Depth) { |
1510 | 549k | auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr; |
1511 | 549k | |
1512 | 549k | const SCEV *PreStart = getPreStartForExtend<ExtendOpTy>(AR, Ty, SE, Depth); |
1513 | 549k | if (!PreStart) |
1514 | 538k | return (SE->*GetExtendExpr)(AR->getStart(), Ty, Depth); |
1515 | 10.6k | |
1516 | 10.6k | return SE->getAddExpr((SE->*GetExtendExpr)(AR->getStepRecurrence(*SE), Ty, |
1517 | 10.6k | Depth), |
1518 | 10.6k | (SE->*GetExtendExpr)(PreStart, Ty, Depth)); |
1519 | 10.6k | } ScalarEvolution.cpp:llvm::SCEV const* getExtendAddRecStart<llvm::SCEVZeroExtendExpr>(llvm::SCEVAddRecExpr const*, llvm::Type*, llvm::ScalarEvolution*, unsigned int) Line | Count | Source | 1509 | 315k | unsigned Depth) { | 1510 | 315k | auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr; | 1511 | 315k | | 1512 | 315k | const SCEV *PreStart = getPreStartForExtend<ExtendOpTy>(AR, Ty, SE, Depth); | 1513 | 315k | if (!PreStart) | 1514 | 314k | return (SE->*GetExtendExpr)(AR->getStart(), Ty, Depth); | 1515 | 1.51k | | 1516 | 1.51k | return SE->getAddExpr((SE->*GetExtendExpr)(AR->getStepRecurrence(*SE), Ty, | 1517 | 1.51k | Depth), | 1518 | 1.51k | (SE->*GetExtendExpr)(PreStart, Ty, Depth)); | 1519 | 1.51k | } |
ScalarEvolution.cpp:llvm::SCEV const* getExtendAddRecStart<llvm::SCEVSignExtendExpr>(llvm::SCEVAddRecExpr const*, llvm::Type*, llvm::ScalarEvolution*, unsigned int) Line | Count | Source | 1509 | 233k | unsigned Depth) { | 1510 | 233k | auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr; | 1511 | 233k | | 1512 | 233k | const SCEV *PreStart = getPreStartForExtend<ExtendOpTy>(AR, Ty, SE, Depth); | 1513 | 233k | if (!PreStart) | 1514 | 224k | return (SE->*GetExtendExpr)(AR->getStart(), Ty, Depth); | 1515 | 9.16k | | 1516 | 9.16k | return SE->getAddExpr((SE->*GetExtendExpr)(AR->getStepRecurrence(*SE), Ty, | 1517 | 9.16k | Depth), | 1518 | 9.16k | (SE->*GetExtendExpr)(PreStart, Ty, Depth)); | 1519 | 9.16k | } |
|
1520 | | |
1521 | | // Try to prove away overflow by looking at "nearby" add recurrences. A |
1522 | | // motivating example for this rule: if we know `{0,+,4}` is `ult` `-1` and it |
1523 | | // does not itself wrap then we can conclude that `{1,+,4}` is `nuw`. |
1524 | | // |
1525 | | // Formally: |
1526 | | // |
1527 | | // {S,+,X} == {S-T,+,X} + T |
1528 | | // => Ext({S,+,X}) == Ext({S-T,+,X} + T) |
1529 | | // |
1530 | | // If ({S-T,+,X} + T) does not overflow ... (1) |
1531 | | // |
1532 | | // RHS == Ext({S-T,+,X} + T) == Ext({S-T,+,X}) + Ext(T) |
1533 | | // |
1534 | | // If {S-T,+,X} does not overflow ... (2) |
1535 | | // |
1536 | | // RHS == Ext({S-T,+,X}) + Ext(T) == {Ext(S-T),+,Ext(X)} + Ext(T) |
1537 | | // == {Ext(S-T)+Ext(T),+,Ext(X)} |
1538 | | // |
1539 | | // If (S-T)+T does not overflow ... (3) |
1540 | | // |
1541 | | // RHS == {Ext(S-T)+Ext(T),+,Ext(X)} == {Ext(S-T+T),+,Ext(X)} |
1542 | | // == {Ext(S),+,Ext(X)} == LHS |
1543 | | // |
1544 | | // Thus, if (1), (2) and (3) are true for some T, then |
1545 | | // Ext({S,+,X}) == {Ext(S),+,Ext(X)} |
1546 | | // |
1547 | | // (3) is implied by (1) -- "(S-T)+T does not overflow" is simply "({S-T,+,X}+T) |
1548 | | // does not overflow" restricted to the 0th iteration. Therefore we only need |
1549 | | // to check for (1) and (2). |
1550 | | // |
1551 | | // In the current context, S is `Start`, X is `Step`, Ext is `ExtendOpTy` and T |
1552 | | // is `Delta` (defined below). |
1553 | | template <typename ExtendOpTy> |
1554 | | bool ScalarEvolution::proveNoWrapByVaryingStart(const SCEV *Start, |
1555 | | const SCEV *Step, |
1556 | 612k | const Loop *L) { |
1557 | 612k | auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType; |
1558 | 612k | |
1559 | 612k | // We restrict `Start` to a constant to prevent SCEV from spending too much |
1560 | 612k | // time here. It is correct (but more expensive) to continue with a |
1561 | 612k | // non-constant `Start` and do a general SCEV subtraction to compute |
1562 | 612k | // `PreStart` below. |
1563 | 612k | const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start); |
1564 | 612k | if (!StartC) |
1565 | 385k | return false; |
1566 | 226k | |
1567 | 226k | APInt StartAI = StartC->getAPInt(); |
1568 | 226k | |
1569 | 905k | for (unsigned Delta : {-2, -1, 1, 2}) { |
1570 | 905k | const SCEV *PreStart = getConstant(StartAI - Delta); |
1571 | 905k | |
1572 | 905k | FoldingSetNodeID ID; |
1573 | 905k | ID.AddInteger(scAddRecExpr); |
1574 | 905k | ID.AddPointer(PreStart); |
1575 | 905k | ID.AddPointer(Step); |
1576 | 905k | ID.AddPointer(L); |
1577 | 905k | void *IP = nullptr; |
1578 | 905k | const auto *PreAR = |
1579 | 905k | static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); |
1580 | 905k | |
1581 | 905k | // Give up if we don't already have the add recurrence we need because |
1582 | 905k | // actually constructing an add recurrence is relatively expensive. |
1583 | 905k | if (PreAR && PreAR->getNoWrapFlags(WrapType)284k ) { // proves (2) |
1584 | 11.7k | const SCEV *DeltaS = getConstant(StartC->getType(), Delta); |
1585 | 11.7k | ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE; |
1586 | 11.7k | const SCEV *Limit = ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep( |
1587 | 11.7k | DeltaS, &Pred, this); |
1588 | 11.7k | if (Limit && isKnownPredicate(Pred, PreAR, Limit)) // proves (1) |
1589 | 1.75k | return true; |
1590 | 11.7k | } |
1591 | 905k | } |
1592 | 226k | |
1593 | 226k | return false224k ; |
1594 | 226k | } bool llvm::ScalarEvolution::proveNoWrapByVaryingStart<llvm::SCEVZeroExtendExpr>(llvm::SCEV const*, llvm::SCEV const*, llvm::Loop const*) Line | Count | Source | 1556 | 256k | const Loop *L) { | 1557 | 256k | auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType; | 1558 | 256k | | 1559 | 256k | // We restrict `Start` to a constant to prevent SCEV from spending too much | 1560 | 256k | // time here. It is correct (but more expensive) to continue with a | 1561 | 256k | // non-constant `Start` and do a general SCEV subtraction to compute | 1562 | 256k | // `PreStart` below. | 1563 | 256k | const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start); | 1564 | 256k | if (!StartC) | 1565 | 167k | return false; | 1566 | 88.5k | | 1567 | 88.5k | APInt StartAI = StartC->getAPInt(); | 1568 | 88.5k | | 1569 | 354k | for (unsigned Delta : {-2, -1, 1, 2}) { | 1570 | 354k | const SCEV *PreStart = getConstant(StartAI - Delta); | 1571 | 354k | | 1572 | 354k | FoldingSetNodeID ID; | 1573 | 354k | ID.AddInteger(scAddRecExpr); | 1574 | 354k | ID.AddPointer(PreStart); | 1575 | 354k | ID.AddPointer(Step); | 1576 | 354k | ID.AddPointer(L); | 1577 | 354k | void *IP = nullptr; | 1578 | 354k | const auto *PreAR = | 1579 | 354k | static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); | 1580 | 354k | | 1581 | 354k | // Give up if we don't already have the add recurrence we need because | 1582 | 354k | // actually constructing an add recurrence is relatively expensive. | 1583 | 354k | if (PreAR && PreAR->getNoWrapFlags(WrapType)111k ) { // proves (2) | 1584 | 5.76k | const SCEV *DeltaS = getConstant(StartC->getType(), Delta); | 1585 | 5.76k | ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE; | 1586 | 5.76k | const SCEV *Limit = ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep( | 1587 | 5.76k | DeltaS, &Pred, this); | 1588 | 5.76k | if (Limit && isKnownPredicate(Pred, PreAR, Limit)) // proves (1) | 1589 | 1.59k | return true; | 1590 | 5.76k | } | 1591 | 354k | } | 1592 | 88.5k | | 1593 | 88.5k | return false86.9k ; | 1594 | 88.5k | } |
bool llvm::ScalarEvolution::proveNoWrapByVaryingStart<llvm::SCEVSignExtendExpr>(llvm::SCEV const*, llvm::SCEV const*, llvm::Loop const*) Line | Count | Source | 1556 | 355k | const Loop *L) { | 1557 | 355k | auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType; | 1558 | 355k | | 1559 | 355k | // We restrict `Start` to a constant to prevent SCEV from spending too much | 1560 | 355k | // time here. It is correct (but more expensive) to continue with a | 1561 | 355k | // non-constant `Start` and do a general SCEV subtraction to compute | 1562 | 355k | // `PreStart` below. | 1563 | 355k | const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start); | 1564 | 355k | if (!StartC) | 1565 | 217k | return false; | 1566 | 137k | | 1567 | 137k | APInt StartAI = StartC->getAPInt(); | 1568 | 137k | | 1569 | 551k | for (unsigned Delta : {-2, -1, 1, 2}) { | 1570 | 551k | const SCEV *PreStart = getConstant(StartAI - Delta); | 1571 | 551k | | 1572 | 551k | FoldingSetNodeID ID; | 1573 | 551k | ID.AddInteger(scAddRecExpr); | 1574 | 551k | ID.AddPointer(PreStart); | 1575 | 551k | ID.AddPointer(Step); | 1576 | 551k | ID.AddPointer(L); | 1577 | 551k | void *IP = nullptr; | 1578 | 551k | const auto *PreAR = | 1579 | 551k | static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); | 1580 | 551k | | 1581 | 551k | // Give up if we don't already have the add recurrence we need because | 1582 | 551k | // actually constructing an add recurrence is relatively expensive. | 1583 | 551k | if (PreAR && PreAR->getNoWrapFlags(WrapType)172k ) { // proves (2) | 1584 | 6.00k | const SCEV *DeltaS = getConstant(StartC->getType(), Delta); | 1585 | 6.00k | ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE; | 1586 | 6.00k | const SCEV *Limit = ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep( | 1587 | 6.00k | DeltaS, &Pred, this); | 1588 | 6.00k | if (Limit && isKnownPredicate(Pred, PreAR, Limit)) // proves (1) | 1589 | 167 | return true; | 1590 | 6.00k | } | 1591 | 551k | } | 1592 | 137k | | 1593 | 137k | return false137k ; | 1594 | 137k | } |
|
1595 | | |
1596 | | // Finds an integer D for an expression (C + x + y + ...) such that the top |
1597 | | // level addition in (D + (C - D + x + y + ...)) would not wrap (signed or |
1598 | | // unsigned) and the number of trailing zeros of (C - D + x + y + ...) is |
1599 | | // maximized, where C is the \p ConstantTerm, x, y, ... are arbitrary SCEVs, and |
1600 | | // the (C + x + y + ...) expression is \p WholeAddExpr. |
1601 | | static APInt extractConstantWithoutWrapping(ScalarEvolution &SE, |
1602 | | const SCEVConstant *ConstantTerm, |
1603 | 306k | const SCEVAddExpr *WholeAddExpr) { |
1604 | 306k | const APInt C = ConstantTerm->getAPInt(); |
1605 | 306k | const unsigned BitWidth = C.getBitWidth(); |
1606 | 306k | // Find number of trailing zeros of (x + y + ...) w/o the C first: |
1607 | 306k | uint32_t TZ = BitWidth; |
1608 | 627k | for (unsigned I = 1, E = WholeAddExpr->getNumOperands(); I < E && TZ358k ; ++I321k ) |
1609 | 321k | TZ = std::min(TZ, SE.GetMinTrailingZeros(WholeAddExpr->getOperand(I))); |
1610 | 306k | if (TZ) { |
1611 | 90.6k | // Set D to be as many least significant bits of C as possible while still |
1612 | 90.6k | // guaranteeing that adding D to (C - D + x + y + ...) won't cause a wrap: |
1613 | 90.6k | return TZ < BitWidth ? C.trunc(TZ).zext(BitWidth) : C0 ; |
1614 | 90.6k | } |
1615 | 215k | return APInt(BitWidth, 0); |
1616 | 215k | } |
1617 | | |
1618 | | // Finds an integer D for an affine AddRec expression {C,+,x} such that the top |
1619 | | // level addition in (D + {C-D,+,x}) would not wrap (signed or unsigned) and the |
1620 | | // number of trailing zeros of (C - D + x * n) is maximized, where C is the \p |
1621 | | // ConstantStart, x is an arbitrary \p Step, and n is the loop trip count. |
1622 | | static APInt extractConstantWithoutWrapping(ScalarEvolution &SE, |
1623 | | const APInt &ConstantStart, |
1624 | 245k | const SCEV *Step) { |
1625 | 245k | const unsigned BitWidth = ConstantStart.getBitWidth(); |
1626 | 245k | const uint32_t TZ = SE.GetMinTrailingZeros(Step); |
1627 | 245k | if (TZ) |
1628 | 73.3k | return TZ < BitWidth ? ConstantStart.trunc(TZ).zext(BitWidth) |
1629 | 73.3k | : ConstantStart0 ; |
1630 | 171k | return APInt(BitWidth, 0); |
1631 | 171k | } |
1632 | | |
1633 | | const SCEV * |
1634 | 14.9M | ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { |
1635 | 14.9M | assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && |
1636 | 14.9M | "This is not an extending conversion!"); |
1637 | 14.9M | assert(isSCEVable(Ty) && |
1638 | 14.9M | "This is not a conversion to a SCEVable type!"); |
1639 | 14.9M | Ty = getEffectiveSCEVType(Ty); |
1640 | 14.9M | |
1641 | 14.9M | // Fold if the operand is constant. |
1642 | 14.9M | if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op)) |
1643 | 7.95M | return getConstant( |
1644 | 7.95M | cast<ConstantInt>(ConstantExpr::getZExt(SC->getValue(), Ty))); |
1645 | 6.94M | |
1646 | 6.94M | // zext(zext(x)) --> zext(x) |
1647 | 6.94M | if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op)) |
1648 | 195k | return getZeroExtendExpr(SZ->getOperand(), Ty, Depth + 1); |
1649 | 6.74M | |
1650 | 6.74M | // Before doing any expensive analysis, check to see if we've already |
1651 | 6.74M | // computed a SCEV for this Op and Ty. |
1652 | 6.74M | FoldingSetNodeID ID; |
1653 | 6.74M | ID.AddInteger(scZeroExtend); |
1654 | 6.74M | ID.AddPointer(Op); |
1655 | 6.74M | ID.AddPointer(Ty); |
1656 | 6.74M | void *IP = nullptr; |
1657 | 6.74M | if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S5.16M ; |
1658 | 1.58M | if (Depth > MaxCastDepth) { |
1659 | 12 | SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator), |
1660 | 12 | Op, Ty); |
1661 | 12 | UniqueSCEVs.InsertNode(S, IP); |
1662 | 12 | addToLoopUseLists(S); |
1663 | 12 | return S; |
1664 | 12 | } |
1665 | 1.58M | |
1666 | 1.58M | // zext(trunc(x)) --> zext(x) or x or trunc(x) |
1667 | 1.58M | if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) { |
1668 | 65.7k | // It's possible the bits taken off by the truncate were all zero bits. If |
1669 | 65.7k | // so, we should be able to simplify this further. |
1670 | 65.7k | const SCEV *X = ST->getOperand(); |
1671 | 65.7k | ConstantRange CR = getUnsignedRange(X); |
1672 | 65.7k | unsigned TruncBits = getTypeSizeInBits(ST->getType()); |
1673 | 65.7k | unsigned NewBits = getTypeSizeInBits(Ty); |
1674 | 65.7k | if (CR.truncate(TruncBits).zeroExtend(NewBits).contains( |
1675 | 65.7k | CR.zextOrTrunc(NewBits))) |
1676 | 25.9k | return getTruncateOrZeroExtend(X, Ty, Depth); |
1677 | 1.55M | } |
1678 | 1.55M | |
1679 | 1.55M | // If the input value is a chrec scev, and we can prove that the value |
1680 | 1.55M | // did not overflow the old, smaller, value, we can zero extend all of the |
1681 | 1.55M | // operands (often constants). This allows analysis of something like |
1682 | 1.55M | // this: for (unsigned char X = 0; X < 100; ++X) { int Y = X; } |
1683 | 1.55M | if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op)) |
1684 | 580k | if (AR->isAffine()) { |
1685 | 579k | const SCEV *Start = AR->getStart(); |
1686 | 579k | const SCEV *Step = AR->getStepRecurrence(*this); |
1687 | 579k | unsigned BitWidth = getTypeSizeInBits(AR->getType()); |
1688 | 579k | const Loop *L = AR->getLoop(); |
1689 | 579k | |
1690 | 579k | if (!AR->hasNoUnsignedWrap()) { |
1691 | 358k | auto NewFlags = proveNoWrapViaConstantRanges(AR); |
1692 | 358k | const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(NewFlags); |
1693 | 358k | } |
1694 | 579k | |
1695 | 579k | // If we have special knowledge that this addrec won't overflow, |
1696 | 579k | // we don't need to do any further analysis. |
1697 | 579k | if (AR->hasNoUnsignedWrap()) |
1698 | 228k | return getAddRecExpr( |
1699 | 228k | getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Depth + 1), |
1700 | 228k | getZeroExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags()); |
1701 | 351k | |
1702 | 351k | // Check whether the backedge-taken count is SCEVCouldNotCompute. |
1703 | 351k | // Note that this serves two purposes: It filters out loops that are |
1704 | 351k | // simply not analyzable, and it covers the case where this code is |
1705 | 351k | // being called from within backedge-taken count analysis, such that |
1706 | 351k | // attempting to ask for the backedge-taken count would likely result |
1707 | 351k | // in infinite recursion. In the later case, the analysis code will |
1708 | 351k | // cope with a conservative value, and it will take care to purge |
1709 | 351k | // that value once it has finished. |
1710 | 351k | const SCEV *MaxBECount = getMaxBackedgeTakenCount(L); |
1711 | 351k | if (!isa<SCEVCouldNotCompute>(MaxBECount)) { |
1712 | 307k | // Manually compute the final value for AR, checking for |
1713 | 307k | // overflow. |
1714 | 307k | |
1715 | 307k | // Check whether the backedge-taken count can be losslessly casted to |
1716 | 307k | // the addrec's type. The count is always unsigned. |
1717 | 307k | const SCEV *CastedMaxBECount = |
1718 | 307k | getTruncateOrZeroExtend(MaxBECount, Start->getType(), Depth); |
1719 | 307k | const SCEV *RecastedMaxBECount = getTruncateOrZeroExtend( |
1720 | 307k | CastedMaxBECount, MaxBECount->getType(), Depth); |
1721 | 307k | if (MaxBECount == RecastedMaxBECount) { |
1722 | 239k | Type *WideTy = IntegerType::get(getContext(), BitWidth * 2); |
1723 | 239k | // Check whether Start+Step*MaxBECount has no unsigned overflow. |
1724 | 239k | const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step, |
1725 | 239k | SCEV::FlagAnyWrap, Depth + 1); |
1726 | 239k | const SCEV *ZAdd = getZeroExtendExpr(getAddExpr(Start, ZMul, |
1727 | 239k | SCEV::FlagAnyWrap, |
1728 | 239k | Depth + 1), |
1729 | 239k | WideTy, Depth + 1); |
1730 | 239k | const SCEV *WideStart = getZeroExtendExpr(Start, WideTy, Depth + 1); |
1731 | 239k | const SCEV *WideMaxBECount = |
1732 | 239k | getZeroExtendExpr(CastedMaxBECount, WideTy, Depth + 1); |
1733 | 239k | const SCEV *OperandExtendedAdd = |
1734 | 239k | getAddExpr(WideStart, |
1735 | 239k | getMulExpr(WideMaxBECount, |
1736 | 239k | getZeroExtendExpr(Step, WideTy, Depth + 1), |
1737 | 239k | SCEV::FlagAnyWrap, Depth + 1), |
1738 | 239k | SCEV::FlagAnyWrap, Depth + 1); |
1739 | 239k | if (ZAdd == OperandExtendedAdd) { |
1740 | 41.0k | // Cache knowledge of AR NUW, which is propagated to this AddRec. |
1741 | 41.0k | const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW); |
1742 | 41.0k | // Return the expression with the addrec on the outside. |
1743 | 41.0k | return getAddRecExpr( |
1744 | 41.0k | getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, |
1745 | 41.0k | Depth + 1), |
1746 | 41.0k | getZeroExtendExpr(Step, Ty, Depth + 1), L, |
1747 | 41.0k | AR->getNoWrapFlags()); |
1748 | 41.0k | } |
1749 | 197k | // Similar to above, only this time treat the step value as signed. |
1750 | 197k | // This covers loops that count down. |
1751 | 197k | OperandExtendedAdd = |
1752 | 197k | getAddExpr(WideStart, |
1753 | 197k | getMulExpr(WideMaxBECount, |
1754 | 197k | getSignExtendExpr(Step, WideTy, Depth + 1), |
1755 | 197k | SCEV::FlagAnyWrap, Depth + 1), |
1756 | 197k | SCEV::FlagAnyWrap, Depth + 1); |
1757 | 197k | if (ZAdd == OperandExtendedAdd) { |
1758 | 20.6k | // Cache knowledge of AR NW, which is propagated to this AddRec. |
1759 | 20.6k | // Negative step causes unsigned wrap, but it still can't self-wrap. |
1760 | 20.6k | const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW); |
1761 | 20.6k | // Return the expression with the addrec on the outside. |
1762 | 20.6k | return getAddRecExpr( |
1763 | 20.6k | getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, |
1764 | 20.6k | Depth + 1), |
1765 | 20.6k | getSignExtendExpr(Step, Ty, Depth + 1), L, |
1766 | 20.6k | AR->getNoWrapFlags()); |
1767 | 20.6k | } |
1768 | 289k | } |
1769 | 307k | } |
1770 | 289k | |
1771 | 289k | // Normally, in the cases we can prove no-overflow via a |
1772 | 289k | // backedge guarding condition, we can also compute a backedge |
1773 | 289k | // taken count for the loop. The exceptions are assumptions and |
1774 | 289k | // guards present in the loop -- SCEV is not great at exploiting |
1775 | 289k | // these to compute max backedge taken counts, but can still use |
1776 | 289k | // these to prove lack of overflow. Use this fact to avoid |
1777 | 289k | // doing extra work that may not pay off. |
1778 | 289k | if (!isa<SCEVCouldNotCompute>(MaxBECount) || HasGuards43.4k || |
1779 | 289k | !AC.assumptions().empty()43.4k ) { |
1780 | 246k | // If the backedge is guarded by a comparison with the pre-inc |
1781 | 246k | // value the addrec is safe. Also, if the entry is guarded by |
1782 | 246k | // a comparison with the start value and the backedge is |
1783 | 246k | // guarded by a comparison with the post-inc value, the addrec |
1784 | 246k | // is safe. |
1785 | 246k | if (isKnownPositive(Step)) { |
1786 | 119k | const SCEV *N = getConstant(APInt::getMinValue(BitWidth) - |
1787 | 119k | getUnsignedRangeMax(Step)); |
1788 | 119k | if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) || |
1789 | 119k | isKnownOnEveryIteration(ICmpInst::ICMP_ULT, AR, N)118k ) { |
1790 | 461 | // Cache knowledge of AR NUW, which is propagated to this |
1791 | 461 | // AddRec. |
1792 | 461 | const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW); |
1793 | 461 | // Return the expression with the addrec on the outside. |
1794 | 461 | return getAddRecExpr( |
1795 | 461 | getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, |
1796 | 461 | Depth + 1), |
1797 | 461 | getZeroExtendExpr(Step, Ty, Depth + 1), L, |
1798 | 461 | AR->getNoWrapFlags()); |
1799 | 461 | } |
1800 | 126k | } else if (isKnownNegative(Step)) { |
1801 | 114k | const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) - |
1802 | 114k | getSignedRangeMin(Step)); |
1803 | 114k | if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) || |
1804 | 114k | isKnownOnEveryIteration(ICmpInst::ICMP_UGT, AR, N)94.7k ) { |
1805 | 23.1k | // Cache knowledge of AR NW, which is propagated to this |
1806 | 23.1k | // AddRec. Negative step causes unsigned wrap, but it |
1807 | 23.1k | // still can't self-wrap. |
1808 | 23.1k | const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW); |
1809 | 23.1k | // Return the expression with the addrec on the outside. |
1810 | 23.1k | return getAddRecExpr( |
1811 | 23.1k | getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, |
1812 | 23.1k | Depth + 1), |
1813 | 23.1k | getSignExtendExpr(Step, Ty, Depth + 1), L, |
1814 | 23.1k | AR->getNoWrapFlags()); |
1815 | 23.1k | } |
1816 | 266k | } |
1817 | 246k | } |
1818 | 266k | |
1819 | 266k | // zext({C,+,Step}) --> (zext(D) + zext({C-D,+,Step}))<nuw><nsw> |
1820 | 266k | // if D + (C - D + Step * n) could be proven to not unsigned wrap |
1821 | 266k | // where D maximizes the number of trailing zeros of (C - D + Step * n) |
1822 | 266k | if (const auto *SC = dyn_cast<SCEVConstant>(Start)) { |
1823 | 98.3k | const APInt &C = SC->getAPInt(); |
1824 | 98.3k | const APInt &D = extractConstantWithoutWrapping(*this, C, Step); |
1825 | 98.3k | if (D != 0) { |
1826 | 9.74k | const SCEV *SZExtD = getZeroExtendExpr(getConstant(D), Ty, Depth); |
1827 | 9.74k | const SCEV *SResidual = |
1828 | 9.74k | getAddRecExpr(getConstant(C - D), Step, L, AR->getNoWrapFlags()); |
1829 | 9.74k | const SCEV *SZExtR = getZeroExtendExpr(SResidual, Ty, Depth + 1); |
1830 | 9.74k | return getAddExpr(SZExtD, SZExtR, |
1831 | 9.74k | (SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNUW), |
1832 | 9.74k | Depth + 1); |
1833 | 9.74k | } |
1834 | 256k | } |
1835 | 256k | |
1836 | 256k | if (proveNoWrapByVaryingStart<SCEVZeroExtendExpr>(Start, Step, L)) { |
1837 | 1.59k | const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW); |
1838 | 1.59k | return getAddRecExpr( |
1839 | 1.59k | getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Depth + 1), |
1840 | 1.59k | getZeroExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags()); |
1841 | 1.59k | } |
1842 | 1.22M | } |
1843 | 1.22M | |
1844 | 1.22M | // zext(A % B) --> zext(A) % zext(B) |
1845 | 1.22M | { |
1846 | 1.22M | const SCEV *LHS; |
1847 | 1.22M | const SCEV *RHS; |
1848 | 1.22M | if (matchURem(Op, LHS, RHS)) |
1849 | 293 | return getURemExpr(getZeroExtendExpr(LHS, Ty, Depth + 1), |
1850 | 293 | getZeroExtendExpr(RHS, Ty, Depth + 1)); |
1851 | 1.22M | } |
1852 | 1.22M | |
1853 | 1.22M | // zext(A / B) --> zext(A) / zext(B). |
1854 | 1.22M | if (auto *Div = dyn_cast<SCEVUDivExpr>(Op)) |
1855 | 87.9k | return getUDivExpr(getZeroExtendExpr(Div->getLHS(), Ty, Depth + 1), |
1856 | 87.9k | getZeroExtendExpr(Div->getRHS(), Ty, Depth + 1)); |
1857 | 1.14M | |
1858 | 1.14M | if (auto *SA = dyn_cast<SCEVAddExpr>(Op)) { |
1859 | 281k | // zext((A + B + ...)<nuw>) --> (zext(A) + zext(B) + ...)<nuw> |
1860 | 281k | if (SA->hasNoUnsignedWrap()) { |
1861 | 40.7k | // If the addition does not unsign overflow then we can, by definition, |
1862 | 40.7k | // commute the zero extension with the addition operation. |
1863 | 40.7k | SmallVector<const SCEV *, 4> Ops; |
1864 | 40.7k | for (const auto *Op : SA->operands()) |
1865 | 81.5k | Ops.push_back(getZeroExtendExpr(Op, Ty, Depth + 1)); |
1866 | 40.7k | return getAddExpr(Ops, SCEV::FlagNUW, Depth + 1); |
1867 | 40.7k | } |
1868 | 240k | |
1869 | 240k | // zext(C + x + y + ...) --> (zext(D) + zext((C - D) + x + y + ...)) |
1870 | 240k | // if D + (C - D + x + y + ...) could be proven to not unsigned wrap |
1871 | 240k | // where D maximizes the number of trailing zeros of (C - D + x + y + ...) |
1872 | 240k | // |
1873 | 240k | // Often address arithmetics contain expressions like |
1874 | 240k | // (zext (add (shl X, C1), C2)), for instance, (zext (5 + (4 * X))). |
1875 | 240k | // This transformation is useful while proving that such expressions are |
1876 | 240k | // equal or differ by a small constant amount, see LoadStoreVectorizer pass. |
1877 | 240k | if (const auto *SC = dyn_cast<SCEVConstant>(SA->getOperand(0))) { |
1878 | 200k | const APInt &D = extractConstantWithoutWrapping(*this, SC, SA); |
1879 | 200k | if (D != 0) { |
1880 | 6.67k | const SCEV *SZExtD = getZeroExtendExpr(getConstant(D), Ty, Depth); |
1881 | 6.67k | const SCEV *SResidual = |
1882 | 6.67k | getAddExpr(getConstant(-D), SA, SCEV::FlagAnyWrap, Depth); |
1883 | 6.67k | const SCEV *SZExtR = getZeroExtendExpr(SResidual, Ty, Depth + 1); |
1884 | 6.67k | return getAddExpr(SZExtD, SZExtR, |
1885 | 6.67k | (SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNUW), |
1886 | 6.67k | Depth + 1); |
1887 | 6.67k | } |
1888 | 1.09M | } |
1889 | 240k | } |
1890 | 1.09M | |
1891 | 1.09M | if (auto *SM = dyn_cast<SCEVMulExpr>(Op)) { |
1892 | 192k | // zext((A * B * ...)<nuw>) --> (zext(A) * zext(B) * ...)<nuw> |
1893 | 192k | if (SM->hasNoUnsignedWrap()) { |
1894 | 149k | // If the multiply does not unsign overflow then we can, by definition, |
1895 | 149k | // commute the zero extension with the multiply operation. |
1896 | 149k | SmallVector<const SCEV *, 4> Ops; |
1897 | 149k | for (const auto *Op : SM->operands()) |
1898 | 298k | Ops.push_back(getZeroExtendExpr(Op, Ty, Depth + 1)); |
1899 | 149k | return getMulExpr(Ops, SCEV::FlagNUW, Depth + 1); |
1900 | 149k | } |
1901 | 43.4k | |
1902 | 43.4k | // zext(2^K * (trunc X to iN)) to iM -> |
1903 | 43.4k | // 2^K * (zext(trunc X to i{N-K}) to iM)<nuw> |
1904 | 43.4k | // |
1905 | 43.4k | // Proof: |
1906 | 43.4k | // |
1907 | 43.4k | // zext(2^K * (trunc X to iN)) to iM |
1908 | 43.4k | // = zext((trunc X to iN) << K) to iM |
1909 | 43.4k | // = zext((trunc X to i{N-K}) << K)<nuw> to iM |
1910 | 43.4k | // (because shl removes the top K bits) |
1911 | 43.4k | // = zext((2^K * (trunc X to i{N-K}))<nuw>) to iM |
1912 | 43.4k | // = (2^K * (zext(trunc X to i{N-K}) to iM))<nuw>. |
1913 | 43.4k | // |
1914 | 43.4k | if (SM->getNumOperands() == 2) |
1915 | 43.0k | if (auto *MulLHS = dyn_cast<SCEVConstant>(SM->getOperand(0))) |
1916 | 33.8k | if (MulLHS->getAPInt().isPowerOf2()) |
1917 | 10.4k | if (auto *TruncRHS = dyn_cast<SCEVTruncateExpr>(SM->getOperand(1))) { |
1918 | 1.23k | int NewTruncBits = getTypeSizeInBits(TruncRHS->getType()) - |
1919 | 1.23k | MulLHS->getAPInt().logBase2(); |
1920 | 1.23k | Type *NewTruncTy = IntegerType::get(getContext(), NewTruncBits); |
1921 | 1.23k | return getMulExpr( |
1922 | 1.23k | getZeroExtendExpr(MulLHS, Ty), |
1923 | 1.23k | getZeroExtendExpr( |
1924 | 1.23k | getTruncateExpr(TruncRHS->getOperand(), NewTruncTy), Ty), |
1925 | 1.23k | SCEV::FlagNUW, Depth + 1); |
1926 | 1.23k | } |
1927 | 943k | } |
1928 | 943k | |
1929 | 943k | // The cast wasn't folded; create an explicit cast node. |
1930 | 943k | // Recompute the insert position, as it may have been invalidated. |
1931 | 943k | if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S51.2k ; |
1932 | 892k | SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator), |
1933 | 892k | Op, Ty); |
1934 | 892k | UniqueSCEVs.InsertNode(S, IP); |
1935 | 892k | addToLoopUseLists(S); |
1936 | 892k | return S; |
1937 | 892k | } |
1938 | | |
1939 | | const SCEV * |
1940 | 5.02M | ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { |
1941 | 5.02M | assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && |
1942 | 5.02M | "This is not an extending conversion!"); |
1943 | 5.02M | assert(isSCEVable(Ty) && |
1944 | 5.02M | "This is not a conversion to a SCEVable type!"); |
1945 | 5.02M | Ty = getEffectiveSCEVType(Ty); |
1946 | 5.02M | |
1947 | 5.02M | // Fold if the operand is constant. |
1948 | 5.02M | if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op)) |
1949 | 2.32M | return getConstant( |
1950 | 2.32M | cast<ConstantInt>(ConstantExpr::getSExt(SC->getValue(), Ty))); |
1951 | 2.69M | |
1952 | 2.69M | // sext(sext(x)) --> sext(x) |
1953 | 2.69M | if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op)) |
1954 | 25.0k | return getSignExtendExpr(SS->getOperand(), Ty, Depth + 1); |
1955 | 2.66M | |
1956 | 2.66M | // sext(zext(x)) --> zext(x) |
1957 | 2.66M | if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op)) |
1958 | 52.1k | return getZeroExtendExpr(SZ->getOperand(), Ty, Depth + 1); |
1959 | 2.61M | |
1960 | 2.61M | // Before doing any expensive analysis, check to see if we've already |
1961 | 2.61M | // computed a SCEV for this Op and Ty. |
1962 | 2.61M | FoldingSetNodeID ID; |
1963 | 2.61M | ID.AddInteger(scSignExtend); |
1964 | 2.61M | ID.AddPointer(Op); |
1965 | 2.61M | ID.AddPointer(Ty); |
1966 | 2.61M | void *IP = nullptr; |
1967 | 2.61M | if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S1.25M ; |
1968 | 1.36M | // Limit recursion depth. |
1969 | 1.36M | if (Depth > MaxCastDepth) { |
1970 | 4 | SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator), |
1971 | 4 | Op, Ty); |
1972 | 4 | UniqueSCEVs.InsertNode(S, IP); |
1973 | 4 | addToLoopUseLists(S); |
1974 | 4 | return S; |
1975 | 4 | } |
1976 | 1.36M | |
1977 | 1.36M | // sext(trunc(x)) --> sext(x) or x or trunc(x) |
1978 | 1.36M | if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) { |
1979 | 31.8k | // It's possible the bits taken off by the truncate were all sign bits. If |
1980 | 31.8k | // so, we should be able to simplify this further. |
1981 | 31.8k | const SCEV *X = ST->getOperand(); |
1982 | 31.8k | ConstantRange CR = getSignedRange(X); |
1983 | 31.8k | unsigned TruncBits = getTypeSizeInBits(ST->getType()); |
1984 | 31.8k | unsigned NewBits = getTypeSizeInBits(Ty); |
1985 | 31.8k | if (CR.truncate(TruncBits).signExtend(NewBits).contains( |
1986 | 31.8k | CR.sextOrTrunc(NewBits))) |
1987 | 20.8k | return getTruncateOrSignExtend(X, Ty, Depth); |
1988 | 1.34M | } |
1989 | 1.34M | |
1990 | 1.34M | if (auto *SA = dyn_cast<SCEVAddExpr>(Op)) { |
1991 | 281k | // sext((A + B + ...)<nsw>) --> (sext(A) + sext(B) + ...)<nsw> |
1992 | 281k | if (SA->hasNoSignedWrap()) { |
1993 | 121k | // If the addition does not sign overflow then we can, by definition, |
1994 | 121k | // commute the sign extension with the addition operation. |
1995 | 121k | SmallVector<const SCEV *, 4> Ops; |
1996 | 121k | for (const auto *Op : SA->operands()) |
1997 | 243k | Ops.push_back(getSignExtendExpr(Op, Ty, Depth + 1)); |
1998 | 121k | return getAddExpr(Ops, SCEV::FlagNSW, Depth + 1); |
1999 | 121k | } |
2000 | 159k | |
2001 | 159k | // sext(C + x + y + ...) --> (sext(D) + sext((C - D) + x + y + ...)) |
2002 | 159k | // if D + (C - D + x + y + ...) could be proven to not signed wrap |
2003 | 159k | // where D maximizes the number of trailing zeros of (C - D + x + y + ...) |
2004 | 159k | // |
2005 | 159k | // For instance, this will bring two seemingly different expressions: |
2006 | 159k | // 1 + sext(5 + 20 * %x + 24 * %y) and |
2007 | 159k | // sext(6 + 20 * %x + 24 * %y) |
2008 | 159k | // to the same form: |
2009 | 159k | // 2 + sext(4 + 20 * %x + 24 * %y) |
2010 | 159k | if (const auto *SC = dyn_cast<SCEVConstant>(SA->getOperand(0))) { |
2011 | 105k | const APInt &D = extractConstantWithoutWrapping(*this, SC, SA); |
2012 | 105k | if (D != 0) { |
2013 | 5.51k | const SCEV *SSExtD = getSignExtendExpr(getConstant(D), Ty, Depth); |
2014 | 5.51k | const SCEV *SResidual = |
2015 | 5.51k | getAddExpr(getConstant(-D), SA, SCEV::FlagAnyWrap, Depth); |
2016 | 5.51k | const SCEV *SSExtR = getSignExtendExpr(SResidual, Ty, Depth + 1); |
2017 | 5.51k | return getAddExpr(SSExtD, SSExtR, |
2018 | 5.51k | (SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNUW), |
2019 | 5.51k | Depth + 1); |
2020 | 5.51k | } |
2021 | 1.21M | } |
2022 | 159k | } |
2023 | 1.21M | // If the input value is a chrec scev, and we can prove that the value |
2024 | 1.21M | // did not overflow the old, smaller, value, we can sign extend all of the |
2025 | 1.21M | // operands (often constants). This allows analysis of something like |
2026 | 1.21M | // this: for (signed char X = 0; X < 100; ++X) { int Y = X; } |
2027 | 1.21M | if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op)) |
2028 | 598k | if (AR->isAffine()) { |
2029 | 598k | const SCEV *Start = AR->getStart(); |
2030 | 598k | const SCEV *Step = AR->getStepRecurrence(*this); |
2031 | 598k | unsigned BitWidth = getTypeSizeInBits(AR->getType()); |
2032 | 598k | const Loop *L = AR->getLoop(); |
2033 | 598k | |
2034 | 598k | if (!AR->hasNoSignedWrap()) { |
2035 | 464k | auto NewFlags = proveNoWrapViaConstantRanges(AR); |
2036 | 464k | const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(NewFlags); |
2037 | 464k | } |
2038 | 598k | |
2039 | 598k | // If we have special knowledge that this addrec won't overflow, |
2040 | 598k | // we don't need to do any further analysis. |
2041 | 598k | if (AR->hasNoSignedWrap()) |
2042 | 215k | return getAddRecExpr( |
2043 | 215k | getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Depth + 1), |
2044 | 215k | getSignExtendExpr(Step, Ty, Depth + 1), L, SCEV::FlagNSW); |
2045 | 383k | |
2046 | 383k | // Check whether the backedge-taken count is SCEVCouldNotCompute. |
2047 | 383k | // Note that this serves two purposes: It filters out loops that are |
2048 | 383k | // simply not analyzable, and it covers the case where this code is |
2049 | 383k | // being called from within backedge-taken count analysis, such that |
2050 | 383k | // attempting to ask for the backedge-taken count would likely result |
2051 | 383k | // in infinite recursion. In the later case, the analysis code will |
2052 | 383k | // cope with a conservative value, and it will take care to purge |
2053 | 383k | // that value once it has finished. |
2054 | 383k | const SCEV *MaxBECount = getMaxBackedgeTakenCount(L); |
2055 | 383k | if (!isa<SCEVCouldNotCompute>(MaxBECount)) { |
2056 | 333k | // Manually compute the final value for AR, checking for |
2057 | 333k | // overflow. |
2058 | 333k | |
2059 | 333k | // Check whether the backedge-taken count can be losslessly casted to |
2060 | 333k | // the addrec's type. The count is always unsigned. |
2061 | 333k | const SCEV *CastedMaxBECount = |
2062 | 333k | getTruncateOrZeroExtend(MaxBECount, Start->getType(), Depth); |
2063 | 333k | const SCEV *RecastedMaxBECount = getTruncateOrZeroExtend( |
2064 | 333k | CastedMaxBECount, MaxBECount->getType(), Depth); |
2065 | 333k | if (MaxBECount == RecastedMaxBECount) { |
2066 | 269k | Type *WideTy = IntegerType::get(getContext(), BitWidth * 2); |
2067 | 269k | // Check whether Start+Step*MaxBECount has no signed overflow. |
2068 | 269k | const SCEV *SMul = getMulExpr(CastedMaxBECount, Step, |
2069 | 269k | SCEV::FlagAnyWrap, Depth + 1); |
2070 | 269k | const SCEV *SAdd = getSignExtendExpr(getAddExpr(Start, SMul, |
2071 | 269k | SCEV::FlagAnyWrap, |
2072 | 269k | Depth + 1), |
2073 | 269k | WideTy, Depth + 1); |
2074 | 269k | const SCEV *WideStart = getSignExtendExpr(Start, WideTy, Depth + 1); |
2075 | 269k | const SCEV *WideMaxBECount = |
2076 | 269k | getZeroExtendExpr(CastedMaxBECount, WideTy, Depth + 1); |
2077 | 269k | const SCEV *OperandExtendedAdd = |
2078 | 269k | getAddExpr(WideStart, |
2079 | 269k | getMulExpr(WideMaxBECount, |
2080 | 269k | getSignExtendExpr(Step, WideTy, Depth + 1), |
2081 | 269k | SCEV::FlagAnyWrap, Depth + 1), |
2082 | 269k | SCEV::FlagAnyWrap, Depth + 1); |
2083 | 269k | if (SAdd == OperandExtendedAdd) { |
2084 | 17.4k | // Cache knowledge of AR NSW, which is propagated to this AddRec. |
2085 | 17.4k | const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW); |
2086 | 17.4k | // Return the expression with the addrec on the outside. |
2087 | 17.4k | return getAddRecExpr( |
2088 | 17.4k | getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, |
2089 | 17.4k | Depth + 1), |
2090 | 17.4k | getSignExtendExpr(Step, Ty, Depth + 1), L, |
2091 | 17.4k | AR->getNoWrapFlags()); |
2092 | 17.4k | } |
2093 | 252k | // Similar to above, only this time treat the step value as unsigned. |
2094 | 252k | // This covers loops that count up with an unsigned step. |
2095 | 252k | OperandExtendedAdd = |
2096 | 252k | getAddExpr(WideStart, |
2097 | 252k | getMulExpr(WideMaxBECount, |
2098 | 252k | getZeroExtendExpr(Step, WideTy, Depth + 1), |
2099 | 252k | SCEV::FlagAnyWrap, Depth + 1), |
2100 | 252k | SCEV::FlagAnyWrap, Depth + 1); |
2101 | 252k | if (SAdd == OperandExtendedAdd) { |
2102 | 1 | // If AR wraps around then |
2103 | 1 | // |
2104 | 1 | // abs(Step) * MaxBECount > unsigned-max(AR->getType()) |
2105 | 1 | // => SAdd != OperandExtendedAdd |
2106 | 1 | // |
2107 | 1 | // Thus (AR is not NW => SAdd != OperandExtendedAdd) <=> |
2108 | 1 | // (SAdd == OperandExtendedAdd => AR is NW) |
2109 | 1 | |
2110 | 1 | const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW); |
2111 | 1 | |
2112 | 1 | // Return the expression with the addrec on the outside. |
2113 | 1 | return getAddRecExpr( |
2114 | 1 | getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, |
2115 | 1 | Depth + 1), |
2116 | 1 | getZeroExtendExpr(Step, Ty, Depth + 1), L, |
2117 | 1 | AR->getNoWrapFlags()); |
2118 | 1 | } |
2119 | 365k | } |
2120 | 333k | } |
2121 | 365k | |
2122 | 365k | // Normally, in the cases we can prove no-overflow via a |
2123 | 365k | // backedge guarding condition, we can also compute a backedge |
2124 | 365k | // taken count for the loop. The exceptions are assumptions and |
2125 | 365k | // guards present in the loop -- SCEV is not great at exploiting |
2126 | 365k | // these to compute max backedge taken counts, but can still use |
2127 | 365k | // these to prove lack of overflow. Use this fact to avoid |
2128 | 365k | // doing extra work that may not pay off. |
2129 | 365k | |
2130 | 365k | if (!isa<SCEVCouldNotCompute>(MaxBECount) || HasGuards49.2k || |
2131 | 365k | !AC.assumptions().empty()49.2k ) { |
2132 | 316k | // If the backedge is guarded by a comparison with the pre-inc |
2133 | 316k | // value the addrec is safe. Also, if the entry is guarded by |
2134 | 316k | // a comparison with the start value and the backedge is |
2135 | 316k | // guarded by a comparison with the post-inc value, the addrec |
2136 | 316k | // is safe. |
2137 | 316k | ICmpInst::Predicate Pred; |
2138 | 316k | const SCEV *OverflowLimit = |
2139 | 316k | getSignedOverflowLimitForStep(Step, &Pred, this); |
2140 | 316k | if (OverflowLimit && |
2141 | 316k | (307k isLoopBackedgeGuardedByCond(L, Pred, AR, OverflowLimit)307k || |
2142 | 307k | isKnownOnEveryIteration(Pred, AR, OverflowLimit)306k )) { |
2143 | 737 | // Cache knowledge of AR NSW, then propagate NSW to the wide AddRec. |
2144 | 737 | const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW); |
2145 | 737 | return getAddRecExpr( |
2146 | 737 | getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Depth + 1), |
2147 | 737 | getSignExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags()); |
2148 | 737 | } |
2149 | 364k | } |
2150 | 364k | |
2151 | 364k | // sext({C,+,Step}) --> (sext(D) + sext({C-D,+,Step}))<nuw><nsw> |
2152 | 364k | // if D + (C - D + Step * n) could be proven to not signed wrap |
2153 | 364k | // where D maximizes the number of trailing zeros of (C - D + Step * n) |
2154 | 364k | if (const auto *SC = dyn_cast<SCEVConstant>(Start)) { |
2155 | 146k | const APInt &C = SC->getAPInt(); |
2156 | 146k | const APInt &D = extractConstantWithoutWrapping(*this, C, Step); |
2157 | 146k | if (D != 0) { |
2158 | 9.05k | const SCEV *SSExtD = getSignExtendExpr(getConstant(D), Ty, Depth); |
2159 | 9.05k | const SCEV *SResidual = |
2160 | 9.05k | getAddRecExpr(getConstant(C - D), Step, L, AR->getNoWrapFlags()); |
2161 | 9.05k | const SCEV *SSExtR = getSignExtendExpr(SResidual, Ty, Depth + 1); |
2162 | 9.05k | return getAddExpr(SSExtD, SSExtR, |
2163 | 9.05k | (SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNUW), |
2164 | 9.05k | Depth + 1); |
2165 | 9.05k | } |
2166 | 355k | } |
2167 | 355k | |
2168 | 355k | if (proveNoWrapByVaryingStart<SCEVSignExtendExpr>(Start, Step, L)) { |
2169 | 167 | const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW); |
2170 | 167 | return getAddRecExpr( |
2171 | 167 | getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Depth + 1), |
2172 | 167 | getSignExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags()); |
2173 | 167 | } |
2174 | 971k | } |
2175 | 971k | |
2176 | 971k | // If the input value is provably positive and we could not simplify |
2177 | 971k | // away the sext build a zext instead. |
2178 | 971k | if (isKnownNonNegative(Op)) |
2179 | 130k | return getZeroExtendExpr(Op, Ty, Depth + 1); |
2180 | 841k | |
2181 | 841k | // The cast wasn't folded; create an explicit cast node. |
2182 | 841k | // Recompute the insert position, as it may have been invalidated. |
2183 | 841k | if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S43.7k ; |
2184 | 797k | SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator), |
2185 | 797k | Op, Ty); |
2186 | 797k | UniqueSCEVs.InsertNode(S, IP); |
2187 | 797k | addToLoopUseLists(S); |
2188 | 797k | return S; |
2189 | 797k | } |
2190 | | |
2191 | | /// getAnyExtendExpr - Return a SCEV for the given operand extended with |
2192 | | /// unspecified bits out to the given type. |
2193 | | const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op, |
2194 | 429k | Type *Ty) { |
2195 | 429k | assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && |
2196 | 429k | "This is not an extending conversion!"); |
2197 | 429k | assert(isSCEVable(Ty) && |
2198 | 429k | "This is not a conversion to a SCEVable type!"); |
2199 | 429k | Ty = getEffectiveSCEVType(Ty); |
2200 | 429k | |
2201 | 429k | // Sign-extend negative constants. |
2202 | 429k | if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op)) |
2203 | 235k | if (SC->getAPInt().isNegative()) |
2204 | 115k | return getSignExtendExpr(Op, Ty); |
2205 | 313k | |
2206 | 313k | // Peel off a truncate cast. |
2207 | 313k | if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Op)) { |
2208 | 6.93k | const SCEV *NewOp = T->getOperand(); |
2209 | 6.93k | if (getTypeSizeInBits(NewOp->getType()) < getTypeSizeInBits(Ty)) |
2210 | 0 | return getAnyExtendExpr(NewOp, Ty); |
2211 | 6.93k | return getTruncateOrNoop(NewOp, Ty); |
2212 | 6.93k | } |
2213 | 306k | |
2214 | 306k | // Next try a zext cast. If the cast is folded, use it. |
2215 | 306k | const SCEV *ZExt = getZeroExtendExpr(Op, Ty); |
2216 | 306k | if (!isa<SCEVZeroExtendExpr>(ZExt)) |
2217 | 150k | return ZExt; |
2218 | 155k | |
2219 | 155k | // Next try a sext cast. If the cast is folded, use it. |
2220 | 155k | const SCEV *SExt = getSignExtendExpr(Op, Ty); |
2221 | 155k | if (!isa<SCEVSignExtendExpr>(SExt)) |
2222 | 6.92k | return SExt; |
2223 | 149k | |
2224 | 149k | // Force the cast to be folded into the operands of an addrec. |
2225 | 149k | if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op)) { |
2226 | 62.2k | SmallVector<const SCEV *, 4> Ops; |
2227 | 62.2k | for (const SCEV *Op : AR->operands()) |
2228 | 124k | Ops.push_back(getAnyExtendExpr(Op, Ty)); |
2229 | 62.2k | return getAddRecExpr(Ops, AR->getLoop(), SCEV::FlagNW); |
2230 | 62.2k | } |
2231 | 86.8k | |
2232 | 86.8k | // If the expression is obviously signed, use the sext cast value. |
2233 | 86.8k | if (isa<SCEVSMaxExpr>(Op)) |
2234 | 1.00k | return SExt; |
2235 | 85.8k | |
2236 | 85.8k | // Absent any other information, use the zext cast value. |
2237 | 85.8k | return ZExt; |
2238 | 85.8k | } |
2239 | | |
2240 | | /// Process the given Ops list, which is a list of operands to be added under |
2241 | | /// the given scale, update the given map. This is a helper function for |
2242 | | /// getAddRecExpr. As an example of what it does, given a sequence of operands |
2243 | | /// that would form an add expression like this: |
2244 | | /// |
2245 | | /// m + n + 13 + (A * (o + p + (B * (q + m + 29)))) + r + (-1 * r) |
2246 | | /// |
2247 | | /// where A and B are constants, update the map with these values: |
2248 | | /// |
2249 | | /// (m, 1+A*B), (n, 1), (o, A), (p, A), (q, A*B), (r, 0) |
2250 | | /// |
2251 | | /// and add 13 + A*B*29 to AccumulatedConstant. |
2252 | | /// This will allow getAddRecExpr to produce this: |
2253 | | /// |
2254 | | /// 13+A*B*29 + n + (m * (1+A*B)) + ((o + p) * A) + (q * A*B) |
2255 | | /// |
2256 | | /// This form often exposes folding opportunities that are hidden in |
2257 | | /// the original operand list. |
2258 | | /// |
2259 | | /// Return true iff it appears that any interesting folding opportunities |
2260 | | /// may be exposed. This helps getAddRecExpr short-circuit extra work in |
2261 | | /// the common case where no interesting opportunities are present, and |
2262 | | /// is also used as a check to avoid infinite recursion. |
2263 | | static bool |
2264 | | CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M, |
2265 | | SmallVectorImpl<const SCEV *> &NewOps, |
2266 | | APInt &AccumulatedConstant, |
2267 | | const SCEV *const *Ops, size_t NumOperands, |
2268 | | const APInt &Scale, |
2269 | 10.1M | ScalarEvolution &SE) { |
2270 | 10.1M | bool Interesting = false; |
2271 | 10.1M | |
2272 | 10.1M | // Iterate over the add operands. They are sorted, with constants first. |
2273 | 10.1M | unsigned i = 0; |
2274 | 17.5M | while (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) { |
2275 | 7.43M | ++i; |
2276 | 7.43M | // Pull a buried constant out to the outside. |
2277 | 7.43M | if (Scale != 1 || AccumulatedConstant != 07.42M || C->getValue()->isZero()7.42M ) |
2278 | 9.00k | Interesting = true; |
2279 | 7.43M | AccumulatedConstant += Scale * C->getAPInt(); |
2280 | 7.43M | } |
2281 | 10.1M | |
2282 | 10.1M | // Next comes everything else. We're especially interested in multiplies |
2283 | 10.1M | // here, but they're in the middle, so just visit the rest with one loop. |
2284 | 27.9M | for (; i != NumOperands; ++i17.8M ) { |
2285 | 17.8M | const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[i]); |
2286 | 17.8M | if (Mul && isa<SCEVConstant>(Mul->getOperand(0))11.6M ) { |
2287 | 11.4M | APInt NewScale = |
2288 | 11.4M | Scale * cast<SCEVConstant>(Mul->getOperand(0))->getAPInt(); |
2289 | 11.4M | if (Mul->getNumOperands() == 2 && isa<SCEVAddExpr>(Mul->getOperand(1))11.3M ) { |
2290 | 191k | // A multiplication of a constant with another add; recurse. |
2291 | 191k | const SCEVAddExpr *Add = cast<SCEVAddExpr>(Mul->getOperand(1)); |
2292 | 191k | Interesting |= |
2293 | 191k | CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant, |
2294 | 191k | Add->op_begin(), Add->getNumOperands(), |
2295 | 191k | NewScale, SE); |
2296 | 11.2M | } else { |
2297 | 11.2M | // A multiplication of a constant with some other value. Update |
2298 | 11.2M | // the map. |
2299 | 11.2M | SmallVector<const SCEV *, 4> MulOps(Mul->op_begin()+1, Mul->op_end()); |
2300 | 11.2M | const SCEV *Key = SE.getMulExpr(MulOps); |
2301 | 11.2M | auto Pair = M.insert({Key, NewScale}); |
2302 | 11.2M | if (Pair.second) { |
2303 | 11.0M | NewOps.push_back(Pair.first->first); |
2304 | 11.0M | } else { |
2305 | 229k | Pair.first->second += NewScale; |
2306 | 229k | // The map already had an entry for this value, which may indicate |
2307 | 229k | // a folding opportunity. |
2308 | 229k | Interesting = true; |
2309 | 229k | } |
2310 | 11.2M | } |
2311 | 11.4M | } else { |
2312 | 6.40M | // An ordinary operand. Update the map. |
2313 | 6.40M | std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair = |
2314 | 6.40M | M.insert({Ops[i], Scale}); |
2315 | 6.40M | if (Pair.second) { |
2316 | 5.27M | NewOps.push_back(Pair.first->first); |
2317 | 5.27M | } else { |
2318 | 1.12M | Pair.first->second += Scale; |
2319 | 1.12M | // The map already had an entry for this value, which may indicate |
2320 | 1.12M | // a folding opportunity. |
2321 | 1.12M | Interesting = true; |
2322 | 1.12M | } |
2323 | 6.40M | } |
2324 | 17.8M | } |
2325 | 10.1M | |
2326 | 10.1M | return Interesting; |
2327 | 10.1M | } |
2328 | | |
2329 | | // We're trying to construct a SCEV of type `Type' with `Ops' as operands and |
2330 | | // `OldFlags' as can't-wrap behavior. Infer a more aggressive set of |
2331 | | // can't-overflow flags for the operation if possible. |
2332 | | static SCEV::NoWrapFlags |
2333 | | StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type, |
2334 | | const ArrayRef<const SCEV *> Ops, |
2335 | 92.1M | SCEV::NoWrapFlags Flags) { |
2336 | 92.1M | using namespace std::placeholders; |
2337 | 92.1M | |
2338 | 92.1M | using OBO = OverflowingBinaryOperator; |
2339 | 92.1M | |
2340 | 92.1M | bool CanAnalyze = |
2341 | 92.1M | Type == scAddExpr || Type == scAddRecExpr45.3M || Type == scMulExpr25.4M ; |
2342 | 92.1M | (void)CanAnalyze; |
2343 | 92.1M | assert(CanAnalyze && "don't call from other places!"); |
2344 | 92.1M | |
2345 | 92.1M | int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW; |
2346 | 92.1M | SCEV::NoWrapFlags SignOrUnsignWrap = |
2347 | 92.1M | ScalarEvolution::maskFlags(Flags, SignOrUnsignMask); |
2348 | 92.1M | |
2349 | 92.1M | // If FlagNSW is true and all the operands are non-negative, infer FlagNUW. |
2350 | 92.1M | auto IsKnownNonNegative = [&](const SCEV *S) { |
2351 | 13.6M | return SE->isKnownNonNegative(S); |
2352 | 13.6M | }; |
2353 | 92.1M | |
2354 | 92.1M | if (SignOrUnsignWrap == SCEV::FlagNSW && all_of(Ops, IsKnownNonNegative)8.78M ) |
2355 | 2.08M | Flags = |
2356 | 2.08M | ScalarEvolution::setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask); |
2357 | 92.1M | |
2358 | 92.1M | SignOrUnsignWrap = ScalarEvolution::maskFlags(Flags, SignOrUnsignMask); |
2359 | 92.1M | |
2360 | 92.1M | if (SignOrUnsignWrap != SignOrUnsignMask && |
2361 | 92.1M | (86.1M Type == scAddExpr86.1M || Type == scMulExpr40.8M ) && Ops.size() == 268.8M && |
2362 | 92.1M | isa<SCEVConstant>(Ops[0])60.9M ) { |
2363 | 55.2M | |
2364 | 55.2M | auto Opcode = [&] { |
2365 | 55.2M | switch (Type) { |
2366 | 55.2M | case scAddExpr: |
2367 | 33.2M | return Instruction::Add; |
2368 | 55.2M | case scMulExpr: |
2369 | 21.9M | return Instruction::Mul; |
2370 | 55.2M | default: |
2371 | 0 | llvm_unreachable("Unexpected SCEV op."); |
2372 | 55.2M | } |
2373 | 55.2M | }(); |
2374 | 55.2M | |
2375 | 55.2M | const APInt &C = cast<SCEVConstant>(Ops[0])->getAPInt(); |
2376 | 55.2M | |
2377 | 55.2M | // (A <opcode> C) --> (A <opcode> C)<nsw> if the op doesn't sign overflow. |
2378 | 55.2M | if (!(SignOrUnsignWrap & SCEV::FlagNSW)) { |
2379 | 50.2M | auto NSWRegion = ConstantRange::makeGuaranteedNoWrapRegion( |
2380 | 50.2M | Opcode, C, OBO::NoSignedWrap); |
2381 | 50.2M | if (NSWRegion.contains(SE->getSignedRange(Ops[1]))) |
2382 | 34.7M | Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW); |
2383 | 50.2M | } |
2384 | 55.2M | |
2385 | 55.2M | // (A <opcode> C) --> (A <opcode> C)<nuw> if the op doesn't unsign overflow. |
2386 | 55.2M | if (!(SignOrUnsignWrap & SCEV::FlagNUW)) { |
2387 | 54.5M | auto NUWRegion = ConstantRange::makeGuaranteedNoWrapRegion( |
2388 | 54.5M | Opcode, C, OBO::NoUnsignedWrap); |
2389 | 54.5M | if (NUWRegion.contains(SE->getUnsignedRange(Ops[1]))) |
2390 | 23.7M | Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW); |
2391 | 54.5M | } |
2392 | 55.2M | } |
2393 | 92.1M | |
2394 | 92.1M | return Flags; |
2395 | 92.1M | } |
2396 | | |
2397 | 18.4M | bool ScalarEvolution::isAvailableAtLoopEntry(const SCEV *S, const Loop *L) { |
2398 | 18.4M | return isLoopInvariant(S, L) && properlyDominates(S, L->getHeader())9.27M ; |
2399 | 18.4M | } |
2400 | | |
2401 | | /// Get a canonical add expression, or something simpler if possible. |
2402 | | const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, |
2403 | | SCEV::NoWrapFlags Flags, |
2404 | 51.6M | unsigned Depth) { |
2405 | 51.6M | assert(!(Flags & ~(SCEV::FlagNUW | SCEV::FlagNSW)) && |
2406 | 51.6M | "only nuw or nsw allowed"); |
2407 | 51.6M | assert(!Ops.empty() && "Cannot get empty add!"); |
2408 | 51.6M | if (Ops.size() == 1) return Ops[0]4.83M ; |
2409 | | #ifndef NDEBUG |
2410 | | Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); |
2411 | | for (unsigned i = 1, e = Ops.size(); i != e; ++i) |
2412 | | assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && |
2413 | | "SCEVAddExpr operand types don't match!"); |
2414 | | #endif |
2415 | | |
2416 | 46.8M | // Sort by complexity, this groups all similar expression types together. |
2417 | 46.8M | GroupByComplexity(Ops, &LI, DT); |
2418 | 46.8M | |
2419 | 46.8M | Flags = StrengthenNoWrapFlags(this, scAddExpr, Ops, Flags); |
2420 | 46.8M | |
2421 | 46.8M | // If there are any constants, fold them together. |
2422 | 46.8M | unsigned Idx = 0; |
2423 | 46.8M | if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) { |
2424 | 41.0M | ++Idx; |
2425 | 41.0M | assert(Idx < Ops.size()); |
2426 | 44.5M | while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) { |
2427 | 14.0M | // We found two constants, fold them together! |
2428 | 14.0M | Ops[0] = getConstant(LHSC->getAPInt() + RHSC->getAPInt()); |
2429 | 14.0M | if (Ops.size() == 2) return Ops[0]10.5M ; |
2430 | 3.52M | Ops.erase(Ops.begin()+1); // Erase the folded element |
2431 | 3.52M | LHSC = cast<SCEVConstant>(Ops[0]); |
2432 | 3.52M | } |
2433 | 41.0M | |
2434 | 41.0M | // If we are left with a constant zero being added, strip it off. |
2435 | 41.0M | if (30.4M LHSC->getValue()->isZero()30.4M ) { |
2436 | 9.65M | Ops.erase(Ops.begin()); |
2437 | 9.65M | --Idx; |
2438 | 9.65M | } |
2439 | 30.4M | |
2440 | 30.4M | if (Ops.size() == 1) return Ops[0]9.00M ; |
2441 | 27.2M | } |
2442 | 27.2M | |
2443 | 27.2M | // Limit recursion calls depth. |
2444 | 27.2M | if (Depth > MaxArithDepth || hasHugeExpression(Ops)27.2M ) |
2445 | 1.57k | return getOrCreateAddExpr(Ops, Flags); |
2446 | 27.2M | |
2447 | 27.2M | // Okay, check to see if the same value occurs in the operand list more than |
2448 | 27.2M | // once. If so, merge them together into an multiply expression. Since we |
2449 | 27.2M | // sorted the list, these values are required to be adjacent. |
2450 | 27.2M | Type *Ty = Ops[0]->getType(); |
2451 | 27.2M | bool FoundMatch = false; |
2452 | 60.3M | for (unsigned i = 0, e = Ops.size(); i != e-1; ++i33.0M ) |
2453 | 33.0M | if (Ops[i] == Ops[i+1]) { // X + Y + Y --> X + Y*2 |
2454 | 9.91k | // Scan ahead to count how many equal operands there are. |
2455 | 9.91k | unsigned Count = 2; |
2456 | 11.1k | while (i+Count != e && Ops[i+Count] == Ops[i]6.38k ) |
2457 | 1.22k | ++Count; |
2458 | 9.91k | // Merge the values into a multiply. |
2459 | 9.91k | const SCEV *Scale = getConstant(Ty, Count); |
2460 | 9.91k | const SCEV *Mul = getMulExpr(Scale, Ops[i], SCEV::FlagAnyWrap, Depth + 1); |
2461 | 9.91k | if (Ops.size() == Count) |
2462 | 3.23k | return Mul; |
2463 | 6.67k | Ops[i] = Mul; |
2464 | 6.67k | Ops.erase(Ops.begin()+i+1, Ops.begin()+i+Count); |
2465 | 6.67k | --i; e -= Count - 1; |
2466 | 6.67k | FoundMatch = true; |
2467 | 6.67k | } |
2468 | 27.2M | if (27.2M FoundMatch27.2M ) |
2469 | 5.68k | return getAddExpr(Ops, Flags, Depth + 1); |
2470 | 27.2M | |
2471 | 27.2M | // Check for truncates. If all the operands are truncated from the same |
2472 | 27.2M | // type, see if factoring out the truncate would permit the result to be |
2473 | 27.2M | // folded. eg., n*trunc(x) + m*trunc(y) --> trunc(trunc(m)*x + trunc(n)*y) |
2474 | 27.2M | // if the contents of the resulting outer trunc fold to something simple. |
2475 | 27.2M | auto FindTruncSrcType = [&]() -> Type * { |
2476 | 27.2M | // We're ultimately looking to fold an addrec of truncs and muls of only |
2477 | 27.2M | // constants and truncs, so if we find any other types of SCEV |
2478 | 27.2M | // as operands of the addrec then we bail and return nullptr here. |
2479 | 27.2M | // Otherwise, we return the type of the operand of a trunc that we find. |
2480 | 27.2M | if (auto *T = dyn_cast<SCEVTruncateExpr>(Ops[Idx])) |
2481 | 81.7k | return T->getOperand()->getType(); |
2482 | 27.1M | if (const auto *Mul = dyn_cast<SCEVMulExpr>(Ops[Idx])) { |
2483 | 9.37M | const auto *LastOp = Mul->getOperand(Mul->getNumOperands() - 1); |
2484 | 9.37M | if (const auto *T = dyn_cast<SCEVTruncateExpr>(LastOp)) |
2485 | 37.5k | return T->getOperand()->getType(); |
2486 | 27.1M | } |
2487 | 27.1M | return nullptr; |
2488 | 27.1M | }; |
2489 | 27.2M | if (auto *SrcType = FindTruncSrcType()) { |
2490 | 119k | SmallVector<const SCEV *, 8> LargeOps; |
2491 | 119k | bool Ok = true; |
2492 | 119k | // Check all the operands to see if they can be represented in the |
2493 | 119k | // source type of the truncate. |
2494 | 366k | for (unsigned i = 0, e = Ops.size(); i != e; ++i247k ) { |
2495 | 274k | if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Ops[i])) { |
2496 | 84.7k | if (T->getOperand()->getType() != SrcType) { |
2497 | 74 | Ok = false; |
2498 | 74 | break; |
2499 | 74 | } |
2500 | 84.6k | LargeOps.push_back(T->getOperand()); |
2501 | 189k | } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) { |
2502 | 90.8k | LargeOps.push_back(getAnyExtendExpr(C, SrcType)); |
2503 | 98.8k | } else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Ops[i])) { |
2504 | 71.6k | SmallVector<const SCEV *, 8> LargeMulOps; |
2505 | 190k | for (unsigned j = 0, f = M->getNumOperands(); j != f && Ok134k ; ++j118k ) { |
2506 | 130k | if (const SCEVTruncateExpr *T = |
2507 | 55.9k | dyn_cast<SCEVTruncateExpr>(M->getOperand(j))) { |
2508 | 55.9k | if (T->getOperand()->getType() != SrcType) { |
2509 | 368 | Ok = false; |
2510 | 368 | break; |
2511 | 368 | } |
2512 | 55.5k | LargeMulOps.push_back(T->getOperand()); |
2513 | 74.7k | } else if (const auto *C = dyn_cast<SCEVConstant>(M->getOperand(j))) { |
2514 | 62.9k | LargeMulOps.push_back(getAnyExtendExpr(C, SrcType)); |
2515 | 62.9k | } else { |
2516 | 11.8k | Ok = false; |
2517 | 11.8k | break; |
2518 | 11.8k | } |
2519 | 130k | } |
2520 | 71.6k | if (Ok) |
2521 | 55.3k | LargeOps.push_back(getMulExpr(LargeMulOps, SCEV::FlagAnyWrap, Depth + 1)); |
2522 | 71.6k | } else { |
2523 | 27.2k | Ok = false; |
2524 | 27.2k | break; |
2525 | 27.2k | } |
2526 | 274k | } |
2527 | 119k | if (Ok) { |
2528 | 83.8k | // Evaluate the expression in the larger type. |
2529 | 83.8k | const SCEV *Fold = getAddExpr(LargeOps, SCEV::FlagAnyWrap, Depth + 1); |
2530 | 83.8k | // If it folds to something simple, use it. Otherwise, don't. |
2531 | 83.8k | if (isa<SCEVConstant>(Fold) || isa<SCEVUnknown>(Fold)83.6k ) |
2532 | 228 | return getTruncateExpr(Fold, Ty); |
2533 | 27.2M | } |
2534 | 119k | } |
2535 | 27.2M | |
2536 | 27.2M | // Skip past any other cast SCEVs. |
2537 | 29.7M | while (27.2M Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddExpr28.2M ) |
2538 | 2.53M | ++Idx; |
2539 | 27.2M | |
2540 | 27.2M | // If there are add operands they would be next. |
2541 | 27.2M | if (Idx < Ops.size()) { |
2542 | 25.7M | bool DeletedAdd = false; |
2543 | 31.1M | while (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[Idx])) { |
2544 | 5.46M | if (Ops.size() > AddOpsInlineThreshold || |
2545 | 5.46M | Add->getNumOperands() > AddOpsInlineThreshold5.46M ) |
2546 | 1 | break; |
2547 | 5.46M | // If we have an add, expand the add operands onto the end of the operands |
2548 | 5.46M | // list. |
2549 | 5.46M | Ops.erase(Ops.begin()+Idx); |
2550 | 5.46M | Ops.append(Add->op_begin(), Add->op_end()); |
2551 | 5.46M | DeletedAdd = true; |
2552 | 5.46M | } |
2553 | 25.7M | |
2554 | 25.7M | // If we deleted at least one add, we added operands to the end of the list, |
2555 | 25.7M | // and they are not necessarily sorted. Recurse to resort and resimplify |
2556 | 25.7M | // any operands we just acquired. |
2557 | 25.7M | if (DeletedAdd) |
2558 | 4.72M | return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); |
2559 | 22.5M | } |
2560 | 22.5M | |
2561 | 22.5M | // Skip over the add expression until we get to a multiply. |
2562 | 22.5M | while (22.5M Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr20.9M ) |
2563 | 1 | ++Idx; |
2564 | 22.5M | |
2565 | 22.5M | // Check to see if there are any folding opportunities present with |
2566 | 22.5M | // operands multiplied by constant values. |
2567 | 22.5M | if (Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx])20.9M ) { |
2568 | 9.91M | uint64_t BitWidth = getTypeSizeInBits(Ty); |
2569 | 9.91M | DenseMap<const SCEV *, APInt> M; |
2570 | 9.91M | SmallVector<const SCEV *, 8> NewOps; |
2571 | 9.91M | APInt AccumulatedConstant(BitWidth, 0); |
2572 | 9.91M | if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant, |
2573 | 9.91M | Ops.data(), Ops.size(), |
2574 | 9.91M | APInt(BitWidth, 1), *this)) { |
2575 | 1.22M | struct APIntCompare { |
2576 | 1.22M | bool operator()(const APInt &LHS, const APInt &RHS) const { |
2577 | 703k | return LHS.ult(RHS); |
2578 | 703k | } |
2579 | 1.22M | }; |
2580 | 1.22M | |
2581 | 1.22M | // Some interesting folding opportunity is present, so its worthwhile to |
2582 | 1.22M | // re-generate the operands list. Group the operands by constant scale, |
2583 | 1.22M | // to avoid multiplying by the same constant scale multiple times. |
2584 | 1.22M | std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare> MulOpLists; |
2585 | 1.22M | for (const SCEV *NewOp : NewOps) |
2586 | 1.60M | MulOpLists[M.find(NewOp)->second].push_back(NewOp); |
2587 | 1.22M | // Re-generate the operands list. |
2588 | 1.22M | Ops.clear(); |
2589 | 1.22M | if (AccumulatedConstant != 0) |
2590 | 1.06M | Ops.push_back(getConstant(AccumulatedConstant)); |
2591 | 1.22M | for (auto &MulOp : MulOpLists) |
2592 | 1.44M | if (MulOp.first != 0) |
2593 | 240k | Ops.push_back(getMulExpr( |
2594 | 240k | getConstant(MulOp.first), |
2595 | 240k | getAddExpr(MulOp.second, SCEV::FlagAnyWrap, Depth + 1), |
2596 | 240k | SCEV::FlagAnyWrap, Depth + 1)); |
2597 | 1.22M | if (Ops.empty()) |
2598 | 85.2k | return getZero(Ty); |
2599 | 1.13M | if (Ops.size() == 1) |
2600 | 994k | return Ops[0]; |
2601 | 144k | return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); |
2602 | 144k | } |
2603 | 9.91M | } |
2604 | 21.2M | |
2605 | 21.2M | // If we are adding something to a multiply expression, make sure the |
2606 | 21.2M | // something is not already an operand of the multiply. If so, merge it into |
2607 | 21.2M | // the multiply. |
2608 | 31.1M | for (; 21.2M Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx])24.1M ; ++Idx9.85M ) { |
2609 | 9.86M | const SCEVMulExpr *Mul = cast<SCEVMulExpr>(Ops[Idx]); |
2610 | 29.6M | for (unsigned MulOp = 0, e = Mul->getNumOperands(); MulOp != e; ++MulOp19.8M ) { |
2611 | 19.8M | const SCEV *MulOpSCEV = Mul->getOperand(MulOp); |
2612 | 19.8M | if (isa<SCEVConstant>(MulOpSCEV)) |
2613 | 9.70M | continue; |
2614 | 78.5M | for (unsigned AddOp = 0, e = Ops.size(); 10.1M AddOp != e; ++AddOp68.4M ) |
2615 | 68.4M | if (MulOpSCEV == Ops[AddOp]) { |
2616 | 1.75k | // Fold W + X + (X * Y * Z) --> W + (X * ((Y*Z)+1)) |
2617 | 1.75k | const SCEV *InnerMul = Mul->getOperand(MulOp == 0); |
2618 | 1.75k | if (Mul->getNumOperands() != 2) { |
2619 | 606 | // If the multiply has more than two operands, we must get the |
2620 | 606 | // Y*Z term. |
2621 | 606 | SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(), |
2622 | 606 | Mul->op_begin()+MulOp); |
2623 | 606 | MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end()); |
2624 | 606 | InnerMul = getMulExpr(MulOps, SCEV::FlagAnyWrap, Depth + 1); |
2625 | 606 | } |
2626 | 1.75k | SmallVector<const SCEV *, 2> TwoOps = {getOne(Ty), InnerMul}; |
2627 | 1.75k | const SCEV *AddOne = getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1); |
2628 | 1.75k | const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV, |
2629 | 1.75k | SCEV::FlagAnyWrap, Depth + 1); |
2630 | 1.75k | if (Ops.size() == 2) return OuterMul468 ; |
2631 | 1.28k | if (AddOp < Idx) { |
2632 | 222 | Ops.erase(Ops.begin()+AddOp); |
2633 | 222 | Ops.erase(Ops.begin()+Idx-1); |
2634 | 1.06k | } else { |
2635 | 1.06k | Ops.erase(Ops.begin()+Idx); |
2636 | 1.06k | Ops.erase(Ops.begin()+AddOp-1); |
2637 | 1.06k | } |
2638 | 1.28k | Ops.push_back(OuterMul); |
2639 | 1.28k | return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); |
2640 | 1.28k | } |
2641 | 10.1M | |
2642 | 10.1M | // Check this multiply against other multiplies being added together. |
2643 | 10.1M | for (unsigned OtherMulIdx = Idx+1; |
2644 | 21.4M | OtherMulIdx < Ops.size() && isa<SCEVMulExpr>(Ops[OtherMulIdx])15.5M ; |
2645 | 11.3M | ++OtherMulIdx11.3M ) { |
2646 | 11.3M | const SCEVMulExpr *OtherMul = cast<SCEVMulExpr>(Ops[OtherMulIdx]); |
2647 | 11.3M | // If MulOp occurs in OtherMul, we can fold the two multiplies |
2648 | 11.3M | // together. |
2649 | 11.3M | for (unsigned OMulOp = 0, e = OtherMul->getNumOperands(); |
2650 | 34.1M | OMulOp != e; ++OMulOp22.7M ) |
2651 | 22.7M | if (OtherMul->getOperand(OMulOp) == MulOpSCEV) { |
2652 | 13.3k | // Fold X + (A*B*C) + (A*D*E) --> X + (A*(B*C+D*E)) |
2653 | 13.3k | const SCEV *InnerMul1 = Mul->getOperand(MulOp == 0); |
2654 | 13.3k | if (Mul->getNumOperands() != 2) { |
2655 | 2.33k | SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(), |
2656 | 2.33k | Mul->op_begin()+MulOp); |
2657 | 2.33k | MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end()); |
2658 | 2.33k | InnerMul1 = getMulExpr(MulOps, SCEV::FlagAnyWrap, Depth + 1); |
2659 | 2.33k | } |
2660 | 13.3k | const SCEV *InnerMul2 = OtherMul->getOperand(OMulOp == 0); |
2661 | 13.3k | if (OtherMul->getNumOperands() != 2) { |
2662 | 6.22k | SmallVector<const SCEV *, 4> MulOps(OtherMul->op_begin(), |
2663 | 6.22k | OtherMul->op_begin()+OMulOp); |
2664 | 6.22k | MulOps.append(OtherMul->op_begin()+OMulOp+1, OtherMul->op_end()); |
2665 | 6.22k | InnerMul2 = getMulExpr(MulOps, SCEV::FlagAnyWrap, Depth + 1); |
2666 | 6.22k | } |
2667 | 13.3k | SmallVector<const SCEV *, 2> TwoOps = {InnerMul1, InnerMul2}; |
2668 | 13.3k | const SCEV *InnerMulSum = |
2669 | 13.3k | getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1); |
2670 | 13.3k | const SCEV *OuterMul = getMulExpr(MulOpSCEV, InnerMulSum, |
2671 | 13.3k | SCEV::FlagAnyWrap, Depth + 1); |
2672 | 13.3k | if (Ops.size() == 2) return OuterMul3.91k ; |
2673 | 9.39k | Ops.erase(Ops.begin()+Idx); |
2674 | 9.39k | Ops.erase(Ops.begin()+OtherMulIdx-1); |
2675 | 9.39k | Ops.push_back(OuterMul); |
2676 | 9.39k | return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); |
2677 | 9.39k | } |
2678 | 11.3M | } |
2679 | 10.1M | } |
2680 | 9.86M | } |
2681 | 21.2M | |
2682 | 21.2M | // If there are any add recurrences in the operands list, see if any other |
2683 | 21.2M | // added values are loop invariant. If so, we can fold them into the |
2684 | 21.2M | // recurrence. |
2685 | 21.3M | while (21.2M Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr14.3M ) |
2686 | 117k | ++Idx; |
2687 | 21.2M | |
2688 | 21.2M | // Scan over all recurrences, trying to fold loop invariants into them. |
2689 | 21.4M | for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx])14.3M ; ++Idx236k ) { |
2690 | 7.61M | // Scan all of the other operands to this add and add them to the vector if |
2691 | 7.61M | // they are loop invariant w.r.t. the recurrence. |
2692 | 7.61M | SmallVector<const SCEV *, 8> LIOps; |
2693 | 7.61M | const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]); |
2694 | 7.61M | const Loop *AddRecLoop = AddRec->getLoop(); |
2695 | 23.6M | for (unsigned i = 0, e = Ops.size(); i != e; ++i16.0M ) |
2696 | 16.0M | if (isAvailableAtLoopEntry(Ops[i], AddRecLoop)) { |
2697 | 7.58M | LIOps.push_back(Ops[i]); |
2698 | 7.58M | Ops.erase(Ops.begin()+i); |
2699 | 7.58M | --i; --e; |
2700 | 7.58M | } |
2701 | 7.61M | |
2702 | 7.61M | // If we found some loop invariants, fold them into the recurrence. |
2703 | 7.61M | if (!LIOps.empty()) { |
2704 | 6.89M | // NLI + LI + {Start,+,Step} --> NLI + {LI+Start,+,Step} |
2705 | 6.89M | LIOps.push_back(AddRec->getStart()); |
2706 | 6.89M | |
2707 | 6.89M | SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(), |
2708 | 6.89M | AddRec->op_end()); |
2709 | 6.89M | // This follows from the fact that the no-wrap flags on the outer add |
2710 | 6.89M | // expression are applicable on the 0th iteration, when the add recurrence |
2711 | 6.89M | // will be equal to its start value. |
2712 | 6.89M | AddRecOps[0] = getAddExpr(LIOps, Flags, Depth + 1); |
2713 | 6.89M | |
2714 | 6.89M | // Build the new addrec. Propagate the NUW and NSW flags if both the |
2715 | 6.89M | // outer add and the inner addrec are guaranteed to have no overflow. |
2716 | 6.89M | // Always propagate NW. |
2717 | 6.89M | Flags = AddRec->getNoWrapFlags(setFlags(Flags, SCEV::FlagNW)); |
2718 | 6.89M | const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRecLoop, Flags); |
2719 | 6.89M | |
2720 | 6.89M | // If all of the other operands were loop invariant, we are done. |
2721 | 6.89M | if (Ops.size() == 1) return NewRec6.83M ; |
2722 | 52.2k | |
2723 | 52.2k | // Otherwise, add the folded AddRec by the non-invariant parts. |
2724 | 52.2k | for (unsigned i = 0;; ++i28.9k ) |
2725 | 81.2k | if (Ops[i] == AddRec) { |
2726 | 52.2k | Ops[i] = NewRec; |
2727 | 52.2k | break; |
2728 | 52.2k | } |
2729 | 52.2k | return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); |
2730 | 52.2k | } |
2731 | 728k | |
2732 | 728k | // Okay, if there weren't any loop invariants to be folded, check to see if |
2733 | 728k | // there are multiple AddRec's with the same loop induction variable being |
2734 | 728k | // added together. If so, we can fold them. |
2735 | 728k | for (unsigned OtherIdx = Idx+1; |
2736 | 728k | OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx])654k ; |
2737 | 728k | ++OtherIdx0 ) { |
2738 | 492k | // We expect the AddRecExpr's to be sorted in reverse dominance order, |
2739 | 492k | // so that the 1st found AddRecExpr is dominated by all others. |
2740 | 492k | assert(DT.dominates( |
2741 | 492k | cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()->getHeader(), |
2742 | 492k | AddRec->getLoop()->getHeader()) && |
2743 | 492k | "AddRecExprs are not sorted in reverse dominance order?"); |
2744 | 492k | if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) { |
2745 | 492k | // Other + {A,+,B}<L> + {C,+,D}<L> --> Other + {A+C,+,B+D}<L> |
2746 | 492k | SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(), |
2747 | 492k | AddRec->op_end()); |
2748 | 984k | for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx])492k ; |
2749 | 492k | ++OtherIdx) { |
2750 | 492k | const auto *OtherAddRec = cast<SCEVAddRecExpr>(Ops[OtherIdx]); |
2751 | 492k | if (OtherAddRec->getLoop() == AddRecLoop) { |
2752 | 492k | for (unsigned i = 0, e = OtherAddRec->getNumOperands(); |
2753 | 1.47M | i != e; ++i985k ) { |
2754 | 985k | if (i >= AddRecOps.size()) { |
2755 | 432 | AddRecOps.append(OtherAddRec->op_begin()+i, |
2756 | 432 | OtherAddRec->op_end()); |
2757 | 432 | break; |
2758 | 432 | } |
2759 | 985k | SmallVector<const SCEV *, 2> TwoOps = { |
2760 | 985k | AddRecOps[i], OtherAddRec->getOperand(i)}; |
2761 | 985k | AddRecOps[i] = getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1); |
2762 | 985k | } |
2763 | 492k | Ops.erase(Ops.begin() + OtherIdx); --OtherIdx; |
2764 | 492k | } |
2765 | 492k | } |
2766 | 492k | // Step size has changed, so we cannot guarantee no self-wraparound. |
2767 | 492k | Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop, SCEV::FlagAnyWrap); |
2768 | 492k | return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); |
2769 | 492k | } |
2770 | 492k | } |
2771 | 728k | |
2772 | 728k | // Otherwise couldn't fold anything into this recurrence. Move onto the |
2773 | 728k | // next one. |
2774 | 728k | } |
2775 | 21.2M | |
2776 | 21.2M | // Okay, it looks like we really DO need an add expr. Check to see if we |
2777 | 21.2M | // already have one, otherwise create a new one. |
2778 | 21.2M | return getOrCreateAddExpr(Ops, Flags)13.8M ; |
2779 | 21.2M | } |
2780 | | |
2781 | | const SCEV * |
2782 | | ScalarEvolution::getOrCreateAddExpr(ArrayRef<const SCEV *> Ops, |
2783 | 13.8M | SCEV::NoWrapFlags Flags) { |
2784 | 13.8M | FoldingSetNodeID ID; |
2785 | 13.8M | ID.AddInteger(scAddExpr); |
2786 | 13.8M | for (const SCEV *Op : Ops) |
2787 | 31.2M | ID.AddPointer(Op); |
2788 | 13.8M | void *IP = nullptr; |
2789 | 13.8M | SCEVAddExpr *S = |
2790 | 13.8M | static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); |
2791 | 13.8M | if (!S) { |
2792 | 5.14M | const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size()); |
2793 | 5.14M | std::uninitialized_copy(Ops.begin(), Ops.end(), O); |
2794 | 5.14M | S = new (SCEVAllocator) |
2795 | 5.14M | SCEVAddExpr(ID.Intern(SCEVAllocator), O, Ops.size()); |
2796 | 5.14M | UniqueSCEVs.InsertNode(S, IP); |
2797 | 5.14M | addToLoopUseLists(S); |
2798 | 5.14M | } |
2799 | 13.8M | S->setNoWrapFlags(Flags); |
2800 | 13.8M | return S; |
2801 | 13.8M | } |
2802 | | |
2803 | | const SCEV * |
2804 | | ScalarEvolution::getOrCreateAddRecExpr(ArrayRef<const SCEV *> Ops, |
2805 | 19.9M | const Loop *L, SCEV::NoWrapFlags Flags) { |
2806 | 19.9M | FoldingSetNodeID ID; |
2807 | 19.9M | ID.AddInteger(scAddRecExpr); |
2808 | 59.8M | for (unsigned i = 0, e = Ops.size(); i != e; ++i39.8M ) |
2809 | 39.8M | ID.AddPointer(Ops[i]); |
2810 | 19.9M | ID.AddPointer(L); |
2811 | 19.9M | void *IP = nullptr; |
2812 | 19.9M | SCEVAddRecExpr *S = |
2813 | 19.9M | static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); |
2814 | 19.9M | if (!S) { |
2815 | 5.45M | const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size()); |
2816 | 5.45M | std::uninitialized_copy(Ops.begin(), Ops.end(), O); |
2817 | 5.45M | S = new (SCEVAllocator) |
2818 | 5.45M | SCEVAddRecExpr(ID.Intern(SCEVAllocator), O, Ops.size(), L); |
2819 | 5.45M | UniqueSCEVs.InsertNode(S, IP); |
2820 | 5.45M | addToLoopUseLists(S); |
2821 | 5.45M | } |
2822 | 19.9M | S->setNoWrapFlags(Flags); |
2823 | 19.9M | return S; |
2824 | 19.9M | } |
2825 | | |
2826 | | const SCEV * |
2827 | | ScalarEvolution::getOrCreateMulExpr(ArrayRef<const SCEV *> Ops, |
2828 | 7.73M | SCEV::NoWrapFlags Flags) { |
2829 | 7.73M | FoldingSetNodeID ID; |
2830 | 7.73M | ID.AddInteger(scMulExpr); |
2831 | 23.3M | for (unsigned i = 0, e = Ops.size(); i != e; ++i15.6M ) |
2832 | 15.6M | ID.AddPointer(Ops[i]); |
2833 | 7.73M | void *IP = nullptr; |
2834 | 7.73M | SCEVMulExpr *S = |
2835 | 7.73M | static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); |
2836 | 7.73M | if (!S) { |
2837 | 1.47M | const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size()); |
2838 | 1.47M | std::uninitialized_copy(Ops.begin(), Ops.end(), O); |
2839 | 1.47M | S = new (SCEVAllocator) SCEVMulExpr(ID.Intern(SCEVAllocator), |
2840 | 1.47M | O, Ops.size()); |
2841 | 1.47M | UniqueSCEVs.InsertNode(S, IP); |
2842 | 1.47M | addToLoopUseLists(S); |
2843 | 1.47M | } |
2844 | 7.73M | S->setNoWrapFlags(Flags); |
2845 | 7.73M | return S; |
2846 | 7.73M | } |
2847 | | |
2848 | 3.27k | static uint64_t umul_ov(uint64_t i, uint64_t j, bool &Overflow) { |
2849 | 3.27k | uint64_t k = i*j; |
2850 | 3.27k | if (j > 1 && k / j != i3.26k ) Overflow = true0 ; |
2851 | 3.27k | return k; |
2852 | 3.27k | } |
2853 | | |
2854 | | /// Compute the result of "n choose k", the binomial coefficient. If an |
2855 | | /// intermediate computation overflows, Overflow will be set and the return will |
2856 | | /// be garbage. Overflow is not cleared on absence of overflow. |
2857 | 13.4k | static uint64_t Choose(uint64_t n, uint64_t k, bool &Overflow) { |
2858 | 13.4k | // We use the multiplicative formula: |
2859 | 13.4k | // n(n-1)(n-2)...(n-(k-1)) / k(k-1)(k-2)...1 . |
2860 | 13.4k | // At each iteration, we take the n-th term of the numeral and divide by the |
2861 | 13.4k | // (k-n)th term of the denominator. This division will always produce an |
2862 | 13.4k | // integral result, and helps reduce the chance of overflow in the |
2863 | 13.4k | // intermediate computations. However, we can still overflow even when the |
2864 | 13.4k | // final result would fit. |
2865 | 13.4k | |
2866 | 13.4k | if (n == 0 || n == k10.2k ) return 16.73k ; |
2867 | 6.70k | if (k > n) return 00 ; |
2868 | 6.70k | |
2869 | 6.70k | if (k > n/2) |
2870 | 325 | k = n-k; |
2871 | 6.70k | |
2872 | 6.70k | uint64_t r = 1; |
2873 | 9.96k | for (uint64_t i = 1; i <= k; ++i3.26k ) { |
2874 | 3.26k | r = umul_ov(r, n-(i-1), Overflow); |
2875 | 3.26k | r /= i; |
2876 | 3.26k | } |
2877 | 6.70k | return r; |
2878 | 6.70k | } |
2879 | | |
2880 | | /// Determine if any of the operands in this SCEV are a constant or if |
2881 | | /// any of the add or multiply expressions in this SCEV contain a constant. |
2882 | 1.76M | static bool containsConstantInAddMulChain(const SCEV *StartExpr) { |
2883 | 1.76M | struct FindConstantInAddMulChain { |
2884 | 1.76M | bool FoundConstant = false; |
2885 | 1.76M | |
2886 | 5.75M | bool follow(const SCEV *S) { |
2887 | 5.75M | FoundConstant |= isa<SCEVConstant>(S); |
2888 | 5.75M | return isa<SCEVAddExpr>(S) || isa<SCEVMulExpr>(S)3.98M ; |
2889 | 5.75M | } |
2890 | 1.76M | |
2891 | 2.25M | bool isDone() const { |
2892 | 2.25M | return FoundConstant; |
2893 | 2.25M | } |
2894 | 1.76M | }; |
2895 | 1.76M | |
2896 | 1.76M | FindConstantInAddMulChain F; |
2897 | 1.76M | SCEVTraversal<FindConstantInAddMulChain> ST(F); |
2898 | 1.76M | ST.visitAll(StartExpr); |
2899 | 1.76M | return F.FoundConstant; |
2900 | 1.76M | } |
2901 | | |
2902 | | /// Get a canonical multiply expression, or something simpler if possible. |
2903 | | const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, |
2904 | | SCEV::NoWrapFlags Flags, |
2905 | 37.3M | unsigned Depth) { |
2906 | 37.3M | assert(Flags == maskFlags(Flags, SCEV::FlagNUW | SCEV::FlagNSW) && |
2907 | 37.3M | "only nuw or nsw allowed"); |
2908 | 37.3M | assert(!Ops.empty() && "Cannot get empty mul!"); |
2909 | 37.3M | if (Ops.size() == 1) return Ops[0]11.9M ; |
2910 | | #ifndef NDEBUG |
2911 | | Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); |
2912 | | for (unsigned i = 1, e = Ops.size(); i != e; ++i) |
2913 | | assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && |
2914 | | "SCEVMulExpr operand types don't match!"); |
2915 | | #endif |
2916 | | |
2917 | 25.4M | // Sort by complexity, this groups all similar expression types together. |
2918 | 25.4M | GroupByComplexity(Ops, &LI, DT); |
2919 | 25.4M | |
2920 | 25.4M | Flags = StrengthenNoWrapFlags(this, scMulExpr, Ops, Flags); |
2921 | 25.4M | |
2922 | 25.4M | // Limit recursion calls depth. |
2923 | 25.4M | if (Depth > MaxArithDepth || hasHugeExpression(Ops)25.4M ) |
2924 | 3.80k | return getOrCreateMulExpr(Ops, Flags); |
2925 | 25.4M | |
2926 | 25.4M | // If there are any constants, fold them together. |
2927 | 25.4M | unsigned Idx = 0; |
2928 | 25.4M | if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) { |
2929 | 25.1M | |
2930 | 25.1M | if (Ops.size() == 2) |
2931 | 23.8M | // C1*(C2+V) -> C1*C2 + C1*V |
2932 | 23.8M | if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1])) |
2933 | 2.11M | // If any of Add's ops are Adds or Muls with a constant, apply this |
2934 | 2.11M | // transformation as well. |
2935 | 2.11M | // |
2936 | 2.11M | // TODO: There are some cases where this transformation is not |
2937 | 2.11M | // profitable; for example, Add = (C0 + X) * Y + Z. Maybe the scope of |
2938 | 2.11M | // this transformation should be narrowed down. |
2939 | 2.11M | if (Add->getNumOperands() == 2 && containsConstantInAddMulChain(Add)1.76M ) |
2940 | 1.63M | return getAddExpr(getMulExpr(LHSC, Add->getOperand(0), |
2941 | 1.63M | SCEV::FlagAnyWrap, Depth + 1), |
2942 | 1.63M | getMulExpr(LHSC, Add->getOperand(1), |
2943 | 1.63M | SCEV::FlagAnyWrap, Depth + 1), |
2944 | 1.63M | SCEV::FlagAnyWrap, Depth + 1); |
2945 | 23.5M | |
2946 | 23.5M | ++Idx; |
2947 | 24.7M | while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) { |
2948 | 11.4M | // We found two constants, fold them together! |
2949 | 11.4M | ConstantInt *Fold = |
2950 | 11.4M | ConstantInt::get(getContext(), LHSC->getAPInt() * RHSC->getAPInt()); |
2951 | 11.4M | Ops[0] = getConstant(Fold); |
2952 | 11.4M | Ops.erase(Ops.begin()+1); // Erase the folded element |
2953 | 11.4M | if (Ops.size() == 1) return Ops[0]10.2M ; |
2954 | 1.19M | LHSC = cast<SCEVConstant>(Ops[0]); |
2955 | 1.19M | } |
2956 | 23.5M | |
2957 | 23.5M | // If we are left with a constant one being multiplied, strip it off. |
2958 | 23.5M | if (13.2M cast<SCEVConstant>(Ops[0])->getValue()->isOne()13.2M ) { |
2959 | 1.15M | Ops.erase(Ops.begin()); |
2960 | 1.15M | --Idx; |
2961 | 12.1M | } else if (cast<SCEVConstant>(Ops[0])->getValue()->isZero()) { |
2962 | 12.8k | // If we have a multiply of zero, it will always be zero. |
2963 | 12.8k | return Ops[0]; |
2964 | 12.1M | } else if (Ops[0]->isAllOnesValue()) { |
2965 | 9.03M | // If we have a mul by -1 of an add, try distributing the -1 among the |
2966 | 9.03M | // add operands. |
2967 | 9.03M | if (Ops.size() == 2) { |
2968 | 8.94M | if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1])) { |
2969 | 421k | SmallVector<const SCEV *, 4> NewOps; |
2970 | 421k | bool AnyFolded = false; |
2971 | 1.19M | for (const SCEV *AddOp : Add->operands()) { |
2972 | 1.19M | const SCEV *Mul = getMulExpr(Ops[0], AddOp, SCEV::FlagAnyWrap, |
2973 | 1.19M | Depth + 1); |
2974 | 1.19M | if (!isa<SCEVMulExpr>(Mul)) AnyFolded = true494k ; |
2975 | 1.19M | NewOps.push_back(Mul); |
2976 | 1.19M | } |
2977 | 421k | if (AnyFolded) |
2978 | 310k | return getAddExpr(NewOps, SCEV::FlagAnyWrap, Depth + 1); |
2979 | 8.51M | } else if (const auto *AddRec = dyn_cast<SCEVAddRecExpr>(Ops[1])) { |
2980 | 2.25M | // Negation preserves a recurrence's no self-wrap property. |
2981 | 2.25M | SmallVector<const SCEV *, 4> Operands; |
2982 | 2.25M | for (const SCEV *AddRecOp : AddRec->operands()) |
2983 | 4.51M | Operands.push_back(getMulExpr(Ops[0], AddRecOp, SCEV::FlagAnyWrap, |
2984 | 4.51M | Depth + 1)); |
2985 | 2.25M | |
2986 | 2.25M | return getAddRecExpr(Operands, AddRec->getLoop(), |
2987 | 2.25M | AddRec->getNoWrapFlags(SCEV::FlagNW)); |
2988 | 2.25M | } |
2989 | 10.7M | } |
2990 | 9.03M | } |
2991 | 10.7M | |
2992 | 10.7M | if (Ops.size() == 1) |
2993 | 1.13M | return Ops[0]; |
2994 | 9.83M | } |
2995 | 9.83M | |
2996 | 9.83M | // Skip over the add expression until we get to a multiply. |
2997 | 13.2M | while (9.83M Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr10.0M ) |
2998 | 3.39M | ++Idx; |
2999 | 9.83M | |
3000 | 9.83M | // If there are mul operands inline them all into this expression. |
3001 | 9.83M | if (Idx < Ops.size()) { |
3002 | 6.67M | bool DeletedMul = false; |
3003 | 7.98M | while (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[Idx])) { |
3004 | 1.31M | if (Ops.size() > MulOpsInlineThreshold) |
3005 | 138 | break; |
3006 | 1.31M | // If we have an mul, expand the mul operands onto the end of the |
3007 | 1.31M | // operands list. |
3008 | 1.31M | Ops.erase(Ops.begin()+Idx); |
3009 | 1.31M | Ops.append(Mul->op_begin(), Mul->op_end()); |
3010 | 1.31M | DeletedMul = true; |
3011 | 1.31M | } |
3012 | 6.67M | |
3013 | 6.67M | // If we deleted at least one mul, we added operands to the end of the |
3014 | 6.67M | // list, and they are not necessarily sorted. Recurse to resort and |
3015 | 6.67M | // resimplify any operands we just acquired. |
3016 | 6.67M | if (DeletedMul) |
3017 | 1.31M | return getMulExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); |
3018 | 8.52M | } |
3019 | 8.52M | |
3020 | 8.52M | // If there are any add recurrences in the operands list, see if any other |
3021 | 8.52M | // added values are loop invariant. If so, we can fold them into the |
3022 | 8.52M | // recurrence. |
3023 | 9.09M | while (8.52M Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr5.45M ) |
3024 | 577k | ++Idx; |
3025 | 8.52M | |
3026 | 8.52M | // Scan over all recurrences, trying to fold loop invariants into them. |
3027 | 8.53M | for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx])4.88M ; ++Idx10.4k ) { |
3028 | 802k | // Scan all of the other operands to this mul and add them to the vector |
3029 | 802k | // if they are loop invariant w.r.t. the recurrence. |
3030 | 802k | SmallVector<const SCEV *, 8> LIOps; |
3031 | 802k | const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]); |
3032 | 802k | const Loop *AddRecLoop = AddRec->getLoop(); |
3033 | 2.41M | for (unsigned i = 0, e = Ops.size(); i != e; ++i1.61M ) |
3034 | 1.61M | if (isAvailableAtLoopEntry(Ops[i], AddRecLoop)) { |
3035 | 791k | LIOps.push_back(Ops[i]); |
3036 | 791k | Ops.erase(Ops.begin()+i); |
3037 | 791k | --i; --e; |
3038 | 791k | } |
3039 | 802k | |
3040 | 802k | // If we found some loop invariants, fold them into the recurrence. |
3041 | 802k | if (!LIOps.empty()) { |
3042 | 791k | // NLI * LI * {Start,+,Step} --> NLI * {LI*Start,+,LI*Step} |
3043 | 791k | SmallVector<const SCEV *, 4> NewOps; |
3044 | 791k | NewOps.reserve(AddRec->getNumOperands()); |
3045 | 791k | const SCEV *Scale = getMulExpr(LIOps, SCEV::FlagAnyWrap, Depth + 1); |
3046 | 2.37M | for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i1.58M ) |
3047 | 1.58M | NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i), |
3048 | 1.58M | SCEV::FlagAnyWrap, Depth + 1)); |
3049 | 791k | |
3050 | 791k | // Build the new addrec. Propagate the NUW and NSW flags if both the |
3051 | 791k | // outer mul and the inner addrec are guaranteed to have no overflow. |
3052 | 791k | // |
3053 | 791k | // No self-wrap cannot be guaranteed after changing the step size, but |
3054 | 791k | // will be inferred if either NUW or NSW is true. |
3055 | 791k | Flags = AddRec->getNoWrapFlags(clearFlags(Flags, SCEV::FlagNW)); |
3056 | 791k | const SCEV *NewRec = getAddRecExpr(NewOps, AddRecLoop, Flags); |
3057 | 791k | |
3058 | 791k | // If all of the other operands were loop invariant, we are done. |
3059 | 791k | if (Ops.size() == 1) return NewRec788k ; |
3060 | 2.42k | |
3061 | 2.42k | // Otherwise, multiply the folded AddRec by the non-invariant parts. |
3062 | 2.42k | for (unsigned i = 0;; ++i1.76k ) |
3063 | 4.19k | if (Ops[i] == AddRec) { |
3064 | 2.42k | Ops[i] = NewRec; |
3065 | 2.42k | break; |
3066 | 2.42k | } |
3067 | 2.42k | return getMulExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); |
3068 | 2.42k | } |
3069 | 11.4k | |
3070 | 11.4k | // Okay, if there weren't any loop invariants to be folded, check to see |
3071 | 11.4k | // if there are multiple AddRec's with the same loop induction variable |
3072 | 11.4k | // being multiplied together. If so, we can fold them. |
3073 | 11.4k | |
3074 | 11.4k | // {A1,+,A2,+,...,+,An}<L> * {B1,+,B2,+,...,+,Bn}<L> |
3075 | 11.4k | // = {x=1 in [ sum y=x..2x [ sum z=max(y-x, y-n)..min(x,n) [ |
3076 | 11.4k | // choose(x, 2x)*choose(2x-y, x-z)*A_{y-z}*B_z |
3077 | 11.4k | // ]]],+,...up to x=2n}. |
3078 | 11.4k | // Note that the arguments to choose() are always integers with values |
3079 | 11.4k | // known at compile time, never SCEV objects. |
3080 | 11.4k | // |
3081 | 11.4k | // The implementation avoids pointless extra computations when the two |
3082 | 11.4k | // addrec's are of different length (mathematically, it's equivalent to |
3083 | 11.4k | // an infinite stream of zeros on the right). |
3084 | 11.4k | bool OpsModified = false; |
3085 | 11.4k | for (unsigned OtherIdx = Idx+1; |
3086 | 11.8k | OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx])6.68k ; |
3087 | 11.4k | ++OtherIdx404 ) { |
3088 | 1.40k | const SCEVAddRecExpr *OtherAddRec = |
3089 | 1.40k | dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]); |
3090 | 1.40k | if (!OtherAddRec || OtherAddRec->getLoop() != AddRecLoop) |
3091 | 0 | continue; |
3092 | 1.40k | |
3093 | 1.40k | // Limit max number of arguments to avoid creation of unreasonably big |
3094 | 1.40k | // SCEVAddRecs with very complex operands. |
3095 | 1.40k | if (AddRec->getNumOperands() + OtherAddRec->getNumOperands() - 1 > |
3096 | 1.40k | MaxAddRecSize || isHugeExpression(AddRec)1.07k || |
3097 | 1.40k | isHugeExpression(OtherAddRec)1.07k ) |
3098 | 330 | continue; |
3099 | 1.07k | |
3100 | 1.07k | bool Overflow = false; |
3101 | 1.07k | Type *Ty = AddRec->getType(); |
3102 | 1.07k | bool LargerThan64Bits = getTypeSizeInBits(Ty) > 64; |
3103 | 1.07k | SmallVector<const SCEV*, 7> AddRecOps; |
3104 | 1.07k | for (int x = 0, xe = AddRec->getNumOperands() + |
3105 | 4.47k | OtherAddRec->getNumOperands() - 1; x != xe && !Overflow3.40k ; ++x3.40k ) { |
3106 | 3.40k | SmallVector <const SCEV *, 7> SumOps; |
3107 | 10.7k | for (int y = x, ye = 2*x+1; y != ye && !Overflow7.35k ; ++y7.35k ) { |
3108 | 7.35k | uint64_t Coeff1 = Choose(x, 2*x - y, Overflow); |
3109 | 7.35k | for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1), |
3110 | 7.35k | ze = std::min(x+1, (int)OtherAddRec->getNumOperands()); |
3111 | 13.4k | z < ze && !Overflow6.08k ; ++z6.08k ) { |
3112 | 6.08k | uint64_t Coeff2 = Choose(2*x - y, x-z, Overflow); |
3113 | 6.08k | uint64_t Coeff; |
3114 | 6.08k | if (LargerThan64Bits) |
3115 | 10 | Coeff = umul_ov(Coeff1, Coeff2, Overflow); |
3116 | 6.07k | else |
3117 | 6.07k | Coeff = Coeff1*Coeff2; |
3118 | 6.08k | const SCEV *CoeffTerm = getConstant(Ty, Coeff); |
3119 | 6.08k | const SCEV *Term1 = AddRec->getOperand(y-z); |
3120 | 6.08k | const SCEV *Term2 = OtherAddRec->getOperand(z); |
3121 | 6.08k | SumOps.push_back(getMulExpr(CoeffTerm, Term1, Term2, |
3122 | 6.08k | SCEV::FlagAnyWrap, Depth + 1)); |
3123 | 6.08k | } |
3124 | 7.35k | } |
3125 | 3.40k | if (SumOps.empty()) |
3126 | 0 | SumOps.push_back(getZero(Ty)); |
3127 | 3.40k | AddRecOps.push_back(getAddExpr(SumOps, SCEV::FlagAnyWrap, Depth + 1)); |
3128 | 3.40k | } |
3129 | 1.07k | if (!Overflow) { |
3130 | 1.07k | const SCEV *NewAddRec = getAddRecExpr(AddRecOps, AddRecLoop, |
3131 | 1.07k | SCEV::FlagAnyWrap); |
3132 | 1.07k | if (Ops.size() == 2) return NewAddRec993 ; |
3133 | 77 | Ops[Idx] = NewAddRec; |
3134 | 77 | Ops.erase(Ops.begin() + OtherIdx); --OtherIdx; |
3135 | 77 | OpsModified = true; |
3136 | 77 | AddRec = dyn_cast<SCEVAddRecExpr>(NewAddRec); |
3137 | 77 | if (!AddRec) |
3138 | 3 | break; |
3139 | 77 | } |
3140 | 1.07k | } |
3141 | 11.4k | if (10.4k OpsModified10.4k ) |
3142 | 54 | return getMulExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); |
3143 | 10.4k | |
3144 | 10.4k | // Otherwise couldn't fold anything into this recurrence. Move onto the |
3145 | 10.4k | // next one. |
3146 | 10.4k | } |
3147 | 8.52M | |
3148 | 8.52M | // Okay, it looks like we really DO need an mul expr. Check to see if we |
3149 | 8.52M | // already have one, otherwise create a new one. |
3150 | 8.52M | return getOrCreateMulExpr(Ops, Flags)7.72M ; |
3151 | 8.52M | } |
3152 | | |
3153 | | /// Represents an unsigned remainder expression based on unsigned division. |
3154 | | const SCEV *ScalarEvolution::getURemExpr(const SCEV *LHS, |
3155 | 79.2k | const SCEV *RHS) { |
3156 | 79.2k | assert(getEffectiveSCEVType(LHS->getType()) == |
3157 | 79.2k | getEffectiveSCEVType(RHS->getType()) && |
3158 | 79.2k | "SCEVURemExpr operand types don't match!"); |
3159 | 79.2k | |
3160 | 79.2k | // Short-circuit easy cases |
3161 | 79.2k | if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) { |
3162 | 38.2k | // If constant is one, the result is trivial |
3163 | 38.2k | if (RHSC->getValue()->isOne()) |
3164 | 12.5k | return getZero(LHS->getType()); // X urem 1 --> 0 |
3165 | 25.7k | |
3166 | 25.7k | // If constant is a power of two, fold into a zext(trunc(LHS)). |
3167 | 25.7k | if (RHSC->getAPInt().isPowerOf2()) { |
3168 | 7.96k | Type *FullTy = LHS->getType(); |
3169 | 7.96k | Type *TruncTy = |
3170 | 7.96k | IntegerType::get(getContext(), RHSC->getAPInt().logBase2()); |
3171 | 7.96k | return getZeroExtendExpr(getTruncateExpr(LHS, TruncTy), FullTy); |
3172 | 7.96k | } |
3173 | 58.7k | } |
3174 | 58.7k | |
3175 | 58.7k | // Fallback to %a == %x urem %y == %x -<nuw> ((%x udiv %y) *<nuw> %y) |
3176 | 58.7k | const SCEV *UDiv = getUDivExpr(LHS, RHS); |
3177 | 58.7k | const SCEV *Mult = getMulExpr(UDiv, RHS, SCEV::FlagNUW); |
3178 | 58.7k | return getMinusSCEV(LHS, Mult, SCEV::FlagNUW); |
3179 | 58.7k | } |
3180 | | |
3181 | | /// Get a canonical unsigned division expression, or something simpler if |
3182 | | /// possible. |
3183 | | const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, |
3184 | 668k | const SCEV *RHS) { |
3185 | 668k | assert(getEffectiveSCEVType(LHS->getType()) == |
3186 | 668k | getEffectiveSCEVType(RHS->getType()) && |
3187 | 668k | "SCEVUDivExpr operand types don't match!"); |
3188 | 668k | |
3189 | 668k | if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) { |
3190 | 623k | if (RHSC->getValue()->isOne()) |
3191 | 361k | return LHS; // X udiv 1 --> x |
3192 | 261k | // If the denominator is zero, the result of the udiv is undefined. Don't |
3193 | 261k | // try to analyze it, because the resolution chosen here may differ from |
3194 | 261k | // the resolution chosen in other parts of the compiler. |
3195 | 261k | if (!RHSC->getValue()->isZero()) { |
3196 | 261k | // Determine if the division can be folded into the operands of |
3197 | 261k | // its operands. |
3198 | 261k | // TODO: Generalize this to non-constants by using known-bits information. |
3199 | 261k | Type *Ty = LHS->getType(); |
3200 | 261k | unsigned LZ = RHSC->getAPInt().countLeadingZeros(); |
3201 | 261k | unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ - 1; |
3202 | 261k | // For non-power-of-two values, effectively round the value up to the |
3203 | 261k | // nearest power of two. |
3204 | 261k | if (!RHSC->getAPInt().isPowerOf2()) |
3205 | 36.3k | ++MaxShiftAmt; |
3206 | 261k | IntegerType *ExtTy = |
3207 | 261k | IntegerType::get(getContext(), getTypeSizeInBits(Ty) + MaxShiftAmt); |
3208 | 261k | if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) |
3209 | 8.08k | if (const SCEVConstant *Step = |
3210 | 7.74k | dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this))) { |
3211 | 7.74k | // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded. |
3212 | 7.74k | const APInt &StepInt = Step->getAPInt(); |
3213 | 7.74k | const APInt &DivInt = RHSC->getAPInt(); |
3214 | 7.74k | if (!StepInt.urem(DivInt) && |
3215 | 7.74k | getZeroExtendExpr(AR, ExtTy) == |
3216 | 840 | getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy), |
3217 | 840 | getZeroExtendExpr(Step, ExtTy), |
3218 | 840 | AR->getLoop(), SCEV::FlagAnyWrap)) { |
3219 | 477 | SmallVector<const SCEV *, 4> Operands; |
3220 | 477 | for (const SCEV *Op : AR->operands()) |
3221 | 954 | Operands.push_back(getUDivExpr(Op, RHS)); |
3222 | 477 | return getAddRecExpr(Operands, AR->getLoop(), SCEV::FlagNW); |
3223 | 477 | } |
3224 | 7.26k | /// Get a canonical UDivExpr for a recurrence. |
3225 | 7.26k | /// {X,+,N}/C => {Y,+,N}/C where Y=X-(X%N). Safe when C%N=0. |
3226 | 7.26k | // We can currently only fold X%N if X is constant. |
3227 | 7.26k | const SCEVConstant *StartC = dyn_cast<SCEVConstant>(AR->getStart()); |
3228 | 7.26k | if (StartC && !DivInt.urem(StepInt)5.94k && |
3229 | 7.26k | getZeroExtendExpr(AR, ExtTy) == |
3230 | 5.23k | getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy), |
3231 | 5.23k | getZeroExtendExpr(Step, ExtTy), |
3232 | 5.23k | AR->getLoop(), SCEV::FlagAnyWrap)) { |
3233 | 2.96k | const APInt &StartInt = StartC->getAPInt(); |
3234 | 2.96k | const APInt &StartRem = StartInt.urem(StepInt); |
3235 | 2.96k | if (StartRem != 0) |
3236 | 10 | LHS = getAddRecExpr(getConstant(StartInt - StartRem), Step, |
3237 | 10 | AR->getLoop(), SCEV::FlagNW); |
3238 | 2.96k | } |
3239 | 7.26k | } |
3240 | 261k | // (A*B)/C --> A*(B/C) if safe and B/C can be folded. |
3241 | 261k | if (const SCEVMulExpr *261k M261k = dyn_cast<SCEVMulExpr>(LHS)) { |
3242 | 3.79k | SmallVector<const SCEV *, 4> Operands; |
3243 | 3.79k | for (const SCEV *Op : M->operands()) |
3244 | 7.82k | Operands.push_back(getZeroExtendExpr(Op, ExtTy)); |
3245 | 3.79k | if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands)) |
3246 | 246 | // Find an operand that's safely divisible. |
3247 | 450 | for (unsigned i = 0, e = M->getNumOperands(); 246 i != e; ++i204 ) { |
3248 | 348 | const SCEV *Op = M->getOperand(i); |
3249 | 348 | const SCEV *Div = getUDivExpr(Op, RHSC); |
3250 | 348 | if (!isa<SCEVUDivExpr>(Div) && getMulExpr(Div, RHSC) == Op252 ) { |
3251 | 144 | Operands = SmallVector<const SCEV *, 4>(M->op_begin(), |
3252 | 144 | M->op_end()); |
3253 | 144 | Operands[i] = Div; |
3254 | 144 | return getMulExpr(Operands); |
3255 | 144 | } |
3256 | 348 | } |
3257 | 3.79k | } |
3258 | 261k | |
3259 | 261k | // (A/B)/C --> A/(B*C) if safe and B*C can be folded. |
3260 | 261k | if (const SCEVUDivExpr *261k OtherDiv261k = dyn_cast<SCEVUDivExpr>(LHS)) { |
3261 | 379 | if (auto *DivisorConstant = |
3262 | 379 | dyn_cast<SCEVConstant>(OtherDiv->getRHS())) { |
3263 | 379 | bool Overflow = false; |
3264 | 379 | APInt NewRHS = |
3265 | 379 | DivisorConstant->getAPInt().umul_ov(RHSC->getAPInt(), Overflow); |
3266 | 379 | if (Overflow) { |
3267 | 332 | return getConstant(RHSC->getType(), 0, false); |
3268 | 332 | } |
3269 | 47 | return getUDivExpr(OtherDiv->getLHS(), getConstant(NewRHS)); |
3270 | 47 | } |
3271 | 379 | } |
3272 | 260k | |
3273 | 260k | // (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded. |
3274 | 260k | if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(LHS)) { |
3275 | 66.8k | SmallVector<const SCEV *, 4> Operands; |
3276 | 66.8k | for (const SCEV *Op : A->operands()) |
3277 | 150k | Operands.push_back(getZeroExtendExpr(Op, ExtTy)); |
3278 | 66.8k | if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) { |
3279 | 11.4k | Operands.clear(); |
3280 | 11.7k | for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i342 ) { |
3281 | 11.6k | const SCEV *Op = getUDivExpr(A->getOperand(i), RHS); |
3282 | 11.6k | if (isa<SCEVUDivExpr>(Op) || |
3283 | 11.6k | getMulExpr(Op, RHS) != A->getOperand(i)11.5k ) |
3284 | 11.3k | break; |
3285 | 342 | Operands.push_back(Op); |
3286 | 342 | } |
3287 | 11.4k | if (Operands.size() == A->getNumOperands()) |
3288 | 119 | return getAddExpr(Operands); |
3289 | 260k | } |
3290 | 66.8k | } |
3291 | 260k | |
3292 | 260k | // Fold if both operands are constant. |
3293 | 260k | if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) { |
3294 | 24.4k | Constant *LHSCV = LHSC->getValue(); |
3295 | 24.4k | Constant *RHSCV = RHSC->getValue(); |
3296 | 24.4k | return getConstant(cast<ConstantInt>(ConstantExpr::getUDiv(LHSCV, |
3297 | 24.4k | RHSCV))); |
3298 | 24.4k | } |
3299 | 281k | } |
3300 | 261k | } |
3301 | 281k | |
3302 | 281k | FoldingSetNodeID ID; |
3303 | 281k | ID.AddInteger(scUDivExpr); |
3304 | 281k | ID.AddPointer(LHS); |
3305 | 281k | ID.AddPointer(RHS); |
3306 | 281k | void *IP = nullptr; |
3307 | 281k | if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S89.6k ; |
3308 | 192k | SCEV *S = new (SCEVAllocator) SCEVUDivExpr(ID.Intern(SCEVAllocator), |
3309 | 192k | LHS, RHS); |
3310 | 192k | UniqueSCEVs.InsertNode(S, IP); |
3311 | 192k | addToLoopUseLists(S); |
3312 | 192k | return S; |
3313 | 192k | } |
3314 | | |
3315 | 0 | static const APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) { |
3316 | 0 | APInt A = C1->getAPInt().abs(); |
3317 | 0 | APInt B = C2->getAPInt().abs(); |
3318 | 0 | uint32_t ABW = A.getBitWidth(); |
3319 | 0 | uint32_t BBW = B.getBitWidth(); |
3320 | 0 |
|
3321 | 0 | if (ABW > BBW) |
3322 | 0 | B = B.zext(ABW); |
3323 | 0 | else if (ABW < BBW) |
3324 | 0 | A = A.zext(BBW); |
3325 | 0 |
|
3326 | 0 | return APIntOps::GreatestCommonDivisor(std::move(A), std::move(B)); |
3327 | 0 | } |
3328 | | |
3329 | | /// Get a canonical unsigned division expression, or something simpler if |
3330 | | /// possible. There is no representation for an exact udiv in SCEV IR, but we |
3331 | | /// can attempt to remove factors from the LHS and RHS. We can't do this when |
3332 | | /// it's not exact because the udiv may be clearing bits. |
3333 | | const SCEV *ScalarEvolution::getUDivExactExpr(const SCEV *LHS, |
3334 | 27.8k | const SCEV *RHS) { |
3335 | 27.8k | // TODO: we could try to find factors in all sorts of things, but for now we |
3336 | 27.8k | // just deal with u/exact (multiply, constant). See SCEVDivision towards the |
3337 | 27.8k | // end of this file for inspiration. |
3338 | 27.8k | |
3339 | 27.8k | const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS); |
3340 | 27.8k | if (!Mul || !Mul->hasNoUnsignedWrap()1 ) |
3341 | 27.8k | return getUDivExpr(LHS, RHS); |
3342 | 0 | |
3343 | 0 | if (const SCEVConstant *RHSCst = dyn_cast<SCEVConstant>(RHS)) { |
3344 | 0 | // If the mulexpr multiplies by a constant, then that constant must be the |
3345 | 0 | // first element of the mulexpr. |
3346 | 0 | if (const auto *LHSCst = dyn_cast<SCEVConstant>(Mul->getOperand(0))) { |
3347 | 0 | if (LHSCst == RHSCst) { |
3348 | 0 | SmallVector<const SCEV *, 2> Operands; |
3349 | 0 | Operands.append(Mul->op_begin() + 1, Mul->op_end()); |
3350 | 0 | return getMulExpr(Operands); |
3351 | 0 | } |
3352 | 0 | |
3353 | 0 | // We can't just assume that LHSCst divides RHSCst cleanly, it could be |
3354 | 0 | // that there's a factor provided by one of the other terms. We need to |
3355 | 0 | // check. |
3356 | 0 | APInt Factor = gcd(LHSCst, RHSCst); |
3357 | 0 | if (!Factor.isIntN(1)) { |
3358 | 0 | LHSCst = |
3359 | 0 | cast<SCEVConstant>(getConstant(LHSCst->getAPInt().udiv(Factor))); |
3360 | 0 | RHSCst = |
3361 | 0 | cast<SCEVConstant>(getConstant(RHSCst->getAPInt().udiv(Factor))); |
3362 | 0 | SmallVector<const SCEV *, 2> Operands; |
3363 | 0 | Operands.push_back(LHSCst); |
3364 | 0 | Operands.append(Mul->op_begin() + 1, Mul->op_end()); |
3365 | 0 | LHS = getMulExpr(Operands); |
3366 | 0 | RHS = RHSCst; |
3367 | 0 | Mul = dyn_cast<SCEVMulExpr>(LHS); |
3368 | 0 | if (!Mul) |
3369 | 0 | return getUDivExactExpr(LHS, RHS); |
3370 | 0 | } |
3371 | 0 | } |
3372 | 0 | } |
3373 | 0 | |
3374 | 0 | for (int i = 0, e = Mul->getNumOperands(); i != e; ++i) { |
3375 | 0 | if (Mul->getOperand(i) == RHS) { |
3376 | 0 | SmallVector<const SCEV *, 2> Operands; |
3377 | 0 | Operands.append(Mul->op_begin(), Mul->op_begin() + i); |
3378 | 0 | Operands.append(Mul->op_begin() + i + 1, Mul->op_end()); |
3379 | 0 | return getMulExpr(Operands); |
3380 | 0 | } |
3381 | 0 | } |
3382 | 0 |
|
3383 | 0 | return getUDivExpr(LHS, RHS); |
3384 | 0 | } |
3385 | | |
3386 | | /// Get an add recurrence expression for the specified loop. Simplify the |
3387 | | /// expression as much as possible. |
3388 | | const SCEV *ScalarEvolution::getAddRecExpr(const SCEV *Start, const SCEV *Step, |
3389 | | const Loop *L, |
3390 | 4.46M | SCEV::NoWrapFlags Flags) { |
3391 | 4.46M | SmallVector<const SCEV *, 4> Operands; |
3392 | 4.46M | Operands.push_back(Start); |
3393 | 4.46M | if (const SCEVAddRecExpr *StepChrec = dyn_cast<SCEVAddRecExpr>(Step)) |
3394 | 515 | if (StepChrec->getLoop() == L) { |
3395 | 131 | Operands.append(StepChrec->op_begin(), StepChrec->op_end()); |
3396 | 131 | return getAddRecExpr(Operands, L, maskFlags(Flags, SCEV::FlagNW)); |
3397 | 131 | } |
3398 | 4.46M | |
3399 | 4.46M | Operands.push_back(Step); |
3400 | 4.46M | return getAddRecExpr(Operands, L, Flags); |
3401 | 4.46M | } |
3402 | | |
3403 | | /// Get an add recurrence expression for the specified loop. Simplify the |
3404 | | /// expression as much as possible. |
3405 | | const SCEV * |
3406 | | ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands, |
3407 | 20.9M | const Loop *L, SCEV::NoWrapFlags Flags) { |
3408 | 20.9M | if (Operands.size() == 1) return Operands[0]490k ; |
3409 | | #ifndef NDEBUG |
3410 | | Type *ETy = getEffectiveSCEVType(Operands[0]->getType()); |
3411 | | for (unsigned i = 1, e = Operands.size(); i != e; ++i) |
3412 | | assert(getEffectiveSCEVType(Operands[i]->getType()) == ETy && |
3413 | | "SCEVAddRecExpr operand types don't match!"); |
3414 | | for (unsigned i = 0, e = Operands.size(); i != e; ++i) |
3415 | | assert(isLoopInvariant(Operands[i], L) && |
3416 | | "SCEVAddRecExpr operand is not loop-invariant!"); |
3417 | | #endif |
3418 | | |
3419 | 20.4M | if (Operands.back()->isZero()) { |
3420 | 491k | Operands.pop_back(); |
3421 | 491k | return getAddRecExpr(Operands, L, SCEV::FlagAnyWrap); // {X,+,0} --> X |
3422 | 491k | } |
3423 | 19.9M | |
3424 | 19.9M | // It's tempting to want to call getMaxBackedgeTakenCount count here and |
3425 | 19.9M | // use that information to infer NUW and NSW flags. However, computing a |
3426 | 19.9M | // BE count requires calling getAddRecExpr, so we may not yet have a |
3427 | 19.9M | // meaningful BE count at this point (and if we don't, we'd be stuck |
3428 | 19.9M | // with a SCEVCouldNotCompute as the cached BE count). |
3429 | 19.9M | |
3430 | 19.9M | Flags = StrengthenNoWrapFlags(this, scAddRecExpr, Operands, Flags); |
3431 | 19.9M | |
3432 | 19.9M | // Canonicalize nested AddRecs in by nesting them in order of loop depth. |
3433 | 19.9M | if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) { |
3434 | 780k | const Loop *NestedLoop = NestedAR->getLoop(); |
3435 | 780k | if (L->contains(NestedLoop) |
3436 | 780k | ? (L->getLoopDepth() < NestedLoop->getLoopDepth())0 |
3437 | 780k | : (!NestedLoop->contains(L) && |
3438 | 780k | DT.dominates(L->getHeader(), NestedLoop->getHeader())252k )) { |
3439 | 0 | SmallVector<const SCEV *, 4> NestedOperands(NestedAR->op_begin(), |
3440 | 0 | NestedAR->op_end()); |
3441 | 0 | Operands[0] = NestedAR->getStart(); |
3442 | 0 | // AddRecs require their operands be loop-invariant with respect to their |
3443 | 0 | // loops. Don't perform this transformation if it would break this |
3444 | 0 | // requirement. |
3445 | 0 | bool AllInvariant = all_of( |
3446 | 0 | Operands, [&](const SCEV *Op) { return isLoopInvariant(Op, L); }); |
3447 | 0 |
|
3448 | 0 | if (AllInvariant) { |
3449 | 0 | // Create a recurrence for the outer loop with the same step size. |
3450 | 0 | // |
3451 | 0 | // The outer recurrence keeps its NW flag but only keeps NUW/NSW if the |
3452 | 0 | // inner recurrence has the same property. |
3453 | 0 | SCEV::NoWrapFlags OuterFlags = |
3454 | 0 | maskFlags(Flags, SCEV::FlagNW | NestedAR->getNoWrapFlags()); |
3455 | 0 |
|
3456 | 0 | NestedOperands[0] = getAddRecExpr(Operands, L, OuterFlags); |
3457 | 0 | AllInvariant = all_of(NestedOperands, [&](const SCEV *Op) { |
3458 | 0 | return isLoopInvariant(Op, NestedLoop); |
3459 | 0 | }); |
3460 | 0 |
|
3461 | 0 | if (AllInvariant) { |
3462 | 0 | // Ok, both add recurrences are valid after the transformation. |
3463 | 0 | // |
3464 | 0 | // The inner recurrence keeps its NW flag but only keeps NUW/NSW if |
3465 | 0 | // the outer recurrence has the same property. |
3466 | 0 | SCEV::NoWrapFlags InnerFlags = |
3467 | 0 | maskFlags(NestedAR->getNoWrapFlags(), SCEV::FlagNW | Flags); |
3468 | 0 | return getAddRecExpr(NestedOperands, NestedLoop, InnerFlags); |
3469 | 0 | } |
3470 | 0 | } |
3471 | 0 | // Reset Operands to its original state. |
3472 | 0 | Operands[0] = NestedAR; |
3473 | 0 | } |
3474 | 780k | } |
3475 | 19.9M | |
3476 | 19.9M | // Okay, it looks like we really DO need an addrec expr. Check to see if we |
3477 | 19.9M | // already have one, otherwise create a new one. |
3478 | 19.9M | return getOrCreateAddRecExpr(Operands, L, Flags); |
3479 | 19.9M | } |
3480 | | |
3481 | | const SCEV * |
3482 | | ScalarEvolution::getGEPExpr(GEPOperator *GEP, |
3483 | 2.06M | const SmallVectorImpl<const SCEV *> &IndexExprs) { |
3484 | 2.06M | const SCEV *BaseExpr = getSCEV(GEP->getPointerOperand()); |
3485 | 2.06M | // getSCEV(Base)->getType() has the same address space as Base->getType() |
3486 | 2.06M | // because SCEV::getType() preserves the address space. |
3487 | 2.06M | Type *IntPtrTy = getEffectiveSCEVType(BaseExpr->getType()); |
3488 | 2.06M | // FIXME(PR23527): Don't blindly transfer the inbounds flag from the GEP |
3489 | 2.06M | // instruction to its SCEV, because the Instruction may be guarded by control |
3490 | 2.06M | // flow and the no-overflow bits may not be valid for the expression in any |
3491 | 2.06M | // context. This can be fixed similarly to how these flags are handled for |
3492 | 2.06M | // adds. |
3493 | 2.06M | SCEV::NoWrapFlags Wrap = GEP->isInBounds() ? SCEV::FlagNSW1.68M |
3494 | 2.06M | : SCEV::FlagAnyWrap374k ; |
3495 | 2.06M | |
3496 | 2.06M | const SCEV *TotalOffset = getZero(IntPtrTy); |
3497 | 2.06M | // The array size is unimportant. The first thing we do on CurTy is getting |
3498 | 2.06M | // its element type. |
3499 | 2.06M | Type *CurTy = ArrayType::get(GEP->getSourceElementType(), 0); |
3500 | 3.74M | for (const SCEV *IndexExpr : IndexExprs) { |
3501 | 3.74M | // Compute the (potentially symbolic) offset in bytes for this index. |
3502 | 3.74M | if (StructType *STy = dyn_cast<StructType>(CurTy)) { |
3503 | 1.07M | // For a struct, add the member offset. |
3504 | 1.07M | ConstantInt *Index = cast<SCEVConstant>(IndexExpr)->getValue(); |
3505 | 1.07M | unsigned FieldNo = Index->getZExtValue(); |
3506 | 1.07M | const SCEV *FieldOffset = getOffsetOfExpr(IntPtrTy, STy, FieldNo); |
3507 | 1.07M | |
3508 | 1.07M | // Add the field offset to the running total offset. |
3509 | 1.07M | TotalOffset = getAddExpr(TotalOffset, FieldOffset); |
3510 | 1.07M | |
3511 | 1.07M | // Update CurTy to the type of the field at Index. |
3512 | 1.07M | CurTy = STy->getTypeAtIndex(Index); |
3513 | 2.67M | } else { |
3514 | 2.67M | // Update CurTy to its element type. |
3515 | 2.67M | CurTy = cast<SequentialType>(CurTy)->getElementType(); |
3516 | 2.67M | // For an array, add the element offset, explicitly scaled. |
3517 | 2.67M | const SCEV *ElementSize = getSizeOfExpr(IntPtrTy, CurTy); |
3518 | 2.67M | // Getelementptr indices are signed. |
3519 | 2.67M | IndexExpr = getTruncateOrSignExtend(IndexExpr, IntPtrTy); |
3520 | 2.67M | |
3521 | 2.67M | // Multiply the index by the element size to compute the element offset. |
3522 | 2.67M | const SCEV *LocalOffset = getMulExpr(IndexExpr, ElementSize, Wrap); |
3523 | 2.67M | |
3524 | 2.67M | // Add the element offset to the running total offset. |
3525 | 2.67M | TotalOffset = getAddExpr(TotalOffset, LocalOffset); |
3526 | 2.67M | } |
3527 | 3.74M | } |
3528 | 2.06M | |
3529 | 2.06M | // Add the total offset from all the GEP indices to the base. |
3530 | 2.06M | return getAddExpr(BaseExpr, TotalOffset, Wrap); |
3531 | 2.06M | } |
3532 | | |
3533 | | std::tuple<const SCEV *, FoldingSetNodeID, void *> |
3534 | | ScalarEvolution::findExistingSCEVInCache(int SCEVType, |
3535 | 393k | ArrayRef<const SCEV *> Ops) { |
3536 | 393k | FoldingSetNodeID ID; |
3537 | 393k | void *IP = nullptr; |
3538 | 393k | ID.AddInteger(SCEVType); |
3539 | 1.18M | for (unsigned i = 0, e = Ops.size(); i != e; ++i793k ) |
3540 | 793k | ID.AddPointer(Ops[i]); |
3541 | 393k | return std::tuple<const SCEV *, FoldingSetNodeID, void *>( |
3542 | 393k | UniqueSCEVs.FindNodeOrInsertPos(ID, IP), std::move(ID), IP); |
3543 | 393k | } |
3544 | | |
3545 | | const SCEV *ScalarEvolution::getMinMaxExpr(unsigned Kind, |
3546 | 254k | SmallVectorImpl<const SCEV *> &Ops) { |
3547 | 254k | assert(!Ops.empty() && "Cannot get empty (u|s)(min|max)!"); |
3548 | 254k | if (Ops.size() == 1) return Ops[0]0 ; |
3549 | | #ifndef NDEBUG |
3550 | | Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); |
3551 | | for (unsigned i = 1, e = Ops.size(); i != e; ++i) |
3552 | | assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && |
3553 | | "Operand types don't match!"); |
3554 | | #endif |
3555 | | |
3556 | 254k | bool IsSigned = Kind == scSMaxExpr || Kind == scSMinExpr204k ; |
3557 | 254k | bool IsMax = Kind == scSMaxExpr || Kind == scUMaxExpr204k ; |
3558 | 254k | |
3559 | 254k | // Sort by complexity, this groups all similar expression types together. |
3560 | 254k | GroupByComplexity(Ops, &LI, DT); |
3561 | 254k | |
3562 | 254k | // Check if we have created the same expression before. |
3563 | 254k | if (const SCEV *S = std::get<0>(findExistingSCEVInCache(Kind, Ops))) { |
3564 | 23.5k | return S; |
3565 | 23.5k | } |
3566 | 230k | |
3567 | 230k | // If there are any constants, fold them together. |
3568 | 230k | unsigned Idx = 0; |
3569 | 230k | if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) { |
3570 | 181k | ++Idx; |
3571 | 181k | assert(Idx < Ops.size()); |
3572 | 181k | auto FoldOp = [&](const APInt &LHS, const APInt &RHS) { |
3573 | 62.5k | if (Kind == scSMaxExpr) |
3574 | 4.98k | return APIntOps::smax(LHS, RHS); |
3575 | 57.6k | else if (Kind == scSMinExpr) |
3576 | 11.9k | return APIntOps::smin(LHS, RHS); |
3577 | 45.6k | else if (Kind == scUMaxExpr) |
3578 | 30 | return APIntOps::umax(LHS, RHS); |
3579 | 45.6k | else if (Kind == scUMinExpr) |
3580 | 45.6k | return APIntOps::umin(LHS, RHS); |
3581 | 0 | llvm_unreachable("Unknown SCEV min/max opcode"); |
3582 | 0 | }; |
3583 | 181k | |
3584 | 181k | while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) { |
3585 | 62.5k | // We found two constants, fold them together! |
3586 | 62.5k | ConstantInt *Fold = ConstantInt::get( |
3587 | 62.5k | getContext(), FoldOp(LHSC->getAPInt(), RHSC->getAPInt())); |
3588 | 62.5k | Ops[0] = getConstant(Fold); |
3589 | 62.5k | Ops.erase(Ops.begin()+1); // Erase the folded element |
3590 | 62.5k | if (Ops.size() == 1) return Ops[0]62.4k ; |
3591 | 173 | LHSC = cast<SCEVConstant>(Ops[0]); |
3592 | 173 | } |
3593 | 181k | |
3594 | 181k | bool IsMinV = LHSC->getValue()->isMinValue(IsSigned); |
3595 | 118k | bool IsMaxV = LHSC->getValue()->isMaxValue(IsSigned); |
3596 | 118k | |
3597 | 118k | if (IsMax ? IsMinV44.8k : IsMaxV73.9k ) { |
3598 | 9.18k | // If we are left with a constant minimum(/maximum)-int, strip it off. |
3599 | 9.18k | Ops.erase(Ops.begin()); |
3600 | 9.18k | --Idx; |
3601 | 109k | } else if (IsMax ? IsMaxV44.5k : IsMinV65.0k ) { |
3602 | 98 | // If we have a max(/min) with a constant maximum(/minimum)-int, |
3603 | 98 | // it will always be the extremum. |
3604 | 98 | return LHSC; |
3605 | 98 | } |
3606 | 118k | |
3607 | 118k | if (Ops.size() == 1) return Ops[0]9.17k ; |
3608 | 158k | } |
3609 | 158k | |
3610 | 158k | // Find the first operation of the same kind |
3611 | 316k | while (158k Idx < Ops.size() && Ops[Idx]->getSCEVType() < Kind202k ) |
3612 | 158k | ++Idx; |
3613 | 158k | |
3614 | 158k | // Check to see if one of the operands is of the same kind. If so, expand its |
3615 | 158k | // operands onto our operand list, and recurse to simplify. |
3616 | 158k | if (Idx < Ops.size()) { |
3617 | 44.8k | bool DeletedAny = false; |
3618 | 46.3k | while (Ops[Idx]->getSCEVType() == Kind) { |
3619 | 1.55k | const SCEVMinMaxExpr *SMME = cast<SCEVMinMaxExpr>(Ops[Idx]); |
3620 | 1.55k | Ops.erase(Ops.begin()+Idx); |
3621 | 1.55k | Ops.append(SMME->op_begin(), SMME->op_end()); |
3622 | 1.55k | DeletedAny = true; |
3623 | 1.55k | } |
3624 | 44.8k | |
3625 | 44.8k | if (DeletedAny) |
3626 | 1.53k | return getMinMaxExpr(Kind, Ops); |
3627 | 157k | } |
3628 | 157k | |
3629 | 157k | // Okay, check to see if the same value occurs in the operand list twice. If |
3630 | 157k | // so, delete one. Since we sorted the list, these values are required to |
3631 | 157k | // be adjacent. |
3632 | 157k | llvm::CmpInst::Predicate GEPred = |
3633 | 157k | IsSigned ? ICmpInst::ICMP_SGE51.9k : ICmpInst::ICMP_UGE105k ; |
3634 | 157k | llvm::CmpInst::Predicate LEPred = |
3635 | 157k | IsSigned ? ICmpInst::ICMP_SLE51.9k : ICmpInst::ICMP_ULE105k ; |
3636 | 157k | llvm::CmpInst::Predicate FirstPred = IsMax ? GEPred73.5k : LEPred83.8k ; |
3637 | 157k | llvm::CmpInst::Predicate SecondPred = IsMax ? LEPred73.5k : GEPred83.8k ; |
3638 | 317k | for (unsigned i = 0, e = Ops.size() - 1; i != e; ++i159k ) { |
3639 | 159k | if (Ops[i] == Ops[i + 1] || |
3640 | 159k | isKnownViaNonRecursiveReasoning(FirstPred, Ops[i], Ops[i + 1])159k ) { |
3641 | 334 | // X op Y op Y --> X op Y |
3642 | 334 | // X op Y --> X, if we know X, Y are ordered appropriately |
3643 | 334 | Ops.erase(Ops.begin() + i + 1, Ops.begin() + i + 2); |
3644 | 334 | --i; |
3645 | 334 | --e; |
3646 | 159k | } else if (isKnownViaNonRecursiveReasoning(SecondPred, Ops[i], |
3647 | 159k | Ops[i + 1])) { |
3648 | 17.1k | // X op Y --> Y, if we know X, Y are ordered appropriately |
3649 | 17.1k | Ops.erase(Ops.begin() + i, Ops.begin() + i + 1); |
3650 | 17.1k | --i; |
3651 | 17.1k | --e; |
3652 | 17.1k | } |
3653 | 159k | } |
3654 | 157k | |
3655 | 157k | if (Ops.size() == 1) return Ops[0]17.3k ; |
3656 | 139k | |
3657 | 139k | assert(!Ops.empty() && "Reduced smax down to nothing!"); |
3658 | 139k | |
3659 | 139k | // Okay, it looks like we really DO need an expr. Check to see if we |
3660 | 139k | // already have one, otherwise create a new one. |
3661 | 139k | const SCEV *ExistingSCEV; |
3662 | 139k | FoldingSetNodeID ID; |
3663 | 139k | void *IP; |
3664 | 139k | std::tie(ExistingSCEV, ID, IP) = findExistingSCEVInCache(Kind, Ops); |
3665 | 139k | if (ExistingSCEV) |
3666 | 1.20k | return ExistingSCEV; |
3667 | 138k | const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size()); |
3668 | 138k | std::uninitialized_copy(Ops.begin(), Ops.end(), O); |
3669 | 138k | SCEV *S = new (SCEVAllocator) SCEVMinMaxExpr( |
3670 | 138k | ID.Intern(SCEVAllocator), static_cast<SCEVTypes>(Kind), O, Ops.size()); |
3671 | 138k | |
3672 | 138k | UniqueSCEVs.InsertNode(S, IP); |
3673 | 138k | addToLoopUseLists(S); |
3674 | 138k | return S; |
3675 | 138k | } |
3676 | | |
3677 | 48.8k | const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS, const SCEV *RHS) { |
3678 | 48.8k | SmallVector<const SCEV *, 2> Ops = {LHS, RHS}; |
3679 | 48.8k | return getSMaxExpr(Ops); |
3680 | 48.8k | } |
3681 | | |
3682 | 49.2k | const SCEV *ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) { |
3683 | 49.2k | return getMinMaxExpr(scSMaxExpr, Ops); |
3684 | 49.2k | } |
3685 | | |
3686 | 37.0k | const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS, const SCEV *RHS) { |
3687 | 37.0k | SmallVector<const SCEV *, 2> Ops = {LHS, RHS}; |
3688 | 37.0k | return getUMaxExpr(Ops); |
3689 | 37.0k | } |
3690 | | |
3691 | 37.0k | const SCEV *ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) { |
3692 | 37.0k | return getMinMaxExpr(scUMaxExpr, Ops); |
3693 | 37.0k | } |
3694 | | |
3695 | | const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS, |
3696 | 28.3k | const SCEV *RHS) { |
3697 | 28.3k | SmallVector<const SCEV *, 2> Ops = { LHS, RHS }; |
3698 | 28.3k | return getSMinExpr(Ops); |
3699 | 28.3k | } |
3700 | | |
3701 | 28.4k | const SCEV *ScalarEvolution::getSMinExpr(SmallVectorImpl<const SCEV *> &Ops) { |
3702 | 28.4k | return getMinMaxExpr(scSMinExpr, Ops); |
3703 | 28.4k | } |
3704 | | |
3705 | | const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS, |
3706 | 18.3k | const SCEV *RHS) { |
3707 | 18.3k | SmallVector<const SCEV *, 2> Ops = { LHS, RHS }; |
3708 | 18.3k | return getUMinExpr(Ops); |
3709 | 18.3k | } |
3710 | | |
3711 | 137k | const SCEV *ScalarEvolution::getUMinExpr(SmallVectorImpl<const SCEV *> &Ops) { |
3712 | 137k | return getMinMaxExpr(scUMinExpr, Ops); |
3713 | 137k | } |
3714 | | |
3715 | 3.20M | const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) { |
3716 | 3.20M | // We can bypass creating a target-independent |
3717 | 3.20M | // constant expression and then folding it back into a ConstantInt. |
3718 | 3.20M | // This is just a compile-time optimization. |
3719 | 3.20M | return getConstant(IntTy, getDataLayout().getTypeAllocSize(AllocTy)); |
3720 | 3.20M | } |
3721 | | |
3722 | | const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy, |
3723 | | StructType *STy, |
3724 | 1.07M | unsigned FieldNo) { |
3725 | 1.07M | // We can bypass creating a target-independent |
3726 | 1.07M | // constant expression and then folding it back into a ConstantInt. |
3727 | 1.07M | // This is just a compile-time optimization. |
3728 | 1.07M | return getConstant( |
3729 | 1.07M | IntTy, getDataLayout().getStructLayout(STy)->getElementOffset(FieldNo)); |
3730 | 1.07M | } |
3731 | | |
3732 | 5.32M | const SCEV *ScalarEvolution::getUnknown(Value *V) { |
3733 | 5.32M | // Don't attempt to do anything other than create a SCEVUnknown object |
3734 | 5.32M | // here. createSCEV only calls getUnknown after checking for all other |
3735 | 5.32M | // interesting possibilities, and any other code that calls getUnknown |
3736 | 5.32M | // is doing so in order to hide a value from SCEV canonicalization. |
3737 | 5.32M | |
3738 | 5.32M | FoldingSetNodeID ID; |
3739 | 5.32M | ID.AddInteger(scUnknown); |
3740 | 5.32M | ID.AddPointer(V); |
3741 | 5.32M | void *IP = nullptr; |
3742 | 5.32M | if (SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) { |
3743 | 1.61M | assert(cast<SCEVUnknown>(S)->getValue() == V && |
3744 | 1.61M | "Stale SCEVUnknown in uniquing map!"); |
3745 | 1.61M | return S; |
3746 | 1.61M | } |
3747 | 3.71M | SCEV *S = new (SCEVAllocator) SCEVUnknown(ID.Intern(SCEVAllocator), V, this, |
3748 | 3.71M | FirstUnknown); |
3749 | 3.71M | FirstUnknown = cast<SCEVUnknown>(S); |
3750 | 3.71M | UniqueSCEVs.InsertNode(S, IP); |
3751 | 3.71M | return S; |
3752 | 3.71M | } |
3753 | | |
3754 | | //===----------------------------------------------------------------------===// |
3755 | | // Basic SCEV Analysis and PHI Idiom Recognition Code |
3756 | | // |
3757 | | |
3758 | | /// Test if values of the given type are analyzable within the SCEV |
3759 | | /// framework. This primarily includes integer types, and it can optionally |
3760 | | /// include pointer types if the ScalarEvolution class has access to |
3761 | | /// target-specific information. |
3762 | 26.0M | bool ScalarEvolution::isSCEVable(Type *Ty) const { |
3763 | 26.0M | // Integers and pointers are always SCEVable. |
3764 | 26.0M | return Ty->isIntOrPtrTy(); |
3765 | 26.0M | } |
3766 | | |
3767 | | /// Return the size in bits of the specified type, for which isSCEVable must |
3768 | | /// return true. |
3769 | 104M | uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const { |
3770 | 104M | assert(isSCEVable(Ty) && "Type is not SCEVable!"); |
3771 | 104M | if (Ty->isPointerTy()) |
3772 | 8.04M | return getDataLayout().getIndexTypeSizeInBits(Ty); |
3773 | 96.3M | return getDataLayout().getTypeSizeInBits(Ty); |
3774 | 96.3M | } |
3775 | | |
3776 | | /// Return a type with the same bitwidth as the given type and which represents |
3777 | | /// how SCEV will treat the given type, for which isSCEVable must return |
3778 | | /// true. For pointer types, this is the pointer-sized integer type. |
3779 | 74.4M | Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const { |
3780 | 74.4M | assert(isSCEVable(Ty) && "Type is not SCEVable!"); |
3781 | 74.4M | |
3782 | 74.4M | if (Ty->isIntegerTy()) |
3783 | 58.3M | return Ty; |
3784 | 16.0M | |
3785 | 16.0M | // The only other support type is pointer. |
3786 | 16.0M | assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!"); |
3787 | 16.0M | return getDataLayout().getIntPtrType(Ty); |
3788 | 16.0M | } |
3789 | | |
3790 | 223k | Type *ScalarEvolution::getWiderType(Type *T1, Type *T2) const { |
3791 | 223k | return getTypeSizeInBits(T1) >= getTypeSizeInBits(T2) ? T1215k : T28.01k ; |
3792 | 223k | } |
3793 | | |
3794 | 82.7M | const SCEV *ScalarEvolution::getCouldNotCompute() { |
3795 | 82.7M | return CouldNotCompute.get(); |
3796 | 82.7M | } |
3797 | | |
3798 | 44.2M | bool ScalarEvolution::checkValidity(const SCEV *S) const { |
3799 | 115M | bool ContainsNulls = SCEVExprContains(S, [](const SCEV *S) { |
3800 | 115M | auto *SU = dyn_cast<SCEVUnknown>(S); |
3801 | 115M | return SU && SU->getValue() == nullptr31.5M ; |
3802 | 115M | }); |
3803 | 44.2M | |
3804 | 44.2M | return !ContainsNulls; |
3805 | 44.2M | } |
3806 | | |
3807 | 2.48M | bool ScalarEvolution::containsAddRecurrence(const SCEV *S) { |
3808 | 2.48M | HasRecMapType::iterator I = HasRecMap.find(S); |
3809 | 2.48M | if (I != HasRecMap.end()) |
3810 | 822k | return I->second; |
3811 | 1.66M | |
3812 | 1.66M | bool FoundAddRec = SCEVExprContains(S, isa<SCEVAddRecExpr, const SCEV *>); |
3813 | 1.66M | HasRecMap.insert({S, FoundAddRec}); |
3814 | 1.66M | return FoundAddRec; |
3815 | 1.66M | } |
3816 | | |
3817 | | /// Try to split a SCEVAddExpr into a pair of {SCEV, ConstantInt}. |
3818 | | /// If \p S is a SCEVAddExpr and is composed of a sub SCEV S' and an |
3819 | | /// offset I, then return {S', I}, else return {\p S, nullptr}. |
3820 | 12.1M | static std::pair<const SCEV *, ConstantInt *> splitAddExpr(const SCEV *S) { |
3821 | 12.1M | const auto *Add = dyn_cast<SCEVAddExpr>(S); |
3822 | 12.1M | if (!Add) |
3823 | 10.3M | return {S, nullptr}; |
3824 | 1.77M | |
3825 | 1.77M | if (Add->getNumOperands() != 2) |
3826 | 138k | return {S, nullptr}; |
3827 | 1.63M | |
3828 | 1.63M | auto *ConstOp = dyn_cast<SCEVConstant>(Add->getOperand(0)); |
3829 | 1.63M | if (!ConstOp) |
3830 | 487k | return {S, nullptr}; |
3831 | 1.15M | |
3832 | 1.15M | return {Add->getOperand(1), ConstOp->getValue()}; |
3833 | 1.15M | } |
3834 | | |
3835 | | /// Return the ValueOffsetPair set for \p S. \p S can be represented |
3836 | | /// by the value and offset from any ValueOffsetPair in the set. |
3837 | | SetVector<ScalarEvolution::ValueOffsetPair> * |
3838 | 6.15M | ScalarEvolution::getSCEVValues(const SCEV *S) { |
3839 | 6.15M | ExprValueMapType::iterator SI = ExprValueMap.find_as(S); |
3840 | 6.15M | if (SI == ExprValueMap.end()) |
3841 | 3.58M | return nullptr; |
3842 | | #ifndef NDEBUG |
3843 | | if (VerifySCEVMap) { |
3844 | | // Check there is no dangling Value in the set returned. |
3845 | | for (const auto &VE : SI->second) |
3846 | | assert(ValueExprMap.count(VE.first)); |
3847 | | } |
3848 | | #endif |
3849 | 2.57M | return &SI->second; |
3850 | 2.57M | } |
3851 | | |
3852 | | /// Erase Value from ValueExprMap and ExprValueMap. ValueExprMap.erase(V) |
3853 | | /// cannot be used separately. eraseValueFromMap should be used to remove |
3854 | | /// V from ValueExprMap and ExprValueMap at the same time. |
3855 | 3.85M | void ScalarEvolution::eraseValueFromMap(Value *V) { |
3856 | 3.85M | ValueExprMapType::iterator I = ValueExprMap.find_as(V); |
3857 | 3.85M | if (I != ValueExprMap.end()) { |
3858 | 3.19M | const SCEV *S = I->second; |
3859 | 3.19M | // Remove {V, 0} from the set of ExprValueMap[S] |
3860 | 3.19M | if (SetVector<ValueOffsetPair> *SV = getSCEVValues(S)) |
3861 | 1.77M | SV->remove({V, nullptr}); |
3862 | 3.19M | |
3863 | 3.19M | // Remove {V, Offset} from the set of ExprValueMap[Stripped] |
3864 | 3.19M | const SCEV *Stripped; |
3865 | 3.19M | ConstantInt *Offset; |
3866 | 3.19M | std::tie(Stripped, Offset) = splitAddExpr(S); |
3867 | 3.19M | if (Offset != nullptr) { |
3868 | 254k | if (SetVector<ValueOffsetPair> *SV = getSCEVValues(Stripped)) |
3869 | 19.1k | SV->remove({V, Offset}); |
3870 | 254k | } |
3871 | 3.19M | ValueExprMap.erase(V); |
3872 | 3.19M | } |
3873 | 3.85M | } |
3874 | | |
3875 | | /// Check whether value has nuw/nsw/exact set but SCEV does not. |
3876 | | /// TODO: In reality it is better to check the poison recursively |
3877 | | /// but this is better than nothing. |
3878 | 9.44M | static bool SCEVLostPoisonFlags(const SCEV *S, const Value *V) { |
3879 | 9.44M | if (auto *I = dyn_cast<Instruction>(V)) { |
3880 | 7.10M | if (isa<OverflowingBinaryOperator>(I)) { |
3881 | 1.39M | if (auto *NS = dyn_cast<SCEVNAryExpr>(S)) { |
3882 | 1.39M | if (I->hasNoSignedWrap() && !NS->hasNoSignedWrap()916k ) |
3883 | 411k | return true; |
3884 | 978k | if (I->hasNoUnsignedWrap() && !NS->hasNoUnsignedWrap()456k ) |
3885 | 35.8k | return true; |
3886 | 5.70M | } |
3887 | 5.70M | } else if (isa<PossiblyExactOperator>(I) && I->isExact()96.0k ) |
3888 | 25.4k | return true; |
3889 | 8.97M | } |
3890 | 8.97M | return false; |
3891 | 8.97M | } |
3892 | | |
3893 | | /// Return an existing SCEV if it exists, otherwise analyze the expression and |
3894 | | /// create a new one. |
3895 | 55.0M | const SCEV *ScalarEvolution::getSCEV(Value *V) { |
3896 | 55.0M | assert(isSCEVable(V->getType()) && "Value is not SCEVable!"); |
3897 | 55.0M | |
3898 | 55.0M | const SCEV *S = getExistingSCEV(V); |
3899 | 55.0M | if (S == nullptr) { |
3900 | 10.8M | S = createSCEV(V); |
3901 | 10.8M | // During PHI resolution, it is possible to create two SCEVs for the same |
3902 | 10.8M | // V, so it is needed to double check whether V->S is inserted into |
3903 | 10.8M | // ValueExprMap before insert S->{V, 0} into ExprValueMap. |
3904 | 10.8M | std::pair<ValueExprMapType::iterator, bool> Pair = |
3905 | 10.8M | ValueExprMap.insert({SCEVCallbackVH(V, this), S}); |
3906 | 10.8M | if (Pair.second && !SCEVLostPoisonFlags(S, V)9.44M ) { |
3907 | 8.97M | ExprValueMap[S].insert({V, nullptr}); |
3908 | 8.97M | |
3909 | 8.97M | // If S == Stripped + Offset, add Stripped -> {V, Offset} into |
3910 | 8.97M | // ExprValueMap. |
3911 | 8.97M | const SCEV *Stripped = S; |
3912 | 8.97M | ConstantInt *Offset = nullptr; |
3913 | 8.97M | std::tie(Stripped, Offset) = splitAddExpr(S); |
3914 | 8.97M | // If stripped is SCEVUnknown, don't bother to save |
3915 | 8.97M | // Stripped -> {V, offset}. It doesn't simplify and sometimes even |
3916 | 8.97M | // increase the complexity of the expansion code. |
3917 | 8.97M | // If V is GetElementPtrInst, don't save Stripped -> {V, offset} |
3918 | 8.97M | // because it may generate add/sub instead of GEP in SCEV expansion. |
3919 | 8.97M | if (Offset != nullptr && !isa<SCEVUnknown>(Stripped)896k && |
3920 | 8.97M | !isa<GetElementPtrInst>(V)54.6k ) |
3921 | 54.5k | ExprValueMap[Stripped].insert({V, Offset}); |
3922 | 8.97M | } |
3923 | 10.8M | } |
3924 | 55.0M | return S; |
3925 | 55.0M | } |
3926 | | |
3927 | 56.4M | const SCEV *ScalarEvolution::getExistingSCEV(Value *V) { |
3928 | 56.4M | assert(isSCEVable(V->getType()) && "Value is not SCEVable!"); |
3929 | 56.4M | |
3930 | 56.4M | ValueExprMapType::iterator I = ValueExprMap.find_as(V); |
3931 | 56.4M | if (I != ValueExprMap.end()) { |
3932 | 44.2M | const SCEV *S = I->second; |
3933 | 44.2M | if (checkValidity(S)) |
3934 | 44.2M | return S; |
3935 | 0 | eraseValueFromMap(V); |
3936 | 0 | forgetMemoizedResults(S); |
3937 | 0 | } |
3938 | 56.4M | return nullptr12.1M ; |
3939 | 56.4M | } |
3940 | | |
3941 | | /// Return a SCEV corresponding to -V = -1*V |
3942 | | const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V, |
3943 | 9.14M | SCEV::NoWrapFlags Flags) { |
3944 | 9.14M | if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V)) |
3945 | 3.30M | return getConstant( |
3946 | 3.30M | cast<ConstantInt>(ConstantExpr::getNeg(VC->getValue()))); |
3947 | 5.83M | |
3948 | 5.83M | Type *Ty = V->getType(); |
3949 | 5.83M | Ty = getEffectiveSCEVType(Ty); |
3950 | 5.83M | return getMulExpr( |
3951 | 5.83M | V, getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))), Flags); |
3952 | 5.83M | } |
3953 | | |
3954 | | /// If Expr computes ~A, return A else return nullptr |
3955 | 36.1k | static const SCEV *MatchNotExpr(const SCEV *Expr) { |
3956 | 36.1k | const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Expr); |
3957 | 36.1k | if (!Add || Add->getNumOperands() != 22.39k || |
3958 | 36.1k | !Add->getOperand(0)->isAllOnesValue()2.00k ) |
3959 | 35.9k | return nullptr; |
3960 | 225 | |
3961 | 225 | const SCEVMulExpr *AddRHS = dyn_cast<SCEVMulExpr>(Add->getOperand(1)); |
3962 | 225 | if (!AddRHS || AddRHS->getNumOperands() != 20 || |
3963 | 225 | !AddRHS->getOperand(0)->isAllOnesValue()0 ) |
3964 | 225 | return nullptr; |
3965 | 0 | |
3966 | 0 | return AddRHS->getOperand(1); |
3967 | 0 | } |
3968 | | |
3969 | | /// Return a SCEV corresponding to ~V = -1-V |
3970 | 5.15M | const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) { |
3971 | 5.15M | if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V)) |
3972 | 1.67M | return getConstant( |
3973 | 1.67M | cast<ConstantInt>(ConstantExpr::getNot(VC->getValue()))); |
3974 | 3.48M | |
3975 | 3.48M | // Fold ~(u|s)(min|max)(~x, ~y) to (u|s)(max|min)(x, y) |
3976 | 3.48M | if (const SCEVMinMaxExpr *MME = dyn_cast<SCEVMinMaxExpr>(V)) { |
3977 | 36.1k | auto MatchMinMaxNegation = [&](const SCEVMinMaxExpr *MME) { |
3978 | 36.1k | SmallVector<const SCEV *, 2> MatchedOperands; |
3979 | 36.1k | for (const SCEV *Operand : MME->operands()) { |
3980 | 36.1k | const SCEV *Matched = MatchNotExpr(Operand); |
3981 | 36.1k | if (!Matched) |
3982 | 36.1k | return (const SCEV *)nullptr; |
3983 | 0 | MatchedOperands.push_back(Matched); |
3984 | 0 | } |
3985 | 36.1k | return getMinMaxExpr( |
3986 | 0 | SCEVMinMaxExpr::negate(static_cast<SCEVTypes>(MME->getSCEVType())), |
3987 | 0 | MatchedOperands); |
3988 | 36.1k | }; |
3989 | 36.1k | if (const SCEV *Replaced = MatchMinMaxNegation(MME)) |
3990 | 0 | return Replaced; |
3991 | 3.48M | } |
3992 | 3.48M | |
3993 | 3.48M | Type *Ty = V->getType(); |
3994 | 3.48M | Ty = getEffectiveSCEVType(Ty); |
3995 | 3.48M | const SCEV *AllOnes = |
3996 | 3.48M | getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))); |
3997 | 3.48M | return getMinusSCEV(AllOnes, V); |
3998 | 3.48M | } |
3999 | | |
4000 | | const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS, |
4001 | | SCEV::NoWrapFlags Flags, |
4002 | 9.69M | unsigned Depth) { |
4003 | 9.69M | // Fast path: X - X --> 0. |
4004 | 9.69M | if (LHS == RHS) |
4005 | 1.18M | return getZero(LHS->getType()); |
4006 | 8.50M | |
4007 | 8.50M | // We represent LHS - RHS as LHS + (-1)*RHS. This transformation |
4008 | 8.50M | // makes it so that we cannot make much use of NUW. |
4009 | 8.50M | auto AddFlags = SCEV::FlagAnyWrap; |
4010 | 8.50M | const bool RHSIsNotMinSigned = |
4011 | 8.50M | !getSignedRangeMin(RHS).isMinSignedValue(); |
4012 | 8.50M | if (maskFlags(Flags, SCEV::FlagNSW) == SCEV::FlagNSW) { |
4013 | 8.49k | // Let M be the minimum representable signed value. Then (-1)*RHS |
4014 | 8.49k | // signed-wraps if and only if RHS is M. That can happen even for |
4015 | 8.49k | // a NSW subtraction because e.g. (-1)*M signed-wraps even though |
4016 | 8.49k | // -1 - M does not. So to transfer NSW from LHS - RHS to LHS + |
4017 | 8.49k | // (-1)*RHS, we need to prove that RHS != M. |
4018 | 8.49k | // |
4019 | 8.49k | // If LHS is non-negative and we know that LHS - RHS does not |
4020 | 8.49k | // signed-wrap, then RHS cannot be M. So we can rule out signed-wrap |
4021 | 8.49k | // either by proving that RHS > M or that LHS >= 0. |
4022 | 8.49k | if (RHSIsNotMinSigned || isKnownNonNegative(LHS)862 ) { |
4023 | 7.76k | AddFlags = SCEV::FlagNSW; |
4024 | 7.76k | } |
4025 | 8.49k | } |
4026 | 8.50M | |
4027 | 8.50M | // FIXME: Find a correct way to transfer NSW to (-1)*M when LHS - |
4028 | 8.50M | // RHS is NSW and LHS >= 0. |
4029 | 8.50M | // |
4030 | 8.50M | // The difficulty here is that the NSW flag may have been proven |
4031 | 8.50M | // relative to a loop that is to be found in a recurrence in LHS and |
4032 | 8.50M | // not in RHS. Applying NSW to (-1)*M may then let the NSW have a |
4033 | 8.50M | // larger scope than intended. |
4034 | 8.50M | auto NegFlags = RHSIsNotMinSigned ? SCEV::FlagNSW5.14M : SCEV::FlagAnyWrap3.35M ; |
4035 | 8.50M | |
4036 | 8.50M | return getAddExpr(LHS, getNegativeSCEV(RHS, NegFlags), AddFlags, Depth); |
4037 | 8.50M | } |
4038 | | |
4039 | | const SCEV *ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, Type *Ty, |
4040 | 1.70M | unsigned Depth) { |
4041 | 1.70M | Type *SrcTy = V->getType(); |
4042 | 1.70M | assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() && |
4043 | 1.70M | "Cannot truncate or zero extend with non-integer arguments!"); |
4044 | 1.70M | if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) |
4045 | 1.23M | return V; // No conversion |
4046 | 471k | if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty)) |
4047 | 227k | return getTruncateExpr(V, Ty, Depth); |
4048 | 244k | return getZeroExtendExpr(V, Ty, Depth); |
4049 | 244k | } |
4050 | | |
4051 | | const SCEV *ScalarEvolution::getTruncateOrSignExtend(const SCEV *V, Type *Ty, |
4052 | 2.70M | unsigned Depth) { |
4053 | 2.70M | Type *SrcTy = V->getType(); |
4054 | 2.70M | assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() && |
4055 | 2.70M | "Cannot truncate or zero extend with non-integer arguments!"); |
4056 | 2.70M | if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) |
4057 | 2.64M | return V; // No conversion |
4058 | 60.3k | if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty)) |
4059 | 1.70k | return getTruncateExpr(V, Ty, Depth); |
4060 | 58.6k | return getSignExtendExpr(V, Ty, Depth); |
4061 | 58.6k | } |
4062 | | |
4063 | | const SCEV * |
4064 | 6.15M | ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, Type *Ty) { |
4065 | 6.15M | Type *SrcTy = V->getType(); |
4066 | 6.15M | assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() && |
4067 | 6.15M | "Cannot noop or zero extend with non-integer arguments!"); |
4068 | 6.15M | assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) && |
4069 | 6.15M | "getNoopOrZeroExtend cannot truncate!"); |
4070 | 6.15M | if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) |
4071 | 5.17M | return V; // No conversion |
4072 | 983k | return getZeroExtendExpr(V, Ty); |
4073 | 983k | } |
4074 | | |
4075 | | const SCEV * |
4076 | 106k | ScalarEvolution::getNoopOrSignExtend(const SCEV *V, Type *Ty) { |
4077 | 106k | Type *SrcTy = V->getType(); |
4078 | 106k | assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() && |
4079 | 106k | "Cannot noop or sign extend with non-integer arguments!"); |
4080 | 106k | assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) && |
4081 | 106k | "getNoopOrSignExtend cannot truncate!"); |
4082 | 106k | if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) |
4083 | 95.1k | return V; // No conversion |
4084 | 11.7k | return getSignExtendExpr(V, Ty); |
4085 | 11.7k | } |
4086 | | |
4087 | | const SCEV * |
4088 | 660 | ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, Type *Ty) { |
4089 | 660 | Type *SrcTy = V->getType(); |
4090 | 660 | assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() && |
4091 | 660 | "Cannot noop or any extend with non-integer arguments!"); |
4092 | 660 | assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) && |
4093 | 660 | "getNoopOrAnyExtend cannot truncate!"); |
4094 | 660 | if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) |
4095 | 660 | return V; // No conversion |
4096 | 0 | return getAnyExtendExpr(V, Ty); |
4097 | 0 | } |
4098 | | |
4099 | | const SCEV * |
4100 | 13.8k | ScalarEvolution::getTruncateOrNoop(const SCEV *V, Type *Ty) { |
4101 | 13.8k | Type *SrcTy = V->getType(); |
4102 | 13.8k | assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() && |
4103 | 13.8k | "Cannot truncate or noop with non-integer arguments!"); |
4104 | 13.8k | assert(getTypeSizeInBits(SrcTy) >= getTypeSizeInBits(Ty) && |
4105 | 13.8k | "getTruncateOrNoop cannot extend!"); |
4106 | 13.8k | if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) |
4107 | 13.2k | return V; // No conversion |
4108 | 603 | return getTruncateExpr(V, Ty); |
4109 | 603 | } |
4110 | | |
4111 | | const SCEV *ScalarEvolution::getUMaxFromMismatchedTypes(const SCEV *LHS, |
4112 | 0 | const SCEV *RHS) { |
4113 | 0 | const SCEV *PromotedLHS = LHS; |
4114 | 0 | const SCEV *PromotedRHS = RHS; |
4115 | 0 |
|
4116 | 0 | if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType())) |
4117 | 0 | PromotedRHS = getZeroExtendExpr(RHS, LHS->getType()); |
4118 | 0 | else |
4119 | 0 | PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType()); |
4120 | 0 |
|
4121 | 0 | return getUMaxExpr(PromotedLHS, PromotedRHS); |
4122 | 0 | } |
4123 | | |
4124 | | const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(const SCEV *LHS, |
4125 | 9.60k | const SCEV *RHS) { |
4126 | 9.60k | SmallVector<const SCEV *, 2> Ops = { LHS, RHS }; |
4127 | 9.60k | return getUMinFromMismatchedTypes(Ops); |
4128 | 9.60k | } |
4129 | | |
4130 | | const SCEV *ScalarEvolution::getUMinFromMismatchedTypes( |
4131 | 626k | SmallVectorImpl<const SCEV *> &Ops) { |
4132 | 626k | assert(!Ops.empty() && "At least one operand must be!"); |
4133 | 626k | // Trivial case. |
4134 | 626k | if (Ops.size() == 1) |
4135 | 507k | return Ops[0]; |
4136 | 118k | |
4137 | 118k | // Find the max type first. |
4138 | 118k | Type *MaxType = nullptr; |
4139 | 118k | for (auto *S : Ops) |
4140 | 238k | if (MaxType) |
4141 | 119k | MaxType = getWiderType(MaxType, S->getType()); |
4142 | 118k | else |
4143 | 118k | MaxType = S->getType(); |
4144 | 118k | |
4145 | 118k | // Extend all ops to max type. |
4146 | 118k | SmallVector<const SCEV *, 2> PromotedOps; |
4147 | 118k | for (auto *S : Ops) |
4148 | 238k | PromotedOps.push_back(getNoopOrZeroExtend(S, MaxType)); |
4149 | 118k | |
4150 | 118k | // Generate umin. |
4151 | 118k | return getUMinExpr(PromotedOps); |
4152 | 118k | } |
4153 | | |
4154 | 429k | const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) { |
4155 | 429k | // A pointer operand may evaluate to a nonpointer expression, such as null. |
4156 | 429k | if (!V->getType()->isPointerTy()) |
4157 | 2.58k | return V; |
4158 | 426k | |
4159 | 426k | if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(V)) { |
4160 | 0 | return getPointerBase(Cast->getOperand()); |
4161 | 426k | } else if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(V)) { |
4162 | 288k | const SCEV *PtrOp = nullptr; |
4163 | 626k | for (const SCEV *NAryOp : NAry->operands()) { |
4164 | 626k | if (NAryOp->getType()->isPointerTy()) { |
4165 | 288k | // Cannot find the base of an expression with multiple pointer operands. |
4166 | 288k | if (PtrOp) |
4167 | 1 | return V; |
4168 | 288k | PtrOp = NAryOp; |
4169 | 288k | } |
4170 | 626k | } |
4171 | 288k | if (288k !PtrOp288k ) |
4172 | 0 | return V; |
4173 | 288k | return getPointerBase(PtrOp); |
4174 | 288k | } |
4175 | 138k | return V; |
4176 | 138k | } |
4177 | | |
4178 | | /// Push users of the given Instruction onto the given Worklist. |
4179 | | static void |
4180 | | PushDefUseChildren(Instruction *I, |
4181 | 20.2M | SmallVectorImpl<Instruction *> &Worklist) { |
4182 | 20.2M | // Push the def-use children onto the Worklist stack. |
4183 | 20.2M | for (User *U : I->users()) |
4184 | 24.6M | Worklist.push_back(cast<Instruction>(U)); |
4185 | 20.2M | } |
4186 | | |
4187 | 220k | void ScalarEvolution::forgetSymbolicName(Instruction *PN, const SCEV *SymName) { |
4188 | 220k | SmallVector<Instruction *, 16> Worklist; |
4189 | 220k | PushDefUseChildren(PN, Worklist); |
4190 | 220k | |
4191 | 220k | SmallPtrSet<Instruction *, 8> Visited; |
4192 | 220k | Visited.insert(PN); |
4193 | 3.82M | while (!Worklist.empty()) { |
4194 | 3.60M | Instruction *I = Worklist.pop_back_val(); |
4195 | 3.60M | if (!Visited.insert(I).second) |
4196 | 471k | continue; |
4197 | 3.13M | |
4198 | 3.13M | auto It = ValueExprMap.find_as(static_cast<Value *>(I)); |
4199 | 3.13M | if (It != ValueExprMap.end()) { |
4200 | 351k | const SCEV *Old = It->second; |
4201 | 351k | |
4202 | 351k | // Short-circuit the def-use traversal if the symbolic name |
4203 | 351k | // ceases to appear in expressions. |
4204 | 351k | if (Old != SymName && !hasOperand(Old, SymName)349k ) |
4205 | 106k | continue; |
4206 | 244k | |
4207 | 244k | // SCEVUnknown for a PHI either means that it has an unrecognized |
4208 | 244k | // structure, it's a PHI that's in the progress of being computed |
4209 | 244k | // by createNodeForPHI, or it's a single-value PHI. In the first case, |
4210 | 244k | // additional loop trip count information isn't going to change anything. |
4211 | 244k | // In the second case, createNodeForPHI will perform the necessary |
4212 | 244k | // updates on its own when it gets to that point. In the third, we do |
4213 | 244k | // want to forget the SCEVUnknown. |
4214 | 244k | if (!isa<PHINode>(I) || |
4215 | 244k | !isa<SCEVUnknown>(Old)170 || |
4216 | 244k | (27 I != PN27 && Old == SymName27 )) { |
4217 | 244k | eraseValueFromMap(It->first); |
4218 | 244k | forgetMemoizedResults(Old); |
4219 | 244k | } |
4220 | 244k | } |
4221 | 3.13M | |
4222 | 3.13M | PushDefUseChildren(I, Worklist); |
4223 | 3.02M | } |
4224 | 220k | } |
4225 | | |
4226 | | namespace { |
4227 | | |
4228 | | /// Takes SCEV S and Loop L. For each AddRec sub-expression, use its start |
4229 | | /// expression in case its Loop is L. If it is not L then |
4230 | | /// if IgnoreOtherLoops is true then use AddRec itself |
4231 | | /// otherwise rewrite cannot be done. |
4232 | | /// If SCEV contains non-invariant unknown SCEV rewrite cannot be done. |
4233 | | class SCEVInitRewriter : public SCEVRewriteVisitor<SCEVInitRewriter> { |
4234 | | public: |
4235 | | static const SCEV *rewrite(const SCEV *S, const Loop *L, ScalarEvolution &SE, |
4236 | 1.30M | bool IgnoreOtherLoops = true) { |
4237 | 1.30M | SCEVInitRewriter Rewriter(L, SE); |
4238 | 1.30M | const SCEV *Result = Rewriter.visit(S); |
4239 | 1.30M | if (Rewriter.hasSeenLoopVariantSCEVUnknown()) |
4240 | 63.3k | return SE.getCouldNotCompute(); |
4241 | 1.24M | return Rewriter.hasSeenOtherLoops() && !IgnoreOtherLoops3.46k |
4242 | 1.24M | ? SE.getCouldNotCompute()0 |
4243 | 1.24M | : Result; |
4244 | 1.24M | } |
4245 | | |
4246 | 263k | const SCEV *visitUnknown(const SCEVUnknown *Expr) { |
4247 | 263k | if (!SE.isLoopInvariant(Expr, L)) |
4248 | 64.4k | SeenLoopVariantSCEVUnknown = true; |
4249 | 263k | return Expr; |
4250 | 263k | } |
4251 | | |
4252 | 511k | const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { |
4253 | 511k | // Only re-write AddRecExprs for this loop. |
4254 | 511k | if (Expr->getLoop() == L) |
4255 | 508k | return Expr->getStart(); |
4256 | 3.54k | SeenOtherLoops = true; |
4257 | 3.54k | return Expr; |
4258 | 3.54k | } |
4259 | | |
4260 | 1.30M | bool hasSeenLoopVariantSCEVUnknown() { return SeenLoopVariantSCEVUnknown; } |
4261 | | |
4262 | 1.24M | bool hasSeenOtherLoops() { return SeenOtherLoops; } |
4263 | | |
4264 | | private: |
4265 | | explicit SCEVInitRewriter(const Loop *L, ScalarEvolution &SE) |
4266 | 1.30M | : SCEVRewriteVisitor(SE), L(L) {} |
4267 | | |
4268 | | const Loop *L; |
4269 | | bool SeenLoopVariantSCEVUnknown = false; |
4270 | | bool SeenOtherLoops = false; |
4271 | | }; |
4272 | | |
4273 | | /// Takes SCEV S and Loop L. For each AddRec sub-expression, use its post |
4274 | | /// increment expression in case its Loop is L. If it is not L then |
4275 | | /// use AddRec itself. |
4276 | | /// If SCEV contains non-invariant unknown SCEV rewrite cannot be done. |
4277 | | class SCEVPostIncRewriter : public SCEVRewriteVisitor<SCEVPostIncRewriter> { |
4278 | | public: |
4279 | 929k | static const SCEV *rewrite(const SCEV *S, const Loop *L, ScalarEvolution &SE) { |
4280 | 929k | SCEVPostIncRewriter Rewriter(L, SE); |
4281 | 929k | const SCEV *Result = Rewriter.visit(S); |
4282 | 929k | return Rewriter.hasSeenLoopVariantSCEVUnknown() |
4283 | 929k | ? SE.getCouldNotCompute()0 |
4284 | 929k | : Result; |
4285 | 929k | } |
4286 | | |
4287 | 197k | const SCEV *visitUnknown(const SCEVUnknown *Expr) { |
4288 | 197k | if (!SE.isLoopInvariant(Expr, L)) |
4289 | 0 | SeenLoopVariantSCEVUnknown = true; |
4290 | 197k | return Expr; |
4291 | 197k | } |
4292 | | |
4293 | 500k | const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { |
4294 | 500k | // Only re-write AddRecExprs for this loop. |
4295 | 500k | if (Expr->getLoop() == L) |
4296 | 496k | return Expr->getPostIncExpr(SE); |
4297 | 3.48k | SeenOtherLoops = true; |
4298 | 3.48k | return Expr; |
4299 | 3.48k | } |
4300 | | |
4301 | 929k | bool hasSeenLoopVariantSCEVUnknown() { return SeenLoopVariantSCEVUnknown; } |
4302 | | |
4303 | 0 | bool hasSeenOtherLoops() { return SeenOtherLoops; } |
4304 | | |
4305 | | private: |
4306 | | explicit SCEVPostIncRewriter(const Loop *L, ScalarEvolution &SE) |
4307 | 929k | : SCEVRewriteVisitor(SE), L(L) {} |
4308 | | |
4309 | | const Loop *L; |
4310 | | bool SeenLoopVariantSCEVUnknown = false; |
4311 | | bool SeenOtherLoops = false; |
4312 | | }; |
4313 | | |
4314 | | /// This class evaluates the compare condition by matching it against the |
4315 | | /// condition of loop latch. If there is a match we assume a true value |
4316 | | /// for the condition while building SCEV nodes. |
4317 | | class SCEVBackedgeConditionFolder |
4318 | | : public SCEVRewriteVisitor<SCEVBackedgeConditionFolder> { |
4319 | | public: |
4320 | | static const SCEV *rewrite(const SCEV *S, const Loop *L, |
4321 | 236k | ScalarEvolution &SE) { |
4322 | 236k | bool IsPosBECond = false; |
4323 | 236k | Value *BECond = nullptr; |
4324 | 236k | if (BasicBlock *Latch = L->getLoopLatch()) { |
4325 | 236k | BranchInst *BI = dyn_cast<BranchInst>(Latch->getTerminator()); |
4326 | 236k | if (BI && BI->isConditional()236k ) { |
4327 | 233k | assert(BI->getSuccessor(0) != BI->getSuccessor(1) && |
4328 | 233k | "Both outgoing branches should not target same header!"); |
4329 | 233k | BECond = BI->getCondition(); |
4330 | 233k | IsPosBECond = BI->getSuccessor(0) == L->getHeader(); |
4331 | 233k | } else { |
4332 | 3.20k | return S; |
4333 | 3.20k | } |
4334 | 233k | } |
4335 | 233k | SCEVBackedgeConditionFolder Rewriter(L, BECond, IsPosBECond, SE); |
4336 | 233k | return Rewriter.visit(S); |
4337 | 233k | } |
4338 | | |
4339 | 56.5k | const SCEV *visitUnknown(const SCEVUnknown *Expr) { |
4340 | 56.5k | const SCEV *Result = Expr; |
4341 | 56.5k | bool InvariantF = SE.isLoopInvariant(Expr, L); |
4342 | 56.5k | |
4343 | 56.5k | if (!InvariantF) { |
4344 | 30.0k | Instruction *I = cast<Instruction>(Expr->getValue()); |
4345 | 30.0k | switch (I->getOpcode()) { |
4346 | 30.0k | case Instruction::Select: { |
4347 | 723 | SelectInst *SI = cast<SelectInst>(I); |
4348 | 723 | Optional<const SCEV *> Res = |
4349 | 723 | compareWithBackedgeCondition(SI->getCondition()); |
4350 | 723 | if (Res.hasValue()) { |
4351 | 10 | bool IsOne = cast<SCEVConstant>(Res.getValue())->getValue()->isOne(); |
4352 | 10 | Result = SE.getSCEV(IsOne ? SI->getTrueValue()8 : SI->getFalseValue()2 ); |
4353 | 10 | } |
4354 | 723 | break; |
4355 | 30.0k | } |
4356 | 30.0k | default: { |
4357 | 29.3k | Optional<const SCEV *> Res = compareWithBackedgeCondition(I); |
4358 | 29.3k | if (Res.hasValue()) |
4359 | 9 | Result = Res.getValue(); |
4360 | 29.3k | break; |
4361 | 56.5k | } |
4362 | 56.5k | } |
4363 | 56.5k | } |
4364 | 56.5k | return Result; |
4365 | 56.5k | } |
4366 | | |
4367 | | private: |
4368 | | explicit SCEVBackedgeConditionFolder(const Loop *L, Value *BECond, |
4369 | | bool IsPosBECond, ScalarEvolution &SE) |
4370 | | : SCEVRewriteVisitor(SE), L(L), BackedgeCond(BECond), |
4371 | 233k | IsPositiveBECond(IsPosBECond) {} |
4372 | | |
4373 | | Optional<const SCEV *> compareWithBackedgeCondition(Value *IC); |
4374 | | |
4375 | | const Loop *L; |
4376 | | /// Loop back condition. |
4377 | | Value *BackedgeCond = nullptr; |
4378 | | /// Set to true if loop back is on positive branch condition. |
4379 | | bool IsPositiveBECond; |
4380 | | }; |
4381 | | |
4382 | | Optional<const SCEV *> |
4383 | 30.0k | SCEVBackedgeConditionFolder::compareWithBackedgeCondition(Value *IC) { |
4384 | 30.0k | |
4385 | 30.0k | // If value matches the backedge condition for loop latch, |
4386 | 30.0k | // then return a constant evolution node based on loopback |
4387 | 30.0k | // branch taken. |
4388 | 30.0k | if (BackedgeCond == IC) |
4389 | 19 | return IsPositiveBECond ? SE.getOne(Type::getInt1Ty(SE.getContext()))17 |
4390 | 19 | : SE.getZero(Type::getInt1Ty(SE.getContext()))2 ; |
4391 | 30.0k | return None; |
4392 | 30.0k | } |
4393 | | |
4394 | | class SCEVShiftRewriter : public SCEVRewriteVisitor<SCEVShiftRewriter> { |
4395 | | public: |
4396 | | static const SCEV *rewrite(const SCEV *S, const Loop *L, |
4397 | 313k | ScalarEvolution &SE) { |
4398 | 313k | SCEVShiftRewriter Rewriter(L, SE); |
4399 | 313k | const SCEV *Result = Rewriter.visit(S); |
4400 | 313k | return Rewriter.isValid() ? Result12.3k : SE.getCouldNotCompute()301k ; |
4401 | 313k | } |
4402 | | |
4403 | 305k | const SCEV *visitUnknown(const SCEVUnknown *Expr) { |
4404 | 305k | // Only allow AddRecExprs for this loop. |
4405 | 305k | if (!SE.isLoopInvariant(Expr, L)) |
4406 | 304k | Valid = false; |
4407 | 305k | return Expr; |
4408 | 305k | } |
4409 | | |
4410 | 11.3k | const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { |
4411 | 11.3k | if (Expr->getLoop() == L && Expr->isAffine()10.9k ) |
4412 | 10.9k | return SE.getMinusSCEV(Expr, Expr->getStepRecurrence(SE)); |
4413 | 471 | Valid = false; |
4414 | 471 | return Expr; |
4415 | 471 | } |
4416 | | |
4417 | 313k | bool isValid() { return Valid; } |
4418 | | |
4419 | | private: |
4420 | | explicit SCEVShiftRewriter(const Loop *L, ScalarEvolution &SE) |
4421 | 313k | : SCEVRewriteVisitor(SE), L(L) {} |
4422 | | |
4423 | | const Loop *L; |
4424 | | bool Valid = true; |
4425 | | }; |
4426 | | |
4427 | | } // end anonymous namespace |
4428 | | |
4429 | | SCEV::NoWrapFlags |
4430 | 822k | ScalarEvolution::proveNoWrapViaConstantRanges(const SCEVAddRecExpr *AR) { |
4431 | 822k | if (!AR->isAffine()) |
4432 | 0 | return SCEV::FlagAnyWrap; |
4433 | 822k | |
4434 | 822k | using OBO = OverflowingBinaryOperator; |
4435 | 822k | |
4436 | 822k | SCEV::NoWrapFlags Result = SCEV::FlagAnyWrap; |
4437 | 822k | |
4438 | 822k | if (!AR->hasNoSignedWrap()) { |
4439 | 743k | ConstantRange AddRecRange = getSignedRange(AR); |
4440 | 743k | ConstantRange IncRange = getSignedRange(AR->getStepRecurrence(*this)); |
4441 | 743k | |
4442 | 743k | auto NSWRegion = ConstantRange::makeGuaranteedNoWrapRegion( |
4443 | 743k | Instruction::Add, IncRange, OBO::NoSignedWrap); |
4444 | 743k | if (NSWRegion.contains(AddRecRange)) |
4445 | 99.9k | Result = ScalarEvolution::setFlags(Result, SCEV::FlagNSW); |
4446 | 743k | } |
4447 | 822k | |
4448 | 822k | if (!AR->hasNoUnsignedWrap()) { |
4449 | 773k | ConstantRange AddRecRange = getUnsignedRange(AR); |
4450 | 773k | ConstantRange IncRange = getUnsignedRange(AR->getStepRecurrence(*this)); |
4451 | 773k | |
4452 | 773k | auto NUWRegion = ConstantRange::makeGuaranteedNoWrapRegion( |
4453 | 773k | Instruction::Add, IncRange, OBO::NoUnsignedWrap); |
4454 | 773k | if (NUWRegion.contains(AddRecRange)) |
4455 | 24.4k | Result = ScalarEvolution::setFlags(Result, SCEV::FlagNUW); |
4456 | 773k | } |
4457 | 822k | |
4458 | 822k | return Result; |
4459 | 822k | } |
4460 | | |
4461 | | namespace { |
4462 | | |
4463 | | /// Represents an abstract binary operation. This may exist as a |
4464 | | /// normal instruction or constant expression, or may have been |
4465 | | /// derived from an expression tree. |
4466 | | struct BinaryOp { |
4467 | | unsigned Opcode; |
4468 | | Value *LHS; |
4469 | | Value *RHS; |
4470 | | bool IsNSW = false; |
4471 | | bool IsNUW = false; |
4472 | | |
4473 | | /// Op is set if this BinaryOp corresponds to a concrete LLVM instruction or |
4474 | | /// constant expression. |
4475 | | Operator *Op = nullptr; |
4476 | | |
4477 | | explicit BinaryOp(Operator *Op) |
4478 | | : Opcode(Op->getOpcode()), LHS(Op->getOperand(0)), RHS(Op->getOperand(1)), |
4479 | 2.87M | Op(Op) { |
4480 | 2.87M | if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(Op)) { |
4481 | 2.67M | IsNSW = OBO->hasNoSignedWrap(); |
4482 | 2.67M | IsNUW = OBO->hasNoUnsignedWrap(); |
4483 | 2.67M | } |
4484 | 2.87M | } |
4485 | | |
4486 | | explicit BinaryOp(unsigned Opcode, Value *LHS, Value *RHS, bool IsNSW = false, |
4487 | | bool IsNUW = false) |
4488 | 68.9k | : Opcode(Opcode), LHS(LHS), RHS(RHS), IsNSW(IsNSW), IsNUW(IsNUW) {} |
4489 | | }; |
4490 | | |
4491 | | } // end anonymous namespace |
4492 | | |
4493 | | /// Try to map \p V into a BinaryOp, and return \c None on failure. |
4494 | 11.7M | static Optional<BinaryOp> MatchBinaryOp(Value *V, DominatorTree &DT) { |
4495 | 11.7M | auto *Op = dyn_cast<Operator>(V); |
4496 | 11.7M | if (!Op) |
4497 | 54.9k | return None; |
4498 | 11.7M | |
4499 | 11.7M | // Implementation detail: all the cleverness here should happen without |
4500 | 11.7M | // creating new SCEV expressions -- our caller knowns tricks to avoid creating |
4501 | 11.7M | // SCEV expressions when possible, and we should not break that. |
4502 | 11.7M | |
4503 | 11.7M | switch (Op->getOpcode()) { |
4504 | 11.7M | case Instruction::Add: |
4505 | 2.86M | case Instruction::Sub: |
4506 | 2.86M | case Instruction::Mul: |
4507 | 2.86M | case Instruction::UDiv: |
4508 | 2.86M | case Instruction::URem: |
4509 | 2.86M | case Instruction::And: |
4510 | 2.86M | case Instruction::Or: |
4511 | 2.86M | case Instruction::AShr: |
4512 | 2.86M | case Instruction::Shl: |
4513 | 2.86M | return BinaryOp(Op); |
4514 | 2.86M | |
4515 | 2.86M | case Instruction::Xor: |
4516 | 11.3k | if (auto *RHSC = dyn_cast<ConstantInt>(Op->getOperand(1))) |
4517 | 6.53k | // If the RHS of the xor is a signmask, then this is just an add. |
4518 | 6.53k | // Instcombine turns add of signmask into xor as a strength reduction step. |
4519 | 6.53k | if (RHSC->getValue().isSignMask()) |
4520 | 438 | return BinaryOp(Instruction::Add, Op->getOperand(0), Op->getOperand(1)); |
4521 | 10.9k | return BinaryOp(Op); |
4522 | 10.9k | |
4523 | 69.6k | case Instruction::LShr: |
4524 | 69.6k | // Turn logical shift right of a constant into a unsigned divide. |
4525 | 69.6k | if (ConstantInt *SA = dyn_cast<ConstantInt>(Op->getOperand(1))) { |
4526 | 68.3k | uint32_t BitWidth = cast<IntegerType>(Op->getType())->getBitWidth(); |
4527 | 68.3k | |
4528 | 68.3k | // If the shift count is not less than the bitwidth, the result of |
4529 | 68.3k | // the shift is undefined. Don't try to analyze it, because the |
4530 | 68.3k | // resolution chosen here may differ from the resolution chosen in |
4531 | 68.3k | // other parts of the compiler. |
4532 | 68.3k | if (SA->getValue().ult(BitWidth)) { |
4533 | 68.3k | Constant *X = |
4534 | 68.3k | ConstantInt::get(SA->getContext(), |
4535 | 68.3k | APInt::getOneBitSet(BitWidth, SA->getZExtValue())); |
4536 | 68.3k | return BinaryOp(Instruction::UDiv, Op->getOperand(0), X); |
4537 | 68.3k | } |
4538 | 1.30k | } |
4539 | 1.30k | return BinaryOp(Op); |
4540 | 1.30k | |
4541 | 23.3k | case Instruction::ExtractValue: { |
4542 | 23.3k | auto *EVI = cast<ExtractValueInst>(Op); |
4543 | 23.3k | if (EVI->getNumIndices() != 1 || EVI->getIndices()[0] != 0) |
4544 | 641 | break; |
4545 | 22.7k | |
4546 | 22.7k | auto *WO = dyn_cast<WithOverflowInst>(EVI->getAggregateOperand()); |
4547 | 22.7k | if (!WO) |
4548 | 22.5k | break; |
4549 | 195 | |
4550 | 195 | Instruction::BinaryOps BinOp = WO->getBinaryOp(); |
4551 | 195 | bool Signed = WO->isSigned(); |
4552 | 195 | // TODO: Should add nuw/nsw flags for mul as well. |
4553 | 195 | if (BinOp == Instruction::Mul || !isOverflowIntrinsicNoWrap(WO, DT)155 ) |
4554 | 83 | return BinaryOp(BinOp, WO->getLHS(), WO->getRHS()); |
4555 | 112 | |
4556 | 112 | // Now that we know that all uses of the arithmetic-result component of |
4557 | 112 | // CI are guarded by the overflow check, we can go ahead and pretend |
4558 | 112 | // that the arithmetic is non-overflowing. |
4559 | 112 | return BinaryOp(BinOp, WO->getLHS(), WO->getRHS(), |
4560 | 112 | /* IsNSW = */ Signed, /* IsNUW = */ !Signed); |
4561 | 112 | } |
4562 | 112 | |
4563 | 8.74M | default: |
4564 | 8.74M | break; |
4565 | 8.76M | } |
4566 | 8.76M | |
4567 | 8.76M | return None; |
4568 | 8.76M | } |
4569 | | |
4570 | | /// Helper function to createAddRecFromPHIWithCasts. We have a phi |
4571 | | /// node whose symbolic (unknown) SCEV is \p SymbolicPHI, which is updated via |
4572 | | /// the loop backedge by a SCEVAddExpr, possibly also with a few casts on the |
4573 | | /// way. This function checks if \p Op, an operand of this SCEVAddExpr, |
4574 | | /// follows one of the following patterns: |
4575 | | /// Op == (SExt ix (Trunc iy (%SymbolicPHI) to ix) to iy) |
4576 | | /// Op == (ZExt ix (Trunc iy (%SymbolicPHI) to ix) to iy) |
4577 | | /// If the SCEV expression of \p Op conforms with one of the expected patterns |
4578 | | /// we return the type of the truncation operation, and indicate whether the |
4579 | | /// truncated type should be treated as signed/unsigned by setting |
4580 | | /// \p Signed to true/false, respectively. |
4581 | | static Type *isSimpleCastedPHI(const SCEV *Op, const SCEVUnknown *SymbolicPHI, |
4582 | 6.20k | bool &Signed, ScalarEvolution &SE) { |
4583 | 6.20k | // The case where Op == SymbolicPHI (that is, with no type conversions on |
4584 | 6.20k | // the way) is handled by the regular add recurrence creating logic and |
4585 | 6.20k | // would have already been triggered in createAddRecForPHI. Reaching it here |
4586 | 6.20k | // means that createAddRecFromPHI had failed for this PHI before (e.g., |
4587 | 6.20k | // because one of the other operands of the SCEVAddExpr updating this PHI is |
4588 | 6.20k | // not invariant). |
4589 | 6.20k | // |
4590 | 6.20k | // Here we look for the case where Op = (ext(trunc(SymbolicPHI))), and in |
4591 | 6.20k | // this case predicates that allow us to prove that Op == SymbolicPHI will |
4592 | 6.20k | // be added. |
4593 | 6.20k | if (Op == SymbolicPHI) |
4594 | 498 | return nullptr; |
4595 | 5.70k | |
4596 | 5.70k | unsigned SourceBits = SE.getTypeSizeInBits(SymbolicPHI->getType()); |
4597 | 5.70k | unsigned NewBits = SE.getTypeSizeInBits(Op->getType()); |
4598 | 5.70k | if (SourceBits != NewBits) |
4599 | 0 | return nullptr; |
4600 | 5.70k | |
4601 | 5.70k | const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(Op); |
4602 | 5.70k | const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(Op); |
4603 | 5.70k | if (!SExt && !ZExt5.64k ) |
4604 | 4.10k | return nullptr; |
4605 | 1.60k | const SCEVTruncateExpr *Trunc = |
4606 | 1.60k | SExt ? dyn_cast<SCEVTruncateExpr>(SExt->getOperand())56 |
4607 | 1.60k | : dyn_cast<SCEVTruncateExpr>(ZExt->getOperand())1.54k ; |
4608 | 1.60k | if (!Trunc) |
4609 | 1.56k | return nullptr; |
4610 | 36 | const SCEV *X = Trunc->getOperand(); |
4611 | 36 | if (X != SymbolicPHI) |
4612 | 6 | return nullptr; |
4613 | 30 | Signed = SExt != nullptr; |
4614 | 30 | return Trunc->getType(); |
4615 | 30 | } |
4616 | | |
4617 | 119k | static const Loop *isIntegerLoopHeaderPHI(const PHINode *PN, LoopInfo &LI) { |
4618 | 119k | if (!PN->getType()->isIntegerTy()) |
4619 | 43.9k | return nullptr; |
4620 | 75.7k | const Loop *L = LI.getLoopFor(PN->getParent()); |
4621 | 75.7k | if (!L || L->getHeader() != PN->getParent()63.7k ) |
4622 | 17.5k | return nullptr; |
4623 | 58.1k | return L; |
4624 | 58.1k | } |
4625 | | |
4626 | | // Analyze \p SymbolicPHI, a SCEV expression of a phi node, and check if the |
4627 | | // computation that updates the phi follows the following pattern: |
4628 | | // (SExt/ZExt ix (Trunc iy (%SymbolicPHI) to ix) to iy) + InvariantAccum |
4629 | | // which correspond to a phi->trunc->sext/zext->add->phi update chain. |
4630 | | // If so, try to see if it can be rewritten as an AddRecExpr under some |
4631 | | // Predicates. If successful, return them as a pair. Also cache the results |
4632 | | // of the analysis. |
4633 | | // |
4634 | | // Example usage scenario: |
4635 | | // Say the Rewriter is called for the following SCEV: |
4636 | | // 8 * ((sext i32 (trunc i64 %X to i32) to i64) + %Step) |
4637 | | // where: |
4638 | | // %X = phi i64 (%Start, %BEValue) |
4639 | | // It will visitMul->visitAdd->visitSExt->visitTrunc->visitUnknown(%X), |
4640 | | // and call this function with %SymbolicPHI = %X. |
4641 | | // |
4642 | | // The analysis will find that the value coming around the backedge has |
4643 | | // the following SCEV: |
4644 | | // BEValue = ((sext i32 (trunc i64 %X to i32) to i64) + %Step) |
4645 | | // Upon concluding that this matches the desired pattern, the function |
4646 | | // will return the pair {NewAddRec, SmallPredsVec} where: |
4647 | | // NewAddRec = {%Start,+,%Step} |
4648 | | // SmallPredsVec = {P1, P2, P3} as follows: |
4649 | | // P1(WrapPred): AR: {trunc(%Start),+,(trunc %Step)}<nsw> Flags: <nssw> |
4650 | | // P2(EqualPred): %Start == (sext i32 (trunc i64 %Start to i32) to i64) |
4651 | | // P3(EqualPred): %Step == (sext i32 (trunc i64 %Step to i32) to i64) |
4652 | | // The returned pair means that SymbolicPHI can be rewritten into NewAddRec |
4653 | | // under the predicates {P1,P2,P3}. |
4654 | | // This predicated rewrite will be cached in PredicatedSCEVRewrites: |
4655 | | // PredicatedSCEVRewrites[{%X,L}] = {NewAddRec, {P1,P2,P3)} |
4656 | | // |
4657 | | // TODO's: |
4658 | | // |
4659 | | // 1) Extend the Induction descriptor to also support inductions that involve |
4660 | | // casts: When needed (namely, when we are called in the context of the |
4661 | | // vectorizer induction analysis), a Set of cast instructions will be |
4662 | | // populated by this method, and provided back to isInductionPHI. This is |
4663 | | // needed to allow the vectorizer to properly record them to be ignored by |
4664 | | // the cost model and to avoid vectorizing them (otherwise these casts, |
4665 | | // which are redundant under the runtime overflow checks, will be |
4666 | | // vectorized, which can be costly). |
4667 | | // |
4668 | | // 2) Support additional induction/PHISCEV patterns: We also want to support |
4669 | | // inductions where the sext-trunc / zext-trunc operations (partly) occur |
4670 | | // after the induction update operation (the induction increment): |
4671 | | // |
4672 | | // (Trunc iy (SExt/ZExt ix (%SymbolicPHI + InvariantAccum) to iy) to ix) |
4673 | | // which correspond to a phi->add->trunc->sext/zext->phi update chain. |
4674 | | // |
4675 | | // (Trunc iy ((SExt/ZExt ix (%SymbolicPhi) to iy) + InvariantAccum) to ix) |
4676 | | // which correspond to a phi->trunc->add->sext/zext->phi update chain. |
4677 | | // |
4678 | | // 3) Outline common code with createAddRecFromPHI to avoid duplication. |
4679 | | Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>> |
4680 | 20.4k | ScalarEvolution::createAddRecFromPHIWithCastsImpl(const SCEVUnknown *SymbolicPHI) { |
4681 | 20.4k | SmallVector<const SCEVPredicate *, 3> Predicates; |
4682 | 20.4k | |
4683 | 20.4k | // *** Part1: Analyze if we have a phi-with-cast pattern for which we can |
4684 | 20.4k | // return an AddRec expression under some predicate. |
4685 | 20.4k | |
4686 | 20.4k | auto *PN = cast<PHINode>(SymbolicPHI->getValue()); |
4687 | 20.4k | const Loop *L = isIntegerLoopHeaderPHI(PN, LI); |
4688 | 20.4k | assert(L && "Expecting an integer loop header phi"); |
4689 | 20.4k | |
4690 | 20.4k | // The loop may have multiple entrances or multiple exits; we can analyze |
4691 | 20.4k | // this phi as an addrec if it has a unique entry value and a unique |
4692 | 20.4k | // backedge value. |
4693 | 20.4k | Value *BEValueV = nullptr, *StartValueV = nullptr; |
4694 | 61.3k | for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i40.8k ) { |
4695 | 40.8k | Value *V = PN->getIncomingValue(i); |
4696 | 40.8k | if (L->contains(PN->getIncomingBlock(i))) { |
4697 | 20.4k | if (!BEValueV) { |
4698 | 20.4k | BEValueV = V; |
4699 | 20.4k | } else if (0 BEValueV != V0 ) { |
4700 | 0 | BEValueV = nullptr; |
4701 | 0 | break; |
4702 | 0 | } |
4703 | 20.4k | } else if (!StartValueV) { |
4704 | 20.4k | StartValueV = V; |
4705 | 20.4k | } else if (1 StartValueV != V1 ) { |
4706 | 1 | StartValueV = nullptr; |
4707 | 1 | break; |
4708 | 1 | } |
4709 | 40.8k | } |
4710 | 20.4k | if (!BEValueV || !StartValueV) |
4711 | 1 | return None; |
4712 | 20.4k | |
4713 | 20.4k | const SCEV *BEValue = getSCEV(BEValueV); |
4714 | 20.4k | |
4715 | 20.4k | // If the value coming around the backedge is an add with the symbolic |
4716 | 20.4k | // value we just inserted, possibly with casts that we can ignore under |
4717 | 20.4k | // an appropriate runtime guard, then we found a simple induction variable! |
4718 | 20.4k | const auto *Add = dyn_cast<SCEVAddExpr>(BEValue); |
4719 | 20.4k | if (!Add) |
4720 | 17.9k | return None; |
4721 | 2.53k | |
4722 | 2.53k | // If there is a single occurrence of the symbolic value, possibly |
4723 | 2.53k | // casted, replace it with a recurrence. |
4724 | 2.53k | unsigned FoundIndex = Add->getNumOperands(); |
4725 | 2.53k | Type *TruncTy = nullptr; |
4726 | 2.53k | bool Signed; |
4727 | 8.70k | for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i6.17k ) |
4728 | 6.20k | if ((TruncTy = |
4729 | 6.20k | isSimpleCastedPHI(Add->getOperand(i), SymbolicPHI, Signed, *this))) |
4730 | 30 | if (FoundIndex == e) { |
4731 | 30 | FoundIndex = i; |
4732 | 30 | break; |
4733 | 30 | } |
4734 | 2.53k | |
4735 | 2.53k | if (FoundIndex == Add->getNumOperands()) |
4736 | 2.50k | return None; |
4737 | 30 | |
4738 | 30 | // Create an add with everything but the specified operand. |
4739 | 30 | SmallVector<const SCEV *, 8> Ops; |
4740 | 90 | for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i60 ) |
4741 | 60 | if (i != FoundIndex) |
4742 | 30 | Ops.push_back(Add->getOperand(i)); |
4743 | 30 | const SCEV *Accum = getAddExpr(Ops); |
4744 | 30 | |
4745 | 30 | // The runtime checks will not be valid if the step amount is |
4746 | 30 | // varying inside the loop. |
4747 | 30 | if (!isLoopInvariant(Accum, L)) |
4748 | 3 | return None; |
4749 | 27 | |
4750 | 27 | // *** Part2: Create the predicates |
4751 | 27 | |
4752 | 27 | // Analysis was successful: we have a phi-with-cast pattern for which we |
4753 | 27 | // can return an AddRec expression under the following predicates: |
4754 | 27 | // |
4755 | 27 | // P1: A Wrap predicate that guarantees that Trunc(Start) + i*Trunc(Accum) |
4756 | 27 | // fits within the truncated type (does not overflow) for i = 0 to n-1. |
4757 | 27 | // P2: An Equal predicate that guarantees that |
4758 | 27 | // Start = (Ext ix (Trunc iy (Start) to ix) to iy) |
4759 | 27 | // P3: An Equal predicate that guarantees that |
4760 | 27 | // Accum = (Ext ix (Trunc iy (Accum) to ix) to iy) |
4761 | 27 | // |
4762 | 27 | // As we next prove, the above predicates guarantee that: |
4763 | 27 | // Start + i*Accum = (Ext ix (Trunc iy ( Start + i*Accum ) to ix) to iy) |
4764 | 27 | // |
4765 | 27 | // |
4766 | 27 | // More formally, we want to prove that: |
4767 | 27 | // Expr(i+1) = Start + (i+1) * Accum |
4768 | 27 | // = (Ext ix (Trunc iy (Expr(i)) to ix) to iy) + Accum |
4769 | 27 | // |
4770 | 27 | // Given that: |
4771 | 27 | // 1) Expr(0) = Start |
4772 | 27 | // 2) Expr(1) = Start + Accum |
4773 | 27 | // = (Ext ix (Trunc iy (Start) to ix) to iy) + Accum :: from P2 |
4774 | 27 | // 3) Induction hypothesis (step i): |
4775 | 27 | // Expr(i) = (Ext ix (Trunc iy (Expr(i-1)) to ix) to iy) + Accum |
4776 | 27 | // |
4777 | 27 | // Proof: |
4778 | 27 | // Expr(i+1) = |
4779 | 27 | // = Start + (i+1)*Accum |
4780 | 27 | // = (Start + i*Accum) + Accum |
4781 | 27 | // = Expr(i) + Accum |
4782 | 27 | // = (Ext ix (Trunc iy (Expr(i-1)) to ix) to iy) + Accum + Accum |
4783 | 27 | // :: from step i |
4784 | 27 | // |
4785 | 27 | // = (Ext ix (Trunc iy (Start + (i-1)*Accum) to ix) to iy) + Accum + Accum |
4786 | 27 | // |
4787 | 27 | // = (Ext ix (Trunc iy (Start + (i-1)*Accum) to ix) to iy) |
4788 | 27 | // + (Ext ix (Trunc iy (Accum) to ix) to iy) |
4789 | 27 | // + Accum :: from P3 |
4790 | 27 | // |
4791 | 27 | // = (Ext ix (Trunc iy ((Start + (i-1)*Accum) + Accum) to ix) to iy) |
4792 | 27 | // + Accum :: from P1: Ext(x)+Ext(y)=>Ext(x+y) |
4793 | 27 | // |
4794 | 27 | // = (Ext ix (Trunc iy (Start + i*Accum) to ix) to iy) + Accum |
4795 | 27 | // = (Ext ix (Trunc iy (Expr(i)) to ix) to iy) + Accum |
4796 | 27 | // |
4797 | 27 | // By induction, the same applies to all iterations 1<=i<n: |
4798 | 27 | // |
4799 | 27 | |
4800 | 27 | // Create a truncated addrec for which we will add a no overflow check (P1). |
4801 | 27 | const SCEV *StartVal = getSCEV(StartValueV); |
4802 | 27 | const SCEV *PHISCEV = |
4803 | 27 | getAddRecExpr(getTruncateExpr(StartVal, TruncTy), |
4804 | 27 | getTruncateExpr(Accum, TruncTy), L, SCEV::FlagAnyWrap); |
4805 | 27 | |
4806 | 27 | // PHISCEV can be either a SCEVConstant or a SCEVAddRecExpr. |
4807 | 27 | // ex: If truncated Accum is 0 and StartVal is a constant, then PHISCEV |
4808 | 27 | // will be constant. |
4809 | 27 | // |
4810 | 27 | // If PHISCEV is a constant, then P1 degenerates into P2 or P3, so we don't |
4811 | 27 | // add P1. |
4812 | 27 | if (const auto *AR = dyn_cast<SCEVAddRecExpr>(PHISCEV)) { |
4813 | 25 | SCEVWrapPredicate::IncrementWrapFlags AddedFlags = |
4814 | 25 | Signed ? SCEVWrapPredicate::IncrementNSSW10 |
4815 | 25 | : SCEVWrapPredicate::IncrementNUSW15 ; |
4816 | 25 | const SCEVPredicate *AddRecPred = getWrapPredicate(AR, AddedFlags); |
4817 | 25 | Predicates.push_back(AddRecPred); |
4818 | 25 | } |
4819 | 27 | |
4820 | 27 | // Create the Equal Predicates P2,P3: |
4821 | 27 | |
4822 | 27 | // It is possible that the predicates P2 and/or P3 are computable at |
4823 | 27 | // compile time due to StartVal and/or Accum being constants. |
4824 | 27 | // If either one is, then we can check that now and escape if either P2 |
4825 | 27 | // or P3 is false. |
4826 | 27 | |
4827 | 27 | // Construct the extended SCEV: (Ext ix (Trunc iy (Expr) to ix) to iy) |
4828 | 27 | // for each of StartVal and Accum |
4829 | 27 | auto getExtendedExpr = [&](const SCEV *Expr, |
4830 | 53 | bool CreateSignExtend) -> const SCEV * { |
4831 | 53 | assert(isLoopInvariant(Expr, L) && "Expr is expected to be invariant"); |
4832 | 53 | const SCEV *TruncatedExpr = getTruncateExpr(Expr, TruncTy); |
4833 | 53 | const SCEV *ExtendedExpr = |
4834 | 53 | CreateSignExtend ? getSignExtendExpr(TruncatedExpr, Expr->getType())38 |
4835 | 53 | : getZeroExtendExpr(TruncatedExpr, Expr->getType())15 ; |
4836 | 53 | return ExtendedExpr; |
4837 | 53 | }; |
4838 | 27 | |
4839 | 27 | // Given: |
4840 | 27 | // ExtendedExpr = (Ext ix (Trunc iy (Expr) to ix) to iy |
4841 | 27 | // = getExtendedExpr(Expr) |
4842 | 27 | // Determine whether the predicate P: Expr == ExtendedExpr |
4843 | 27 | // is known to be false at compile time |
4844 | 27 | auto PredIsKnownFalse = [&](const SCEV *Expr, |
4845 | 53 | const SCEV *ExtendedExpr) -> bool { |
4846 | 53 | return Expr != ExtendedExpr && |
4847 | 53 | isKnownPredicate(ICmpInst::ICMP_NE, Expr, ExtendedExpr)21 ; |
4848 | 53 | }; |
4849 | 27 | |
4850 | 27 | const SCEV *StartExtended = getExtendedExpr(StartVal, Signed); |
4851 | 27 | if (PredIsKnownFalse(StartVal, StartExtended)) { |
4852 | 1 | LLVM_DEBUG(dbgs() << "P2 is compile-time false\n";); |
4853 | 1 | return None; |
4854 | 1 | } |
4855 | 26 | |
4856 | 26 | // The Step is always Signed (because the overflow checks are either |
4857 | 26 | // NSSW or NUSW) |
4858 | 26 | const SCEV *AccumExtended = getExtendedExpr(Accum, /*CreateSignExtend=*/true); |
4859 | 26 | if (PredIsKnownFalse(Accum, AccumExtended)) { |
4860 | 4 | LLVM_DEBUG(dbgs() << "P3 is compile-time false\n";); |
4861 | 4 | return None; |
4862 | 4 | } |
4863 | 22 | |
4864 | 22 | auto AppendPredicate = [&](const SCEV *Expr, |
4865 | 44 | const SCEV *ExtendedExpr) -> void { |
4866 | 44 | if (Expr != ExtendedExpr && |
4867 | 44 | !isKnownPredicate(ICmpInst::ICMP_EQ, Expr, ExtendedExpr)12 ) { |
4868 | 12 | const SCEVPredicate *Pred = getEqualPredicate(Expr, ExtendedExpr); |
4869 | 12 | LLVM_DEBUG(dbgs() << "Added Predicate: " << *Pred); |
4870 | 12 | Predicates.push_back(Pred); |
4871 | 12 | } |
4872 | 44 | }; |
4873 | 22 | |
4874 | 22 | AppendPredicate(StartVal, StartExtended); |
4875 | 22 | AppendPredicate(Accum, AccumExtended); |
4876 | 22 | |
4877 | 22 | // *** Part3: Predicates are ready. Now go ahead and create the new addrec in |
4878 | 22 | // which the casts had been folded away. The caller can rewrite SymbolicPHI |
4879 | 22 | // into NewAR if it will also add the runtime overflow checks specified in |
4880 | 22 | // Predicates. |
4881 | 22 | auto *NewAR = getAddRecExpr(StartVal, Accum, L, SCEV::FlagAnyWrap); |
4882 | 22 | |
4883 | 22 | std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>> PredRewrite = |
4884 | 22 | std::make_pair(NewAR, Predicates); |
4885 | 22 | // Remember the result of the analysis for this SCEV at this locayyytion. |
4886 | 22 | PredicatedSCEVRewrites[{SymbolicPHI, L}] = PredRewrite; |
4887 | 22 | return PredRewrite; |
4888 | 22 | } |
4889 | | |
4890 | | Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>> |
4891 | 99.2k | ScalarEvolution::createAddRecFromPHIWithCasts(const SCEVUnknown *SymbolicPHI) { |
4892 | 99.2k | auto *PN = cast<PHINode>(SymbolicPHI->getValue()); |
4893 | 99.2k | const Loop *L = isIntegerLoopHeaderPHI(PN, LI); |
4894 | 99.2k | if (!L) |
4895 | 61.5k | return None; |
4896 | 37.7k | |
4897 | 37.7k | // Check to see if we already analyzed this PHI. |
4898 | 37.7k | auto I = PredicatedSCEVRewrites.find({SymbolicPHI, L}); |
4899 | 37.7k | if (I != PredicatedSCEVRewrites.end()) { |
4900 | 17.2k | std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>> Rewrite = |
4901 | 17.2k | I->second; |
4902 | 17.2k | // Analysis was done before and failed to create an AddRec: |
4903 | 17.2k | if (Rewrite.first == SymbolicPHI) |
4904 | 17.1k | return None; |
4905 | 133 | // Analysis was done before and succeeded to create an AddRec under |
4906 | 133 | // a predicate: |
4907 | 133 | assert(isa<SCEVAddRecExpr>(Rewrite.first) && "Expected an AddRec"); |
4908 | 133 | assert(!(Rewrite.second).empty() && "Expected to find Predicates"); |
4909 | 133 | return Rewrite; |
4910 | 133 | } |
4911 | 20.4k | |
4912 | 20.4k | Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>> |
4913 | 20.4k | Rewrite = createAddRecFromPHIWithCastsImpl(SymbolicPHI); |
4914 | 20.4k | |
4915 | 20.4k | // Record in the cache that the analysis failed |
4916 | 20.4k | if (!Rewrite) { |
4917 | 20.4k | SmallVector<const SCEVPredicate *, 3> Predicates; |
4918 | 20.4k | PredicatedSCEVRewrites[{SymbolicPHI, L}] = {SymbolicPHI, Predicates}; |
4919 | 20.4k | return None; |
4920 | 20.4k | } |
4921 | 22 | |
4922 | 22 | return Rewrite; |
4923 | 22 | } |
4924 | | |
4925 | | // FIXME: This utility is currently required because the Rewriter currently |
4926 | | // does not rewrite this expression: |
4927 | | // {0, +, (sext ix (trunc iy to ix) to iy)} |
4928 | | // into {0, +, %step}, |
4929 | | // even when the following Equal predicate exists: |
4930 | | // "%step == (sext ix (trunc iy to ix) to iy)". |
4931 | | bool PredicatedScalarEvolution::areAddRecsEqualWithPreds( |
4932 | 38 | const SCEVAddRecExpr *AR1, const SCEVAddRecExpr *AR2) const { |
4933 | 38 | if (AR1 == AR2) |
4934 | 7 | return true; |
4935 | 31 | |
4936 | 47 | auto areExprsEqual = [&](const SCEV *Expr1, const SCEV *Expr2) -> bool 31 { |
4937 | 47 | if (Expr1 != Expr2 && !Preds.implies(SE.getEqualPredicate(Expr1, Expr2))31 && |
4938 | 47 | !Preds.implies(SE.getEqualPredicate(Expr2, Expr1))31 ) |
4939 | 23 | return false; |
4940 | 24 | return true; |
4941 | 24 | }; |
4942 | 31 | |
4943 | 31 | if (!areExprsEqual(AR1->getStart(), AR2->getStart()) || |
4944 | 31 | !areExprsEqual(AR1->getStepRecurrence(SE), AR2->getStepRecurrence(SE))16 ) |
4945 | 23 | return false; |
4946 | 8 | return true; |
4947 | 8 | } |
4948 | | |
4949 | | /// A helper function for createAddRecFromPHI to handle simple cases. |
4950 | | /// |
4951 | | /// This function tries to find an AddRec expression for the simplest (yet most |
4952 | | /// common) cases: PN = PHI(Start, OP(Self, LoopInvariant)). |
4953 | | /// If it fails, createAddRecFromPHI will use a more general, but slow, |
4954 | | /// technique for finding the AddRec expression. |
4955 | | const SCEV *ScalarEvolution::createSimpleAffineAddRec(PHINode *PN, |
4956 | | Value *BEValueV, |
4957 | 1.53M | Value *StartValueV) { |
4958 | 1.53M | const Loop *L = LI.getLoopFor(PN->getParent()); |
4959 | 1.53M | assert(L && L->getHeader() == PN->getParent()); |
4960 | 1.53M | assert(BEValueV && StartValueV); |
4961 | 1.53M | |
4962 | 1.53M | auto BO = MatchBinaryOp(BEValueV, DT); |
4963 | 1.53M | if (!BO) |
4964 | 489k | return nullptr; |
4965 | 1.04M | |
4966 | 1.04M | if (BO->Opcode != Instruction::Add) |
4967 | 40.8k | return nullptr; |
4968 | 1.00M | |
4969 | 1.00M | const SCEV *Accum = nullptr; |
4970 | 1.00M | if (BO->LHS == PN && L->isLoopInvariant(BO->RHS)975k ) |
4971 | 972k | Accum = getSCEV(BO->RHS); |
4972 | 28.1k | else if (BO->RHS == PN && L->isLoopInvariant(BO->LHS)7.71k ) |
4973 | 783 | Accum = getSCEV(BO->LHS); |
4974 | 1.00M | |
4975 | 1.00M | if (!Accum) |
4976 | 27.3k | return nullptr; |
4977 | 973k | |
4978 | 973k | SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap; |
4979 | 973k | if (BO->IsNUW) |
4980 | 691k | Flags = setFlags(Flags, SCEV::FlagNUW); |
4981 | 973k | if (BO->IsNSW) |
4982 | 698k | Flags = setFlags(Flags, SCEV::FlagNSW); |
4983 | 973k | |
4984 | 973k | const SCEV *StartVal = getSCEV(StartValueV); |
4985 | 973k | const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags); |
4986 | 973k | |
4987 | 973k | ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV; |
4988 | 973k | |
4989 | 973k | // We can add Flags to the post-inc expression only if we |
4990 | 973k | // know that it is *undefined behavior* for BEValueV to |
4991 | 973k | // overflow. |
4992 | 973k | if (auto *BEInst = dyn_cast<Instruction>(BEValueV)) |
4993 | 973k | if (isLoopInvariant(Accum, L) && isAddRecNeverPoison(BEInst, L)) |
4994 | 490k | (void)getAddRecExpr(getAddExpr(StartVal, Accum, Flags), Accum, L, Flags); |
4995 | 973k | |
4996 | 973k | return PHISCEV; |
4997 | 973k | } |
4998 | | |
4999 | 1.82M | const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) { |
5000 | 1.82M | const Loop *L = LI.getLoopFor(PN->getParent()); |
5001 | 1.82M | if (!L || L->getHeader() != PN->getParent()1.71M ) |
5002 | 290k | return nullptr; |
5003 | 1.53M | |
5004 | 1.53M | // The loop may have multiple entrances or multiple exits; we can analyze |
5005 | 1.53M | // this phi as an addrec if it has a unique entry value and a unique |
5006 | 1.53M | // backedge value. |
5007 | 1.53M | Value *BEValueV = nullptr, *StartValueV = nullptr; |
5008 | 4.59M | for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i3.06M ) { |
5009 | 3.06M | Value *V = PN->getIncomingValue(i); |
5010 | 3.06M | if (L->contains(PN->getIncomingBlock(i))) { |
5011 | 1.53M | if (!BEValueV) { |
5012 | 1.53M | BEValueV = V; |
5013 | 1.53M | } else if (201 BEValueV != V201 ) { |
5014 | 108 | BEValueV = nullptr; |
5015 | 108 | break; |
5016 | 108 | } |
5017 | 1.53M | } else if (!StartValueV) { |
5018 | 1.53M | StartValueV = V; |
5019 | 1.53M | } else if (687 StartValueV != V687 ) { |
5020 | 406 | StartValueV = nullptr; |
5021 | 406 | break; |
5022 | 406 | } |
5023 | 3.06M | } |
5024 | 1.53M | if (!BEValueV || !StartValueV1.53M ) |
5025 | 514 | return nullptr; |
5026 | 1.53M | |
5027 | 1.53M | assert(ValueExprMap.find_as(PN) == ValueExprMap.end() && |
5028 | 1.53M | "PHI node already processed?"); |
5029 | 1.53M | |
5030 | 1.53M | // First, try to find AddRec expression without creating a fictituos symbolic |
5031 | 1.53M | // value for PN. |
5032 | 1.53M | if (auto *S = createSimpleAffineAddRec(PN, BEValueV, StartValueV)) |
5033 | 973k | return S; |
5034 | 557k | |
5035 | 557k | // Handle PHI node value symbolically. |
5036 | 557k | const SCEV *SymbolicName = getUnknown(PN); |
5037 | 557k | ValueExprMap.insert({SCEVCallbackVH(PN, this), SymbolicName}); |
5038 | 557k | |
5039 | 557k | // Using this symbolic name for the PHI, analyze the value coming around |
5040 | 557k | // the back-edge. |
5041 | 557k | const SCEV *BEValue = getSCEV(BEValueV); |
5042 | 557k | |
5043 | 557k | // NOTE: If BEValue is loop invariant, we know that the PHI node just |
5044 | 557k | // has a special value for the first iteration of the loop. |
5045 | 557k | |
5046 | 557k | // If the value coming around the backedge is an add with the symbolic |
5047 | 557k | // value we just inserted, then we found a simple induction variable! |
5048 | 557k | if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(BEValue)) { |
5049 | 243k | // If there is a single occurrence of the symbolic value, replace it |
5050 | 243k | // with a recurrence. |
5051 | 243k | unsigned FoundIndex = Add->getNumOperands(); |
5052 | 518k | for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i275k ) |
5053 | 500k | if (Add->getOperand(i) == SymbolicName) |
5054 | 224k | if (FoundIndex == e) { |
5055 | 224k | FoundIndex = i; |
5056 | 224k | break; |
5057 | 224k | } |
5058 | 243k | |
5059 | 243k | if (FoundIndex != Add->getNumOperands()) { |
5060 | 224k | // Create an add with everything but the specified operand. |
5061 | 224k | SmallVector<const SCEV *, 8> Ops; |
5062 | 685k | for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i461k ) |
5063 | 461k | if (i != FoundIndex) |
5064 | 236k | Ops.push_back(SCEVBackedgeConditionFolder::rewrite(Add->getOperand(i), |
5065 | 236k | L, *this)); |
5066 | 224k | const SCEV *Accum = getAddExpr(Ops); |
5067 | 224k | |
5068 | 224k | // This is not a valid addrec if the step amount is varying each |
5069 | 224k | // loop iteration, but is not itself an addrec in this loop. |
5070 | 224k | if (isLoopInvariant(Accum, L) || |
5071 | 224k | (15.1k isa<SCEVAddRecExpr>(Accum)15.1k && |
5072 | 209k | cast<SCEVAddRecExpr>(Accum)->getLoop() == L131 )) { |
5073 | 209k | SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap; |
5074 | 209k | |
5075 | 209k | if (auto BO = MatchBinaryOp(BEValueV, DT)) { |
5076 | 6.50k | if (BO->Opcode == Instruction::Add && BO->LHS == PN4.84k ) { |
5077 | 82 | if (BO->IsNUW) |
5078 | 1 | Flags = setFlags(Flags, SCEV::FlagNUW); |
5079 | 82 | if (BO->IsNSW) |
5080 | 28 | Flags = setFlags(Flags, SCEV::FlagNSW); |
5081 | 82 | } |
5082 | 203k | } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(BEValueV)) { |
5083 | 184k | // If the increment is an inbounds GEP, then we know the address |
5084 | 184k | // space cannot be wrapped around. We cannot make any guarantee |
5085 | 184k | // about signed or unsigned overflow because pointers are |
5086 | 184k | // unsigned but we may have a negative index from the base |
5087 | 184k | // pointer. We can guarantee that no unsigned wrap occurs if the |
5088 | 184k | // indices form a positive value. |
5089 | 184k | if (GEP->isInBounds() && GEP->getOperand(0) == PN116k ) { |
5090 | 115k | Flags = setFlags(Flags, SCEV::FlagNW); |
5091 | 115k | |
5092 | 115k | const SCEV *Ptr = getSCEV(GEP->getPointerOperand()); |
5093 | 115k | if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr))) |
5094 | 76.7k | Flags = setFlags(Flags, SCEV::FlagNUW); |
5095 | 115k | } |
5096 | 184k | |
5097 | 184k | // We cannot transfer nuw and nsw flags from subtraction |
5098 | 184k | // operations -- sub nuw X, Y is not the same as add nuw X, -Y |
5099 | 184k | // for instance. |
5100 | 184k | } |
5101 | 209k | |
5102 | 209k | const SCEV *StartVal = getSCEV(StartValueV); |
5103 | 209k | const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags); |
5104 | 209k | |
5105 | 209k | // Okay, for the entire analysis of this edge we assumed the PHI |
5106 | 209k | // to be symbolic. We now need to go back and purge all of the |
5107 | 209k | // entries for the scalars that use the symbolic expression. |
5108 | 209k | forgetSymbolicName(PN, SymbolicName); |
5109 | 209k | ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV; |
5110 | 209k | |
5111 | 209k | // We can add Flags to the post-inc expression only if we |
5112 | 209k | // know that it is *undefined behavior* for BEValueV to |
5113 | 209k | // overflow. |
5114 | 209k | if (auto *BEInst = dyn_cast<Instruction>(BEValueV)) |
5115 | 209k | if (isLoopInvariant(Accum, L) && isAddRecNeverPoison(BEInst, L)209k ) |
5116 | 39.3k | (void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags); |
5117 | 209k | |
5118 | 209k | return PHISCEV; |
5119 | 209k | } |
5120 | 313k | } |
5121 | 313k | } else { |
5122 | 313k | // Otherwise, this could be a loop like this: |
5123 | 313k | // i = 0; for (j = 1; ..; ++j) { .... i = j; } |
5124 | 313k | // In this case, j = {1,+,1} and BEValue is j. |
5125 | 313k | // Because the other in-value of i (0) fits the evolution of BEValue |
5126 | 313k | // i really is an addrec evolution. |
5127 | 313k | // |
5128 | 313k | // We can generalize this saying that i is the shifted value of BEValue |
5129 | 313k | // by one iteration: |
5130 | 313k | // PHI(f(0), f({1,+,1})) --> f({0,+,1}) |
5131 | 313k | const SCEV *Shifted = SCEVShiftRewriter::rewrite(BEValue, L, *this); |
5132 | 313k | const SCEV *Start = SCEVInitRewriter::rewrite(Shifted, L, *this, false); |
5133 | 313k | if (Shifted != getCouldNotCompute() && |
5134 | 313k | Start != getCouldNotCompute()12.3k ) { |
5135 | 12.3k | const SCEV *StartVal = getSCEV(StartValueV); |
5136 | 12.3k | if (Start == StartVal) { |
5137 | 10.6k | // Okay, for the entire analysis of this edge we assumed the PHI |
5138 | 10.6k | // to be symbolic. We now need to go back and purge all of the |
5139 | 10.6k | // entries for the scalars that use the symbolic expression. |
5140 | 10.6k | forgetSymbolicName(PN, SymbolicName); |
5141 | 10.6k | ValueExprMap[SCEVCallbackVH(PN, this)] = Shifted; |
5142 | 10.6k | return Shifted; |
5143 | 10.6k | } |
5144 | 337k | } |
5145 | 313k | } |
5146 | 337k | |
5147 | 337k | // Remove the temporary PHI node SCEV that has been inserted while intending |
5148 | 337k | // to create an AddRecExpr for this PHI node. We can not keep this temporary |
5149 | 337k | // as it will prevent later (possibly simpler) SCEV expressions to be added |
5150 | 337k | // to the ValueExprMap. |
5151 | 337k | eraseValueFromMap(PN); |
5152 | 337k | |
5153 | 337k | return nullptr; |
5154 | 337k | } |
5155 | | |
5156 | | // Checks if the SCEV S is available at BB. S is considered available at BB |
5157 | | // if S can be materialized at BB without introducing a fault. |
5158 | | static bool IsAvailableOnEntry(const Loop *L, DominatorTree &DT, const SCEV *S, |
5159 | 252k | BasicBlock *BB) { |
5160 | 252k | struct CheckAvailable { |
5161 | 252k | bool TraversalDone = false; |
5162 | 252k | bool Available = true; |
5163 | 252k | |
5164 | 252k | const Loop *L = nullptr; // The loop BB is in (can be nullptr) |
5165 | 252k | BasicBlock *BB = nullptr; |
5166 | 252k | DominatorTree &DT; |
5167 | 252k | |
5168 | 252k | CheckAvailable(const Loop *L, BasicBlock *BB, DominatorTree &DT) |
5169 | 252k | : L(L), BB(BB), DT(DT) {} |
5170 | 252k | |
5171 | 252k | bool setUnavailable() { |
5172 | 111k | TraversalDone = true; |
5173 | 111k | Available = false; |
5174 | 111k | return false; |
5175 | 111k | } |
5176 | 252k | |
5177 | 466k | bool follow(const SCEV *S) { |
5178 | 466k | switch (S->getSCEVType()) { |
5179 | 466k | case scConstant: 239k case scTruncate: 239k case scZeroExtend: 239k case scSignExtend: |
5180 | 239k | case scAddExpr: case scMulExpr: case scUMaxExpr: case scSMaxExpr: |
5181 | 239k | case scUMinExpr: |
5182 | 239k | case scSMinExpr: |
5183 | 239k | // These expressions are available if their operand(s) is/are. |
5184 | 239k | return true; |
5185 | 239k | |
5186 | 239k | case scAddRecExpr: { |
5187 | 12.3k | // We allow add recurrences that are on the loop BB is in, or some |
5188 | 12.3k | // outer loop. This guarantees availability because the value of the |
5189 | 12.3k | // add recurrence at BB is simply the "current" value of the induction |
5190 | 12.3k | // variable. We can relax this in the future; for instance an add |
5191 | 12.3k | // recurrence on a sibling dominating loop is also available at BB. |
5192 | 12.3k | const auto *ARLoop = cast<SCEVAddRecExpr>(S)->getLoop(); |
5193 | 12.3k | if (L && (6.16k ARLoop == L6.16k || ARLoop->contains(L)2.08k )) |
5194 | 4.34k | return true; |
5195 | 8.04k | |
5196 | 8.04k | return setUnavailable(); |
5197 | 8.04k | } |
5198 | 8.04k | |
5199 | 199k | case scUnknown: { |
5200 | 199k | // For SCEVUnknown, we check for simple dominance. |
5201 | 199k | const auto *SU = cast<SCEVUnknown>(S); |
5202 | 199k | Value *V = SU->getValue(); |
5203 | 199k | |
5204 | 199k | if (isa<Argument>(V)) |
5205 | 5.74k | return false; |
5206 | 194k | |
5207 | 194k | if (isa<Instruction>(V) && DT.dominates(cast<Instruction>(V), BB)193k ) |
5208 | 105k | return false; |
5209 | 88.4k | |
5210 | 88.4k | return setUnavailable(); |
5211 | 88.4k | } |
5212 | 88.4k | |
5213 | 88.4k | case scUDivExpr: |
5214 | 14.7k | case scCouldNotCompute: |
5215 | 14.7k | // We do not try to smart about these at all. |
5216 | 14.7k | return setUnavailable(); |
5217 | 0 | } |
5218 | 0 | llvm_unreachable("switch should be fully covered!"); |
5219 | 0 | } |
5220 | 252k | |
5221 | 252k | bool isDone() { return TraversalDone; }240k |
5222 | 252k | }; |
5223 | 252k | |
5224 | 252k | CheckAvailable CA(L, BB, DT); |
5225 | 252k | SCEVTraversal<CheckAvailable> ST(CA); |
5226 | 252k | |
5227 | 252k | ST.visitAll(S); |
5228 | 252k | return CA.Available; |
5229 | 252k | } |
5230 | | |
5231 | | // Try to match a control flow sequence that branches out at BI and merges back |
5232 | | // at Merge into a "C ? LHS : RHS" select pattern. Return true on a successful |
5233 | | // match. |
5234 | | static bool BrPHIToSelect(DominatorTree &DT, BranchInst *BI, PHINode *Merge, |
5235 | 168k | Value *&C, Value *&LHS, Value *&RHS) { |
5236 | 168k | C = BI->getCondition(); |
5237 | 168k | |
5238 | 168k | BasicBlockEdge LeftEdge(BI->getParent(), BI->getSuccessor(0)); |
5239 | 168k | BasicBlockEdge RightEdge(BI->getParent(), BI->getSuccessor(1)); |
5240 | 168k | |
5241 | 168k | if (!LeftEdge.isSingleEdge()) |
5242 | 0 | return false; |
5243 | 168k | |
5244 | 168k | assert(RightEdge.isSingleEdge() && "Follows from LeftEdge.isSingleEdge()"); |
5245 | 168k | |
5246 | 168k | Use &LeftUse = Merge->getOperandUse(0); |
5247 | 168k | Use &RightUse = Merge->getOperandUse(1); |
5248 | 168k | |
5249 | 168k | if (DT.dominates(LeftEdge, LeftUse) && DT.dominates(RightEdge, RightUse)82.2k ) { |
5250 | 77.0k | LHS = LeftUse; |
5251 | 77.0k | RHS = RightUse; |
5252 | 77.0k | return true; |
5253 | 77.0k | } |
5254 | 91.7k | |
5255 | 91.7k | if (DT.dominates(LeftEdge, RightUse) && DT.dominates(RightEdge, LeftUse)82.1k ) { |
5256 | 75.8k | LHS = RightUse; |
5257 | 75.8k | RHS = LeftUse; |
5258 | 75.8k | return true; |
5259 | 75.8k | } |
5260 | 15.8k | |
5261 | 15.8k | return false; |
5262 | 15.8k | } |
5263 | | |
5264 | 628k | const SCEV *ScalarEvolution::createNodeFromSelectLikePHI(PHINode *PN) { |
5265 | 628k | auto IsReachable = |
5266 | 1.02M | [&](BasicBlock *BB) { return DT.isReachableFromEntry(BB); }; |
5267 | 628k | if (PN->getNumIncomingValues() == 2 && all_of(PN->blocks(), IsReachable)514k ) { |
5268 | 514k | const Loop *L = LI.getLoopFor(PN->getParent()); |
5269 | 514k | |
5270 | 514k | // We don't want to break LCSSA, even in a SCEV expression tree. |
5271 | 1.07M | for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i564k ) |
5272 | 906k | if (LI.getLoopFor(PN->getIncomingBlock(i)) != L) |
5273 | 342k | return nullptr; |
5274 | 514k | |
5275 | 514k | // Try to match |
5276 | 514k | // |
5277 | 514k | // br %cond, label %left, label %right |
5278 | 514k | // left: |
5279 | 514k | // br label %merge |
5280 | 514k | // right: |
5281 | 514k | // br label %merge |
5282 | 514k | // merge: |
5283 | 514k | // V = phi [ %x, %left ], [ %y, %right ] |
5284 | 514k | // |
5285 | 514k | // as "select %cond, %x, %y" |
5286 | 514k | |
5287 | 514k | BasicBlock *IDom = DT[PN->getParent()]->getIDom()->getBlock(); |
5288 | 172k | assert(IDom && "At least the entry block should dominate PN"); |
5289 | 172k | |
5290 | 172k | auto *BI = dyn_cast<BranchInst>(IDom->getTerminator()); |
5291 | 172k | Value *Cond = nullptr, *LHS = nullptr, *RHS = nullptr; |
5292 | 172k | |
5293 | 172k | if (BI && BI->isConditional()168k && |
5294 | 172k | BrPHIToSelect(DT, BI, PN, Cond, LHS, RHS)168k && |
5295 | 172k | IsAvailableOnEntry(L, DT, getSCEV(LHS), PN->getParent())152k && |
5296 | 172k | IsAvailableOnEntry(L, DT, getSCEV(RHS), PN->getParent())98.5k ) |
5297 | 43.6k | return createNodeForSelectOrPHI(PN, Cond, LHS, RHS); |
5298 | 242k | } |
5299 | 242k | |
5300 | 242k | return nullptr; |
5301 | 242k | } |
5302 | | |
5303 | 1.82M | const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { |
5304 | 1.82M | if (const SCEV *S = createAddRecFromPHI(PN)) |
5305 | 1.19M | return S; |
5306 | 628k | |
5307 | 628k | if (const SCEV *S = createNodeFromSelectLikePHI(PN)) |
5308 | 43.6k | return S; |
5309 | 584k | |
5310 | 584k | // If the PHI has a single incoming value, follow that value, unless the |
5311 | 584k | // PHI's incoming blocks are in a different loop, in which case doing so |
5312 | 584k | // risks breaking LCSSA form. Instcombine would normally zap these, but |
5313 | 584k | // it doesn't have DominatorTree information, so it may miss cases. |
5314 | 584k | if (Value *V = SimplifyInstruction(PN, {getDataLayout(), &TLI, &DT, &AC})) |
5315 | 44.4k | if (LI.replacementPreservesLCSSAForm(PN, V)) |
5316 | 267 | return getSCEV(V); |
5317 | 584k | |
5318 | 584k | // If it's not a loop phi, we can't handle it yet. |
5319 | 584k | return getUnknown(PN); |
5320 | 584k | } |
5321 | | |
5322 | | const SCEV *ScalarEvolution::createNodeForSelectOrPHI(Instruction *I, |
5323 | | Value *Cond, |
5324 | | Value *TrueVal, |
5325 | 144k | Value *FalseVal) { |
5326 | 144k | // Handle "constant" branch or select. This can occur for instance when a |
5327 | 144k | // loop pass transforms an inner loop and moves on to process the outer loop. |
5328 | 144k | if (auto *CI = dyn_cast<ConstantInt>(Cond)) |
5329 | 11.3k | return getSCEV(CI->isOne() ? TrueVal195 : FalseVal11.1k ); |
5330 | 132k | |
5331 | 132k | // Try to match some simple smax or umax patterns. |
5332 | 132k | auto *ICI = dyn_cast<ICmpInst>(Cond); |
5333 | 132k | if (!ICI) |
5334 | 9.46k | return getUnknown(I); |
5335 | 123k | |
5336 | 123k | Value *LHS = ICI->getOperand(0); |
5337 | 123k | Value *RHS = ICI->getOperand(1); |
5338 | 123k | |
5339 | 123k | switch (ICI->getPredicate()) { |
5340 | 123k | case ICmpInst::ICMP_SLT: |
5341 | 20.0k | case ICmpInst::ICMP_SLE: |
5342 | 20.0k | std::swap(LHS, RHS); |
5343 | 20.0k | LLVM_FALLTHROUGH; |
5344 | 53.3k | case ICmpInst::ICMP_SGT: |
5345 | 53.3k | case ICmpInst::ICMP_SGE: |
5346 | 53.3k | // a >s b ? a+x : b+x -> smax(a, b)+x |
5347 | 53.3k | // a >s b ? b+x : a+x -> smin(a, b)+x |
5348 | 53.3k | if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType())) { |
5349 | 47.3k | const SCEV *LS = getNoopOrSignExtend(getSCEV(LHS), I->getType()); |
5350 | 47.3k | const SCEV *RS = getNoopOrSignExtend(getSCEV(RHS), I->getType()); |
5351 | 47.3k | const SCEV *LA = getSCEV(TrueVal); |
5352 | 47.3k | const SCEV *RA = getSCEV(FalseVal); |
5353 | 47.3k | const SCEV *LDiff = getMinusSCEV(LA, LS); |
5354 | 47.3k | const SCEV *RDiff = getMinusSCEV(RA, RS); |
5355 | 47.3k | if (LDiff == RDiff) |
5356 | 26.9k | return getAddExpr(getSMaxExpr(LS, RS), LDiff); |
5357 | 20.3k | LDiff = getMinusSCEV(LA, RS); |
5358 | 20.3k | RDiff = getMinusSCEV(RA, LS); |
5359 | 20.3k | if (LDiff == RDiff) |
5360 | 6.88k | return getAddExpr(getSMinExpr(LS, RS), LDiff); |
5361 | 19.4k | } |
5362 | 19.4k | break; |
5363 | 26.5k | case ICmpInst::ICMP_ULT: |
5364 | 26.5k | case ICmpInst::ICMP_ULE: |
5365 | 26.5k | std::swap(LHS, RHS); |
5366 | 26.5k | LLVM_FALLTHROUGH; |
5367 | 37.5k | case ICmpInst::ICMP_UGT: |
5368 | 37.5k | case ICmpInst::ICMP_UGE: |
5369 | 37.5k | // a >u b ? a+x : b+x -> umax(a, b)+x |
5370 | 37.5k | // a >u b ? b+x : a+x -> umin(a, b)+x |
5371 | 37.5k | if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType())) { |
5372 | 31.1k | const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType()); |
5373 | 31.1k | const SCEV *RS = getNoopOrZeroExtend(getSCEV(RHS), I->getType()); |
5374 | 31.1k | const SCEV *LA = getSCEV(TrueVal); |
5375 | 31.1k | const SCEV *RA = getSCEV(FalseVal); |
5376 | 31.1k | const SCEV *LDiff = getMinusSCEV(LA, LS); |
5377 | 31.1k | const SCEV *RDiff = getMinusSCEV(RA, RS); |
5378 | 31.1k | if (LDiff == RDiff) |
5379 | 9.58k | return getAddExpr(getUMaxExpr(LS, RS), LDiff); |
5380 | 21.5k | LDiff = getMinusSCEV(LA, RS); |
5381 | 21.5k | RDiff = getMinusSCEV(RA, LS); |
5382 | 21.5k | if (LDiff == RDiff) |
5383 | 6.53k | return getAddExpr(getUMinExpr(LS, RS), LDiff); |
5384 | 21.4k | } |
5385 | 21.4k | break; |
5386 | 21.4k | case ICmpInst::ICMP_NE: |
5387 | 492 | // n != 0 ? n+x : 1+x -> umax(n, 1)+x |
5388 | 492 | if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType()) && |
5389 | 492 | isa<ConstantInt>(RHS)490 && cast<ConstantInt>(RHS)->isZero()464 ) { |
5390 | 454 | const SCEV *One = getOne(I->getType()); |
5391 | 454 | const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType()); |
5392 | 454 | const SCEV *LA = getSCEV(TrueVal); |
5393 | 454 | const SCEV *RA = getSCEV(FalseVal); |
5394 | 454 | const SCEV *LDiff = getMinusSCEV(LA, LS); |
5395 | 454 | const SCEV *RDiff = getMinusSCEV(RA, One); |
5396 | 454 | if (LDiff == RDiff) |
5397 | 7 | return getAddExpr(getUMaxExpr(One, LS), LDiff); |
5398 | 485 | } |
5399 | 485 | break; |
5400 | 32.0k | case ICmpInst::ICMP_EQ: |
5401 | 32.0k | // n == 0 ? 1+x : n+x -> umax(n, 1)+x |
5402 | 32.0k | if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType()) && |
5403 | 32.0k | isa<ConstantInt>(RHS)26.1k && cast<ConstantInt>(RHS)->isZero()16.6k ) { |
5404 | 13.4k | const SCEV *One = getOne(I->getType()); |
5405 | 13.4k | const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType()); |
5406 | 13.4k | const SCEV *LA = getSCEV(TrueVal); |
5407 | 13.4k | const SCEV *RA = getSCEV(FalseVal); |
5408 | 13.4k | const SCEV *LDiff = getMinusSCEV(LA, One); |
5409 | 13.4k | const SCEV *RDiff = getMinusSCEV(RA, LS); |
5410 | 13.4k | if (LDiff == RDiff) |
5411 | 572 | return getAddExpr(getUMaxExpr(One, LS), LDiff); |
5412 | 31.4k | } |
5413 | 31.4k | break; |
5414 | 31.4k | default: |
5415 | 0 | break; |
5416 | 72.8k | } |
5417 | 72.8k | |
5418 | 72.8k | return getUnknown(I); |
5419 | 72.8k | } |
5420 | | |
5421 | | /// Expand GEP instructions into add and multiply operations. This allows them |
5422 | | /// to be analyzed by regular SCEV code. |
5423 | 2.04M | const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) { |
5424 | 2.04M | // Don't attempt to analyze GEPs over unsized objects. |
5425 | 2.04M | if (!GEP->getSourceElementType()->isSized()) |
5426 | 0 | return getUnknown(GEP); |
5427 | 2.04M | |
5428 | 2.04M | SmallVector<const SCEV *, 4> IndexExprs; |
5429 | 5.76M | for (auto Index = GEP->idx_begin(); Index != GEP->idx_end(); ++Index3.72M ) |
5430 | 3.72M | IndexExprs.push_back(getSCEV(*Index)); |
5431 | 2.04M | return getGEPExpr(GEP, IndexExprs); |
5432 | 2.04M | } |
5433 | | |
5434 | 12.1M | uint32_t ScalarEvolution::GetMinTrailingZerosImpl(const SCEV *S) { |
5435 | 12.1M | if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) |
5436 | 1.69M | return C->getAPInt().countTrailingZeros(); |
5437 | 10.4M | |
5438 | 10.4M | if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(S)) |
5439 | 72.7k | return std::min(GetMinTrailingZeros(T->getOperand()), |
5440 | 72.7k | (uint32_t)getTypeSizeInBits(T->getType())); |
5441 | 10.3M | |
5442 | 10.3M | if (const SCEVZeroExtendExpr *E = dyn_cast<SCEVZeroExtendExpr>(S)) { |
5443 | 452k | uint32_t OpRes = GetMinTrailingZeros(E->getOperand()); |
5444 | 452k | return OpRes == getTypeSizeInBits(E->getOperand()->getType()) |
5445 | 452k | ? getTypeSizeInBits(E->getType())0 |
5446 | 452k | : OpRes; |
5447 | 452k | } |
5448 | 9.93M | |
5449 | 9.93M | if (const SCEVSignExtendExpr *E = dyn_cast<SCEVSignExtendExpr>(S)) { |
5450 | 405k | uint32_t OpRes = GetMinTrailingZeros(E->getOperand()); |
5451 | 405k | return OpRes == getTypeSizeInBits(E->getOperand()->getType()) |
5452 | 405k | ? getTypeSizeInBits(E->getType())1 |
5453 | 405k | : OpRes405k ; |
5454 | 405k | } |
5455 | 9.53M | |
5456 | 9.53M | if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) { |
5457 | 2.27M | // The result is the min of all operands results. |
5458 | 2.27M | uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0)); |
5459 | 3.57M | for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e1.67M ; ++i1.29M ) |
5460 | 1.29M | MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i))); |
5461 | 2.27M | return MinOpRes; |
5462 | 2.27M | } |
5463 | 7.25M | |
5464 | 7.25M | if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) { |
5465 | 1.07M | // The result is the sum of all operands results. |
5466 | 1.07M | uint32_t SumOpRes = GetMinTrailingZeros(M->getOperand(0)); |
5467 | 1.07M | uint32_t BitWidth = getTypeSizeInBits(M->getType()); |
5468 | 1.07M | for (unsigned i = 1, e = M->getNumOperands(); |
5469 | 2.18M | SumOpRes != BitWidth && i != e2.18M ; ++i1.10M ) |
5470 | 1.10M | SumOpRes = |
5471 | 1.10M | std::min(SumOpRes + GetMinTrailingZeros(M->getOperand(i)), BitWidth); |
5472 | 1.07M | return SumOpRes; |
5473 | 1.07M | } |
5474 | 6.17M | |
5475 | 6.17M | if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) { |
5476 | 3.94M | // The result is the min of all operands results. |
5477 | 3.94M | uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0)); |
5478 | 6.38M | for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e3.80M ; ++i2.44M ) |
5479 | 2.44M | MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i))); |
5480 | 3.94M | return MinOpRes; |
5481 | 3.94M | } |
5482 | 2.23M | |
5483 | 2.23M | if (const SCEVSMaxExpr *M = dyn_cast<SCEVSMaxExpr>(S)) { |
5484 | 37.9k | // The result is the min of all operands results. |
5485 | 37.9k | uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0)); |
5486 | 42.7k | for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e4.97k ; ++i4.79k ) |
5487 | 4.79k | MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i))); |
5488 | 37.9k | return MinOpRes; |
5489 | 37.9k | } |
5490 | 2.19M | |
5491 | 2.19M | if (const SCEVUMaxExpr *M = dyn_cast<SCEVUMaxExpr>(S)) { |
5492 | 35.1k | // The result is the min of all operands results. |
5493 | 35.1k | uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0)); |
5494 | 35.7k | for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e784 ; ++i610 ) |
5495 | 610 | MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i))); |
5496 | 35.1k | return MinOpRes; |
5497 | 35.1k | } |
5498 | 2.16M | |
5499 | 2.16M | if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { |
5500 | 1.93M | // For a SCEVUnknown, ask ValueTracking. |
5501 | 1.93M | KnownBits Known = computeKnownBits(U->getValue(), getDataLayout(), 0, &AC, nullptr, &DT); |
5502 | 1.93M | return Known.countMinTrailingZeros(); |
5503 | 1.93M | } |
5504 | 230k | |
5505 | 230k | // SCEVUDivExpr |
5506 | 230k | return 0; |
5507 | 230k | } |
5508 | | |
5509 | 33.3M | uint32_t ScalarEvolution::GetMinTrailingZeros(const SCEV *S) { |
5510 | 33.3M | auto I = MinTrailingZerosCache.find(S); |
5511 | 33.3M | if (I != MinTrailingZerosCache.end()) |
5512 | 21.1M | return I->second; |
5513 | 12.1M | |
5514 | 12.1M | uint32_t Result = GetMinTrailingZerosImpl(S); |
5515 | 12.1M | auto InsertPair = MinTrailingZerosCache.insert({S, Result}); |
5516 | 12.1M | assert(InsertPair.second && "Should insert a new key"); |
5517 | 12.1M | return InsertPair.first->second; |
5518 | 12.1M | } |
5519 | | |
5520 | | /// Helper method to assign a range to V from metadata present in the IR. |
5521 | 3.82M | static Optional<ConstantRange> GetRangeFromMetadata(Value *V) { |
5522 | 3.82M | if (Instruction *I = dyn_cast<Instruction>(V)) |
5523 | 3.23M | if (MDNode *MD = I->getMetadata(LLVMContext::MD_range)) |
5524 | 36.7k | return getConstantRangeFromMetadata(*MD); |
5525 | 3.78M | |
5526 | 3.78M | return None; |
5527 | 3.78M | } |
5528 | | |
5529 | | /// Determine the range for a particular SCEV. If SignHint is |
5530 | | /// HINT_RANGE_UNSIGNED (resp. HINT_RANGE_SIGNED) then getRange prefers ranges |
5531 | | /// with a "cleaner" unsigned (resp. signed) representation. |
5532 | | const ConstantRange & |
5533 | | ScalarEvolution::getRangeRef(const SCEV *S, |
5534 | 212M | ScalarEvolution::RangeSignHint SignHint) { |
5535 | 212M | DenseMap<const SCEV *, ConstantRange> &Cache = |
5536 | 212M | SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED ? UnsignedRanges89.9M |
5537 | 212M | : SignedRanges122M ; |
5538 | 212M | ConstantRange::PreferredRangeType RangeType = |
5539 | 212M | SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED |
5540 | 212M | ? ConstantRange::Unsigned89.9M : ConstantRange::Signed122M ; |
5541 | 212M | |
5542 | 212M | // See if we've computed this range already. |
5543 | 212M | DenseMap<const SCEV *, ConstantRange>::iterator I = Cache.find(S); |
5544 | 212M | if (I != Cache.end()) |
5545 | 186M | return I->second; |
5546 | 26.0M | |
5547 | 26.0M | if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) |
5548 | 6.63M | return setRange(C, SignHint, ConstantRange(C->getAPInt())); |
5549 | 19.4M | |
5550 | 19.4M | unsigned BitWidth = getTypeSizeInBits(S->getType()); |
5551 | 19.4M | ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true); |
5552 | 19.4M | |
5553 | 19.4M | // If the value has known zeros, the maximum value will have those known zeros |
5554 | 19.4M | // as well. |
5555 | 19.4M | uint32_t TZ = GetMinTrailingZeros(S); |
5556 | 19.4M | if (TZ != 0) { |
5557 | 4.26M | if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED) |
5558 | 1.97M | ConservativeResult = |
5559 | 1.97M | ConstantRange(APInt::getMinValue(BitWidth), |
5560 | 1.97M | APInt::getMaxValue(BitWidth).lshr(TZ).shl(TZ) + 1); |
5561 | 2.29M | else |
5562 | 2.29M | ConservativeResult = ConstantRange( |
5563 | 2.29M | APInt::getSignedMinValue(BitWidth), |
5564 | 2.29M | APInt::getSignedMaxValue(BitWidth).ashr(TZ).shl(TZ) + 1); |
5565 | 4.26M | } |
5566 | 19.4M | |
5567 | 19.4M | if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { |
5568 | 3.82M | ConstantRange X = getRangeRef(Add->getOperand(0), SignHint); |
5569 | 9.65M | for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i5.82M ) |
5570 | 5.82M | X = X.add(getRangeRef(Add->getOperand(i), SignHint)); |
5571 | 3.82M | return setRange(Add, SignHint, |
5572 | 3.82M | ConservativeResult.intersectWith(X, RangeType)); |
5573 | 3.82M | } |
5574 | 15.5M | |
5575 | 15.5M | if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) { |
5576 | 2.09M | ConstantRange X = getRangeRef(Mul->getOperand(0), SignHint); |
5577 | 4.23M | for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i2.14M ) |
5578 | 2.14M | X = X.multiply(getRangeRef(Mul->getOperand(i), SignHint)); |
5579 | 2.09M | return setRange(Mul, SignHint, |
5580 | 2.09M | ConservativeResult.intersectWith(X, RangeType)); |
5581 | 2.09M | } |
5582 | 13.4M | |
5583 | 13.4M | if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) { |
5584 | 75.8k | ConstantRange X = getRangeRef(SMax->getOperand(0), SignHint); |
5585 | 152k | for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i77.1k ) |
5586 | 77.1k | X = X.smax(getRangeRef(SMax->getOperand(i), SignHint)); |
5587 | 75.8k | return setRange(SMax, SignHint, |
5588 | 75.8k | ConservativeResult.intersectWith(X, RangeType)); |
5589 | 75.8k | } |
5590 | 13.4M | |
5591 | 13.4M | if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) { |
5592 | 70.2k | ConstantRange X = getRangeRef(UMax->getOperand(0), SignHint); |
5593 | 141k | for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i70.8k ) |
5594 | 70.8k | X = X.umax(getRangeRef(UMax->getOperand(i), SignHint)); |
5595 | 70.2k | return setRange(UMax, SignHint, |
5596 | 70.2k | ConservativeResult.intersectWith(X, RangeType)); |
5597 | 70.2k | } |
5598 | 13.3M | |
5599 | 13.3M | if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) { |
5600 | 353k | ConstantRange X = getRangeRef(UDiv->getLHS(), SignHint); |
5601 | 353k | ConstantRange Y = getRangeRef(UDiv->getRHS(), SignHint); |
5602 | 353k | return setRange(UDiv, SignHint, |
5603 | 353k | ConservativeResult.intersectWith(X.udiv(Y), RangeType)); |
5604 | 353k | } |
5605 | 13.0M | |
5606 | 13.0M | if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) { |
5607 | 846k | ConstantRange X = getRangeRef(ZExt->getOperand(), SignHint); |
5608 | 846k | return setRange(ZExt, SignHint, |
5609 | 846k | ConservativeResult.intersectWith(X.zeroExtend(BitWidth), |
5610 | 846k | RangeType)); |
5611 | 846k | } |
5612 | 12.1M | |
5613 | 12.1M | if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) { |
5614 | 775k | ConstantRange X = getRangeRef(SExt->getOperand(), SignHint); |
5615 | 775k | return setRange(SExt, SignHint, |
5616 | 775k | ConservativeResult.intersectWith(X.signExtend(BitWidth), |
5617 | 775k | RangeType)); |
5618 | 775k | } |
5619 | 11.3M | |
5620 | 11.3M | if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) { |
5621 | 138k | ConstantRange X = getRangeRef(Trunc->getOperand(), SignHint); |
5622 | 138k | return setRange(Trunc, SignHint, |
5623 | 138k | ConservativeResult.intersectWith(X.truncate(BitWidth), |
5624 | 138k | RangeType)); |
5625 | 138k | } |
5626 | 11.2M | |
5627 | 11.2M | if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) { |
5628 | 7.31M | // If there's no unsigned wrap, the value will never be less than its |
5629 | 7.31M | // initial value. |
5630 | 7.31M | if (AddRec->hasNoUnsignedWrap()) |
5631 | 2.64M | if (const SCEVConstant *C = dyn_cast<SCEVConstant>(AddRec->getStart())) |
5632 | 2.37M | if (!C->getValue()->isZero()) |
5633 | 850k | ConservativeResult = ConservativeResult.intersectWith( |
5634 | 850k | ConstantRange(C->getAPInt(), APInt(BitWidth, 0)), RangeType); |
5635 | 7.31M | |
5636 | 7.31M | // If there's no signed wrap, and all the operands have the same sign or |
5637 | 7.31M | // zero, the value won't ever change sign. |
5638 | 7.31M | if (AddRec->hasNoSignedWrap()) { |
5639 | 2.58M | bool AllNonNeg = true; |
5640 | 2.58M | bool AllNonPos = true; |
5641 | 7.76M | for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i5.17M ) { |
5642 | 5.17M | if (!isKnownNonNegative(AddRec->getOperand(i))) AllNonNeg = false616k ; |
5643 | 5.17M | if (!isKnownNonPositive(AddRec->getOperand(i))) AllNonPos = false3.69M ; |
5644 | 5.17M | } |
5645 | 2.58M | if (AllNonNeg) |
5646 | 2.05M | ConservativeResult = ConservativeResult.intersectWith( |
5647 | 2.05M | ConstantRange(APInt(BitWidth, 0), |
5648 | 2.05M | APInt::getSignedMinValue(BitWidth)), RangeType); |
5649 | 535k | else if (AllNonPos) |
5650 | 9.38k | ConservativeResult = ConservativeResult.intersectWith( |
5651 | 9.38k | ConstantRange(APInt::getSignedMinValue(BitWidth), |
5652 | 9.38k | APInt(BitWidth, 1)), RangeType); |
5653 | 2.58M | } |
5654 | 7.31M | |
5655 | 7.31M | // TODO: non-affine addrec |
5656 | 7.31M | if (AddRec->isAffine()) { |
5657 | 7.31M | const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop()); |
5658 | 7.31M | if (!isa<SCEVCouldNotCompute>(MaxBECount) && |
5659 | 7.31M | getTypeSizeInBits(MaxBECount->getType()) <= BitWidth5.80M ) { |
5660 | 5.62M | auto RangeFromAffine = getRangeForAffineAR( |
5661 | 5.62M | AddRec->getStart(), AddRec->getStepRecurrence(*this), MaxBECount, |
5662 | 5.62M | BitWidth); |
5663 | 5.62M | if (!RangeFromAffine.isFullSet()) |
5664 | 2.97M | ConservativeResult = |
5665 | 2.97M | ConservativeResult.intersectWith(RangeFromAffine, RangeType); |
5666 | 5.62M | |
5667 | 5.62M | auto RangeFromFactoring = getRangeViaFactoring( |
5668 | 5.62M | AddRec->getStart(), AddRec->getStepRecurrence(*this), MaxBECount, |
5669 | 5.62M | BitWidth); |
5670 | 5.62M | if (!RangeFromFactoring.isFullSet()) |
5671 | 64 | ConservativeResult = |
5672 | 64 | ConservativeResult.intersectWith(RangeFromFactoring, RangeType); |
5673 | 5.62M | } |
5674 | 7.31M | } |
5675 | 7.31M | |
5676 | 7.31M | return setRange(AddRec, SignHint, std::move(ConservativeResult)); |
5677 | 7.31M | } |
5678 | 3.92M | |
5679 | 3.92M | if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { |
5680 | 3.82M | // Check if the IR explicitly contains !range metadata. |
5681 | 3.82M | Optional<ConstantRange> MDRange = GetRangeFromMetadata(U->getValue()); |
5682 | 3.82M | if (MDRange.hasValue()) |
5683 | 36.7k | ConservativeResult = ConservativeResult.intersectWith(MDRange.getValue(), |
5684 | 36.7k | RangeType); |
5685 | 3.82M | |
5686 | 3.82M | // Split here to avoid paying the compile-time cost of calling both |
5687 | 3.82M | // computeKnownBits and ComputeNumSignBits. This restriction can be lifted |
5688 | 3.82M | // if needed. |
5689 | 3.82M | const DataLayout &DL = getDataLayout(); |
5690 | 3.82M | if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED) { |
5691 | 1.77M | // For a SCEVUnknown, ask ValueTracking. |
5692 | 1.77M | KnownBits Known = computeKnownBits(U->getValue(), DL, 0, &AC, nullptr, &DT); |
5693 | 1.77M | if (Known.One != ~Known.Zero + 1) |
5694 | 126k | ConservativeResult = |
5695 | 126k | ConservativeResult.intersectWith( |
5696 | 126k | ConstantRange(Known.One, ~Known.Zero + 1), RangeType); |
5697 | 2.04M | } else { |
5698 | 2.04M | assert(SignHint == ScalarEvolution::HINT_RANGE_SIGNED && |
5699 | 2.04M | "generalize as needed!"); |
5700 | 2.04M | unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, &AC, nullptr, &DT); |
5701 | 2.04M | if (NS > 1) |
5702 | 85.9k | ConservativeResult = ConservativeResult.intersectWith( |
5703 | 85.9k | ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1), |
5704 | 85.9k | APInt::getSignedMaxValue(BitWidth).ashr(NS - 1) + 1), |
5705 | 85.9k | RangeType); |
5706 | 2.04M | } |
5707 | 3.82M | |
5708 | 3.82M | // A range of Phi is a subset of union of all ranges of its input. |
5709 | 3.82M | if (const PHINode *Phi = dyn_cast<PHINode>(U->getValue())) { |
5710 | 1.21M | // Make sure that we do not run over cycled Phis. |
5711 | 1.21M | if (PendingPhiRanges.insert(Phi).second) { |
5712 | 907k | ConstantRange RangeFromOps(BitWidth, /*isFullSet=*/false); |
5713 | 1.17M | for (auto &Op : Phi->operands()) { |
5714 | 1.17M | auto OpRange = getRangeRef(getSCEV(Op), SignHint); |
5715 | 1.17M | RangeFromOps = RangeFromOps.unionWith(OpRange); |
5716 | 1.17M | // No point to continue if we already have a full set. |
5717 | 1.17M | if (RangeFromOps.isFullSet()) |
5718 | 743k | break; |
5719 | 1.17M | } |
5720 | 907k | ConservativeResult = |
5721 | 907k | ConservativeResult.intersectWith(RangeFromOps, RangeType); |
5722 | 907k | bool Erased = PendingPhiRanges.erase(Phi); |
5723 | 907k | assert(Erased && "Failed to erase Phi properly?"); |
5724 | 907k | (void) Erased; |
5725 | 907k | } |
5726 | 1.21M | } |
5727 | 3.82M | |
5728 | 3.82M | return setRange(U, SignHint, std::move(ConservativeResult)); |
5729 | 3.82M | } |
5730 | 103k | |
5731 | 103k | return setRange(S, SignHint, std::move(ConservativeResult)); |
5732 | 103k | } |
5733 | | |
5734 | | // Given a StartRange, Step and MaxBECount for an expression compute a range of |
5735 | | // values that the expression can take. Initially, the expression has a value |
5736 | | // from StartRange and then is changed by Step up to MaxBECount times. Signed |
5737 | | // argument defines if we treat Step as signed or unsigned. |
5738 | | static ConstantRange getRangeForAffineARHelper(APInt Step, |
5739 | | const ConstantRange &StartRange, |
5740 | | const APInt &MaxBECount, |
5741 | 16.8M | unsigned BitWidth, bool Signed) { |
5742 | 16.8M | // If either Step or MaxBECount is 0, then the expression won't change, and we |
5743 | 16.8M | // just need to return the initial range. |
5744 | 16.8M | if (Step == 0 || MaxBECount == 016.8M ) |
5745 | 37.6k | return StartRange; |
5746 | 16.8M | |
5747 | 16.8M | // If we don't know anything about the initial value (i.e. StartRange is |
5748 | 16.8M | // FullRange), then we don't know anything about the final range either. |
5749 | 16.8M | // Return FullRange. |
5750 | 16.8M | if (StartRange.isFullSet()) |
5751 | 3.25M | return ConstantRange::getFull(BitWidth); |
5752 | 13.5M | |
5753 | 13.5M | // If Step is signed and negative, then we use its absolute value, but we also |
5754 | 13.5M | // note that we're moving in the opposite direction. |
5755 | 13.5M | bool Descending = Signed && Step.isNegative()9.05M ; |
5756 | 13.5M | |
5757 | 13.5M | if (Signed) |
5758 | 9.05M | // This is correct even for INT_SMIN. Let's look at i8 to illustrate this: |
5759 | 9.05M | // abs(INT_SMIN) = abs(-128) = abs(0x80) = -0x80 = 0x80 = 128. |
5760 | 9.05M | // This equations hold true due to the well-defined wrap-around behavior of |
5761 | 9.05M | // APInt. |
5762 | 9.05M | Step = Step.abs(); |
5763 | 13.5M | |
5764 | 13.5M | // Check if Offset is more than full span of BitWidth. If it is, the |
5765 | 13.5M | // expression is guaranteed to overflow. |
5766 | 13.5M | if (APInt::getMaxValue(StartRange.getBitWidth()).udiv(Step).ult(MaxBECount)) |
5767 | 3.10M | return ConstantRange::getFull(BitWidth); |
5768 | 10.4M | |
5769 | 10.4M | // Offset is by how much the expression can change. Checks above guarantee no |
5770 | 10.4M | // overflow here. |
5771 | 10.4M | APInt Offset = Step * MaxBECount; |
5772 | 10.4M | |
5773 | 10.4M | // Minimum value of the final range will match the minimal value of StartRange |
5774 | 10.4M | // if the expression is increasing and will be decreased by Offset otherwise. |
5775 | 10.4M | // Maximum value of the final range will match the maximal value of StartRange |
5776 | 10.4M | // if the expression is decreasing and will be increased by Offset otherwise. |
5777 | 10.4M | APInt StartLower = StartRange.getLower(); |
5778 | 10.4M | APInt StartUpper = StartRange.getUpper() - 1; |
5779 | 10.4M | APInt MovedBoundary = Descending ? (StartLower - std::move(Offset))2.08M |
5780 | 10.4M | : (StartUpper + std::move(Offset))8.39M ; |
5781 | 10.4M | |
5782 | 10.4M | // It's possible that the new minimum/maximum value will fall into the initial |
5783 | 10.4M | // range (due to wrap around). This means that the expression can take any |
5784 | 10.4M | // value in this bitwidth, and we have to return full range. |
5785 | 10.4M | if (StartRange.contains(MovedBoundary)) |
5786 | 1.87M | return ConstantRange::getFull(BitWidth); |
5787 | 8.59M | |
5788 | 8.59M | APInt NewLower = |
5789 | 8.59M | Descending ? std::move(MovedBoundary)1.72M : std::move(StartLower)6.86M ; |
5790 | 8.59M | APInt NewUpper = |
5791 | 8.59M | Descending ? std::move(StartUpper)1.72M : std::move(MovedBoundary)6.86M ; |
5792 | 8.59M | NewUpper += 1; |
5793 | 8.59M | |
5794 | 8.59M | // No overflow detected, return [StartLower, StartUpper + Offset + 1) range. |
5795 | 8.59M | return ConstantRange::getNonEmpty(std::move(NewLower), std::move(NewUpper)); |
5796 | 8.59M | } |
5797 | | |
5798 | | ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start, |
5799 | | const SCEV *Step, |
5800 | | const SCEV *MaxBECount, |
5801 | 5.62M | unsigned BitWidth) { |
5802 | 5.62M | assert(!isa<SCEVCouldNotCompute>(MaxBECount) && |
5803 | 5.62M | getTypeSizeInBits(MaxBECount->getType()) <= BitWidth && |
5804 | 5.62M | "Precondition!"); |
5805 | 5.62M | |
5806 | 5.62M | MaxBECount = getNoopOrZeroExtend(MaxBECount, Start->getType()); |
5807 | 5.62M | APInt MaxBECountValue = getUnsignedRangeMax(MaxBECount); |
5808 | 5.62M | |
5809 | 5.62M | // First, consider step signed. |
5810 | 5.62M | ConstantRange StartSRange = getSignedRange(Start); |
5811 | 5.62M | ConstantRange StepSRange = getSignedRange(Step); |
5812 | 5.62M | |
5813 | 5.62M | // If Step can be both positive and negative, we need to find ranges for the |
5814 | 5.62M | // maximum absolute step values in both directions and union them. |
5815 | 5.62M | ConstantRange SR = |
5816 | 5.62M | getRangeForAffineARHelper(StepSRange.getSignedMin(), StartSRange, |
5817 | 5.62M | MaxBECountValue, BitWidth, /* Signed = */ true); |
5818 | 5.62M | SR = SR.unionWith(getRangeForAffineARHelper(StepSRange.getSignedMax(), |
5819 | 5.62M | StartSRange, MaxBECountValue, |
5820 | 5.62M | BitWidth, /* Signed = */ true)); |
5821 | 5.62M | |
5822 | 5.62M | // Next, consider step unsigned. |
5823 | 5.62M | ConstantRange UR = getRangeForAffineARHelper( |
5824 | 5.62M | getUnsignedRangeMax(Step), getUnsignedRange(Start), |
5825 | 5.62M | MaxBECountValue, BitWidth, /* Signed = */ false); |
5826 | 5.62M | |
5827 | 5.62M | // Finally, intersect signed and unsigned ranges. |
5828 | 5.62M | return SR.intersectWith(UR, ConstantRange::Smallest); |
5829 | 5.62M | } |
5830 | | |
5831 | | ConstantRange ScalarEvolution::getRangeViaFactoring(const SCEV *Start, |
5832 | | const SCEV *Step, |
5833 | | const SCEV *MaxBECount, |
5834 | 5.62M | unsigned BitWidth) { |
5835 | 5.62M | // RangeOf({C?A:B,+,C?P:Q}) == RangeOf(C?{A,+,P}:{B,+,Q}) |
5836 | 5.62M | // == RangeOf({A,+,P}) union RangeOf({B,+,Q}) |
5837 | 5.62M | |
5838 | 5.62M | struct SelectPattern { |
5839 | 5.62M | Value *Condition = nullptr; |
5840 | 5.62M | APInt TrueValue; |
5841 | 5.62M | APInt FalseValue; |
5842 | 5.62M | |
5843 | 5.62M | explicit SelectPattern(ScalarEvolution &SE, unsigned BitWidth, |
5844 | 5.62M | const SCEV *S) { |
5845 | 5.62M | Optional<unsigned> CastOp; |
5846 | 5.62M | APInt Offset(BitWidth, 0); |
5847 | 5.62M | |
5848 | 5.62M | assert(SE.getTypeSizeInBits(S->getType()) == BitWidth && |
5849 | 5.62M | "Should be!"); |
5850 | 5.62M | |
5851 | 5.62M | // Peel off a constant offset: |
5852 | 5.62M | if (auto *SA = dyn_cast<SCEVAddExpr>(S)) { |
5853 | 1.42M | // In the future we could consider being smarter here and handle |
5854 | 1.42M | // {Start+Step,+,Step} too. |
5855 | 1.42M | if (SA->getNumOperands() != 2 || !isa<SCEVConstant>(SA->getOperand(0))1.18M ) |
5856 | 377k | return; |
5857 | 1.04M | |
5858 | 1.04M | Offset = cast<SCEVConstant>(SA->getOperand(0))->getAPInt(); |
5859 | 1.04M | S = SA->getOperand(1); |
5860 | 1.04M | } |
5861 | 5.62M | |
5862 | 5.62M | // Peel off a cast operation |
5863 | 5.62M | if (auto *5.24M SCast5.24M = dyn_cast<SCEVCastExpr>(S)) { |
5864 | 211k | CastOp = SCast->getSCEVType(); |
5865 | 211k | S = SCast->getOperand(); |
5866 | 211k | } |
5867 | 5.24M | |
5868 | 5.24M | using namespace llvm::PatternMatch; |
5869 | 5.24M | |
5870 | 5.24M | auto *SU = dyn_cast<SCEVUnknown>(S); |
5871 | 5.24M | const APInt *TrueVal, *FalseVal; |
5872 | 5.24M | if (!SU || |
5873 | 5.24M | !match(SU->getValue(), m_Select(m_Value(Condition), m_APInt(TrueVal), |
5874 | 5.24M | m_APInt(FalseVal)))) { |
5875 | 5.24M | Condition = nullptr; |
5876 | 5.24M | return; |
5877 | 5.24M | } |
5878 | 353 | |
5879 | 353 | TrueValue = *TrueVal; |
5880 | 353 | FalseValue = *FalseVal; |
5881 | 353 | |
5882 | 353 | // Re-apply the cast we peeled off earlier |
5883 | 353 | if (CastOp.hasValue()) |
5884 | 181 | switch (*CastOp) { |
5885 | 181 | default: |
5886 | 0 | llvm_unreachable("Unknown SCEV cast type!"); |
5887 | 181 | |
5888 | 181 | case scTruncate: |
5889 | 16 | TrueValue = TrueValue.trunc(BitWidth); |
5890 | 16 | FalseValue = FalseValue.trunc(BitWidth); |
5891 | 16 | break; |
5892 | 181 | case scZeroExtend: |
5893 | 135 | TrueValue = TrueValue.zext(BitWidth); |
5894 | 135 | FalseValue = FalseValue.zext(BitWidth); |
5895 | 135 | break; |
5896 | 181 | case scSignExtend: |
5897 | 30 | TrueValue = TrueValue.sext(BitWidth); |
5898 | 30 | FalseValue = FalseValue.sext(BitWidth); |
5899 | 30 | break; |
5900 | 353 | } |
5901 | 353 | |
5902 | 353 | // Re-apply the constant offset we peeled off earlier |
5903 | 353 | TrueValue += Offset; |
5904 | 353 | FalseValue += Offset; |
5905 | 353 | } |
5906 | 5.62M | |
5907 | 5.62M | bool isRecognized() { return Condition != nullptr; } |
5908 | 5.62M | }; |
5909 | 5.62M | |
5910 | 5.62M | SelectPattern StartPattern(*this, BitWidth, Start); |
5911 | 5.62M | if (!StartPattern.isRecognized()) |
5912 | 5.62M | return ConstantRange::getFull(BitWidth); |
5913 | 277 | |
5914 | 277 | SelectPattern StepPattern(*this, BitWidth, Step); |
5915 | 277 | if (!StepPattern.isRecognized()) |
5916 | 201 | return ConstantRange::getFull(BitWidth); |
5917 | 76 | |
5918 | 76 | if (StartPattern.Condition != StepPattern.Condition) { |
5919 | 0 | // We don't handle this case today; but we could, by considering four |
5920 | 0 | // possibilities below instead of two. I'm not sure if there are cases where |
5921 | 0 | // that will help over what getRange already does, though. |
5922 | 0 | return ConstantRange::getFull(BitWidth); |
5923 | 0 | } |
5924 | 76 | |
5925 | 76 | // NB! Calling ScalarEvolution::getConstant is fine, but we should not try to |
5926 | 76 | // construct arbitrary general SCEV expressions here. This function is called |
5927 | 76 | // from deep in the call stack, and calling getSCEV (on a sext instruction, |
5928 | 76 | // say) can end up caching a suboptimal value. |
5929 | 76 | |
5930 | 76 | // FIXME: without the explicit `this` receiver below, MSVC errors out with |
5931 | 76 | // C2352 and C2512 (otherwise it isn't needed). |
5932 | 76 | |
5933 | 76 | const SCEV *TrueStart = this->getConstant(StartPattern.TrueValue); |
5934 | 76 | const SCEV *TrueStep = this->getConstant(StepPattern.TrueValue); |
5935 | 76 | const SCEV *FalseStart = this->getConstant(StartPattern.FalseValue); |
5936 | 76 | const SCEV *FalseStep = this->getConstant(StepPattern.FalseValue); |
5937 | 76 | |
5938 | 76 | ConstantRange TrueRange = |
5939 | 76 | this->getRangeForAffineAR(TrueStart, TrueStep, MaxBECount, BitWidth); |
5940 | 76 | ConstantRange FalseRange = |
5941 | 76 | this->getRangeForAffineAR(FalseStart, FalseStep, MaxBECount, BitWidth); |
5942 | 76 | |
5943 | 76 | return TrueRange.unionWith(FalseRange); |
5944 | 76 | } |
5945 | | |
5946 | 1.52M | SCEV::NoWrapFlags ScalarEvolution::getNoWrapFlagsFromUB(const Value *V) { |
5947 | 1.52M | if (isa<ConstantExpr>(V)) return SCEV::FlagAnyWrap14 ; |
5948 | 1.52M | const BinaryOperator *BinOp = cast<BinaryOperator>(V); |
5949 | 1.52M | |
5950 | 1.52M | // Return early if there are no flags to propagate to the SCEV. |
5951 | 1.52M | SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap; |
5952 | 1.52M | if (BinOp->hasNoUnsignedWrap()) |
5953 | 700k | Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW); |
5954 | 1.52M | if (BinOp->hasNoSignedWrap()) |
5955 | 962k | Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW); |
5956 | 1.52M | if (Flags == SCEV::FlagAnyWrap) |
5957 | 463k | return SCEV::FlagAnyWrap; |
5958 | 1.05M | |
5959 | 1.05M | return isSCEVExprNeverPoison(BinOp) ? Flags69.9k : SCEV::FlagAnyWrap988k ; |
5960 | 1.05M | } |
5961 | | |
5962 | 2.24M | bool ScalarEvolution::isSCEVExprNeverPoison(const Instruction *I) { |
5963 | 2.24M | // Here we check that I is in the header of the innermost loop containing I, |
5964 | 2.24M | // since we only deal with instructions in the loop header. The actual loop we |
5965 | 2.24M | // need to check later will come from an add recurrence, but getting that |
5966 | 2.24M | // requires computing the SCEV of the operands, which can be expensive. This |
5967 | 2.24M | // check we can do cheaply to rule out some cases early. |
5968 | 2.24M | Loop *InnermostContainingLoop = LI.getLoopFor(I->getParent()); |
5969 | 2.24M | if (InnermostContainingLoop == nullptr || |
5970 | 2.24M | InnermostContainingLoop->getHeader() != I->getParent()2.09M ) |
5971 | 818k | return false; |
5972 | 1.42M | |
5973 | 1.42M | // Only proceed if we can prove that I does not yield poison. |
5974 | 1.42M | if (!programUndefinedIfFullPoison(I)) |
5975 | 1.27M | return false; |
5976 | 150k | |
5977 | 150k | // At this point we know that if I is executed, then it does not wrap |
5978 | 150k | // according to at least one of NSW or NUW. If I is not executed, then we do |
5979 | 150k | // not know if the calculation that I represents would wrap. Multiple |
5980 | 150k | // instructions can map to the same SCEV. If we apply NSW or NUW from I to |
5981 | 150k | // the SCEV, we must guarantee no wrapping for that SCEV also when it is |
5982 | 150k | // derived from other instructions that map to the same SCEV. We cannot make |
5983 | 150k | // that guarantee for cases where I is not executed. So we need to find the |
5984 | 150k | // loop that I is considered in relation to and prove that I is executed for |
5985 | 150k | // every iteration of that loop. That implies that the value that I |
5986 | 150k | // calculates does not wrap anywhere in the loop, so then we can apply the |
5987 | 150k | // flags to the SCEV. |
5988 | 150k | // |
5989 | 150k | // We check isLoopInvariant to disambiguate in case we are adding recurrences |
5990 | 150k | // from different loops, so that we know which loop to prove that I is |
5991 | 150k | // executed in. |
5992 | 223k | for (unsigned OpIndex = 0; 150k OpIndex < I->getNumOperands(); ++OpIndex73.2k ) { |
5993 | 188k | // I could be an extractvalue from a call to an overflow intrinsic. |
5994 | 188k | // TODO: We can do better here in some cases. |
5995 | 188k | if (!isSCEVable(I->getOperand(OpIndex)->getType())) |
5996 | 1 | return false; |
5997 | 188k | const SCEV *Op = getSCEV(I->getOperand(OpIndex)); |
5998 | 188k | if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) { |
5999 | 120k | bool AllOtherOpsLoopInvariant = true; |
6000 | 355k | for (unsigned OtherOpIndex = 0; OtherOpIndex < I->getNumOperands(); |
6001 | 238k | ++OtherOpIndex234k ) { |
6002 | 238k | if (OtherOpIndex != OpIndex) { |
6003 | 120k | const SCEV *OtherOp = getSCEV(I->getOperand(OtherOpIndex)); |
6004 | 120k | if (!isLoopInvariant(OtherOp, AddRec->getLoop())) { |
6005 | 3.84k | AllOtherOpsLoopInvariant = false; |
6006 | 3.84k | break; |
6007 | 3.84k | } |
6008 | 120k | } |
6009 | 238k | } |
6010 | 120k | if (AllOtherOpsLoopInvariant && |
6011 | 120k | isGuaranteedToExecuteForEveryIteration(I, AddRec->getLoop())117k ) |
6012 | 115k | return true; |
6013 | 120k | } |
6014 | 188k | } |
6015 | 150k | return false35.3k ; |
6016 | 150k | } |
6017 | | |
6018 | 1.18M | bool ScalarEvolution::isAddRecNeverPoison(const Instruction *I, const Loop *L) { |
6019 | 1.18M | // If we know that \c I can never be poison period, then that's enough. |
6020 | 1.18M | if (isSCEVExprNeverPoison(I)) |
6021 | 45.0k | return true; |
6022 | 1.13M | |
6023 | 1.13M | // For an add recurrence specifically, we assume that infinite loops without |
6024 | 1.13M | // side effects are undefined behavior, and then reason as follows: |
6025 | 1.13M | // |
6026 | 1.13M | // If the add recurrence is poison in any iteration, it is poison on all |
6027 | 1.13M | // future iterations (since incrementing poison yields poison). If the result |
6028 | 1.13M | // of the add recurrence is fed into the loop latch condition and the loop |
6029 | 1.13M | // does not contain any throws or exiting blocks other than the latch, we now |
6030 | 1.13M | // have the ability to "choose" whether the backedge is taken or not (by |
6031 | 1.13M | // choosing a sufficiently evil value for the poison feeding into the branch) |
6032 | 1.13M | // for every iteration including and after the one in which \p I first became |
6033 | 1.13M | // poison. There are two possibilities (let's call the iteration in which \p |
6034 | 1.13M | // I first became poison as K): |
6035 | 1.13M | // |
6036 | 1.13M | // 1. In the set of iterations including and after K, the loop body executes |
6037 | 1.13M | // no side effects. In this case executing the backege an infinte number |
6038 | 1.13M | // of times will yield undefined behavior. |
6039 | 1.13M | // |
6040 | 1.13M | // 2. In the set of iterations including and after K, the loop body executes |
6041 | 1.13M | // at least one side effect. In this case, that specific instance of side |
6042 | 1.13M | // effect is control dependent on poison, which also yields undefined |
6043 | 1.13M | // behavior. |
6044 | 1.13M | |
6045 | 1.13M | auto *ExitingBB = L->getExitingBlock(); |
6046 | 1.13M | auto *LatchBB = L->getLoopLatch(); |
6047 | 1.13M | if (!ExitingBB || !LatchBB974k || ExitingBB != LatchBB974k ) |
6048 | 172k | return false; |
6049 | 965k | |
6050 | 965k | SmallPtrSet<const Instruction *, 16> Pushed; |
6051 | 965k | SmallVector<const Instruction *, 8> PoisonStack; |
6052 | 965k | |
6053 | 965k | // We start by assuming \c I, the post-inc add recurrence, is poison. Only |
6054 | 965k | // things that are known to be fully poison under that assumption go on the |
6055 | 965k | // PoisonStack. |
6056 | 965k | Pushed.insert(I); |
6057 | 965k | PoisonStack.push_back(I); |
6058 | 965k | |
6059 | 965k | bool LatchControlDependentOnPoison = false; |
6060 | 2.79M | while (!PoisonStack.empty() && !LatchControlDependentOnPoison1.83M ) { |
6061 | 1.82M | const Instruction *Poison = PoisonStack.pop_back_val(); |
6062 | 1.82M | |
6063 | 2.71M | for (auto *PoisonUser : Poison->users()) { |
6064 | 2.71M | if (propagatesFullPoison(cast<Instruction>(PoisonUser))) { |
6065 | 876k | if (Pushed.insert(cast<Instruction>(PoisonUser)).second) |
6066 | 866k | PoisonStack.push_back(cast<Instruction>(PoisonUser)); |
6067 | 1.83M | } else if (auto *BI = dyn_cast<BranchInst>(PoisonUser)) { |
6068 | 716k | assert(BI->isConditional() && "Only possibility!"); |
6069 | 716k | if (BI->getParent() == LatchBB) { |
6070 | 713k | LatchControlDependentOnPoison = true; |
6071 | 713k | break; |
6072 | 713k | } |
6073 | 716k | } |
6074 | 2.71M | } |
6075 | 1.82M | } |
6076 | 965k | |
6077 | 965k | return LatchControlDependentOnPoison && loopHasNoAbnormalExits(L)713k ; |
6078 | 965k | } |
6079 | | |
6080 | | ScalarEvolution::LoopProperties |
6081 | 731k | ScalarEvolution::getLoopProperties(const Loop *L) { |
6082 | 731k | using LoopProperties = ScalarEvolution::LoopProperties; |
6083 | 731k | |
6084 | 731k | auto Itr = LoopPropertiesCache.find(L); |
6085 | 731k | if (Itr == LoopPropertiesCache.end()) { |
6086 | 9.05M | auto HasSideEffects = [](Instruction *I) { |
6087 | 9.05M | if (auto *SI = dyn_cast<StoreInst>(I)) |
6088 | 827k | return !SI->isSimple(); |
6089 | 8.22M | |
6090 | 8.22M | return I->mayHaveSideEffects(); |
6091 | 8.22M | }; |
6092 | 411k | |
6093 | 411k | LoopProperties LP = {/* HasNoAbnormalExits */ true, |
6094 | 411k | /*HasNoSideEffects*/ true}; |
6095 | 411k | |
6096 | 411k | for (auto *BB : L->getBlocks()) |
6097 | 9.05M | for (auto &I : *BB)1.24M { |
6098 | 9.05M | if (!isGuaranteedToTransferExecutionToSuccessor(&I)) |
6099 | 160k | LP.HasNoAbnormalExits = false; |
6100 | 9.05M | if (HasSideEffects(&I)) |
6101 | 194k | LP.HasNoSideEffects = false; |
6102 | 9.05M | if (!LP.HasNoAbnormalExits && !LP.HasNoSideEffects610k ) |
6103 | 610k | break; // We're already as pessimistic as we can get. |
6104 | 9.05M | } |
6105 | 411k | |
6106 | 411k | auto InsertPair = LoopPropertiesCache.insert({L, LP}); |
6107 | 411k | assert(InsertPair.second && "We just checked!"); |
6108 | 411k | Itr = InsertPair.first; |
6109 | 411k | } |
6110 | 731k | |
6111 | 731k | return Itr->second; |
6112 | 731k | } |
6113 | | |
6114 | 10.8M | const SCEV *ScalarEvolution::createSCEV(Value *V) { |
6115 | 10.8M | if (!isSCEVable(V->getType())) |
6116 | 0 | return getUnknown(V); |
6117 | 10.8M | |
6118 | 10.8M | if (Instruction *I = dyn_cast<Instruction>(V)) { |
6119 | 8.51M | // Don't attempt to analyze instructions in blocks that aren't |
6120 | 8.51M | // reachable. Such instructions don't matter, and they aren't required |
6121 | 8.51M | // to obey basic rules for definitions dominating uses which this |
6122 | 8.51M | // analysis depends on. |
6123 | 8.51M | if (!DT.isReachableFromEntry(I->getParent())) |
6124 | 1 | return getUnknown(UndefValue::get(V->getType())); |
6125 | 2.34M | } else if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) |
6126 | 1.88M | return getConstant(CI); |
6127 | 457k | else if (isa<ConstantPointerNull>(V)) |
6128 | 75.6k | return getZero(V->getType()); |
6129 | 381k | else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) |
6130 | 0 | return GA->isInterposable() ? getUnknown(V) : getSCEV(GA->getAliasee()); |
6131 | 381k | else if (!isa<ConstantExpr>(V)) |
6132 | 332k | return getUnknown(V); |
6133 | 8.56M | |
6134 | 8.56M | Operator *U = cast<Operator>(V); |
6135 | 8.56M | if (auto BO = MatchBinaryOp(U, DT)) { |
6136 | 1.67M | switch (BO->Opcode) { |
6137 | 1.67M | case Instruction::Add: { |
6138 | 1.16M | // The simple thing to do would be to just call getSCEV on both operands |
6139 | 1.16M | // and call getAddExpr with the result. However if we're looking at a |
6140 | 1.16M | // bunch of things all added together, this can be quite inefficient, |
6141 | 1.16M | // because it leads to N-1 getAddExpr calls for N ultimate operands. |
6142 | 1.16M | // Instead, gather up all the operands and make a single getAddExpr call. |
6143 | 1.16M | // LLVM IR canonical form means we need only traverse the left operands. |
6144 | 1.16M | SmallVector<const SCEV *, 4> AddOps; |
6145 | 1.26M | do { |
6146 | 1.26M | if (BO->Op) { |
6147 | 1.26M | if (auto *OpSCEV = getExistingSCEV(BO->Op)) { |
6148 | 34.5k | AddOps.push_back(OpSCEV); |
6149 | 34.5k | break; |
6150 | 34.5k | } |
6151 | 1.22M | |
6152 | 1.22M | // If a NUW or NSW flag can be applied to the SCEV for this |
6153 | 1.22M | // addition, then compute the SCEV for this addition by itself |
6154 | 1.22M | // with a separate call to getAddExpr. We need to do that |
6155 | 1.22M | // instead of pushing the operands of the addition onto AddOps, |
6156 | 1.22M | // since the flags are only known to apply to this particular |
6157 | 1.22M | // addition - they may not apply to other additions that can be |
6158 | 1.22M | // formed with operands from AddOps. |
6159 | 1.22M | const SCEV *RHS = getSCEV(BO->RHS); |
6160 | 1.22M | SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(BO->Op); |
6161 | 1.22M | if (Flags != SCEV::FlagAnyWrap) { |
6162 | 58.9k | const SCEV *LHS = getSCEV(BO->LHS); |
6163 | 58.9k | if (BO->Opcode == Instruction::Sub) |
6164 | 7 | AddOps.push_back(getMinusSCEV(LHS, RHS, Flags)); |
6165 | 58.9k | else |
6166 | 58.9k | AddOps.push_back(getAddExpr(LHS, RHS, Flags)); |
6167 | 58.9k | break; |
6168 | 58.9k | } |
6169 | 1.16M | } |
6170 | 1.16M | |
6171 | 1.16M | if (BO->Opcode == Instruction::Sub) |
6172 | 7.81k | AddOps.push_back(getNegativeSCEV(getSCEV(BO->RHS))); |
6173 | 1.15M | else |
6174 | 1.15M | AddOps.push_back(getSCEV(BO->RHS)); |
6175 | 1.16M | |
6176 | 1.16M | auto NewBO = MatchBinaryOp(BO->LHS, DT); |
6177 | 1.16M | if (!NewBO || (169k NewBO->Opcode != Instruction::Add169k && |
6178 | 1.06M | NewBO->Opcode != Instruction::Sub81.1k )) { |
6179 | 1.06M | AddOps.push_back(getSCEV(BO->LHS)); |
6180 | 1.06M | break; |
6181 | 1.06M | } |
6182 | 100k | BO = NewBO; |
6183 | 100k | } while (true); |
6184 | 1.16M | |
6185 | 1.16M | return getAddExpr(AddOps); |
6186 | 1.67M | } |
6187 | 1.67M | |
6188 | 1.67M | case Instruction::Mul: { |
6189 | 80.3k | SmallVector<const SCEV *, 4> MulOps; |
6190 | 82.0k | do { |
6191 | 82.0k | if (BO->Op) { |
6192 | 81.9k | if (auto *OpSCEV = getExistingSCEV(BO->Op)) { |
6193 | 554 | MulOps.push_back(OpSCEV); |
6194 | 554 | break; |
6195 | 554 | } |
6196 | 81.4k | |
6197 | 81.4k | SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(BO->Op); |
6198 | 81.4k | if (Flags != SCEV::FlagAnyWrap) { |
6199 | 4.08k | MulOps.push_back( |
6200 | 4.08k | getMulExpr(getSCEV(BO->LHS), getSCEV(BO->RHS), Flags)); |
6201 | 4.08k | break; |
6202 | 4.08k | } |
6203 | 77.3k | } |
6204 | 77.3k | |
6205 | 77.3k | MulOps.push_back(getSCEV(BO->RHS)); |
6206 | 77.3k | auto NewBO = MatchBinaryOp(BO->LHS, DT); |
6207 | 77.3k | if (!NewBO || NewBO->Opcode != Instruction::Mul16.4k ) { |
6208 | 75.7k | MulOps.push_back(getSCEV(BO->LHS)); |
6209 | 75.7k | break; |
6210 | 75.7k | } |
6211 | 1.63k | BO = NewBO; |
6212 | 1.63k | } while (true); |
6213 | 80.3k | |
6214 | 80.3k | return getMulExpr(MulOps); |
6215 | 1.67M | } |
6216 | 1.67M | case Instruction::UDiv: |
6217 | 64.1k | return getUDivExpr(getSCEV(BO->LHS), getSCEV(BO->RHS)); |
6218 | 1.67M | case Instruction::URem: |
6219 | 9.76k | return getURemExpr(getSCEV(BO->LHS), getSCEV(BO->RHS)); |
6220 | 1.67M | case Instruction::Sub: { |
6221 | 127k | SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap; |
6222 | 127k | if (BO->Op) |
6223 | 127k | Flags = getNoWrapFlagsFromUB(BO->Op); |
6224 | 127k | return getMinusSCEV(getSCEV(BO->LHS), getSCEV(BO->RHS), Flags); |
6225 | 1.67M | } |
6226 | 1.67M | case Instruction::And: |
6227 | 71.1k | // For an expression like x&255 that merely masks off the high bits, |
6228 | 71.1k | // use zext(trunc(x)) as the SCEV expression. |
6229 | 71.1k | if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS)) { |
6230 | 59.3k | if (CI->isZero()) |
6231 | 2 | return getSCEV(BO->RHS); |
6232 | 59.3k | if (CI->isMinusOne()) |
6233 | 0 | return getSCEV(BO->LHS); |
6234 | 59.3k | const APInt &A = CI->getValue(); |
6235 | 59.3k | |
6236 | 59.3k | // Instcombine's ShrinkDemandedConstant may strip bits out of |
6237 | 59.3k | // constants, obscuring what would otherwise be a low-bits mask. |
6238 | 59.3k | // Use computeKnownBits to compute what ShrinkDemandedConstant |
6239 | 59.3k | // knew about to reconstruct a low-bits mask value. |
6240 | 59.3k | unsigned LZ = A.countLeadingZeros(); |
6241 | 59.3k | unsigned TZ = A.countTrailingZeros(); |
6242 | 59.3k | unsigned BitWidth = A.getBitWidth(); |
6243 | 59.3k | KnownBits Known(BitWidth); |
6244 | 59.3k | computeKnownBits(BO->LHS, Known, getDataLayout(), |
6245 | 59.3k | 0, &AC, nullptr, &DT); |
6246 | 59.3k | |
6247 | 59.3k | APInt EffectiveMask = |
6248 | 59.3k | APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ); |
6249 | 59.3k | if ((LZ != 0 || TZ != 014.5k ) && !((~A & ~Known.Zero) & EffectiveMask)59.1k ) { |
6250 | 58.5k | const SCEV *MulCount = getConstant(APInt::getOneBitSet(BitWidth, TZ)); |
6251 | 58.5k | const SCEV *LHS = getSCEV(BO->LHS); |
6252 | 58.5k | const SCEV *ShiftedLHS = nullptr; |
6253 | 58.5k | if (auto *LHSMul = dyn_cast<SCEVMulExpr>(LHS)) { |
6254 | 389 | if (auto *OpC = dyn_cast<SCEVConstant>(LHSMul->getOperand(0))) { |
6255 | 364 | // For an expression like (x * 8) & 8, simplify the multiply. |
6256 | 364 | unsigned MulZeros = OpC->getAPInt().countTrailingZeros(); |
6257 | 364 | unsigned GCD = std::min(MulZeros, TZ); |
6258 | 364 | APInt DivAmt = APInt::getOneBitSet(BitWidth, TZ - GCD); |
6259 | 364 | SmallVector<const SCEV*, 4> MulOps; |
6260 | 364 | MulOps.push_back(getConstant(OpC->getAPInt().lshr(GCD))); |
6261 | 364 | MulOps.append(LHSMul->op_begin() + 1, LHSMul->op_end()); |
6262 | 364 | auto *NewMul = getMulExpr(MulOps, LHSMul->getNoWrapFlags()); |
6263 | 364 | ShiftedLHS = getUDivExpr(NewMul, getConstant(DivAmt)); |
6264 | 364 | } |
6265 | 389 | } |
6266 | 58.5k | if (!ShiftedLHS) |
6267 | 58.1k | ShiftedLHS = getUDivExpr(LHS, MulCount); |
6268 | 58.5k | return getMulExpr( |
6269 | 58.5k | getZeroExtendExpr( |
6270 | 58.5k | getTruncateExpr(ShiftedLHS, |
6271 | 58.5k | IntegerType::get(getContext(), BitWidth - LZ - TZ)), |
6272 | 58.5k | BO->LHS->getType()), |
6273 | 58.5k | MulCount); |
6274 | 58.5k | } |
6275 | 12.6k | } |
6276 | 12.6k | break; |
6277 | 12.6k | |
6278 | 31.2k | case Instruction::Or: |
6279 | 31.2k | // If the RHS of the Or is a constant, we may have something like: |
6280 | 31.2k | // X*4+1 which got turned into X*4|1. Handle this as an Add so loop |
6281 | 31.2k | // optimizations will transparently handle this case. |
6282 | 31.2k | // |
6283 | 31.2k | // In order for this transformation to be safe, the LHS must be of the |
6284 | 31.2k | // form X*(2^n) and the Or constant must be less than 2^n. |
6285 | 31.2k | if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS)) { |
6286 | 18.0k | const SCEV *LHS = getSCEV(BO->LHS); |
6287 | 18.0k | const APInt &CIVal = CI->getValue(); |
6288 | 18.0k | if (GetMinTrailingZeros(LHS) >= |
6289 | 18.0k | (CIVal.getBitWidth() - CIVal.countLeadingZeros())) { |
6290 | 16.5k | // Build a plain add SCEV. |
6291 | 16.5k | const SCEV *S = getAddExpr(LHS, getSCEV(CI)); |
6292 | 16.5k | // If the LHS of the add was an addrec and it has no-wrap flags, |
6293 | 16.5k | // transfer the no-wrap flags, since an or won't introduce a wrap. |
6294 | 16.5k | if (const SCEVAddRecExpr *NewAR = dyn_cast<SCEVAddRecExpr>(S)) { |
6295 | 13.1k | const SCEVAddRecExpr *OldAR = cast<SCEVAddRecExpr>(LHS); |
6296 | 13.1k | const_cast<SCEVAddRecExpr *>(NewAR)->setNoWrapFlags( |
6297 | 13.1k | OldAR->getNoWrapFlags()); |
6298 | 13.1k | } |
6299 | 16.5k | return S; |
6300 | 16.5k | } |
6301 | 14.6k | } |
6302 | 14.6k | break; |
6303 | 14.6k | |
6304 | 14.6k | case Instruction::Xor: |
6305 | 8.90k | if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS)) { |
6306 | 5.14k | // If the RHS of xor is -1, then this is a not operation. |
6307 | 5.14k | if (CI->isMinusOne()) |
6308 | 3.67k | return getNotSCEV(getSCEV(BO->LHS)); |
6309 | 1.47k | |
6310 | 1.47k | // Model xor(and(x, C), C) as and(~x, C), if C is a low-bits mask. |
6311 | 1.47k | // This is a variant of the check for xor with -1, and it handles |
6312 | 1.47k | // the case where instcombine has trimmed non-demanded bits out |
6313 | 1.47k | // of an xor with -1. |
6314 | 1.47k | if (auto *LBO = dyn_cast<BinaryOperator>(BO->LHS)) |
6315 | 498 | if (ConstantInt *LCI = dyn_cast<ConstantInt>(LBO->getOperand(1))) |
6316 | 488 | if (LBO->getOpcode() == Instruction::And && |
6317 | 488 | LCI->getValue() == CI->getValue()313 ) |
6318 | 141 | if (const SCEVZeroExtendExpr *Z = |
6319 | 128 | dyn_cast<SCEVZeroExtendExpr>(getSCEV(BO->LHS))) { |
6320 | 128 | Type *UTy = BO->LHS->getType(); |
6321 | 128 | const SCEV *Z0 = Z->getOperand(); |
6322 | 128 | Type *Z0Ty = Z0->getType(); |
6323 | 128 | unsigned Z0TySize = getTypeSizeInBits(Z0Ty); |
6324 | 128 | |
6325 | 128 | // If C is a low-bits mask, the zero extend is serving to |
6326 | 128 | // mask off the high bits. Complement the operand and |
6327 | 128 | // re-apply the zext. |
6328 | 128 | if (CI->getValue().isMask(Z0TySize)) |
6329 | 128 | return getZeroExtendExpr(getNotSCEV(Z0), UTy); |
6330 | 0 | |
6331 | 0 | // If C is a single bit, it may be in the sign-bit position |
6332 | 0 | // before the zero-extend. In this case, represent the xor |
6333 | 0 | // using an add, which is equivalent, and re-apply the zext. |
6334 | 0 | APInt Trunc = CI->getValue().trunc(Z0TySize); |
6335 | 0 | if (Trunc.zext(getTypeSizeInBits(UTy)) == CI->getValue() && |
6336 | 0 | Trunc.isSignMask()) |
6337 | 0 | return getZeroExtendExpr(getAddExpr(Z0, getConstant(Trunc)), |
6338 | 0 | UTy); |
6339 | 5.09k | } |
6340 | 1.47k | } |
6341 | 5.09k | break; |
6342 | 5.09k | |
6343 | 91.3k | case Instruction::Shl: |
6344 | 91.3k | // Turn shift left of a constant amount into a multiply. |
6345 | 91.3k | if (ConstantInt *SA = dyn_cast<ConstantInt>(BO->RHS)) { |
6346 | 87.2k | uint32_t BitWidth = cast<IntegerType>(SA->getType())->getBitWidth(); |
6347 | 87.2k | |
6348 | 87.2k | // If the shift count is not less than the bitwidth, the result of |
6349 | 87.2k | // the shift is undefined. Don't try to analyze it, because the |
6350 | 87.2k | // resolution chosen here may differ from the resolution chosen in |
6351 | 87.2k | // other parts of the compiler. |
6352 | 87.2k | if (SA->getValue().uge(BitWidth)) |
6353 | 4 | break; |
6354 | 87.2k | |
6355 | 87.2k | // It is currently not resolved how to interpret NSW for left |
6356 | 87.2k | // shift by BitWidth - 1, so we avoid applying flags in that |
6357 | 87.2k | // case. Remove this check (or this comment) once the situation |
6358 | 87.2k | // is resolved. See |
6359 | 87.2k | // http://lists.llvm.org/pipermail/llvm-dev/2015-April/084195.html |
6360 | 87.2k | // and http://reviews.llvm.org/D8890 . |
6361 | 87.2k | auto Flags = SCEV::FlagAnyWrap; |
6362 | 87.2k | if (BO->Op && SA->getValue().ult(BitWidth - 1)) |
6363 | 87.1k | Flags = getNoWrapFlagsFromUB(BO->Op); |
6364 | 87.2k | |
6365 | 87.2k | Constant *X = ConstantInt::get( |
6366 | 87.2k | getContext(), APInt::getOneBitSet(BitWidth, SA->getZExtValue())); |
6367 | 87.2k | return getMulExpr(getSCEV(BO->LHS), getSCEV(X), Flags); |
6368 | 87.2k | } |
6369 | 4.15k | break; |
6370 | 4.15k | |
6371 | 27.2k | case Instruction::AShr: { |
6372 | 27.2k | // AShr X, C, where C is a constant. |
6373 | 27.2k | ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS); |
6374 | 27.2k | if (!CI) |
6375 | 1.25k | break; |
6376 | 25.9k | |
6377 | 25.9k | Type *OuterTy = BO->LHS->getType(); |
6378 | 25.9k | uint64_t BitWidth = getTypeSizeInBits(OuterTy); |
6379 | 25.9k | // If the shift count is not less than the bitwidth, the result of |
6380 | 25.9k | // the shift is undefined. Don't try to analyze it, because the |
6381 | 25.9k | // resolution chosen here may differ from the resolution chosen in |
6382 | 25.9k | // other parts of the compiler. |
6383 | 25.9k | if (CI->getValue().uge(BitWidth)) |
6384 | 4 | break; |
6385 | 25.9k | |
6386 | 25.9k | if (CI->isZero()) |
6387 | 1 | return getSCEV(BO->LHS); // shift by zero --> noop |
6388 | 25.9k | |
6389 | 25.9k | uint64_t AShrAmt = CI->getZExtValue(); |
6390 | 25.9k | Type *TruncTy = IntegerType::get(getContext(), BitWidth - AShrAmt); |
6391 | 25.9k | |
6392 | 25.9k | Operator *L = dyn_cast<Operator>(BO->LHS); |
6393 | 25.9k | if (L && L->getOpcode() == Instruction::Shl25.5k ) { |
6394 | 12.0k | // X = Shl A, n |
6395 | 12.0k | // Y = AShr X, m |
6396 | 12.0k | // Both n and m are constant. |
6397 | 12.0k | |
6398 | 12.0k | const SCEV *ShlOp0SCEV = getSCEV(L->getOperand(0)); |
6399 | 12.0k | if (L->getOperand(1) == BO->RHS) |
6400 | 10.7k | // For a two-shift sext-inreg, i.e. n = m, |
6401 | 10.7k | // use sext(trunc(x)) as the SCEV expression. |
6402 | 10.7k | return getSignExtendExpr( |
6403 | 10.7k | getTruncateExpr(ShlOp0SCEV, TruncTy), OuterTy); |
6404 | 1.31k | |
6405 | 1.31k | ConstantInt *ShlAmtCI = dyn_cast<ConstantInt>(L->getOperand(1)); |
6406 | 1.31k | if (ShlAmtCI && ShlAmtCI->getValue().ult(BitWidth)1.28k ) { |
6407 | 1.28k | uint64_t ShlAmt = ShlAmtCI->getZExtValue(); |
6408 | 1.28k | if (ShlAmt > AShrAmt) { |
6409 | 29 | // When n > m, use sext(mul(trunc(x), 2^(n-m)))) as the SCEV |
6410 | 29 | // expression. We already checked that ShlAmt < BitWidth, so |
6411 | 29 | // the multiplier, 1 << (ShlAmt - AShrAmt), fits into TruncTy as |
6412 | 29 | // ShlAmt - AShrAmt < Amt. |
6413 | 29 | APInt Mul = APInt::getOneBitSet(BitWidth - AShrAmt, |
6414 | 29 | ShlAmt - AShrAmt); |
6415 | 29 | return getSignExtendExpr( |
6416 | 29 | getMulExpr(getTruncateExpr(ShlOp0SCEV, TruncTy), |
6417 | 29 | getConstant(Mul)), OuterTy); |
6418 | 29 | } |
6419 | 15.1k | } |
6420 | 1.31k | } |
6421 | 15.1k | break; |
6422 | 15.1k | } |
6423 | 1.67M | } |
6424 | 1.67M | } |
6425 | 6.94M | |
6426 | 6.94M | switch (U->getOpcode()) { |
6427 | 6.94M | case Instruction::Trunc: |
6428 | 128k | return getTruncateExpr(getSCEV(U->getOperand(0)), U->getType()); |
6429 | 6.94M | |
6430 | 6.94M | case Instruction::ZExt: |
6431 | 205k | return getZeroExtendExpr(getSCEV(U->getOperand(0)), U->getType()); |
6432 | 6.94M | |
6433 | 6.94M | case Instruction::SExt: |
6434 | 218k | if (auto BO = MatchBinaryOp(U->getOperand(0), DT)) { |
6435 | 39.6k | // The NSW flag of a subtract does not always survive the conversion to |
6436 | 39.6k | // A + (-1)*B. By pushing sign extension onto its operands we are much |
6437 | 39.6k | // more likely to preserve NSW and allow later AddRec optimisations. |
6438 | 39.6k | // |
6439 | 39.6k | // NOTE: This is effectively duplicating this logic from getSignExtend: |
6440 | 39.6k | // sext((A + B + ...)<nsw>) --> (sext(A) + sext(B) + ...)<nsw> |
6441 | 39.6k | // but by that point the NSW information has potentially been lost. |
6442 | 39.6k | if (BO->Opcode == Instruction::Sub && BO->IsNSW3.45k ) { |
6443 | 2.88k | Type *Ty = U->getType(); |
6444 | 2.88k | auto *V1 = getSignExtendExpr(getSCEV(BO->LHS), Ty); |
6445 | 2.88k | auto *V2 = getSignExtendExpr(getSCEV(BO->RHS), Ty); |
6446 | 2.88k | return getMinusSCEV(V1, V2, SCEV::FlagNSW); |
6447 | 2.88k | } |
6448 | 215k | } |
6449 | 215k | return getSignExtendExpr(getSCEV(U->getOperand(0)), U->getType()); |
6450 | 215k | |
6451 | 521k | case Instruction::BitCast: |
6452 | 521k | // BitCasts are no-op casts so we just eliminate the cast. |
6453 | 521k | if (isSCEVable(U->getType()) && isSCEVable(U->getOperand(0)->getType())) |
6454 | 521k | return getSCEV(U->getOperand(0)); |
6455 | 216 | break; |
6456 | 216 | |
6457 | 216 | // It's tempting to handle inttoptr and ptrtoint as no-ops, however this can |
6458 | 216 | // lead to pointer expressions which cannot safely be expanded to GEPs, |
6459 | 216 | // because ScalarEvolution doesn't respect the GEP aliasing rules when |
6460 | 216 | // simplifying integer expressions. |
6461 | 216 | |
6462 | 2.04M | case Instruction::GetElementPtr: |
6463 | 2.04M | return createNodeForGEP(cast<GEPOperator>(U)); |
6464 | 216 | |
6465 | 1.82M | case Instruction::PHI: |
6466 | 1.82M | return createNodeForPHI(cast<PHINode>(U)); |
6467 | 216 | |
6468 | 100k | case Instruction::Select: |
6469 | 100k | // U can also be a select constant expr, which let fall through. Since |
6470 | 100k | // createNodeForSelect only works for a condition that is an `ICmpInst`, and |
6471 | 100k | // constant expressions cannot have instructions as operands, we'd have |
6472 | 100k | // returned getUnknown for a select constant expressions anyway. |
6473 | 100k | if (isa<Instruction>(U)) |
6474 | 100k | return createNodeForSelectOrPHI(cast<Instruction>(U), U->getOperand(0), |
6475 | 100k | U->getOperand(1), U->getOperand(2)); |
6476 | 2 | break; |
6477 | 2 | |
6478 | 235k | case Instruction::Call: |
6479 | 235k | case Instruction::Invoke: |
6480 | 235k | if (Value *RV = CallSite(U).getReturnedArgOperand()) |
6481 | 613 | return getSCEV(RV); |
6482 | 235k | break; |
6483 | 1.90M | } |
6484 | 1.90M | |
6485 | 1.90M | return getUnknown(V); |
6486 | 1.90M | } |
6487 | | |
6488 | | //===----------------------------------------------------------------------===// |
6489 | | // Iteration Count Computation Code |
6490 | | // |
6491 | | |
6492 | 780k | static unsigned getConstantTripCount(const SCEVConstant *ExitCount) { |
6493 | 780k | if (!ExitCount) |
6494 | 520k | return 0; |
6495 | 260k | |
6496 | 260k | ConstantInt *ExitConst = ExitCount->getValue(); |
6497 | 260k | |
6498 | 260k | // Guard against huge trip counts. |
6499 | 260k | if (ExitConst->getValue().getActiveBits() > 32) |
6500 | 75.3k | return 0; |
6501 | 184k | |
6502 | 184k | // In case of integer overflow, this returns 0, which is correct. |
6503 | 184k | return ((unsigned)ExitConst->getZExtValue()) + 1; |
6504 | 184k | } |
6505 | | |
6506 | 40.2k | unsigned ScalarEvolution::getSmallConstantTripCount(const Loop *L) { |
6507 | 40.2k | if (BasicBlock *ExitingBB = L->getExitingBlock()) |
6508 | 40.1k | return getSmallConstantTripCount(L, ExitingBB); |
6509 | 87 | |
6510 | 87 | // No trip count information for multiple exits. |
6511 | 87 | return 0; |
6512 | 87 | } |
6513 | | |
6514 | | unsigned ScalarEvolution::getSmallConstantTripCount(const Loop *L, |
6515 | 427k | BasicBlock *ExitingBlock) { |
6516 | 427k | assert(ExitingBlock && "Must pass a non-null exiting block!"); |
6517 | 427k | assert(L->isLoopExiting(ExitingBlock) && |
6518 | 427k | "Exiting block must actually branch out of the loop!"); |
6519 | 427k | const SCEVConstant *ExitCount = |
6520 | 427k | dyn_cast<SCEVConstant>(getExitCount(L, ExitingBlock)); |
6521 | 427k | return getConstantTripCount(ExitCount); |
6522 | 427k | } |
6523 | | |
6524 | 353k | unsigned ScalarEvolution::getSmallConstantMaxTripCount(const Loop *L) { |
6525 | 353k | const auto *MaxExitCount = |
6526 | 353k | dyn_cast<SCEVConstant>(getMaxBackedgeTakenCount(L)); |
6527 | 353k | return getConstantTripCount(MaxExitCount); |
6528 | 353k | } |
6529 | | |
6530 | 283 | unsigned ScalarEvolution::getSmallConstantTripMultiple(const Loop *L) { |
6531 | 283 | if (BasicBlock *ExitingBB = L->getExitingBlock()) |
6532 | 272 | return getSmallConstantTripMultiple(L, ExitingBB); |
6533 | 11 | |
6534 | 11 | // No trip multiple information for multiple exits. |
6535 | 11 | return 0; |
6536 | 11 | } |
6537 | | |
6538 | | /// Returns the largest constant divisor of the trip count of this loop as a |
6539 | | /// normal unsigned value, if possible. This means that the actual trip count is |
6540 | | /// always a multiple of the returned value (don't forget the trip count could |
6541 | | /// very well be zero as well!). |
6542 | | /// |
6543 | | /// Returns 1 if the trip count is unknown or not guaranteed to be the |
6544 | | /// multiple of a constant (which is also the case if the trip count is simply |
6545 | | /// constant, use getSmallConstantTripCount for that case), Will also return 1 |
6546 | | /// if the trip count is very large (>= 2^32). |
6547 | | /// |
6548 | | /// As explained in the comments for getSmallConstantTripCount, this assumes |
6549 | | /// that control exits the loop via ExitingBlock. |
6550 | | unsigned |
6551 | | ScalarEvolution::getSmallConstantTripMultiple(const Loop *L, |
6552 | 387k | BasicBlock *ExitingBlock) { |
6553 | 387k | assert(ExitingBlock && "Must pass a non-null exiting block!"); |
6554 | 387k | assert(L->isLoopExiting(ExitingBlock) && |
6555 | 387k | "Exiting block must actually branch out of the loop!"); |
6556 | 387k | const SCEV *ExitCount = getExitCount(L, ExitingBlock); |
6557 | 387k | if (ExitCount == getCouldNotCompute()) |
6558 | 210k | return 1; |
6559 | 177k | |
6560 | 177k | // Get the trip count from the BE count by adding 1. |
6561 | 177k | const SCEV *TCExpr = getAddExpr(ExitCount, getOne(ExitCount->getType())); |
6562 | 177k | |
6563 | 177k | const SCEVConstant *TC = dyn_cast<SCEVConstant>(TCExpr); |
6564 | 177k | if (!TC) |
6565 | 131k | // Attempt to factor more general cases. Returns the greatest power of |
6566 | 131k | // two divisor. If overflow happens, the trip count expression is still |
6567 | 131k | // divisible by the greatest power of 2 divisor returned. |
6568 | 131k | return 1U << std::min((uint32_t)31, GetMinTrailingZeros(TCExpr)); |
6569 | 45.8k | |
6570 | 45.8k | ConstantInt *Result = TC->getValue(); |
6571 | 45.8k | |
6572 | 45.8k | // Guard against huge trip counts (this requires checking |
6573 | 45.8k | // for zero to handle the case where the trip count == -1 and the |
6574 | 45.8k | // addition wraps). |
6575 | 45.8k | if (!Result || Result->getValue().getActiveBits() > 32 || |
6576 | 45.8k | Result->getValue().getActiveBits() == 045.8k ) |
6577 | 13 | return 1; |
6578 | 45.8k | |
6579 | 45.8k | return (unsigned)Result->getZExtValue(); |
6580 | 45.8k | } |
6581 | | |
6582 | | /// Get the expression for the number of loop iterations for which this loop is |
6583 | | /// guaranteed not to exit via ExitingBlock. Otherwise return |
6584 | | /// SCEVCouldNotCompute. |
6585 | | const SCEV *ScalarEvolution::getExitCount(const Loop *L, |
6586 | 1.48M | BasicBlock *ExitingBlock) { |
6587 | 1.48M | return getBackedgeTakenInfo(L).getExact(ExitingBlock, this); |
6588 | 1.48M | } |
6589 | | |
6590 | | const SCEV * |
6591 | | ScalarEvolution::getPredicatedBackedgeTakenCount(const Loop *L, |
6592 | 169k | SCEVUnionPredicate &Preds) { |
6593 | 169k | return getPredicatedBackedgeTakenInfo(L).getExact(L, this, &Preds); |
6594 | 169k | } |
6595 | | |
6596 | 730k | const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L) { |
6597 | 730k | return getBackedgeTakenInfo(L).getExact(L, this); |
6598 | 730k | } |
6599 | | |
6600 | | /// Similar to getBackedgeTakenCount, except return the least SCEV value that is |
6601 | | /// known never to be less than the actual backedge taken count. |
6602 | 8.63M | const SCEV *ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L) { |
6603 | 8.63M | return getBackedgeTakenInfo(L).getMax(this); |
6604 | 8.63M | } |
6605 | | |
6606 | 344k | bool ScalarEvolution::isBackedgeTakenCountMaxOrZero(const Loop *L) { |
6607 | 344k | return getBackedgeTakenInfo(L).isMaxOrZero(this); |
6608 | 344k | } |
6609 | | |
6610 | | /// Push PHI nodes in the header of the given loop onto the given Worklist. |
6611 | | static void |
6612 | 1.12M | PushLoopPHIs(const Loop *L, SmallVectorImpl<Instruction *> &Worklist) { |
6613 | 1.12M | BasicBlock *Header = L->getHeader(); |
6614 | 1.12M | |
6615 | 1.12M | // Push all Loop-header PHIs onto the Worklist stack. |
6616 | 1.12M | for (PHINode &PN : Header->phis()) |
6617 | 1.73M | Worklist.push_back(&PN); |
6618 | 1.12M | } |
6619 | | |
6620 | | const ScalarEvolution::BackedgeTakenInfo & |
6621 | 169k | ScalarEvolution::getPredicatedBackedgeTakenInfo(const Loop *L) { |
6622 | 169k | auto &BTI = getBackedgeTakenInfo(L); |
6623 | 169k | if (BTI.hasFullInfo()) |
6624 | 98.5k | return BTI; |
6625 | 71.0k | |
6626 | 71.0k | auto Pair = PredicatedBackedgeTakenCounts.insert({L, BackedgeTakenInfo()}); |
6627 | 71.0k | |
6628 | 71.0k | if (!Pair.second) |
6629 | 86 | return Pair.first->second; |
6630 | 70.9k | |
6631 | 70.9k | BackedgeTakenInfo Result = |
6632 | 70.9k | computeBackedgeTakenCount(L, /*AllowPredicates=*/true); |
6633 | 70.9k | |
6634 | 70.9k | return PredicatedBackedgeTakenCounts.find(L)->second = std::move(Result); |
6635 | 70.9k | } |
6636 | | |
6637 | | const ScalarEvolution::BackedgeTakenInfo & |
6638 | 12.1M | ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { |
6639 | 12.1M | // Initially insert an invalid entry for this loop. If the insertion |
6640 | 12.1M | // succeeds, proceed to actually compute a backedge-taken count and |
6641 | 12.1M | // update the value. The temporary CouldNotCompute value tells SCEV |
6642 | 12.1M | // code elsewhere that it shouldn't attempt to request a new |
6643 | 12.1M | // backedge-taken count, which could result in infinite recursion. |
6644 | 12.1M | std::pair<DenseMap<const Loop *, BackedgeTakenInfo>::iterator, bool> Pair = |
6645 | 12.1M | BackedgeTakenCounts.insert({L, BackedgeTakenInfo()}); |
6646 | 12.1M | if (!Pair.second) |
6647 | 11.4M | return Pair.first->second; |
6648 | 721k | |
6649 | 721k | // computeBackedgeTakenCount may allocate memory for its result. Inserting it |
6650 | 721k | // into the BackedgeTakenCounts map transfers ownership. Otherwise, the result |
6651 | 721k | // must be cleared in this scope. |
6652 | 721k | BackedgeTakenInfo Result = computeBackedgeTakenCount(L); |
6653 | 721k | |
6654 | 721k | // In product build, there are no usage of statistic. |
6655 | 721k | (void)NumTripCountsComputed; |
6656 | 721k | (void)NumTripCountsNotComputed; |
6657 | | #if LLVM_ENABLE_STATS || !defined(NDEBUG) |
6658 | | const SCEV *BEExact = Result.getExact(L, this); |
6659 | | if (BEExact != getCouldNotCompute()) { |
6660 | | assert(isLoopInvariant(BEExact, L) && |
6661 | | isLoopInvariant(Result.getMax(this), L) && |
6662 | | "Computed backedge-taken count isn't loop invariant for loop!"); |
6663 | | ++NumTripCountsComputed; |
6664 | | } |
6665 | | else if (Result.getMax(this) == getCouldNotCompute() && |
6666 | | isa<PHINode>(L->getHeader()->begin())) { |
6667 | | // Only count loops that have phi nodes as not being computable. |
6668 | | ++NumTripCountsNotComputed; |
6669 | | } |
6670 | | #endif // LLVM_ENABLE_STATS || !defined(NDEBUG) |
6671 | | |
6672 | 721k | // Now that we know more about the trip count for this loop, forget any |
6673 | 721k | // existing SCEV values for PHI nodes in this loop since they are only |
6674 | 721k | // conservative estimates made without the benefit of trip count |
6675 | 721k | // information. This is similar to the code in forgetLoop, except that |
6676 | 721k | // it handles SCEVUnknown PHI nodes specially. |
6677 | 721k | if (Result.hasAnyInfo()) { |
6678 | 486k | SmallVector<Instruction *, 16> Worklist; |
6679 | 486k | PushLoopPHIs(L, Worklist); |
6680 | 486k | |
6681 | 486k | SmallPtrSet<Instruction *, 8> Discovered; |
6682 | 12.0M | while (!Worklist.empty()) { |
6683 | 11.5M | Instruction *I = Worklist.pop_back_val(); |
6684 | 11.5M | |
6685 | 11.5M | ValueExprMapType::iterator It = |
6686 | 11.5M | ValueExprMap.find_as(static_cast<Value *>(I)); |
6687 | 11.5M | if (It != ValueExprMap.end()) { |
6688 | 1.50M | const SCEV *Old = It->second; |
6689 | 1.50M | |
6690 | 1.50M | // SCEVUnknown for a PHI either means that it has an unrecognized |
6691 | 1.50M | // structure, or it's a PHI that's in the progress of being computed |
6692 | 1.50M | // by createNodeForPHI. In the former case, additional loop trip |
6693 | 1.50M | // count information isn't going to change anything. In the later |
6694 | 1.50M | // case, createNodeForPHI will perform the necessary updates on its |
6695 | 1.50M | // own when it gets to that point. |
6696 | 1.50M | if (!isa<PHINode>(I) || !isa<SCEVUnknown>(Old)708k ) { |
6697 | 1.31M | eraseValueFromMap(It->first); |
6698 | 1.31M | forgetMemoizedResults(Old); |
6699 | 1.31M | } |
6700 | 1.50M | if (PHINode *PN = dyn_cast<PHINode>(I)) |
6701 | 708k | ConstantEvolutionLoopExitValue.erase(PN); |
6702 | 1.50M | } |
6703 | 11.5M | |
6704 | 11.5M | // Since we don't need to invalidate anything for correctness and we're |
6705 | 11.5M | // only invalidating to make SCEV's results more precise, we get to stop |
6706 | 11.5M | // early to avoid invalidating too much. This is especially important in |
6707 | 11.5M | // cases like: |
6708 | 11.5M | // |
6709 | 11.5M | // %v = f(pn0, pn1) // pn0 and pn1 used through some other phi node |
6710 | 11.5M | // loop0: |
6711 | 11.5M | // %pn0 = phi |
6712 | 11.5M | // ... |
6713 | 11.5M | // loop1: |
6714 | 11.5M | // %pn1 = phi |
6715 | 11.5M | // ... |
6716 | 11.5M | // |
6717 | 11.5M | // where both loop0 and loop1's backedge taken count uses the SCEV |
6718 | 11.5M | // expression for %v. If we don't have the early stop below then in cases |
6719 | 11.5M | // like the above, getBackedgeTakenInfo(loop1) will clear out the trip |
6720 | 11.5M | // count for loop0 and getBackedgeTakenInfo(loop0) will clear out the trip |
6721 | 11.5M | // count for loop1, effectively nullifying SCEV's trip count cache. |
6722 | 11.5M | for (auto *U : I->users()) |
6723 | 15.0M | if (auto *I = dyn_cast<Instruction>(U)) { |
6724 | 15.0M | auto *LoopForUser = LI.getLoopFor(I->getParent()); |
6725 | 15.0M | if (LoopForUser && L->contains(LoopForUser)14.8M && |
6726 | 15.0M | Discovered.insert(I).second14.7M ) |
6727 | 10.7M | Worklist.push_back(I); |
6728 | 15.0M | } |
6729 | 11.5M | } |
6730 | 486k | } |
6731 | 721k | |
6732 | 721k | // Re-lookup the insert position, since the call to |
6733 | 721k | // computeBackedgeTakenCount above could result in a |
6734 | 721k | // recusive call to getBackedgeTakenInfo (on a different |
6735 | 721k | // loop), which would invalidate the iterator computed |
6736 | 721k | // earlier. |
6737 | 721k | return BackedgeTakenCounts.find(L)->second = std::move(Result); |
6738 | 721k | } |
6739 | | |
6740 | 0 | void ScalarEvolution::forgetAllLoops() { |
6741 | 0 | // This method is intended to forget all info about loops. It should |
6742 | 0 | // invalidate caches as if the following happened: |
6743 | 0 | // - The trip counts of all loops have changed arbitrarily |
6744 | 0 | // - Every llvm::Value has been updated in place to produce a different |
6745 | 0 | // result. |
6746 | 0 | BackedgeTakenCounts.clear(); |
6747 | 0 | PredicatedBackedgeTakenCounts.clear(); |
6748 | 0 | LoopPropertiesCache.clear(); |
6749 | 0 | ConstantEvolutionLoopExitValue.clear(); |
6750 | 0 | ValueExprMap.clear(); |
6751 | 0 | ValuesAtScopes.clear(); |
6752 | 0 | LoopDispositions.clear(); |
6753 | 0 | BlockDispositions.clear(); |
6754 | 0 | UnsignedRanges.clear(); |
6755 | 0 | SignedRanges.clear(); |
6756 | 0 | ExprValueMap.clear(); |
6757 | 0 | HasRecMap.clear(); |
6758 | 0 | MinTrailingZerosCache.clear(); |
6759 | 0 | PredicatedSCEVRewrites.clear(); |
6760 | 0 | } |
6761 | | |
6762 | 225k | void ScalarEvolution::forgetLoop(const Loop *L) { |
6763 | 225k | // Drop any stored trip count value. |
6764 | 225k | auto RemoveLoopFromBackedgeMap = |
6765 | 1.28M | [](DenseMap<const Loop *, BackedgeTakenInfo> &Map, const Loop *L) { |
6766 | 1.28M | auto BTCPos = Map.find(L); |
6767 | 1.28M | if (BTCPos != Map.end()) { |
6768 | 48.4k | BTCPos->second.clear(); |
6769 | 48.4k | Map.erase(BTCPos); |
6770 | 48.4k | } |
6771 | 1.28M | }; |
6772 | 225k | |
6773 | 225k | SmallVector<const Loop *, 16> LoopWorklist(1, L); |
6774 | 225k | SmallVector<Instruction *, 32> Worklist; |
6775 | 225k | SmallPtrSet<Instruction *, 16> Visited; |
6776 | 225k | |
6777 | 225k | // Iterate over all the loops and sub-loops to drop SCEV information. |
6778 | 867k | while (!LoopWorklist.empty()) { |
6779 | 642k | auto *CurrL = LoopWorklist.pop_back_val(); |
6780 | 642k | |
6781 | 642k | RemoveLoopFromBackedgeMap(BackedgeTakenCounts, CurrL); |
6782 | 642k | RemoveLoopFromBackedgeMap(PredicatedBackedgeTakenCounts, CurrL); |
6783 | 642k | |
6784 | 642k | // Drop information about predicated SCEV rewrites for this loop. |
6785 | 642k | for (auto I = PredicatedSCEVRewrites.begin(); |
6786 | 643k | I != PredicatedSCEVRewrites.end();) { |
6787 | 1.90k | std::pair<const SCEV *, const Loop *> Entry = I->first; |
6788 | 1.90k | if (Entry.second == CurrL) |
6789 | 90 | PredicatedSCEVRewrites.erase(I++); |
6790 | 1.81k | else |
6791 | 1.81k | ++I; |
6792 | 1.90k | } |
6793 | 642k | |
6794 | 642k | auto LoopUsersItr = LoopUsers.find(CurrL); |
6795 | 642k | if (LoopUsersItr != LoopUsers.end()) { |
6796 | 47.8k | for (auto *S : LoopUsersItr->second) |
6797 | 474k | forgetMemoizedResults(S); |
6798 | 47.8k | LoopUsers.erase(LoopUsersItr); |
6799 | 47.8k | } |
6800 | 642k | |
6801 | 642k | // Drop information about expressions based on loop-header PHIs. |
6802 | 642k | PushLoopPHIs(CurrL, Worklist); |
6803 | 642k | |
6804 | 21.2M | while (!Worklist.empty()) { |
6805 | 20.6M | Instruction *I = Worklist.pop_back_val(); |
6806 | 20.6M | if (!Visited.insert(I).second) |
6807 | 4.82M | continue; |
6808 | 15.8M | |
6809 | 15.8M | ValueExprMapType::iterator It = |
6810 | 15.8M | ValueExprMap.find_as(static_cast<Value *>(I)); |
6811 | 15.8M | if (It != ValueExprMap.end()) { |
6812 | 254k | eraseValueFromMap(It->first); |
6813 | 254k | forgetMemoizedResults(It->second); |
6814 | 254k | if (PHINode *PN = dyn_cast<PHINode>(I)) |
6815 | 59.8k | ConstantEvolutionLoopExitValue.erase(PN); |
6816 | 254k | } |
6817 | 15.8M | |
6818 | 15.8M | PushDefUseChildren(I, Worklist); |
6819 | 15.8M | } |
6820 | 642k | |
6821 | 642k | LoopPropertiesCache.erase(CurrL); |
6822 | 642k | // Forget all contained loops too, to avoid dangling entries in the |
6823 | 642k | // ValuesAtScopes map. |
6824 | 642k | LoopWorklist.append(CurrL->begin(), CurrL->end()); |
6825 | 642k | } |
6826 | 225k | } |
6827 | | |
6828 | 192k | void ScalarEvolution::forgetTopmostLoop(const Loop *L) { |
6829 | 271k | while (Loop *Parent = L->getParentLoop()) |
6830 | 78.8k | L = Parent; |
6831 | 192k | forgetLoop(L); |
6832 | 192k | } |
6833 | | |
6834 | 33.7k | void ScalarEvolution::forgetValue(Value *V) { |
6835 | 33.7k | Instruction *I = dyn_cast<Instruction>(V); |
6836 | 33.7k | if (!I) return0 ; |
6837 | 33.7k | |
6838 | 33.7k | // Drop information about expressions based on loop-header PHIs. |
6839 | 33.7k | SmallVector<Instruction *, 16> Worklist; |
6840 | 33.7k | Worklist.push_back(I); |
6841 | 33.7k | |
6842 | 33.7k | SmallPtrSet<Instruction *, 8> Visited; |
6843 | 1.46M | while (!Worklist.empty()) { |
6844 | 1.42M | I = Worklist.pop_back_val(); |
6845 | 1.42M | if (!Visited.insert(I).second) |
6846 | 246k | continue; |
6847 | 1.17M | |
6848 | 1.17M | ValueExprMapType::iterator It = |
6849 | 1.17M | ValueExprMap.find_as(static_cast<Value *>(I)); |
6850 | 1.17M | if (It != ValueExprMap.end()) { |
6851 | 98.6k | eraseValueFromMap(It->first); |
6852 | 98.6k | forgetMemoizedResults(It->second); |
6853 | 98.6k | if (PHINode *PN = dyn_cast<PHINode>(I)) |
6854 | 31.4k | ConstantEvolutionLoopExitValue.erase(PN); |
6855 | 98.6k | } |
6856 | 1.17M | |
6857 | 1.17M | PushDefUseChildren(I, Worklist); |
6858 | 1.17M | } |
6859 | 33.7k | } |
6860 | | |
6861 | | /// Get the exact loop backedge taken count considering all loop exits. A |
6862 | | /// computable result can only be returned for loops with all exiting blocks |
6863 | | /// dominating the latch. howFarToZero assumes that the limit of each loop test |
6864 | | /// is never skipped. This is a valid assumption as long as the loop exits via |
6865 | | /// that test. For precise results, it is the caller's responsibility to specify |
6866 | | /// the relevant loop exiting block using getExact(ExitingBlock, SE). |
6867 | | const SCEV * |
6868 | | ScalarEvolution::BackedgeTakenInfo::getExact(const Loop *L, ScalarEvolution *SE, |
6869 | 899k | SCEVUnionPredicate *Preds) const { |
6870 | 899k | // If any exits were not computable, the loop is not computable. |
6871 | 899k | if (!isComplete() || ExitNotTaken.empty()482k ) |
6872 | 417k | return SE->getCouldNotCompute(); |
6873 | 481k | |
6874 | 481k | const BasicBlock *Latch = L->getLoopLatch(); |
6875 | 481k | // All exiting blocks we have collected must dominate the only backedge. |
6876 | 481k | if (!Latch) |
6877 | 0 | return SE->getCouldNotCompute(); |
6878 | 481k | |
6879 | 481k | // All exiting blocks we have gathered dominate loop's latch, so exact trip |
6880 | 481k | // count is simply a minimum out of all these calculated exit counts. |
6881 | 481k | SmallVector<const SCEV *, 2> Ops; |
6882 | 482k | for (auto &ENT : ExitNotTaken) { |
6883 | 482k | const SCEV *BECount = ENT.ExactNotTaken; |
6884 | 482k | assert(BECount != SE->getCouldNotCompute() && "Bad exit SCEV!"); |
6885 | 482k | assert(SE->DT.dominates(ENT.ExitingBlock, Latch) && |
6886 | 482k | "We should only have known counts for exiting blocks that dominate " |
6887 | 482k | "latch!"); |
6888 | 482k | |
6889 | 482k | Ops.push_back(BECount); |
6890 | 482k | |
6891 | 482k | if (Preds && !ENT.hasAlwaysTruePredicate()98.7k ) |
6892 | 221 | Preds->add(ENT.Predicate.get()); |
6893 | 482k | |
6894 | 482k | assert((Preds || ENT.hasAlwaysTruePredicate()) && |
6895 | 482k | "Predicate should be always true!"); |
6896 | 482k | } |
6897 | 481k | |
6898 | 481k | return SE->getUMinFromMismatchedTypes(Ops); |
6899 | 481k | } |
6900 | | |
6901 | | /// Get the exact not taken count for this loop exit. |
6902 | | const SCEV * |
6903 | | ScalarEvolution::BackedgeTakenInfo::getExact(BasicBlock *ExitingBlock, |
6904 | 2.30M | ScalarEvolution *SE) const { |
6905 | 2.30M | for (auto &ENT : ExitNotTaken) |
6906 | 1.48M | if (ENT.ExitingBlock == ExitingBlock && ENT.hasAlwaysTruePredicate()1.40M ) |
6907 | 1.40M | return ENT.ExactNotTaken; |
6908 | 2.30M | |
6909 | 2.30M | return SE->getCouldNotCompute()903k ; |
6910 | 2.30M | } |
6911 | | |
6912 | | /// getMax - Get the max backedge taken count for the loop. |
6913 | | const SCEV * |
6914 | 8.63M | ScalarEvolution::BackedgeTakenInfo::getMax(ScalarEvolution *SE) const { |
6915 | 8.63M | auto PredicateNotAlwaysTrue = [](const ExitNotTakenInfo &ENT) { |
6916 | 6.12M | return !ENT.hasAlwaysTruePredicate(); |
6917 | 6.12M | }; |
6918 | 8.63M | |
6919 | 8.63M | if (any_of(ExitNotTaken, PredicateNotAlwaysTrue) || !getMax()) |
6920 | 1.09M | return SE->getCouldNotCompute(); |
6921 | 7.54M | |
6922 | 7.54M | assert((isa<SCEVCouldNotCompute>(getMax()) || isa<SCEVConstant>(getMax())) && |
6923 | 7.54M | "No point in having a non-constant max backedge taken count!"); |
6924 | 7.54M | return getMax(); |
6925 | 7.54M | } |
6926 | | |
6927 | 344k | bool ScalarEvolution::BackedgeTakenInfo::isMaxOrZero(ScalarEvolution *SE) const { |
6928 | 344k | auto PredicateNotAlwaysTrue = [](const ExitNotTakenInfo &ENT) { |
6929 | 146 | return !ENT.hasAlwaysTruePredicate(); |
6930 | 146 | }; |
6931 | 344k | return MaxOrZero && !any_of(ExitNotTaken, PredicateNotAlwaysTrue)146 ; |
6932 | 344k | } |
6933 | | |
6934 | | bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S, |
6935 | 38.7M | ScalarEvolution *SE) const { |
6936 | 38.7M | if (getMax() && getMax() != SE->getCouldNotCompute()37.2M && |
6937 | 38.7M | SE->hasOperand(getMax(), S)35.3M ) |
6938 | 0 | return true; |
6939 | 38.7M | |
6940 | 38.7M | for (auto &ENT : ExitNotTaken) |
6941 | 34.6M | if (ENT.ExactNotTaken != SE->getCouldNotCompute() && |
6942 | 34.6M | SE->hasOperand(ENT.ExactNotTaken, S)) |
6943 | 2.37k | return true; |
6944 | 38.7M | |
6945 | 38.7M | return false38.7M ; |
6946 | 38.7M | } |
6947 | | |
6948 | | ScalarEvolution::ExitLimit::ExitLimit(const SCEV *E) |
6949 | 1.19M | : ExactNotTaken(E), MaxNotTaken(E) { |
6950 | 1.19M | assert((isa<SCEVCouldNotCompute>(MaxNotTaken) || |
6951 | 1.19M | isa<SCEVConstant>(MaxNotTaken)) && |
6952 | 1.19M | "No point in having a non-constant max backedge taken count!"); |
6953 | 1.19M | } |
6954 | | |
6955 | | ScalarEvolution::ExitLimit::ExitLimit( |
6956 | | const SCEV *E, const SCEV *M, bool MaxOrZero, |
6957 | | ArrayRef<const SmallPtrSetImpl<const SCEVPredicate *> *> PredSetList) |
6958 | 487k | : ExactNotTaken(E), MaxNotTaken(M), MaxOrZero(MaxOrZero) { |
6959 | 487k | assert((isa<SCEVCouldNotCompute>(ExactNotTaken) || |
6960 | 487k | !isa<SCEVCouldNotCompute>(MaxNotTaken)) && |
6961 | 487k | "Exact is not allowed to be less precise than Max"); |
6962 | 487k | assert((isa<SCEVCouldNotCompute>(MaxNotTaken) || |
6963 | 487k | isa<SCEVConstant>(MaxNotTaken)) && |
6964 | 487k | "No point in having a non-constant max backedge taken count!"); |
6965 | 487k | for (auto *PredSet : PredSetList) |
6966 | 518k | for (auto *P : *PredSet) |
6967 | 139 | addPredicate(P); |
6968 | 487k | } |
6969 | | |
6970 | | ScalarEvolution::ExitLimit::ExitLimit( |
6971 | | const SCEV *E, const SCEV *M, bool MaxOrZero, |
6972 | | const SmallPtrSetImpl<const SCEVPredicate *> &PredSet) |
6973 | 438k | : ExitLimit(E, M, MaxOrZero, {&PredSet}) { |
6974 | 438k | assert((isa<SCEVCouldNotCompute>(MaxNotTaken) || |
6975 | 438k | isa<SCEVConstant>(MaxNotTaken)) && |
6976 | 438k | "No point in having a non-constant max backedge taken count!"); |
6977 | 438k | } Unexecuted instantiation: llvm::ScalarEvolution::ExitLimit::ExitLimit(llvm::SCEV const*, llvm::SCEV const*, bool, llvm::SmallPtrSetImpl<llvm::SCEVPredicate const*> const&) llvm::ScalarEvolution::ExitLimit::ExitLimit(llvm::SCEV const*, llvm::SCEV const*, bool, llvm::SmallPtrSetImpl<llvm::SCEVPredicate const*> const&) Line | Count | Source | 6973 | 438k | : ExitLimit(E, M, MaxOrZero, {&PredSet}) { | 6974 | 438k | assert((isa<SCEVCouldNotCompute>(MaxNotTaken) || | 6975 | 438k | isa<SCEVConstant>(MaxNotTaken)) && | 6976 | 438k | "No point in having a non-constant max backedge taken count!"); | 6977 | 438k | } |
|
6978 | | |
6979 | | ScalarEvolution::ExitLimit::ExitLimit(const SCEV *E, const SCEV *M, |
6980 | | bool MaxOrZero) |
6981 | 8.94k | : ExitLimit(E, M, MaxOrZero, None) { |
6982 | 8.94k | assert((isa<SCEVCouldNotCompute>(MaxNotTaken) || |
6983 | 8.94k | isa<SCEVConstant>(MaxNotTaken)) && |
6984 | 8.94k | "No point in having a non-constant max backedge taken count!"); |
6985 | 8.94k | } Unexecuted instantiation: llvm::ScalarEvolution::ExitLimit::ExitLimit(llvm::SCEV const*, llvm::SCEV const*, bool) llvm::ScalarEvolution::ExitLimit::ExitLimit(llvm::SCEV const*, llvm::SCEV const*, bool) Line | Count | Source | 6981 | 8.94k | : ExitLimit(E, M, MaxOrZero, None) { | 6982 | 8.94k | assert((isa<SCEVCouldNotCompute>(MaxNotTaken) || | 6983 | 8.94k | isa<SCEVConstant>(MaxNotTaken)) && | 6984 | 8.94k | "No point in having a non-constant max backedge taken count!"); | 6985 | 8.94k | } |
|
6986 | | |
6987 | | /// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each |
6988 | | /// computable exit into a persistent ExitNotTakenInfo array. |
6989 | | ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo( |
6990 | | ArrayRef<ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo> |
6991 | | ExitCounts, |
6992 | | bool Complete, const SCEV *MaxCount, bool MaxOrZero) |
6993 | 792k | : MaxAndComplete(MaxCount, Complete), MaxOrZero(MaxOrZero) { |
6994 | 792k | using EdgeExitInfo = ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo; |
6995 | 792k | |
6996 | 792k | ExitNotTaken.reserve(ExitCounts.size()); |
6997 | 792k | std::transform( |
6998 | 792k | ExitCounts.begin(), ExitCounts.end(), std::back_inserter(ExitNotTaken), |
6999 | 792k | [&](const EdgeExitInfo &EEI) { |
7000 | 391k | BasicBlock *ExitBB = EEI.first; |
7001 | 391k | const ExitLimit &EL = EEI.second; |
7002 | 391k | if (EL.Predicates.empty()) |
7003 | 391k | return ExitNotTakenInfo(ExitBB, EL.ExactNotTaken, nullptr); |
7004 | 136 | |
7005 | 136 | std::unique_ptr<SCEVUnionPredicate> Predicate(new SCEVUnionPredicate); |
7006 | 136 | for (auto *Pred : EL.Predicates) |
7007 | 139 | Predicate->add(Pred); |
7008 | 136 | |
7009 | 136 | return ExitNotTakenInfo(ExitBB, EL.ExactNotTaken, std::move(Predicate)); |
7010 | 136 | }); |
7011 | 792k | assert((isa<SCEVCouldNotCompute>(MaxCount) || isa<SCEVConstant>(MaxCount)) && |
7012 | 792k | "No point in having a non-constant max backedge taken count!"); |
7013 | 792k | } |
7014 | | |
7015 | | /// Invalidate this result and free the ExitNotTakenInfo array. |
7016 | 791k | void ScalarEvolution::BackedgeTakenInfo::clear() { |
7017 | 791k | ExitNotTaken.clear(); |
7018 | 791k | } |
7019 | | |
7020 | | /// Compute the number of times the backedge of the specified loop will execute. |
7021 | | ScalarEvolution::BackedgeTakenInfo |
7022 | | ScalarEvolution::computeBackedgeTakenCount(const Loop *L, |
7023 | 792k | bool AllowPredicates) { |
7024 | 792k | SmallVector<BasicBlock *, 8> ExitingBlocks; |
7025 | 792k | L->getExitingBlocks(ExitingBlocks); |
7026 | 792k | |
7027 | 792k | using EdgeExitInfo = ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo; |
7028 | 792k | |
7029 | 792k | SmallVector<EdgeExitInfo, 4> ExitCounts; |
7030 | 792k | bool CouldComputeBECount = true; |
7031 | 792k | BasicBlock *Latch = L->getLoopLatch(); // may be NULL. |
7032 | 792k | const SCEV *MustExitMaxBECount = nullptr; |
7033 | 792k | const SCEV *MayExitMaxBECount = nullptr; |
7034 | 792k | bool MustExitMaxOrZero = false; |
7035 | 792k | |
7036 | 792k | // Compute the ExitLimit for each loop exit. Use this to populate ExitCounts |
7037 | 792k | // and compute maxBECount. |
7038 | 792k | // Do a union of all the predicates here. |
7039 | 1.78M | for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i994k ) { |
7040 | 994k | BasicBlock *ExitBB = ExitingBlocks[i]; |
7041 | 994k | ExitLimit EL = computeExitLimit(L, ExitBB, AllowPredicates); |
7042 | 994k | |
7043 | 994k | assert((AllowPredicates || EL.Predicates.empty()) && |
7044 | 994k | "Predicated exit limit when predicates are not allowed!"); |
7045 | 994k | |
7046 | 994k | // 1. For each exit that can be computed, add an entry to ExitCounts. |
7047 | 994k | // CouldComputeBECount is true only if all exits can be computed. |
7048 | 994k | if (EL.ExactNotTaken == getCouldNotCompute()) |
7049 | 602k | // We couldn't compute an exact value for this exit, so |
7050 | 602k | // we won't be able to compute an exact value for the loop. |
7051 | 602k | CouldComputeBECount = false; |
7052 | 391k | else |
7053 | 391k | ExitCounts.emplace_back(ExitBB, EL); |
7054 | 994k | |
7055 | 994k | // 2. Derive the loop's MaxBECount from each exit's max number of |
7056 | 994k | // non-exiting iterations. Partition the loop exits into two kinds: |
7057 | 994k | // LoopMustExits and LoopMayExits. |
7058 | 994k | // |
7059 | 994k | // If the exit dominates the loop latch, it is a LoopMustExit otherwise it |
7060 | 994k | // is a LoopMayExit. If any computable LoopMustExit is found, then |
7061 | 994k | // MaxBECount is the minimum EL.MaxNotTaken of computable |
7062 | 994k | // LoopMustExits. Otherwise, MaxBECount is conservatively the maximum |
7063 | 994k | // EL.MaxNotTaken, where CouldNotCompute is considered greater than any |
7064 | 994k | // computable EL.MaxNotTaken. |
7065 | 994k | if (EL.MaxNotTaken != getCouldNotCompute() && Latch507k && |
7066 | 994k | DT.dominates(ExitBB, Latch)507k ) { |
7067 | 507k | if (!MustExitMaxBECount) { |
7068 | 506k | MustExitMaxBECount = EL.MaxNotTaken; |
7069 | 506k | MustExitMaxOrZero = EL.MaxOrZero; |
7070 | 506k | } else { |
7071 | 1.17k | MustExitMaxBECount = |
7072 | 1.17k | getUMinFromMismatchedTypes(MustExitMaxBECount, EL.MaxNotTaken); |
7073 | 1.17k | } |
7074 | 507k | } else if (486k MayExitMaxBECount != getCouldNotCompute()486k ) { |
7075 | 357k | if (!MayExitMaxBECount || EL.MaxNotTaken == getCouldNotCompute()0 ) |
7076 | 357k | MayExitMaxBECount = EL.MaxNotTaken; |
7077 | 0 | else { |
7078 | 0 | MayExitMaxBECount = |
7079 | 0 | getUMaxFromMismatchedTypes(MayExitMaxBECount, EL.MaxNotTaken); |
7080 | 0 | } |
7081 | 357k | } |
7082 | 994k | } |
7083 | 792k | const SCEV *MaxBECount = MustExitMaxBECount ? MustExitMaxBECount506k : |
7084 | 792k | (MayExitMaxBECount 285k ? MayExitMaxBECount285k : getCouldNotCompute()243 ); |
7085 | 792k | // The loop backedge will be taken the maximum or zero times if there's |
7086 | 792k | // a single exit that must be taken the maximum or zero times. |
7087 | 792k | bool MaxOrZero = (MustExitMaxOrZero && ExitingBlocks.size() == 1221 ); |
7088 | 792k | return BackedgeTakenInfo(std::move(ExitCounts), CouldComputeBECount, |
7089 | 792k | MaxBECount, MaxOrZero); |
7090 | 792k | } |
7091 | | |
7092 | | ScalarEvolution::ExitLimit |
7093 | | ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock, |
7094 | 994k | bool AllowPredicates) { |
7095 | 994k | assert(L->contains(ExitingBlock) && "Exit count for non-loop block?"); |
7096 | 994k | // If our exiting block does not dominate the latch, then its connection with |
7097 | 994k | // loop's exit limit may be far from trivial. |
7098 | 994k | const BasicBlock *Latch = L->getLoopLatch(); |
7099 | 994k | if (!Latch || !DT.dominates(ExitingBlock, Latch)993k ) |
7100 | 112k | return getCouldNotCompute(); |
7101 | 882k | |
7102 | 882k | bool IsOnlyExit = (L->getExitingBlock() != nullptr); |
7103 | 882k | Instruction *Term = ExitingBlock->getTerminator(); |
7104 | 882k | if (BranchInst *BI = dyn_cast<BranchInst>(Term)) { |
7105 | 868k | assert(BI->isConditional() && "If unconditional, it can't be in loop!"); |
7106 | 868k | bool ExitIfTrue = !L->contains(BI->getSuccessor(0)); |
7107 | 868k | assert(ExitIfTrue == L->contains(BI->getSuccessor(1)) && |
7108 | 868k | "It should have one successor in loop and one exit block!"); |
7109 | 868k | // Proceed to the next level to examine the exit condition expression. |
7110 | 868k | return computeExitLimitFromCond( |
7111 | 868k | L, BI->getCondition(), ExitIfTrue, |
7112 | 868k | /*ControlsExit=*/IsOnlyExit, AllowPredicates); |
7113 | 868k | } |
7114 | 13.8k | |
7115 | 13.8k | if (SwitchInst *SI = dyn_cast<SwitchInst>(Term)) { |
7116 | 5.60k | // For switch, make sure that there is a single exit from the loop. |
7117 | 5.60k | BasicBlock *Exit = nullptr; |
7118 | 5.60k | for (auto *SBB : successors(ExitingBlock)) |
7119 | 22.5k | if (!L->contains(SBB)) { |
7120 | 8.73k | if (Exit) // Multiple exit successors. |
7121 | 3.12k | return getCouldNotCompute(); |
7122 | 5.60k | Exit = SBB; |
7123 | 5.60k | } |
7124 | 5.60k | assert(Exit && "Exiting block must have at least one exit"); |
7125 | 2.47k | return computeExitLimitFromSingleExitSwitch(L, SI, Exit, |
7126 | 2.47k | /*ControlsExit=*/IsOnlyExit); |
7127 | 8.22k | } |
7128 | 8.22k | |
7129 | 8.22k | return getCouldNotCompute(); |
7130 | 8.22k | } |
7131 | | |
7132 | | ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCond( |
7133 | | const Loop *L, Value *ExitCond, bool ExitIfTrue, |
7134 | 868k | bool ControlsExit, bool AllowPredicates) { |
7135 | 868k | ScalarEvolution::ExitLimitCacheTy Cache(L, ExitIfTrue, AllowPredicates); |
7136 | 868k | return computeExitLimitFromCondCached(Cache, L, ExitCond, ExitIfTrue, |
7137 | 868k | ControlsExit, AllowPredicates); |
7138 | 868k | } |
7139 | | |
7140 | | Optional<ScalarEvolution::ExitLimit> |
7141 | | ScalarEvolution::ExitLimitCache::find(const Loop *L, Value *ExitCond, |
7142 | | bool ExitIfTrue, bool ControlsExit, |
7143 | 948k | bool AllowPredicates) { |
7144 | 948k | (void)this->L; |
7145 | 948k | (void)this->ExitIfTrue; |
7146 | 948k | (void)this->AllowPredicates; |
7147 | 948k | |
7148 | 948k | assert(this->L == L && this->ExitIfTrue == ExitIfTrue && |
7149 | 948k | this->AllowPredicates == AllowPredicates && |
7150 | 948k | "Variance in assumed invariant key components!"); |
7151 | 948k | auto Itr = TripCountMap.find({ExitCond, ControlsExit}); |
7152 | 948k | if (Itr == TripCountMap.end()) |
7153 | 948k | return None; |
7154 | 66 | return Itr->second; |
7155 | 66 | } |
7156 | | |
7157 | | void ScalarEvolution::ExitLimitCache::insert(const Loop *L, Value *ExitCond, |
7158 | | bool ExitIfTrue, |
7159 | | bool ControlsExit, |
7160 | | bool AllowPredicates, |
7161 | 948k | const ExitLimit &EL) { |
7162 | 948k | assert(this->L == L && this->ExitIfTrue == ExitIfTrue && |
7163 | 948k | this->AllowPredicates == AllowPredicates && |
7164 | 948k | "Variance in assumed invariant key components!"); |
7165 | 948k | |
7166 | 948k | auto InsertResult = TripCountMap.insert({{ExitCond, ControlsExit}, EL}); |
7167 | 948k | assert(InsertResult.second && "Expected successful insertion!"); |
7168 | 948k | (void)InsertResult; |
7169 | 948k | (void)ExitIfTrue; |
7170 | 948k | } |
7171 | | |
7172 | | ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondCached( |
7173 | | ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, bool ExitIfTrue, |
7174 | 948k | bool ControlsExit, bool AllowPredicates) { |
7175 | 948k | |
7176 | 948k | if (auto MaybeEL = |
7177 | 66 | Cache.find(L, ExitCond, ExitIfTrue, ControlsExit, AllowPredicates)) |
7178 | 66 | return *MaybeEL; |
7179 | 948k | |
7180 | 948k | ExitLimit EL = computeExitLimitFromCondImpl(Cache, L, ExitCond, ExitIfTrue, |
7181 | 948k | ControlsExit, AllowPredicates); |
7182 | 948k | Cache.insert(L, ExitCond, ExitIfTrue, ControlsExit, AllowPredicates, EL); |
7183 | 948k | return EL; |
7184 | 948k | } |
7185 | | |
7186 | | ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl( |
7187 | | ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, bool ExitIfTrue, |
7188 | 948k | bool ControlsExit, bool AllowPredicates) { |
7189 | 948k | // Check if the controlling expression for this loop is an And or Or. |
7190 | 948k | if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) { |
7191 | 40.4k | if (BO->getOpcode() == Instruction::And) { |
7192 | 29.6k | // Recurse on the operands of the and. |
7193 | 29.6k | bool EitherMayExit = !ExitIfTrue; |
7194 | 29.6k | ExitLimit EL0 = computeExitLimitFromCondCached( |
7195 | 29.6k | Cache, L, BO->getOperand(0), ExitIfTrue, |
7196 | 29.6k | ControlsExit && !EitherMayExit24.5k , AllowPredicates); |
7197 | 29.6k | ExitLimit EL1 = computeExitLimitFromCondCached( |
7198 | 29.6k | Cache, L, BO->getOperand(1), ExitIfTrue, |
7199 | 29.6k | ControlsExit && !EitherMayExit24.5k , AllowPredicates); |
7200 | 29.6k | const SCEV *BECount = getCouldNotCompute(); |
7201 | 29.6k | const SCEV *MaxBECount = getCouldNotCompute(); |
7202 | 29.6k | if (EitherMayExit) { |
7203 | 28.6k | // Both conditions must be true for the loop to continue executing. |
7204 | 28.6k | // Choose the less conservative count. |
7205 | 28.6k | if (EL0.ExactNotTaken == getCouldNotCompute() || |
7206 | 28.6k | EL1.ExactNotTaken == getCouldNotCompute()23.6k ) |
7207 | 27.9k | BECount = getCouldNotCompute(); |
7208 | 678 | else |
7209 | 678 | BECount = |
7210 | 678 | getUMinFromMismatchedTypes(EL0.ExactNotTaken, EL1.ExactNotTaken); |
7211 | 28.6k | if (EL0.MaxNotTaken == getCouldNotCompute()) |
7212 | 4.97k | MaxBECount = EL1.MaxNotTaken; |
7213 | 23.6k | else if (EL1.MaxNotTaken == getCouldNotCompute()) |
7214 | 16.0k | MaxBECount = EL0.MaxNotTaken; |
7215 | 7.65k | else |
7216 | 7.65k | MaxBECount = |
7217 | 7.65k | getUMinFromMismatchedTypes(EL0.MaxNotTaken, EL1.MaxNotTaken); |
7218 | 28.6k | } else { |
7219 | 969 | // Both conditions must be true at the same time for the loop to exit. |
7220 | 969 | // For now, be conservative. |
7221 | 969 | if (EL0.MaxNotTaken == EL1.MaxNotTaken) |
7222 | 936 | MaxBECount = EL0.MaxNotTaken; |
7223 | 969 | if (EL0.ExactNotTaken == EL1.ExactNotTaken) |
7224 | 937 | BECount = EL0.ExactNotTaken; |
7225 | 969 | } |
7226 | 29.6k | |
7227 | 29.6k | // There are cases (e.g. PR26207) where computeExitLimitFromCond is able |
7228 | 29.6k | // to be more aggressive when computing BECount than when computing |
7229 | 29.6k | // MaxBECount. In these cases it is possible for EL0.ExactNotTaken and |
7230 | 29.6k | // EL1.ExactNotTaken to match, but for EL0.MaxNotTaken and EL1.MaxNotTaken |
7231 | 29.6k | // to not. |
7232 | 29.6k | if (isa<SCEVCouldNotCompute>(MaxBECount) && |
7233 | 29.6k | !isa<SCEVCouldNotCompute>(BECount)5.32k ) |
7234 | 1 | MaxBECount = getConstant(getUnsignedRangeMax(BECount)); |
7235 | 29.6k | |
7236 | 29.6k | return ExitLimit(BECount, MaxBECount, false, |
7237 | 29.6k | {&EL0.Predicates, &EL1.Predicates}); |
7238 | 29.6k | } |
7239 | 10.8k | if (BO->getOpcode() == Instruction::Or) { |
7240 | 10.3k | // Recurse on the operands of the or. |
7241 | 10.3k | bool EitherMayExit = ExitIfTrue; |
7242 | 10.3k | ExitLimit EL0 = computeExitLimitFromCondCached( |
7243 | 10.3k | Cache, L, BO->getOperand(0), ExitIfTrue, |
7244 | 10.3k | ControlsExit && !EitherMayExit8.54k , AllowPredicates); |
7245 | 10.3k | ExitLimit EL1 = computeExitLimitFromCondCached( |
7246 | 10.3k | Cache, L, BO->getOperand(1), ExitIfTrue, |
7247 | 10.3k | ControlsExit && !EitherMayExit8.54k , AllowPredicates); |
7248 | 10.3k | const SCEV *BECount = getCouldNotCompute(); |
7249 | 10.3k | const SCEV *MaxBECount = getCouldNotCompute(); |
7250 | 10.3k | if (EitherMayExit) { |
7251 | 2.24k | // Both conditions must be false for the loop to continue executing. |
7252 | 2.24k | // Choose the less conservative count. |
7253 | 2.24k | if (EL0.ExactNotTaken == getCouldNotCompute() || |
7254 | 2.24k | EL1.ExactNotTaken == getCouldNotCompute()264 ) |
7255 | 2.20k | BECount = getCouldNotCompute(); |
7256 | 43 | else |
7257 | 43 | BECount = |
7258 | 43 | getUMinFromMismatchedTypes(EL0.ExactNotTaken, EL1.ExactNotTaken); |
7259 | 2.24k | if (EL0.MaxNotTaken == getCouldNotCompute()) |
7260 | 1.98k | MaxBECount = EL1.MaxNotTaken; |
7261 | 267 | else if (EL1.MaxNotTaken == getCouldNotCompute()) |
7262 | 224 | MaxBECount = EL0.MaxNotTaken; |
7263 | 43 | else |
7264 | 43 | MaxBECount = |
7265 | 43 | getUMinFromMismatchedTypes(EL0.MaxNotTaken, EL1.MaxNotTaken); |
7266 | 8.05k | } else { |
7267 | 8.05k | // Both conditions must be false at the same time for the loop to exit. |
7268 | 8.05k | // For now, be conservative. |
7269 | 8.05k | if (EL0.MaxNotTaken == EL1.MaxNotTaken) |
7270 | 797 | MaxBECount = EL0.MaxNotTaken; |
7271 | 8.05k | if (EL0.ExactNotTaken == EL1.ExactNotTaken) |
7272 | 798 | BECount = EL0.ExactNotTaken; |
7273 | 8.05k | } |
7274 | 10.3k | // There are cases (e.g. PR26207) where computeExitLimitFromCond is able |
7275 | 10.3k | // to be more aggressive when computing BECount than when computing |
7276 | 10.3k | // MaxBECount. In these cases it is possible for EL0.ExactNotTaken and |
7277 | 10.3k | // EL1.ExactNotTaken to match, but for EL0.MaxNotTaken and EL1.MaxNotTaken |
7278 | 10.3k | // to not. |
7279 | 10.3k | if (isa<SCEVCouldNotCompute>(MaxBECount) && |
7280 | 10.3k | !isa<SCEVCouldNotCompute>(BECount)9.97k ) |
7281 | 1 | MaxBECount = getConstant(getUnsignedRangeMax(BECount)); |
7282 | 10.3k | |
7283 | 10.3k | return ExitLimit(BECount, MaxBECount, false, |
7284 | 10.3k | {&EL0.Predicates, &EL1.Predicates}); |
7285 | 10.3k | } |
7286 | 908k | } |
7287 | 908k | |
7288 | 908k | // With an icmp, it may be feasible to compute an exact backedge-taken count. |
7289 | 908k | // Proceed to the next level to examine the icmp. |
7290 | 908k | if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond)) { |
7291 | 869k | ExitLimit EL = |
7292 | 869k | computeExitLimitFromICmp(L, ExitCondICmp, ExitIfTrue, ControlsExit); |
7293 | 869k | if (EL.hasFullInfo() || !AllowPredicates446k ) |
7294 | 804k | return EL; |
7295 | 65.1k | |
7296 | 65.1k | // Try again, but use SCEV predicates this time. |
7297 | 65.1k | return computeExitLimitFromICmp(L, ExitCondICmp, ExitIfTrue, ControlsExit, |
7298 | 65.1k | /*AllowPredicates=*/true); |
7299 | 65.1k | } |
7300 | 38.9k | |
7301 | 38.9k | // Check for a constant condition. These are normally stripped out by |
7302 | 38.9k | // SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to |
7303 | 38.9k | // preserve the CFG and is temporarily leaving constant conditions |
7304 | 38.9k | // in place. |
7305 | 38.9k | if (ConstantInt *CI = dyn_cast<ConstantInt>(ExitCond)) { |
7306 | 769 | if (ExitIfTrue == !CI->getZExtValue()) |
7307 | 167 | // The backedge is always taken. |
7308 | 167 | return getCouldNotCompute(); |
7309 | 602 | else |
7310 | 602 | // The backedge is never taken. |
7311 | 602 | return getZero(CI->getType()); |
7312 | 38.1k | } |
7313 | 38.1k | |
7314 | 38.1k | // If it's not an integer or pointer comparison then compute it the hard way. |
7315 | 38.1k | return computeExitCountExhaustively(L, ExitCond, ExitIfTrue); |
7316 | 38.1k | } |
7317 | | |
7318 | | ScalarEvolution::ExitLimit |
7319 | | ScalarEvolution::computeExitLimitFromICmp(const Loop *L, |
7320 | | ICmpInst *ExitCond, |
7321 | | bool ExitIfTrue, |
7322 | | bool ControlsExit, |
7323 | 934k | bool AllowPredicates) { |
7324 | 934k | // If the condition was exit on true, convert the condition to exit on false |
7325 | 934k | ICmpInst::Predicate Pred; |
7326 | 934k | if (!ExitIfTrue) |
7327 | 395k | Pred = ExitCond->getPredicate(); |
7328 | 539k | else |
7329 | 539k | Pred = ExitCond->getInversePredicate(); |
7330 | 934k | const ICmpInst::Predicate OriginalPred = Pred; |
7331 | 934k | |
7332 | 934k | // Handle common loops like: for (X = "string"; *X; ++X) |
7333 | 934k | if (LoadInst *LI = dyn_cast<LoadInst>(ExitCond->getOperand(0))) |
7334 | 166k | if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) { |
7335 | 141k | ExitLimit ItCnt = |
7336 | 141k | computeLoadConstantCompareExitLimit(LI, RHS, L, Pred); |
7337 | 141k | if (ItCnt.hasAnyInfo()) |
7338 | 0 | return ItCnt; |
7339 | 934k | } |
7340 | 934k | |
7341 | 934k | const SCEV *LHS = getSCEV(ExitCond->getOperand(0)); |
7342 | 934k | const SCEV *RHS = getSCEV(ExitCond->getOperand(1)); |
7343 | 934k | |
7344 | 934k | // Try to evaluate any dependencies out of the loop. |
7345 | 934k | LHS = getSCEVAtScope(LHS, L); |
7346 | 934k | RHS = getSCEVAtScope(RHS, L); |
7347 | 934k | |
7348 | 934k | // At this point, we would like to compute how many iterations of the |
7349 | 934k | // loop the predicate will return true for these inputs. |
7350 | 934k | if (isLoopInvariant(LHS, L) && !isLoopInvariant(RHS, L)6.80k ) { |
7351 | 6.34k | // If there is a loop-invariant, force it into the RHS. |
7352 | 6.34k | std::swap(LHS, RHS); |
7353 | 6.34k | Pred = ICmpInst::getSwappedPredicate(Pred); |
7354 | 6.34k | } |
7355 | 934k | |
7356 | 934k | // Simplify the operands before analyzing them. |
7357 | 934k | (void)SimplifyICmpOperands(Pred, LHS, RHS); |
7358 | 934k | |
7359 | 934k | // If we have a comparison of a chrec against a constant, try to use value |
7360 | 934k | // ranges to answer this query. |
7361 | 934k | if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) |
7362 | 420k | if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS)) |
7363 | 154k | if (AddRec->getLoop() == L) { |
7364 | 154k | // Form the constant range. |
7365 | 154k | ConstantRange CompRange = |
7366 | 154k | ConstantRange::makeExactICmpRegion(Pred, RHSC->getAPInt()); |
7367 | 154k | |
7368 | 154k | const SCEV *Ret = AddRec->getNumIterationsInRange(CompRange, *this); |
7369 | 154k | if (!isa<SCEVCouldNotCompute>(Ret)) return Ret122k ; |
7370 | 812k | } |
7371 | 812k | |
7372 | 812k | switch (Pred) { |
7373 | 812k | case ICmpInst::ICMP_NE: { // while (X != Y) |
7374 | 451k | // Convert to: while (X-Y != 0) |
7375 | 451k | ExitLimit EL = howFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit, |
7376 | 451k | AllowPredicates); |
7377 | 451k | if (EL.hasAnyInfo()) return EL197k ; |
7378 | 254k | break; |
7379 | 254k | } |
7380 | 254k | case ICmpInst::ICMP_EQ: { // while (X == Y) |
7381 | 33.0k | // Convert to: while (X-Y == 0) |
7382 | 33.0k | ExitLimit EL = howFarToNonZero(getMinusSCEV(LHS, RHS), L); |
7383 | 33.0k | if (EL.hasAnyInfo()) return EL0 ; |
7384 | 33.0k | break; |
7385 | 33.0k | } |
7386 | 277k | case ICmpInst::ICMP_SLT: |
7387 | 277k | case ICmpInst::ICMP_ULT: { // while (X < Y) |
7388 | 277k | bool IsSigned = Pred == ICmpInst::ICMP_SLT; |
7389 | 277k | ExitLimit EL = howManyLessThans(LHS, RHS, L, IsSigned, ControlsExit, |
7390 | 277k | AllowPredicates); |
7391 | 277k | if (EL.hasAnyInfo()) return EL194k ; |
7392 | 82.6k | break; |
7393 | 82.6k | } |
7394 | 82.6k | case ICmpInst::ICMP_SGT: |
7395 | 43.5k | case ICmpInst::ICMP_UGT: { // while (X > Y) |
7396 | 43.5k | bool IsSigned = Pred == ICmpInst::ICMP_SGT; |
7397 | 43.5k | ExitLimit EL = |
7398 | 43.5k | howManyGreaterThans(LHS, RHS, L, IsSigned, ControlsExit, |
7399 | 43.5k | AllowPredicates); |
7400 | 43.5k | if (EL.hasAnyInfo()) return EL14.3k ; |
7401 | 29.2k | break; |
7402 | 29.2k | } |
7403 | 29.2k | default: |
7404 | 6.41k | break; |
7405 | 405k | } |
7406 | 405k | |
7407 | 405k | auto *ExhaustiveCount = |
7408 | 405k | computeExitCountExhaustively(L, ExitCond, ExitIfTrue); |
7409 | 405k | |
7410 | 405k | if (!isa<SCEVCouldNotCompute>(ExhaustiveCount)) |
7411 | 520 | return ExhaustiveCount; |
7412 | 405k | |
7413 | 405k | return computeShiftCompareExitLimit(ExitCond->getOperand(0), |
7414 | 405k | ExitCond->getOperand(1), L, OriginalPred); |
7415 | 405k | } |
7416 | | |
7417 | | ScalarEvolution::ExitLimit |
7418 | | ScalarEvolution::computeExitLimitFromSingleExitSwitch(const Loop *L, |
7419 | | SwitchInst *Switch, |
7420 | | BasicBlock *ExitingBlock, |
7421 | 2.47k | bool ControlsExit) { |
7422 | 2.47k | assert(!L->contains(ExitingBlock) && "Not an exiting block!"); |
7423 | 2.47k | |
7424 | 2.47k | // Give up if the exit is the default dest of a switch. |
7425 | 2.47k | if (Switch->getDefaultDest() == ExitingBlock) |
7426 | 1.14k | return getCouldNotCompute(); |
7427 | 1.33k | |
7428 | 1.33k | assert(L->contains(Switch->getDefaultDest()) && |
7429 | 1.33k | "Default case must not exit the loop!"); |
7430 | 1.33k | const SCEV *LHS = getSCEVAtScope(Switch->getCondition(), L); |
7431 | 1.33k | const SCEV *RHS = getConstant(Switch->findCaseDest(ExitingBlock)); |
7432 | 1.33k | |
7433 | 1.33k | // while (X != Y) --> while (X-Y != 0) |
7434 | 1.33k | ExitLimit EL = howFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit); |
7435 | 1.33k | if (EL.hasAnyInfo()) |
7436 | 1 | return EL; |
7437 | 1.33k | |
7438 | 1.33k | return getCouldNotCompute(); |
7439 | 1.33k | } |
7440 | | |
7441 | | static ConstantInt * |
7442 | | EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C, |
7443 | 123k | ScalarEvolution &SE) { |
7444 | 123k | const SCEV *InVal = SE.getConstant(C); |
7445 | 123k | const SCEV *Val = AddRec->evaluateAtIteration(InVal, SE); |
7446 | 123k | assert(isa<SCEVConstant>(Val) && |
7447 | 123k | "Evaluation of SCEV at constant didn't fold correctly?"); |
7448 | 123k | return cast<SCEVConstant>(Val)->getValue(); |
7449 | 123k | } |
7450 | | |
7451 | | /// Given an exit condition of 'icmp op load X, cst', try to see if we can |
7452 | | /// compute the backedge execution count. |
7453 | | ScalarEvolution::ExitLimit |
7454 | | ScalarEvolution::computeLoadConstantCompareExitLimit( |
7455 | | LoadInst *LI, |
7456 | | Constant *RHS, |
7457 | | const Loop *L, |
7458 | 141k | ICmpInst::Predicate predicate) { |
7459 | 141k | if (LI->isVolatile()) return getCouldNotCompute()934 ; |
7460 | 140k | |
7461 | 140k | // Check to see if the loaded pointer is a getelementptr of a global. |
7462 | 140k | // TODO: Use SCEV instead of manually grubbing with GEPs. |
7463 | 140k | GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0)); |
7464 | 140k | if (!GEP) return getCouldNotCompute()53.7k ; |
7465 | 86.8k | |
7466 | 86.8k | // Make sure that it is really a constant global we are gepping, with an |
7467 | 86.8k | // initializer, and make sure the first IDX is really 0. |
7468 | 86.8k | GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)); |
7469 | 86.8k | if (!GV || !GV->isConstant()657 || !GV->hasDefinitiveInitializer()52 || |
7470 | 86.8k | GEP->getNumOperands() < 351 || !isa<Constant>(GEP->getOperand(1))51 || |
7471 | 86.8k | !cast<Constant>(GEP->getOperand(1))->isNullValue()51 ) |
7472 | 86.7k | return getCouldNotCompute(); |
7473 | 51 | |
7474 | 51 | // Okay, we allow one non-constant index into the GEP instruction. |
7475 | 51 | Value *VarIdx = nullptr; |
7476 | 51 | std::vector<Constant*> Indexes; |
7477 | 51 | unsigned VarIdxNum = 0; |
7478 | 105 | for (unsigned i = 2, e = GEP->getNumOperands(); i != e; ++i54 ) |
7479 | 66 | if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) { |
7480 | 3 | Indexes.push_back(CI); |
7481 | 63 | } else if (!isa<ConstantInt>(GEP->getOperand(i))) { |
7482 | 63 | if (VarIdx) return getCouldNotCompute()12 ; // Multiple non-constant idx's. |
7483 | 51 | VarIdx = GEP->getOperand(i); |
7484 | 51 | VarIdxNum = i-2; |
7485 | 51 | Indexes.push_back(nullptr); |
7486 | 51 | } |
7487 | 51 | |
7488 | 51 | // Loop-invariant loads may be a byproduct of loop optimization. Skip them. |
7489 | 51 | if (39 !VarIdx39 ) |
7490 | 0 | return getCouldNotCompute(); |
7491 | 39 | |
7492 | 39 | // Okay, we know we have a (load (gep GV, 0, X)) comparison with a constant. |
7493 | 39 | // Check to see if X is a loop variant variable value now. |
7494 | 39 | const SCEV *Idx = getSCEV(VarIdx); |
7495 | 39 | Idx = getSCEVAtScope(Idx, L); |
7496 | 39 | |
7497 | 39 | // We can only recognize very limited forms of loop index expressions, in |
7498 | 39 | // particular, only affine AddRec's like {C1,+,C2}. |
7499 | 39 | const SCEVAddRecExpr *IdxExpr = dyn_cast<SCEVAddRecExpr>(Idx); |
7500 | 39 | if (!IdxExpr || !IdxExpr->isAffine()17 || isLoopInvariant(IdxExpr, L)17 || |
7501 | 39 | !isa<SCEVConstant>(IdxExpr->getOperand(0))17 || |
7502 | 39 | !isa<SCEVConstant>(IdxExpr->getOperand(1))3 ) |
7503 | 36 | return getCouldNotCompute(); |
7504 | 3 | |
7505 | 3 | unsigned MaxSteps = MaxBruteForceIterations; |
7506 | 303 | for (unsigned IterationNum = 0; IterationNum != MaxSteps; ++IterationNum300 ) { |
7507 | 300 | ConstantInt *ItCst = ConstantInt::get( |
7508 | 300 | cast<IntegerType>(IdxExpr->getType()), IterationNum); |
7509 | 300 | ConstantInt *Val = EvaluateConstantChrecAtConstant(IdxExpr, ItCst, *this); |
7510 | 300 | |
7511 | 300 | // Form the GEP offset. |
7512 | 300 | Indexes[VarIdxNum] = Val; |
7513 | 300 | |
7514 | 300 | Constant *Result = ConstantFoldLoadThroughGEPIndices(GV->getInitializer(), |
7515 | 300 | Indexes); |
7516 | 300 | if (!Result) break0 ; // Cannot compute! |
7517 | 300 | |
7518 | 300 | // Evaluate the condition for this iteration. |
7519 | 300 | Result = ConstantExpr::getICmp(predicate, Result, RHS); |
7520 | 300 | if (!isa<ConstantInt>(Result)) break0 ; // Couldn't decide for sure |
7521 | 300 | if (cast<ConstantInt>(Result)->getValue().isMinValue()) { |
7522 | 0 | ++NumArrayLenItCounts; |
7523 | 0 | return getConstant(ItCst); // Found terminating iteration! |
7524 | 0 | } |
7525 | 300 | } |
7526 | 3 | return getCouldNotCompute(); |
7527 | 3 | } |
7528 | | |
7529 | | ScalarEvolution::ExitLimit ScalarEvolution::computeShiftCompareExitLimit( |
7530 | 405k | Value *LHS, Value *RHSV, const Loop *L, ICmpInst::Predicate Pred) { |
7531 | 405k | ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV); |
7532 | 405k | if (!RHS) |
7533 | 216k | return getCouldNotCompute(); |
7534 | 189k | |
7535 | 189k | const BasicBlock *Latch = L->getLoopLatch(); |
7536 | 189k | if (!Latch) |
7537 | 0 | return getCouldNotCompute(); |
7538 | 189k | |
7539 | 189k | const BasicBlock *Predecessor = L->getLoopPredecessor(); |
7540 | 189k | if (!Predecessor) |
7541 | 15 | return getCouldNotCompute(); |
7542 | 189k | |
7543 | 189k | // Return true if V is of the form "LHS `shift_op` <positive constant>". |
7544 | 189k | // Return LHS in OutLHS and shift_opt in OutOpCode. |
7545 | 189k | auto MatchPositiveShift = |
7546 | 208k | [](Value *V, Value *&OutLHS, Instruction::BinaryOps &OutOpCode) { |
7547 | 208k | |
7548 | 208k | using namespace PatternMatch; |
7549 | 208k | |
7550 | 208k | ConstantInt *ShiftAmt; |
7551 | 208k | if (match(V, m_LShr(m_Value(OutLHS), m_ConstantInt(ShiftAmt)))) |
7552 | 17.9k | OutOpCode = Instruction::LShr; |
7553 | 190k | else if (match(V, m_AShr(m_Value(OutLHS), m_ConstantInt(ShiftAmt)))) |
7554 | 629 | OutOpCode = Instruction::AShr; |
7555 | 189k | else if (match(V, m_Shl(m_Value(OutLHS), m_ConstantInt(ShiftAmt)))) |
7556 | 229 | OutOpCode = Instruction::Shl; |
7557 | 189k | else |
7558 | 189k | return false; |
7559 | 18.8k | |
7560 | 18.8k | return ShiftAmt->getValue().isStrictlyPositive(); |
7561 | 18.8k | }; |
7562 | 189k | |
7563 | 189k | // Recognize a "shift recurrence" either of the form %iv or of %iv.shifted in |
7564 | 189k | // |
7565 | 189k | // loop: |
7566 | 189k | // %iv = phi i32 [ %iv.shifted, %loop ], [ %val, %preheader ] |
7567 | 189k | // %iv.shifted = lshr i32 %iv, <positive constant> |
7568 | 189k | // |
7569 | 189k | // Return true on a successful match. Return the corresponding PHI node (%iv |
7570 | 189k | // above) in PNOut and the opcode of the shift operation in OpCodeOut. |
7571 | 189k | auto MatchShiftRecurrence = |
7572 | 189k | [&](Value *V, PHINode *&PNOut, Instruction::BinaryOps &OpCodeOut) { |
7573 | 189k | Optional<Instruction::BinaryOps> PostShiftOpCode; |
7574 | 189k | |
7575 | 189k | { |
7576 | 189k | Instruction::BinaryOps OpC; |
7577 | 189k | Value *V; |
7578 | 189k | |
7579 | 189k | // If we encounter a shift instruction, "peel off" the shift operation, |
7580 | 189k | // and remember that we did so. Later when we inspect %iv's backedge |
7581 | 189k | // value, we will make sure that the backedge value uses the same |
7582 | 189k | // operation. |
7583 | 189k | // |
7584 | 189k | // Note: the peeled shift operation does not have to be the same |
7585 | 189k | // instruction as the one feeding into the PHI's backedge value. We only |
7586 | 189k | // really care about it being the same *kind* of shift instruction -- |
7587 | 189k | // that's all that is required for our later inferences to hold. |
7588 | 189k | if (MatchPositiveShift(LHS, V, OpC)) { |
7589 | 9.64k | PostShiftOpCode = OpC; |
7590 | 9.64k | LHS = V; |
7591 | 9.64k | } |
7592 | 189k | } |
7593 | 189k | |
7594 | 189k | PNOut = dyn_cast<PHINode>(LHS); |
7595 | 189k | if (!PNOut || PNOut->getParent() != L->getHeader()21.7k ) |
7596 | 170k | return false; |
7597 | 19.1k | |
7598 | 19.1k | Value *BEValue = PNOut->getIncomingValueForBlock(Latch); |
7599 | 19.1k | Value *OpLHS; |
7600 | 19.1k | |
7601 | 19.1k | return |
7602 | 19.1k | // The backedge value for the PHI node must be a shift by a positive |
7603 | 19.1k | // amount |
7604 | 19.1k | MatchPositiveShift(BEValue, OpLHS, OpCodeOut) && |
7605 | 19.1k | |
7606 | 19.1k | // of the PHI node itself |
7607 | 19.1k | OpLHS == PNOut9.17k && |
7608 | 19.1k | |
7609 | 19.1k | // and the kind of shift should be match the kind of shift we peeled |
7610 | 19.1k | // off, if any. |
7611 | 19.1k | (9.17k !PostShiftOpCode.hasValue()9.17k || *PostShiftOpCode == OpCodeOut8.91k ); |
7612 | 19.1k | }; |
7613 | 189k | |
7614 | 189k | PHINode *PN; |
7615 | 189k | Instruction::BinaryOps OpCode; |
7616 | 189k | if (!MatchShiftRecurrence(LHS, PN, OpCode)) |
7617 | 180k | return getCouldNotCompute(); |
7618 | 9.17k | |
7619 | 9.17k | const DataLayout &DL = getDataLayout(); |
7620 | 9.17k | |
7621 | 9.17k | // The key rationale for this optimization is that for some kinds of shift |
7622 | 9.17k | // recurrences, the value of the recurrence "stabilizes" to either 0 or -1 |
7623 | 9.17k | // within a finite number of iterations. If the condition guarding the |
7624 | 9.17k | // backedge (in the sense that the backedge is taken if the condition is true) |
7625 | 9.17k | // is false for the value the shift recurrence stabilizes to, then we know |
7626 | 9.17k | // that the backedge is taken only a finite number of times. |
7627 | 9.17k | |
7628 | 9.17k | ConstantInt *StableValue = nullptr; |
7629 | 9.17k | switch (OpCode) { |
7630 | 9.17k | default: |
7631 | 0 | llvm_unreachable("Impossible case!"); |
7632 | 9.17k | |
7633 | 9.17k | case Instruction::AShr: { |
7634 | 178 | // {K,ashr,<positive-constant>} stabilizes to signum(K) in at most |
7635 | 178 | // bitwidth(K) iterations. |
7636 | 178 | Value *FirstValue = PN->getIncomingValueForBlock(Predecessor); |
7637 | 178 | KnownBits Known = computeKnownBits(FirstValue, DL, 0, nullptr, |
7638 | 178 | Predecessor->getTerminator(), &DT); |
7639 | 178 | auto *Ty = cast<IntegerType>(RHS->getType()); |
7640 | 178 | if (Known.isNonNegative()) |
7641 | 45 | StableValue = ConstantInt::get(Ty, 0); |
7642 | 133 | else if (Known.isNegative()) |
7643 | 10 | StableValue = ConstantInt::get(Ty, -1, true); |
7644 | 123 | else |
7645 | 123 | return getCouldNotCompute(); |
7646 | 55 | |
7647 | 55 | break; |
7648 | 55 | } |
7649 | 8.99k | case Instruction::LShr: |
7650 | 8.99k | case Instruction::Shl: |
7651 | 8.99k | // Both {K,lshr,<positive-constant>} and {K,shl,<positive-constant>} |
7652 | 8.99k | // stabilize to 0 in at most bitwidth(K) iterations. |
7653 | 8.99k | StableValue = ConstantInt::get(cast<IntegerType>(RHS->getType()), 0); |
7654 | 8.99k | break; |
7655 | 9.05k | } |
7656 | 9.05k | |
7657 | 9.05k | auto *Result = |
7658 | 9.05k | ConstantFoldCompareInstOperands(Pred, StableValue, RHS, DL, &TLI); |
7659 | 9.05k | assert(Result->getType()->isIntegerTy(1) && |
7660 | 9.05k | "Otherwise cannot be an operand to a branch instruction"); |
7661 | 9.05k | |
7662 | 9.05k | if (Result->isZeroValue()) { |
7663 | 8.94k | unsigned BitWidth = getTypeSizeInBits(RHS->getType()); |
7664 | 8.94k | const SCEV *UpperBound = |
7665 | 8.94k | getConstant(getEffectiveSCEVType(RHS->getType()), BitWidth); |
7666 | 8.94k | return ExitLimit(getCouldNotCompute(), UpperBound, false); |
7667 | 8.94k | } |
7668 | 104 | |
7669 | 104 | return getCouldNotCompute(); |
7670 | 104 | } |
7671 | | |
7672 | | /// Return true if we can constant fold an instruction of the specified type, |
7673 | | /// assuming that all operands were constants. |
7674 | 2.62M | static bool CanConstantFold(const Instruction *I) { |
7675 | 2.62M | if (isa<BinaryOperator>(I) || isa<CmpInst>(I)2.40M || |
7676 | 2.62M | isa<SelectInst>(I)1.89M || isa<CastInst>(I)1.83M || isa<GetElementPtrInst>(I)1.66M || |
7677 | 2.62M | isa<LoadInst>(I)1.52M || isa<ExtractValueInst>(I)673k ) |
7678 | 1.97M | return true; |
7679 | 650k | |
7680 | 650k | if (const CallInst *CI = dyn_cast<CallInst>(I)) |
7681 | 186k | if (const Function *F = CI->getCalledFunction()) |
7682 | 175k | return canConstantFoldCallTo(CI, F); |
7683 | 474k | return false; |
7684 | 474k | } |
7685 | | |
7686 | | /// Determine whether this instruction can constant evolve within this loop |
7687 | | /// assuming its operands can all constant evolve. |
7688 | 1.61M | static bool canConstantEvolve(Instruction *I, const Loop *L) { |
7689 | 1.61M | // An instruction outside of the loop can't be derived from a loop PHI. |
7690 | 1.61M | if (!L->contains(I)) return false124k ; |
7691 | 1.48M | |
7692 | 1.48M | if (isa<PHINode>(I)) { |
7693 | 262k | // We don't currently keep track of the control flow needed to evaluate |
7694 | 262k | // PHIs, so we cannot handle PHIs inside of loops. |
7695 | 262k | return L->getHeader() == I->getParent(); |
7696 | 262k | } |
7697 | 1.22M | |
7698 | 1.22M | // If we won't be able to constant fold this expression even if the operands |
7699 | 1.22M | // are constants, bail early. |
7700 | 1.22M | return CanConstantFold(I); |
7701 | 1.22M | } |
7702 | | |
7703 | | /// getConstantEvolvingPHIOperands - Implement getConstantEvolvingPHI by |
7704 | | /// recursing through each instruction operand until reaching a loop header phi. |
7705 | | static PHINode * |
7706 | | getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L, |
7707 | | DenseMap<Instruction *, PHINode *> &PHIMap, |
7708 | 1.01M | unsigned Depth) { |
7709 | 1.01M | if (Depth > MaxConstantEvolvingDepth) |
7710 | 8 | return nullptr; |
7711 | 1.01M | |
7712 | 1.01M | // Otherwise, we can evaluate this instruction if all of its operands are |
7713 | 1.01M | // constant or derived from a PHI node themselves. |
7714 | 1.01M | PHINode *PHI = nullptr; |
7715 | 1.50M | for (Value *Op : UseInst->operands()) { |
7716 | 1.50M | if (isa<Constant>(Op)) continue407k ; |
7717 | 1.09M | |
7718 | 1.09M | Instruction *OpInst = dyn_cast<Instruction>(Op); |
7719 | 1.09M | if (!OpInst || !canConstantEvolve(OpInst, L)1.07M ) return nullptr266k ; |
7720 | 827k | |
7721 | 827k | PHINode *P = dyn_cast<PHINode>(OpInst); |
7722 | 827k | if (!P) |
7723 | 594k | // If this operand is already visited, reuse the prior result. |
7724 | 594k | // We may have P != PHI if this is the deepest point at which the |
7725 | 594k | // inconsistent paths meet. |
7726 | 594k | P = PHIMap.lookup(OpInst); |
7727 | 827k | if (!P) { |
7728 | 592k | // Recurse and memoize the results, whether a phi is found or not. |
7729 | 592k | // This recursive call invalidates pointers into PHIMap. |
7730 | 592k | P = getConstantEvolvingPHIOperands(OpInst, L, PHIMap, Depth + 1); |
7731 | 592k | PHIMap[OpInst] = P; |
7732 | 592k | } |
7733 | 827k | if (!P) |
7734 | 281k | return nullptr; // Not evolving from PHI |
7735 | 546k | if (PHI && PHI != P8.58k ) |
7736 | 4.22k | return nullptr; // Evolving from multiple different PHIs. |
7737 | 542k | PHI = P; |
7738 | 542k | } |
7739 | 1.01M | // This is a expression evolving from a constant PHI! |
7740 | 1.01M | return PHI464k ; |
7741 | 1.01M | } |
7742 | | |
7743 | | /// getConstantEvolvingPHI - Given an LLVM value and a loop, return a PHI node |
7744 | | /// in the loop that V is derived from. We allow arbitrary operations along the |
7745 | | /// way, but the operands of an operation must either be constants or a value |
7746 | | /// derived from a constant PHI. If this expression does not fit with these |
7747 | | /// constraints, return null. |
7748 | 444k | static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) { |
7749 | 444k | Instruction *I = dyn_cast<Instruction>(V); |
7750 | 444k | if (!I || !canConstantEvolve(I, L)443k ) return nullptr16.6k ; |
7751 | 427k | |
7752 | 427k | if (PHINode *PN = dyn_cast<PHINode>(I)) |
7753 | 4.26k | return PN; |
7754 | 423k | |
7755 | 423k | // Record non-constant instructions contained by the loop. |
7756 | 423k | DenseMap<Instruction *, PHINode *> PHIMap; |
7757 | 423k | return getConstantEvolvingPHIOperands(I, L, PHIMap, 0); |
7758 | 423k | } |
7759 | | |
7760 | | /// EvaluateExpression - Given an expression that passes the |
7761 | | /// getConstantEvolvingPHI predicate, evaluate its value assuming the PHI node |
7762 | | /// in the loop has the value PHIVal. If we can't fold this expression for some |
7763 | | /// reason, return null. |
7764 | | static Constant *EvaluateExpression(Value *V, const Loop *L, |
7765 | | DenseMap<Instruction *, Constant *> &Vals, |
7766 | | const DataLayout &DL, |
7767 | 149k | const TargetLibraryInfo *TLI) { |
7768 | 149k | // Convenient constant check, but redundant for recursive calls. |
7769 | 149k | if (Constant *C = dyn_cast<Constant>(V)) return C107 ; |
7770 | 148k | Instruction *I = dyn_cast<Instruction>(V); |
7771 | 148k | if (!I) return nullptr0 ; |
7772 | 148k | |
7773 | 148k | if (Constant *C = Vals.lookup(I)) return C58.5k ; |
7774 | 90.3k | |
7775 | 90.3k | // An instruction inside the loop depends on a value outside the loop that we |
7776 | 90.3k | // weren't given a mapping for, or a value such as a call inside the loop. |
7777 | 90.3k | if (!canConstantEvolve(I, L)) return nullptr1.61k ; |
7778 | 88.7k | |
7779 | 88.7k | // An unmapped PHI can be due to a branch or another loop inside this loop, |
7780 | 88.7k | // or due to this not being the initial iteration through a loop where we |
7781 | 88.7k | // couldn't compute the evolution of this particular PHI last time. |
7782 | 88.7k | if (isa<PHINode>(I)) return nullptr8.68k ; |
7783 | 80.0k | |
7784 | 80.0k | std::vector<Constant*> Operands(I->getNumOperands()); |
7785 | 80.0k | |
7786 | 205k | for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i124k ) { |
7787 | 140k | Instruction *Operand = dyn_cast<Instruction>(I->getOperand(i)); |
7788 | 140k | if (!Operand) { |
7789 | 46.5k | Operands[i] = dyn_cast<Constant>(I->getOperand(i)); |
7790 | 46.5k | if (!Operands[i]) return nullptr119 ; |
7791 | 46.4k | continue; |
7792 | 46.4k | } |
7793 | 93.9k | Constant *C = EvaluateExpression(Operand, L, Vals, DL, TLI); |
7794 | 93.9k | Vals[Operand] = C; |
7795 | 93.9k | if (!C) return nullptr15.4k ; |
7796 | 78.5k | Operands[i] = C; |
7797 | 78.5k | } |
7798 | 80.0k | |
7799 | 80.0k | if (CmpInst *64.4k CI64.4k = dyn_cast<CmpInst>(I)) |
7800 | 16.9k | return ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0], |
7801 | 16.9k | Operands[1], DL, TLI); |
7802 | 47.5k | if (LoadInst *LI = dyn_cast<LoadInst>(I)) { |
7803 | 4.91k | if (!LI->isVolatile()) |
7804 | 4.81k | return ConstantFoldLoadFromConstPtr(Operands[0], LI->getType(), DL); |
7805 | 42.7k | } |
7806 | 42.7k | return ConstantFoldInstOperands(I, Operands, DL, TLI); |
7807 | 42.7k | } |
7808 | | |
7809 | | |
7810 | | // If every incoming value to PN except the one for BB is a specific Constant, |
7811 | | // return that, else return nullptr. |
7812 | 244k | static Constant *getOtherIncomingValue(PHINode *PN, BasicBlock *BB) { |
7813 | 244k | Constant *IncomingVal = nullptr; |
7814 | 244k | |
7815 | 476k | for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i232k ) { |
7816 | 429k | if (PN->getIncomingBlock(i) == BB) |
7817 | 185k | continue; |
7818 | 244k | |
7819 | 244k | auto *CurrentVal = dyn_cast<Constant>(PN->getIncomingValue(i)); |
7820 | 244k | if (!CurrentVal) |
7821 | 197k | return nullptr; |
7822 | 47.0k | |
7823 | 47.0k | if (IncomingVal != CurrentVal) { |
7824 | 47.0k | if (IncomingVal) |
7825 | 0 | return nullptr; |
7826 | 47.0k | IncomingVal = CurrentVal; |
7827 | 47.0k | } |
7828 | 47.0k | } |
7829 | 244k | |
7830 | 244k | return IncomingVal47.0k ; |
7831 | 244k | } |
7832 | | |
7833 | | /// getConstantEvolutionLoopExitValue - If we know that the specified Phi is |
7834 | | /// in the header of its containing loop, we know the loop executes a |
7835 | | /// constant number of times, and the PHI node is just a recurrence |
7836 | | /// involving constants, fold it. |
7837 | | Constant * |
7838 | | ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, |
7839 | | const APInt &BEs, |
7840 | 670 | const Loop *L) { |
7841 | 670 | auto I = ConstantEvolutionLoopExitValue.find(PN); |
7842 | 670 | if (I != ConstantEvolutionLoopExitValue.end()) |
7843 | 0 | return I->second; |
7844 | 670 | |
7845 | 670 | if (BEs.ugt(MaxBruteForceIterations)) |
7846 | 131 | return ConstantEvolutionLoopExitValue[PN] = nullptr; // Not going to evaluate it. |
7847 | 539 | |
7848 | 539 | Constant *&RetVal = ConstantEvolutionLoopExitValue[PN]; |
7849 | 539 | |
7850 | 539 | DenseMap<Instruction *, Constant *> CurrentIterVals; |
7851 | 539 | BasicBlock *Header = L->getHeader(); |
7852 | 539 | assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!"); |
7853 | 539 | |
7854 | 539 | BasicBlock *Latch = L->getLoopLatch(); |
7855 | 539 | if (!Latch) |
7856 | 0 | return nullptr; |
7857 | 539 | |
7858 | 2.09k | for (PHINode &PHI : Header->phis())539 { |
7859 | 2.09k | if (auto *StartCST = getOtherIncomingValue(&PHI, Latch)) |
7860 | 1.19k | CurrentIterVals[&PHI] = StartCST; |
7861 | 2.09k | } |
7862 | 539 | if (!CurrentIterVals.count(PN)) |
7863 | 225 | return RetVal = nullptr; |
7864 | 314 | |
7865 | 314 | Value *BEValue = PN->getIncomingValueForBlock(Latch); |
7866 | 314 | |
7867 | 314 | // Execute the loop symbolically to determine the exit value. |
7868 | 314 | assert(BEs.getActiveBits() < CHAR_BIT * sizeof(unsigned) && |
7869 | 314 | "BEs is <= MaxBruteForceIterations which is an 'unsigned'!"); |
7870 | 314 | |
7871 | 314 | unsigned NumIterations = BEs.getZExtValue(); // must be in range |
7872 | 314 | unsigned IterationNum = 0; |
7873 | 314 | const DataLayout &DL = getDataLayout(); |
7874 | 833 | for (; ; ++IterationNum519 ) { |
7875 | 833 | if (IterationNum == NumIterations) |
7876 | 85 | return RetVal = CurrentIterVals[PN]; // Got exit value! |
7877 | 748 | |
7878 | 748 | // Compute the value of the PHIs for the next iteration. |
7879 | 748 | // EvaluateExpression adds non-phi values to the CurrentIterVals map. |
7880 | 748 | DenseMap<Instruction *, Constant *> NextIterVals; |
7881 | 748 | Constant *NextPHI = |
7882 | 748 | EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI); |
7883 | 748 | if (!NextPHI) |
7884 | 229 | return nullptr; // Couldn't evaluate! |
7885 | 519 | NextIterVals[PN] = NextPHI; |
7886 | 519 | |
7887 | 519 | bool StoppedEvolving = NextPHI == CurrentIterVals[PN]; |
7888 | 519 | |
7889 | 519 | // Also evaluate the other PHI nodes. However, we don't get to stop if we |
7890 | 519 | // cease to be able to evaluate one of them or if they stop evolving, |
7891 | 519 | // because that doesn't necessarily prevent us from computing PN. |
7892 | 519 | SmallVector<std::pair<PHINode *, Constant *>, 8> PHIsToCompute; |
7893 | 1.88k | for (const auto &I : CurrentIterVals) { |
7894 | 1.88k | PHINode *PHI = dyn_cast<PHINode>(I.first); |
7895 | 1.88k | if (!PHI || PHI == PN957 || PHI->getParent() != Header438 ) continue1.44k ; |
7896 | 438 | PHIsToCompute.emplace_back(PHI, I.second); |
7897 | 438 | } |
7898 | 519 | // We use two distinct loops because EvaluateExpression may invalidate any |
7899 | 519 | // iterators into CurrentIterVals. |
7900 | 519 | for (const auto &I : PHIsToCompute) { |
7901 | 438 | PHINode *PHI = I.first; |
7902 | 438 | Constant *&NextPHI = NextIterVals[PHI]; |
7903 | 438 | if (!NextPHI) { // Not already computed. |
7904 | 438 | Value *BEValue = PHI->getIncomingValueForBlock(Latch); |
7905 | 438 | NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI); |
7906 | 438 | } |
7907 | 438 | if (NextPHI != I.second) |
7908 | 380 | StoppedEvolving = false; |
7909 | 438 | } |
7910 | 519 | |
7911 | 519 | // If all entries in CurrentIterVals == NextIterVals then we can stop |
7912 | 519 | // iterating, the loop can't continue to change. |
7913 | 519 | if (StoppedEvolving) |
7914 | 0 | return RetVal = CurrentIterVals[PN]; |
7915 | 519 | |
7916 | 519 | CurrentIterVals.swap(NextIterVals); |
7917 | 519 | } |
7918 | 314 | } |
7919 | | |
7920 | | const SCEV *ScalarEvolution::computeExitCountExhaustively(const Loop *L, |
7921 | | Value *Cond, |
7922 | 444k | bool ExitWhen) { |
7923 | 444k | PHINode *PN = getConstantEvolvingPHI(Cond, L); |
7924 | 444k | if (!PN) return getCouldNotCompute()303k ; |
7925 | 140k | |
7926 | 140k | // If the loop is canonicalized, the PHI will have exactly two entries. |
7927 | 140k | // That's the only form we support here. |
7928 | 140k | if (PN->getNumIncomingValues() != 2) return getCouldNotCompute()30 ; |
7929 | 140k | |
7930 | 140k | DenseMap<Instruction *, Constant *> CurrentIterVals; |
7931 | 140k | BasicBlock *Header = L->getHeader(); |
7932 | 140k | assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!"); |
7933 | 140k | |
7934 | 140k | BasicBlock *Latch = L->getLoopLatch(); |
7935 | 140k | assert(Latch && "Should follow from NumIncomingValues == 2!"); |
7936 | 140k | |
7937 | 242k | for (PHINode &PHI : Header->phis()) { |
7938 | 242k | if (auto *StartCST = getOtherIncomingValue(&PHI, Latch)) |
7939 | 45.8k | CurrentIterVals[&PHI] = StartCST; |
7940 | 242k | } |
7941 | 140k | if (!CurrentIterVals.count(PN)) |
7942 | 134k | return getCouldNotCompute(); |
7943 | 5.88k | |
7944 | 5.88k | // Okay, we find a PHI node that defines the trip count of this loop. Execute |
7945 | 5.88k | // the loop symbolically to determine when the condition gets a value of |
7946 | 5.88k | // "ExitWhen". |
7947 | 5.88k | unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis. |
7948 | 5.88k | const DataLayout &DL = getDataLayout(); |
7949 | 26.8k | for (unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum20.9k ){ |
7950 | 26.7k | auto *CondVal = dyn_cast_or_null<ConstantInt>( |
7951 | 26.7k | EvaluateExpression(Cond, L, CurrentIterVals, DL, &TLI)); |
7952 | 26.7k | |
7953 | 26.7k | // Couldn't symbolically evaluate. |
7954 | 26.7k | if (!CondVal) return getCouldNotCompute()5.18k ; |
7955 | 21.5k | |
7956 | 21.5k | if (CondVal->getValue() == uint64_t(ExitWhen)) { |
7957 | 591 | ++NumBruteForceTripCountsComputed; |
7958 | 591 | return getConstant(Type::getInt32Ty(getContext()), IterationNum); |
7959 | 591 | } |
7960 | 20.9k | |
7961 | 20.9k | // Update all the PHI nodes for the next iteration. |
7962 | 20.9k | DenseMap<Instruction *, Constant *> NextIterVals; |
7963 | 20.9k | |
7964 | 20.9k | // Create a list of which PHIs we need to compute. We want to do this before |
7965 | 20.9k | // calling EvaluateExpression on them because that may invalidate iterators |
7966 | 20.9k | // into CurrentIterVals. |
7967 | 20.9k | SmallVector<PHINode *, 8> PHIsToCompute; |
7968 | 57.8k | for (const auto &I : CurrentIterVals) { |
7969 | 57.8k | PHINode *PHI = dyn_cast<PHINode>(I.first); |
7970 | 57.8k | if (!PHI || PHI->getParent() != Header27.0k ) continue30.7k ; |
7971 | 27.0k | PHIsToCompute.push_back(PHI); |
7972 | 27.0k | } |
7973 | 27.0k | for (PHINode *PHI : PHIsToCompute) { |
7974 | 27.0k | Constant *&NextPHI = NextIterVals[PHI]; |
7975 | 27.0k | if (NextPHI) continue0 ; // Already computed! |
7976 | 27.0k | |
7977 | 27.0k | Value *BEValue = PHI->getIncomingValueForBlock(Latch); |
7978 | 27.0k | NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI); |
7979 | 27.0k | } |
7980 | 20.9k | CurrentIterVals.swap(NextIterVals); |
7981 | 20.9k | } |
7982 | 5.88k | |
7983 | 5.88k | // Too many iterations were needed to evaluate. |
7984 | 5.88k | return getCouldNotCompute()110 ; |
7985 | 5.88k | } |
7986 | | |
7987 | 9.04M | const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { |
7988 | 9.04M | SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values = |
7989 | 9.04M | ValuesAtScopes[V]; |
7990 | 9.04M | // Check to see if we've folded this expression at this loop before. |
7991 | 9.04M | for (auto &LS : Values) |
7992 | 48.4M | if (LS.first == L) |
7993 | 2.70M | return LS.second ? LS.second2.70M : V11 ; |
7994 | 9.04M | |
7995 | 9.04M | Values.emplace_back(L, nullptr); |
7996 | 6.34M | |
7997 | 6.34M | // Otherwise compute it. |
7998 | 6.34M | const SCEV *C = computeSCEVAtScope(V, L); |
7999 | 6.34M | for (auto &LS : reverse(ValuesAtScopes[V])) |
8000 | 6.33M | if (LS.first == L) { |
8001 | 6.32M | LS.second = C; |
8002 | 6.32M | break; |
8003 | 6.32M | } |
8004 | 6.34M | return C; |
8005 | 9.04M | } |
8006 | | |
8007 | | /// This builds up a Constant using the ConstantExpr interface. That way, we |
8008 | | /// will return Constants for objects which aren't represented by a |
8009 | | /// SCEVConstant, because SCEVConstant is restricted to ConstantInt. |
8010 | | /// Returns NULL if the SCEV isn't representable as a Constant. |
8011 | 1.79M | static Constant *BuildConstantFromSCEV(const SCEV *V) { |
8012 | 1.79M | switch (static_cast<SCEVTypes>(V->getSCEVType())) { |
8013 | 1.79M | case scCouldNotCompute: |
8014 | 78.3k | case scAddRecExpr: |
8015 | 78.3k | break; |
8016 | 451k | case scConstant: |
8017 | 451k | return cast<SCEVConstant>(V)->getValue(); |
8018 | 712k | case scUnknown: |
8019 | 712k | return dyn_cast<Constant>(cast<SCEVUnknown>(V)->getValue()); |
8020 | 78.3k | case scSignExtend: { |
8021 | 18.6k | const SCEVSignExtendExpr *SS = cast<SCEVSignExtendExpr>(V); |
8022 | 18.6k | if (Constant *CastOp = BuildConstantFromSCEV(SS->getOperand())) |
8023 | 0 | return ConstantExpr::getSExt(CastOp, SS->getType()); |
8024 | 18.6k | break; |
8025 | 18.6k | } |
8026 | 18.6k | case scZeroExtend: { |
8027 | 14.1k | const SCEVZeroExtendExpr *SZ = cast<SCEVZeroExtendExpr>(V); |
8028 | 14.1k | if (Constant *CastOp = BuildConstantFromSCEV(SZ->getOperand())) |
8029 | 4 | return ConstantExpr::getZExt(CastOp, SZ->getType()); |
8030 | 14.1k | break; |
8031 | 14.1k | } |
8032 | 14.1k | case scTruncate: { |
8033 | 5.98k | const SCEVTruncateExpr *ST = cast<SCEVTruncateExpr>(V); |
8034 | 5.98k | if (Constant *CastOp = BuildConstantFromSCEV(ST->getOperand())) |
8035 | 1 | return ConstantExpr::getTrunc(CastOp, ST->getType()); |
8036 | 5.98k | break; |
8037 | 5.98k | } |
8038 | 448k | case scAddExpr: { |
8039 | 448k | const SCEVAddExpr *SA = cast<SCEVAddExpr>(V); |
8040 | 448k | if (Constant *C = BuildConstantFromSCEV(SA->getOperand(0))) { |
8041 | 406k | if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) { |
8042 | 0 | unsigned AS = PTy->getAddressSpace(); |
8043 | 0 | Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS); |
8044 | 0 | C = ConstantExpr::getBitCast(C, DestPtrTy); |
8045 | 0 | } |
8046 | 406k | for (unsigned i = 1, e = SA->getNumOperands(); i != e; ++i205 ) { |
8047 | 406k | Constant *C2 = BuildConstantFromSCEV(SA->getOperand(i)); |
8048 | 406k | if (!C2) return nullptr406k ; |
8049 | 205 | |
8050 | 205 | // First pointer! |
8051 | 205 | if (!C->getType()->isPointerTy() && C2->getType()->isPointerTy()) { |
8052 | 155 | unsigned AS = C2->getType()->getPointerAddressSpace(); |
8053 | 155 | std::swap(C, C2); |
8054 | 155 | Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS); |
8055 | 155 | // The offsets have been converted to bytes. We can add bytes to an |
8056 | 155 | // i8* by GEP with the byte count in the first index. |
8057 | 155 | C = ConstantExpr::getBitCast(C, DestPtrTy); |
8058 | 155 | } |
8059 | 205 | |
8060 | 205 | // Don't bother trying to sum two pointers. We probably can't |
8061 | 205 | // statically compute a load that results from it anyway. |
8062 | 205 | if (C2->getType()->isPointerTy()) |
8063 | 0 | return nullptr; |
8064 | 205 | |
8065 | 205 | if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) { |
8066 | 155 | if (PTy->getElementType()->isStructTy()) |
8067 | 0 | C2 = ConstantExpr::getIntegerCast( |
8068 | 0 | C2, Type::getInt32Ty(C->getContext()), true); |
8069 | 155 | C = ConstantExpr::getGetElementPtr(PTy->getElementType(), C, C2); |
8070 | 155 | } else |
8071 | 50 | C = ConstantExpr::getAdd(C, C2); |
8072 | 205 | } |
8073 | 406k | return C156 ; |
8074 | 41.6k | } |
8075 | 41.6k | break; |
8076 | 41.6k | } |
8077 | 47.1k | case scMulExpr: { |
8078 | 47.1k | const SCEVMulExpr *SM = cast<SCEVMulExpr>(V); |
8079 | 47.1k | if (Constant *C = BuildConstantFromSCEV(SM->getOperand(0))) { |
8080 | 45.2k | // Don't bother with pointers at all. |
8081 | 45.2k | if (C->getType()->isPointerTy()) return nullptr0 ; |
8082 | 45.3k | for (unsigned i = 1, e = SM->getNumOperands(); 45.2k i != e; ++i62 ) { |
8083 | 45.2k | Constant *C2 = BuildConstantFromSCEV(SM->getOperand(i)); |
8084 | 45.2k | if (!C2 || C2->getType()->isPointerTy()69 ) return nullptr45.1k ; |
8085 | 62 | C = ConstantExpr::getMul(C, C2); |
8086 | 62 | } |
8087 | 45.2k | return C62 ; |
8088 | 1.89k | } |
8089 | 1.89k | break; |
8090 | 1.89k | } |
8091 | 6.61k | case scUDivExpr: { |
8092 | 6.61k | const SCEVUDivExpr *SU = cast<SCEVUDivExpr>(V); |
8093 | 6.61k | if (Constant *LHS = BuildConstantFromSCEV(SU->getLHS())) |
8094 | 5 | if (Constant *RHS = BuildConstantFromSCEV(SU->getRHS())) |
8095 | 1 | if (LHS->getType() == RHS->getType()) |
8096 | 1 | return ConstantExpr::getUDiv(LHS, RHS); |
8097 | 6.60k | break; |
8098 | 6.60k | } |
8099 | 14.8k | case scSMaxExpr: |
8100 | 14.8k | case scUMaxExpr: |
8101 | 14.8k | case scSMinExpr: |
8102 | 14.8k | case scUMinExpr: |
8103 | 14.8k | break; // TODO: smax, umax, smin, umax. |
8104 | 182k | } |
8105 | 182k | return nullptr; |
8106 | 182k | } |
8107 | | |
8108 | 6.34M | const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { |
8109 | 6.34M | if (isa<SCEVConstant>(V)) return V1.85M ; |
8110 | 4.49M | |
8111 | 4.49M | // If this instruction is evolved from a constant-evolving PHI, compute the |
8112 | 4.49M | // exit value from the loop without using SCEVs. |
8113 | 4.49M | if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V)) { |
8114 | 1.85M | if (Instruction *I = dyn_cast<Instruction>(SU->getValue())) { |
8115 | 1.40M | if (PHINode *PN = dyn_cast<PHINode>(I)) { |
8116 | 352k | const Loop *LI = this->LI[I->getParent()]; |
8117 | 352k | // Looking for loop exit value. |
8118 | 352k | if (LI && LI->getParentLoop() == L250k && |
8119 | 352k | PN->getParent() == LI->getHeader()17.2k ) { |
8120 | 12.4k | // Okay, there is no closed form solution for the PHI node. Check |
8121 | 12.4k | // to see if the loop that contains it has a known backedge-taken |
8122 | 12.4k | // count. If so, we may be able to force computation of the exit |
8123 | 12.4k | // value. |
8124 | 12.4k | const SCEV *BackedgeTakenCount = getBackedgeTakenCount(LI); |
8125 | 12.4k | // This trivial case can show up in some degenerate cases where |
8126 | 12.4k | // the incoming IR has not yet been fully simplified. |
8127 | 12.4k | if (BackedgeTakenCount->isZero()) { |
8128 | 78 | Value *InitValue = nullptr; |
8129 | 78 | bool MultipleInitValues = false; |
8130 | 234 | for (unsigned i = 0; i < PN->getNumIncomingValues(); i++156 ) { |
8131 | 156 | if (!LI->contains(PN->getIncomingBlock(i))) { |
8132 | 78 | if (!InitValue) |
8133 | 78 | InitValue = PN->getIncomingValue(i); |
8134 | 0 | else if (InitValue != PN->getIncomingValue(i)) { |
8135 | 0 | MultipleInitValues = true; |
8136 | 0 | break; |
8137 | 0 | } |
8138 | 78 | } |
8139 | 156 | } |
8140 | 78 | if (!MultipleInitValues && InitValue) |
8141 | 78 | return getSCEV(InitValue); |
8142 | 12.3k | } |
8143 | 12.3k | // Do we have a loop invariant value flowing around the backedge |
8144 | 12.3k | // for a loop which must execute the backedge? |
8145 | 12.3k | if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount) && |
8146 | 12.3k | isKnownPositive(BackedgeTakenCount)5.90k && |
8147 | 12.3k | PN->getNumIncomingValues() == 2685 ) { |
8148 | 684 | unsigned InLoopPred = LI->contains(PN->getIncomingBlock(0)) ? 0235 : 1449 ; |
8149 | 684 | const SCEV *OnBackedge = getSCEV(PN->getIncomingValue(InLoopPred)); |
8150 | 684 | if (IsAvailableOnEntry(LI, DT, OnBackedge, PN->getParent())) |
8151 | 10 | return OnBackedge; |
8152 | 12.3k | } |
8153 | 12.3k | if (auto *BTCC = dyn_cast<SCEVConstant>(BackedgeTakenCount)) { |
8154 | 670 | // Okay, we know how many times the containing loop executes. If |
8155 | 670 | // this is a constant evolving PHI node, get the final value at |
8156 | 670 | // the specified iteration number. |
8157 | 670 | Constant *RV = |
8158 | 670 | getConstantEvolutionLoopExitValue(PN, BTCC->getAPInt(), LI); |
8159 | 670 | if (RV) return getSCEV(RV)85 ; |
8160 | 352k | } |
8161 | 12.3k | } |
8162 | 352k | |
8163 | 352k | // If there is a single-input Phi, evaluate it at our scope. If we can |
8164 | 352k | // prove that this replacement does not break LCSSA form, use new value. |
8165 | 352k | if (PN->getNumOperands() == 1) { |
8166 | 31.1k | const SCEV *Input = getSCEV(PN->getOperand(0)); |
8167 | 31.1k | const SCEV *InputAtScope = getSCEVAtScope(Input, L); |
8168 | 31.1k | // TODO: We can generalize it using LI.replacementPreservesLCSSAForm, |
8169 | 31.1k | // for the simplest case just support constants. |
8170 | 31.1k | if (isa<SCEVConstant>(InputAtScope)) return InputAtScope87 ; |
8171 | 1.39M | } |
8172 | 352k | } |
8173 | 1.39M | |
8174 | 1.39M | // Okay, this is an expression that we cannot symbolically evaluate |
8175 | 1.39M | // into a SCEV. Check to see if it's possible to symbolically evaluate |
8176 | 1.39M | // the arguments into constants, and if so, try to constant propagate the |
8177 | 1.39M | // result. This is particularly useful for computing loop exit values. |
8178 | 1.39M | if (CanConstantFold(I)) { |
8179 | 881k | SmallVector<Constant *, 4> Operands; |
8180 | 881k | bool MadeImprovement = false; |
8181 | 882k | for (Value *Op : I->operands()) { |
8182 | 882k | if (Constant *C = dyn_cast<Constant>(Op)) { |
8183 | 65.4k | Operands.push_back(C); |
8184 | 65.4k | continue; |
8185 | 65.4k | } |
8186 | 817k | |
8187 | 817k | // If any of the operands is non-constant and if they are |
8188 | 817k | // non-integer and non-pointer, don't even try to analyze them |
8189 | 817k | // with scev techniques. |
8190 | 817k | if (!isSCEVable(Op->getType())) |
8191 | 11.4k | return V; |
8192 | 805k | |
8193 | 805k | const SCEV *OrigV = getSCEV(Op); |
8194 | 805k | const SCEV *OpV = getSCEVAtScope(OrigV, L); |
8195 | 805k | MadeImprovement |= OrigV != OpV; |
8196 | 805k | |
8197 | 805k | Constant *C = BuildConstantFromSCEV(OpV); |
8198 | 805k | if (!C) return V805k ; |
8199 | 277 | if (C->getType() != Op->getType()) |
8200 | 158 | C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false, |
8201 | 158 | Op->getType(), |
8202 | 158 | false), |
8203 | 158 | C, Op->getType()); |
8204 | 277 | Operands.push_back(C); |
8205 | 277 | } |
8206 | 881k | |
8207 | 881k | // Check to see if getSCEVAtScope actually made an improvement. |
8208 | 881k | if (64.0k MadeImprovement64.0k ) { |
8209 | 205 | Constant *C = nullptr; |
8210 | 205 | const DataLayout &DL = getDataLayout(); |
8211 | 205 | if (const CmpInst *CI = dyn_cast<CmpInst>(I)) |
8212 | 21 | C = ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0], |
8213 | 21 | Operands[1], DL, &TLI); |
8214 | 184 | else if (const LoadInst *LI = dyn_cast<LoadInst>(I)) { |
8215 | 154 | if (!LI->isVolatile()) |
8216 | 154 | C = ConstantFoldLoadFromConstPtr(Operands[0], LI->getType(), DL); |
8217 | 154 | } else |
8218 | 30 | C = ConstantFoldInstOperands(I, Operands, DL, &TLI); |
8219 | 205 | if (!C) return V130 ; |
8220 | 75 | return getSCEV(C); |
8221 | 75 | } |
8222 | 64.0k | } |
8223 | 1.39M | } |
8224 | 1.04M | |
8225 | 1.04M | // This is some other type of SCEVUnknown, just return it. |
8226 | 1.04M | return V; |
8227 | 1.04M | } |
8228 | 2.63M | |
8229 | 2.63M | if (const SCEVCommutativeExpr *Comm = dyn_cast<SCEVCommutativeExpr>(V)) { |
8230 | 1.28M | // Avoid performing the look-up in the common case where the specified |
8231 | 1.28M | // expression has no loop-variant portions. |
8232 | 3.94M | for (unsigned i = 0, e = Comm->getNumOperands(); i != e; ++i2.66M ) { |
8233 | 2.66M | const SCEV *OpAtScope = getSCEVAtScope(Comm->getOperand(i), L); |
8234 | 2.66M | if (OpAtScope != Comm->getOperand(i)) { |
8235 | 2.10k | // Okay, at least one of these operands is loop variant but might be |
8236 | 2.10k | // foldable. Build a new instance of the folded commutative expression. |
8237 | 2.10k | SmallVector<const SCEV *, 8> NewOps(Comm->op_begin(), |
8238 | 2.10k | Comm->op_begin()+i); |
8239 | 2.10k | NewOps.push_back(OpAtScope); |
8240 | 2.10k | |
8241 | 3.50k | for (++i; i != e; ++i1.39k ) { |
8242 | 1.39k | OpAtScope = getSCEVAtScope(Comm->getOperand(i), L); |
8243 | 1.39k | NewOps.push_back(OpAtScope); |
8244 | 1.39k | } |
8245 | 2.10k | if (isa<SCEVAddExpr>(Comm)) |
8246 | 645 | return getAddExpr(NewOps, Comm->getNoWrapFlags()); |
8247 | 1.45k | if (isa<SCEVMulExpr>(Comm)) |
8248 | 1.33k | return getMulExpr(NewOps, Comm->getNoWrapFlags()); |
8249 | 123 | if (isa<SCEVMinMaxExpr>(Comm)) |
8250 | 123 | return getMinMaxExpr(Comm->getSCEVType(), NewOps); |
8251 | 0 | llvm_unreachable("Unknown commutative SCEV type!"); |
8252 | 0 | } |
8253 | 2.66M | } |
8254 | 1.28M | // If we got here, all operands are loop invariant. |
8255 | 1.28M | return Comm1.27M ; |
8256 | 1.35M | } |
8257 | 1.35M | |
8258 | 1.35M | if (const SCEVUDivExpr *Div = dyn_cast<SCEVUDivExpr>(V)) { |
8259 | 74.0k | const SCEV *LHS = getSCEVAtScope(Div->getLHS(), L); |
8260 | 74.0k | const SCEV *RHS = getSCEVAtScope(Div->getRHS(), L); |
8261 | 74.0k | if (LHS == Div->getLHS() && RHS == Div->getRHS()73.9k ) |
8262 | 73.9k | return Div; // must be loop invariant |
8263 | 131 | return getUDivExpr(LHS, RHS); |
8264 | 131 | } |
8265 | 1.28M | |
8266 | 1.28M | // If this is a loop recurrence for a loop that does not contain L, then we |
8267 | 1.28M | // are dealing with the final value computed by the loop. |
8268 | 1.28M | if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V)) { |
8269 | 830k | // First, attempt to evaluate each operand. |
8270 | 830k | // Avoid performing the look-up in the common case where the specified |
8271 | 830k | // expression has no loop-variant portions. |
8272 | 2.49M | for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i1.66M ) { |
8273 | 1.66M | const SCEV *OpAtScope = getSCEVAtScope(AddRec->getOperand(i), L); |
8274 | 1.66M | if (OpAtScope == AddRec->getOperand(i)) |
8275 | 1.66M | continue; |
8276 | 674 | |
8277 | 674 | // Okay, at least one of these operands is loop variant but might be |
8278 | 674 | // foldable. Build a new instance of the folded commutative expression. |
8279 | 674 | SmallVector<const SCEV *, 8> NewOps(AddRec->op_begin(), |
8280 | 674 | AddRec->op_begin()+i); |
8281 | 674 | NewOps.push_back(OpAtScope); |
8282 | 1.61k | for (++i; i != e; ++i945 ) |
8283 | 945 | NewOps.push_back(getSCEVAtScope(AddRec->getOperand(i), L)); |
8284 | 674 | |
8285 | 674 | const SCEV *FoldedRec = |
8286 | 674 | getAddRecExpr(NewOps, AddRec->getLoop(), |
8287 | 674 | AddRec->getNoWrapFlags(SCEV::FlagNW)); |
8288 | 674 | AddRec = dyn_cast<SCEVAddRecExpr>(FoldedRec); |
8289 | 674 | // The addrec may be folded to a nonrecurrence, for example, if the |
8290 | 674 | // induction variable is multiplied by zero after constant folding. Go |
8291 | 674 | // ahead and return the folded value. |
8292 | 674 | if (!AddRec) |
8293 | 4 | return FoldedRec; |
8294 | 670 | break; |
8295 | 670 | } |
8296 | 830k | |
8297 | 830k | // If the scope is outside the addrec's loop, evaluate it by using the |
8298 | 830k | // loop exit value of the addrec. |
8299 | 830k | if (830k !AddRec->getLoop()->contains(L)830k ) { |
8300 | 37.4k | // To evaluate this recurrence, we need to know how many times the AddRec |
8301 | 37.4k | // loop iterates. Compute this now. |
8302 | 37.4k | const SCEV *BackedgeTakenCount = getBackedgeTakenCount(AddRec->getLoop()); |
8303 | 37.4k | if (BackedgeTakenCount == getCouldNotCompute()) return AddRec21.3k ; |
8304 | 16.1k | |
8305 | 16.1k | // Then, evaluate the AddRec. |
8306 | 16.1k | return AddRec->evaluateAtIteration(BackedgeTakenCount, *this); |
8307 | 16.1k | } |
8308 | 792k | |
8309 | 792k | return AddRec; |
8310 | 792k | } |
8311 | 450k | |
8312 | 450k | if (const SCEVZeroExtendExpr *Cast = dyn_cast<SCEVZeroExtendExpr>(V)) { |
8313 | 238k | const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L); |
8314 | 238k | if (Op == Cast->getOperand()) |
8315 | 238k | return Cast; // must be loop invariant |
8316 | 108 | return getZeroExtendExpr(Op, Cast->getType()); |
8317 | 108 | } |
8318 | 212k | |
8319 | 212k | if (const SCEVSignExtendExpr *Cast = dyn_cast<SCEVSignExtendExpr>(V)) { |
8320 | 157k | const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L); |
8321 | 157k | if (Op == Cast->getOperand()) |
8322 | 157k | return Cast; // must be loop invariant |
8323 | 95 | return getSignExtendExpr(Op, Cast->getType()); |
8324 | 95 | } |
8325 | 54.8k | |
8326 | 54.8k | if (const SCEVTruncateExpr *Cast = dyn_cast<SCEVTruncateExpr>(V)) { |
8327 | 54.8k | const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L); |
8328 | 54.8k | if (Op == Cast->getOperand()) |
8329 | 54.7k | return Cast; // must be loop invariant |
8330 | 72 | return getTruncateExpr(Op, Cast->getType()); |
8331 | 72 | } |
8332 | 0 | |
8333 | 0 | llvm_unreachable("Unknown SCEV type!"); |
8334 | 0 | } |
8335 | | |
8336 | 947k | const SCEV *ScalarEvolution::getSCEVAtScope(Value *V, const Loop *L) { |
8337 | 947k | return getSCEVAtScope(getSCEV(V), L); |
8338 | 947k | } |
8339 | | |
8340 | 453k | const SCEV *ScalarEvolution::stripInjectiveFunctions(const SCEV *S) const { |
8341 | 453k | if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) |
8342 | 533 | return stripInjectiveFunctions(ZExt->getOperand()); |
8343 | 453k | if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) |
8344 | 5 | return stripInjectiveFunctions(SExt->getOperand()); |
8345 | 453k | return S; |
8346 | 453k | } |
8347 | | |
8348 | | /// Finds the minimum unsigned root of the following equation: |
8349 | | /// |
8350 | | /// A * X = B (mod N) |
8351 | | /// |
8352 | | /// where N = 2^BW and BW is the common bit width of A and B. The signedness of |
8353 | | /// A and B isn't important. |
8354 | | /// |
8355 | | /// If the equation does not have a solution, SCEVCouldNotCompute is returned. |
8356 | | static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const SCEV *B, |
8357 | 60.0k | ScalarEvolution &SE) { |
8358 | 60.0k | uint32_t BW = A.getBitWidth(); |
8359 | 60.0k | assert(BW == SE.getTypeSizeInBits(B->getType())); |
8360 | 60.0k | assert(A != 0 && "A must be non-zero."); |
8361 | 60.0k | |
8362 | 60.0k | // 1. D = gcd(A, N) |
8363 | 60.0k | // |
8364 | 60.0k | // The gcd of A and N may have only one prime factor: 2. The number of |
8365 | 60.0k | // trailing zeros in A is its multiplicity |
8366 | 60.0k | uint32_t Mult2 = A.countTrailingZeros(); |
8367 | 60.0k | // D = 2^Mult2 |
8368 | 60.0k | |
8369 | 60.0k | // 2. Check if B is divisible by D. |
8370 | 60.0k | // |
8371 | 60.0k | // B is divisible by D if and only if the multiplicity of prime factor 2 for B |
8372 | 60.0k | // is not less than multiplicity of this prime factor for D. |
8373 | 60.0k | if (SE.GetMinTrailingZeros(B) < Mult2) |
8374 | 32.2k | return SE.getCouldNotCompute(); |
8375 | 27.8k | |
8376 | 27.8k | // 3. Compute I: the multiplicative inverse of (A / D) in arithmetic |
8377 | 27.8k | // modulo (N / D). |
8378 | 27.8k | // |
8379 | 27.8k | // If D == 1, (N / D) == N == 2^BW, so we need one extra bit to represent |
8380 | 27.8k | // (N / D) in general. The inverse itself always fits into BW bits, though, |
8381 | 27.8k | // so we immediately truncate it. |
8382 | 27.8k | APInt AD = A.lshr(Mult2).zext(BW + 1); // AD = A / D |
8383 | 27.8k | APInt Mod(BW + 1, 0); |
8384 | 27.8k | Mod.setBit(BW - Mult2); // Mod = N / D |
8385 | 27.8k | APInt I = AD.multiplicativeInverse(Mod).trunc(BW); |
8386 | 27.8k | |
8387 | 27.8k | // 4. Compute the minimum unsigned root of the equation: |
8388 | 27.8k | // I * (B / D) mod (N / D) |
8389 | 27.8k | // To simplify the computation, we factor out the divide by D: |
8390 | 27.8k | // (I * B mod N) / D |
8391 | 27.8k | const SCEV *D = SE.getConstant(APInt::getOneBitSet(BW, Mult2)); |
8392 | 27.8k | return SE.getUDivExactExpr(SE.getMulExpr(B, SE.getConstant(I)), D); |
8393 | 27.8k | } |
8394 | | |
8395 | | /// For a given quadratic addrec, generate coefficients of the corresponding |
8396 | | /// quadratic equation, multiplied by a common value to ensure that they are |
8397 | | /// integers. |
8398 | | /// The returned value is a tuple { A, B, C, M, BitWidth }, where |
8399 | | /// Ax^2 + Bx + C is the quadratic function, M is the value that A, B and C |
8400 | | /// were multiplied by, and BitWidth is the bit width of the original addrec |
8401 | | /// coefficients. |
8402 | | /// This function returns None if the addrec coefficients are not compile- |
8403 | | /// time constants. |
8404 | | static Optional<std::tuple<APInt, APInt, APInt, APInt, unsigned>> |
8405 | 25 | GetQuadraticEquation(const SCEVAddRecExpr *AddRec) { |
8406 | 25 | assert(AddRec->getNumOperands() == 3 && "This is not a quadratic chrec!"); |
8407 | 25 | const SCEVConstant *LC = dyn_cast<SCEVConstant>(AddRec->getOperand(0)); |
8408 | 25 | const SCEVConstant *MC = dyn_cast<SCEVConstant>(AddRec->getOperand(1)); |
8409 | 25 | const SCEVConstant *NC = dyn_cast<SCEVConstant>(AddRec->getOperand(2)); |
8410 | 25 | LLVM_DEBUG(dbgs() << __func__ << ": analyzing quadratic addrec: " |
8411 | 25 | << *AddRec << '\n'); |
8412 | 25 | |
8413 | 25 | // We currently can only solve this if the coefficients are constants. |
8414 | 25 | if (!LC || !MC || !NC) { |
8415 | 0 | LLVM_DEBUG(dbgs() << __func__ << ": coefficients are not constant\n"); |
8416 | 0 | return None; |
8417 | 0 | } |
8418 | 25 | |
8419 | 25 | APInt L = LC->getAPInt(); |
8420 | 25 | APInt M = MC->getAPInt(); |
8421 | 25 | APInt N = NC->getAPInt(); |
8422 | 25 | assert(!N.isNullValue() && "This is not a quadratic addrec"); |
8423 | 25 | |
8424 | 25 | unsigned BitWidth = LC->getAPInt().getBitWidth(); |
8425 | 25 | unsigned NewWidth = BitWidth + 1; |
8426 | 25 | LLVM_DEBUG(dbgs() << __func__ << ": addrec coeff bw: " |
8427 | 25 | << BitWidth << '\n'); |
8428 | 25 | // The sign-extension (as opposed to a zero-extension) here matches the |
8429 | 25 | // extension used in SolveQuadraticEquationWrap (with the same motivation). |
8430 | 25 | N = N.sext(NewWidth); |
8431 | 25 | M = M.sext(NewWidth); |
8432 | 25 | L = L.sext(NewWidth); |
8433 | 25 | |
8434 | 25 | // The increments are M, M+N, M+2N, ..., so the accumulated values are |
8435 | 25 | // L+M, (L+M)+(M+N), (L+M)+(M+N)+(M+2N), ..., that is, |
8436 | 25 | // L+M, L+2M+N, L+3M+3N, ... |
8437 | 25 | // After n iterations the accumulated value Acc is L + nM + n(n-1)/2 N. |
8438 | 25 | // |
8439 | 25 | // The equation Acc = 0 is then |
8440 | 25 | // L + nM + n(n-1)/2 N = 0, or 2L + 2M n + n(n-1) N = 0. |
8441 | 25 | // In a quadratic form it becomes: |
8442 | 25 | // N n^2 + (2M-N) n + 2L = 0. |
8443 | 25 | |
8444 | 25 | APInt A = N; |
8445 | 25 | APInt B = 2 * M - A; |
8446 | 25 | APInt C = 2 * L; |
8447 | 25 | APInt T = APInt(NewWidth, 2); |
8448 | 25 | LLVM_DEBUG(dbgs() << __func__ << ": equation " << A << "x^2 + " << B |
8449 | 25 | << "x + " << C << ", coeff bw: " << NewWidth |
8450 | 25 | << ", multiplied by " << T << '\n'); |
8451 | 25 | return std::make_tuple(A, B, C, T, BitWidth); |
8452 | 25 | } |
8453 | | |
8454 | | /// Helper function to compare optional APInts: |
8455 | | /// (a) if X and Y both exist, return min(X, Y), |
8456 | | /// (b) if neither X nor Y exist, return None, |
8457 | | /// (c) if exactly one of X and Y exists, return that value. |
8458 | 45 | static Optional<APInt> MinOptional(Optional<APInt> X, Optional<APInt> Y) { |
8459 | 45 | if (X.hasValue() && Y.hasValue()32 ) { |
8460 | 32 | unsigned W = std::max(X->getBitWidth(), Y->getBitWidth()); |
8461 | 32 | APInt XW = X->sextOrSelf(W); |
8462 | 32 | APInt YW = Y->sextOrSelf(W); |
8463 | 32 | return XW.slt(YW) ? *X10 : *Y22 ; |
8464 | 32 | } |
8465 | 13 | if (!X.hasValue() && !Y.hasValue()) |
8466 | 5 | return None; |
8467 | 8 | return X.hasValue() ? *X0 : *Y; |
8468 | 8 | } |
8469 | | |
8470 | | /// Helper function to truncate an optional APInt to a given BitWidth. |
8471 | | /// When solving addrec-related equations, it is preferable to return a value |
8472 | | /// that has the same bit width as the original addrec's coefficients. If the |
8473 | | /// solution fits in the original bit width, truncate it (except for i1). |
8474 | | /// Returning a value of a different bit width may inhibit some optimizations. |
8475 | | /// |
8476 | | /// In general, a solution to a quadratic equation generated from an addrec |
8477 | | /// may require BW+1 bits, where BW is the bit width of the addrec's |
8478 | | /// coefficients. The reason is that the coefficients of the quadratic |
8479 | | /// equation are BW+1 bits wide (to avoid truncation when converting from |
8480 | | /// the addrec to the equation). |
8481 | 17 | static Optional<APInt> TruncIfPossible(Optional<APInt> X, unsigned BitWidth) { |
8482 | 17 | if (!X.hasValue()) |
8483 | 5 | return None; |
8484 | 12 | unsigned W = X->getBitWidth(); |
8485 | 12 | if (BitWidth > 1 && BitWidth < W10 && X->isIntN(BitWidth)10 ) |
8486 | 10 | return X->trunc(BitWidth); |
8487 | 2 | return X; |
8488 | 2 | } |
8489 | | |
8490 | | /// Let c(n) be the value of the quadratic chrec {L,+,M,+,N} after n |
8491 | | /// iterations. The values L, M, N are assumed to be signed, and they |
8492 | | /// should all have the same bit widths. |
8493 | | /// Find the least n >= 0 such that c(n) = 0 in the arithmetic modulo 2^BW, |
8494 | | /// where BW is the bit width of the addrec's coefficients. |
8495 | | /// If the calculated value is a BW-bit integer (for BW > 1), it will be |
8496 | | /// returned as such, otherwise the bit width of the returned value may |
8497 | | /// be greater than BW. |
8498 | | /// |
8499 | | /// This function returns None if |
8500 | | /// (a) the addrec coefficients are not constant, or |
8501 | | /// (b) SolveQuadraticEquationWrap was unable to find a solution. For cases |
8502 | | /// like x^2 = 5, no integer solutions exist, in other cases an integer |
8503 | | /// solution may exist, but SolveQuadraticEquationWrap may fail to find it. |
8504 | | static Optional<APInt> |
8505 | 10 | SolveQuadraticAddRecExact(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) { |
8506 | 10 | APInt A, B, C, M; |
8507 | 10 | unsigned BitWidth; |
8508 | 10 | auto T = GetQuadraticEquation(AddRec); |
8509 | 10 | if (!T.hasValue()) |
8510 | 0 | return None; |
8511 | 10 | |
8512 | 10 | std::tie(A, B, C, M, BitWidth) = *T; |
8513 | 10 | LLVM_DEBUG(dbgs() << __func__ << ": solving for unsigned overflow\n"); |
8514 | 10 | Optional<APInt> X = APIntOps::SolveQuadraticEquationWrap(A, B, C, BitWidth+1); |
8515 | 10 | if (!X.hasValue()) |
8516 | 0 | return None; |
8517 | 10 | |
8518 | 10 | ConstantInt *CX = ConstantInt::get(SE.getContext(), *X); |
8519 | 10 | ConstantInt *V = EvaluateConstantChrecAtConstant(AddRec, CX, SE); |
8520 | 10 | if (!V->isZero()) |
8521 | 8 | return None; |
8522 | 2 | |
8523 | 2 | return TruncIfPossible(X, BitWidth); |
8524 | 2 | } |
8525 | | |
8526 | | /// Let c(n) be the value of the quadratic chrec {0,+,M,+,N} after n |
8527 | | /// iterations. The values M, N are assumed to be signed, and they |
8528 | | /// should all have the same bit widths. |
8529 | | /// Find the least n such that c(n) does not belong to the given range, |
8530 | | /// while c(n-1) does. |
8531 | | /// |
8532 | | /// This function returns None if |
8533 | | /// (a) the addrec coefficients are not constant, or |
8534 | | /// (b) SolveQuadraticEquationWrap was unable to find a solution for the |
8535 | | /// bounds of the range. |
8536 | | static Optional<APInt> |
8537 | | SolveQuadraticAddRecRange(const SCEVAddRecExpr *AddRec, |
8538 | 15 | const ConstantRange &Range, ScalarEvolution &SE) { |
8539 | 15 | assert(AddRec->getOperand(0)->isZero() && |
8540 | 15 | "Starting value of addrec should be 0"); |
8541 | 15 | LLVM_DEBUG(dbgs() << __func__ << ": solving boundary crossing for range " |
8542 | 15 | << Range << ", addrec " << *AddRec << '\n'); |
8543 | 15 | // This case is handled in getNumIterationsInRange. Here we can assume that |
8544 | 15 | // we start in the range. |
8545 | 15 | assert(Range.contains(APInt(SE.getTypeSizeInBits(AddRec->getType()), 0)) && |
8546 | 15 | "Addrec's initial value should be in range"); |
8547 | 15 | |
8548 | 15 | APInt A, B, C, M; |
8549 | 15 | unsigned BitWidth; |
8550 | 15 | auto T = GetQuadraticEquation(AddRec); |
8551 | 15 | if (!T.hasValue()) |
8552 | 0 | return None; |
8553 | 15 | |
8554 | 15 | // Be careful about the return value: there can be two reasons for not |
8555 | 15 | // returning an actual number. First, if no solutions to the equations |
8556 | 15 | // were found, and second, if the solutions don't leave the given range. |
8557 | 15 | // The first case means that the actual solution is "unknown", the second |
8558 | 15 | // means that it's known, but not valid. If the solution is unknown, we |
8559 | 15 | // cannot make any conclusions. |
8560 | 15 | // Return a pair: the optional solution and a flag indicating if the |
8561 | 15 | // solution was found. |
8562 | 30 | auto SolveForBoundary = [&](APInt Bound) -> std::pair<Optional<APInt>,bool> 15 { |
8563 | 30 | // Solve for signed overflow and unsigned overflow, pick the lower |
8564 | 30 | // solution. |
8565 | 30 | LLVM_DEBUG(dbgs() << "SolveQuadraticAddRecRange: checking boundary " |
8566 | 30 | << Bound << " (before multiplying by " << M << ")\n"); |
8567 | 30 | Bound *= M; // The quadratic equation multiplier. |
8568 | 30 | |
8569 | 30 | Optional<APInt> SO = None; |
8570 | 30 | if (BitWidth > 1) { |
8571 | 30 | LLVM_DEBUG(dbgs() << "SolveQuadraticAddRecRange: solving for " |
8572 | 30 | "signed overflow\n"); |
8573 | 30 | SO = APIntOps::SolveQuadraticEquationWrap(A, B, -Bound, BitWidth); |
8574 | 30 | } |
8575 | 30 | LLVM_DEBUG(dbgs() << "SolveQuadraticAddRecRange: solving for " |
8576 | 30 | "unsigned overflow\n"); |
8577 | 30 | Optional<APInt> UO = APIntOps::SolveQuadraticEquationWrap(A, B, -Bound, |
8578 | 30 | BitWidth+1); |
8579 | 30 | |
8580 | 50 | auto LeavesRange = [&] (const APInt &X) { |
8581 | 50 | ConstantInt *C0 = ConstantInt::get(SE.getContext(), X); |
8582 | 50 | ConstantInt *V0 = EvaluateConstantChrecAtConstant(AddRec, C0, SE); |
8583 | 50 | if (Range.contains(V0->getValue())) |
8584 | 35 | return false; |
8585 | 15 | // X should be at least 1, so X-1 is non-negative. |
8586 | 15 | ConstantInt *C1 = ConstantInt::get(SE.getContext(), X-1); |
8587 | 15 | ConstantInt *V1 = EvaluateConstantChrecAtConstant(AddRec, C1, SE); |
8588 | 15 | if (Range.contains(V1->getValue())) |
8589 | 12 | return true; |
8590 | 3 | return false; |
8591 | 3 | }; |
8592 | 30 | |
8593 | 30 | // If SolveQuadraticEquationWrap returns None, it means that there can |
8594 | 30 | // be a solution, but the function failed to find it. We cannot treat it |
8595 | 30 | // as "no solution". |
8596 | 30 | if (!SO.hasValue() || !UO.hasValue()) |
8597 | 0 | return { None, false }; |
8598 | 30 | |
8599 | 30 | // Check the smaller value first to see if it leaves the range. |
8600 | 30 | // At this point, both SO and UO must have values. |
8601 | 30 | Optional<APInt> Min = MinOptional(SO, UO); |
8602 | 30 | if (LeavesRange(*Min)) |
8603 | 10 | return { Min, true }; |
8604 | 20 | Optional<APInt> Max = Min == SO ? UO : SO0 ; |
8605 | 20 | if (LeavesRange(*Max)) |
8606 | 2 | return { Max, true }; |
8607 | 18 | |
8608 | 18 | // Solutions were found, but were eliminated, hence the "true". |
8609 | 18 | return { None, true }; |
8610 | 18 | }; |
8611 | 15 | |
8612 | 15 | std::tie(A, B, C, M, BitWidth) = *T; |
8613 | 15 | // Lower bound is inclusive, subtract 1 to represent the exiting value. |
8614 | 15 | APInt Lower = Range.getLower().sextOrSelf(A.getBitWidth()) - 1; |
8615 | 15 | APInt Upper = Range.getUpper().sextOrSelf(A.getBitWidth()); |
8616 | 15 | auto SL = SolveForBoundary(Lower); |
8617 | 15 | auto SU = SolveForBoundary(Upper); |
8618 | 15 | // If any of the solutions was unknown, no meaninigful conclusions can |
8619 | 15 | // be made. |
8620 | 15 | if (!SL.second || !SU.second) |
8621 | 0 | return None; |
8622 | 15 | |
8623 | 15 | // Claim: The correct solution is not some value between Min and Max. |
8624 | 15 | // |
8625 | 15 | // Justification: Assuming that Min and Max are different values, one of |
8626 | 15 | // them is when the first signed overflow happens, the other is when the |
8627 | 15 | // first unsigned overflow happens. Crossing the range boundary is only |
8628 | 15 | // possible via an overflow (treating 0 as a special case of it, modeling |
8629 | 15 | // an overflow as crossing k*2^W for some k). |
8630 | 15 | // |
8631 | 15 | // The interesting case here is when Min was eliminated as an invalid |
8632 | 15 | // solution, but Max was not. The argument is that if there was another |
8633 | 15 | // overflow between Min and Max, it would also have been eliminated if |
8634 | 15 | // it was considered. |
8635 | 15 | // |
8636 | 15 | // For a given boundary, it is possible to have two overflows of the same |
8637 | 15 | // type (signed/unsigned) without having the other type in between: this |
8638 | 15 | // can happen when the vertex of the parabola is between the iterations |
8639 | 15 | // corresponding to the overflows. This is only possible when the two |
8640 | 15 | // overflows cross k*2^W for the same k. In such case, if the second one |
8641 | 15 | // left the range (and was the first one to do so), the first overflow |
8642 | 15 | // would have to enter the range, which would mean that either we had left |
8643 | 15 | // the range before or that we started outside of it. Both of these cases |
8644 | 15 | // are contradictions. |
8645 | 15 | // |
8646 | 15 | // Claim: In the case where SolveForBoundary returns None, the correct |
8647 | 15 | // solution is not some value between the Max for this boundary and the |
8648 | 15 | // Min of the other boundary. |
8649 | 15 | // |
8650 | 15 | // Justification: Assume that we had such Max_A and Min_B corresponding |
8651 | 15 | // to range boundaries A and B and such that Max_A < Min_B. If there was |
8652 | 15 | // a solution between Max_A and Min_B, it would have to be caused by an |
8653 | 15 | // overflow corresponding to either A or B. It cannot correspond to B, |
8654 | 15 | // since Min_B is the first occurrence of such an overflow. If it |
8655 | 15 | // corresponded to A, it would have to be either a signed or an unsigned |
8656 | 15 | // overflow that is larger than both eliminated overflows for A. But |
8657 | 15 | // between the eliminated overflows and this overflow, the values would |
8658 | 15 | // cover the entire value space, thus crossing the other boundary, which |
8659 | 15 | // is a contradiction. |
8660 | 15 | |
8661 | 15 | return TruncIfPossible(MinOptional(SL.first, SU.first), BitWidth); |
8662 | 15 | } |
8663 | | |
8664 | | ScalarEvolution::ExitLimit |
8665 | | ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit, |
8666 | 453k | bool AllowPredicates) { |
8667 | 453k | |
8668 | 453k | // This is only used for loops with a "x != y" exit test. The exit condition |
8669 | 453k | // is now expressed as a single expression, V = x-y. So the exit test is |
8670 | 453k | // effectively V != 0. We know and take advantage of the fact that this |
8671 | 453k | // expression only being used in a comparison by zero context. |
8672 | 453k | |
8673 | 453k | SmallPtrSet<const SCEVPredicate *, 4> Predicates; |
8674 | 453k | // If the value is a constant |
8675 | 453k | if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) { |
8676 | 14 | // If the value is already zero, the branch will execute zero times. |
8677 | 14 | if (C->getValue()->isZero()) return C; |
8678 | 0 | return getCouldNotCompute(); // Otherwise it will loop infinitely. |
8679 | 0 | } |
8680 | 453k | |
8681 | 453k | const SCEVAddRecExpr *AddRec = |
8682 | 453k | dyn_cast<SCEVAddRecExpr>(stripInjectiveFunctions(V)); |
8683 | 453k | |
8684 | 453k | if (!AddRec && AllowPredicates223k ) |
8685 | 23.4k | // Try to make this an AddRec using runtime tests, in the first X |
8686 | 23.4k | // iterations of this loop, where X is the SCEV expression found by the |
8687 | 23.4k | // algorithm below. |
8688 | 23.4k | AddRec = convertSCEVToAddRecWithPredicates(V, L, Predicates); |
8689 | 453k | |
8690 | 453k | if (!AddRec || AddRec->getLoop() != L229k ) |
8691 | 223k | return getCouldNotCompute(); |
8692 | 229k | |
8693 | 229k | // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of |
8694 | 229k | // the quadratic equation to solve it. |
8695 | 229k | if (AddRec->isQuadratic() && AddRec->getType()->isIntegerTy()10 ) { |
8696 | 10 | // We can only use this value if the chrec ends up with an exact zero |
8697 | 10 | // value at this index. When solving for "X*X != 5", for example, we |
8698 | 10 | // should not accept a root of 2. |
8699 | 10 | if (auto S = SolveQuadraticAddRecExact(AddRec, *this)) { |
8700 | 2 | const auto *R = cast<SCEVConstant>(getConstant(S.getValue())); |
8701 | 2 | return ExitLimit(R, R, false, Predicates); |
8702 | 2 | } |
8703 | 8 | return getCouldNotCompute(); |
8704 | 8 | } |
8705 | 229k | |
8706 | 229k | // Otherwise we can only handle this if it is affine. |
8707 | 229k | if (!AddRec->isAffine()) |
8708 | 0 | return getCouldNotCompute(); |
8709 | 229k | |
8710 | 229k | // If this is an affine expression, the execution count of this branch is |
8711 | 229k | // the minimum unsigned root of the following equation: |
8712 | 229k | // |
8713 | 229k | // Start + Step*N = 0 (mod 2^BW) |
8714 | 229k | // |
8715 | 229k | // equivalent to: |
8716 | 229k | // |
8717 | 229k | // Step*N = -Start (mod 2^BW) |
8718 | 229k | // |
8719 | 229k | // where BW is the common bit width of Start and Step. |
8720 | 229k | |
8721 | 229k | // Get the initial value for the loop. |
8722 | 229k | const SCEV *Start = getSCEVAtScope(AddRec->getStart(), L->getParentLoop()); |
8723 | 229k | const SCEV *Step = getSCEVAtScope(AddRec->getOperand(1), L->getParentLoop()); |
8724 | 229k | |
8725 | 229k | // For now we handle only constant steps. |
8726 | 229k | // |
8727 | 229k | // TODO: Handle a nonconstant Step given AddRec<NUW>. If the |
8728 | 229k | // AddRec is NUW, then (in an unsigned sense) it cannot be counting up to wrap |
8729 | 229k | // to 0, it must be counting down to equal 0. Consequently, N = Start / -Step. |
8730 | 229k | // We have not yet seen any such cases. |
8731 | 229k | const SCEVConstant *StepC = dyn_cast<SCEVConstant>(Step); |
8732 | 229k | if (!StepC || StepC->getValue()->isZero()229k ) |
8733 | 179 | return getCouldNotCompute(); |
8734 | 229k | |
8735 | 229k | // For positive steps (counting up until unsigned overflow): |
8736 | 229k | // N = -Start/Step (as unsigned) |
8737 | 229k | // For negative steps (counting down to zero): |
8738 | 229k | // N = Start/-Step |
8739 | 229k | // First compute the unsigned distance from zero in the direction of Step. |
8740 | 229k | bool CountDown = StepC->getAPInt().isNegative(); |
8741 | 229k | const SCEV *Distance = CountDown ? Start34.2k : getNegativeSCEV(Start)195k ; |
8742 | 229k | |
8743 | 229k | // Handle unitary steps, which cannot wraparound. |
8744 | 229k | // 1*N = -Start; -1*N = Start (mod 2^BW), so: |
8745 | 229k | // N = Distance (as unsigned) |
8746 | 229k | if (StepC->getValue()->isOne() || StepC->getValue()->isMinusOne()89.2k ) { |
8747 | 162k | APInt MaxBECount = getUnsignedRangeMax(Distance); |
8748 | 162k | |
8749 | 162k | // When a loop like "for (int i = 0; i != n; ++i) { /* body */ }" is rotated, |
8750 | 162k | // we end up with a loop whose backedge-taken count is n - 1. Detect this |
8751 | 162k | // case, and see if we can improve the bound. |
8752 | 162k | // |
8753 | 162k | // Explicitly handling this here is necessary because getUnsignedRange |
8754 | 162k | // isn't context-sensitive; it doesn't know that we only care about the |
8755 | 162k | // range inside the loop. |
8756 | 162k | const SCEV *Zero = getZero(Distance->getType()); |
8757 | 162k | const SCEV *One = getOne(Distance->getType()); |
8758 | 162k | const SCEV *DistancePlusOne = getAddExpr(Distance, One); |
8759 | 162k | if (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, DistancePlusOne, Zero)) { |
8760 | 85.6k | // If Distance + 1 doesn't overflow, we can compute the maximum distance |
8761 | 85.6k | // as "unsigned_max(Distance + 1) - 1". |
8762 | 85.6k | ConstantRange CR = getUnsignedRange(DistancePlusOne); |
8763 | 85.6k | MaxBECount = APIntOps::umin(MaxBECount, CR.getUnsignedMax() - 1); |
8764 | 85.6k | } |
8765 | 162k | return ExitLimit(Distance, getConstant(MaxBECount), false, Predicates); |
8766 | 162k | } |
8767 | 67.3k | |
8768 | 67.3k | // If the condition controls loop exit (the loop exits only if the expression |
8769 | 67.3k | // is true) and the addition is no-wrap we can use unsigned divide to |
8770 | 67.3k | // compute the backedge count. In this case, the step may not divide the |
8771 | 67.3k | // distance, but we don't care because if the condition is "missed" the loop |
8772 | 67.3k | // will have undefined behavior due to wrapping. |
8773 | 67.3k | if (ControlsExit && AddRec->hasNoSelfWrap()64.9k && |
8774 | 67.3k | loopHasNoAbnormalExits(AddRec->getLoop())17.1k ) { |
8775 | 7.34k | const SCEV *Exact = |
8776 | 7.34k | getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step)2.91k : Step4.43k ); |
8777 | 7.34k | const SCEV *Max = |
8778 | 7.34k | Exact == getCouldNotCompute() |
8779 | 7.34k | ? Exact0 |
8780 | 7.34k | : getConstant(getUnsignedRangeMax(Exact)); |
8781 | 7.34k | return ExitLimit(Exact, Max, false, Predicates); |
8782 | 7.34k | } |
8783 | 60.0k | |
8784 | 60.0k | // Solve the general equation. |
8785 | 60.0k | const SCEV *E = SolveLinEquationWithOverflow(StepC->getAPInt(), |
8786 | 60.0k | getNegativeSCEV(Start), *this); |
8787 | 60.0k | const SCEV *M = E == getCouldNotCompute() |
8788 | 60.0k | ? E32.2k |
8789 | 60.0k | : getConstant(getUnsignedRangeMax(E))27.8k ; |
8790 | 60.0k | return ExitLimit(E, M, false, Predicates); |
8791 | 60.0k | } |
8792 | | |
8793 | | ScalarEvolution::ExitLimit |
8794 | 33.0k | ScalarEvolution::howFarToNonZero(const SCEV *V, const Loop *L) { |
8795 | 33.0k | // Loops that look like: while (X == 0) are very strange indeed. We don't |
8796 | 33.0k | // handle them yet except for the trivial case. This could be expanded in the |
8797 | 33.0k | // future as needed. |
8798 | 33.0k | |
8799 | 33.0k | // If the value is a constant, check to see if it is known to be non-zero |
8800 | 33.0k | // already. If so, the backedge will execute zero times. |
8801 | 33.0k | if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) { |
8802 | 23 | if (!C->getValue()->isZero()) |
8803 | 0 | return getZero(C->getType()); |
8804 | 23 | return getCouldNotCompute(); // Otherwise it will loop infinitely. |
8805 | 23 | } |
8806 | 33.0k | |
8807 | 33.0k | // We could implement others, but I really doubt anyone writes loops like |
8808 | 33.0k | // this, and if they did, they would already be constant folded. |
8809 | 33.0k | return getCouldNotCompute(); |
8810 | 33.0k | } |
8811 | | |
8812 | | std::pair<BasicBlock *, BasicBlock *> |
8813 | 5.29M | ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB) { |
8814 | 5.29M | // If the block has a unique predecessor, then there is no path from the |
8815 | 5.29M | // predecessor to the block that does not go through the direct edge |
8816 | 5.29M | // from the predecessor to the block. |
8817 | 5.29M | if (BasicBlock *Pred = BB->getSinglePredecessor()) |
8818 | 4.12M | return {Pred, BB}; |
8819 | 1.16M | |
8820 | 1.16M | // A loop's header is defined to be a block that dominates the loop. |
8821 | 1.16M | // If the header has a unique predecessor outside the loop, it must be |
8822 | 1.16M | // a block that has exactly one successor that can reach the loop. |
8823 | 1.16M | if (Loop *L = LI.getLoopFor(BB)) |
8824 | 492k | return {L->getLoopPredecessor(), L->getHeader()}; |
8825 | 670k | |
8826 | 670k | return {nullptr, nullptr}; |
8827 | 670k | } |
8828 | | |
8829 | | /// SCEV structural equivalence is usually sufficient for testing whether two |
8830 | | /// expressions are equal, however for the purposes of looking for a condition |
8831 | | /// guarding a loop, it can be useful to be a little more general, since a |
8832 | | /// front-end may have replicated the controlling expression. |
8833 | 25.7M | static bool HasSameValue(const SCEV *A, const SCEV *B) { |
8834 | 25.7M | // Quick check to see if they are the same SCEV. |
8835 | 25.7M | if (A == B) return true989k ; |
8836 | 24.7M | |
8837 | 24.7M | auto ComputesEqualValues = [](const Instruction *A, const Instruction *B) { |
8838 | 356k | // Not all instructions that are "identical" compute the same value. For |
8839 | 356k | // instance, two distinct alloca instructions allocating the same type are |
8840 | 356k | // identical and do not read memory; but compute distinct values. |
8841 | 356k | return A->isIdenticalTo(B) && (7.99k isa<BinaryOperator>(A)7.99k || isa<GetElementPtrInst>(A)7.99k ); |
8842 | 356k | }; |
8843 | 24.7M | |
8844 | 24.7M | // Otherwise, if they're both SCEVUnknown, it's possible that they hold |
8845 | 24.7M | // two different instructions with the same value. Check for this case. |
8846 | 24.7M | if (const SCEVUnknown *AU = dyn_cast<SCEVUnknown>(A)) |
8847 | 4.12M | if (const SCEVUnknown *BU = dyn_cast<SCEVUnknown>(B)) |
8848 | 469k | if (const Instruction *AI = dyn_cast<Instruction>(AU->getValue())) |
8849 | 411k | if (const Instruction *BI = dyn_cast<Instruction>(BU->getValue())) |
8850 | 356k | if (ComputesEqualValues(AI, BI)) |
8851 | 4 | return true; |
8852 | 24.7M | |
8853 | 24.7M | // Otherwise assume they may have a different value. |
8854 | 24.7M | return false; |
8855 | 24.7M | } |
8856 | | |
8857 | | bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred, |
8858 | | const SCEV *&LHS, const SCEV *&RHS, |
8859 | 18.3M | unsigned Depth) { |
8860 | 18.3M | bool Changed = false; |
8861 | 18.3M | // Simplifies ICMP to trivial true or false by turning it into '0 == 0' or |
8862 | 18.3M | // '0 != 0'. |
8863 | 18.3M | auto TrivialCase = [&](bool TriviallyTrue) { |
8864 | 4.06M | LHS = RHS = getConstant(ConstantInt::getFalse(getContext())); |
8865 | 4.06M | Pred = TriviallyTrue ? ICmpInst::ICMP_EQ14.7k : ICmpInst::ICMP_NE4.05M ; |
8866 | 4.06M | return true; |
8867 | 4.06M | }; |
8868 | 18.3M | // If we hit the max recursion limit bail out. |
8869 | 18.3M | if (Depth >= 3) |
8870 | 0 | return false; |
8871 | 18.3M | |
8872 | 18.3M | // Canonicalize a constant to the right side. |
8873 | 18.3M | if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) { |
8874 | 4.72M | // Check for both operands constant. |
8875 | 4.72M | if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) { |
8876 | 3.99M | if (ConstantExpr::getICmp(Pred, |
8877 | 3.99M | LHSC->getValue(), |
8878 | 3.99M | RHSC->getValue())->isNullValue()) |
8879 | 3.97M | return TrivialCase(false); |
8880 | 14.4k | else |
8881 | 14.4k | return TrivialCase(true); |
8882 | 737k | } |
8883 | 737k | // Otherwise swap the operands to put the constant on the right. |
8884 | 737k | std::swap(LHS, RHS); |
8885 | 737k | Pred = ICmpInst::getSwappedPredicate(Pred); |
8886 | 737k | Changed = true; |
8887 | 737k | } |
8888 | 18.3M | |
8889 | 18.3M | // If we're comparing an addrec with a value which is loop-invariant in the |
8890 | 18.3M | // addrec's loop, put the addrec on the left. Also make a dominance check, |
8891 | 18.3M | // as both operands could be addrecs loop-invariant in each other's loop. |
8892 | 18.3M | if (const SCEVAddRecExpr *14.3M AR14.3M = dyn_cast<SCEVAddRecExpr>(RHS)) { |
8893 | 116k | const Loop *L = AR->getLoop(); |
8894 | 116k | if (isLoopInvariant(LHS, L) && properlyDominates(LHS, L->getHeader())9.75k ) { |
8895 | 9.59k | std::swap(LHS, RHS); |
8896 | 9.59k | Pred = ICmpInst::getSwappedPredicate(Pred); |
8897 | 9.59k | Changed = true; |
8898 | 9.59k | } |
8899 | 116k | } |
8900 | 14.3M | |
8901 | 14.3M | // If there's a constant operand, canonicalize comparisons with boundary |
8902 | 14.3M | // cases, and canonicalize *-or-equal comparisons to regular comparisons. |
8903 | 14.3M | if (const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS)) { |
8904 | 10.5M | const APInt &RA = RC->getAPInt(); |
8905 | 10.5M | |
8906 | 10.5M | bool SimplifiedByConstantRange = false; |
8907 | 10.5M | |
8908 | 10.5M | if (!ICmpInst::isEquality(Pred)) { |
8909 | 6.00M | ConstantRange ExactCR = ConstantRange::makeExactICmpRegion(Pred, RA); |
8910 | 6.00M | if (ExactCR.isFullSet()) |
8911 | 20 | return TrivialCase(true); |
8912 | 6.00M | else if (ExactCR.isEmptySet()) |
8913 | 619 | return TrivialCase(false); |
8914 | 6.00M | |
8915 | 6.00M | APInt NewRHS; |
8916 | 6.00M | CmpInst::Predicate NewPred; |
8917 | 6.00M | if (ExactCR.getEquivalentICmp(NewPred, NewRHS) && |
8918 | 6.00M | ICmpInst::isEquality(NewPred)) { |
8919 | 1.16M | // We were able to convert an inequality to an equality. |
8920 | 1.16M | Pred = NewPred; |
8921 | 1.16M | RHS = getConstant(NewRHS); |
8922 | 1.16M | Changed = SimplifiedByConstantRange = true; |
8923 | 1.16M | } |
8924 | 6.00M | } |
8925 | 10.5M | |
8926 | 10.5M | if (10.5M !SimplifiedByConstantRange10.5M ) { |
8927 | 9.34M | switch (Pred) { |
8928 | 9.34M | default: |
8929 | 4.01M | break; |
8930 | 9.34M | case ICmpInst::ICMP_EQ: |
8931 | 4.50M | case ICmpInst::ICMP_NE: |
8932 | 4.50M | // Fold ((-1) * %a) + %b == 0 (equivalent to %b-%a == 0) into %a == %b. |
8933 | 4.50M | if (!RA) |
8934 | 2.15M | if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(LHS)) |
8935 | 239k | if (const SCEVMulExpr *ME = |
8936 | 32.8k | dyn_cast<SCEVMulExpr>(AE->getOperand(0))) |
8937 | 32.8k | if (AE->getNumOperands() == 2 && ME->getNumOperands() == 231.0k && |
8938 | 32.8k | ME->getOperand(0)->isAllOnesValue()30.7k ) { |
8939 | 28.5k | RHS = AE->getOperand(1); |
8940 | 28.5k | LHS = ME->getOperand(1); |
8941 | 28.5k | Changed = true; |
8942 | 28.5k | } |
8943 | 4.50M | break; |
8944 | 4.50M | |
8945 | 4.50M | |
8946 | 4.50M | // The "Should have been caught earlier!" messages refer to the fact |
8947 | 4.50M | // that the ExactCR.isFullSet() or ExactCR.isEmptySet() check above |
8948 | 4.50M | // should have fired on the corresponding cases, and canonicalized the |
8949 | 4.50M | // check to trivial case. |
8950 | 4.50M | |
8951 | 4.50M | case ICmpInst::ICMP_UGE: |
8952 | 193k | assert(!RA.isMinValue() && "Should have been caught earlier!"); |
8953 | 193k | Pred = ICmpInst::ICMP_UGT; |
8954 | 193k | RHS = getConstant(RA - 1); |
8955 | 193k | Changed = true; |
8956 | 193k | break; |
8957 | 4.50M | case ICmpInst::ICMP_ULE: |
8958 | 165k | assert(!RA.isMaxValue() && "Should have been caught earlier!"); |
8959 | 165k | Pred = ICmpInst::ICMP_ULT; |
8960 | 165k | RHS = getConstant(RA + 1); |
8961 | 165k | Changed = true; |
8962 | 165k | break; |
8963 | 4.50M | case ICmpInst::ICMP_SGE: |
8964 | 336k | assert(!RA.isMinSignedValue() && "Should have been caught earlier!"); |
8965 | 336k | Pred = ICmpInst::ICMP_SGT; |
8966 | 336k | RHS = getConstant(RA - 1); |
8967 | 336k | Changed = true; |
8968 | 336k | break; |
8969 | 4.50M | case ICmpInst::ICMP_SLE: |
8970 | 129k | assert(!RA.isMaxSignedValue() && "Should have been caught earlier!"); |
8971 | 129k | Pred = ICmpInst::ICMP_SLT; |
8972 | 129k | RHS = getConstant(RA + 1); |
8973 | 129k | Changed = true; |
8974 | 129k | break; |
8975 | 14.3M | } |
8976 | 14.3M | } |
8977 | 10.5M | } |
8978 | 14.3M | |
8979 | 14.3M | // Check for obvious equality. |
8980 | 14.3M | if (HasSameValue(LHS, RHS)) { |
8981 | 74.7k | if (ICmpInst::isTrueWhenEqual(Pred)) |
8982 | 282 | return TrivialCase(true); |
8983 | 74.4k | if (ICmpInst::isFalseWhenEqual(Pred)) |
8984 | 74.4k | return TrivialCase(false); |
8985 | 14.2M | } |
8986 | 14.2M | |
8987 | 14.2M | // If possible, canonicalize GE/LE comparisons to GT/LT comparisons, by |
8988 | 14.2M | // adding or subtracting 1 from one of the operands. |
8989 | 14.2M | switch (Pred) { |
8990 | 14.2M | case ICmpInst::ICMP_SLE: |
8991 | 250k | if (!getSignedRangeMax(RHS).isMaxSignedValue()) { |
8992 | 34.7k | RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS, |
8993 | 34.7k | SCEV::FlagNSW); |
8994 | 34.7k | Pred = ICmpInst::ICMP_SLT; |
8995 | 34.7k | Changed = true; |
8996 | 215k | } else if (!getSignedRangeMin(LHS).isMinSignedValue()) { |
8997 | 2.11k | LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS, |
8998 | 2.11k | SCEV::FlagNSW); |
8999 | 2.11k | Pred = ICmpInst::ICMP_SLT; |
9000 | 2.11k | Changed = true; |
9001 | 2.11k | } |
9002 | 250k | break; |
9003 | 14.2M | case ICmpInst::ICMP_SGE: |
9004 | 167k | if (!getSignedRangeMin(RHS).isMinSignedValue()) { |
9005 | 55.8k | RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS, |
9006 | 55.8k | SCEV::FlagNSW); |
9007 | 55.8k | Pred = ICmpInst::ICMP_SGT; |
9008 | 55.8k | Changed = true; |
9009 | 111k | } else if (!getSignedRangeMax(LHS).isMaxSignedValue()) { |
9010 | 2.40k | LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS, |
9011 | 2.40k | SCEV::FlagNSW); |
9012 | 2.40k | Pred = ICmpInst::ICMP_SGT; |
9013 | 2.40k | Changed = true; |
9014 | 2.40k | } |
9015 | 167k | break; |
9016 | 14.2M | case ICmpInst::ICMP_ULE: |
9017 | 80.0k | if (!getUnsignedRangeMax(RHS).isMaxValue()) { |
9018 | 5.88k | RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS, |
9019 | 5.88k | SCEV::FlagNUW); |
9020 | 5.88k | Pred = ICmpInst::ICMP_ULT; |
9021 | 5.88k | Changed = true; |
9022 | 74.1k | } else if (!getUnsignedRangeMin(LHS).isMinValue()) { |
9023 | 291 | LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS); |
9024 | 291 | Pred = ICmpInst::ICMP_ULT; |
9025 | 291 | Changed = true; |
9026 | 291 | } |
9027 | 80.0k | break; |
9028 | 14.2M | case ICmpInst::ICMP_UGE: |
9029 | 71.5k | if (!getUnsignedRangeMin(RHS).isMinValue()) { |
9030 | 2.64k | RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS); |
9031 | 2.64k | Pred = ICmpInst::ICMP_UGT; |
9032 | 2.64k | Changed = true; |
9033 | 68.9k | } else if (!getUnsignedRangeMax(LHS).isMaxValue()) { |
9034 | 29.8k | LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS, |
9035 | 29.8k | SCEV::FlagNUW); |
9036 | 29.8k | Pred = ICmpInst::ICMP_UGT; |
9037 | 29.8k | Changed = true; |
9038 | 29.8k | } |
9039 | 71.5k | break; |
9040 | 14.2M | default: |
9041 | 13.6M | break; |
9042 | 14.2M | } |
9043 | 14.2M | |
9044 | 14.2M | // TODO: More simplifications are possible here. |
9045 | 14.2M | |
9046 | 14.2M | // Recursively simplify until we either hit a recursion limit or nothing |
9047 | 14.2M | // changes. |
9048 | 14.2M | if (Changed) |
9049 | 2.67M | return SimplifyICmpOperands(Pred, LHS, RHS, Depth+1); |
9050 | 11.5M | |
9051 | 11.5M | return Changed; |
9052 | 11.5M | } |
9053 | | |
9054 | 1.05M | bool ScalarEvolution::isKnownNegative(const SCEV *S) { |
9055 | 1.05M | return getSignedRangeMax(S).isNegative(); |
9056 | 1.05M | } |
9057 | | |
9058 | 1.70M | bool ScalarEvolution::isKnownPositive(const SCEV *S) { |
9059 | 1.70M | return getSignedRangeMin(S).isStrictlyPositive(); |
9060 | 1.70M | } |
9061 | | |
9062 | 20.3M | bool ScalarEvolution::isKnownNonNegative(const SCEV *S) { |
9063 | 20.3M | return !getSignedRangeMin(S).isNegative(); |
9064 | 20.3M | } |
9065 | | |
9066 | 5.17M | bool ScalarEvolution::isKnownNonPositive(const SCEV *S) { |
9067 | 5.17M | return !getSignedRangeMax(S).isStrictlyPositive(); |
9068 | 5.17M | } |
9069 | | |
9070 | 776k | bool ScalarEvolution::isKnownNonZero(const SCEV *S) { |
9071 | 776k | return isKnownNegative(S) || isKnownPositive(S)774k ; |
9072 | 776k | } |
9073 | | |
9074 | | std::pair<const SCEV *, const SCEV *> |
9075 | 993k | ScalarEvolution::SplitIntoInitAndPostInc(const Loop *L, const SCEV *S) { |
9076 | 993k | // Compute SCEV on entry of loop L. |
9077 | 993k | const SCEV *Start = SCEVInitRewriter::rewrite(S, L, *this); |
9078 | 993k | if (Start == getCouldNotCompute()) |
9079 | 63.3k | return { Start, Start }; |
9080 | 929k | // Compute post increment SCEV for loop L. |
9081 | 929k | const SCEV *PostInc = SCEVPostIncRewriter::rewrite(S, L, *this); |
9082 | 929k | assert(PostInc != getCouldNotCompute() && "Unexpected could not compute"); |
9083 | 929k | return { Start, PostInc }; |
9084 | 929k | } |
9085 | | |
9086 | | bool ScalarEvolution::isKnownViaInduction(ICmpInst::Predicate Pred, |
9087 | 676k | const SCEV *LHS, const SCEV *RHS) { |
9088 | 676k | // First collect all loops. |
9089 | 676k | SmallPtrSet<const Loop *, 8> LoopsUsed; |
9090 | 676k | getUsedLoops(LHS, LoopsUsed); |
9091 | 676k | getUsedLoops(RHS, LoopsUsed); |
9092 | 676k | |
9093 | 676k | if (LoopsUsed.empty()) |
9094 | 179k | return false; |
9095 | 496k | |
9096 | 496k | // Domination relationship must be a linear order on collected loops. |
9097 | | #ifndef NDEBUG |
9098 | | for (auto *L1 : LoopsUsed) |
9099 | | for (auto *L2 : LoopsUsed) |
9100 | | assert((DT.dominates(L1->getHeader(), L2->getHeader()) || |
9101 | | DT.dominates(L2->getHeader(), L1->getHeader())) && |
9102 | | "Domination relationship is not a linear order"); |
9103 | | #endif |
9104 | | |
9105 | 496k | const Loop *MDL = |
9106 | 496k | *std::max_element(LoopsUsed.begin(), LoopsUsed.end(), |
9107 | 496k | [&](const Loop *L1, const Loop *L2) { |
9108 | 7.71k | return DT.properlyDominates(L1->getHeader(), L2->getHeader()); |
9109 | 7.71k | }); |
9110 | 496k | |
9111 | 496k | // Get init and post increment value for LHS. |
9112 | 496k | auto SplitLHS = SplitIntoInitAndPostInc(MDL, LHS); |
9113 | 496k | // if LHS contains unknown non-invariant SCEV then bail out. |
9114 | 496k | if (SplitLHS.first == getCouldNotCompute()) |
9115 | 691 | return false; |
9116 | 496k | assert (SplitLHS.second != getCouldNotCompute() && "Unexpected CNC"); |
9117 | 496k | // Get init and post increment value for RHS. |
9118 | 496k | auto SplitRHS = SplitIntoInitAndPostInc(MDL, RHS); |
9119 | 496k | // if RHS contains unknown non-invariant SCEV then bail out. |
9120 | 496k | if (SplitRHS.first == getCouldNotCompute()) |
9121 | 62.7k | return false; |
9122 | 433k | assert (SplitRHS.second != getCouldNotCompute() && "Unexpected CNC"); |
9123 | 433k | // It is possible that init SCEV contains an invariant load but it does |
9124 | 433k | // not dominate MDL and is not available at MDL loop entry, so we should |
9125 | 433k | // check it here. |
9126 | 433k | if (!isAvailableAtLoopEntry(SplitLHS.first, MDL) || |
9127 | 433k | !isAvailableAtLoopEntry(SplitRHS.first, MDL)) |
9128 | 0 | return false; |
9129 | 433k | |
9130 | 433k | return isLoopEntryGuardedByCond(MDL, Pred, SplitLHS.first, SplitRHS.first) && |
9131 | 433k | isLoopBackedgeGuardedByCond(MDL, Pred, SplitLHS.second, |
9132 | 173k | SplitRHS.second); |
9133 | 433k | } |
9134 | | |
9135 | | bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred, |
9136 | 676k | const SCEV *LHS, const SCEV *RHS) { |
9137 | 676k | // Canonicalize the inputs first. |
9138 | 676k | (void)SimplifyICmpOperands(Pred, LHS, RHS); |
9139 | 676k | |
9140 | 676k | if (isKnownViaInduction(Pred, LHS, RHS)) |
9141 | 58.1k | return true; |
9142 | 617k | |
9143 | 617k | if (isKnownPredicateViaSplitting(Pred, LHS, RHS)) |
9144 | 7 | return true; |
9145 | 617k | |
9146 | 617k | // Otherwise see what can be done with some simple reasoning. |
9147 | 617k | return isKnownViaNonRecursiveReasoning(Pred, LHS, RHS); |
9148 | 617k | } |
9149 | | |
9150 | | bool ScalarEvolution::isKnownOnEveryIteration(ICmpInst::Predicate Pred, |
9151 | | const SCEVAddRecExpr *LHS, |
9152 | 520k | const SCEV *RHS) { |
9153 | 520k | const Loop *L = LHS->getLoop(); |
9154 | 520k | return isLoopEntryGuardedByCond(L, Pred, LHS->getStart(), RHS) && |
9155 | 520k | isLoopBackedgeGuardedByCond(L, Pred, LHS->getPostIncExpr(*this), RHS)338k ; |
9156 | 520k | } |
9157 | | |
9158 | | bool ScalarEvolution::isMonotonicPredicate(const SCEVAddRecExpr *LHS, |
9159 | | ICmpInst::Predicate Pred, |
9160 | 19.1k | bool &Increasing) { |
9161 | 19.1k | bool Result = isMonotonicPredicateImpl(LHS, Pred, Increasing); |
9162 | 19.1k | |
9163 | | #ifndef NDEBUG |
9164 | | // Verify an invariant: inverting the predicate should turn a monotonically |
9165 | | // increasing change to a monotonically decreasing one, and vice versa. |
9166 | | bool IncreasingSwapped; |
9167 | | bool ResultSwapped = isMonotonicPredicateImpl( |
9168 | | LHS, ICmpInst::getSwappedPredicate(Pred), IncreasingSwapped); |
9169 | | |
9170 | | assert(Result == ResultSwapped && "should be able to analyze both!"); |
9171 | | if (ResultSwapped) |
9172 | | assert(Increasing == !IncreasingSwapped && |
9173 | | "monotonicity should flip as we flip the predicate"); |
9174 | | #endif |
9175 | | |
9176 | 19.1k | return Result; |
9177 | 19.1k | } |
9178 | | |
9179 | | bool ScalarEvolution::isMonotonicPredicateImpl(const SCEVAddRecExpr *LHS, |
9180 | | ICmpInst::Predicate Pred, |
9181 | 19.1k | bool &Increasing) { |
9182 | 19.1k | |
9183 | 19.1k | // A zero step value for LHS means the induction variable is essentially a |
9184 | 19.1k | // loop invariant value. We don't really depend on the predicate actually |
9185 | 19.1k | // flipping from false to true (for increasing predicates, and the other way |
9186 | 19.1k | // around for decreasing predicates), all we care about is that *if* the |
9187 | 19.1k | // predicate changes then it only changes from false to true. |
9188 | 19.1k | // |
9189 | 19.1k | // A zero step value in itself is not very useful, but there may be places |
9190 | 19.1k | // where SCEV can prove X >= 0 but not prove X > 0, so it is helpful to be |
9191 | 19.1k | // as general as possible. |
9192 | 19.1k | |
9193 | 19.1k | switch (Pred) { |
9194 | 19.1k | default: |
9195 | 10.0k | return false; // Conservative answer |
9196 | 19.1k | |
9197 | 19.1k | case ICmpInst::ICMP_UGT: |
9198 | 2.65k | case ICmpInst::ICMP_UGE: |
9199 | 2.65k | case ICmpInst::ICMP_ULT: |
9200 | 2.65k | case ICmpInst::ICMP_ULE: |
9201 | 2.65k | if (!LHS->hasNoUnsignedWrap()) |
9202 | 527 | return false; |
9203 | 2.12k | |
9204 | 2.12k | Increasing = Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE1.74k ; |
9205 | 2.12k | return true; |
9206 | 2.12k | |
9207 | 6.37k | case ICmpInst::ICMP_SGT: |
9208 | 6.37k | case ICmpInst::ICMP_SGE: |
9209 | 6.37k | case ICmpInst::ICMP_SLT: |
9210 | 6.37k | case ICmpInst::ICMP_SLE: { |
9211 | 6.37k | if (!LHS->hasNoSignedWrap()) |
9212 | 79 | return false; |
9213 | 6.29k | |
9214 | 6.29k | const SCEV *Step = LHS->getStepRecurrence(*this); |
9215 | 6.29k | |
9216 | 6.29k | if (isKnownNonNegative(Step)) { |
9217 | 3.96k | Increasing = Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE3.65k ; |
9218 | 3.96k | return true; |
9219 | 3.96k | } |
9220 | 2.33k | |
9221 | 2.33k | if (isKnownNonPositive(Step)) { |
9222 | 2.30k | Increasing = Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE2.25k ; |
9223 | 2.30k | return true; |
9224 | 2.30k | } |
9225 | 24 | |
9226 | 24 | return false; |
9227 | 24 | } |
9228 | 0 | |
9229 | 0 | } |
9230 | 0 | |
9231 | 0 | llvm_unreachable("switch has default clause!"); |
9232 | 0 | } |
9233 | | |
9234 | | bool ScalarEvolution::isLoopInvariantPredicate( |
9235 | | ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L, |
9236 | | ICmpInst::Predicate &InvariantPred, const SCEV *&InvariantLHS, |
9237 | 50.3k | const SCEV *&InvariantRHS) { |
9238 | 50.3k | |
9239 | 50.3k | // If there is a loop-invariant, force it into the RHS, otherwise bail out. |
9240 | 50.3k | if (!isLoopInvariant(RHS, L)) { |
9241 | 19.4k | if (!isLoopInvariant(LHS, L)) |
9242 | 19.4k | return false; |
9243 | 1 | |
9244 | 1 | std::swap(LHS, RHS); |
9245 | 1 | Pred = ICmpInst::getSwappedPredicate(Pred); |
9246 | 1 | } |
9247 | 50.3k | |
9248 | 50.3k | const SCEVAddRecExpr *ArLHS = dyn_cast<SCEVAddRecExpr>(LHS); |
9249 | 30.8k | if (!ArLHS || ArLHS->getLoop() != L15.5k ) |
9250 | 15.2k | return false; |
9251 | 15.5k | |
9252 | 15.5k | bool Increasing; |
9253 | 15.5k | if (!isMonotonicPredicate(ArLHS, Pred, Increasing)) |
9254 | 7.41k | return false; |
9255 | 8.14k | |
9256 | 8.14k | // If the predicate "ArLHS `Pred` RHS" monotonically increases from false to |
9257 | 8.14k | // true as the loop iterates, and the backedge is control dependent on |
9258 | 8.14k | // "ArLHS `Pred` RHS" == true then we can reason as follows: |
9259 | 8.14k | // |
9260 | 8.14k | // * if the predicate was false in the first iteration then the predicate |
9261 | 8.14k | // is never evaluated again, since the loop exits without taking the |
9262 | 8.14k | // backedge. |
9263 | 8.14k | // * if the predicate was true in the first iteration then it will |
9264 | 8.14k | // continue to be true for all future iterations since it is |
9265 | 8.14k | // monotonically increasing. |
9266 | 8.14k | // |
9267 | 8.14k | // For both the above possibilities, we can replace the loop varying |
9268 | 8.14k | // predicate with its value on the first iteration of the loop (which is |
9269 | 8.14k | // loop invariant). |
9270 | 8.14k | // |
9271 | 8.14k | // A similar reasoning applies for a monotonically decreasing predicate, by |
9272 | 8.14k | // replacing true with false and false with true in the above two bullets. |
9273 | 8.14k | |
9274 | 8.14k | auto P = Increasing ? Pred742 : ICmpInst::getInversePredicate(Pred)7.39k ; |
9275 | 8.14k | |
9276 | 8.14k | if (!isLoopBackedgeGuardedByCond(L, P, LHS, RHS)) |
9277 | 8.12k | return false; |
9278 | 20 | |
9279 | 20 | InvariantPred = Pred; |
9280 | 20 | InvariantLHS = ArLHS->getStart(); |
9281 | 20 | InvariantRHS = RHS; |
9282 | 20 | return true; |
9283 | 20 | } |
9284 | | |
9285 | | bool ScalarEvolution::isKnownPredicateViaConstantRanges( |
9286 | 9.90M | ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) { |
9287 | 9.90M | if (HasSameValue(LHS, RHS)) |
9288 | 782k | return ICmpInst::isTrueWhenEqual(Pred); |
9289 | 9.12M | |
9290 | 9.12M | // This code is split out from isKnownPredicate because it is called from |
9291 | 9.12M | // within isLoopEntryGuardedByCond. |
9292 | 9.12M | |
9293 | 9.12M | auto CheckRanges = |
9294 | 9.71M | [&](const ConstantRange &RangeLHS, const ConstantRange &RangeRHS) { |
9295 | 9.71M | return ConstantRange::makeSatisfyingICmpRegion(Pred, RangeRHS) |
9296 | 9.71M | .contains(RangeLHS); |
9297 | 9.71M | }; |
9298 | 9.12M | |
9299 | 9.12M | // The check at the top of the function catches the case where the values are |
9300 | 9.12M | // known to be equal. |
9301 | 9.12M | if (Pred == CmpInst::ICMP_EQ) |
9302 | 194k | return false; |
9303 | 8.92M | |
9304 | 8.92M | if (Pred == CmpInst::ICMP_NE) |
9305 | 842k | return CheckRanges(getSignedRange(LHS), getSignedRange(RHS)) || |
9306 | 842k | CheckRanges(getUnsignedRange(LHS), getUnsignedRange(RHS))784k || |
9307 | 842k | isKnownNonZero(getMinusSCEV(LHS, RHS))775k ; |
9308 | 8.08M | |
9309 | 8.08M | if (CmpInst::isSigned(Pred)) |
9310 | 5.07M | return CheckRanges(getSignedRange(LHS), getSignedRange(RHS)); |
9311 | 3.01M | |
9312 | 3.01M | return CheckRanges(getUnsignedRange(LHS), getUnsignedRange(RHS)); |
9313 | 3.01M | } |
9314 | | |
9315 | | bool ScalarEvolution::isKnownPredicateViaNoOverflow(ICmpInst::Predicate Pred, |
9316 | | const SCEV *LHS, |
9317 | 7.99M | const SCEV *RHS) { |
9318 | 7.99M | // Match Result to (X + Y)<ExpectedFlags> where Y is a constant integer. |
9319 | 7.99M | // Return Y via OutY. |
9320 | 7.99M | auto MatchBinaryAddToConst = |
9321 | 7.99M | [this](const SCEV *Result, const SCEV *X, APInt &OutY, |
9322 | 8.60M | SCEV::NoWrapFlags ExpectedFlags) { |
9323 | 8.60M | const SCEV *NonConstOp, *ConstOp; |
9324 | 8.60M | SCEV::NoWrapFlags FlagsPresent; |
9325 | 8.60M | |
9326 | 8.60M | if (!splitBinaryAdd(Result, ConstOp, NonConstOp, FlagsPresent) || |
9327 | 8.60M | !isa<SCEVConstant>(ConstOp)1.44M || NonConstOp != X1.24M ) |
9328 | 8.53M | return false; |
9329 | 66.5k | |
9330 | 66.5k | OutY = cast<SCEVConstant>(ConstOp)->getAPInt(); |
9331 | 66.5k | return (FlagsPresent & ExpectedFlags) == ExpectedFlags; |
9332 | 66.5k | }; |
9333 | 7.99M | |
9334 | 7.99M | APInt C; |
9335 | 7.99M | |
9336 | 7.99M | switch (Pred) { |
9337 | 7.99M | default: |
9338 | 3.68M | break; |
9339 | 7.99M | |
9340 | 7.99M | case ICmpInst::ICMP_SGE: |
9341 | 1.37M | std::swap(LHS, RHS); |
9342 | 1.37M | LLVM_FALLTHROUGH; |
9343 | 2.56M | case ICmpInst::ICMP_SLE: |
9344 | 2.56M | // X s<= (X + C)<nsw> if C >= 0 |
9345 | 2.56M | if (MatchBinaryAddToConst(RHS, LHS, C, SCEV::FlagNSW) && C.isNonNegative()35.6k ) |
9346 | 17.5k | return true; |
9347 | 2.54M | |
9348 | 2.54M | // (X + C)<nsw> s<= X if C <= 0 |
9349 | 2.54M | if (MatchBinaryAddToConst(LHS, RHS, C, SCEV::FlagNSW) && |
9350 | 2.54M | !C.isStrictlyPositive()13.9k ) |
9351 | 3.23k | return true; |
9352 | 2.54M | break; |
9353 | 2.54M | |
9354 | 2.54M | case ICmpInst::ICMP_SGT: |
9355 | 1.15M | std::swap(LHS, RHS); |
9356 | 1.15M | LLVM_FALLTHROUGH; |
9357 | 1.74M | case ICmpInst::ICMP_SLT: |
9358 | 1.74M | // X s< (X + C)<nsw> if C > 0 |
9359 | 1.74M | if (MatchBinaryAddToConst(RHS, LHS, C, SCEV::FlagNSW) && |
9360 | 1.74M | C.isStrictlyPositive()170 ) |
9361 | 159 | return true; |
9362 | 1.74M | |
9363 | 1.74M | // (X + C)<nsw> s< X if C < 0 |
9364 | 1.74M | if (MatchBinaryAddToConst(LHS, RHS, C, SCEV::FlagNSW) && C.isNegative()339 ) |
9365 | 81 | return true; |
9366 | 1.74M | break; |
9367 | 7.97M | } |
9368 | 7.97M | |
9369 | 7.97M | return false; |
9370 | 7.97M | } |
9371 | | |
9372 | | bool ScalarEvolution::isKnownPredicateViaSplitting(ICmpInst::Predicate Pred, |
9373 | | const SCEV *LHS, |
9374 | 617k | const SCEV *RHS) { |
9375 | 617k | if (Pred != ICmpInst::ICMP_ULT || ProvingSplitPredicate64.6k ) |
9376 | 553k | return false; |
9377 | 64.6k | |
9378 | 64.6k | // Allowing arbitrary number of activations of isKnownPredicateViaSplitting on |
9379 | 64.6k | // the stack can result in exponential time complexity. |
9380 | 64.6k | SaveAndRestore<bool> Restore(ProvingSplitPredicate, true); |
9381 | 64.6k | |
9382 | 64.6k | // If L >= 0 then I `ult` L <=> I >= 0 && I `slt` L |
9383 | 64.6k | // |
9384 | 64.6k | // To prove L >= 0 we use isKnownNonNegative whereas to prove I >= 0 we use |
9385 | 64.6k | // isKnownPredicate. isKnownPredicate is more powerful, but also more |
9386 | 64.6k | // expensive; and using isKnownNonNegative(RHS) is sufficient for most of the |
9387 | 64.6k | // interesting cases seen in practice. We can consider "upgrading" L >= 0 to |
9388 | 64.6k | // use isKnownPredicate later if needed. |
9389 | 64.6k | return isKnownNonNegative(RHS) && |
9390 | 64.6k | isKnownPredicate(CmpInst::ICMP_SGE, LHS, getZero(LHS->getType()))38.7k && |
9391 | 64.6k | isKnownPredicate(CmpInst::ICMP_SLT, LHS, RHS)32.7k ; |
9392 | 64.6k | } |
9393 | | |
9394 | | bool ScalarEvolution::isImpliedViaGuard(BasicBlock *BB, |
9395 | | ICmpInst::Predicate Pred, |
9396 | 11.2M | const SCEV *LHS, const SCEV *RHS) { |
9397 | 11.2M | // No need to even try if we know the module has no guards. |
9398 | 11.2M | if (!HasGuards) |
9399 | 11.2M | return false; |
9400 | 2.27k | |
9401 | 5.34k | return any_of(*BB, [&](Instruction &I) 2.27k { |
9402 | 5.34k | using namespace llvm::PatternMatch; |
9403 | 5.34k | |
9404 | 5.34k | Value *Condition; |
9405 | 5.34k | return match(&I, m_Intrinsic<Intrinsic::experimental_guard>( |
9406 | 5.34k | m_Value(Condition))) && |
9407 | 5.34k | isImpliedCond(Pred, LHS, RHS, Condition, false)150 ; |
9408 | 5.34k | }); |
9409 | 2.27k | } |
9410 | | |
9411 | | /// isLoopBackedgeGuardedByCond - Test whether the backedge of the loop is |
9412 | | /// protected by a conditional between LHS and RHS. This is used to |
9413 | | /// to eliminate casts. |
9414 | | bool |
9415 | | ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L, |
9416 | | ICmpInst::Predicate Pred, |
9417 | 1.06M | const SCEV *LHS, const SCEV *RHS) { |
9418 | 1.06M | // Interpret a null as meaning no loop, where there is obviously no guard |
9419 | 1.06M | // (interprocedural conditions notwithstanding). |
9420 | 1.06M | if (!L) return true0 ; |
9421 | 1.06M | |
9422 | 1.06M | if (VerifyIR) |
9423 | 1.06M | assert(!verifyFunction(*L->getHeader()->getParent(), &dbgs()) && |
9424 | 1.06M | "This cannot be done on broken IR!"); |
9425 | 1.06M | |
9426 | 1.06M | |
9427 | 1.06M | if (isKnownViaNonRecursiveReasoning(Pred, LHS, RHS)) |
9428 | 66.7k | return true; |
9429 | 994k | |
9430 | 994k | BasicBlock *Latch = L->getLoopLatch(); |
9431 | 994k | if (!Latch) |
9432 | 0 | return false; |
9433 | 994k | |
9434 | 994k | BranchInst *LoopContinuePredicate = |
9435 | 994k | dyn_cast<BranchInst>(Latch->getTerminator()); |
9436 | 994k | if (LoopContinuePredicate && LoopContinuePredicate->isConditional()993k && |
9437 | 994k | isImpliedCond(Pred, LHS, RHS, |
9438 | 987k | LoopContinuePredicate->getCondition(), |
9439 | 987k | LoopContinuePredicate->getSuccessor(0) != L->getHeader())) |
9440 | 14.5k | return true; |
9441 | 979k | |
9442 | 979k | // We don't want more than one activation of the following loops on the stack |
9443 | 979k | // -- that can lead to O(n!) time complexity. |
9444 | 979k | if (WalkingBEDominatingConds) |
9445 | 162k | return false; |
9446 | 817k | |
9447 | 817k | SaveAndRestore<bool> ClearOnExit(WalkingBEDominatingConds, true); |
9448 | 817k | |
9449 | 817k | // See if we can exploit a trip count to prove the predicate. |
9450 | 817k | const auto &BETakenInfo = getBackedgeTakenInfo(L); |
9451 | 817k | const SCEV *LatchBECount = BETakenInfo.getExact(Latch, this); |
9452 | 817k | if (LatchBECount != getCouldNotCompute()) { |
9453 | 736k | // We know that Latch branches back to the loop header exactly |
9454 | 736k | // LatchBECount times. This means the backdege condition at Latch is |
9455 | 736k | // equivalent to "{0,+,1} u< LatchBECount". |
9456 | 736k | Type *Ty = LatchBECount->getType(); |
9457 | 736k | auto NoWrapFlags = SCEV::NoWrapFlags(SCEV::FlagNUW | SCEV::FlagNW); |
9458 | 736k | const SCEV *LoopCounter = |
9459 | 736k | getAddRecExpr(getZero(Ty), getOne(Ty), L, NoWrapFlags); |
9460 | 736k | if (isImpliedCond(Pred, LHS, RHS, ICmpInst::ICMP_ULT, LoopCounter, |
9461 | 736k | LatchBECount)) |
9462 | 252 | return true; |
9463 | 817k | } |
9464 | 817k | |
9465 | 817k | // Check conditions due to any @llvm.assume intrinsics. |
9466 | 817k | for (auto &AssumeVH : AC.assumptions()) { |
9467 | 3.61k | if (!AssumeVH) |
9468 | 0 | continue; |
9469 | 3.61k | auto *CI = cast<CallInst>(AssumeVH); |
9470 | 3.61k | if (!DT.dominates(CI, Latch->getTerminator())) |
9471 | 127 | continue; |
9472 | 3.48k | |
9473 | 3.48k | if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false)) |
9474 | 2 | return true; |
9475 | 3.48k | } |
9476 | 817k | |
9477 | 817k | // If the loop is not reachable from the entry block, we risk running into an |
9478 | 817k | // infinite loop as we walk up into the dom tree. These loops do not matter |
9479 | 817k | // anyway, so we just return a conservative answer when we see them. |
9480 | 817k | if (817k !DT.isReachableFromEntry(L->getHeader())817k ) |
9481 | 0 | return false; |
9482 | 817k | |
9483 | 817k | if (isImpliedViaGuard(Latch, Pred, LHS, RHS)) |
9484 | 13 | return true; |
9485 | 817k | |
9486 | 817k | for (DomTreeNode *DTN = DT[Latch], *HeaderDTN = DT[L->getHeader()]; |
9487 | 1.17M | DTN != HeaderDTN; DTN = DTN->getIDom()354k ) { |
9488 | 355k | assert(DTN && "should reach the loop header before reaching the root!"); |
9489 | 355k | |
9490 | 355k | BasicBlock *BB = DTN->getBlock(); |
9491 | 355k | if (isImpliedViaGuard(BB, Pred, LHS, RHS)) |
9492 | 0 | return true; |
9493 | 355k | |
9494 | 355k | BasicBlock *PBB = BB->getSinglePredecessor(); |
9495 | 355k | if (!PBB) |
9496 | 174k | continue; |
9497 | 181k | |
9498 | 181k | BranchInst *ContinuePredicate = dyn_cast<BranchInst>(PBB->getTerminator()); |
9499 | 181k | if (!ContinuePredicate || !ContinuePredicate->isConditional()171k ) |
9500 | 20.1k | continue; |
9501 | 161k | |
9502 | 161k | Value *Condition = ContinuePredicate->getCondition(); |
9503 | 161k | |
9504 | 161k | // If we have an edge `E` within the loop body that dominates the only |
9505 | 161k | // latch, the condition guarding `E` also guards the backedge. This |
9506 | 161k | // reasoning works only for loops with a single latch. |
9507 | 161k | |
9508 | 161k | BasicBlockEdge DominatingEdge(PBB, BB); |
9509 | 161k | if (DominatingEdge.isSingleEdge()) { |
9510 | 161k | // We're constructively (and conservatively) enumerating edges within the |
9511 | 161k | // loop body that dominate the latch. The dominator tree better agree |
9512 | 161k | // with us on this: |
9513 | 161k | assert(DT.dominates(DominatingEdge, Latch) && "should be!"); |
9514 | 161k | |
9515 | 161k | if (isImpliedCond(Pred, LHS, RHS, Condition, |
9516 | 161k | BB != ContinuePredicate->getSuccessor(0))) |
9517 | 1.00k | return true; |
9518 | 161k | } |
9519 | 161k | } |
9520 | 817k | |
9521 | 817k | return false816k ; |
9522 | 817k | } |
9523 | | |
9524 | | bool |
9525 | | ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L, |
9526 | | ICmpInst::Predicate Pred, |
9527 | 1.33M | const SCEV *LHS, const SCEV *RHS) { |
9528 | 1.33M | // Interpret a null as meaning no loop, where there is obviously no guard |
9529 | 1.33M | // (interprocedural conditions notwithstanding). |
9530 | 1.33M | if (!L) return false0 ; |
9531 | 1.33M | |
9532 | 1.33M | if (VerifyIR) |
9533 | 1.33M | assert(!verifyFunction(*L->getHeader()->getParent(), &dbgs()) && |
9534 | 1.33M | "This cannot be done on broken IR!"); |
9535 | 1.33M | |
9536 | 1.33M | // Both LHS and RHS must be available at loop entry. |
9537 | 1.33M | assert(isAvailableAtLoopEntry(LHS, L) && |
9538 | 1.33M | "LHS is not available at Loop Entry"); |
9539 | 1.33M | assert(isAvailableAtLoopEntry(RHS, L) && |
9540 | 1.33M | "RHS is not available at Loop Entry"); |
9541 | 1.33M | |
9542 | 1.33M | if (isKnownViaNonRecursiveReasoning(Pred, LHS, RHS)) |
9543 | 500k | return true; |
9544 | 832k | |
9545 | 832k | // If we cannot prove strict comparison (e.g. a > b), maybe we can prove |
9546 | 832k | // the facts (a >= b && a != b) separately. A typical situation is when the |
9547 | 832k | // non-strict comparison is known from ranges and non-equality is known from |
9548 | 832k | // dominating predicates. If we are proving strict comparison, we always try |
9549 | 832k | // to prove non-equality and non-strict comparison separately. |
9550 | 832k | auto NonStrictPredicate = ICmpInst::getNonStrictPredicate(Pred); |
9551 | 832k | const bool ProvingStrictComparison = (Pred != NonStrictPredicate); |
9552 | 832k | bool ProvedNonStrictComparison = false; |
9553 | 832k | bool ProvedNonEquality = false; |
9554 | 832k | |
9555 | 832k | if (ProvingStrictComparison) { |
9556 | 533k | ProvedNonStrictComparison = |
9557 | 533k | isKnownViaNonRecursiveReasoning(NonStrictPredicate, LHS, RHS); |
9558 | 533k | ProvedNonEquality = |
9559 | 533k | isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_NE, LHS, RHS); |
9560 | 533k | if (ProvedNonStrictComparison && ProvedNonEquality294k ) |
9561 | 8.43k | return true; |
9562 | 824k | } |
9563 | 824k | |
9564 | 824k | // Try to prove (Pred, LHS, RHS) using isImpliedViaGuard. |
9565 | 5.44M | auto ProveViaGuard = [&](BasicBlock *Block) 824k { |
9566 | 5.44M | if (isImpliedViaGuard(Block, Pred, LHS, RHS)) |
9567 | 16 | return true; |
9568 | 5.44M | if (ProvingStrictComparison) { |
9569 | 3.82M | if (!ProvedNonStrictComparison) |
9570 | 1.42M | ProvedNonStrictComparison = |
9571 | 1.42M | isImpliedViaGuard(Block, NonStrictPredicate, LHS, RHS); |
9572 | 3.82M | if (!ProvedNonEquality) |
9573 | 3.25M | ProvedNonEquality = |
9574 | 3.25M | isImpliedViaGuard(Block, ICmpInst::ICMP_NE, LHS, RHS); |
9575 | 3.82M | if (ProvedNonStrictComparison && ProvedNonEquality2.40M ) |
9576 | 0 | return true; |
9577 | 5.44M | } |
9578 | 5.44M | return false; |
9579 | 5.44M | }; |
9580 | 824k | |
9581 | 824k | // Try to prove (Pred, LHS, RHS) using isImpliedCond. |
9582 | 4.06M | auto ProveViaCond = [&](Value *Condition, bool Inverse) { |
9583 | 4.06M | if (isImpliedCond(Pred, LHS, RHS, Condition, Inverse)) |
9584 | 152k | return true; |
9585 | 3.91M | if (ProvingStrictComparison) { |
9586 | 2.80M | if (!ProvedNonStrictComparison) |
9587 | 974k | ProvedNonStrictComparison = |
9588 | 974k | isImpliedCond(NonStrictPredicate, LHS, RHS, Condition, Inverse); |
9589 | 2.80M | if (!ProvedNonEquality) |
9590 | 2.30M | ProvedNonEquality = |
9591 | 2.30M | isImpliedCond(ICmpInst::ICMP_NE, LHS, RHS, Condition, Inverse); |
9592 | 2.80M | if (ProvedNonStrictComparison && ProvedNonEquality1.86M ) |
9593 | 376 | return true; |
9594 | 3.91M | } |
9595 | 3.91M | return false; |
9596 | 3.91M | }; |
9597 | 824k | |
9598 | 824k | // Starting at the loop predecessor, climb up the predecessor chain, as long |
9599 | 824k | // as there are predecessors that can be found that have unique successors |
9600 | 824k | // leading to the original header. |
9601 | 824k | for (std::pair<BasicBlock *, BasicBlock *> |
9602 | 824k | Pair(L->getLoopPredecessor(), L->getHeader()); |
9603 | 6.11M | Pair.first; |
9604 | 5.44M | Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)5.29M ) { |
9605 | 5.44M | |
9606 | 5.44M | if (ProveViaGuard(Pair.first)) |
9607 | 16 | return true; |
9608 | 5.44M | |
9609 | 5.44M | BranchInst *LoopEntryPredicate = |
9610 | 5.44M | dyn_cast<BranchInst>(Pair.first->getTerminator()); |
9611 | 5.44M | if (!LoopEntryPredicate || |
9612 | 5.44M | LoopEntryPredicate->isUnconditional()5.38M ) |
9613 | 1.37M | continue; |
9614 | 4.06M | |
9615 | 4.06M | if (ProveViaCond(LoopEntryPredicate->getCondition(), |
9616 | 4.06M | LoopEntryPredicate->getSuccessor(0) != Pair.second)) |
9617 | 153k | return true; |
9618 | 4.06M | } |
9619 | 824k | |
9620 | 824k | // Check conditions due to any @llvm.assume intrinsics. |
9621 | 824k | for (auto &AssumeVH : AC.assumptions())670k { |
9622 | 9.81k | if (!AssumeVH) |
9623 | 0 | continue; |
9624 | 9.81k | auto *CI = cast<CallInst>(AssumeVH); |
9625 | 9.81k | if (!DT.dominates(CI, L->getHeader())) |
9626 | 9.79k | continue; |
9627 | 23 | |
9628 | 23 | if (ProveViaCond(CI->getArgOperand(0), false)) |
9629 | 5 | return true; |
9630 | 23 | } |
9631 | 670k | |
9632 | 670k | return false670k ; |
9633 | 670k | } |
9634 | | |
9635 | | bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, |
9636 | | const SCEV *LHS, const SCEV *RHS, |
9637 | | Value *FoundCondValue, |
9638 | 9.51M | bool Inverse) { |
9639 | 9.51M | if (!PendingLoopPredicates.insert(FoundCondValue).second) |
9640 | 199k | return false; |
9641 | 9.31M | |
9642 | 9.31M | auto ClearOnExit = |
9643 | 9.31M | make_scope_exit([&]() { PendingLoopPredicates.erase(FoundCondValue); }); |
9644 | 9.31M | |
9645 | 9.31M | // Recursively handle And and Or conditions. |
9646 | 9.31M | if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FoundCondValue)) { |
9647 | 829k | if (BO->getOpcode() == Instruction::And) { |
9648 | 497k | if (!Inverse) |
9649 | 221k | return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) || |
9650 | 221k | isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse)219k ; |
9651 | 331k | } else if (BO->getOpcode() == Instruction::Or) { |
9652 | 330k | if (Inverse) |
9653 | 291k | return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) || |
9654 | 291k | isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse)290k ; |
9655 | 8.80M | } |
9656 | 829k | } |
9657 | 8.80M | |
9658 | 8.80M | ICmpInst *ICI = dyn_cast<ICmpInst>(FoundCondValue); |
9659 | 8.80M | if (!ICI) return false518k ; |
9660 | 8.28M | |
9661 | 8.28M | // Now that we found a conditional branch that dominates the loop or controls |
9662 | 8.28M | // the loop latch. Check to see if it is the comparison we are looking for. |
9663 | 8.28M | ICmpInst::Predicate FoundPred; |
9664 | 8.28M | if (Inverse) |
9665 | 5.63M | FoundPred = ICI->getInversePredicate(); |
9666 | 2.64M | else |
9667 | 2.64M | FoundPred = ICI->getPredicate(); |
9668 | 8.28M | |
9669 | 8.28M | const SCEV *FoundLHS = getSCEV(ICI->getOperand(0)); |
9670 | 8.28M | const SCEV *FoundRHS = getSCEV(ICI->getOperand(1)); |
9671 | 8.28M | |
9672 | 8.28M | return isImpliedCond(Pred, LHS, RHS, FoundPred, FoundLHS, FoundRHS); |
9673 | 8.28M | } |
9674 | | |
9675 | | bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, |
9676 | | const SCEV *RHS, |
9677 | | ICmpInst::Predicate FoundPred, |
9678 | | const SCEV *FoundLHS, |
9679 | 9.02M | const SCEV *FoundRHS) { |
9680 | 9.02M | // Balance the types. |
9681 | 9.02M | if (getTypeSizeInBits(LHS->getType()) < |
9682 | 9.02M | getTypeSizeInBits(FoundLHS->getType())) { |
9683 | 732k | if (CmpInst::isSigned(Pred)) { |
9684 | 189k | LHS = getSignExtendExpr(LHS, FoundLHS->getType()); |
9685 | 189k | RHS = getSignExtendExpr(RHS, FoundLHS->getType()); |
9686 | 542k | } else { |
9687 | 542k | LHS = getZeroExtendExpr(LHS, FoundLHS->getType()); |
9688 | 542k | RHS = getZeroExtendExpr(RHS, FoundLHS->getType()); |
9689 | 542k | } |
9690 | 8.29M | } else if (getTypeSizeInBits(LHS->getType()) > |
9691 | 8.29M | getTypeSizeInBits(FoundLHS->getType())) { |
9692 | 4.61M | if (CmpInst::isSigned(FoundPred)) { |
9693 | 590k | FoundLHS = getSignExtendExpr(FoundLHS, LHS->getType()); |
9694 | 590k | FoundRHS = getSignExtendExpr(FoundRHS, LHS->getType()); |
9695 | 4.02M | } else { |
9696 | 4.02M | FoundLHS = getZeroExtendExpr(FoundLHS, LHS->getType()); |
9697 | 4.02M | FoundRHS = getZeroExtendExpr(FoundRHS, LHS->getType()); |
9698 | 4.02M | } |
9699 | 4.61M | } |
9700 | 9.02M | |
9701 | 9.02M | // Canonicalize the query to match the way instcombine will have |
9702 | 9.02M | // canonicalized the comparison. |
9703 | 9.02M | if (SimplifyICmpOperands(Pred, LHS, RHS)) |
9704 | 4.02M | if (LHS == RHS) |
9705 | 4.02M | return CmpInst::isTrueWhenEqual(Pred); |
9706 | 5.00M | if (SimplifyICmpOperands(FoundPred, FoundLHS, FoundRHS)) |
9707 | 261 | if (FoundLHS == FoundRHS) |
9708 | 261 | return CmpInst::isFalseWhenEqual(FoundPred); |
9709 | 5.00M | |
9710 | 5.00M | // Check to see if we can make the LHS or RHS match. |
9711 | 5.00M | if (LHS == FoundRHS || RHS == FoundLHS4.96M ) { |
9712 | 54.0k | if (isa<SCEVConstant>(RHS)) { |
9713 | 33.3k | std::swap(FoundLHS, FoundRHS); |
9714 | 33.3k | FoundPred = ICmpInst::getSwappedPredicate(FoundPred); |
9715 | 33.3k | } else { |
9716 | 20.6k | std::swap(LHS, RHS); |
9717 | 20.6k | Pred = ICmpInst::getSwappedPredicate(Pred); |
9718 | 20.6k | } |
9719 | 54.0k | } |
9720 | 5.00M | |
9721 | 5.00M | // Check whether the found predicate is the same as the desired predicate. |
9722 | 5.00M | if (FoundPred == Pred) |
9723 | 1.15M | return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS); |
9724 | 3.85M | |
9725 | 3.85M | // Check whether swapping the found predicate makes it the same as the |
9726 | 3.85M | // desired predicate. |
9727 | 3.85M | if (ICmpInst::getSwappedPredicate(FoundPred) == Pred) { |
9728 | 183k | if (isa<SCEVConstant>(RHS)) |
9729 | 156k | return isImpliedCondOperands(Pred, LHS, RHS, FoundRHS, FoundLHS); |
9730 | 26.1k | else |
9731 | 26.1k | return isImpliedCondOperands(ICmpInst::getSwappedPredicate(Pred), |
9732 | 26.1k | RHS, LHS, FoundLHS, FoundRHS); |
9733 | 3.66M | } |
9734 | 3.66M | |
9735 | 3.66M | // Unsigned comparison is the same as signed comparison when both the operands |
9736 | 3.66M | // are non-negative. |
9737 | 3.66M | if (CmpInst::isUnsigned(FoundPred) && |
9738 | 3.66M | CmpInst::getSignedPredicate(FoundPred) == Pred1.02M && |
9739 | 3.66M | isKnownNonNegative(FoundLHS)260k && isKnownNonNegative(FoundRHS)177k ) |
9740 | 95.7k | return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS); |
9741 | 3.57M | |
9742 | 3.57M | // Check if we can make progress by sharpening ranges. |
9743 | 3.57M | if (FoundPred == ICmpInst::ICMP_NE && |
9744 | 3.57M | (682k isa<SCEVConstant>(FoundLHS)682k || isa<SCEVConstant>(FoundRHS)682k )) { |
9745 | 330k | |
9746 | 330k | const SCEVConstant *C = nullptr; |
9747 | 330k | const SCEV *V = nullptr; |
9748 | 330k | |
9749 | 330k | if (isa<SCEVConstant>(FoundLHS)) { |
9750 | 0 | C = cast<SCEVConstant>(FoundLHS); |
9751 | 0 | V = FoundRHS; |
9752 | 330k | } else { |
9753 | 330k | C = cast<SCEVConstant>(FoundRHS); |
9754 | 330k | V = FoundLHS; |
9755 | 330k | } |
9756 | 330k | |
9757 | 330k | // The guarding predicate tells us that C != V. If the known range |
9758 | 330k | // of V is [C, t), we can sharpen the range to [C + 1, t). The |
9759 | 330k | // range we consider has to correspond to same signedness as the |
9760 | 330k | // predicate we're interested in folding. |
9761 | 330k | |
9762 | 330k | APInt Min = ICmpInst::isSigned(Pred) ? |
9763 | 190k | getSignedRangeMin(V)139k : getUnsignedRangeMin(V); |
9764 | 330k | |
9765 | 330k | if (Min == C->getAPInt()) { |
9766 | 209k | // Given (V >= Min && V != Min) we conclude V >= (Min + 1). |
9767 | 209k | // This is true even if (Min + 1) wraps around -- in case of |
9768 | 209k | // wraparound, (Min + 1) < Min, so (V >= Min => V >= (Min + 1)). |
9769 | 209k | |
9770 | 209k | APInt SharperMin = Min + 1; |
9771 | 209k | |
9772 | 209k | switch (Pred) { |
9773 | 209k | case ICmpInst::ICMP_SGE: |
9774 | 1.12k | case ICmpInst::ICMP_UGE: |
9775 | 1.12k | // We know V `Pred` SharperMin. If this implies LHS `Pred` |
9776 | 1.12k | // RHS, we're done. |
9777 | 1.12k | if (isImpliedCondOperands(Pred, LHS, RHS, V, |
9778 | 1.12k | getConstant(SharperMin))) |
9779 | 0 | return true; |
9780 | 1.12k | LLVM_FALLTHROUGH; |
9781 | 1.12k | |
9782 | 58.2k | case ICmpInst::ICMP_SGT: |
9783 | 58.2k | case ICmpInst::ICMP_UGT: |
9784 | 58.2k | // We know from the range information that (V `Pred` Min || |
9785 | 58.2k | // V == Min). We know from the guarding condition that !(V |
9786 | 58.2k | // == Min). This gives us |
9787 | 58.2k | // |
9788 | 58.2k | // V `Pred` Min || V == Min && !(V == Min) |
9789 | 58.2k | // => V `Pred` Min |
9790 | 58.2k | // |
9791 | 58.2k | // If V `Pred` Min implies LHS `Pred` RHS, we're done. |
9792 | 58.2k | |
9793 | 58.2k | if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(Min))) |
9794 | 1.40k | return true; |
9795 | 56.8k | LLVM_FALLTHROUGH; |
9796 | 56.8k | |
9797 | 208k | default: |
9798 | 208k | // No change |
9799 | 208k | break; |
9800 | 3.57M | } |
9801 | 3.57M | } |
9802 | 330k | } |
9803 | 3.57M | |
9804 | 3.57M | // Check whether the actual condition is beyond sufficient. |
9805 | 3.57M | if (FoundPred == ICmpInst::ICMP_EQ) |
9806 | 683k | if (ICmpInst::isTrueWhenEqual(Pred)) |
9807 | 2.06k | if (isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS)) |
9808 | 10 | return true; |
9809 | 3.57M | if (Pred == ICmpInst::ICMP_NE) |
9810 | 1.96M | if (!ICmpInst::isTrueWhenEqual(FoundPred)) |
9811 | 1.28M | if (isImpliedCondOperands(FoundPred, LHS, RHS, FoundLHS, FoundRHS)) |
9812 | 83.9k | return true; |
9813 | 3.48M | |
9814 | 3.48M | // Otherwise assume the worst. |
9815 | 3.48M | return false; |
9816 | 3.48M | } |
9817 | | |
9818 | | bool ScalarEvolution::splitBinaryAdd(const SCEV *Expr, |
9819 | | const SCEV *&L, const SCEV *&R, |
9820 | 12.3M | SCEV::NoWrapFlags &Flags) { |
9821 | 12.3M | const auto *AE = dyn_cast<SCEVAddExpr>(Expr); |
9822 | 12.3M | if (!AE || AE->getNumOperands() != 22.49M ) |
9823 | 10.1M | return false; |
9824 | 2.28M | |
9825 | 2.28M | L = AE->getOperand(0); |
9826 | 2.28M | R = AE->getOperand(1); |
9827 | 2.28M | Flags = AE->getNoWrapFlags(); |
9828 | 2.28M | return true; |
9829 | 2.28M | } |
9830 | | |
9831 | | Optional<APInt> ScalarEvolution::computeConstantDifference(const SCEV *More, |
9832 | 2.60M | const SCEV *Less) { |
9833 | 2.60M | // We avoid subtracting expressions here because this function is usually |
9834 | 2.60M | // fairly deep in the call stack (i.e. is called many times). |
9835 | 2.60M | |
9836 | 2.60M | if (isa<SCEVAddRecExpr>(Less) && isa<SCEVAddRecExpr>(More)723k ) { |
9837 | 689k | const auto *LAR = cast<SCEVAddRecExpr>(Less); |
9838 | 689k | const auto *MAR = cast<SCEVAddRecExpr>(More); |
9839 | 689k | |
9840 | 689k | if (LAR->getLoop() != MAR->getLoop()) |
9841 | 2.03k | return None; |
9842 | 687k | |
9843 | 687k | // We look at affine expressions only; not for correctness but to keep |
9844 | 687k | // getStepRecurrence cheap. |
9845 | 687k | if (!LAR->isAffine() || !MAR->isAffine()687k ) |
9846 | 35 | return None; |
9847 | 687k | |
9848 | 687k | if (LAR->getStepRecurrence(*this) != MAR->getStepRecurrence(*this)) |
9849 | 224k | return None; |
9850 | 463k | |
9851 | 463k | Less = LAR->getStart(); |
9852 | 463k | More = MAR->getStart(); |
9853 | 463k | |
9854 | 463k | // fall through |
9855 | 463k | } |
9856 | 2.60M | |
9857 | 2.60M | if (2.38M isa<SCEVConstant>(Less)2.38M && isa<SCEVConstant>(More)534k ) { |
9858 | 479k | const auto &M = cast<SCEVConstant>(More)->getAPInt(); |
9859 | 479k | const auto &L = cast<SCEVConstant>(Less)->getAPInt(); |
9860 | 479k | return M - L; |
9861 | 479k | } |
9862 | 1.90M | |
9863 | 1.90M | SCEV::NoWrapFlags Flags; |
9864 | 1.90M | const SCEV *LLess = nullptr, *RLess = nullptr; |
9865 | 1.90M | const SCEV *LMore = nullptr, *RMore = nullptr; |
9866 | 1.90M | const SCEVConstant *C1 = nullptr, *C2 = nullptr; |
9867 | 1.90M | // Compare (X + C1) vs X. |
9868 | 1.90M | if (splitBinaryAdd(Less, LLess, RLess, Flags)) |
9869 | 369k | if ((C1 = dyn_cast<SCEVConstant>(LLess))) |
9870 | 278k | if (RLess == More) |
9871 | 15.7k | return -(C1->getAPInt()); |
9872 | 1.88M | |
9873 | 1.88M | // Compare X vs (X + C2). |
9874 | 1.88M | if (splitBinaryAdd(More, LMore, RMore, Flags)) |
9875 | 473k | if ((C2 = dyn_cast<SCEVConstant>(LMore))) |
9876 | 416k | if (RMore == Less) |
9877 | 79.3k | return C2->getAPInt(); |
9878 | 1.80M | |
9879 | 1.80M | // Compare (X + C1) vs (X + C2). |
9880 | 1.80M | if (C1 && C2262k && RLess == RMore45.7k ) |
9881 | 37.6k | return C2->getAPInt() - C1->getAPInt(); |
9882 | 1.76M | |
9883 | 1.76M | return None; |
9884 | 1.76M | } |
9885 | | |
9886 | | bool ScalarEvolution::isImpliedCondOperandsViaNoOverflow( |
9887 | | ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, |
9888 | 2.75M | const SCEV *FoundLHS, const SCEV *FoundRHS) { |
9889 | 2.75M | if (Pred != CmpInst::ICMP_SLT && Pred != CmpInst::ICMP_ULT2.33M ) |
9890 | 1.74M | return false; |
9891 | 1.00M | |
9892 | 1.00M | const auto *AddRecLHS = dyn_cast<SCEVAddRecExpr>(LHS); |
9893 | 1.00M | if (!AddRecLHS) |
9894 | 495k | return false; |
9895 | 512k | |
9896 | 512k | const auto *AddRecFoundLHS = dyn_cast<SCEVAddRecExpr>(FoundLHS); |
9897 | 512k | if (!AddRecFoundLHS) |
9898 | 14.2k | return false; |
9899 | 498k | |
9900 | 498k | // We'd like to let SCEV reason about control dependencies, so we constrain |
9901 | 498k | // both the inequalities to be about add recurrences on the same loop. This |
9902 | 498k | // way we can use isLoopEntryGuardedByCond later. |
9903 | 498k | |
9904 | 498k | const Loop *L = AddRecFoundLHS->getLoop(); |
9905 | 498k | if (L != AddRecLHS->getLoop()) |
9906 | 1.37k | return false; |
9907 | 496k | |
9908 | 496k | // FoundLHS u< FoundRHS u< -C => (FoundLHS + C) u< (FoundRHS + C) ... (1) |
9909 | 496k | // |
9910 | 496k | // FoundLHS s< FoundRHS s< INT_MIN - C => (FoundLHS + C) s< (FoundRHS + C) |
9911 | 496k | // ... (2) |
9912 | 496k | // |
9913 | 496k | // Informal proof for (2), assuming (1) [*]: |
9914 | 496k | // |
9915 | 496k | // We'll also assume (A s< B) <=> ((A + INT_MIN) u< (B + INT_MIN)) ... (3)[**] |
9916 | 496k | // |
9917 | 496k | // Then |
9918 | 496k | // |
9919 | 496k | // FoundLHS s< FoundRHS s< INT_MIN - C |
9920 | 496k | // <=> (FoundLHS + INT_MIN) u< (FoundRHS + INT_MIN) u< -C [ using (3) ] |
9921 | 496k | // <=> (FoundLHS + INT_MIN + C) u< (FoundRHS + INT_MIN + C) [ using (1) ] |
9922 | 496k | // <=> (FoundLHS + INT_MIN + C + INT_MIN) s< |
9923 | 496k | // (FoundRHS + INT_MIN + C + INT_MIN) [ using (3) ] |
9924 | 496k | // <=> FoundLHS + C s< FoundRHS + C |
9925 | 496k | // |
9926 | 496k | // [*]: (1) can be proved by ruling out overflow. |
9927 | 496k | // |
9928 | 496k | // [**]: This can be proved by analyzing all the four possibilities: |
9929 | 496k | // (A s< 0, B s< 0), (A s< 0, B s>= 0), (A s>= 0, B s< 0) and |
9930 | 496k | // (A s>= 0, B s>= 0). |
9931 | 496k | // |
9932 | 496k | // Note: |
9933 | 496k | // Despite (2), "FoundRHS s< INT_MIN - C" does not mean that "FoundRHS + C" |
9934 | 496k | // will not sign underflow. For instance, say FoundLHS = (i8 -128), FoundRHS |
9935 | 496k | // = (i8 -127) and C = (i8 -100). Then INT_MIN - C = (i8 -28), and FoundRHS |
9936 | 496k | // s< (INT_MIN - C). Lack of sign overflow / underflow in "FoundRHS + C" is |
9937 | 496k | // neither necessary nor sufficient to prove "(FoundLHS + C) s< (FoundRHS + |
9938 | 496k | // C)". |
9939 | 496k | |
9940 | 496k | Optional<APInt> LDiff = computeConstantDifference(LHS, FoundLHS); |
9941 | 496k | Optional<APInt> RDiff = computeConstantDifference(RHS, FoundRHS); |
9942 | 496k | if (!LDiff || !RDiff230k || *LDiff != *RDiff119k ) |
9943 | 496k | return false; |
9944 | 213 | |
9945 | 213 | if (LDiff->isMinValue()) |
9946 | 14 | return true; |
9947 | 199 | |
9948 | 199 | APInt FoundRHSLimit; |
9949 | 199 | |
9950 | 199 | if (Pred == CmpInst::ICMP_ULT) { |
9951 | 21 | FoundRHSLimit = -(*RDiff); |
9952 | 178 | } else { |
9953 | 178 | assert(Pred == CmpInst::ICMP_SLT && "Checked above!"); |
9954 | 178 | FoundRHSLimit = APInt::getSignedMinValue(getTypeSizeInBits(RHS->getType())) - *RDiff; |
9955 | 178 | } |
9956 | 199 | |
9957 | 199 | // Try to prove (1) or (2), as needed. |
9958 | 199 | return isAvailableAtLoopEntry(FoundRHS, L) && |
9959 | 199 | isLoopEntryGuardedByCond(L, Pred, FoundRHS, |
9960 | 168 | getConstant(FoundRHSLimit)); |
9961 | 199 | } |
9962 | | |
9963 | | bool ScalarEvolution::isImpliedViaMerge(ICmpInst::Predicate Pred, |
9964 | | const SCEV *LHS, const SCEV *RHS, |
9965 | | const SCEV *FoundLHS, |
9966 | 2.62M | const SCEV *FoundRHS, unsigned Depth) { |
9967 | 2.62M | const PHINode *LPhi = nullptr, *RPhi = nullptr; |
9968 | 2.62M | |
9969 | 2.62M | auto ClearOnExit = make_scope_exit([&]() { |
9970 | 2.62M | if (LPhi) { |
9971 | 150k | bool Erased = PendingMerges.erase(LPhi); |
9972 | 150k | assert(Erased && "Failed to erase LPhi!"); |
9973 | 150k | (void)Erased; |
9974 | 150k | } |
9975 | 2.62M | if (RPhi) { |
9976 | 822 | bool Erased = PendingMerges.erase(RPhi); |
9977 | 822 | assert(Erased && "Failed to erase RPhi!"); |
9978 | 822 | (void)Erased; |
9979 | 822 | } |
9980 | 2.62M | }); |
9981 | 2.62M | |
9982 | 2.62M | // Find respective Phis and check that they are not being pending. |
9983 | 2.62M | if (const SCEVUnknown *LU = dyn_cast<SCEVUnknown>(LHS)) |
9984 | 440k | if (auto *Phi = dyn_cast<PHINode>(LU->getValue())) { |
9985 | 115k | if (!PendingMerges.insert(Phi).second) |
9986 | 318 | return false; |
9987 | 114k | LPhi = Phi; |
9988 | 114k | } |
9989 | 2.62M | if (const SCEVUnknown *2.62M RU2.62M = dyn_cast<SCEVUnknown>(RHS)) |
9990 | 153k | if (auto *Phi = dyn_cast<PHINode>(RU->getValue())) { |
9991 | 36.5k | // If we detect a loop of Phi nodes being processed by this method, for |
9992 | 36.5k | // example: |
9993 | 36.5k | // |
9994 | 36.5k | // %a = phi i32 [ %some1, %preheader ], [ %b, %latch ] |
9995 | 36.5k | // %b = phi i32 [ %some2, %preheader ], [ %a, %latch ] |
9996 | 36.5k | // |
9997 | 36.5k | // we don't want to deal with a case that complex, so return conservative |
9998 | 36.5k | // answer false. |
9999 | 36.5k | if (!PendingMerges.insert(Phi).second) |
10000 | 298 | return false; |
10001 | 36.2k | RPhi = Phi; |
10002 | 36.2k | } |
10003 | 2.62M | |
10004 | 2.62M | // If none of LHS, RHS is a Phi, nothing to do here. |
10005 | 2.62M | if (2.62M !LPhi2.62M && !RPhi2.51M ) |
10006 | 2.47M | return false; |
10007 | 150k | |
10008 | 150k | // If there is a SCEVUnknown Phi we are interested in, make it left. |
10009 | 150k | if (!LPhi) { |
10010 | 35.4k | std::swap(LHS, RHS); |
10011 | 35.4k | std::swap(FoundLHS, FoundRHS); |
10012 | 35.4k | std::swap(LPhi, RPhi); |
10013 | 35.4k | Pred = ICmpInst::getSwappedPredicate(Pred); |
10014 | 35.4k | } |
10015 | 150k | |
10016 | 150k | assert(LPhi && "LPhi should definitely be a SCEVUnknown Phi!"); |
10017 | 150k | const BasicBlock *LBB = LPhi->getParent(); |
10018 | 150k | const SCEVAddRecExpr *RAR = dyn_cast<SCEVAddRecExpr>(RHS); |
10019 | 150k | |
10020 | 157k | auto ProvedEasily = [&](const SCEV *S1, const SCEV *S2) { |
10021 | 157k | return isKnownViaNonRecursiveReasoning(Pred, S1, S2) || |
10022 | 157k | isImpliedCondOperandsViaRanges(Pred, S1, S2, FoundLHS, FoundRHS)148k || |
10023 | 157k | isImpliedViaOperations(Pred, S1, S2, FoundLHS, FoundRHS, Depth)148k ; |
10024 | 157k | }; |
10025 | 150k | |
10026 | 150k | if (RPhi && RPhi->getParent() == LBB822 ) { |
10027 | 472 | // Case one: RHS is also a SCEVUnknown Phi from the same basic block. |
10028 | 472 | // If we compare two Phis from the same block, and for each entry block |
10029 | 472 | // the predicate is true for incoming values from this block, then the |
10030 | 472 | // predicate is also true for the Phis. |
10031 | 480 | for (const BasicBlock *IncBB : predecessors(LBB)) { |
10032 | 480 | const SCEV *L = getSCEV(LPhi->getIncomingValueForBlock(IncBB)); |
10033 | 480 | const SCEV *R = getSCEV(RPhi->getIncomingValueForBlock(IncBB)); |
10034 | 480 | if (!ProvedEasily(L, R)) |
10035 | 472 | return false; |
10036 | 480 | } |
10037 | 149k | } else if (RAR && RAR->getLoop()->getHeader() == LBB788 ) { |
10038 | 176 | // Case two: RHS is also a Phi from the same basic block, and it is an |
10039 | 176 | // AddRec. It means that there is a loop which has both AddRec and Unknown |
10040 | 176 | // PHIs, for it we can compare incoming values of AddRec from above the loop |
10041 | 176 | // and latch with their respective incoming values of LPhi. |
10042 | 176 | // TODO: Generalize to handle loops with many inputs in a header. |
10043 | 176 | if (LPhi->getNumIncomingValues() != 2) return false8 ; |
10044 | 168 | |
10045 | 168 | auto *RLoop = RAR->getLoop(); |
10046 | 168 | auto *Predecessor = RLoop->getLoopPredecessor(); |
10047 | 168 | assert(Predecessor && "Loop with AddRec with no predecessor?"); |
10048 | 168 | const SCEV *L1 = getSCEV(LPhi->getIncomingValueForBlock(Predecessor)); |
10049 | 168 | if (!ProvedEasily(L1, RAR->getStart())) |
10050 | 164 | return false; |
10051 | 4 | auto *Latch = RLoop->getLoopLatch(); |
10052 | 4 | assert(Latch && "Loop with AddRec with no latch?"); |
10053 | 4 | const SCEV *L2 = getSCEV(LPhi->getIncomingValueForBlock(Latch)); |
10054 | 4 | if (!ProvedEasily(L2, RAR->getPostIncExpr(*this))) |
10055 | 4 | return false; |
10056 | 149k | } else { |
10057 | 149k | // In all other cases go over inputs of LHS and compare each of them to RHS, |
10058 | 149k | // the predicate is true for (LHS, RHS) if it is true for all such pairs. |
10059 | 149k | // At this point RHS is either a non-Phi, or it is a Phi from some block |
10060 | 149k | // different from LBB. |
10061 | 159k | for (const BasicBlock *IncBB : predecessors(LBB)) { |
10062 | 159k | // Check that RHS is available in this block. |
10063 | 159k | if (!dominates(RHS, IncBB)) |
10064 | 2.21k | return false; |
10065 | 157k | const SCEV *L = getSCEV(LPhi->getIncomingValueForBlock(IncBB)); |
10066 | 157k | if (!ProvedEasily(L, RHS)) |
10067 | 147k | return false; |
10068 | 157k | } |
10069 | 149k | } |
10070 | 150k | return true19 ; |
10071 | 150k | } |
10072 | | |
10073 | | bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred, |
10074 | | const SCEV *LHS, const SCEV *RHS, |
10075 | | const SCEV *FoundLHS, |
10076 | 2.77M | const SCEV *FoundRHS) { |
10077 | 2.77M | if (isImpliedCondOperandsViaRanges(Pred, LHS, RHS, FoundLHS, FoundRHS)) |
10078 | 24.0k | return true; |
10079 | 2.75M | |
10080 | 2.75M | if (isImpliedCondOperandsViaNoOverflow(Pred, LHS, RHS, FoundLHS, FoundRHS)) |
10081 | 101 | return true; |
10082 | 2.75M | |
10083 | 2.75M | return isImpliedCondOperandsHelper(Pred, LHS, RHS, |
10084 | 2.75M | FoundLHS, FoundRHS) || |
10085 | 2.75M | // ~x < ~y --> x > y |
10086 | 2.75M | isImpliedCondOperandsHelper(Pred, LHS, RHS, |
10087 | 2.57M | getNotSCEV(FoundRHS), |
10088 | 2.57M | getNotSCEV(FoundLHS)); |
10089 | 2.75M | } |
10090 | | |
10091 | | /// Is MaybeMinMaxExpr an (U|S)(Min|Max) of Candidate and some other values? |
10092 | | template <typename MinMaxExprType> |
10093 | | static bool IsMinMaxConsistingOf(const SCEV *MaybeMinMaxExpr, |
10094 | 8.85M | const SCEV *Candidate) { |
10095 | 8.85M | const MinMaxExprType *MinMaxExpr = dyn_cast<MinMaxExprType>(MaybeMinMaxExpr); |
10096 | 8.85M | if (!MinMaxExpr) |
10097 | 8.80M | return false; |
10098 | 56.3k | |
10099 | 56.3k | return find(MinMaxExpr->operands(), Candidate) != MinMaxExpr->op_end(); |
10100 | 56.3k | } ScalarEvolution.cpp:bool IsMinMaxConsistingOf<llvm::SCEVSMinExpr>(llvm::SCEV const*, llvm::SCEV const*) Line | Count | Source | 10094 | 2.56M | const SCEV *Candidate) { | 10095 | 2.56M | const MinMaxExprType *MinMaxExpr = dyn_cast<MinMaxExprType>(MaybeMinMaxExpr); | 10096 | 2.56M | if (!MinMaxExpr) | 10097 | 2.56M | return false; | 10098 | 3.13k | | 10099 | 3.13k | return find(MinMaxExpr->operands(), Candidate) != MinMaxExpr->op_end(); | 10100 | 3.13k | } |
ScalarEvolution.cpp:bool IsMinMaxConsistingOf<llvm::SCEVSMaxExpr>(llvm::SCEV const*, llvm::SCEV const*) Line | Count | Source | 10094 | 2.56M | const SCEV *Candidate) { | 10095 | 2.56M | const MinMaxExprType *MinMaxExpr = dyn_cast<MinMaxExprType>(MaybeMinMaxExpr); | 10096 | 2.56M | if (!MinMaxExpr) | 10097 | 2.56M | return false; | 10098 | 3.61k | | 10099 | 3.61k | return find(MinMaxExpr->operands(), Candidate) != MinMaxExpr->op_end(); | 10100 | 3.61k | } |
ScalarEvolution.cpp:bool IsMinMaxConsistingOf<llvm::SCEVUMinExpr>(llvm::SCEV const*, llvm::SCEV const*) Line | Count | Source | 10094 | 1.88M | const SCEV *Candidate) { | 10095 | 1.88M | const MinMaxExprType *MinMaxExpr = dyn_cast<MinMaxExprType>(MaybeMinMaxExpr); | 10096 | 1.88M | if (!MinMaxExpr) | 10097 | 1.83M | return false; | 10098 | 48.6k | | 10099 | 48.6k | return find(MinMaxExpr->operands(), Candidate) != MinMaxExpr->op_end(); | 10100 | 48.6k | } |
ScalarEvolution.cpp:bool IsMinMaxConsistingOf<llvm::SCEVUMaxExpr>(llvm::SCEV const*, llvm::SCEV const*) Line | Count | Source | 10094 | 1.84M | const SCEV *Candidate) { | 10095 | 1.84M | const MinMaxExprType *MinMaxExpr = dyn_cast<MinMaxExprType>(MaybeMinMaxExpr); | 10096 | 1.84M | if (!MinMaxExpr) | 10097 | 1.84M | return false; | 10098 | 927 | | 10099 | 927 | return find(MinMaxExpr->operands(), Candidate) != MinMaxExpr->op_end(); | 10100 | 927 | } |
|
10101 | | |
10102 | | static bool IsKnownPredicateViaAddRecStart(ScalarEvolution &SE, |
10103 | | ICmpInst::Predicate Pred, |
10104 | 8.01M | const SCEV *LHS, const SCEV *RHS) { |
10105 | 8.01M | // If both sides are affine addrecs for the same loop, with equal |
10106 | 8.01M | // steps, and we know the recurrences don't wrap, then we only |
10107 | 8.01M | // need to check the predicate on the starting values. |
10108 | 8.01M | |
10109 | 8.01M | if (!ICmpInst::isRelational(Pred)) |
10110 | 1.07M | return false; |
10111 | 6.93M | |
10112 | 6.93M | const SCEVAddRecExpr *LAR = dyn_cast<SCEVAddRecExpr>(LHS); |
10113 | 6.93M | if (!LAR) |
10114 | 4.52M | return false; |
10115 | 2.40M | const SCEVAddRecExpr *RAR = dyn_cast<SCEVAddRecExpr>(RHS); |
10116 | 2.40M | if (!RAR) |
10117 | 1.88M | return false; |
10118 | 521k | if (LAR->getLoop() != RAR->getLoop()) |
10119 | 14.9k | return false; |
10120 | 506k | if (!LAR->isAffine() || !RAR->isAffine()506k ) |
10121 | 20 | return false; |
10122 | 506k | |
10123 | 506k | if (LAR->getStepRecurrence(SE) != RAR->getStepRecurrence(SE)) |
10124 | 247k | return false; |
10125 | 259k | |
10126 | 259k | SCEV::NoWrapFlags NW = ICmpInst::isSigned(Pred) ? |
10127 | 191k | SCEV::FlagNSW67.0k : SCEV::FlagNUW; |
10128 | 259k | if (!LAR->getNoWrapFlags(NW) || !RAR->getNoWrapFlags(NW)48.6k ) |
10129 | 212k | return false; |
10130 | 46.7k | |
10131 | 46.7k | return SE.isKnownPredicate(Pred, LAR->getStart(), RAR->getStart()); |
10132 | 46.7k | } |
10133 | | |
10134 | | /// Is LHS `Pred` RHS true on the virtue of LHS or RHS being a Min or Max |
10135 | | /// expression? |
10136 | | static bool IsKnownPredicateViaMinOrMax(ScalarEvolution &SE, |
10137 | | ICmpInst::Predicate Pred, |
10138 | 8.05M | const SCEV *LHS, const SCEV *RHS) { |
10139 | 8.05M | switch (Pred) { |
10140 | 8.05M | default: |
10141 | 3.60M | return false; |
10142 | 8.05M | |
10143 | 8.05M | case ICmpInst::ICMP_SGE: |
10144 | 1.37M | std::swap(LHS, RHS); |
10145 | 1.37M | LLVM_FALLTHROUGH; |
10146 | 2.56M | case ICmpInst::ICMP_SLE: |
10147 | 2.56M | return |
10148 | 2.56M | // min(A, ...) <= A |
10149 | 2.56M | IsMinMaxConsistingOf<SCEVSMinExpr>(LHS, RHS) || |
10150 | 2.56M | // A <= max(A, ...) |
10151 | 2.56M | IsMinMaxConsistingOf<SCEVSMaxExpr>(RHS, LHS)2.56M ; |
10152 | 1.37M | |
10153 | 1.37M | case ICmpInst::ICMP_UGE: |
10154 | 716k | std::swap(LHS, RHS); |
10155 | 716k | LLVM_FALLTHROUGH; |
10156 | 1.88M | case ICmpInst::ICMP_ULE: |
10157 | 1.88M | return |
10158 | 1.88M | // min(A, ...) <= A |
10159 | 1.88M | IsMinMaxConsistingOf<SCEVUMinExpr>(LHS, RHS) || |
10160 | 1.88M | // A <= max(A, ...) |
10161 | 1.88M | IsMinMaxConsistingOf<SCEVUMaxExpr>(RHS, LHS)1.84M ; |
10162 | 0 | } |
10163 | 0 | |
10164 | 0 | llvm_unreachable("covered switch fell through?!"); |
10165 | 0 | } |
10166 | | |
10167 | | bool ScalarEvolution::isImpliedViaOperations(ICmpInst::Predicate Pred, |
10168 | | const SCEV *LHS, const SCEV *RHS, |
10169 | | const SCEV *FoundLHS, |
10170 | | const SCEV *FoundRHS, |
10171 | 5.91M | unsigned Depth) { |
10172 | 5.91M | assert(getTypeSizeInBits(LHS->getType()) == |
10173 | 5.91M | getTypeSizeInBits(RHS->getType()) && |
10174 | 5.91M | "LHS and RHS have different sizes?"); |
10175 | 5.91M | assert(getTypeSizeInBits(FoundLHS->getType()) == |
10176 | 5.91M | getTypeSizeInBits(FoundRHS->getType()) && |
10177 | 5.91M | "FoundLHS and FoundRHS have different sizes?"); |
10178 | 5.91M | // We want to avoid hurting the compile time with analysis of too big trees. |
10179 | 5.91M | if (Depth > MaxSCEVOperationsImplicationDepth) |
10180 | 11.3k | return false; |
10181 | 5.90M | // We only want to work with ICMP_SGT comparison so far. |
10182 | 5.90M | // TODO: Extend to ICMP_UGT? |
10183 | 5.90M | if (Pred == ICmpInst::ICMP_SLT) { |
10184 | 869k | Pred = ICmpInst::ICMP_SGT; |
10185 | 869k | std::swap(LHS, RHS); |
10186 | 869k | std::swap(FoundLHS, FoundRHS); |
10187 | 869k | } |
10188 | 5.90M | if (Pred != ICmpInst::ICMP_SGT) |
10189 | 2.91M | return false; |
10190 | 2.98M | |
10191 | 5.97M | auto GetOpFromSExt = [&](const SCEV *S) 2.98M { |
10192 | 5.97M | if (auto *Ext = dyn_cast<SCEVSignExtendExpr>(S)) |
10193 | 938k | return Ext->getOperand(); |
10194 | 5.03M | // TODO: If S is a SCEVConstant then you can cheaply "strip" the sext off |
10195 | 5.03M | // the constant in some cases. |
10196 | 5.03M | return S; |
10197 | 5.03M | }; |
10198 | 2.98M | |
10199 | 2.98M | // Acquire values from extensions. |
10200 | 2.98M | auto *OrigLHS = LHS; |
10201 | 2.98M | auto *OrigFoundLHS = FoundLHS; |
10202 | 2.98M | LHS = GetOpFromSExt(LHS); |
10203 | 2.98M | FoundLHS = GetOpFromSExt(FoundLHS); |
10204 | 2.98M | |
10205 | 2.98M | // Is the SGT predicate can be proved trivially or using the found context. |
10206 | 2.98M | auto IsSGTViaContext = [&](const SCEV *S1, const SCEV *S2) { |
10207 | 855k | return isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_SGT, S1, S2) || |
10208 | 855k | isImpliedViaOperations(ICmpInst::ICMP_SGT, S1, S2, OrigFoundLHS, |
10209 | 611k | FoundRHS, Depth + 1); |
10210 | 855k | }; |
10211 | 2.98M | |
10212 | 2.98M | if (auto *LHSAddExpr = dyn_cast<SCEVAddExpr>(LHS)) { |
10213 | 651k | // We want to avoid creation of any new non-constant SCEV. Since we are |
10214 | 651k | // going to compare the operands to RHS, we should be certain that we don't |
10215 | 651k | // need any size extensions for this. So let's decline all cases when the |
10216 | 651k | // sizes of types of LHS and RHS do not match. |
10217 | 651k | // TODO: Maybe try to get RHS from sext to catch more cases? |
10218 | 651k | if (getTypeSizeInBits(LHS->getType()) != getTypeSizeInBits(RHS->getType())) |
10219 | 50.5k | return false; |
10220 | 601k | |
10221 | 601k | // Should not overflow. |
10222 | 601k | if (!LHSAddExpr->hasNoSignedWrap()) |
10223 | 295k | return false; |
10224 | 305k | |
10225 | 305k | auto *LL = LHSAddExpr->getOperand(0); |
10226 | 305k | auto *LR = LHSAddExpr->getOperand(1); |
10227 | 305k | auto *MinusOne = getNegativeSCEV(getOne(RHS->getType())); |
10228 | 305k | |
10229 | 305k | // Checks that S1 >= 0 && S2 > RHS, trivially or using the found context. |
10230 | 610k | auto IsSumGreaterThanRHS = [&](const SCEV *S1, const SCEV *S2) { |
10231 | 610k | return IsSGTViaContext(S1, MinusOne) && IsSGTViaContext(S2, RHS)244k ; |
10232 | 610k | }; |
10233 | 305k | // Try to prove the following rule: |
10234 | 305k | // (LHS = LL + LR) && (LL >= 0) && (LR > RHS) => (LHS > RHS). |
10235 | 305k | // (LHS = LL + LR) && (LR >= 0) && (LL > RHS) => (LHS > RHS). |
10236 | 305k | if (IsSumGreaterThanRHS(LL, LR) || IsSumGreaterThanRHS(LR, LL)305k ) |
10237 | 121 | return true; |
10238 | 2.33M | } else if (auto *LHSUnknownExpr = dyn_cast<SCEVUnknown>(LHS)) { |
10239 | 651k | Value *LL, *LR; |
10240 | 651k | // FIXME: Once we have SDiv implemented, we can get rid of this matching. |
10241 | 651k | |
10242 | 651k | using namespace llvm::PatternMatch; |
10243 | 651k | |
10244 | 651k | if (match(LHSUnknownExpr->getValue(), m_SDiv(m_Value(LL), m_Value(LR)))) { |
10245 | 10.5k | // Rules for division. |
10246 | 10.5k | // We are going to perform some comparisons with Denominator and its |
10247 | 10.5k | // derivative expressions. In general case, creating a SCEV for it may |
10248 | 10.5k | // lead to a complex analysis of the entire graph, and in particular it |
10249 | 10.5k | // can request trip count recalculation for the same loop. This would |
10250 | 10.5k | // cache as SCEVCouldNotCompute to avoid the infinite recursion. To avoid |
10251 | 10.5k | // this, we only want to create SCEVs that are constants in this section. |
10252 | 10.5k | // So we bail if Denominator is not a constant. |
10253 | 10.5k | if (!isa<ConstantInt>(LR)) |
10254 | 2.84k | return false; |
10255 | 7.66k | |
10256 | 7.66k | auto *Denominator = cast<SCEVConstant>(getSCEV(LR)); |
10257 | 7.66k | |
10258 | 7.66k | // We want to make sure that LHS = FoundLHS / Denominator. If it is so, |
10259 | 7.66k | // then a SCEV for the numerator already exists and matches with FoundLHS. |
10260 | 7.66k | auto *Numerator = getExistingSCEV(LL); |
10261 | 7.66k | if (!Numerator || Numerator->getType() != FoundLHS->getType()7.58k ) |
10262 | 1.88k | return false; |
10263 | 5.77k | |
10264 | 5.77k | // Make sure that the numerator matches with FoundLHS and the denominator |
10265 | 5.77k | // is positive. |
10266 | 5.77k | if (!HasSameValue(Numerator, FoundLHS) || !isKnownPositive(Denominator)1.14k ) |
10267 | 4.62k | return false; |
10268 | 1.14k | |
10269 | 1.14k | auto *DTy = Denominator->getType(); |
10270 | 1.14k | auto *FRHSTy = FoundRHS->getType(); |
10271 | 1.14k | if (DTy->isPointerTy() != FRHSTy->isPointerTy()) |
10272 | 0 | // One of types is a pointer and another one is not. We cannot extend |
10273 | 0 | // them properly to a wider type, so let us just reject this case. |
10274 | 0 | // TODO: Usage of getEffectiveSCEVType for DTy, FRHSTy etc should help |
10275 | 0 | // to avoid this check. |
10276 | 0 | return false; |
10277 | 1.14k | |
10278 | 1.14k | // Given that: |
10279 | 1.14k | // FoundLHS > FoundRHS, LHS = FoundLHS / Denominator, Denominator > 0. |
10280 | 1.14k | auto *WTy = getWiderType(DTy, FRHSTy); |
10281 | 1.14k | auto *DenominatorExt = getNoopOrSignExtend(Denominator, WTy); |
10282 | 1.14k | auto *FoundRHSExt = getNoopOrSignExtend(FoundRHS, WTy); |
10283 | 1.14k | |
10284 | 1.14k | // Try to prove the following rule: |
10285 | 1.14k | // (FoundRHS > Denominator - 2) && (RHS <= 0) => (LHS > RHS). |
10286 | 1.14k | // For example, given that FoundLHS > 2. It means that FoundLHS is at |
10287 | 1.14k | // least 3. If we divide it by Denominator < 4, we will have at least 1. |
10288 | 1.14k | auto *DenomMinusTwo = getMinusSCEV(DenominatorExt, getConstant(WTy, 2)); |
10289 | 1.14k | if (isKnownNonPositive(RHS) && |
10290 | 1.14k | IsSGTViaContext(FoundRHSExt, DenomMinusTwo)790 ) |
10291 | 551 | return true; |
10292 | 597 | |
10293 | 597 | // Try to prove the following rule: |
10294 | 597 | // (FoundRHS > -1 - Denominator) && (RHS < 0) => (LHS > RHS). |
10295 | 597 | // For example, given that FoundLHS > -3. Then FoundLHS is at least -2. |
10296 | 597 | // If we divide it by Denominator > 2, then: |
10297 | 597 | // 1. If FoundLHS is negative, then the result is 0. |
10298 | 597 | // 2. If FoundLHS is non-negative, then the result is non-negative. |
10299 | 597 | // Anyways, the result is non-negative. |
10300 | 597 | auto *MinusOne = getNegativeSCEV(getOne(WTy)); |
10301 | 597 | auto *NegDenomMinusOne = getMinusSCEV(MinusOne, DenominatorExt); |
10302 | 597 | if (isKnownNegative(RHS) && |
10303 | 597 | IsSGTViaContext(FoundRHSExt, NegDenomMinusOne)136 ) |
10304 | 68 | return true; |
10305 | 2.62M | } |
10306 | 651k | } |
10307 | 2.62M | |
10308 | 2.62M | // If our expression contained SCEVUnknown Phis, and we split it down and now |
10309 | 2.62M | // need to prove something for them, try to prove the predicate for every |
10310 | 2.62M | // possible incoming values of those Phis. |
10311 | 2.62M | if (isImpliedViaMerge(Pred, OrigLHS, RHS, OrigFoundLHS, FoundRHS, Depth + 1)) |
10312 | 19 | return true; |
10313 | 2.62M | |
10314 | 2.62M | return false; |
10315 | 2.62M | } |
10316 | | |
10317 | | bool |
10318 | | ScalarEvolution::isKnownViaNonRecursiveReasoning(ICmpInst::Predicate Pred, |
10319 | 9.90M | const SCEV *LHS, const SCEV *RHS) { |
10320 | 9.90M | return isKnownPredicateViaConstantRanges(Pred, LHS, RHS) || |
10321 | 9.90M | IsKnownPredicateViaMinOrMax(*this, Pred, LHS, RHS)8.05M || |
10322 | 9.90M | IsKnownPredicateViaAddRecStart(*this, Pred, LHS, RHS)8.01M || |
10323 | 9.90M | isKnownPredicateViaNoOverflow(Pred, LHS, RHS)7.99M ; |
10324 | 9.90M | } |
10325 | | |
10326 | | bool |
10327 | | ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred, |
10328 | | const SCEV *LHS, const SCEV *RHS, |
10329 | | const SCEV *FoundLHS, |
10330 | 5.33M | const SCEV *FoundRHS) { |
10331 | 5.33M | switch (Pred) { |
10332 | 5.33M | default: 0 llvm_unreachable0 ("Unexpected ICmpInst::Predicate value!"); |
10333 | 5.33M | case ICmpInst::ICMP_EQ: |
10334 | 1.42M | case ICmpInst::ICMP_NE: |
10335 | 1.42M | if (HasSameValue(LHS, FoundLHS) && HasSameValue(RHS, FoundRHS)92.4k ) |
10336 | 39.1k | return true; |
10337 | 1.38M | break; |
10338 | 1.38M | case ICmpInst::ICMP_SLT: |
10339 | 851k | case ICmpInst::ICMP_SLE: |
10340 | 851k | if (isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_SLE, LHS, FoundLHS) && |
10341 | 851k | isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_SGE, RHS, FoundRHS)23.4k ) |
10342 | 5.27k | return true; |
10343 | 846k | break; |
10344 | 1.51M | case ICmpInst::ICMP_SGT: |
10345 | 1.51M | case ICmpInst::ICMP_SGE: |
10346 | 1.51M | if (isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_SGE, LHS, FoundLHS) && |
10347 | 1.51M | isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_SLE, RHS, FoundRHS)323k ) |
10348 | 106k | return true; |
10349 | 1.40M | break; |
10350 | 1.40M | case ICmpInst::ICMP_ULT: |
10351 | 1.16M | case ICmpInst::ICMP_ULE: |
10352 | 1.16M | if (isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_ULE, LHS, FoundLHS) && |
10353 | 1.16M | isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_UGE, RHS, FoundRHS)210k ) |
10354 | 7.39k | return true; |
10355 | 1.15M | break; |
10356 | 1.15M | case ICmpInst::ICMP_UGT: |
10357 | 374k | case ICmpInst::ICMP_UGE: |
10358 | 374k | if (isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_UGE, LHS, FoundLHS) && |
10359 | 374k | isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_ULE, RHS, FoundRHS)31.1k ) |
10360 | 18.5k | return true; |
10361 | 356k | break; |
10362 | 5.15M | } |
10363 | 5.15M | |
10364 | 5.15M | // Maybe it can be proved via operations? |
10365 | 5.15M | if (isImpliedViaOperations(Pred, LHS, RHS, FoundLHS, FoundRHS)) |
10366 | 475 | return true; |
10367 | 5.15M | |
10368 | 5.15M | return false; |
10369 | 5.15M | } |
10370 | | |
10371 | | bool ScalarEvolution::isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred, |
10372 | | const SCEV *LHS, |
10373 | | const SCEV *RHS, |
10374 | | const SCEV *FoundLHS, |
10375 | 2.92M | const SCEV *FoundRHS) { |
10376 | 2.92M | if (!isa<SCEVConstant>(RHS) || !isa<SCEVConstant>(FoundRHS)2.62M ) |
10377 | 1.31M | // The restriction on `FoundRHS` be lifted easily -- it exists only to |
10378 | 1.31M | // reduce the compile time impact of this optimization. |
10379 | 1.31M | return false; |
10380 | 1.61M | |
10381 | 1.61M | Optional<APInt> Addend = computeConstantDifference(LHS, FoundLHS); |
10382 | 1.61M | if (!Addend) |
10383 | 1.36M | return false; |
10384 | 251k | |
10385 | 251k | const APInt &ConstFoundRHS = cast<SCEVConstant>(FoundRHS)->getAPInt(); |
10386 | 251k | |
10387 | 251k | // `FoundLHSRange` is the range we know `FoundLHS` to be in by virtue of the |
10388 | 251k | // antecedent "`FoundLHS` `Pred` `FoundRHS`". |
10389 | 251k | ConstantRange FoundLHSRange = |
10390 | 251k | ConstantRange::makeAllowedICmpRegion(Pred, ConstFoundRHS); |
10391 | 251k | |
10392 | 251k | // Since `LHS` is `FoundLHS` + `Addend`, we can compute a range for `LHS`: |
10393 | 251k | ConstantRange LHSRange = FoundLHSRange.add(ConstantRange(*Addend)); |
10394 | 251k | |
10395 | 251k | // We can also compute the range of values for `LHS` that satisfy the |
10396 | 251k | // consequent, "`LHS` `Pred` `RHS`": |
10397 | 251k | const APInt &ConstRHS = cast<SCEVConstant>(RHS)->getAPInt(); |
10398 | 251k | ConstantRange SatisfyingLHSRange = |
10399 | 251k | ConstantRange::makeSatisfyingICmpRegion(Pred, ConstRHS); |
10400 | 251k | |
10401 | 251k | // The antecedent implies the consequent if every value of `LHS` that |
10402 | 251k | // satisfies the antecedent also satisfies the consequent. |
10403 | 251k | return SatisfyingLHSRange.contains(LHSRange); |
10404 | 251k | } |
10405 | | |
10406 | | bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride, |
10407 | 6.84k | bool IsSigned, bool NoWrap) { |
10408 | 6.84k | assert(isKnownPositive(Stride) && "Positive stride expected!"); |
10409 | 6.84k | |
10410 | 6.84k | if (NoWrap) return false3.34k ; |
10411 | 3.50k | |
10412 | 3.50k | unsigned BitWidth = getTypeSizeInBits(RHS->getType()); |
10413 | 3.50k | const SCEV *One = getOne(Stride->getType()); |
10414 | 3.50k | |
10415 | 3.50k | if (IsSigned) { |
10416 | 1.52k | APInt MaxRHS = getSignedRangeMax(RHS); |
10417 | 1.52k | APInt MaxValue = APInt::getSignedMaxValue(BitWidth); |
10418 | 1.52k | APInt MaxStrideMinusOne = getSignedRangeMax(getMinusSCEV(Stride, One)); |
10419 | 1.52k | |
10420 | 1.52k | // SMaxRHS + SMaxStrideMinusOne > SMaxValue => overflow! |
10421 | 1.52k | return (std::move(MaxValue) - MaxStrideMinusOne).slt(MaxRHS); |
10422 | 1.52k | } |
10423 | 1.98k | |
10424 | 1.98k | APInt MaxRHS = getUnsignedRangeMax(RHS); |
10425 | 1.98k | APInt MaxValue = APInt::getMaxValue(BitWidth); |
10426 | 1.98k | APInt MaxStrideMinusOne = getUnsignedRangeMax(getMinusSCEV(Stride, One)); |
10427 | 1.98k | |
10428 | 1.98k | // UMaxRHS + UMaxStrideMinusOne > UMaxValue => overflow! |
10429 | 1.98k | return (std::move(MaxValue) - MaxStrideMinusOne).ult(MaxRHS); |
10430 | 1.98k | } |
10431 | | |
10432 | | bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride, |
10433 | 1.09k | bool IsSigned, bool NoWrap) { |
10434 | 1.09k | if (NoWrap) return false186 ; |
10435 | 912 | |
10436 | 912 | unsigned BitWidth = getTypeSizeInBits(RHS->getType()); |
10437 | 912 | const SCEV *One = getOne(Stride->getType()); |
10438 | 912 | |
10439 | 912 | if (IsSigned) { |
10440 | 531 | APInt MinRHS = getSignedRangeMin(RHS); |
10441 | 531 | APInt MinValue = APInt::getSignedMinValue(BitWidth); |
10442 | 531 | APInt MaxStrideMinusOne = getSignedRangeMax(getMinusSCEV(Stride, One)); |
10443 | 531 | |
10444 | 531 | // SMinRHS - SMaxStrideMinusOne < SMinValue => overflow! |
10445 | 531 | return (std::move(MinValue) + MaxStrideMinusOne).sgt(MinRHS); |
10446 | 531 | } |
10447 | 381 | |
10448 | 381 | APInt MinRHS = getUnsignedRangeMin(RHS); |
10449 | 381 | APInt MinValue = APInt::getMinValue(BitWidth); |
10450 | 381 | APInt MaxStrideMinusOne = getUnsignedRangeMax(getMinusSCEV(Stride, One)); |
10451 | 381 | |
10452 | 381 | // UMinRHS - UMaxStrideMinusOne < UMinValue => overflow! |
10453 | 381 | return (std::move(MinValue) + MaxStrideMinusOne).ugt(MinRHS); |
10454 | 381 | } |
10455 | | |
10456 | | const SCEV *ScalarEvolution::computeBECount(const SCEV *Delta, const SCEV *Step, |
10457 | 342k | bool Equality) { |
10458 | 342k | const SCEV *One = getOne(Step->getType()); |
10459 | 342k | Delta = Equality ? getAddExpr(Delta, Step)0 |
10460 | 342k | : getAddExpr(Delta, getMinusSCEV(Step, One)); |
10461 | 342k | return getUDivExpr(Delta, Step); |
10462 | 342k | } |
10463 | | |
10464 | | const SCEV *ScalarEvolution::computeMaxBECountForLT(const SCEV *Start, |
10465 | | const SCEV *Stride, |
10466 | | const SCEV *End, |
10467 | | unsigned BitWidth, |
10468 | 194k | bool IsSigned) { |
10469 | 194k | |
10470 | 194k | assert(!isKnownNonPositive(Stride) && |
10471 | 194k | "Stride is expected strictly positive!"); |
10472 | 194k | // Calculate the maximum backedge count based on the range of values |
10473 | 194k | // permitted by Start, End, and Stride. |
10474 | 194k | const SCEV *MaxBECount; |
10475 | 194k | APInt MinStart = |
10476 | 194k | IsSigned ? getSignedRangeMin(Start)104k : getUnsignedRangeMin(Start)89.9k ; |
10477 | 194k | |
10478 | 194k | APInt StrideForMaxBECount = |
10479 | 194k | IsSigned ? getSignedRangeMin(Stride)104k : getUnsignedRangeMin(Stride)89.9k ; |
10480 | 194k | |
10481 | 194k | // We already know that the stride is positive, so we paper over conservatism |
10482 | 194k | // in our range computation by forcing StrideForMaxBECount to be at least one. |
10483 | 194k | // In theory this is unnecessary, but we expect MaxBECount to be a |
10484 | 194k | // SCEVConstant, and (udiv <constant> 0) is not constant folded by SCEV (there |
10485 | 194k | // is nothing to constant fold it to). |
10486 | 194k | APInt One(BitWidth, 1, IsSigned); |
10487 | 194k | StrideForMaxBECount = APIntOps::smax(One, StrideForMaxBECount); |
10488 | 194k | |
10489 | 194k | APInt MaxValue = IsSigned ? APInt::getSignedMaxValue(BitWidth)104k |
10490 | 194k | : APInt::getMaxValue(BitWidth)89.9k ; |
10491 | 194k | APInt Limit = MaxValue - (StrideForMaxBECount - 1); |
10492 | 194k | |
10493 | 194k | // Although End can be a MAX expression we estimate MaxEnd considering only |
10494 | 194k | // the case End = RHS of the loop termination condition. This is safe because |
10495 | 194k | // in the other case (End - Start) is zero, leading to a zero maximum backedge |
10496 | 194k | // taken count. |
10497 | 194k | APInt MaxEnd = IsSigned ? APIntOps::smin(getSignedRangeMax(End), Limit)104k |
10498 | 194k | : APIntOps::umin(getUnsignedRangeMax(End), Limit)89.9k ; |
10499 | 194k | |
10500 | 194k | MaxBECount = computeBECount(getConstant(MaxEnd - MinStart) /* Delta */, |
10501 | 194k | getConstant(StrideForMaxBECount) /* Step */, |
10502 | 194k | false /* Equality */); |
10503 | 194k | |
10504 | 194k | return MaxBECount; |
10505 | 194k | } |
10506 | | |
10507 | | ScalarEvolution::ExitLimit |
10508 | | ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS, |
10509 | | const Loop *L, bool IsSigned, |
10510 | 277k | bool ControlsExit, bool AllowPredicates) { |
10511 | 277k | SmallPtrSet<const SCEVPredicate *, 4> Predicates; |
10512 | 277k | |
10513 | 277k | const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS); |
10514 | 277k | bool PredicatedIV = false; |
10515 | 277k | |
10516 | 277k | if (!IV && AllowPredicates78.5k ) { |
10517 | 12.4k | // Try to make this an AddRec using runtime tests, in the first X |
10518 | 12.4k | // iterations of this loop, where X is the SCEV expression found by the |
10519 | 12.4k | // algorithm below. |
10520 | 12.4k | IV = convertSCEVToAddRecWithPredicates(LHS, L, Predicates); |
10521 | 12.4k | PredicatedIV = true; |
10522 | 12.4k | } |
10523 | 277k | |
10524 | 277k | // Avoid weird loops |
10525 | 277k | if (!IV || IV->getLoop() != L198k || !IV->isAffine()198k ) |
10526 | 78.4k | return getCouldNotCompute(); |
10527 | 198k | |
10528 | 198k | bool NoWrap = ControlsExit && |
10529 | 198k | IV->getNoWrapFlags(IsSigned 148k ? SCEV::FlagNSW77.0k : SCEV::FlagNUW71.2k ); |
10530 | 198k | |
10531 | 198k | const SCEV *Stride = IV->getStepRecurrence(*this); |
10532 | 198k | |
10533 | 198k | bool PositiveStride = isKnownPositive(Stride); |
10534 | 198k | |
10535 | 198k | // Avoid negative or zero stride values. |
10536 | 198k | if (!PositiveStride) { |
10537 | 2.16k | // We can compute the correct backedge taken count for loops with unknown |
10538 | 2.16k | // strides if we can prove that the loop is not an infinite loop with side |
10539 | 2.16k | // effects. Here's the loop structure we are trying to handle - |
10540 | 2.16k | // |
10541 | 2.16k | // i = start |
10542 | 2.16k | // do { |
10543 | 2.16k | // A[i] = i; |
10544 | 2.16k | // i += s; |
10545 | 2.16k | // } while (i < end); |
10546 | 2.16k | // |
10547 | 2.16k | // The backedge taken count for such loops is evaluated as - |
10548 | 2.16k | // (max(end, start + stride) - start - 1) /u stride |
10549 | 2.16k | // |
10550 | 2.16k | // The additional preconditions that we need to check to prove correctness |
10551 | 2.16k | // of the above formula is as follows - |
10552 | 2.16k | // |
10553 | 2.16k | // a) IV is either nuw or nsw depending upon signedness (indicated by the |
10554 | 2.16k | // NoWrap flag). |
10555 | 2.16k | // b) loop is single exit with no side effects. |
10556 | 2.16k | // |
10557 | 2.16k | // |
10558 | 2.16k | // Precondition a) implies that if the stride is negative, this is a single |
10559 | 2.16k | // trip loop. The backedge taken count formula reduces to zero in this case. |
10560 | 2.16k | // |
10561 | 2.16k | // Precondition b) implies that the unknown stride cannot be zero otherwise |
10562 | 2.16k | // we have UB. |
10563 | 2.16k | // |
10564 | 2.16k | // The positive stride case is the same as isKnownPositive(Stride) returning |
10565 | 2.16k | // true (original behavior of the function). |
10566 | 2.16k | // |
10567 | 2.16k | // We want to make sure that the stride is truly unknown as there are edge |
10568 | 2.16k | // cases where ScalarEvolution propagates no wrap flags to the |
10569 | 2.16k | // post-increment/decrement IV even though the increment/decrement operation |
10570 | 2.16k | // itself is wrapping. The computed backedge taken count may be wrong in |
10571 | 2.16k | // such cases. This is prevented by checking that the stride is not known to |
10572 | 2.16k | // be either positive or non-positive. For example, no wrap flags are |
10573 | 2.16k | // propagated to the post-increment IV of this loop with a trip count of 2 - |
10574 | 2.16k | // |
10575 | 2.16k | // unsigned char i; |
10576 | 2.16k | // for(i=127; i<128; i+=129) |
10577 | 2.16k | // A[i] = i; |
10578 | 2.16k | // |
10579 | 2.16k | if (PredicatedIV || !NoWrap || isKnownNonPositive(Stride)823 || |
10580 | 2.16k | !loopHasNoSideEffects(L)779 ) |
10581 | 1.39k | return getCouldNotCompute(); |
10582 | 196k | } else if (!Stride->isOne() && |
10583 | 196k | doesIVOverflowOnLT(RHS, Stride, IsSigned, NoWrap)6.84k ) |
10584 | 2.79k | // Avoid proven overflow cases: this will ensure that the backedge taken |
10585 | 2.79k | // count will not generate any unsigned overflow. Relaxed no-overflow |
10586 | 2.79k | // conditions exploit NoWrapFlags, allowing to optimize in presence of |
10587 | 2.79k | // undefined behaviors like the case of C language. |
10588 | 2.79k | return getCouldNotCompute(); |
10589 | 194k | |
10590 | 194k | ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SLT104k |
10591 | 194k | : ICmpInst::ICMP_ULT90.0k ; |
10592 | 194k | const SCEV *Start = IV->getStart(); |
10593 | 194k | const SCEV *End = RHS; |
10594 | 194k | // When the RHS is not invariant, we do not know the end bound of the loop and |
10595 | 194k | // cannot calculate the ExactBECount needed by ExitLimit. However, we can |
10596 | 194k | // calculate the MaxBECount, given the start, stride and max value for the end |
10597 | 194k | // bound of the loop (RHS), and the fact that IV does not overflow (which is |
10598 | 194k | // checked above). |
10599 | 194k | if (!isLoopInvariant(RHS, L)) { |
10600 | 106k | const SCEV *MaxBECount = computeMaxBECountForLT( |
10601 | 106k | Start, Stride, RHS, getTypeSizeInBits(LHS->getType()), IsSigned); |
10602 | 106k | return ExitLimit(getCouldNotCompute() /* ExactNotTaken */, MaxBECount, |
10603 | 106k | false /*MaxOrZero*/, Predicates); |
10604 | 106k | } |
10605 | 88.1k | // If the backedge is taken at least once, then it will be taken |
10606 | 88.1k | // (End-Start)/Stride times (rounded up to a multiple of Stride), where Start |
10607 | 88.1k | // is the LHS value of the less-than comparison the first time it is evaluated |
10608 | 88.1k | // and End is the RHS. |
10609 | 88.1k | const SCEV *BECountIfBackedgeTaken = |
10610 | 88.1k | computeBECount(getMinusSCEV(End, Start), Stride, false); |
10611 | 88.1k | // If the loop entry is guarded by the result of the backedge test of the |
10612 | 88.1k | // first loop iteration, then we know the backedge will be taken at least |
10613 | 88.1k | // once and so the backedge taken count is as above. If not then we use the |
10614 | 88.1k | // expression (max(End,Start)-Start)/Stride to describe the backedge count, |
10615 | 88.1k | // as if the backedge is taken at least once max(End,Start) is End and so the |
10616 | 88.1k | // result is as above, and if not max(End,Start) is Start so we get a backedge |
10617 | 88.1k | // count of zero. |
10618 | 88.1k | const SCEV *BECount; |
10619 | 88.1k | if (isLoopEntryGuardedByCond(L, Cond, getMinusSCEV(Start, Stride), RHS)) |
10620 | 56.5k | BECount = BECountIfBackedgeTaken; |
10621 | 31.5k | else { |
10622 | 31.5k | End = IsSigned ? getSMaxExpr(RHS, Start)16.3k : getUMaxExpr(RHS, Start)15.2k ; |
10623 | 31.5k | BECount = computeBECount(getMinusSCEV(End, Start), Stride, false); |
10624 | 31.5k | } |
10625 | 88.1k | |
10626 | 88.1k | const SCEV *MaxBECount; |
10627 | 88.1k | bool MaxOrZero = false; |
10628 | 88.1k | if (isa<SCEVConstant>(BECount)) |
10629 | 332 | MaxBECount = BECount; |
10630 | 87.8k | else if (isa<SCEVConstant>(BECountIfBackedgeTaken)) { |
10631 | 232 | // If we know exactly how many times the backedge will be taken if it's |
10632 | 232 | // taken at least once, then the backedge count will either be that or |
10633 | 232 | // zero. |
10634 | 232 | MaxBECount = BECountIfBackedgeTaken; |
10635 | 232 | MaxOrZero = true; |
10636 | 87.5k | } else { |
10637 | 87.5k | MaxBECount = computeMaxBECountForLT( |
10638 | 87.5k | Start, Stride, RHS, getTypeSizeInBits(LHS->getType()), IsSigned); |
10639 | 87.5k | } |
10640 | 88.1k | |
10641 | 88.1k | if (isa<SCEVCouldNotCompute>(MaxBECount) && |
10642 | 88.1k | !isa<SCEVCouldNotCompute>(BECount)0 ) |
10643 | 0 | MaxBECount = getConstant(getUnsignedRangeMax(BECount)); |
10644 | 88.1k | |
10645 | 88.1k | return ExitLimit(BECount, MaxBECount, MaxOrZero, Predicates); |
10646 | 88.1k | } |
10647 | | |
10648 | | ScalarEvolution::ExitLimit |
10649 | | ScalarEvolution::howManyGreaterThans(const SCEV *LHS, const SCEV *RHS, |
10650 | | const Loop *L, bool IsSigned, |
10651 | 43.5k | bool ControlsExit, bool AllowPredicates) { |
10652 | 43.5k | SmallPtrSet<const SCEVPredicate *, 4> Predicates; |
10653 | 43.5k | // We handle only IV > Invariant |
10654 | 43.5k | if (!isLoopInvariant(RHS, L)) |
10655 | 11.7k | return getCouldNotCompute(); |
10656 | 31.8k | |
10657 | 31.8k | const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS); |
10658 | 31.8k | if (!IV && AllowPredicates17.1k ) |
10659 | 1.98k | // Try to make this an AddRec using runtime tests, in the first X |
10660 | 1.98k | // iterations of this loop, where X is the SCEV expression found by the |
10661 | 1.98k | // algorithm below. |
10662 | 1.98k | IV = convertSCEVToAddRecWithPredicates(LHS, L, Predicates); |
10663 | 31.8k | |
10664 | 31.8k | // Avoid weird loops |
10665 | 31.8k | if (!IV || IV->getLoop() != L14.6k || !IV->isAffine()14.6k ) |
10666 | 17.1k | return getCouldNotCompute(); |
10667 | 14.6k | |
10668 | 14.6k | bool NoWrap = ControlsExit && |
10669 | 14.6k | IV->getNoWrapFlags(IsSigned 11.8k ? SCEV::FlagNSW11.3k : SCEV::FlagNUW504 ); |
10670 | 14.6k | |
10671 | 14.6k | const SCEV *Stride = getNegativeSCEV(IV->getStepRecurrence(*this)); |
10672 | 14.6k | |
10673 | 14.6k | // Avoid negative or zero stride values |
10674 | 14.6k | if (!isKnownPositive(Stride)) |
10675 | 139 | return getCouldNotCompute(); |
10676 | 14.5k | |
10677 | 14.5k | // Avoid proven overflow cases: this will ensure that the backedge taken count |
10678 | 14.5k | // will not generate any unsigned overflow. Relaxed no-overflow conditions |
10679 | 14.5k | // exploit NoWrapFlags, allowing to optimize in presence of undefined |
10680 | 14.5k | // behaviors like the case of C language. |
10681 | 14.5k | if (!Stride->isOne() && doesIVOverflowOnGT(RHS, Stride, IsSigned, NoWrap)1.09k ) |
10682 | 157 | return getCouldNotCompute(); |
10683 | 14.3k | |
10684 | 14.3k | ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SGT13.7k |
10685 | 14.3k | : ICmpInst::ICMP_UGT587 ; |
10686 | 14.3k | |
10687 | 14.3k | const SCEV *Start = IV->getStart(); |
10688 | 14.3k | const SCEV *End = RHS; |
10689 | 14.3k | if (!isLoopEntryGuardedByCond(L, Cond, getAddExpr(Start, Stride), RHS)) |
10690 | 9.28k | End = IsSigned ? getSMinExpr(RHS, Start)9.11k : getUMinExpr(RHS, Start)172 ; |
10691 | 14.3k | |
10692 | 14.3k | const SCEV *BECount = computeBECount(getMinusSCEV(Start, End), Stride, false); |
10693 | 14.3k | |
10694 | 14.3k | APInt MaxStart = IsSigned ? getSignedRangeMax(Start)13.7k |
10695 | 14.3k | : getUnsignedRangeMax(Start)587 ; |
10696 | 14.3k | |
10697 | 14.3k | APInt MinStride = IsSigned ? getSignedRangeMin(Stride)13.7k |
10698 | 14.3k | : getUnsignedRangeMin(Stride)587 ; |
10699 | 14.3k | |
10700 | 14.3k | unsigned BitWidth = getTypeSizeInBits(LHS->getType()); |
10701 | 14.3k | APInt Limit = IsSigned ? APInt::getSignedMinValue(BitWidth) + (MinStride - 1)13.7k |
10702 | 14.3k | : APInt::getMinValue(BitWidth) + (MinStride - 1)587 ; |
10703 | 14.3k | |
10704 | 14.3k | // Although End can be a MIN expression we estimate MinEnd considering only |
10705 | 14.3k | // the case End = RHS. This is safe because in the other case (Start - End) |
10706 | 14.3k | // is zero, leading to a zero maximum backedge taken count. |
10707 | 14.3k | APInt MinEnd = |
10708 | 14.3k | IsSigned ? APIntOps::smax(getSignedRangeMin(RHS), Limit)13.7k |
10709 | 14.3k | : APIntOps::umax(getUnsignedRangeMin(RHS), Limit)587 ; |
10710 | 14.3k | |
10711 | 14.3k | const SCEV *MaxBECount = isa<SCEVConstant>(BECount) |
10712 | 14.3k | ? BECount449 |
10713 | 14.3k | : computeBECount(getConstant(MaxStart - MinEnd), |
10714 | 13.9k | getConstant(MinStride), false); |
10715 | 14.3k | |
10716 | 14.3k | if (isa<SCEVCouldNotCompute>(MaxBECount)) |
10717 | 0 | MaxBECount = BECount; |
10718 | 14.3k | |
10719 | 14.3k | return ExitLimit(BECount, MaxBECount, false, Predicates); |
10720 | 14.3k | } |
10721 | | |
10722 | | const SCEV *SCEVAddRecExpr::getNumIterationsInRange(const ConstantRange &Range, |
10723 | 277k | ScalarEvolution &SE) const { |
10724 | 277k | if (Range.isFullSet()) // Infinite loop. |
10725 | 0 | return SE.getCouldNotCompute(); |
10726 | 277k | |
10727 | 277k | // If the start is a non-zero constant, shift the range to simplify things. |
10728 | 277k | if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(getStart())) |
10729 | 246k | if (!SC->getValue()->isZero()) { |
10730 | 122k | SmallVector<const SCEV *, 4> Operands(op_begin(), op_end()); |
10731 | 122k | Operands[0] = SE.getZero(SC->getType()); |
10732 | 122k | const SCEV *Shifted = SE.getAddRecExpr(Operands, getLoop(), |
10733 | 122k | getNoWrapFlags(FlagNW)); |
10734 | 122k | if (const auto *ShiftedAddRec = dyn_cast<SCEVAddRecExpr>(Shifted)) |
10735 | 122k | return ShiftedAddRec->getNumIterationsInRange( |
10736 | 122k | Range.subtract(SC->getAPInt()), SE); |
10737 | 0 | // This is strange and shouldn't happen. |
10738 | 0 | return SE.getCouldNotCompute(); |
10739 | 0 | } |
10740 | 154k | |
10741 | 154k | // The only time we can solve this is when we have all constant indices. |
10742 | 154k | // Otherwise, we cannot determine the overflow conditions. |
10743 | 278k | if (154k any_of(operands(), [](const SCEV *Op) 154k { return !isa<SCEVConstant>(Op); })) |
10744 | 30.3k | return SE.getCouldNotCompute(); |
10745 | 124k | |
10746 | 124k | // Okay at this point we know that all elements of the chrec are constants and |
10747 | 124k | // that the start element is zero. |
10748 | 124k | |
10749 | 124k | // First check to see if the range contains zero. If not, the first |
10750 | 124k | // iteration exits. |
10751 | 124k | unsigned BitWidth = SE.getTypeSizeInBits(getType()); |
10752 | 124k | if (!Range.contains(APInt(BitWidth, 0))) |
10753 | 698 | return SE.getZero(getType()); |
10754 | 123k | |
10755 | 123k | if (isAffine()) { |
10756 | 123k | // If this is an affine expression then we have this situation: |
10757 | 123k | // Solve {0,+,A} in Range === Ax in Range |
10758 | 123k | |
10759 | 123k | // We know that zero is in the range. If A is positive then we know that |
10760 | 123k | // the upper value of the range must be the first possible exit value. |
10761 | 123k | // If A is negative then the lower of the range is the last possible loop |
10762 | 123k | // value. Also note that we already checked for a full range. |
10763 | 123k | APInt A = cast<SCEVConstant>(getOperand(1))->getAPInt(); |
10764 | 123k | APInt End = A.sge(1) ? (Range.getUpper() - 1)121k : Range.getLower()2.20k ; |
10765 | 123k | |
10766 | 123k | // The exit value should be (End+A)/A. |
10767 | 123k | APInt ExitVal = (End + A).udiv(A); |
10768 | 123k | ConstantInt *ExitValue = ConstantInt::get(SE.getContext(), ExitVal); |
10769 | 123k | |
10770 | 123k | // Evaluate at the exit value. If we really did fall out of the valid |
10771 | 123k | // range, then we computed our trip count, otherwise wrap around or other |
10772 | 123k | // things must have happened. |
10773 | 123k | ConstantInt *Val = EvaluateConstantChrecAtConstant(this, ExitValue, SE); |
10774 | 123k | if (Range.contains(Val->getValue())) |
10775 | 2.13k | return SE.getCouldNotCompute(); // Something strange happened |
10776 | 121k | |
10777 | 121k | // Ensure that the previous value is in the range. This is a sanity check. |
10778 | 121k | assert(Range.contains( |
10779 | 121k | EvaluateConstantChrecAtConstant(this, |
10780 | 121k | ConstantInt::get(SE.getContext(), ExitVal - 1), SE)->getValue()) && |
10781 | 121k | "Linear scev computation is off in a bad way!"); |
10782 | 121k | return SE.getConstant(ExitValue); |
10783 | 121k | } |
10784 | 15 | |
10785 | 15 | if (isQuadratic()) { |
10786 | 15 | if (auto S = SolveQuadraticAddRecRange(this, Range, SE)) |
10787 | 10 | return SE.getConstant(S.getValue()); |
10788 | 5 | } |
10789 | 5 | |
10790 | 5 | return SE.getCouldNotCompute(); |
10791 | 5 | } |
10792 | | |
10793 | | const SCEVAddRecExpr * |
10794 | 835k | SCEVAddRecExpr::getPostIncExpr(ScalarEvolution &SE) const { |
10795 | 835k | assert(getNumOperands() > 1 && "AddRec with zero step?"); |
10796 | 835k | // There is a temptation to just call getAddExpr(this, getStepRecurrence(SE)), |
10797 | 835k | // but in this case we cannot guarantee that the value returned will be an |
10798 | 835k | // AddRec because SCEV does not have a fixed point where it stops |
10799 | 835k | // simplification: it is legal to return ({rec1} + {rec2}). For example, it |
10800 | 835k | // may happen if we reach arithmetic depth limit while simplifying. So we |
10801 | 835k | // construct the returned value explicitly. |
10802 | 835k | SmallVector<const SCEV *, 3> Ops; |
10803 | 835k | // If this is {A,+,B,+,C,...,+,N}, then its step is {B,+,C,+,...,+,N}, and |
10804 | 835k | // (this + Step) is {A+B,+,B+C,+...,+,N}. |
10805 | 1.67M | for (unsigned i = 0, e = getNumOperands() - 1; i < e; ++i835k ) |
10806 | 835k | Ops.push_back(SE.getAddExpr(getOperand(i), getOperand(i + 1))); |
10807 | 835k | // We know that the last operand is not a constant zero (otherwise it would |
10808 | 835k | // have been popped out earlier). This guarantees us that if the result has |
10809 | 835k | // the same last operand, then it will also not be popped out, meaning that |
10810 | 835k | // the returned value will be an AddRec. |
10811 | 835k | const SCEV *Last = getOperand(getNumOperands() - 1); |
10812 | 835k | assert(!Last->isZero() && "Recurrency with zero step?"); |
10813 | 835k | Ops.push_back(Last); |
10814 | 835k | return cast<SCEVAddRecExpr>(SE.getAddRecExpr(Ops, getLoop(), |
10815 | 835k | SCEV::FlagAnyWrap)); |
10816 | 835k | } |
10817 | | |
10818 | | // Return true when S contains at least an undef value. |
10819 | 1.11k | static inline bool containsUndefs(const SCEV *S) { |
10820 | 4.05k | return SCEVExprContains(S, [](const SCEV *S) { |
10821 | 4.05k | if (const auto *SU = dyn_cast<SCEVUnknown>(S)) |
10822 | 1.68k | return isa<UndefValue>(SU->getValue()); |
10823 | 2.37k | return false; |
10824 | 2.37k | }); |
10825 | 1.11k | } |
10826 | | |
10827 | | namespace { |
10828 | | |
10829 | | // Collect all steps of SCEV expressions. |
10830 | | struct SCEVCollectStrides { |
10831 | | ScalarEvolution &SE; |
10832 | | SmallVectorImpl<const SCEV *> &Strides; |
10833 | | |
10834 | | SCEVCollectStrides(ScalarEvolution &SE, SmallVectorImpl<const SCEV *> &S) |
10835 | 3.01k | : SE(SE), Strides(S) {} |
10836 | | |
10837 | 15.2k | bool follow(const SCEV *S) { |
10838 | 15.2k | if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) |
10839 | 4.61k | Strides.push_back(AR->getStepRecurrence(SE)); |
10840 | 15.2k | return true; |
10841 | 15.2k | } |
10842 | | |
10843 | 15.2k | bool isDone() const { return false; } |
10844 | | }; |
10845 | | |
10846 | | // Collect all SCEVUnknown and SCEVMulExpr expressions. |
10847 | | struct SCEVCollectTerms { |
10848 | | SmallVectorImpl<const SCEV *> &Terms; |
10849 | | |
10850 | 4.61k | SCEVCollectTerms(SmallVectorImpl<const SCEV *> &T) : Terms(T) {} |
10851 | | |
10852 | 4.75k | bool follow(const SCEV *S) { |
10853 | 4.75k | if (isa<SCEVUnknown>(S) || isa<SCEVMulExpr>(S)4.73k || |
10854 | 4.75k | isa<SCEVSignExtendExpr>(S)3.63k ) { |
10855 | 1.11k | if (!containsUndefs(S)) |
10856 | 1.09k | Terms.push_back(S); |
10857 | 1.11k | |
10858 | 1.11k | // Stop recursion: once we collected a term, do not walk its operands. |
10859 | 1.11k | return false; |
10860 | 1.11k | } |
10861 | 3.63k | |
10862 | 3.63k | // Keep looking. |
10863 | 3.63k | return true; |
10864 | 3.63k | } |
10865 | | |
10866 | 3.63k | bool isDone() const { return false; } |
10867 | | }; |
10868 | | |
10869 | | // Check if a SCEV contains an AddRecExpr. |
10870 | | struct SCEVHasAddRec { |
10871 | | bool &ContainsAddRec; |
10872 | | |
10873 | 2.16k | SCEVHasAddRec(bool &ContainsAddRec) : ContainsAddRec(ContainsAddRec) { |
10874 | 2.16k | ContainsAddRec = false; |
10875 | 2.16k | } |
10876 | | |
10877 | 3.06k | bool follow(const SCEV *S) { |
10878 | 3.06k | if (isa<SCEVAddRecExpr>(S)) { |
10879 | 64 | ContainsAddRec = true; |
10880 | 64 | |
10881 | 64 | // Stop recursion: once we collected a term, do not walk its operands. |
10882 | 64 | return false; |
10883 | 64 | } |
10884 | 2.99k | |
10885 | 2.99k | // Keep looking. |
10886 | 2.99k | return true; |
10887 | 2.99k | } |
10888 | | |
10889 | 2.99k | bool isDone() const { return false; } |
10890 | | }; |
10891 | | |
10892 | | // Find factors that are multiplied with an expression that (possibly as a |
10893 | | // subexpression) contains an AddRecExpr. In the expression: |
10894 | | // |
10895 | | // 8 * (100 + %p * %q * (%a + {0, +, 1}_loop)) |
10896 | | // |
10897 | | // "%p * %q" are factors multiplied by the expression "(%a + {0, +, 1}_loop)" |
10898 | | // that contains the AddRec {0, +, 1}_loop. %p * %q are likely to be array size |
10899 | | // parameters as they form a product with an induction variable. |
10900 | | // |
10901 | | // This collector expects all array size parameters to be in the same MulExpr. |
10902 | | // It might be necessary to later add support for collecting parameters that are |
10903 | | // spread over different nested MulExpr. |
10904 | | struct SCEVCollectAddRecMultiplies { |
10905 | | SmallVectorImpl<const SCEV *> &Terms; |
10906 | | ScalarEvolution &SE; |
10907 | | |
10908 | | SCEVCollectAddRecMultiplies(SmallVectorImpl<const SCEV *> &T, ScalarEvolution &SE) |
10909 | 3.01k | : Terms(T), SE(SE) {} |
10910 | | |
10911 | 13.2k | bool follow(const SCEV *S) { |
10912 | 13.2k | if (auto *Mul = dyn_cast<SCEVMulExpr>(S)) { |
10913 | 1.71k | bool HasAddRec = false; |
10914 | 1.71k | SmallVector<const SCEV *, 0> Operands; |
10915 | 4.20k | for (auto Op : Mul->operands()) { |
10916 | 4.20k | const SCEVUnknown *Unknown = dyn_cast<SCEVUnknown>(Op); |
10917 | 4.20k | if (Unknown && !isa<CallInst>(Unknown->getValue())2.04k ) { |
10918 | 2.02k | Operands.push_back(Op); |
10919 | 2.18k | } else if (Unknown) { |
10920 | 16 | HasAddRec = true; |
10921 | 2.16k | } else { |
10922 | 2.16k | bool ContainsAddRec; |
10923 | 2.16k | SCEVHasAddRec ContiansAddRec(ContainsAddRec); |
10924 | 2.16k | visitAll(Op, ContiansAddRec); |
10925 | 2.16k | HasAddRec |= ContainsAddRec; |
10926 | 2.16k | } |
10927 | 4.20k | } |
10928 | 1.71k | if (Operands.size() == 0) |
10929 | 282 | return true; |
10930 | 1.43k | |
10931 | 1.43k | if (!HasAddRec) |
10932 | 1.41k | return false; |
10933 | 21 | |
10934 | 21 | Terms.push_back(SE.getMulExpr(Operands)); |
10935 | 21 | // Stop recursion: once we collected a term, do not walk its operands. |
10936 | 21 | return false; |
10937 | 21 | } |
10938 | 11.5k | |
10939 | 11.5k | // Keep looking. |
10940 | 11.5k | return true; |
10941 | 11.5k | } |
10942 | | |
10943 | 11.8k | bool isDone() const { return false; } |
10944 | | }; |
10945 | | |
10946 | | } // end anonymous namespace |
10947 | | |
10948 | | /// Find parametric terms in this SCEVAddRecExpr. We first for parameters in |
10949 | | /// two places: |
10950 | | /// 1) The strides of AddRec expressions. |
10951 | | /// 2) Unknowns that are multiplied with AddRec expressions. |
10952 | | void ScalarEvolution::collectParametricTerms(const SCEV *Expr, |
10953 | 3.01k | SmallVectorImpl<const SCEV *> &Terms) { |
10954 | 3.01k | SmallVector<const SCEV *, 4> Strides; |
10955 | 3.01k | SCEVCollectStrides StrideCollector(*this, Strides); |
10956 | 3.01k | visitAll(Expr, StrideCollector); |
10957 | 3.01k | |
10958 | 3.01k | LLVM_DEBUG({ |
10959 | 3.01k | dbgs() << "Strides:\n"; |
10960 | 3.01k | for (const SCEV *S : Strides) |
10961 | 3.01k | dbgs() << *S << "\n"; |
10962 | 3.01k | }); |
10963 | 3.01k | |
10964 | 4.61k | for (const SCEV *S : Strides) { |
10965 | 4.61k | SCEVCollectTerms TermCollector(Terms); |
10966 | 4.61k | visitAll(S, TermCollector); |
10967 | 4.61k | } |
10968 | 3.01k | |
10969 | 3.01k | LLVM_DEBUG({ |
10970 | 3.01k | dbgs() << "Terms:\n"; |
10971 | 3.01k | for (const SCEV *T : Terms) |
10972 | 3.01k | dbgs() << *T << "\n"; |
10973 | 3.01k | }); |
10974 | 3.01k | |
10975 | 3.01k | SCEVCollectAddRecMultiplies MulCollector(Terms, *this); |
10976 | 3.01k | visitAll(Expr, MulCollector); |
10977 | 3.01k | } |
10978 | | |
10979 | | static bool findArrayDimensionsRec(ScalarEvolution &SE, |
10980 | | SmallVectorImpl<const SCEV *> &Terms, |
10981 | 950 | SmallVectorImpl<const SCEV *> &Sizes) { |
10982 | 950 | int Last = Terms.size() - 1; |
10983 | 950 | const SCEV *Step = Terms[Last]; |
10984 | 950 | |
10985 | 950 | // End of recursion. |
10986 | 950 | if (Last == 0) { |
10987 | 550 | if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Step)) { |
10988 | 11 | SmallVector<const SCEV *, 2> Qs; |
10989 | 11 | for (const SCEV *Op : M->operands()) |
10990 | 22 | if (!isa<SCEVConstant>(Op)) |
10991 | 22 | Qs.push_back(Op); |
10992 | 11 | |
10993 | 11 | Step = SE.getMulExpr(Qs); |
10994 | 11 | } |
10995 | 550 | |
10996 | 550 | Sizes.push_back(Step); |
10997 | 550 | return true; |
10998 | 550 | } |
10999 | 400 | |
11000 | 915 | for (const SCEV *&Term : Terms)400 { |
11001 | 915 | // Normalize the terms before the next call to findArrayDimensionsRec. |
11002 | 915 | const SCEV *Q, *R; |
11003 | 915 | SCEVDivision::divide(SE, Term, Step, &Q, &R); |
11004 | 915 | |
11005 | 915 | // Bail out when GCD does not evenly divide one of the terms. |
11006 | 915 | if (!R->isZero()) |
11007 | 1 | return false; |
11008 | 914 | |
11009 | 914 | Term = Q; |
11010 | 914 | } |
11011 | 400 | |
11012 | 400 | // Remove all SCEVConstants. |
11013 | 400 | Terms.erase( |
11014 | 914 | remove_if(Terms, [](const SCEV *E) { return isa<SCEVConstant>(E); }), |
11015 | 399 | Terms.end()); |
11016 | 399 | |
11017 | 399 | if (Terms.size() > 0) |
11018 | 391 | if (!findArrayDimensionsRec(SE, Terms, Sizes)) |
11019 | 0 | return false; |
11020 | 399 | |
11021 | 399 | Sizes.push_back(Step); |
11022 | 399 | return true; |
11023 | 399 | } |
11024 | | |
11025 | | // Returns true when one of the SCEVs of Terms contains a SCEVUnknown parameter. |
11026 | 559 | static inline bool containsParameters(SmallVectorImpl<const SCEV *> &Terms) { |
11027 | 559 | for (const SCEV *T : Terms) |
11028 | 559 | if (SCEVExprContains(T, isa<SCEVUnknown, const SCEV *>)) |
11029 | 559 | return true; |
11030 | 559 | return false0 ; |
11031 | 559 | } |
11032 | | |
11033 | | // Return the number of product terms in S. |
11034 | 840 | static inline int numberOfTerms(const SCEV *S) { |
11035 | 840 | if (const SCEVMulExpr *Expr = dyn_cast<SCEVMulExpr>(S)) |
11036 | 829 | return Expr->getNumOperands(); |
11037 | 11 | return 1; |
11038 | 11 | } |
11039 | | |
11040 | 961 | static const SCEV *removeConstantFactors(ScalarEvolution &SE, const SCEV *T) { |
11041 | 961 | if (isa<SCEVConstant>(T)) |
11042 | 2 | return nullptr; |
11043 | 959 | |
11044 | 959 | if (isa<SCEVUnknown>(T)) |
11045 | 430 | return T; |
11046 | 529 | |
11047 | 529 | if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(T)) { |
11048 | 464 | SmallVector<const SCEV *, 2> Factors; |
11049 | 464 | for (const SCEV *Op : M->operands()) |
11050 | 1.04k | if (!isa<SCEVConstant>(Op)) |
11051 | 982 | Factors.push_back(Op); |
11052 | 464 | |
11053 | 464 | return SE.getMulExpr(Factors); |
11054 | 464 | } |
11055 | 65 | |
11056 | 65 | return T; |
11057 | 65 | } |
11058 | | |
11059 | | /// Return the size of an element read or written by Inst. |
11060 | 15.7k | const SCEV *ScalarEvolution::getElementSize(Instruction *Inst) { |
11061 | 15.7k | Type *Ty; |
11062 | 15.7k | if (StoreInst *Store = dyn_cast<StoreInst>(Inst)) |
11063 | 7.97k | Ty = Store->getValueOperand()->getType(); |
11064 | 7.81k | else if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) |
11065 | 7.75k | Ty = Load->getType(); |
11066 | 61 | else |
11067 | 61 | return nullptr; |
11068 | 15.7k | |
11069 | 15.7k | Type *ETy = getEffectiveSCEVType(PointerType::getUnqual(Ty)); |
11070 | 15.7k | return getSizeOfExpr(ETy, Ty); |
11071 | 15.7k | } |
11072 | | |
11073 | | void ScalarEvolution::findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms, |
11074 | | SmallVectorImpl<const SCEV *> &Sizes, |
11075 | 1.66k | const SCEV *ElementSize) { |
11076 | 1.66k | if (Terms.size() < 1 || !ElementSize559 ) |
11077 | 1.10k | return; |
11078 | 559 | |
11079 | 559 | // Early return when Terms do not contain parameters: we do not delinearize |
11080 | 559 | // non parametric SCEVs. |
11081 | 559 | if (!containsParameters(Terms)) |
11082 | 0 | return; |
11083 | 559 | |
11084 | 559 | LLVM_DEBUG({ |
11085 | 559 | dbgs() << "Terms:\n"; |
11086 | 559 | for (const SCEV *T : Terms) |
11087 | 559 | dbgs() << *T << "\n"; |
11088 | 559 | }); |
11089 | 559 | |
11090 | 559 | // Remove duplicates. |
11091 | 559 | array_pod_sort(Terms.begin(), Terms.end()); |
11092 | 559 | Terms.erase(std::unique(Terms.begin(), Terms.end()), Terms.end()); |
11093 | 559 | |
11094 | 559 | // Put larger terms first. |
11095 | 559 | llvm::sort(Terms, [](const SCEV *LHS, const SCEV *RHS) { |
11096 | 420 | return numberOfTerms(LHS) > numberOfTerms(RHS); |
11097 | 420 | }); |
11098 | 559 | |
11099 | 559 | // Try to divide all terms by the element size. If term is not divisible by |
11100 | 559 | // element size, proceed with the original term. |
11101 | 961 | for (const SCEV *&Term : Terms) { |
11102 | 961 | const SCEV *Q, *R; |
11103 | 961 | SCEVDivision::divide(*this, Term, ElementSize, &Q, &R); |
11104 | 961 | if (!Q->isZero()) |
11105 | 910 | Term = Q; |
11106 | 961 | } |
11107 | 559 | |
11108 | 559 | SmallVector<const SCEV *, 4> NewTerms; |
11109 | 559 | |
11110 | 559 | // Remove constant factors. |
11111 | 559 | for (const SCEV *T : Terms) |
11112 | 961 | if (const SCEV *NewT = removeConstantFactors(*this, T)) |
11113 | 959 | NewTerms.push_back(NewT); |
11114 | 559 | |
11115 | 559 | LLVM_DEBUG({ |
11116 | 559 | dbgs() << "Terms after sorting:\n"; |
11117 | 559 | for (const SCEV *T : NewTerms) |
11118 | 559 | dbgs() << *T << "\n"; |
11119 | 559 | }); |
11120 | 559 | |
11121 | 559 | if (NewTerms.empty() || !findArrayDimensionsRec(*this, NewTerms, Sizes)) { |
11122 | 1 | Sizes.clear(); |
11123 | 1 | return; |
11124 | 1 | } |
11125 | 558 | |
11126 | 558 | // The last element to be pushed into Sizes is the size of an element. |
11127 | 558 | Sizes.push_back(ElementSize); |
11128 | 558 | |
11129 | 558 | LLVM_DEBUG({ |
11130 | 558 | dbgs() << "Sizes:\n"; |
11131 | 558 | for (const SCEV *S : Sizes) |
11132 | 558 | dbgs() << *S << "\n"; |
11133 | 558 | }); |
11134 | 558 | } |
11135 | | |
11136 | | void ScalarEvolution::computeAccessFunctions( |
11137 | | const SCEV *Expr, SmallVectorImpl<const SCEV *> &Subscripts, |
11138 | 2.99k | SmallVectorImpl<const SCEV *> &Sizes) { |
11139 | 2.99k | // Early exit in case this SCEV is not an affine multivariate function. |
11140 | 2.99k | if (Sizes.empty()) |
11141 | 2.08k | return; |
11142 | 913 | |
11143 | 913 | if (auto *AR = dyn_cast<SCEVAddRecExpr>(Expr)) |
11144 | 872 | if (!AR->isAffine()) |
11145 | 6 | return; |
11146 | 907 | |
11147 | 907 | const SCEV *Res = Expr; |
11148 | 907 | int Last = Sizes.size() - 1; |
11149 | 3.38k | for (int i = Last; i >= 0; i--2.47k ) { |
11150 | 2.48k | const SCEV *Q, *R; |
11151 | 2.48k | SCEVDivision::divide(*this, Res, Sizes[i], &Q, &R); |
11152 | 2.48k | |
11153 | 2.48k | LLVM_DEBUG({ |
11154 | 2.48k | dbgs() << "Res: " << *Res << "\n"; |
11155 | 2.48k | dbgs() << "Sizes[i]: " << *Sizes[i] << "\n"; |
11156 | 2.48k | dbgs() << "Res divided by Sizes[i]:\n"; |
11157 | 2.48k | dbgs() << "Quotient: " << *Q << "\n"; |
11158 | 2.48k | dbgs() << "Remainder: " << *R << "\n"; |
11159 | 2.48k | }); |
11160 | 2.48k | |
11161 | 2.48k | Res = Q; |
11162 | 2.48k | |
11163 | 2.48k | // Do not record the last subscript corresponding to the size of elements in |
11164 | 2.48k | // the array. |
11165 | 2.48k | if (i == Last) { |
11166 | 907 | |
11167 | 907 | // Bail out if the remainder is too complex. |
11168 | 907 | if (isa<SCEVAddRecExpr>(R)) { |
11169 | 2 | Subscripts.clear(); |
11170 | 2 | Sizes.clear(); |
11171 | 2 | return; |
11172 | 2 | } |
11173 | 905 | |
11174 | 905 | continue; |
11175 | 905 | } |
11176 | 1.57k | |
11177 | 1.57k | // Record the access function for the current subscript. |
11178 | 1.57k | Subscripts.push_back(R); |
11179 | 1.57k | } |
11180 | 907 | |
11181 | 907 | // Also push in last position the remainder of the last division: it will be |
11182 | 907 | // the access function of the innermost dimension. |
11183 | 907 | Subscripts.push_back(Res); |
11184 | 905 | |
11185 | 905 | std::reverse(Subscripts.begin(), Subscripts.end()); |
11186 | 905 | |
11187 | 905 | LLVM_DEBUG({ |
11188 | 905 | dbgs() << "Subscripts:\n"; |
11189 | 905 | for (const SCEV *S : Subscripts) |
11190 | 905 | dbgs() << *S << "\n"; |
11191 | 905 | }); |
11192 | 905 | } |
11193 | | |
11194 | | /// Splits the SCEV into two vectors of SCEVs representing the subscripts and |
11195 | | /// sizes of an array access. Returns the remainder of the delinearization that |
11196 | | /// is the offset start of the array. The SCEV->delinearize algorithm computes |
11197 | | /// the multiples of SCEV coefficients: that is a pattern matching of sub |
11198 | | /// expressions in the stride and base of a SCEV corresponding to the |
11199 | | /// computation of a GCD (greatest common divisor) of base and stride. When |
11200 | | /// SCEV->delinearize fails, it returns the SCEV unchanged. |
11201 | | /// |
11202 | | /// For example: when analyzing the memory access A[i][j][k] in this loop nest |
11203 | | /// |
11204 | | /// void foo(long n, long m, long o, double A[n][m][o]) { |
11205 | | /// |
11206 | | /// for (long i = 0; i < n; i++) |
11207 | | /// for (long j = 0; j < m; j++) |
11208 | | /// for (long k = 0; k < o; k++) |
11209 | | /// A[i][j][k] = 1.0; |
11210 | | /// } |
11211 | | /// |
11212 | | /// the delinearization input is the following AddRec SCEV: |
11213 | | /// |
11214 | | /// AddRec: {{{%A,+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k> |
11215 | | /// |
11216 | | /// From this SCEV, we are able to say that the base offset of the access is %A |
11217 | | /// because it appears as an offset that does not divide any of the strides in |
11218 | | /// the loops: |
11219 | | /// |
11220 | | /// CHECK: Base offset: %A |
11221 | | /// |
11222 | | /// and then SCEV->delinearize determines the size of some of the dimensions of |
11223 | | /// the array as these are the multiples by which the strides are happening: |
11224 | | /// |
11225 | | /// CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double) bytes. |
11226 | | /// |
11227 | | /// Note that the outermost dimension remains of UnknownSize because there are |
11228 | | /// no strides that would help identifying the size of the last dimension: when |
11229 | | /// the array has been statically allocated, one could compute the size of that |
11230 | | /// dimension by dividing the overall size of the array by the size of the known |
11231 | | /// dimensions: %m * %o * 8. |
11232 | | /// |
11233 | | /// Finally delinearize provides the access functions for the array reference |
11234 | | /// that does correspond to A[i][j][k] of the above C testcase: |
11235 | | /// |
11236 | | /// CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>] |
11237 | | /// |
11238 | | /// The testcases are checking the output of a function pass: |
11239 | | /// DelinearizationPass that walks through all loads and stores of a function |
11240 | | /// asking for the SCEV of the memory access with respect to all enclosing |
11241 | | /// loops, calling SCEV->delinearize on that and printing the results. |
11242 | | void ScalarEvolution::delinearize(const SCEV *Expr, |
11243 | | SmallVectorImpl<const SCEV *> &Subscripts, |
11244 | | SmallVectorImpl<const SCEV *> &Sizes, |
11245 | 165 | const SCEV *ElementSize) { |
11246 | 165 | // First step: collect parametric terms. |
11247 | 165 | SmallVector<const SCEV *, 4> Terms; |
11248 | 165 | collectParametricTerms(Expr, Terms); |
11249 | 165 | |
11250 | 165 | if (Terms.empty()) |
11251 | 124 | return; |
11252 | 41 | |
11253 | 41 | // Second step: find subscript sizes. |
11254 | 41 | findArrayDimensions(Terms, Sizes, ElementSize); |
11255 | 41 | |
11256 | 41 | if (Sizes.empty()) |
11257 | 0 | return; |
11258 | 41 | |
11259 | 41 | // Third step: compute the access functions for each subscript. |
11260 | 41 | computeAccessFunctions(Expr, Subscripts, Sizes); |
11261 | 41 | |
11262 | 41 | if (Subscripts.empty()) |
11263 | 0 | return; |
11264 | 41 | |
11265 | 41 | LLVM_DEBUG({ |
11266 | 41 | dbgs() << "succeeded to delinearize " << *Expr << "\n"; |
11267 | 41 | dbgs() << "ArrayDecl[UnknownSize]"; |
11268 | 41 | for (const SCEV *S : Sizes) |
11269 | 41 | dbgs() << "[" << *S << "]"; |
11270 | 41 | |
11271 | 41 | dbgs() << "\nArrayRef"; |
11272 | 41 | for (const SCEV *S : Subscripts) |
11273 | 41 | dbgs() << "[" << *S << "]"; |
11274 | 41 | dbgs() << "\n"; |
11275 | 41 | }); |
11276 | 41 | } |
11277 | | |
11278 | | //===----------------------------------------------------------------------===// |
11279 | | // SCEVCallbackVH Class Implementation |
11280 | | //===----------------------------------------------------------------------===// |
11281 | | |
11282 | 626k | void ScalarEvolution::SCEVCallbackVH::deleted() { |
11283 | 626k | assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!"); |
11284 | 626k | if (PHINode *PN = dyn_cast<PHINode>(getValPtr())) |
11285 | 846 | SE->ConstantEvolutionLoopExitValue.erase(PN); |
11286 | 626k | SE->eraseValueFromMap(getValPtr()); |
11287 | 626k | // this now dangles! |
11288 | 626k | } |
11289 | | |
11290 | 135k | void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *V) { |
11291 | 135k | assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!"); |
11292 | 135k | |
11293 | 135k | // Forget all the expressions associated with users of the old value, |
11294 | 135k | // so that future queries will recompute the expressions using the new |
11295 | 135k | // value. |
11296 | 135k | Value *Old = getValPtr(); |
11297 | 135k | SmallVector<User *, 16> Worklist(Old->user_begin(), Old->user_end()); |
11298 | 135k | SmallPtrSet<User *, 8> Visited; |
11299 | 1.21M | while (!Worklist.empty()) { |
11300 | 1.07M | User *U = Worklist.pop_back_val(); |
11301 | 1.07M | // Deleting the Old value will cause this to dangle. Postpone |
11302 | 1.07M | // that until everything else is done. |
11303 | 1.07M | if (U == Old) |
11304 | 88.9k | continue; |
11305 | 988k | if (!Visited.insert(U).second) |
11306 | 147k | continue; |
11307 | 840k | if (PHINode *PN = dyn_cast<PHINode>(U)) |
11308 | 71.4k | SE->ConstantEvolutionLoopExitValue.erase(PN); |
11309 | 840k | SE->eraseValueFromMap(U); |
11310 | 840k | Worklist.insert(Worklist.end(), U->user_begin(), U->user_end()); |
11311 | 840k | } |
11312 | 135k | // Delete the Old value. |
11313 | 135k | if (PHINode *PN = dyn_cast<PHINode>(Old)) |
11314 | 86.0k | SE->ConstantEvolutionLoopExitValue.erase(PN); |
11315 | 135k | SE->eraseValueFromMap(Old); |
11316 | 135k | // this now dangles! |
11317 | 135k | } |
11318 | | |
11319 | | ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se) |
11320 | 230M | : CallbackVH(V), SE(se) {} |
11321 | | |
11322 | | //===----------------------------------------------------------------------===// |
11323 | | // ScalarEvolution Class Implementation |
11324 | | //===----------------------------------------------------------------------===// |
11325 | | |
11326 | | ScalarEvolution::ScalarEvolution(Function &F, TargetLibraryInfo &TLI, |
11327 | | AssumptionCache &AC, DominatorTree &DT, |
11328 | | LoopInfo &LI) |
11329 | | : F(F), TLI(TLI), AC(AC), DT(DT), LI(LI), |
11330 | | CouldNotCompute(new SCEVCouldNotCompute()), ValuesAtScopes(64), |
11331 | 4.01M | LoopDispositions(64), BlockDispositions(64) { |
11332 | 4.01M | // To use guards for proving predicates, we need to scan every instruction in |
11333 | 4.01M | // relevant basic blocks, and not just terminators. Doing this is a waste of |
11334 | 4.01M | // time if the IR does not actually contain any calls to |
11335 | 4.01M | // @llvm.experimental.guard, so do a quick check and remember this beforehand. |
11336 | 4.01M | // |
11337 | 4.01M | // This pessimizes the case where a pass that preserves ScalarEvolution wants |
11338 | 4.01M | // to _add_ guards to the module when there weren't any before, and wants |
11339 | 4.01M | // ScalarEvolution to optimize based on those guards. For now we prefer to be |
11340 | 4.01M | // efficient in lieu of being smart in that rather obscure case. |
11341 | 4.01M | |
11342 | 4.01M | auto *GuardDecl = F.getParent()->getFunction( |
11343 | 4.01M | Intrinsic::getName(Intrinsic::experimental_guard)); |
11344 | 4.01M | HasGuards = GuardDecl && !GuardDecl->use_empty()350 ; |
11345 | 4.01M | } |
11346 | | |
11347 | | ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg) |
11348 | | : F(Arg.F), HasGuards(Arg.HasGuards), TLI(Arg.TLI), AC(Arg.AC), DT(Arg.DT), |
11349 | | LI(Arg.LI), CouldNotCompute(std::move(Arg.CouldNotCompute)), |
11350 | | ValueExprMap(std::move(Arg.ValueExprMap)), |
11351 | | PendingLoopPredicates(std::move(Arg.PendingLoopPredicates)), |
11352 | | PendingPhiRanges(std::move(Arg.PendingPhiRanges)), |
11353 | | PendingMerges(std::move(Arg.PendingMerges)), |
11354 | | MinTrailingZerosCache(std::move(Arg.MinTrailingZerosCache)), |
11355 | | BackedgeTakenCounts(std::move(Arg.BackedgeTakenCounts)), |
11356 | | PredicatedBackedgeTakenCounts( |
11357 | | std::move(Arg.PredicatedBackedgeTakenCounts)), |
11358 | | ConstantEvolutionLoopExitValue( |
11359 | | std::move(Arg.ConstantEvolutionLoopExitValue)), |
11360 | | ValuesAtScopes(std::move(Arg.ValuesAtScopes)), |
11361 | | LoopDispositions(std::move(Arg.LoopDispositions)), |
11362 | | LoopPropertiesCache(std::move(Arg.LoopPropertiesCache)), |
11363 | | BlockDispositions(std::move(Arg.BlockDispositions)), |
11364 | | UnsignedRanges(std::move(Arg.UnsignedRanges)), |
11365 | | SignedRanges(std::move(Arg.SignedRanges)), |
11366 | | UniqueSCEVs(std::move(Arg.UniqueSCEVs)), |
11367 | | UniquePreds(std::move(Arg.UniquePreds)), |
11368 | | SCEVAllocator(std::move(Arg.SCEVAllocator)), |
11369 | | LoopUsers(std::move(Arg.LoopUsers)), |
11370 | | PredicatedSCEVRewrites(std::move(Arg.PredicatedSCEVRewrites)), |
11371 | 5.04k | FirstUnknown(Arg.FirstUnknown) { |
11372 | 5.04k | Arg.FirstUnknown = nullptr; |
11373 | 5.04k | } |
11374 | | |
11375 | 4.02M | ScalarEvolution::~ScalarEvolution() { |
11376 | 4.02M | // Iterate through all the SCEVUnknown instances and call their |
11377 | 4.02M | // destructors, so that they release their references to their values. |
11378 | 7.73M | for (SCEVUnknown *U = FirstUnknown; U;) { |
11379 | 3.71M | SCEVUnknown *Tmp = U; |
11380 | 3.71M | U = U->Next; |
11381 | 3.71M | Tmp->~SCEVUnknown(); |
11382 | 3.71M | } |
11383 | 4.02M | FirstUnknown = nullptr; |
11384 | 4.02M | |
11385 | 4.02M | ExprValueMap.clear(); |
11386 | 4.02M | ValueExprMap.clear(); |
11387 | 4.02M | HasRecMap.clear(); |
11388 | 4.02M | |
11389 | 4.02M | // Free any extra memory created for ExitNotTakenInfo in the unlikely event |
11390 | 4.02M | // that a loop had multiple computable exits. |
11391 | 4.02M | for (auto &BTCI : BackedgeTakenCounts) |
11392 | 670k | BTCI.second.clear(); |
11393 | 4.02M | for (auto &BTCI : PredicatedBackedgeTakenCounts) |
11394 | 70.9k | BTCI.second.clear(); |
11395 | 4.02M | |
11396 | 4.02M | assert(PendingLoopPredicates.empty() && "isImpliedCond garbage"); |
11397 | 4.02M | assert(PendingPhiRanges.empty() && "getRangeRef garbage"); |
11398 | 4.02M | assert(PendingMerges.empty() && "isImpliedViaMerge garbage"); |
11399 | 4.02M | assert(!WalkingBEDominatingConds && "isLoopBackedgeGuardedByCond garbage!"); |
11400 | 4.02M | assert(!ProvingSplitPredicate && "ProvingSplitPredicate garbage!"); |
11401 | 4.02M | } |
11402 | | |
11403 | 204k | bool ScalarEvolution::hasLoopInvariantBackedgeTakenCount(const Loop *L) { |
11404 | 204k | return !isa<SCEVCouldNotCompute>(getBackedgeTakenCount(L)); |
11405 | 204k | } |
11406 | | |
11407 | | static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE, |
11408 | 420 | const Loop *L) { |
11409 | 420 | // Print all inner loops first |
11410 | 420 | for (Loop *I : *L) |
11411 | 43 | PrintLoopInfo(OS, SE, I); |
11412 | 420 | |
11413 | 420 | OS << "Loop "; |
11414 | 420 | L->getHeader()->printAsOperand(OS, /*PrintType=*/false); |
11415 | 420 | OS << ": "; |
11416 | 420 | |
11417 | 420 | SmallVector<BasicBlock *, 8> ExitingBlocks; |
11418 | 420 | L->getExitingBlocks(ExitingBlocks); |
11419 | 420 | if (ExitingBlocks.size() != 1) |
11420 | 67 | OS << "<multiple exits> "; |
11421 | 420 | |
11422 | 420 | if (SE->hasLoopInvariantBackedgeTakenCount(L)) |
11423 | 283 | OS << "backedge-taken count is " << *SE->getBackedgeTakenCount(L) << "\n"; |
11424 | 137 | else |
11425 | 137 | OS << "Unpredictable backedge-taken count.\n"; |
11426 | 420 | |
11427 | 420 | if (ExitingBlocks.size() > 1) |
11428 | 128 | for (BasicBlock *ExitingBlock : ExitingBlocks)54 { |
11429 | 128 | OS << " exit count for " << ExitingBlock->getName() << ": " |
11430 | 128 | << *SE->getExitCount(L, ExitingBlock) << "\n"; |
11431 | 128 | } |
11432 | 420 | |
11433 | 420 | OS << "Loop "; |
11434 | 420 | L->getHeader()->printAsOperand(OS, /*PrintType=*/false); |
11435 | 420 | OS << ": "; |
11436 | 420 | |
11437 | 420 | if (!isa<SCEVCouldNotCompute>(SE->getMaxBackedgeTakenCount(L))) { |
11438 | 326 | OS << "max backedge-taken count is " << *SE->getMaxBackedgeTakenCount(L); |
11439 | 326 | if (SE->isBackedgeTakenCountMaxOrZero(L)) |
11440 | 9 | OS << ", actual taken count either this or zero."; |
11441 | 326 | } else { |
11442 | 94 | OS << "Unpredictable max backedge-taken count. "; |
11443 | 94 | } |
11444 | 420 | |
11445 | 420 | OS << "\n" |
11446 | 420 | "Loop "; |
11447 | 420 | L->getHeader()->printAsOperand(OS, /*PrintType=*/false); |
11448 | 420 | OS << ": "; |
11449 | 420 | |
11450 | 420 | SCEVUnionPredicate Pred; |
11451 | 420 | auto PBT = SE->getPredicatedBackedgeTakenCount(L, Pred); |
11452 | 420 | if (!isa<SCEVCouldNotCompute>(PBT)) { |
11453 | 287 | OS << "Predicated backedge-taken count is " << *PBT << "\n"; |
11454 | 287 | OS << " Predicates:\n"; |
11455 | 287 | Pred.print(OS, 4); |
11456 | 287 | } else { |
11457 | 133 | OS << "Unpredictable predicated backedge-taken count. "; |
11458 | 133 | } |
11459 | 420 | OS << "\n"; |
11460 | 420 | |
11461 | 420 | if (SE->hasLoopInvariantBackedgeTakenCount(L)) { |
11462 | 283 | OS << "Loop "; |
11463 | 283 | L->getHeader()->printAsOperand(OS, /*PrintType=*/false); |
11464 | 283 | OS << ": "; |
11465 | 283 | OS << "Trip multiple is " << SE->getSmallConstantTripMultiple(L) << "\n"; |
11466 | 283 | } |
11467 | 420 | } |
11468 | | |
11469 | 2.35k | static StringRef loopDispositionToStr(ScalarEvolution::LoopDisposition LD) { |
11470 | 2.35k | switch (LD) { |
11471 | 2.35k | case ScalarEvolution::LoopVariant: |
11472 | 668 | return "Variant"; |
11473 | 2.35k | case ScalarEvolution::LoopInvariant: |
11474 | 243 | return "Invariant"; |
11475 | 2.35k | case ScalarEvolution::LoopComputable: |
11476 | 1.44k | return "Computable"; |
11477 | 0 | } |
11478 | 0 | llvm_unreachable("Unknown ScalarEvolution::LoopDisposition kind!"); |
11479 | 0 | } |
11480 | | |
11481 | 470 | void ScalarEvolution::print(raw_ostream &OS) const { |
11482 | 470 | // ScalarEvolution's implementation of the print method is to print |
11483 | 470 | // out SCEV values of all instructions that are interesting. Doing |
11484 | 470 | // this potentially causes it to create new SCEV objects though, |
11485 | 470 | // which technically conflicts with the const qualifier. This isn't |
11486 | 470 | // observable from outside the class though, so casting away the |
11487 | 470 | // const isn't dangerous. |
11488 | 470 | ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this); |
11489 | 470 | |
11490 | 470 | OS << "Classifying expressions for: "; |
11491 | 470 | F.printAsOperand(OS, /*PrintType=*/false); |
11492 | 470 | OS << "\n"; |
11493 | 470 | for (Instruction &I : instructions(F)) |
11494 | 11.9k | if (isSCEVable(I.getType()) && !isa<CmpInst>(I)6.80k ) { |
11495 | 2.49k | OS << I << '\n'; |
11496 | 2.49k | OS << " --> "; |
11497 | 2.49k | const SCEV *SV = SE.getSCEV(&I); |
11498 | 2.49k | SV->print(OS); |
11499 | 2.49k | if (!isa<SCEVCouldNotCompute>(SV)) { |
11500 | 2.49k | OS << " U: "; |
11501 | 2.49k | SE.getUnsignedRange(SV).print(OS); |
11502 | 2.49k | OS << " S: "; |
11503 | 2.49k | SE.getSignedRange(SV).print(OS); |
11504 | 2.49k | } |
11505 | 2.49k | |
11506 | 2.49k | const Loop *L = LI.getLoopFor(I.getParent()); |
11507 | 2.49k | |
11508 | 2.49k | const SCEV *AtUse = SE.getSCEVAtScope(SV, L); |
11509 | 2.49k | if (AtUse != SV) { |
11510 | 61 | OS << " --> "; |
11511 | 61 | AtUse->print(OS); |
11512 | 61 | if (!isa<SCEVCouldNotCompute>(AtUse)) { |
11513 | 61 | OS << " U: "; |
11514 | 61 | SE.getUnsignedRange(AtUse).print(OS); |
11515 | 61 | OS << " S: "; |
11516 | 61 | SE.getSignedRange(AtUse).print(OS); |
11517 | 61 | } |
11518 | 61 | } |
11519 | 2.49k | |
11520 | 2.49k | if (L) { |
11521 | 1.89k | OS << "\t\t" "Exits: "; |
11522 | 1.89k | const SCEV *ExitValue = SE.getSCEVAtScope(SV, L->getParentLoop()); |
11523 | 1.89k | if (!SE.isLoopInvariant(ExitValue, L)) { |
11524 | 776 | OS << "<<Unknown>>"; |
11525 | 1.11k | } else { |
11526 | 1.11k | OS << *ExitValue; |
11527 | 1.11k | } |
11528 | 1.89k | |
11529 | 1.89k | bool First = true; |
11530 | 4.02k | for (auto *Iter = L; Iter; Iter = Iter->getParentLoop()2.13k ) { |
11531 | 2.13k | if (First) { |
11532 | 1.89k | OS << "\t\t" "LoopDispositions: { "; |
11533 | 1.89k | First = false; |
11534 | 1.89k | } else { |
11535 | 242 | OS << ", "; |
11536 | 242 | } |
11537 | 2.13k | |
11538 | 2.13k | Iter->getHeader()->printAsOperand(OS, /*PrintType=*/false); |
11539 | 2.13k | OS << ": " << loopDispositionToStr(SE.getLoopDisposition(SV, Iter)); |
11540 | 2.13k | } |
11541 | 1.89k | |
11542 | 2.11k | for (auto *InnerL : depth_first(L)) { |
11543 | 2.11k | if (InnerL == L) |
11544 | 1.89k | continue; |
11545 | 219 | if (First) { |
11546 | 0 | OS << "\t\t" "LoopDispositions: { "; |
11547 | 0 | First = false; |
11548 | 219 | } else { |
11549 | 219 | OS << ", "; |
11550 | 219 | } |
11551 | 219 | |
11552 | 219 | InnerL->getHeader()->printAsOperand(OS, /*PrintType=*/false); |
11553 | 219 | OS << ": " << loopDispositionToStr(SE.getLoopDisposition(SV, InnerL)); |
11554 | 219 | } |
11555 | 1.89k | |
11556 | 1.89k | OS << " }"; |
11557 | 1.89k | } |
11558 | 2.49k | |
11559 | 2.49k | OS << "\n"; |
11560 | 2.49k | } |
11561 | 470 | |
11562 | 470 | OS << "Determining loop execution counts for: "; |
11563 | 470 | F.printAsOperand(OS, /*PrintType=*/false); |
11564 | 470 | OS << "\n"; |
11565 | 470 | for (Loop *I : LI) |
11566 | 377 | PrintLoopInfo(OS, &SE, I); |
11567 | 470 | } |
11568 | | |
11569 | | ScalarEvolution::LoopDisposition |
11570 | 41.5M | ScalarEvolution::getLoopDisposition(const SCEV *S, const Loop *L) { |
11571 | 41.5M | auto &Values = LoopDispositions[S]; |
11572 | 226M | for (auto &V : Values) { |
11573 | 226M | if (V.getPointer() == L) |
11574 | 28.6M | return V.getInt(); |
11575 | 226M | } |
11576 | 41.5M | Values.emplace_back(L, LoopVariant); |
11577 | 12.9M | LoopDisposition D = computeLoopDisposition(S, L); |
11578 | 12.9M | auto &Values2 = LoopDispositions[S]; |
11579 | 12.9M | for (auto &V : make_range(Values2.rbegin(), Values2.rend())) { |
11580 | 12.9M | if (V.getPointer() == L) { |
11581 | 12.9M | V.setInt(D); |
11582 | 12.9M | break; |
11583 | 12.9M | } |
11584 | 12.9M | } |
11585 | 12.9M | return D; |
11586 | 41.5M | } |
11587 | | |
11588 | | ScalarEvolution::LoopDisposition |
11589 | 12.9M | ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) { |
11590 | 12.9M | switch (static_cast<SCEVTypes>(S->getSCEVType())) { |
11591 | 12.9M | case scConstant: |
11592 | 3.20M | return LoopInvariant; |
11593 | 12.9M | case scTruncate: |
11594 | 422k | case scZeroExtend: |
11595 | 422k | case scSignExtend: |
11596 | 422k | return getLoopDisposition(cast<SCEVCastExpr>(S)->getOperand(), L); |
11597 | 4.56M | case scAddRecExpr: { |
11598 | 4.56M | const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S); |
11599 | 4.56M | |
11600 | 4.56M | // If L is the addrec's loop, it's computable. |
11601 | 4.56M | if (AR->getLoop() == L) |
11602 | 4.41M | return LoopComputable; |
11603 | 147k | |
11604 | 147k | // Add recurrences are never invariant in the function-body (null loop). |
11605 | 147k | if (!L) |
11606 | 21.2k | return LoopVariant; |
11607 | 126k | |
11608 | 126k | // Everything that is not defined at loop entry is variant. |
11609 | 126k | if (DT.dominates(L->getHeader(), AR->getLoop()->getHeader())) |
11610 | 25.4k | return LoopVariant; |
11611 | 100k | assert(!L->contains(AR->getLoop()) && "Containing loop's header does not" |
11612 | 100k | " dominate the contained loop's header?"); |
11613 | 100k | |
11614 | 100k | // This recurrence is invariant w.r.t. L if AR's loop contains L. |
11615 | 100k | if (AR->getLoop()->contains(L)) |
11616 | 83.9k | return LoopInvariant; |
11617 | 17.0k | |
11618 | 17.0k | // This recurrence is variant w.r.t. L if any of its operands |
11619 | 17.0k | // are variant. |
11620 | 17.0k | for (auto *Op : AR->operands()) |
11621 | 34.0k | if (!isLoopInvariant(Op, L)) |
11622 | 0 | return LoopVariant; |
11623 | 17.0k | |
11624 | 17.0k | // Otherwise it's loop-invariant. |
11625 | 17.0k | return LoopInvariant; |
11626 | 17.0k | } |
11627 | 1.68M | case scAddExpr: |
11628 | 1.68M | case scMulExpr: |
11629 | 1.68M | case scUMaxExpr: |
11630 | 1.68M | case scSMaxExpr: |
11631 | 1.68M | case scUMinExpr: |
11632 | 1.68M | case scSMinExpr: { |
11633 | 1.68M | bool HasVarying = false; |
11634 | 3.33M | for (auto *Op : cast<SCEVNAryExpr>(S)->operands()) { |
11635 | 3.33M | LoopDisposition D = getLoopDisposition(Op, L); |
11636 | 3.33M | if (D == LoopVariant) |
11637 | 717k | return LoopVariant; |
11638 | 2.61M | if (D == LoopComputable) |
11639 | 80.5k | HasVarying = true; |
11640 | 2.61M | } |
11641 | 1.68M | return HasVarying 968k ? LoopComputable21.4k : LoopInvariant946k ; |
11642 | 1.68M | } |
11643 | 1.68M | case scUDivExpr: { |
11644 | 76.3k | const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S); |
11645 | 76.3k | LoopDisposition LD = getLoopDisposition(UDiv->getLHS(), L); |
11646 | 76.3k | if (LD == LoopVariant) |
11647 | 33.5k | return LoopVariant; |
11648 | 42.8k | LoopDisposition RD = getLoopDisposition(UDiv->getRHS(), L); |
11649 | 42.8k | if (RD == LoopVariant) |
11650 | 1.94k | return LoopVariant; |
11651 | 40.9k | return (LD == LoopInvariant && RD == LoopInvariant36.4k ) ? |
11652 | 36.2k | LoopInvariant : LoopComputable4.72k ; |
11653 | 40.9k | } |
11654 | 3.01M | case scUnknown: |
11655 | 3.01M | // All non-instruction values are loop invariant. All instructions are loop |
11656 | 3.01M | // invariant if they are not contained in the specified loop. |
11657 | 3.01M | // Instructions are never considered invariant in the function body |
11658 | 3.01M | // (null loop) because they are defined within the "loop". |
11659 | 3.01M | if (auto *I = dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue())) |
11660 | 2.42M | return (L && !L->contains(I)2.27M ) ? LoopInvariant548k : LoopVariant1.88M ; |
11661 | 587k | return LoopInvariant; |
11662 | 587k | case scCouldNotCompute: |
11663 | 0 | llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); |
11664 | 0 | } |
11665 | 0 | llvm_unreachable("Unknown SCEV kind!"); |
11666 | 0 | } |
11667 | | |
11668 | 30.0M | bool ScalarEvolution::isLoopInvariant(const SCEV *S, const Loop *L) { |
11669 | 30.0M | return getLoopDisposition(S, L) == LoopInvariant; |
11670 | 30.0M | } |
11671 | | |
11672 | 7.67M | bool ScalarEvolution::hasComputableLoopEvolution(const SCEV *S, const Loop *L) { |
11673 | 7.67M | return getLoopDisposition(S, L) == LoopComputable; |
11674 | 7.67M | } |
11675 | | |
11676 | | ScalarEvolution::BlockDisposition |
11677 | 18.4M | ScalarEvolution::getBlockDisposition(const SCEV *S, const BasicBlock *BB) { |
11678 | 18.4M | auto &Values = BlockDispositions[S]; |
11679 | 166M | for (auto &V : Values) { |
11680 | 166M | if (V.getPointer() == BB) |
11681 | 12.8M | return V.getInt(); |
11682 | 166M | } |
11683 | 18.4M | Values.emplace_back(BB, DoesNotDominateBlock); |
11684 | 5.54M | BlockDisposition D = computeBlockDisposition(S, BB); |
11685 | 5.54M | auto &Values2 = BlockDispositions[S]; |
11686 | 5.54M | for (auto &V : make_range(Values2.rbegin(), Values2.rend())) { |
11687 | 5.54M | if (V.getPointer() == BB) { |
11688 | 5.54M | V.setInt(D); |
11689 | 5.54M | break; |
11690 | 5.54M | } |
11691 | 5.54M | } |
11692 | 5.54M | return D; |
11693 | 18.4M | } |
11694 | | |
11695 | | ScalarEvolution::BlockDisposition |
11696 | 5.54M | ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) { |
11697 | 5.54M | switch (static_cast<SCEVTypes>(S->getSCEVType())) { |
11698 | 5.54M | case scConstant: |
11699 | 2.54M | return ProperlyDominatesBlock; |
11700 | 5.54M | case scTruncate: |
11701 | 246k | case scZeroExtend: |
11702 | 246k | case scSignExtend: |
11703 | 246k | return getBlockDisposition(cast<SCEVCastExpr>(S)->getOperand(), BB); |
11704 | 1.13M | case scAddRecExpr: { |
11705 | 1.13M | // This uses a "dominates" query instead of "properly dominates" query |
11706 | 1.13M | // to test for proper dominance too, because the instruction which |
11707 | 1.13M | // produces the addrec's value is a PHI, and a PHI effectively properly |
11708 | 1.13M | // dominates its entire containing block. |
11709 | 1.13M | const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S); |
11710 | 1.13M | if (!DT.dominates(AR->getLoop()->getHeader(), BB)) |
11711 | 682 | return DoesNotDominateBlock; |
11712 | 1.13M | |
11713 | 1.13M | // Fall through into SCEVNAryExpr handling. |
11714 | 1.13M | LLVM_FALLTHROUGH; |
11715 | 1.13M | } |
11716 | 2.00M | case scAddExpr: |
11717 | 2.00M | case scMulExpr: |
11718 | 2.00M | case scUMaxExpr: |
11719 | 2.00M | case scSMaxExpr: |
11720 | 2.00M | case scUMinExpr: |
11721 | 2.00M | case scSMinExpr: { |
11722 | 2.00M | const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S); |
11723 | 2.00M | bool Proper = true; |
11724 | 4.09M | for (const SCEV *NAryOp : NAry->operands()) { |
11725 | 4.09M | BlockDisposition D = getBlockDisposition(NAryOp, BB); |
11726 | 4.09M | if (D == DoesNotDominateBlock) |
11727 | 30.3k | return DoesNotDominateBlock; |
11728 | 4.06M | if (D == DominatesBlock) |
11729 | 36.0k | Proper = false; |
11730 | 4.06M | } |
11731 | 2.00M | return Proper 1.97M ? ProperlyDominatesBlock1.96M : DominatesBlock8.81k ; |
11732 | 2.00M | } |
11733 | 2.00M | case scUDivExpr: { |
11734 | 30.1k | const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S); |
11735 | 30.1k | const SCEV *LHS = UDiv->getLHS(), *RHS = UDiv->getRHS(); |
11736 | 30.1k | BlockDisposition LD = getBlockDisposition(LHS, BB); |
11737 | 30.1k | if (LD == DoesNotDominateBlock) |
11738 | 140 | return DoesNotDominateBlock; |
11739 | 30.0k | BlockDisposition RD = getBlockDisposition(RHS, BB); |
11740 | 30.0k | if (RD == DoesNotDominateBlock) |
11741 | 4 | return DoesNotDominateBlock; |
11742 | 30.0k | return (LD == ProperlyDominatesBlock && RD == ProperlyDominatesBlock29.9k ) ? |
11743 | 29.9k | ProperlyDominatesBlock : DominatesBlock58 ; |
11744 | 30.0k | } |
11745 | 727k | case scUnknown: |
11746 | 727k | if (Instruction *I = |
11747 | 449k | dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue())) { |
11748 | 449k | if (I->getParent() == BB) |
11749 | 6.55k | return DominatesBlock; |
11750 | 442k | if (DT.properlyDominates(I->getParent(), BB)) |
11751 | 423k | return ProperlyDominatesBlock; |
11752 | 19.0k | return DoesNotDominateBlock; |
11753 | 19.0k | } |
11754 | 278k | return ProperlyDominatesBlock; |
11755 | 278k | case scCouldNotCompute: |
11756 | 0 | llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); |
11757 | 0 | } |
11758 | 0 | llvm_unreachable("Unknown SCEV kind!"); |
11759 | 0 | } |
11760 | | |
11761 | 432k | bool ScalarEvolution::dominates(const SCEV *S, const BasicBlock *BB) { |
11762 | 432k | return getBlockDisposition(S, BB) >= DominatesBlock; |
11763 | 432k | } |
11764 | | |
11765 | 13.6M | bool ScalarEvolution::properlyDominates(const SCEV *S, const BasicBlock *BB) { |
11766 | 13.6M | return getBlockDisposition(S, BB) == ProperlyDominatesBlock; |
11767 | 13.6M | } |
11768 | | |
11769 | 70.3M | bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const { |
11770 | 105M | return SCEVExprContains(S, [&](const SCEV *Expr) { return Expr == Op; }); |
11771 | 70.3M | } |
11772 | | |
11773 | 0 | bool ScalarEvolution::ExitLimit::hasOperand(const SCEV *S) const { |
11774 | 0 | auto IsS = [&](const SCEV *X) { return S == X; }; |
11775 | 0 | auto ContainsS = [&](const SCEV *X) { |
11776 | 0 | return !isa<SCEVCouldNotCompute>(X) && SCEVExprContains(X, IsS); |
11777 | 0 | }; |
11778 | 0 | return ContainsS(ExactNotTaken) || ContainsS(MaxNotTaken); |
11779 | 0 | } |
11780 | | |
11781 | | void |
11782 | 2.44M | ScalarEvolution::forgetMemoizedResults(const SCEV *S) { |
11783 | 2.44M | ValuesAtScopes.erase(S); |
11784 | 2.44M | LoopDispositions.erase(S); |
11785 | 2.44M | BlockDispositions.erase(S); |
11786 | 2.44M | UnsignedRanges.erase(S); |
11787 | 2.44M | SignedRanges.erase(S); |
11788 | 2.44M | ExprValueMap.erase(S); |
11789 | 2.44M | HasRecMap.erase(S); |
11790 | 2.44M | MinTrailingZerosCache.erase(S); |
11791 | 2.44M | |
11792 | 2.44M | for (auto I = PredicatedSCEVRewrites.begin(); |
11793 | 2.51M | I != PredicatedSCEVRewrites.end();) { |
11794 | 76.2k | std::pair<const SCEV *, const Loop *> Entry = I->first; |
11795 | 76.2k | if (Entry.first == S) |
11796 | 130 | PredicatedSCEVRewrites.erase(I++); |
11797 | 76.0k | else |
11798 | 76.0k | ++I; |
11799 | 76.2k | } |
11800 | 2.44M | |
11801 | 2.44M | auto RemoveSCEVFromBackedgeMap = |
11802 | 4.88M | [S, this](DenseMap<const Loop *, BackedgeTakenInfo> &Map) { |
11803 | 43.6M | for (auto I = Map.begin(), E = Map.end(); I != E;) { |
11804 | 38.7M | BackedgeTakenInfo &BEInfo = I->second; |
11805 | 38.7M | if (BEInfo.hasOperand(S, this)) { |
11806 | 2.37k | BEInfo.clear(); |
11807 | 2.37k | Map.erase(I++); |
11808 | 2.37k | } else |
11809 | 38.7M | ++I; |
11810 | 38.7M | } |
11811 | 4.88M | }; |
11812 | 2.44M | |
11813 | 2.44M | RemoveSCEVFromBackedgeMap(BackedgeTakenCounts); |
11814 | 2.44M | RemoveSCEVFromBackedgeMap(PredicatedBackedgeTakenCounts); |
11815 | 2.44M | } |
11816 | | |
11817 | | void |
11818 | | ScalarEvolution::getUsedLoops(const SCEV *S, |
11819 | 15.5M | SmallPtrSetImpl<const Loop *> &LoopsUsed) { |
11820 | 15.5M | struct FindUsedLoops { |
11821 | 15.5M | FindUsedLoops(SmallPtrSetImpl<const Loop *> &LoopsUsed) |
11822 | 15.5M | : LoopsUsed(LoopsUsed) {} |
11823 | 15.5M | SmallPtrSetImpl<const Loop *> &LoopsUsed; |
11824 | 77.7M | bool follow(const SCEV *S) { |
11825 | 77.7M | if (auto *AR = dyn_cast<SCEVAddRecExpr>(S)) |
11826 | 7.40M | LoopsUsed.insert(AR->getLoop()); |
11827 | 77.7M | return true; |
11828 | 77.7M | } |
11829 | 15.5M | |
11830 | 77.7M | bool isDone() const { return false; } |
11831 | 15.5M | }; |
11832 | 15.5M | |
11833 | 15.5M | FindUsedLoops F(LoopsUsed); |
11834 | 15.5M | SCEVTraversal<FindUsedLoops>(F).visitAll(S); |
11835 | 15.5M | } |
11836 | | |
11837 | 14.2M | void ScalarEvolution::addToLoopUseLists(const SCEV *S) { |
11838 | 14.2M | SmallPtrSet<const Loop *, 8> LoopsUsed; |
11839 | 14.2M | getUsedLoops(S, LoopsUsed); |
11840 | 14.2M | for (auto *L : LoopsUsed) |
11841 | 6.87M | LoopUsers[L].push_back(S); |
11842 | 14.2M | } |
11843 | | |
11844 | 0 | void ScalarEvolution::verify() const { |
11845 | 0 | ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this); |
11846 | 0 | ScalarEvolution SE2(F, TLI, AC, DT, LI); |
11847 | 0 |
|
11848 | 0 | SmallVector<Loop *, 8> LoopStack(LI.begin(), LI.end()); |
11849 | 0 |
|
11850 | 0 | // Map's SCEV expressions from one ScalarEvolution "universe" to another. |
11851 | 0 | struct SCEVMapper : public SCEVRewriteVisitor<SCEVMapper> { |
11852 | 0 | SCEVMapper(ScalarEvolution &SE) : SCEVRewriteVisitor<SCEVMapper>(SE) {} |
11853 | 0 |
|
11854 | 0 | const SCEV *visitConstant(const SCEVConstant *Constant) { |
11855 | 0 | return SE.getConstant(Constant->getAPInt()); |
11856 | 0 | } |
11857 | 0 |
|
11858 | 0 | const SCEV *visitUnknown(const SCEVUnknown *Expr) { |
11859 | 0 | return SE.getUnknown(Expr->getValue()); |
11860 | 0 | } |
11861 | 0 |
|
11862 | 0 | const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) { |
11863 | 0 | return SE.getCouldNotCompute(); |
11864 | 0 | } |
11865 | 0 | }; |
11866 | 0 |
|
11867 | 0 | SCEVMapper SCM(SE2); |
11868 | 0 |
|
11869 | 0 | while (!LoopStack.empty()) { |
11870 | 0 | auto *L = LoopStack.pop_back_val(); |
11871 | 0 | LoopStack.insert(LoopStack.end(), L->begin(), L->end()); |
11872 | 0 |
|
11873 | 0 | auto *CurBECount = SCM.visit( |
11874 | 0 | const_cast<ScalarEvolution *>(this)->getBackedgeTakenCount(L)); |
11875 | 0 | auto *NewBECount = SE2.getBackedgeTakenCount(L); |
11876 | 0 |
|
11877 | 0 | if (CurBECount == SE2.getCouldNotCompute() || |
11878 | 0 | NewBECount == SE2.getCouldNotCompute()) { |
11879 | 0 | // NB! This situation is legal, but is very suspicious -- whatever pass |
11880 | 0 | // change the loop to make a trip count go from could not compute to |
11881 | 0 | // computable or vice-versa *should have* invalidated SCEV. However, we |
11882 | 0 | // choose not to assert here (for now) since we don't want false |
11883 | 0 | // positives. |
11884 | 0 | continue; |
11885 | 0 | } |
11886 | 0 | |
11887 | 0 | if (containsUndefs(CurBECount) || containsUndefs(NewBECount)) { |
11888 | 0 | // SCEV treats "undef" as an unknown but consistent value (i.e. it does |
11889 | 0 | // not propagate undef aggressively). This means we can (and do) fail |
11890 | 0 | // verification in cases where a transform makes the trip count of a loop |
11891 | 0 | // go from "undef" to "undef+1" (say). The transform is fine, since in |
11892 | 0 | // both cases the loop iterates "undef" times, but SCEV thinks we |
11893 | 0 | // increased the trip count of the loop by 1 incorrectly. |
11894 | 0 | continue; |
11895 | 0 | } |
11896 | 0 | |
11897 | 0 | if (SE.getTypeSizeInBits(CurBECount->getType()) > |
11898 | 0 | SE.getTypeSizeInBits(NewBECount->getType())) |
11899 | 0 | NewBECount = SE2.getZeroExtendExpr(NewBECount, CurBECount->getType()); |
11900 | 0 | else if (SE.getTypeSizeInBits(CurBECount->getType()) < |
11901 | 0 | SE.getTypeSizeInBits(NewBECount->getType())) |
11902 | 0 | CurBECount = SE2.getZeroExtendExpr(CurBECount, NewBECount->getType()); |
11903 | 0 |
|
11904 | 0 | auto *ConstantDelta = |
11905 | 0 | dyn_cast<SCEVConstant>(SE2.getMinusSCEV(CurBECount, NewBECount)); |
11906 | 0 |
|
11907 | 0 | if (ConstantDelta && ConstantDelta->getAPInt() != 0) { |
11908 | 0 | dbgs() << "Trip Count Changed!\n"; |
11909 | 0 | dbgs() << "Old: " << *CurBECount << "\n"; |
11910 | 0 | dbgs() << "New: " << *NewBECount << "\n"; |
11911 | 0 | dbgs() << "Delta: " << *ConstantDelta << "\n"; |
11912 | 0 | std::abort(); |
11913 | 0 | } |
11914 | 0 | } |
11915 | 0 | } |
11916 | | |
11917 | | bool ScalarEvolution::invalidate( |
11918 | | Function &F, const PreservedAnalyses &PA, |
11919 | 1.23k | FunctionAnalysisManager::Invalidator &Inv) { |
11920 | 1.23k | // Invalidate the ScalarEvolution object whenever it isn't preserved or one |
11921 | 1.23k | // of its dependencies is invalidated. |
11922 | 1.23k | auto PAC = PA.getChecker<ScalarEvolutionAnalysis>(); |
11923 | 1.23k | return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>()229 ) || |
11924 | 1.23k | Inv.invalidate<AssumptionAnalysis>(F, PA)1.02k || |
11925 | 1.23k | Inv.invalidate<DominatorTreeAnalysis>(F, PA)1.02k || |
11926 | 1.23k | Inv.invalidate<LoopAnalysis>(F, PA)1.02k ; |
11927 | 1.23k | } |
11928 | | |
11929 | | AnalysisKey ScalarEvolutionAnalysis::Key; |
11930 | | |
11931 | | ScalarEvolution ScalarEvolutionAnalysis::run(Function &F, |
11932 | 2.52k | FunctionAnalysisManager &AM) { |
11933 | 2.52k | return ScalarEvolution(F, AM.getResult<TargetLibraryAnalysis>(F), |
11934 | 2.52k | AM.getResult<AssumptionAnalysis>(F), |
11935 | 2.52k | AM.getResult<DominatorTreeAnalysis>(F), |
11936 | 2.52k | AM.getResult<LoopAnalysis>(F)); |
11937 | 2.52k | } |
11938 | | |
11939 | | PreservedAnalyses |
11940 | 19 | ScalarEvolutionPrinterPass::run(Function &F, FunctionAnalysisManager &AM) { |
11941 | 19 | AM.getResult<ScalarEvolutionAnalysis>(F).print(OS); |
11942 | 19 | return PreservedAnalyses::all(); |
11943 | 19 | } |
11944 | | |
11945 | 102k | INITIALIZE_PASS_BEGIN(ScalarEvolutionWrapperPass, "scalar-evolution", |
11946 | 102k | "Scalar Evolution Analysis", false, true) |
11947 | 102k | INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) |
11948 | 102k | INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) |
11949 | 102k | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) |
11950 | 102k | INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) |
11951 | 102k | INITIALIZE_PASS_END(ScalarEvolutionWrapperPass, "scalar-evolution", |
11952 | | "Scalar Evolution Analysis", false, true) |
11953 | | |
11954 | | char ScalarEvolutionWrapperPass::ID = 0; |
11955 | | |
11956 | 191k | ScalarEvolutionWrapperPass::ScalarEvolutionWrapperPass() : FunctionPass(ID) { |
11957 | 191k | initializeScalarEvolutionWrapperPassPass(*PassRegistry::getPassRegistry()); |
11958 | 191k | } |
11959 | | |
11960 | 4.01M | bool ScalarEvolutionWrapperPass::runOnFunction(Function &F) { |
11961 | 4.01M | SE.reset(new ScalarEvolution( |
11962 | 4.01M | F, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(), |
11963 | 4.01M | getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F), |
11964 | 4.01M | getAnalysis<DominatorTreeWrapperPass>().getDomTree(), |
11965 | 4.01M | getAnalysis<LoopInfoWrapperPass>().getLoopInfo())); |
11966 | 4.01M | return false; |
11967 | 4.01M | } |
11968 | | |
11969 | 4.01M | void ScalarEvolutionWrapperPass::releaseMemory() { SE.reset(); } |
11970 | | |
11971 | 451 | void ScalarEvolutionWrapperPass::print(raw_ostream &OS, const Module *) const { |
11972 | 451 | SE->print(OS); |
11973 | 451 | } |
11974 | | |
11975 | 0 | void ScalarEvolutionWrapperPass::verifyAnalysis() const { |
11976 | 0 | if (!VerifySCEV) |
11977 | 0 | return; |
11978 | 0 | |
11979 | 0 | SE->verify(); |
11980 | 0 | } |
11981 | | |
11982 | 191k | void ScalarEvolutionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { |
11983 | 191k | AU.setPreservesAll(); |
11984 | 191k | AU.addRequiredTransitive<AssumptionCacheTracker>(); |
11985 | 191k | AU.addRequiredTransitive<LoopInfoWrapperPass>(); |
11986 | 191k | AU.addRequiredTransitive<DominatorTreeWrapperPass>(); |
11987 | 191k | AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>(); |
11988 | 191k | } |
11989 | | |
11990 | | const SCEVPredicate *ScalarEvolution::getEqualPredicate(const SCEV *LHS, |
11991 | 7.09k | const SCEV *RHS) { |
11992 | 7.09k | FoldingSetNodeID ID; |
11993 | 7.09k | assert(LHS->getType() == RHS->getType() && |
11994 | 7.09k | "Type mismatch between LHS and RHS"); |
11995 | 7.09k | // Unique this node based on the arguments |
11996 | 7.09k | ID.AddInteger(SCEVPredicate::P_Equal); |
11997 | 7.09k | ID.AddPointer(LHS); |
11998 | 7.09k | ID.AddPointer(RHS); |
11999 | 7.09k | void *IP = nullptr; |
12000 | 7.09k | if (const auto *S = UniquePreds.FindNodeOrInsertPos(ID, IP)) |
12001 | 6.72k | return S; |
12002 | 376 | SCEVEqualPredicate *Eq = new (SCEVAllocator) |
12003 | 376 | SCEVEqualPredicate(ID.Intern(SCEVAllocator), LHS, RHS); |
12004 | 376 | UniquePreds.InsertNode(Eq, IP); |
12005 | 376 | return Eq; |
12006 | 376 | } |
12007 | | |
12008 | | const SCEVPredicate *ScalarEvolution::getWrapPredicate( |
12009 | | const SCEVAddRecExpr *AR, |
12010 | 29.0k | SCEVWrapPredicate::IncrementWrapFlags AddedFlags) { |
12011 | 29.0k | FoldingSetNodeID ID; |
12012 | 29.0k | // Unique this node based on the arguments |
12013 | 29.0k | ID.AddInteger(SCEVPredicate::P_Wrap); |
12014 | 29.0k | ID.AddPointer(AR); |
12015 | 29.0k | ID.AddInteger(AddedFlags); |
12016 | 29.0k | void *IP = nullptr; |
12017 | 29.0k | if (const auto *S = UniquePreds.FindNodeOrInsertPos(ID, IP)) |
12018 | 3.16k | return S; |
12019 | 25.9k | auto *OF = new (SCEVAllocator) |
12020 | 25.9k | SCEVWrapPredicate(ID.Intern(SCEVAllocator), AR, AddedFlags); |
12021 | 25.9k | UniquePreds.InsertNode(OF, IP); |
12022 | 25.9k | return OF; |
12023 | 25.9k | } |
12024 | | |
12025 | | namespace { |
12026 | | |
12027 | | class SCEVPredicateRewriter : public SCEVRewriteVisitor<SCEVPredicateRewriter> { |
12028 | | public: |
12029 | | |
12030 | | /// Rewrites \p S in the context of a loop L and the SCEV predication |
12031 | | /// infrastructure. |
12032 | | /// |
12033 | | /// If \p Pred is non-null, the SCEV expression is rewritten to respect the |
12034 | | /// equivalences present in \p Pred. |
12035 | | /// |
12036 | | /// If \p NewPreds is non-null, rewrite is free to add further predicates to |
12037 | | /// \p NewPreds such that the result will be an AddRecExpr. |
12038 | | static const SCEV *rewrite(const SCEV *S, const Loop *L, ScalarEvolution &SE, |
12039 | | SmallPtrSetImpl<const SCEVPredicate *> *NewPreds, |
12040 | 385k | SCEVUnionPredicate *Pred) { |
12041 | 385k | SCEVPredicateRewriter Rewriter(L, SE, NewPreds, Pred); |
12042 | 385k | return Rewriter.visit(S); |
12043 | 385k | } |
12044 | | |
12045 | 501k | const SCEV *visitUnknown(const SCEVUnknown *Expr) { |
12046 | 501k | if (Pred) { |
12047 | 399k | auto ExprPreds = Pred->getPredicatesForExpr(Expr); |
12048 | 399k | for (auto *Pred : ExprPreds) |
12049 | 1.45k | if (const auto *IPred = dyn_cast<SCEVEqualPredicate>(Pred)) |
12050 | 1.45k | if (IPred->getLHS() == Expr) |
12051 | 1.45k | return IPred->getRHS(); |
12052 | 399k | } |
12053 | 501k | return convertToAddRecWithPreds(Expr)499k ; |
12054 | 501k | } |
12055 | | |
12056 | 14.4k | const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) { |
12057 | 14.4k | const SCEV *Operand = visit(Expr->getOperand()); |
12058 | 14.4k | const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Operand); |
12059 | 14.4k | if (AR && AR->getLoop() == L1.95k && AR->isAffine()1.59k ) { |
12060 | 1.59k | // This couldn't be folded because the operand didn't have the nuw |
12061 | 1.59k | // flag. Add the nusw flag as an assumption that we could make. |
12062 | 1.59k | const SCEV *Step = AR->getStepRecurrence(SE); |
12063 | 1.59k | Type *Ty = Expr->getType(); |
12064 | 1.59k | if (addOverflowAssumption(AR, SCEVWrapPredicate::IncrementNUSW)) |
12065 | 811 | return SE.getAddRecExpr(SE.getZeroExtendExpr(AR->getStart(), Ty), |
12066 | 811 | SE.getSignExtendExpr(Step, Ty), L, |
12067 | 811 | AR->getNoWrapFlags()); |
12068 | 13.6k | } |
12069 | 13.6k | return SE.getZeroExtendExpr(Operand, Expr->getType()); |
12070 | 13.6k | } |
12071 | | |
12072 | 122k | const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) { |
12073 | 122k | const SCEV *Operand = visit(Expr->getOperand()); |
12074 | 122k | const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Operand); |
12075 | 122k | if (AR && AR->getLoop() == L3.80k && AR->isAffine()2.68k ) { |
12076 | 2.67k | // This couldn't be folded because the operand didn't have the nsw |
12077 | 2.67k | // flag. Add the nssw flag as an assumption that we could make. |
12078 | 2.67k | const SCEV *Step = AR->getStepRecurrence(SE); |
12079 | 2.67k | Type *Ty = Expr->getType(); |
12080 | 2.67k | if (addOverflowAssumption(AR, SCEVWrapPredicate::IncrementNSSW)) |
12081 | 1.24k | return SE.getAddRecExpr(SE.getSignExtendExpr(AR->getStart(), Ty), |
12082 | 1.24k | SE.getSignExtendExpr(Step, Ty), L, |
12083 | 1.24k | AR->getNoWrapFlags()); |
12084 | 121k | } |
12085 | 121k | return SE.getSignExtendExpr(Operand, Expr->getType()); |
12086 | 121k | } |
12087 | | |
12088 | | private: |
12089 | | explicit SCEVPredicateRewriter(const Loop *L, ScalarEvolution &SE, |
12090 | | SmallPtrSetImpl<const SCEVPredicate *> *NewPreds, |
12091 | | SCEVUnionPredicate *Pred) |
12092 | 385k | : SCEVRewriteVisitor(SE), NewPreds(NewPreds), Pred(Pred), L(L) {} |
12093 | | |
12094 | 4.42k | bool addOverflowAssumption(const SCEVPredicate *P) { |
12095 | 4.42k | if (!NewPreds) { |
12096 | 3.24k | // Check if we've already made this assumption. |
12097 | 3.24k | return Pred && Pred->implies(P); |
12098 | 3.24k | } |
12099 | 1.17k | NewPreds->insert(P); |
12100 | 1.17k | return true; |
12101 | 1.17k | } |
12102 | | |
12103 | | bool addOverflowAssumption(const SCEVAddRecExpr *AR, |
12104 | 4.27k | SCEVWrapPredicate::IncrementWrapFlags AddedFlags) { |
12105 | 4.27k | auto *A = SE.getWrapPredicate(AR, AddedFlags); |
12106 | 4.27k | return addOverflowAssumption(A); |
12107 | 4.27k | } |
12108 | | |
12109 | | // If \p Expr represents a PHINode, we try to see if it can be represented |
12110 | | // as an AddRec, possibly under a predicate (PHISCEVPred). If it is possible |
12111 | | // to add this predicate as a runtime overflow check, we return the AddRec. |
12112 | | // If \p Expr does not meet these conditions (is not a PHI node, or we |
12113 | | // couldn't create an AddRec for it, or couldn't add the predicate), we just |
12114 | | // return \p Expr. |
12115 | 499k | const SCEV *convertToAddRecWithPreds(const SCEVUnknown *Expr) { |
12116 | 499k | if (!isa<PHINode>(Expr->getValue())) |
12117 | 400k | return Expr; |
12118 | 99.2k | Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>> |
12119 | 99.2k | PredicatedRewrite = SE.createAddRecFromPHIWithCasts(Expr); |
12120 | 99.2k | if (!PredicatedRewrite) |
12121 | 99.1k | return Expr; |
12122 | 190 | for (auto *P : PredicatedRewrite->second)155 { |
12123 | 190 | // Wrap predicates from outer loops are not supported. |
12124 | 190 | if (auto *WP = dyn_cast<const SCEVWrapPredicate>(P)) { |
12125 | 155 | auto *AR = cast<const SCEVAddRecExpr>(WP->getExpr()); |
12126 | 155 | if (L != AR->getLoop()) |
12127 | 46 | return Expr; |
12128 | 144 | } |
12129 | 144 | if (!addOverflowAssumption(P)) |
12130 | 33 | return Expr; |
12131 | 76 | } |
12132 | 76 | return PredicatedRewrite->first; |
12133 | 76 | } |
12134 | | |
12135 | | SmallPtrSetImpl<const SCEVPredicate *> *NewPreds; |
12136 | | SCEVUnionPredicate *Pred; |
12137 | | const Loop *L; |
12138 | | }; |
12139 | | |
12140 | | } // end anonymous namespace |
12141 | | |
12142 | | const SCEV *ScalarEvolution::rewriteUsingPredicate(const SCEV *S, const Loop *L, |
12143 | 302k | SCEVUnionPredicate &Preds) { |
12144 | 302k | return SCEVPredicateRewriter::rewrite(S, L, *this, nullptr, &Preds); |
12145 | 302k | } |
12146 | | |
12147 | | const SCEVAddRecExpr *ScalarEvolution::convertSCEVToAddRecWithPredicates( |
12148 | | const SCEV *S, const Loop *L, |
12149 | 82.1k | SmallPtrSetImpl<const SCEVPredicate *> &Preds) { |
12150 | 82.1k | SmallPtrSet<const SCEVPredicate *, 4> TransformPreds; |
12151 | 82.1k | S = SCEVPredicateRewriter::rewrite(S, L, *this, &TransformPreds, nullptr); |
12152 | 82.1k | auto *AddRec = dyn_cast<SCEVAddRecExpr>(S); |
12153 | 82.1k | |
12154 | 82.1k | if (!AddRec) |
12155 | 81.1k | return nullptr; |
12156 | 1.09k | |
12157 | 1.09k | // Since the transformation was successful, we can now transfer the SCEV |
12158 | 1.09k | // predicates. |
12159 | 1.09k | for (auto *P : TransformPreds) |
12160 | 1.10k | Preds.insert(P); |
12161 | 1.09k | |
12162 | 1.09k | return AddRec; |
12163 | 1.09k | } |
12164 | | |
12165 | | /// SCEV predicates |
12166 | | SCEVPredicate::SCEVPredicate(const FoldingSetNodeIDRef ID, |
12167 | | SCEVPredicateKind Kind) |
12168 | 531k | : FastID(ID), Kind(Kind) {} |
12169 | | |
12170 | | SCEVEqualPredicate::SCEVEqualPredicate(const FoldingSetNodeIDRef ID, |
12171 | | const SCEV *LHS, const SCEV *RHS) |
12172 | 376 | : SCEVPredicate(ID, P_Equal), LHS(LHS), RHS(RHS) { |
12173 | 376 | assert(LHS->getType() == RHS->getType() && "LHS and RHS types don't match"); |
12174 | 376 | assert(LHS != RHS && "LHS and RHS are the same SCEV"); |
12175 | 376 | } |
12176 | | |
12177 | 6.73k | bool SCEVEqualPredicate::implies(const SCEVPredicate *N) const { |
12178 | 6.73k | const auto *Op = dyn_cast<SCEVEqualPredicate>(N); |
12179 | 6.73k | |
12180 | 6.73k | if (!Op) |
12181 | 0 | return false; |
12182 | 6.73k | |
12183 | 6.73k | return Op->LHS == LHS && Op->RHS == RHS; |
12184 | 6.73k | } |
12185 | | |
12186 | 4 | bool SCEVEqualPredicate::isAlwaysTrue() const { return false; } |
12187 | | |
12188 | 7.99k | const SCEV *SCEVEqualPredicate::getExpr() const { return LHS; } |
12189 | | |
12190 | 0 | void SCEVEqualPredicate::print(raw_ostream &OS, unsigned Depth) const { |
12191 | 0 | OS.indent(Depth) << "Equal predicate: " << *LHS << " == " << *RHS << "\n"; |
12192 | 0 | } |
12193 | | |
12194 | | SCEVWrapPredicate::SCEVWrapPredicate(const FoldingSetNodeIDRef ID, |
12195 | | const SCEVAddRecExpr *AR, |
12196 | | IncrementWrapFlags Flags) |
12197 | 25.9k | : SCEVPredicate(ID, P_Wrap), AR(AR), Flags(Flags) {} |
12198 | | |
12199 | 82.5k | const SCEV *SCEVWrapPredicate::getExpr() const { return AR; } |
12200 | | |
12201 | 1.16k | bool SCEVWrapPredicate::implies(const SCEVPredicate *N) const { |
12202 | 1.16k | const auto *Op = dyn_cast<SCEVWrapPredicate>(N); |
12203 | 1.16k | |
12204 | 1.16k | return Op && Op->AR == AR && setFlags(Flags, Op->Flags) == Flags; |
12205 | 1.16k | } |
12206 | | |
12207 | 420 | bool SCEVWrapPredicate::isAlwaysTrue() const { |
12208 | 420 | SCEV::NoWrapFlags ScevFlags = AR->getNoWrapFlags(); |
12209 | 420 | IncrementWrapFlags IFlags = Flags; |
12210 | 420 | |
12211 | 420 | if (ScalarEvolution::setFlags(ScevFlags, SCEV::FlagNSW) == ScevFlags) |
12212 | 0 | IFlags = clearFlags(IFlags, IncrementNSSW); |
12213 | 420 | |
12214 | 420 | return IFlags == IncrementAnyWrap; |
12215 | 420 | } |
12216 | | |
12217 | 36 | void SCEVWrapPredicate::print(raw_ostream &OS, unsigned Depth) const { |
12218 | 36 | OS.indent(Depth) << *getExpr() << " Added Flags: "; |
12219 | 36 | if (SCEVWrapPredicate::IncrementNUSW & getFlags()) |
12220 | 28 | OS << "<nusw>"; |
12221 | 36 | if (SCEVWrapPredicate::IncrementNSSW & getFlags()) |
12222 | 8 | OS << "<nssw>"; |
12223 | 36 | OS << "\n"; |
12224 | 36 | } |
12225 | | |
12226 | | SCEVWrapPredicate::IncrementWrapFlags |
12227 | | SCEVWrapPredicate::getImpliedFlags(const SCEVAddRecExpr *AR, |
12228 | 382k | ScalarEvolution &SE) { |
12229 | 382k | IncrementWrapFlags ImpliedFlags = IncrementAnyWrap; |
12230 | 382k | SCEV::NoWrapFlags StaticFlags = AR->getNoWrapFlags(); |
12231 | 382k | |
12232 | 382k | // We can safely transfer the NSW flag as NSSW. |
12233 | 382k | if (ScalarEvolution::setFlags(StaticFlags, SCEV::FlagNSW) == StaticFlags) |
12234 | 207k | ImpliedFlags = IncrementNSSW; |
12235 | 382k | |
12236 | 382k | if (ScalarEvolution::setFlags(StaticFlags, SCEV::FlagNUW) == StaticFlags) { |
12237 | 2.77k | // If the increment is positive, the SCEV NUW flag will also imply the |
12238 | 2.77k | // WrapPredicate NUSW flag. |
12239 | 2.77k | if (const auto *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(SE))) |
12240 | 2.77k | if (Step->getValue()->getValue().isNonNegative()) |
12241 | 2.77k | ImpliedFlags = setFlags(ImpliedFlags, IncrementNUSW); |
12242 | 2.77k | } |
12243 | 382k | |
12244 | 382k | return ImpliedFlags; |
12245 | 382k | } |
12246 | | |
12247 | | /// Union predicates don't get cached so create a dummy set ID for it. |
12248 | | SCEVUnionPredicate::SCEVUnionPredicate() |
12249 | 504k | : SCEVPredicate(FoldingSetNodeIDRef(nullptr, 0), P_Union) {} |
12250 | | |
12251 | 66.7k | bool SCEVUnionPredicate::isAlwaysTrue() const { |
12252 | 66.7k | return all_of(Preds, |
12253 | 66.7k | [](const SCEVPredicate *I) { return I->isAlwaysTrue(); }424 ); |
12254 | 66.7k | } |
12255 | | |
12256 | | ArrayRef<const SCEVPredicate *> |
12257 | 399k | SCEVUnionPredicate::getPredicatesForExpr(const SCEV *Expr) { |
12258 | 399k | auto I = SCEVToPreds.find(Expr); |
12259 | 399k | if (I == SCEVToPreds.end()) |
12260 | 398k | return ArrayRef<const SCEVPredicate *>(); |
12261 | 1.45k | return I->second; |
12262 | 1.45k | } |
12263 | | |
12264 | 251k | bool SCEVUnionPredicate::implies(const SCEVPredicate *N) const { |
12265 | 251k | if (const auto *Set = dyn_cast<SCEVUnionPredicate>(N)) |
12266 | 188k | return all_of(Set->Preds, |
12267 | 188k | [this](const SCEVPredicate *I) { return this->implies(I); }408 ); |
12268 | 62.7k | |
12269 | 62.7k | auto ScevPredsIt = SCEVToPreds.find(N->getExpr()); |
12270 | 62.7k | if (ScevPredsIt == SCEVToPreds.end()) |
12271 | 54.8k | return false; |
12272 | 7.90k | auto &SCEVPreds = ScevPredsIt->second; |
12273 | 7.90k | |
12274 | 7.90k | return any_of(SCEVPreds, |
12275 | 7.90k | [N](const SCEVPredicate *I) { return I->implies(N); }); |
12276 | 7.90k | } |
12277 | | |
12278 | 0 | const SCEV *SCEVUnionPredicate::getExpr() const { return nullptr; } |
12279 | | |
12280 | 402 | void SCEVUnionPredicate::print(raw_ostream &OS, unsigned Depth) const { |
12281 | 402 | for (auto Pred : Preds) |
12282 | 36 | Pred->print(OS, Depth); |
12283 | 402 | } |
12284 | | |
12285 | 27.8k | void SCEVUnionPredicate::add(const SCEVPredicate *N) { |
12286 | 27.8k | if (const auto *Set = dyn_cast<SCEVUnionPredicate>(N)) { |
12287 | 594 | for (auto Pred : Set->Preds) |
12288 | 1.17k | add(Pred); |
12289 | 594 | return; |
12290 | 594 | } |
12291 | 27.2k | |
12292 | 27.2k | if (implies(N)) |
12293 | 11 | return; |
12294 | 27.2k | |
12295 | 27.2k | const SCEV *Key = N->getExpr(); |
12296 | 27.2k | assert(Key && "Only SCEVUnionPredicate doesn't have an " |
12297 | 27.2k | " associated expression!"); |
12298 | 27.2k | |
12299 | 27.2k | SCEVToPreds[Key].push_back(N); |
12300 | 27.2k | Preds.push_back(N); |
12301 | 27.2k | } |
12302 | | |
12303 | | PredicatedScalarEvolution::PredicatedScalarEvolution(ScalarEvolution &SE, |
12304 | | Loop &L) |
12305 | 332k | : SE(SE), L(L) {} |
12306 | | |
12307 | 1.76M | const SCEV *PredicatedScalarEvolution::getSCEV(Value *V) { |
12308 | 1.76M | const SCEV *Expr = SE.getSCEV(V); |
12309 | 1.76M | RewriteEntry &Entry = RewriteMap[Expr]; |
12310 | 1.76M | |
12311 | 1.76M | // If we already have an entry and the version matches, return it. |
12312 | 1.76M | if (Entry.second && Generation == Entry.first1.50M ) |
12313 | 1.45M | return Entry.second; |
12314 | 302k | |
12315 | 302k | // We found an entry but it's stale. Rewrite the stale entry |
12316 | 302k | // according to the current predicate. |
12317 | 302k | if (Entry.second) |
12318 | 48.5k | Expr = Entry.second; |
12319 | 302k | |
12320 | 302k | const SCEV *NewSCEV = SE.rewriteUsingPredicate(Expr, &L, Preds); |
12321 | 302k | Entry = {Generation, NewSCEV}; |
12322 | 302k | |
12323 | 302k | return NewSCEV; |
12324 | 302k | } |
12325 | | |
12326 | 267k | const SCEV *PredicatedScalarEvolution::getBackedgeTakenCount() { |
12327 | 267k | if (!BackedgeCount) { |
12328 | 168k | SCEVUnionPredicate BackedgePred; |
12329 | 168k | BackedgeCount = SE.getPredicatedBackedgeTakenCount(&L, BackedgePred); |
12330 | 168k | addPredicate(BackedgePred); |
12331 | 168k | } |
12332 | 267k | return BackedgeCount; |
12333 | 267k | } |
12334 | | |
12335 | 220k | void PredicatedScalarEvolution::addPredicate(const SCEVPredicate &Pred) { |
12336 | 220k | if (Preds.implies(&Pred)) |
12337 | 195k | return; |
12338 | 25.3k | Preds.add(&Pred); |
12339 | 25.3k | updateGeneration(); |
12340 | 25.3k | } |
12341 | | |
12342 | 126k | const SCEVUnionPredicate &PredicatedScalarEvolution::getUnionPredicate() const { |
12343 | 126k | return Preds; |
12344 | 126k | } |
12345 | | |
12346 | 26.3k | void PredicatedScalarEvolution::updateGeneration() { |
12347 | 26.3k | // If the generation number wrapped recompute everything. |
12348 | 26.3k | if (++Generation == 0) { |
12349 | 0 | for (auto &II : RewriteMap) { |
12350 | 0 | const SCEV *Rewritten = II.second.second; |
12351 | 0 | II.second = {Generation, SE.rewriteUsingPredicate(Rewritten, &L, Preds)}; |
12352 | 0 | } |
12353 | 0 | } |
12354 | 26.3k | } |
12355 | | |
12356 | | void PredicatedScalarEvolution::setNoOverflow( |
12357 | 24.7k | Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags) { |
12358 | 24.7k | const SCEV *Expr = getSCEV(V); |
12359 | 24.7k | const auto *AR = cast<SCEVAddRecExpr>(Expr); |
12360 | 24.7k | |
12361 | 24.7k | auto ImpliedFlags = SCEVWrapPredicate::getImpliedFlags(AR, SE); |
12362 | 24.7k | |
12363 | 24.7k | // Clear the statically implied flags. |
12364 | 24.7k | Flags = SCEVWrapPredicate::clearFlags(Flags, ImpliedFlags); |
12365 | 24.7k | addPredicate(*SE.getWrapPredicate(AR, Flags)); |
12366 | 24.7k | |
12367 | 24.7k | auto II = FlagsMap.insert({V, Flags}); |
12368 | 24.7k | if (!II.second) |
12369 | 0 | II.first->second = SCEVWrapPredicate::setFlags(Flags, II.first->second); |
12370 | 24.7k | } |
12371 | | |
12372 | | bool PredicatedScalarEvolution::hasNoOverflow( |
12373 | 357k | Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags) { |
12374 | 357k | const SCEV *Expr = getSCEV(V); |
12375 | 357k | const auto *AR = cast<SCEVAddRecExpr>(Expr); |
12376 | 357k | |
12377 | 357k | Flags = SCEVWrapPredicate::clearFlags( |
12378 | 357k | Flags, SCEVWrapPredicate::getImpliedFlags(AR, SE)); |
12379 | 357k | |
12380 | 357k | auto II = FlagsMap.find(V); |
12381 | 357k | |
12382 | 357k | if (II != FlagsMap.end()) |
12383 | 37.7k | Flags = SCEVWrapPredicate::clearFlags(Flags, II->second); |
12384 | 357k | |
12385 | 357k | return Flags == SCEVWrapPredicate::IncrementAnyWrap; |
12386 | 357k | } |
12387 | | |
12388 | 44.3k | const SCEVAddRecExpr *PredicatedScalarEvolution::getAsAddRec(Value *V) { |
12389 | 44.3k | const SCEV *Expr = this->getSCEV(V); |
12390 | 44.3k | SmallPtrSet<const SCEVPredicate *, 4> NewPreds; |
12391 | 44.3k | auto *New = SE.convertSCEVToAddRecWithPredicates(Expr, &L, NewPreds); |
12392 | 44.3k | |
12393 | 44.3k | if (!New) |
12394 | 43.4k | return nullptr; |
12395 | 957 | |
12396 | 957 | for (auto *P : NewPreds) |
12397 | 967 | Preds.add(P); |
12398 | 957 | |
12399 | 957 | updateGeneration(); |
12400 | 957 | RewriteMap[SE.getSCEV(V)] = {Generation, New}; |
12401 | 957 | return New; |
12402 | 957 | } |
12403 | | |
12404 | | PredicatedScalarEvolution::PredicatedScalarEvolution( |
12405 | | const PredicatedScalarEvolution &Init) |
12406 | | : RewriteMap(Init.RewriteMap), SE(Init.SE), L(Init.L), Preds(Init.Preds), |
12407 | 162k | Generation(Init.Generation), BackedgeCount(Init.BackedgeCount) { |
12408 | 162k | for (const auto &I : Init.FlagsMap) |
12409 | 24.1k | FlagsMap.insert(I); |
12410 | 162k | } |
12411 | | |
12412 | 115 | void PredicatedScalarEvolution::print(raw_ostream &OS, unsigned Depth) const { |
12413 | 115 | // For each block. |
12414 | 115 | for (auto *BB : L.getBlocks()) |
12415 | 1.51k | for (auto &I : *BB)146 { |
12416 | 1.51k | if (!SE.isSCEVable(I.getType())) |
12417 | 325 | continue; |
12418 | 1.19k | |
12419 | 1.19k | auto *Expr = SE.getSCEV(&I); |
12420 | 1.19k | auto II = RewriteMap.find(Expr); |
12421 | 1.19k | |
12422 | 1.19k | if (II == RewriteMap.end()) |
12423 | 926 | continue; |
12424 | 267 | |
12425 | 267 | // Don't print things that are not interesting. |
12426 | 267 | if (II->second.second == Expr) |
12427 | 254 | continue; |
12428 | 13 | |
12429 | 13 | OS.indent(Depth) << "[PSE]" << I << ":\n"; |
12430 | 13 | OS.indent(Depth + 2) << *Expr << "\n"; |
12431 | 13 | OS.indent(Depth + 2) << "--> " << *II->second.second << "\n"; |
12432 | 13 | } |
12433 | 115 | } |
12434 | | |
12435 | | // Match the mathematical pattern A - (A / B) * B, where A and B can be |
12436 | | // arbitrary expressions. |
12437 | | // It's not always easy, as A and B can be folded (imagine A is X / 2, and B is |
12438 | | // 4, A / B becomes X / 8). |
12439 | | bool ScalarEvolution::matchURem(const SCEV *Expr, const SCEV *&LHS, |
12440 | 1.22M | const SCEV *&RHS) { |
12441 | 1.22M | const auto *Add = dyn_cast<SCEVAddExpr>(Expr); |
12442 | 1.22M | if (Add == nullptr || Add->getNumOperands() != 2281k ) |
12443 | 980k | return false; |
12444 | 248k | |
12445 | 248k | const SCEV *A = Add->getOperand(1); |
12446 | 248k | const auto *Mul = dyn_cast<SCEVMulExpr>(Add->getOperand(0)); |
12447 | 248k | |
12448 | 248k | if (Mul == nullptr) |
12449 | 231k | return false; |
12450 | 17.4k | |
12451 | 69.1k | const auto MatchURemWithDivisor = [&](const SCEV *B) 17.4k { |
12452 | 69.1k | // (SomeExpr + (-(SomeExpr / B) * B)). |
12453 | 69.1k | if (Expr == getURemExpr(A, B)) { |
12454 | 293 | LHS = A; |
12455 | 293 | RHS = B; |
12456 | 293 | return true; |
12457 | 293 | } |
12458 | 68.8k | return false; |
12459 | 68.8k | }; |
12460 | 17.4k | |
12461 | 17.4k | // (SomeExpr + (-1 * (SomeExpr / B) * B)). |
12462 | 17.4k | if (Mul->getNumOperands() == 3 && isa<SCEVConstant>(Mul->getOperand(0))174 ) |
12463 | 167 | return MatchURemWithDivisor(Mul->getOperand(1)) || |
12464 | 167 | MatchURemWithDivisor(Mul->getOperand(2))145 ; |
12465 | 17.2k | |
12466 | 17.2k | // (SomeExpr + ((-SomeExpr / B) * B)) or (SomeExpr + ((SomeExpr / B) * -B)). |
12467 | 17.2k | if (Mul->getNumOperands() == 2) |
12468 | 17.2k | return MatchURemWithDivisor(Mul->getOperand(1)) || |
12469 | 17.2k | MatchURemWithDivisor(Mul->getOperand(0)) || |
12470 | 17.2k | MatchURemWithDivisor(getNegativeSCEV(Mul->getOperand(1))) || |
12471 | 17.2k | MatchURemWithDivisor(getNegativeSCEV(Mul->getOperand(0))); |
12472 | 37 | return false; |
12473 | 37 | } |