/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Analysis/InstructionSimplify.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- InstructionSimplify.cpp - Fold instruction operands ----------------===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | // This file implements routines for folding instructions into simpler forms |
11 | | // that do not require creating new instructions. This does constant folding |
12 | | // ("add i32 1, 1" -> "2") but can also handle non-constant operands, either |
13 | | // returning a constant ("and i32 %x, 0" -> "0") or an already existing value |
14 | | // ("and i32 %x, %x" -> "%x"). All operands are assumed to have already been |
15 | | // simplified: This is usually true and assuming it simplifies the logic (if |
16 | | // they have not been simplified then results are correct but maybe suboptimal). |
17 | | // |
18 | | //===----------------------------------------------------------------------===// |
19 | | |
20 | | #include "llvm/Analysis/InstructionSimplify.h" |
21 | | #include "llvm/ADT/SetVector.h" |
22 | | #include "llvm/ADT/Statistic.h" |
23 | | #include "llvm/Analysis/AliasAnalysis.h" |
24 | | #include "llvm/Analysis/AssumptionCache.h" |
25 | | #include "llvm/Analysis/CaptureTracking.h" |
26 | | #include "llvm/Analysis/CmpInstAnalysis.h" |
27 | | #include "llvm/Analysis/ConstantFolding.h" |
28 | | #include "llvm/Analysis/LoopAnalysisManager.h" |
29 | | #include "llvm/Analysis/MemoryBuiltins.h" |
30 | | #include "llvm/Analysis/OptimizationDiagnosticInfo.h" |
31 | | #include "llvm/Analysis/ValueTracking.h" |
32 | | #include "llvm/Analysis/VectorUtils.h" |
33 | | #include "llvm/IR/ConstantRange.h" |
34 | | #include "llvm/IR/DataLayout.h" |
35 | | #include "llvm/IR/Dominators.h" |
36 | | #include "llvm/IR/GetElementPtrTypeIterator.h" |
37 | | #include "llvm/IR/GlobalAlias.h" |
38 | | #include "llvm/IR/Operator.h" |
39 | | #include "llvm/IR/PatternMatch.h" |
40 | | #include "llvm/IR/ValueHandle.h" |
41 | | #include "llvm/Support/KnownBits.h" |
42 | | #include <algorithm> |
43 | | using namespace llvm; |
44 | | using namespace llvm::PatternMatch; |
45 | | |
46 | | #define DEBUG_TYPE "instsimplify" |
47 | | |
48 | | enum { RecursionLimit = 3 }; |
49 | | |
50 | | STATISTIC(NumExpand, "Number of expansions"); |
51 | | STATISTIC(NumReassoc, "Number of reassociations"); |
52 | | |
53 | | static Value *SimplifyAndInst(Value *, Value *, const SimplifyQuery &, unsigned); |
54 | | static Value *SimplifyBinOp(unsigned, Value *, Value *, const SimplifyQuery &, |
55 | | unsigned); |
56 | | static Value *SimplifyFPBinOp(unsigned, Value *, Value *, const FastMathFlags &, |
57 | | const SimplifyQuery &, unsigned); |
58 | | static Value *SimplifyCmpInst(unsigned, Value *, Value *, const SimplifyQuery &, |
59 | | unsigned); |
60 | | static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, |
61 | | const SimplifyQuery &Q, unsigned MaxRecurse); |
62 | | static Value *SimplifyOrInst(Value *, Value *, const SimplifyQuery &, unsigned); |
63 | | static Value *SimplifyXorInst(Value *, Value *, const SimplifyQuery &, unsigned); |
64 | | static Value *SimplifyCastInst(unsigned, Value *, Type *, |
65 | | const SimplifyQuery &, unsigned); |
66 | | |
67 | | /// For a boolean type or a vector of boolean type, return false or a vector |
68 | | /// with every element false. |
69 | 25.4k | static Constant *getFalse(Type *Ty) { |
70 | 25.4k | return ConstantInt::getFalse(Ty); |
71 | 25.4k | } |
72 | | |
73 | | /// For a boolean type or a vector of boolean type, return true or a vector |
74 | | /// with every element true. |
75 | 6.12k | static Constant *getTrue(Type *Ty) { |
76 | 6.12k | return ConstantInt::getTrue(Ty); |
77 | 6.12k | } |
78 | | |
79 | | /// isSameCompare - Is V equivalent to the comparison "LHS Pred RHS"? |
80 | | static bool isSameCompare(Value *V, CmpInst::Predicate Pred, Value *LHS, |
81 | 1.55M | Value *RHS) { |
82 | 1.55M | CmpInst *Cmp = dyn_cast<CmpInst>(V); |
83 | 1.55M | if (!Cmp) |
84 | 315k | return false; |
85 | 1.23M | CmpInst::Predicate CPred = Cmp->getPredicate(); |
86 | 1.23M | Value *CLHS = Cmp->getOperand(0), *CRHS = Cmp->getOperand(1); |
87 | 1.23M | if (CPred == Pred && 1.23M CLHS == LHS338k && CRHS == RHS300k ) |
88 | 403 | return true; |
89 | 1.23M | return CPred == CmpInst::getSwappedPredicate(Pred) && 1.23M CLHS == RHS342k && |
90 | 18 | CRHS == LHS; |
91 | 1.55M | } |
92 | | |
93 | | /// Does the given value dominate the specified phi node? |
94 | 7.75M | static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) { |
95 | 7.75M | Instruction *I = dyn_cast<Instruction>(V); |
96 | 7.75M | if (!I) |
97 | 7.75M | // Arguments and constants dominate all instructions. |
98 | 4.30M | return true; |
99 | 3.45M | |
100 | 3.45M | // If we are processing instructions (and/or basic blocks) that have not been |
101 | 3.45M | // fully added to a function, the parent nodes may still be null. Simply |
102 | 3.45M | // return the conservative answer in these cases. |
103 | 3.45M | if (3.45M !I->getParent() || 3.45M !P->getParent()3.45M || !I->getParent()->getParent()3.45M ) |
104 | 24.3k | return false; |
105 | 3.43M | |
106 | 3.43M | // If we have a DominatorTree then do a precise test. |
107 | 3.43M | if (3.43M DT3.43M ) |
108 | 3.22M | return DT->dominates(I, P); |
109 | 206k | |
110 | 206k | // Otherwise, if the instruction is in the entry block and is not an invoke, |
111 | 206k | // then it obviously dominates all phi nodes. |
112 | 206k | if (206k I->getParent() == &I->getParent()->getParent()->getEntryBlock() && |
113 | 1.87k | !isa<InvokeInst>(I)) |
114 | 1.87k | return true; |
115 | 204k | |
116 | 204k | return false; |
117 | 204k | } |
118 | | |
119 | | /// Simplify "A op (B op' C)" by distributing op over op', turning it into |
120 | | /// "(A op B) op' (A op C)". Here "op" is given by Opcode and "op'" is |
121 | | /// given by OpcodeToExpand, while "A" corresponds to LHS and "B op' C" to RHS. |
122 | | /// Also performs the transform "(A op' B) op C" -> "(A op C) op' (B op C)". |
123 | | /// Returns the simplified value, or null if no simplification was performed. |
124 | | static Value *ExpandBinOp(Instruction::BinaryOps Opcode, Value *LHS, Value *RHS, |
125 | | Instruction::BinaryOps OpcodeToExpand, |
126 | 27.3M | const SimplifyQuery &Q, unsigned MaxRecurse) { |
127 | 27.3M | // Recursion is always used, so bail out at once if we already hit the limit. |
128 | 27.3M | if (!MaxRecurse--) |
129 | 4.03M | return nullptr; |
130 | 23.3M | |
131 | 23.3M | // Check whether the expression has the form "(A op' B) op C". |
132 | 23.3M | if (BinaryOperator *23.3M Op023.3M = dyn_cast<BinaryOperator>(LHS)) |
133 | 8.04M | if (8.04M Op0->getOpcode() == OpcodeToExpand8.04M ) { |
134 | 2.12M | // It does! Try turning it into "(A op C) op' (B op C)". |
135 | 2.12M | Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS; |
136 | 2.12M | // Do "A op C" and "B op C" both simplify? |
137 | 2.12M | if (Value *L = SimplifyBinOp(Opcode, A, C, Q, MaxRecurse)) |
138 | 15.3k | if (Value *15.3k R15.3k = SimplifyBinOp(Opcode, B, C, Q, MaxRecurse)) { |
139 | 1.34k | // They do! Return "L op' R" if it simplifies or is already available. |
140 | 1.34k | // If "L op' R" equals "A op' B" then "L op' R" is just the LHS. |
141 | 1.34k | if ((L == A && 1.34k R == B795 ) || (Instruction::isCommutative(OpcodeToExpand) |
142 | 1.34k | && L == B652 && R == A0 )) { |
143 | 693 | ++NumExpand; |
144 | 693 | return LHS; |
145 | 693 | } |
146 | 652 | // Otherwise return "L op' R" if it simplifies. |
147 | 652 | if (Value *652 V652 = SimplifyBinOp(OpcodeToExpand, L, R, Q, MaxRecurse)) { |
148 | 608 | ++NumExpand; |
149 | 608 | return V; |
150 | 608 | } |
151 | 23.3M | } |
152 | 8.04M | } |
153 | 23.3M | |
154 | 23.3M | // Check whether the expression has the form "A op (B op' C)". |
155 | 23.3M | if (BinaryOperator *23.3M Op123.3M = dyn_cast<BinaryOperator>(RHS)) |
156 | 3.93M | if (3.93M Op1->getOpcode() == OpcodeToExpand3.93M ) { |
157 | 1.11M | // It does! Try turning it into "(A op B) op' (A op C)". |
158 | 1.11M | Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1); |
159 | 1.11M | // Do "A op B" and "A op C" both simplify? |
160 | 1.11M | if (Value *L = SimplifyBinOp(Opcode, A, B, Q, MaxRecurse)) |
161 | 8.98k | if (Value *8.98k R8.98k = SimplifyBinOp(Opcode, A, C, Q, MaxRecurse)) { |
162 | 282 | // They do! Return "L op' R" if it simplifies or is already available. |
163 | 282 | // If "L op' R" equals "B op' C" then "L op' R" is just the RHS. |
164 | 282 | if ((L == B && 282 R == C240 ) || (Instruction::isCommutative(OpcodeToExpand) |
165 | 282 | && L == C249 && R == B0 )) { |
166 | 33 | ++NumExpand; |
167 | 33 | return RHS; |
168 | 33 | } |
169 | 249 | // Otherwise return "L op' R" if it simplifies. |
170 | 249 | if (Value *249 V249 = SimplifyBinOp(OpcodeToExpand, L, R, Q, MaxRecurse)) { |
171 | 42 | ++NumExpand; |
172 | 42 | return V; |
173 | 42 | } |
174 | 23.3M | } |
175 | 3.93M | } |
176 | 23.3M | |
177 | 23.3M | return nullptr; |
178 | 23.3M | } |
179 | | |
180 | | /// Generic simplifications for associative binary operations. |
181 | | /// Returns the simpler value, or null if none was found. |
182 | | static Value *SimplifyAssociativeBinOp(Instruction::BinaryOps Opcode, |
183 | | Value *LHS, Value *RHS, |
184 | | const SimplifyQuery &Q, |
185 | 45.5M | unsigned MaxRecurse) { |
186 | 45.5M | assert(Instruction::isAssociative(Opcode) && "Not an associative operation!"); |
187 | 45.5M | |
188 | 45.5M | // Recursion is always used, so bail out at once if we already hit the limit. |
189 | 45.5M | if (!MaxRecurse--) |
190 | 6.17M | return nullptr; |
191 | 39.3M | |
192 | 39.3M | BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS); |
193 | 39.3M | BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS); |
194 | 39.3M | |
195 | 39.3M | // Transform: "(A op B) op C" ==> "A op (B op C)" if it simplifies completely. |
196 | 39.3M | if (Op0 && 39.3M Op0->getOpcode() == Opcode13.1M ) { |
197 | 3.88M | Value *A = Op0->getOperand(0); |
198 | 3.88M | Value *B = Op0->getOperand(1); |
199 | 3.88M | Value *C = RHS; |
200 | 3.88M | |
201 | 3.88M | // Does "B op C" simplify? |
202 | 3.88M | if (Value *V3.88M = SimplifyBinOp(Opcode, B, C, Q, MaxRecurse)) { |
203 | 401k | // It does! Return "A op V" if it simplifies or is already available. |
204 | 401k | // If V equals B then "A op V" is just the LHS. |
205 | 401k | if (V == B401k ) return LHS22.2k ; |
206 | 379k | // Otherwise return "A op V" if it simplifies. |
207 | 379k | if (Value *379k W379k = SimplifyBinOp(Opcode, A, V, Q, MaxRecurse)) { |
208 | 7.76k | ++NumReassoc; |
209 | 7.76k | return W; |
210 | 7.76k | } |
211 | 39.3M | } |
212 | 3.88M | } |
213 | 39.3M | |
214 | 39.3M | // Transform: "A op (B op C)" ==> "(A op B) op C" if it simplifies completely. |
215 | 39.3M | if (39.3M Op1 && 39.3M Op1->getOpcode() == Opcode7.53M ) { |
216 | 2.96M | Value *A = LHS; |
217 | 2.96M | Value *B = Op1->getOperand(0); |
218 | 2.96M | Value *C = Op1->getOperand(1); |
219 | 2.96M | |
220 | 2.96M | // Does "A op B" simplify? |
221 | 2.96M | if (Value *V2.96M = SimplifyBinOp(Opcode, A, B, Q, MaxRecurse)) { |
222 | 1.07k | // It does! Return "V op C" if it simplifies or is already available. |
223 | 1.07k | // If V equals B then "V op C" is just the RHS. |
224 | 1.07k | if (V == B1.07k ) return RHS366 ; |
225 | 705 | // Otherwise return "V op C" if it simplifies. |
226 | 705 | if (Value *705 W705 = SimplifyBinOp(Opcode, V, C, Q, MaxRecurse)) { |
227 | 71 | ++NumReassoc; |
228 | 71 | return W; |
229 | 71 | } |
230 | 39.3M | } |
231 | 2.96M | } |
232 | 39.3M | |
233 | 39.3M | // The remaining transforms require commutativity as well as associativity. |
234 | 39.3M | if (39.3M !Instruction::isCommutative(Opcode)39.3M ) |
235 | 0 | return nullptr; |
236 | 39.3M | |
237 | 39.3M | // Transform: "(A op B) op C" ==> "(C op A) op B" if it simplifies completely. |
238 | 39.3M | if (39.3M Op0 && 39.3M Op0->getOpcode() == Opcode13.1M ) { |
239 | 3.85M | Value *A = Op0->getOperand(0); |
240 | 3.85M | Value *B = Op0->getOperand(1); |
241 | 3.85M | Value *C = RHS; |
242 | 3.85M | |
243 | 3.85M | // Does "C op A" simplify? |
244 | 3.85M | if (Value *V3.85M = SimplifyBinOp(Opcode, C, A, Q, MaxRecurse)) { |
245 | 3.68k | // It does! Return "V op B" if it simplifies or is already available. |
246 | 3.68k | // If V equals A then "V op B" is just the LHS. |
247 | 3.68k | if (V == A3.68k ) return LHS800 ; |
248 | 2.88k | // Otherwise return "V op B" if it simplifies. |
249 | 2.88k | if (Value *2.88k W2.88k = SimplifyBinOp(Opcode, V, B, Q, MaxRecurse)) { |
250 | 7 | ++NumReassoc; |
251 | 7 | return W; |
252 | 7 | } |
253 | 39.3M | } |
254 | 3.85M | } |
255 | 39.3M | |
256 | 39.3M | // Transform: "A op (B op C)" ==> "B op (C op A)" if it simplifies completely. |
257 | 39.3M | if (39.3M Op1 && 39.3M Op1->getOpcode() == Opcode7.53M ) { |
258 | 2.96M | Value *A = LHS; |
259 | 2.96M | Value *B = Op1->getOperand(0); |
260 | 2.96M | Value *C = Op1->getOperand(1); |
261 | 2.96M | |
262 | 2.96M | // Does "C op A" simplify? |
263 | 2.96M | if (Value *V2.96M = SimplifyBinOp(Opcode, C, A, Q, MaxRecurse)) { |
264 | 4.95k | // It does! Return "B op V" if it simplifies or is already available. |
265 | 4.95k | // If V equals C then "B op V" is just the RHS. |
266 | 4.95k | if (V == C4.95k ) return RHS176 ; |
267 | 4.78k | // Otherwise return "B op V" if it simplifies. |
268 | 4.78k | if (Value *4.78k W4.78k = SimplifyBinOp(Opcode, B, V, Q, MaxRecurse)) { |
269 | 0 | ++NumReassoc; |
270 | 0 | return W; |
271 | 0 | } |
272 | 39.3M | } |
273 | 2.96M | } |
274 | 39.3M | |
275 | 39.3M | return nullptr; |
276 | 39.3M | } |
277 | | |
278 | | /// In the case of a binary operation with a select instruction as an operand, |
279 | | /// try to simplify the binop by seeing whether evaluating it on both branches |
280 | | /// of the select results in the same value. Returns the common value if so, |
281 | | /// otherwise returns null. |
282 | | static Value *ThreadBinOpOverSelect(Instruction::BinaryOps Opcode, Value *LHS, |
283 | | Value *RHS, const SimplifyQuery &Q, |
284 | 5.22M | unsigned MaxRecurse) { |
285 | 5.22M | // Recursion is always used, so bail out at once if we already hit the limit. |
286 | 5.22M | if (!MaxRecurse--) |
287 | 1.36M | return nullptr; |
288 | 3.85M | |
289 | 3.85M | SelectInst *SI; |
290 | 3.85M | if (isa<SelectInst>(LHS)3.85M ) { |
291 | 2.51M | SI = cast<SelectInst>(LHS); |
292 | 3.85M | } else { |
293 | 1.33M | assert(isa<SelectInst>(RHS) && "No select instruction operand!"); |
294 | 1.33M | SI = cast<SelectInst>(RHS); |
295 | 1.33M | } |
296 | 3.85M | |
297 | 3.85M | // Evaluate the BinOp on the true and false branches of the select. |
298 | 3.85M | Value *TV; |
299 | 3.85M | Value *FV; |
300 | 3.85M | if (SI == LHS3.85M ) { |
301 | 2.51M | TV = SimplifyBinOp(Opcode, SI->getTrueValue(), RHS, Q, MaxRecurse); |
302 | 2.51M | FV = SimplifyBinOp(Opcode, SI->getFalseValue(), RHS, Q, MaxRecurse); |
303 | 3.85M | } else { |
304 | 1.33M | TV = SimplifyBinOp(Opcode, LHS, SI->getTrueValue(), Q, MaxRecurse); |
305 | 1.33M | FV = SimplifyBinOp(Opcode, LHS, SI->getFalseValue(), Q, MaxRecurse); |
306 | 1.33M | } |
307 | 3.85M | |
308 | 3.85M | // If they simplified to the same value, then return the common value. |
309 | 3.85M | // If they both failed to simplify then return null. |
310 | 3.85M | if (TV == FV) |
311 | 231k | return TV; |
312 | 3.62M | |
313 | 3.62M | // If one branch simplified to undef, return the other one. |
314 | 3.62M | if (3.62M TV && 3.62M isa<UndefValue>(TV)810k ) |
315 | 2 | return FV; |
316 | 3.62M | if (3.62M FV && 3.62M isa<UndefValue>(FV)2.82M ) |
317 | 4 | return TV; |
318 | 3.62M | |
319 | 3.62M | // If applying the operation did not change the true and false select values, |
320 | 3.62M | // then the result of the binop is the select itself. |
321 | 3.62M | if (3.62M TV == SI->getTrueValue() && 3.62M FV == SI->getFalseValue()110k ) |
322 | 218 | return SI; |
323 | 3.62M | |
324 | 3.62M | // If one branch simplified and the other did not, and the simplified |
325 | 3.62M | // value is equal to the unsimplified one, return the simplified value. |
326 | 3.62M | // For example, select (cond, X, X & Z) & Z -> X & Z. |
327 | 3.62M | if (3.62M (FV && 3.62M !TV2.82M ) || (TV && 809k !FV809k )) { |
328 | 3.62M | // Check that the simplified value has the form "X op Y" where "op" is the |
329 | 3.62M | // same as the original operation. |
330 | 3.62M | Instruction *Simplified = dyn_cast<Instruction>(FV ? FV2.81M : TV802k ); |
331 | 3.62M | if (Simplified && 3.62M Simplified->getOpcode() == Opcode3.05M ) { |
332 | 696k | // The value that didn't simplify is "UnsimplifiedLHS op UnsimplifiedRHS". |
333 | 696k | // We already know that "op" is the same as for the simplified value. See |
334 | 696k | // if the operands match too. If so, return the simplified value. |
335 | 696k | Value *UnsimplifiedBranch = FV ? SI->getTrueValue()486k : SI->getFalseValue()209k ; |
336 | 696k | Value *UnsimplifiedLHS = SI == LHS ? UnsimplifiedBranch467k : LHS228k ; |
337 | 696k | Value *UnsimplifiedRHS = SI == LHS ? RHS467k : UnsimplifiedBranch228k ; |
338 | 696k | if (Simplified->getOperand(0) == UnsimplifiedLHS && |
339 | 30 | Simplified->getOperand(1) == UnsimplifiedRHS) |
340 | 30 | return Simplified; |
341 | 696k | if (696k Simplified->isCommutative() && |
342 | 695k | Simplified->getOperand(1) == UnsimplifiedLHS && |
343 | 0 | Simplified->getOperand(0) == UnsimplifiedRHS) |
344 | 0 | return Simplified; |
345 | 3.62M | } |
346 | 3.62M | } |
347 | 3.62M | |
348 | 3.62M | return nullptr; |
349 | 3.62M | } |
350 | | |
351 | | /// In the case of a comparison with a select instruction, try to simplify the |
352 | | /// comparison by seeing whether both branches of the select result in the same |
353 | | /// value. Returns the common value if so, otherwise returns null. |
354 | | static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, |
355 | | Value *RHS, const SimplifyQuery &Q, |
356 | 1.57M | unsigned MaxRecurse) { |
357 | 1.57M | // Recursion is always used, so bail out at once if we already hit the limit. |
358 | 1.57M | if (!MaxRecurse--) |
359 | 6.57k | return nullptr; |
360 | 1.56M | |
361 | 1.56M | // Make sure the select is on the LHS. |
362 | 1.56M | if (1.56M !isa<SelectInst>(LHS)1.56M ) { |
363 | 452k | std::swap(LHS, RHS); |
364 | 452k | Pred = CmpInst::getSwappedPredicate(Pred); |
365 | 452k | } |
366 | 1.56M | assert(isa<SelectInst>(LHS) && "Not comparing with a select instruction!"); |
367 | 1.56M | SelectInst *SI = cast<SelectInst>(LHS); |
368 | 1.56M | Value *Cond = SI->getCondition(); |
369 | 1.56M | Value *TV = SI->getTrueValue(); |
370 | 1.56M | Value *FV = SI->getFalseValue(); |
371 | 1.56M | |
372 | 1.56M | // Now that we have "cmp select(Cond, TV, FV), RHS", analyse it. |
373 | 1.56M | // Does "cmp TV, RHS" simplify? |
374 | 1.56M | Value *TCmp = SimplifyCmpInst(Pred, TV, RHS, Q, MaxRecurse); |
375 | 1.56M | if (TCmp == Cond1.56M ) { |
376 | 45 | // It not only simplified, it simplified to the select condition. Replace |
377 | 45 | // it with 'true'. |
378 | 45 | TCmp = getTrue(Cond->getType()); |
379 | 1.56M | } else if (1.56M !TCmp1.56M ) { |
380 | 1.50M | // It didn't simplify. However if "cmp TV, RHS" is equal to the select |
381 | 1.50M | // condition then we can replace it with 'true'. Otherwise give up. |
382 | 1.50M | if (!isSameCompare(Cond, Pred, TV, RHS)) |
383 | 1.50M | return nullptr; |
384 | 73 | TCmp = getTrue(Cond->getType()); |
385 | 73 | } |
386 | 1.56M | |
387 | 1.56M | // Does "cmp FV, RHS" simplify? |
388 | 55.9k | Value *FCmp = SimplifyCmpInst(Pred, FV, RHS, Q, MaxRecurse); |
389 | 55.9k | if (FCmp == Cond55.9k ) { |
390 | 300 | // It not only simplified, it simplified to the select condition. Replace |
391 | 300 | // it with 'false'. |
392 | 300 | FCmp = getFalse(Cond->getType()); |
393 | 55.9k | } else if (55.6k !FCmp55.6k ) { |
394 | 43.4k | // It didn't simplify. However if "cmp FV, RHS" is equal to the select |
395 | 43.4k | // condition then we can replace it with 'false'. Otherwise give up. |
396 | 43.4k | if (!isSameCompare(Cond, Pred, FV, RHS)) |
397 | 43.1k | return nullptr; |
398 | 330 | FCmp = getFalse(Cond->getType()); |
399 | 330 | } |
400 | 55.9k | |
401 | 55.9k | // If both sides simplified to the same value, then use it as the result of |
402 | 55.9k | // the original comparison. |
403 | 12.8k | if (12.8k TCmp == FCmp12.8k ) |
404 | 998 | return TCmp; |
405 | 11.8k | |
406 | 11.8k | // The remaining cases only make sense if the select condition has the same |
407 | 11.8k | // type as the result of the comparison, so bail out if this is not so. |
408 | 11.8k | if (11.8k Cond->getType()->isVectorTy() != RHS->getType()->isVectorTy()11.8k ) |
409 | 1 | return nullptr; |
410 | 11.8k | // If the false value simplified to false, then the result of the compare |
411 | 11.8k | // is equal to "Cond && TCmp". This also catches the case when the false |
412 | 11.8k | // value simplified to false and the true value to true, returning "Cond". |
413 | 11.8k | if (11.8k match(FCmp, m_Zero())11.8k ) |
414 | 5.71k | if (Value *5.71k V5.71k = SimplifyAndInst(Cond, TCmp, Q, MaxRecurse)) |
415 | 5.67k | return V; |
416 | 6.13k | // If the true value simplified to true, then the result of the compare |
417 | 6.13k | // is equal to "Cond || FCmp". |
418 | 6.13k | if (6.13k match(TCmp, m_One())6.13k ) |
419 | 110 | if (Value *110 V110 = SimplifyOrInst(Cond, FCmp, Q, MaxRecurse)) |
420 | 12 | return V; |
421 | 6.12k | // Finally, if the false value simplified to true and the true value to |
422 | 6.12k | // false, then the result of the compare is equal to "!Cond". |
423 | 6.12k | if (6.12k match(FCmp, m_One()) && 6.12k match(TCmp, m_Zero())3.18k ) |
424 | 3.11k | if (Value *3.11k V3.11k = |
425 | 3.11k | SimplifyXorInst(Cond, Constant::getAllOnesValue(Cond->getType()), |
426 | 3.11k | Q, MaxRecurse)) |
427 | 5 | return V; |
428 | 6.11k | |
429 | 6.11k | return nullptr; |
430 | 6.11k | } |
431 | | |
432 | | /// In the case of a binary operation with an operand that is a PHI instruction, |
433 | | /// try to simplify the binop by seeing whether evaluating it on the incoming |
434 | | /// phi values yields the same result for every value. If so returns the common |
435 | | /// value, otherwise returns null. |
436 | | static Value *ThreadBinOpOverPHI(Instruction::BinaryOps Opcode, Value *LHS, |
437 | | Value *RHS, const SimplifyQuery &Q, |
438 | 3.09M | unsigned MaxRecurse) { |
439 | 3.09M | // Recursion is always used, so bail out at once if we already hit the limit. |
440 | 3.09M | if (!MaxRecurse--) |
441 | 345k | return nullptr; |
442 | 2.75M | |
443 | 2.75M | PHINode *PI; |
444 | 2.75M | if (isa<PHINode>(LHS)2.75M ) { |
445 | 2.24M | PI = cast<PHINode>(LHS); |
446 | 2.24M | // Bail out if RHS and the phi may be mutually interdependent due to a loop. |
447 | 2.24M | if (!ValueDominatesPHI(RHS, PI, Q.DT)) |
448 | 560k | return nullptr; |
449 | 508k | } else { |
450 | 508k | assert(isa<PHINode>(RHS) && "No PHI instruction operand!"); |
451 | 508k | PI = cast<PHINode>(RHS); |
452 | 508k | // Bail out if LHS and the phi may be mutually interdependent due to a loop. |
453 | 508k | if (!ValueDominatesPHI(LHS, PI, Q.DT)) |
454 | 318k | return nullptr; |
455 | 1.87M | } |
456 | 1.87M | |
457 | 1.87M | // Evaluate the BinOp on the incoming phi values. |
458 | 1.87M | Value *CommonValue = nullptr; |
459 | 2.32M | for (Value *Incoming : PI->incoming_values()) { |
460 | 2.32M | // If the incoming value is the phi node itself, it can safely be skipped. |
461 | 2.32M | if (Incoming == PI2.32M ) continue50 ; |
462 | 2.32M | Value *V = PI == LHS ? |
463 | 2.11M | SimplifyBinOp(Opcode, Incoming, RHS, Q, MaxRecurse) : |
464 | 210k | SimplifyBinOp(Opcode, LHS, Incoming, Q, MaxRecurse); |
465 | 2.32M | // If the operation failed to simplify, or simplified to a different value |
466 | 2.32M | // to previously, then give up. |
467 | 2.32M | if (!V || 2.32M (CommonValue && 462k V != CommonValue23.6k )) |
468 | 1.87M | return nullptr; |
469 | 454k | CommonValue = V; |
470 | 454k | } |
471 | 1.87M | |
472 | 305 | return CommonValue; |
473 | 3.09M | } |
474 | | |
475 | | /// In the case of a comparison with a PHI instruction, try to simplify the |
476 | | /// comparison by seeing whether comparing with all of the incoming phi values |
477 | | /// yields the same result every time. If so returns the common result, |
478 | | /// otherwise returns null. |
479 | | static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS, |
480 | 4.97M | const SimplifyQuery &Q, unsigned MaxRecurse) { |
481 | 4.97M | // Recursion is always used, so bail out at once if we already hit the limit. |
482 | 4.97M | if (!MaxRecurse--) |
483 | 210k | return nullptr; |
484 | 4.76M | |
485 | 4.76M | // Make sure the phi is on the LHS. |
486 | 4.76M | if (4.76M !isa<PHINode>(LHS)4.76M ) { |
487 | 745k | std::swap(LHS, RHS); |
488 | 745k | Pred = CmpInst::getSwappedPredicate(Pred); |
489 | 745k | } |
490 | 4.76M | assert(isa<PHINode>(LHS) && "Not comparing with a phi instruction!"); |
491 | 4.76M | PHINode *PI = cast<PHINode>(LHS); |
492 | 4.76M | |
493 | 4.76M | // Bail out if RHS and the phi may be mutually interdependent due to a loop. |
494 | 4.76M | if (!ValueDominatesPHI(RHS, PI, Q.DT)) |
495 | 1.17M | return nullptr; |
496 | 3.59M | |
497 | 3.59M | // Evaluate the comparison on the incoming phi values. |
498 | 3.59M | Value *CommonValue = nullptr; |
499 | 5.03M | for (Value *Incoming : PI->incoming_values()) { |
500 | 5.03M | // If the incoming value is the phi node itself, it can safely be skipped. |
501 | 5.03M | if (Incoming == PI5.03M ) continue4.34k ; |
502 | 5.03M | Value *V = SimplifyCmpInst(Pred, Incoming, RHS, Q, MaxRecurse); |
503 | 5.03M | // If the operation failed to simplify, or simplified to a different value |
504 | 5.03M | // to previously, then give up. |
505 | 5.03M | if (!V || 5.03M (CommonValue && 1.69M V != CommonValue300k )) |
506 | 3.58M | return nullptr; |
507 | 1.44M | CommonValue = V; |
508 | 1.44M | } |
509 | 3.59M | |
510 | 5.07k | return CommonValue; |
511 | 4.97M | } |
512 | | |
513 | | static Constant *foldOrCommuteConstant(Instruction::BinaryOps Opcode, |
514 | | Value *&Op0, Value *&Op1, |
515 | 63.5M | const SimplifyQuery &Q) { |
516 | 63.5M | if (auto *CLHS63.5M = dyn_cast<Constant>(Op0)) { |
517 | 9.20M | if (auto *CRHS = dyn_cast<Constant>(Op1)) |
518 | 2.05M | return ConstantFoldBinaryOpOperands(Opcode, CLHS, CRHS, Q.DL); |
519 | 7.14M | |
520 | 7.14M | // Canonicalize the constant to the RHS if this is a commutative operation. |
521 | 7.14M | if (7.14M Instruction::isCommutative(Opcode)7.14M ) |
522 | 5.30M | std::swap(Op0, Op1); |
523 | 9.20M | } |
524 | 61.4M | return nullptr; |
525 | 63.5M | } |
526 | | |
527 | | /// Given operands for an Add, see if we can fold the result. |
528 | | /// If not, this returns null. |
529 | | static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, |
530 | 23.0M | const SimplifyQuery &Q, unsigned MaxRecurse) { |
531 | 23.0M | if (Constant *C = foldOrCommuteConstant(Instruction::Add, Op0, Op1, Q)) |
532 | 878k | return C; |
533 | 22.2M | |
534 | 22.2M | // X + undef -> undef |
535 | 22.2M | if (22.2M match(Op1, m_Undef())22.2M ) |
536 | 21 | return Op1; |
537 | 22.2M | |
538 | 22.2M | // X + 0 -> X |
539 | 22.2M | if (22.2M match(Op1, m_Zero())22.2M ) |
540 | 108k | return Op0; |
541 | 22.0M | |
542 | 22.0M | // X + (Y - X) -> Y |
543 | 22.0M | // (Y - X) + X -> Y |
544 | 22.0M | // Eg: X + -X -> 0 |
545 | 22.0M | Value *Y = nullptr; |
546 | 22.0M | if (match(Op1, m_Sub(m_Value(Y), m_Specific(Op0))) || |
547 | 22.0M | match(Op0, m_Sub(m_Value(Y), m_Specific(Op1)))) |
548 | 1.06k | return Y; |
549 | 22.0M | |
550 | 22.0M | // X + ~X -> -1 since ~X = -X-1 |
551 | 22.0M | Type *Ty = Op0->getType(); |
552 | 22.0M | if (match(Op0, m_Not(m_Specific(Op1))) || |
553 | 22.0M | match(Op1, m_Not(m_Specific(Op0)))) |
554 | 2 | return Constant::getAllOnesValue(Ty); |
555 | 22.0M | |
556 | 22.0M | // add nsw/nuw (xor Y, signmask), signmask --> Y |
557 | 22.0M | // The no-wrapping add guarantees that the top bit will be set by the add. |
558 | 22.0M | // Therefore, the xor must be clearing the already set sign bit of Y. |
559 | 22.0M | if (22.0M (isNSW || 22.0M isNUW14.9M ) && match(Op1, m_SignMask())7.59M && |
560 | 7 | match(Op0, m_Xor(m_Value(Y), m_SignMask()))) |
561 | 3 | return Y; |
562 | 22.0M | |
563 | 22.0M | // i1 add -> xor. |
564 | 22.0M | if (22.0M MaxRecurse && 22.0M Op0->getType()->isIntOrIntVectorTy(1)20.0M ) |
565 | 325 | if (Value *325 V325 = SimplifyXorInst(Op0, Op1, Q, MaxRecurse-1)) |
566 | 5 | return V; |
567 | 22.0M | |
568 | 22.0M | // Try some generic simplifications for associative operations. |
569 | 22.0M | if (Value *22.0M V22.0M = SimplifyAssociativeBinOp(Instruction::Add, Op0, Op1, Q, |
570 | 22.0M | MaxRecurse)) |
571 | 1.08k | return V; |
572 | 22.0M | |
573 | 22.0M | // Threading Add over selects and phi nodes is pointless, so don't bother. |
574 | 22.0M | // Threading over the select in "A + select(cond, B, C)" means evaluating |
575 | 22.0M | // "A+B" and "A+C" and seeing if they are equal; but they are equal if and |
576 | 22.0M | // only if B and C are equal. If B and C are equal then (since we assume |
577 | 22.0M | // that operands have already been simplified) "select(cond, B, C)" should |
578 | 22.0M | // have been simplified to the common value of B and C already. Analysing |
579 | 22.0M | // "A+B" and "A+C" thus gains nothing, but costs compile time. Similarly |
580 | 22.0M | // for threading over phi nodes. |
581 | 22.0M | |
582 | 22.0M | return nullptr; |
583 | 22.0M | } |
584 | | |
585 | | Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, |
586 | 12.6M | const SimplifyQuery &Query) { |
587 | 12.6M | return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, Query, RecursionLimit); |
588 | 12.6M | } |
589 | | |
590 | | /// \brief Compute the base pointer and cumulative constant offsets for V. |
591 | | /// |
592 | | /// This strips all constant offsets off of V, leaving it the base pointer, and |
593 | | /// accumulates the total constant offset applied in the returned constant. V |
594 | | /// must be a pointer or a vector of pointers (this is asserted); the constant |
595 | | /// '0' is returned if there are no constant offsets applied. |
596 | | /// |
597 | | /// This is very similar to GetPointerBaseWithConstantOffset except that, unless |
598 | | /// AllowNonInbounds is set, it doesn't follow non-inbounds geps. This allows it |
599 | | /// to remain usable for icmp ult/etc. folding. |
600 | | static Constant *stripAndComputeConstantOffsets(const DataLayout &DL, Value *&V, |
601 | 34.6M | bool AllowNonInbounds = false) { |
602 | 34.6M | assert(V->getType()->isPtrOrPtrVectorTy()); |
603 | 34.6M | |
604 | 34.6M | Type *IntPtrTy = DL.getIntPtrType(V->getType())->getScalarType(); |
605 | 34.6M | APInt Offset = APInt::getNullValue(IntPtrTy->getIntegerBitWidth()); |
606 | 34.6M | |
607 | 34.6M | // Even though we don't look through PHI nodes, we could be called on an |
608 | 34.6M | // instruction in an unreachable block, which may be on a cycle. |
609 | 34.6M | SmallPtrSet<Value *, 4> Visited; |
610 | 34.6M | Visited.insert(V); |
611 | 35.1M | do { |
612 | 35.1M | if (GEPOperator *GEP35.1M = dyn_cast<GEPOperator>(V)) { |
613 | 959k | if ((!AllowNonInbounds && 959k !GEP->isInBounds()942k ) || |
614 | 625k | !GEP->accumulateConstantOffset(DL, Offset)) |
615 | 456k | break; |
616 | 503k | V = GEP->getPointerOperand(); |
617 | 35.1M | } else if (34.2M Operator::getOpcode(V) == Instruction::BitCast34.2M ) { |
618 | 501 | V = cast<Operator>(V)->getOperand(0); |
619 | 34.2M | } else if (GlobalAlias *34.2M GA34.2M = dyn_cast<GlobalAlias>(V)) { |
620 | 0 | if (GA->isInterposable()) |
621 | 0 | break; |
622 | 0 | V = GA->getAliasee(); |
623 | 34.2M | } else { |
624 | 34.2M | if (auto CS = CallSite(V)) |
625 | 3.70M | if (Value *3.70M RV3.70M = CS.getReturnedArgOperand()) { |
626 | 1 | V = RV; |
627 | 1 | continue; |
628 | 1 | } |
629 | 34.2M | break; |
630 | 34.2M | } |
631 | 35.1M | assert(V->getType()->isPtrOrPtrVectorTy() && "Unexpected operand type!"); |
632 | 34.6M | } while (Visited.insert(V).second); |
633 | 34.6M | |
634 | 34.6M | Constant *OffsetIntPtr = ConstantInt::get(IntPtrTy, Offset); |
635 | 34.6M | if (V->getType()->isVectorTy()) |
636 | 2 | return ConstantVector::getSplat(V->getType()->getVectorNumElements(), |
637 | 2 | OffsetIntPtr); |
638 | 34.6M | return OffsetIntPtr; |
639 | 34.6M | } |
640 | | |
641 | | /// \brief Compute the constant difference between two pointer values. |
642 | | /// If the difference is not a constant, returns zero. |
643 | | static Constant *computePointerDifference(const DataLayout &DL, Value *LHS, |
644 | 624k | Value *RHS) { |
645 | 624k | Constant *LHSOffset = stripAndComputeConstantOffsets(DL, LHS); |
646 | 624k | Constant *RHSOffset = stripAndComputeConstantOffsets(DL, RHS); |
647 | 624k | |
648 | 624k | // If LHS and RHS are not related via constant offsets to the same base |
649 | 624k | // value, there is nothing we can do here. |
650 | 624k | if (LHS != RHS) |
651 | 624k | return nullptr; |
652 | 36 | |
653 | 36 | // Otherwise, the difference of LHS - RHS can be computed as: |
654 | 36 | // LHS - RHS |
655 | 36 | // = (LHSOffset + Base) - (RHSOffset + Base) |
656 | 36 | // = LHSOffset - RHSOffset |
657 | 36 | return ConstantExpr::getSub(LHSOffset, RHSOffset); |
658 | 36 | } |
659 | | |
660 | | /// Given operands for a Sub, see if we can fold the result. |
661 | | /// If not, this returns null. |
662 | | static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, |
663 | 3.87M | const SimplifyQuery &Q, unsigned MaxRecurse) { |
664 | 3.87M | if (Constant *C = foldOrCommuteConstant(Instruction::Sub, Op0, Op1, Q)) |
665 | 26.7k | return C; |
666 | 3.84M | |
667 | 3.84M | // X - undef -> undef |
668 | 3.84M | // undef - X -> undef |
669 | 3.84M | if (3.84M match(Op0, m_Undef()) || 3.84M match(Op1, m_Undef())3.84M ) |
670 | 2 | return UndefValue::get(Op0->getType()); |
671 | 3.84M | |
672 | 3.84M | // X - 0 -> X |
673 | 3.84M | if (3.84M match(Op1, m_Zero())3.84M ) |
674 | 5.33k | return Op0; |
675 | 3.83M | |
676 | 3.83M | // X - X -> 0 |
677 | 3.83M | if (3.83M Op0 == Op13.83M ) |
678 | 4.38k | return Constant::getNullValue(Op0->getType()); |
679 | 3.83M | |
680 | 3.83M | // Is this a negation? |
681 | 3.83M | if (3.83M match(Op0, m_Zero())3.83M ) { |
682 | 483k | // 0 - X -> 0 if the sub is NUW. |
683 | 483k | if (isNUW) |
684 | 3 | return Op0; |
685 | 483k | |
686 | 483k | KnownBits Known = computeKnownBits(Op1, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); |
687 | 483k | if (Known.Zero.isMaxSignedValue()483k ) { |
688 | 5 | // Op1 is either 0 or the minimum signed value. If the sub is NSW, then |
689 | 5 | // Op1 must be 0 because negating the minimum signed value is undefined. |
690 | 5 | if (isNSW) |
691 | 2 | return Op0; |
692 | 3 | |
693 | 3 | // 0 - X -> X if X is 0 or the minimum signed value. |
694 | 3 | return Op1; |
695 | 3 | } |
696 | 483k | } |
697 | 3.83M | |
698 | 3.83M | // (X + Y) - Z -> X + (Y - Z) or Y + (X - Z) if everything simplifies. |
699 | 3.83M | // For example, (X + Y) - Y -> X; (Y + X) - Y -> X |
700 | 3.83M | Value *X = nullptr, *Y = nullptr, *Z = Op1; |
701 | 3.83M | if (MaxRecurse && 3.83M match(Op0, m_Add(m_Value(X), m_Value(Y)))3.56M ) { // (X + Y) - Z |
702 | 268k | // See if "V === Y - Z" simplifies. |
703 | 268k | if (Value *V = SimplifyBinOp(Instruction::Sub, Y, Z, Q, MaxRecurse-1)) |
704 | 268k | // It does! Now see if "X + V" simplifies. |
705 | 5.75k | if (Value *5.75k W5.75k = SimplifyBinOp(Instruction::Add, X, V, Q, MaxRecurse-1)) { |
706 | 2.03k | // It does, we successfully reassociated! |
707 | 2.03k | ++NumReassoc; |
708 | 2.03k | return W; |
709 | 2.03k | } |
710 | 266k | // See if "V === X - Z" simplifies. |
711 | 266k | if (Value *266k V266k = SimplifyBinOp(Instruction::Sub, X, Z, Q, MaxRecurse-1)) |
712 | 266k | // It does! Now see if "Y + V" simplifies. |
713 | 1.73k | if (Value *1.73k W1.73k = SimplifyBinOp(Instruction::Add, Y, V, Q, MaxRecurse-1)) { |
714 | 1.09k | // It does, we successfully reassociated! |
715 | 1.09k | ++NumReassoc; |
716 | 1.09k | return W; |
717 | 1.09k | } |
718 | 3.83M | } |
719 | 3.83M | |
720 | 3.83M | // X - (Y + Z) -> (X - Y) - Z or (X - Z) - Y if everything simplifies. |
721 | 3.83M | // For example, X - (X + 1) -> -1 |
722 | 3.83M | X = Op0; |
723 | 3.83M | if (MaxRecurse && 3.83M match(Op1, m_Add(m_Value(Y), m_Value(Z)))3.55M ) { // X - (Y + Z) |
724 | 254k | // See if "V === X - Y" simplifies. |
725 | 254k | if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, Q, MaxRecurse-1)) |
726 | 254k | // It does! Now see if "V - Z" simplifies. |
727 | 1.89k | if (Value *1.89k W1.89k = SimplifyBinOp(Instruction::Sub, V, Z, Q, MaxRecurse-1)) { |
728 | 374 | // It does, we successfully reassociated! |
729 | 374 | ++NumReassoc; |
730 | 374 | return W; |
731 | 374 | } |
732 | 253k | // See if "V === X - Z" simplifies. |
733 | 253k | if (Value *253k V253k = SimplifyBinOp(Instruction::Sub, X, Z, Q, MaxRecurse-1)) |
734 | 253k | // It does! Now see if "V - Y" simplifies. |
735 | 9.04k | if (Value *9.04k W9.04k = SimplifyBinOp(Instruction::Sub, V, Y, Q, MaxRecurse-1)) { |
736 | 4 | // It does, we successfully reassociated! |
737 | 4 | ++NumReassoc; |
738 | 4 | return W; |
739 | 4 | } |
740 | 3.83M | } |
741 | 3.83M | |
742 | 3.83M | // Z - (X - Y) -> (Z - X) + Y if everything simplifies. |
743 | 3.83M | // For example, X - (X - Y) -> Y. |
744 | 3.83M | Z = Op0; |
745 | 3.83M | if (MaxRecurse && 3.83M match(Op1, m_Sub(m_Value(X), m_Value(Y)))3.55M ) // Z - (X - Y) |
746 | 3.83M | // See if "V === Z - X" simplifies. |
747 | 103k | if (Value *103k V103k = SimplifyBinOp(Instruction::Sub, Z, X, Q, MaxRecurse-1)) |
748 | 103k | // It does! Now see if "V + Y" simplifies. |
749 | 10.6k | if (Value *10.6k W10.6k = SimplifyBinOp(Instruction::Add, V, Y, Q, MaxRecurse-1)) { |
750 | 413 | // It does, we successfully reassociated! |
751 | 413 | ++NumReassoc; |
752 | 413 | return W; |
753 | 413 | } |
754 | 3.83M | |
755 | 3.83M | // trunc(X) - trunc(Y) -> trunc(X - Y) if everything simplifies. |
756 | 3.83M | if (3.83M MaxRecurse && 3.83M match(Op0, m_Trunc(m_Value(X)))3.55M && |
757 | 16.1k | match(Op1, m_Trunc(m_Value(Y)))) |
758 | 4.93k | if (4.93k X->getType() == Y->getType()4.93k ) |
759 | 4.93k | // See if "V === X - Y" simplifies. |
760 | 4.92k | if (Value *4.92k V4.92k = SimplifyBinOp(Instruction::Sub, X, Y, Q, MaxRecurse-1)) |
761 | 4.92k | // It does! Now see if "trunc V" simplifies. |
762 | 3 | if (Value *3 W3 = SimplifyCastInst(Instruction::Trunc, V, Op0->getType(), |
763 | 3 | Q, MaxRecurse - 1)) |
764 | 3 | // It does, return the simplified "trunc V". |
765 | 3 | return W; |
766 | 3.83M | |
767 | 3.83M | // Variations on GEP(base, I, ...) - GEP(base, i, ...) -> GEP(null, I-i, ...). |
768 | 3.83M | if (3.83M match(Op0, m_PtrToInt(m_Value(X))) && |
769 | 636k | match(Op1, m_PtrToInt(m_Value(Y)))) |
770 | 624k | if (Constant *624k Result624k = computePointerDifference(Q.DL, X, Y)) |
771 | 36 | return ConstantExpr::getIntegerCast(Result, Op0->getType(), true); |
772 | 3.83M | |
773 | 3.83M | // i1 sub -> xor. |
774 | 3.83M | if (3.83M MaxRecurse && 3.83M Op0->getType()->isIntOrIntVectorTy(1)3.55M ) |
775 | 6 | if (Value *6 V6 = SimplifyXorInst(Op0, Op1, Q, MaxRecurse-1)) |
776 | 3 | return V; |
777 | 3.83M | |
778 | 3.83M | // Threading Sub over selects and phi nodes is pointless, so don't bother. |
779 | 3.83M | // Threading over the select in "A - select(cond, B, C)" means evaluating |
780 | 3.83M | // "A-B" and "A-C" and seeing if they are equal; but they are equal if and |
781 | 3.83M | // only if B and C are equal. If B and C are equal then (since we assume |
782 | 3.83M | // that operands have already been simplified) "select(cond, B, C)" should |
783 | 3.83M | // have been simplified to the common value of B and C already. Analysing |
784 | 3.83M | // "A-B" and "A-C" thus gains nothing, but costs compile time. Similarly |
785 | 3.83M | // for threading over phi nodes. |
786 | 3.83M | |
787 | 3.83M | return nullptr; |
788 | 3.83M | } |
789 | | |
790 | | Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, |
791 | 2.67M | const SimplifyQuery &Q) { |
792 | 2.67M | return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, Q, RecursionLimit); |
793 | 2.67M | } |
794 | | |
795 | | /// Given operands for a Mul, see if we can fold the result. |
796 | | /// If not, this returns null. |
797 | | static Value *SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, |
798 | 14.6M | unsigned MaxRecurse) { |
799 | 14.6M | if (Constant *C = foldOrCommuteConstant(Instruction::Mul, Op0, Op1, Q)) |
800 | 160k | return C; |
801 | 14.4M | |
802 | 14.4M | // X * undef -> 0 |
803 | 14.4M | if (14.4M match(Op1, m_Undef())14.4M ) |
804 | 672 | return Constant::getNullValue(Op0->getType()); |
805 | 14.4M | |
806 | 14.4M | // X * 0 -> 0 |
807 | 14.4M | if (14.4M match(Op1, m_Zero())14.4M ) |
808 | 193k | return Op1; |
809 | 14.2M | |
810 | 14.2M | // X * 1 -> X |
811 | 14.2M | if (14.2M match(Op1, m_One())14.2M ) |
812 | 3.13M | return Op0; |
813 | 11.1M | |
814 | 11.1M | // (X / Y) * Y -> X if the division is exact. |
815 | 11.1M | Value *X = nullptr; |
816 | 11.1M | if (match(Op0, m_Exact(m_IDiv(m_Value(X), m_Specific(Op1)))) || // (X / Y) * Y |
817 | 11.1M | match(Op1, m_Exact(m_IDiv(m_Value(X), m_Specific(Op0))))) // Y * (X / Y) |
818 | 46 | return X; |
819 | 11.1M | |
820 | 11.1M | // i1 mul -> and. |
821 | 11.1M | if (11.1M MaxRecurse && 11.1M Op0->getType()->isIntOrIntVectorTy(1)8.35M ) |
822 | 4 | if (Value *4 V4 = SimplifyAndInst(Op0, Op1, Q, MaxRecurse-1)) |
823 | 2 | return V; |
824 | 11.1M | |
825 | 11.1M | // Try some generic simplifications for associative operations. |
826 | 11.1M | if (Value *11.1M V11.1M = SimplifyAssociativeBinOp(Instruction::Mul, Op0, Op1, Q, |
827 | 11.1M | MaxRecurse)) |
828 | 2 | return V; |
829 | 11.1M | |
830 | 11.1M | // Mul distributes over Add. Try some generic simplifications based on this. |
831 | 11.1M | if (Value *11.1M V11.1M = ExpandBinOp(Instruction::Mul, Op0, Op1, Instruction::Add, |
832 | 11.1M | Q, MaxRecurse)) |
833 | 0 | return V; |
834 | 11.1M | |
835 | 11.1M | // If the operation is with the result of a select instruction, check whether |
836 | 11.1M | // operating on either branch of the select always yields the same value. |
837 | 11.1M | if (11.1M isa<SelectInst>(Op0) || 11.1M isa<SelectInst>(Op1)8.57M ) |
838 | 4.56M | if (Value *4.56M V4.56M = ThreadBinOpOverSelect(Instruction::Mul, Op0, Op1, Q, |
839 | 4.56M | MaxRecurse)) |
840 | 0 | return V; |
841 | 11.1M | |
842 | 11.1M | // If the operation is with the result of a phi instruction, check whether |
843 | 11.1M | // operating on all incoming values of the phi always yields the same value. |
844 | 11.1M | if (11.1M isa<PHINode>(Op0) || 11.1M isa<PHINode>(Op1)10.1M ) |
845 | 1.20M | if (Value *1.20M V1.20M = ThreadBinOpOverPHI(Instruction::Mul, Op0, Op1, Q, |
846 | 1.20M | MaxRecurse)) |
847 | 0 | return V; |
848 | 11.1M | |
849 | 11.1M | return nullptr; |
850 | 11.1M | } |
851 | | |
852 | 2.31M | Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { |
853 | 2.31M | return ::SimplifyMulInst(Op0, Op1, Q, RecursionLimit); |
854 | 2.31M | } |
855 | | |
856 | | /// Check for common or similar folds of integer division or integer remainder. |
857 | | /// This applies to all 4 opcodes (sdiv/udiv/srem/urem). |
858 | 627k | static Value *simplifyDivRem(Value *Op0, Value *Op1, bool IsDiv) { |
859 | 627k | Type *Ty = Op0->getType(); |
860 | 627k | |
861 | 627k | // X / undef -> undef |
862 | 627k | // X % undef -> undef |
863 | 627k | if (match(Op1, m_Undef())) |
864 | 3 | return Op1; |
865 | 627k | |
866 | 627k | // X / 0 -> undef |
867 | 627k | // X % 0 -> undef |
868 | 627k | // We don't need to preserve faults! |
869 | 627k | if (627k match(Op1, m_Zero())627k ) |
870 | 91 | return UndefValue::get(Ty); |
871 | 627k | |
872 | 627k | // If any element of a constant divisor vector is zero, the whole op is undef. |
873 | 627k | auto *Op1C = dyn_cast<Constant>(Op1); |
874 | 627k | if (Op1C && 627k Ty->isVectorTy()299k ) { |
875 | 595 | unsigned NumElts = Ty->getVectorNumElements(); |
876 | 2.50k | for (unsigned i = 0; i != NumElts2.50k ; ++i1.91k ) { |
877 | 1.91k | Constant *Elt = Op1C->getAggregateElement(i); |
878 | 1.91k | if (Elt && 1.91k Elt->isNullValue()1.91k ) |
879 | 6 | return UndefValue::get(Ty); |
880 | 1.91k | } |
881 | 595 | } |
882 | 627k | |
883 | 627k | // undef / X -> 0 |
884 | 627k | // undef % X -> 0 |
885 | 627k | if (627k match(Op0, m_Undef())627k ) |
886 | 0 | return Constant::getNullValue(Ty); |
887 | 627k | |
888 | 627k | // 0 / X -> 0 |
889 | 627k | // 0 % X -> 0 |
890 | 627k | if (627k match(Op0, m_Zero())627k ) |
891 | 367 | return Op0; |
892 | 626k | |
893 | 626k | // X / X -> 1 |
894 | 626k | // X % X -> 0 |
895 | 626k | if (626k Op0 == Op1626k ) |
896 | 1.64k | return IsDiv ? 1.64k ConstantInt::get(Ty, 1)917 : Constant::getNullValue(Ty)729 ; |
897 | 625k | |
898 | 625k | // X / 1 -> X |
899 | 625k | // X % 1 -> 0 |
900 | 625k | // If this is a boolean op (single-bit element type), we can't have |
901 | 625k | // division-by-zero or remainder-by-zero, so assume the divisor is 1. |
902 | 625k | if (625k match(Op1, m_One()) || 625k Ty->isIntOrIntVectorTy(1)619k ) |
903 | 6.01k | return IsDiv ? 6.01k Op03.57k : Constant::getNullValue(Ty)2.44k ; |
904 | 619k | |
905 | 619k | return nullptr; |
906 | 619k | } |
907 | | |
908 | | /// Given a predicate and two operands, return true if the comparison is true. |
909 | | /// This is a helper for div/rem simplification where we return some other value |
910 | | /// when we can prove a relationship between the operands. |
911 | | static bool isICmpTrue(ICmpInst::Predicate Pred, Value *LHS, Value *RHS, |
912 | 549k | const SimplifyQuery &Q, unsigned MaxRecurse) { |
913 | 549k | Value *V = SimplifyICmpInst(Pred, LHS, RHS, Q, MaxRecurse); |
914 | 549k | Constant *C = dyn_cast_or_null<Constant>(V); |
915 | 4.00k | return (C && C->isAllOnesValue()); |
916 | 549k | } |
917 | | |
918 | | /// Return true if we can simplify X / Y to 0. Remainder can adapt that answer |
919 | | /// to simplify X % Y to X. |
920 | | static bool isDivZero(Value *X, Value *Y, const SimplifyQuery &Q, |
921 | 619k | unsigned MaxRecurse, bool IsSigned) { |
922 | 619k | // Recursion is always used, so bail out at once if we already hit the limit. |
923 | 619k | if (!MaxRecurse--) |
924 | 6.77k | return false; |
925 | 612k | |
926 | 612k | if (612k IsSigned612k ) { |
927 | 289k | // |X| / |Y| --> 0 |
928 | 289k | // |
929 | 289k | // We require that 1 operand is a simple constant. That could be extended to |
930 | 289k | // 2 variables if we computed the sign bit for each. |
931 | 289k | // |
932 | 289k | // Make sure that a constant is not the minimum signed value because taking |
933 | 289k | // the abs() of that is undefined. |
934 | 289k | Type *Ty = X->getType(); |
935 | 289k | const APInt *C; |
936 | 289k | if (match(X, m_APInt(C)) && 289k !C->isMinSignedValue()53.7k ) { |
937 | 52.3k | // Is the variable divisor magnitude always greater than the constant |
938 | 52.3k | // dividend magnitude? |
939 | 52.3k | // |Y| > |C| --> Y < -abs(C) or Y > abs(C) |
940 | 52.3k | Constant *PosDividendC = ConstantInt::get(Ty, C->abs()); |
941 | 52.3k | Constant *NegDividendC = ConstantInt::get(Ty, -C->abs()); |
942 | 52.3k | if (isICmpTrue(CmpInst::ICMP_SLT, Y, NegDividendC, Q, MaxRecurse) || |
943 | 52.3k | isICmpTrue(CmpInst::ICMP_SGT, Y, PosDividendC, Q, MaxRecurse)) |
944 | 0 | return true; |
945 | 289k | } |
946 | 289k | if (289k match(Y, m_APInt(C))289k ) { |
947 | 121k | // Special-case: we can't take the abs() of a minimum signed value. If |
948 | 121k | // that's the divisor, then all we have to do is prove that the dividend |
949 | 121k | // is also not the minimum signed value. |
950 | 121k | if (C->isMinSignedValue()) |
951 | 9 | return isICmpTrue(CmpInst::ICMP_NE, X, Y, Q, MaxRecurse); |
952 | 121k | |
953 | 121k | // Is the variable dividend magnitude always less than the constant |
954 | 121k | // divisor magnitude? |
955 | 121k | // |X| < |C| --> X > -abs(C) and X < abs(C) |
956 | 121k | Constant *PosDivisorC = ConstantInt::get(Ty, C->abs()); |
957 | 121k | Constant *NegDivisorC = ConstantInt::get(Ty, -C->abs()); |
958 | 121k | if (isICmpTrue(CmpInst::ICMP_SGT, X, NegDivisorC, Q, MaxRecurse) && |
959 | 741 | isICmpTrue(CmpInst::ICMP_SLT, X, PosDivisorC, Q, MaxRecurse)) |
960 | 157 | return true; |
961 | 289k | } |
962 | 289k | return false; |
963 | 289k | } |
964 | 322k | |
965 | 322k | // IsSigned == false. |
966 | 322k | // Is the dividend unsigned less than the divisor? |
967 | 322k | return isICmpTrue(ICmpInst::ICMP_ULT, X, Y, Q, MaxRecurse); |
968 | 322k | } |
969 | | |
970 | | /// These are simplifications common to SDiv and UDiv. |
971 | | static Value *simplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, |
972 | 486k | const SimplifyQuery &Q, unsigned MaxRecurse) { |
973 | 486k | if (Constant *C = foldOrCommuteConstant(Opcode, Op0, Op1, Q)) |
974 | 7.71k | return C; |
975 | 479k | |
976 | 479k | if (Value *479k V479k = simplifyDivRem(Op0, Op1, true)) |
977 | 4.76k | return V; |
978 | 474k | |
979 | 474k | bool IsSigned = Opcode == Instruction::SDiv; |
980 | 474k | |
981 | 474k | // (X * Y) / Y -> X if the multiplication does not overflow. |
982 | 474k | Value *X = nullptr, *Y = nullptr; |
983 | 474k | if (match(Op0, m_Mul(m_Value(X), m_Value(Y))) && 474k (X == Op1 || 60.2k Y == Op160.1k )) { |
984 | 286 | if (Y != Op1286 ) std::swap(X, Y)118 ; // Ensure expression is (X * Y) / Y, Y = Op1 |
985 | 286 | OverflowingBinaryOperator *Mul = cast<OverflowingBinaryOperator>(Op0); |
986 | 286 | // If the Mul knows it does not overflow, then we are good to go. |
987 | 286 | if ((IsSigned && 286 Mul->hasNoSignedWrap()68 ) || |
988 | 251 | (!IsSigned && 251 Mul->hasNoUnsignedWrap()218 )) |
989 | 38 | return X; |
990 | 248 | // If X has the form X = A / Y then X * Y cannot overflow. |
991 | 248 | if (BinaryOperator *248 Div248 = dyn_cast<BinaryOperator>(X)) |
992 | 36 | if (36 Div->getOpcode() == Opcode && 36 Div->getOperand(1) == Y2 ) |
993 | 2 | return X; |
994 | 474k | } |
995 | 474k | |
996 | 474k | // (X rem Y) / Y -> 0 |
997 | 474k | if (474k (IsSigned && 474k match(Op0, m_SRem(m_Value(), m_Specific(Op1)))199k ) || |
998 | 474k | (!IsSigned && 474k match(Op0, m_URem(m_Value(), m_Specific(Op1)))274k )) |
999 | 2 | return Constant::getNullValue(Op0->getType()); |
1000 | 474k | |
1001 | 474k | // (X /u C1) /u C2 -> 0 if C1 * C2 overflow |
1002 | 474k | ConstantInt *C1, *C2; |
1003 | 474k | if (!IsSigned && 474k match(Op0, m_UDiv(m_Value(X), m_ConstantInt(C1)))274k && |
1004 | 474k | match(Op1, m_ConstantInt(C2))44.7k ) { |
1005 | 44.7k | bool Overflow; |
1006 | 44.7k | (void)C1->getValue().umul_ov(C2->getValue(), Overflow); |
1007 | 44.7k | if (Overflow) |
1008 | 1 | return Constant::getNullValue(Op0->getType()); |
1009 | 474k | } |
1010 | 474k | |
1011 | 474k | // If the operation is with the result of a select instruction, check whether |
1012 | 474k | // operating on either branch of the select always yields the same value. |
1013 | 474k | if (474k isa<SelectInst>(Op0) || 474k isa<SelectInst>(Op1)468k ) |
1014 | 15.7k | if (Value *15.7k V15.7k = ThreadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse)) |
1015 | 1 | return V; |
1016 | 474k | |
1017 | 474k | // If the operation is with the result of a phi instruction, check whether |
1018 | 474k | // operating on all incoming values of the phi always yields the same value. |
1019 | 474k | if (474k isa<PHINode>(Op0) || 474k isa<PHINode>(Op1)407k ) |
1020 | 71.8k | if (Value *71.8k V71.8k = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse)) |
1021 | 0 | return V; |
1022 | 474k | |
1023 | 474k | if (474k isDivZero(Op0, Op1, Q, MaxRecurse, IsSigned)474k ) |
1024 | 21 | return Constant::getNullValue(Op0->getType()); |
1025 | 474k | |
1026 | 474k | return nullptr; |
1027 | 474k | } |
1028 | | |
1029 | | /// These are simplifications common to SRem and URem. |
1030 | | static Value *simplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, |
1031 | 150k | const SimplifyQuery &Q, unsigned MaxRecurse) { |
1032 | 150k | if (Constant *C = foldOrCommuteConstant(Opcode, Op0, Op1, Q)) |
1033 | 2.79k | return C; |
1034 | 148k | |
1035 | 148k | if (Value *148k V148k = simplifyDivRem(Op0, Op1, false)) |
1036 | 3.36k | return V; |
1037 | 144k | |
1038 | 144k | // (X % Y) % Y -> X % Y |
1039 | 144k | if (144k (Opcode == Instruction::SRem && |
1040 | 96.7k | match(Op0, m_SRem(m_Value(), m_Specific(Op1)))) || |
1041 | 144k | (Opcode == Instruction::URem && |
1042 | 48.0k | match(Op0, m_URem(m_Value(), m_Specific(Op1))))) |
1043 | 2 | return Op0; |
1044 | 144k | |
1045 | 144k | // If the operation is with the result of a select instruction, check whether |
1046 | 144k | // operating on either branch of the select always yields the same value. |
1047 | 144k | if (144k isa<SelectInst>(Op0) || 144k isa<SelectInst>(Op1)141k ) |
1048 | 8.76k | if (Value *8.76k V8.76k = ThreadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse)) |
1049 | 2 | return V; |
1050 | 144k | |
1051 | 144k | // If the operation is with the result of a phi instruction, check whether |
1052 | 144k | // operating on all incoming values of the phi always yields the same value. |
1053 | 144k | if (144k isa<PHINode>(Op0) || 144k isa<PHINode>(Op1)134k ) |
1054 | 12.2k | if (Value *12.2k V12.2k = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse)) |
1055 | 0 | return V; |
1056 | 144k | |
1057 | 144k | // If X / Y == 0, then X % Y == X. |
1058 | 144k | if (144k isDivZero(Op0, Op1, Q, MaxRecurse, Opcode == Instruction::SRem)144k ) |
1059 | 147 | return Op0; |
1060 | 144k | |
1061 | 144k | return nullptr; |
1062 | 144k | } |
1063 | | |
1064 | | /// Given operands for an SDiv, see if we can fold the result. |
1065 | | /// If not, this returns null. |
1066 | | static Value *SimplifySDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, |
1067 | 210k | unsigned MaxRecurse) { |
1068 | 210k | return simplifyDiv(Instruction::SDiv, Op0, Op1, Q, MaxRecurse); |
1069 | 210k | } |
1070 | | |
1071 | 164k | Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { |
1072 | 164k | return ::SimplifySDivInst(Op0, Op1, Q, RecursionLimit); |
1073 | 164k | } |
1074 | | |
1075 | | /// Given operands for a UDiv, see if we can fold the result. |
1076 | | /// If not, this returns null. |
1077 | | static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, |
1078 | 276k | unsigned MaxRecurse) { |
1079 | 276k | return simplifyDiv(Instruction::UDiv, Op0, Op1, Q, MaxRecurse); |
1080 | 276k | } |
1081 | | |
1082 | 224k | Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { |
1083 | 224k | return ::SimplifyUDivInst(Op0, Op1, Q, RecursionLimit); |
1084 | 224k | } |
1085 | | |
1086 | | /// Given operands for an SRem, see if we can fold the result. |
1087 | | /// If not, this returns null. |
1088 | | static Value *SimplifySRemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, |
1089 | 101k | unsigned MaxRecurse) { |
1090 | 101k | return simplifyRem(Instruction::SRem, Op0, Op1, Q, MaxRecurse); |
1091 | 101k | } |
1092 | | |
1093 | 85.9k | Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { |
1094 | 85.9k | return ::SimplifySRemInst(Op0, Op1, Q, RecursionLimit); |
1095 | 85.9k | } |
1096 | | |
1097 | | /// Given operands for a URem, see if we can fold the result. |
1098 | | /// If not, this returns null. |
1099 | | static Value *SimplifyURemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, |
1100 | 49.8k | unsigned MaxRecurse) { |
1101 | 49.8k | return simplifyRem(Instruction::URem, Op0, Op1, Q, MaxRecurse); |
1102 | 49.8k | } |
1103 | | |
1104 | 42.4k | Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { |
1105 | 42.4k | return ::SimplifyURemInst(Op0, Op1, Q, RecursionLimit); |
1106 | 42.4k | } |
1107 | | |
1108 | | /// Returns true if a shift by \c Amount always yields undef. |
1109 | 5.03M | static bool isUndefShift(Value *Amount) { |
1110 | 5.03M | Constant *C = dyn_cast<Constant>(Amount); |
1111 | 5.03M | if (!C) |
1112 | 504k | return false; |
1113 | 4.53M | |
1114 | 4.53M | // X shift by undef -> undef because it may shift by the bitwidth. |
1115 | 4.53M | if (4.53M isa<UndefValue>(C)4.53M ) |
1116 | 46 | return true; |
1117 | 4.53M | |
1118 | 4.53M | // Shifting by the bitwidth or more is undefined. |
1119 | 4.53M | if (ConstantInt *4.53M CI4.53M = dyn_cast<ConstantInt>(C)) |
1120 | 4.51M | if (4.51M CI->getValue().getLimitedValue() >= |
1121 | 4.51M | CI->getType()->getScalarSizeInBits()) |
1122 | 62 | return true; |
1123 | 4.53M | |
1124 | 4.53M | // If all lanes of a vector shift are undefined the whole shift is. |
1125 | 4.53M | if (4.53M isa<ConstantVector>(C) || 4.53M isa<ConstantDataVector>(C)4.53M ) { |
1126 | 21.4k | for (unsigned I = 0, E = C->getType()->getVectorNumElements(); I != E21.4k ; ++I61 ) |
1127 | 21.4k | if (21.4k !isUndefShift(C->getAggregateElement(I))21.4k ) |
1128 | 21.3k | return false; |
1129 | 4 | return true; |
1130 | 4.51M | } |
1131 | 4.51M | |
1132 | 4.51M | return false; |
1133 | 4.51M | } |
1134 | | |
1135 | | /// Given operands for an Shl, LShr or AShr, see if we can fold the result. |
1136 | | /// If not, this returns null. |
1137 | | static Value *SimplifyShift(Instruction::BinaryOps Opcode, Value *Op0, |
1138 | 5.48M | Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse) { |
1139 | 5.48M | if (Constant *C = foldOrCommuteConstant(Opcode, Op0, Op1, Q)) |
1140 | 460k | return C; |
1141 | 5.01M | |
1142 | 5.01M | // 0 shift by X -> 0 |
1143 | 5.01M | if (5.01M match(Op0, m_Zero())5.01M ) |
1144 | 188 | return Op0; |
1145 | 5.01M | |
1146 | 5.01M | // X shift by 0 -> X |
1147 | 5.01M | if (5.01M match(Op1, m_Zero())5.01M ) |
1148 | 1.37k | return Op0; |
1149 | 5.01M | |
1150 | 5.01M | // Fold undefined shifts. |
1151 | 5.01M | if (5.01M isUndefShift(Op1)5.01M ) |
1152 | 51 | return UndefValue::get(Op0->getType()); |
1153 | 5.01M | |
1154 | 5.01M | // If the operation is with the result of a select instruction, check whether |
1155 | 5.01M | // operating on either branch of the select always yields the same value. |
1156 | 5.01M | if (5.01M isa<SelectInst>(Op0) || 5.01M isa<SelectInst>(Op1)4.67M ) |
1157 | 350k | if (Value *350k V350k = ThreadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse)) |
1158 | 5 | return V; |
1159 | 5.01M | |
1160 | 5.01M | // If the operation is with the result of a phi instruction, check whether |
1161 | 5.01M | // operating on all incoming values of the phi always yields the same value. |
1162 | 5.01M | if (5.01M isa<PHINode>(Op0) || 5.01M isa<PHINode>(Op1)4.33M ) |
1163 | 697k | if (Value *697k V697k = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse)) |
1164 | 20 | return V; |
1165 | 5.01M | |
1166 | 5.01M | // If any bits in the shift amount make that value greater than or equal to |
1167 | 5.01M | // the number of bits in the type, the shift is undefined. |
1168 | 5.01M | KnownBits Known = computeKnownBits(Op1, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); |
1169 | 5.01M | if (Known.One.getLimitedValue() >= Known.getBitWidth()) |
1170 | 4 | return UndefValue::get(Op0->getType()); |
1171 | 5.01M | |
1172 | 5.01M | // If all valid bits in the shift amount are known zero, the first operand is |
1173 | 5.01M | // unchanged. |
1174 | 5.01M | unsigned NumValidShiftBits = Log2_32_Ceil(Known.getBitWidth()); |
1175 | 5.01M | if (Known.countMinTrailingZeros() >= NumValidShiftBits) |
1176 | 36 | return Op0; |
1177 | 5.01M | |
1178 | 5.01M | return nullptr; |
1179 | 5.01M | } |
1180 | | |
1181 | | /// \brief Given operands for an Shl, LShr or AShr, see if we can |
1182 | | /// fold the result. If not, this returns null. |
1183 | | static Value *SimplifyRightShift(Instruction::BinaryOps Opcode, Value *Op0, |
1184 | | Value *Op1, bool isExact, const SimplifyQuery &Q, |
1185 | 1.98M | unsigned MaxRecurse) { |
1186 | 1.98M | if (Value *V = SimplifyShift(Opcode, Op0, Op1, Q, MaxRecurse)) |
1187 | 27.4k | return V; |
1188 | 1.95M | |
1189 | 1.95M | // X >> X -> 0 |
1190 | 1.95M | if (1.95M Op0 == Op11.95M ) |
1191 | 3 | return Constant::getNullValue(Op0->getType()); |
1192 | 1.95M | |
1193 | 1.95M | // undef >> X -> 0 |
1194 | 1.95M | // undef >> X -> undef (if it's exact) |
1195 | 1.95M | if (1.95M match(Op0, m_Undef())1.95M ) |
1196 | 3 | return isExact ? 3 Op02 : Constant::getNullValue(Op0->getType())1 ; |
1197 | 1.95M | |
1198 | 1.95M | // The low bit cannot be shifted out of an exact shift if it is set. |
1199 | 1.95M | if (1.95M isExact1.95M ) { |
1200 | 296k | KnownBits Op0Known = computeKnownBits(Op0, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT); |
1201 | 296k | if (Op0Known.One[0]) |
1202 | 4 | return Op0; |
1203 | 1.95M | } |
1204 | 1.95M | |
1205 | 1.95M | return nullptr; |
1206 | 1.95M | } |
1207 | | |
1208 | | /// Given operands for an Shl, see if we can fold the result. |
1209 | | /// If not, this returns null. |
1210 | | static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, |
1211 | 3.49M | const SimplifyQuery &Q, unsigned MaxRecurse) { |
1212 | 3.49M | if (Value *V = SimplifyShift(Instruction::Shl, Op0, Op1, Q, MaxRecurse)) |
1213 | 434k | return V; |
1214 | 3.06M | |
1215 | 3.06M | // undef << X -> 0 |
1216 | 3.06M | // undef << X -> undef if (if it's NSW/NUW) |
1217 | 3.06M | if (3.06M match(Op0, m_Undef())3.06M ) |
1218 | 4 | return isNSW || 4 isNUW2 ? Op03 : Constant::getNullValue(Op0->getType())1 ; |
1219 | 3.06M | |
1220 | 3.06M | // (X >> A) << A -> X |
1221 | 3.06M | Value *X; |
1222 | 3.06M | if (match(Op0, m_Exact(m_Shr(m_Value(X), m_Specific(Op1))))) |
1223 | 3.10k | return X; |
1224 | 3.06M | return nullptr; |
1225 | 3.06M | } |
1226 | | |
1227 | | Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, |
1228 | 2.08M | const SimplifyQuery &Q) { |
1229 | 2.08M | return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Q, RecursionLimit); |
1230 | 2.08M | } |
1231 | | |
1232 | | /// Given operands for an LShr, see if we can fold the result. |
1233 | | /// If not, this returns null. |
1234 | | static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, |
1235 | 1.40M | const SimplifyQuery &Q, unsigned MaxRecurse) { |
1236 | 1.40M | if (Value *V = SimplifyRightShift(Instruction::LShr, Op0, Op1, isExact, Q, |
1237 | 1.40M | MaxRecurse)) |
1238 | 18.5k | return V; |
1239 | 1.38M | |
1240 | 1.38M | // (X << A) >> A -> X |
1241 | 1.38M | Value *X; |
1242 | 1.38M | if (match(Op0, m_NUWShl(m_Value(X), m_Specific(Op1)))) |
1243 | 488 | return X; |
1244 | 1.38M | |
1245 | 1.38M | return nullptr; |
1246 | 1.38M | } |
1247 | | |
1248 | | Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, |
1249 | 1.23M | const SimplifyQuery &Q) { |
1250 | 1.23M | return ::SimplifyLShrInst(Op0, Op1, isExact, Q, RecursionLimit); |
1251 | 1.23M | } |
1252 | | |
1253 | | /// Given operands for an AShr, see if we can fold the result. |
1254 | | /// If not, this returns null. |
1255 | | static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, |
1256 | 581k | const SimplifyQuery &Q, unsigned MaxRecurse) { |
1257 | 581k | if (Value *V = SimplifyRightShift(Instruction::AShr, Op0, Op1, isExact, Q, |
1258 | 581k | MaxRecurse)) |
1259 | 8.86k | return V; |
1260 | 572k | |
1261 | 572k | // all ones >>a X -> all ones |
1262 | 572k | if (572k match(Op0, m_AllOnes())572k ) |
1263 | 9 | return Op0; |
1264 | 572k | |
1265 | 572k | // (X << A) >> A -> X |
1266 | 572k | Value *X; |
1267 | 572k | if (match(Op0, m_NSWShl(m_Value(X), m_Specific(Op1)))) |
1268 | 128 | return X; |
1269 | 572k | |
1270 | 572k | // Arithmetic shifting an all-sign-bit value is a no-op. |
1271 | 572k | unsigned NumSignBits = ComputeNumSignBits(Op0, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); |
1272 | 572k | if (NumSignBits == Op0->getType()->getScalarSizeInBits()) |
1273 | 7 | return Op0; |
1274 | 572k | |
1275 | 572k | return nullptr; |
1276 | 572k | } |
1277 | | |
1278 | | Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, |
1279 | 545k | const SimplifyQuery &Q) { |
1280 | 545k | return ::SimplifyAShrInst(Op0, Op1, isExact, Q, RecursionLimit); |
1281 | 545k | } |
1282 | | |
1283 | | /// Commuted variants are assumed to be handled by calling this function again |
1284 | | /// with the parameters swapped. |
1285 | | static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp, |
1286 | 4.12M | ICmpInst *UnsignedICmp, bool IsAnd) { |
1287 | 4.12M | Value *X, *Y; |
1288 | 4.12M | |
1289 | 4.12M | ICmpInst::Predicate EqPred; |
1290 | 4.12M | if (!match(ZeroICmp, m_ICmp(EqPred, m_Value(Y), m_Zero())) || |
1291 | 2.01M | !ICmpInst::isEquality(EqPred)) |
1292 | 2.19M | return nullptr; |
1293 | 1.92M | |
1294 | 1.92M | ICmpInst::Predicate UnsignedPred; |
1295 | 1.92M | if (match(UnsignedICmp, m_ICmp(UnsignedPred, m_Value(X), m_Specific(Y))) && |
1296 | 2.97k | ICmpInst::isUnsigned(UnsignedPred)) |
1297 | 1.46k | ; |
1298 | 1.92M | else if (1.92M match(UnsignedICmp, |
1299 | 1.92M | m_ICmp(UnsignedPred, m_Value(Y), m_Specific(X))) && |
1300 | 0 | ICmpInst::isUnsigned(UnsignedPred)) |
1301 | 0 | UnsignedPred = ICmpInst::getSwappedPredicate(UnsignedPred); |
1302 | 1.92M | else |
1303 | 1.92M | return nullptr; |
1304 | 1.46k | |
1305 | 1.46k | // X < Y && Y != 0 --> X < Y |
1306 | 1.46k | // X < Y || Y != 0 --> Y != 0 |
1307 | 1.46k | if (1.46k UnsignedPred == ICmpInst::ICMP_ULT && 1.46k EqPred == ICmpInst::ICMP_NE132 ) |
1308 | 68 | return IsAnd ? 68 UnsignedICmp67 : ZeroICmp1 ; |
1309 | 1.40k | |
1310 | 1.40k | // X >= Y || Y != 0 --> true |
1311 | 1.40k | // X >= Y || Y == 0 --> X >= Y |
1312 | 1.40k | if (1.40k UnsignedPred == ICmpInst::ICMP_UGE && 1.40k !IsAnd81 ) { |
1313 | 21 | if (EqPred == ICmpInst::ICMP_NE) |
1314 | 2 | return getTrue(UnsignedICmp->getType()); |
1315 | 19 | return UnsignedICmp; |
1316 | 19 | } |
1317 | 1.38k | |
1318 | 1.38k | // X < Y && Y == 0 --> false |
1319 | 1.38k | if (1.38k UnsignedPred == ICmpInst::ICMP_ULT && 1.38k EqPred == ICmpInst::ICMP_EQ64 && |
1320 | 64 | IsAnd) |
1321 | 1 | return getFalse(UnsignedICmp->getType()); |
1322 | 1.37k | |
1323 | 1.37k | return nullptr; |
1324 | 1.37k | } |
1325 | | |
1326 | | /// Commuted variants are assumed to be handled by calling this function again |
1327 | | /// with the parameters swapped. |
1328 | 2.64M | static Value *simplifyAndOfICmpsWithSameOperands(ICmpInst *Op0, ICmpInst *Op1) { |
1329 | 2.64M | ICmpInst::Predicate Pred0, Pred1; |
1330 | 2.64M | Value *A ,*B; |
1331 | 2.64M | if (!match(Op0, m_ICmp(Pred0, m_Value(A), m_Value(B))) || |
1332 | 2.64M | !match(Op1, m_ICmp(Pred1, m_Specific(A), m_Specific(B)))) |
1333 | 2.64M | return nullptr; |
1334 | 374 | |
1335 | 374 | // We have (icmp Pred0, A, B) & (icmp Pred1, A, B). |
1336 | 374 | // If Op1 is always implied true by Op0, then Op0 is a subset of Op1, and we |
1337 | 374 | // can eliminate Op1 from this 'and'. |
1338 | 374 | if (374 ICmpInst::isImpliedTrueByMatchingCmp(Pred0, Pred1)374 ) |
1339 | 154 | return Op0; |
1340 | 220 | |
1341 | 220 | // Check for any combination of predicates that are guaranteed to be disjoint. |
1342 | 220 | if (220 (Pred0 == ICmpInst::getInversePredicate(Pred1)) || |
1343 | 195 | (Pred0 == ICmpInst::ICMP_EQ && 195 ICmpInst::isFalseWhenEqual(Pred1)11 ) || |
1344 | 184 | (Pred0 == ICmpInst::ICMP_SLT && 184 Pred1 == ICmpInst::ICMP_SGT14 ) || |
1345 | 180 | (Pred0 == ICmpInst::ICMP_ULT && 180 Pred1 == ICmpInst::ICMP_UGT13 )) |
1346 | 43 | return getFalse(Op0->getType()); |
1347 | 177 | |
1348 | 177 | return nullptr; |
1349 | 177 | } |
1350 | | |
1351 | | /// Commuted variants are assumed to be handled by calling this function again |
1352 | | /// with the parameters swapped. |
1353 | 1.47M | static Value *simplifyOrOfICmpsWithSameOperands(ICmpInst *Op0, ICmpInst *Op1) { |
1354 | 1.47M | ICmpInst::Predicate Pred0, Pred1; |
1355 | 1.47M | Value *A ,*B; |
1356 | 1.47M | if (!match(Op0, m_ICmp(Pred0, m_Value(A), m_Value(B))) || |
1357 | 1.47M | !match(Op1, m_ICmp(Pred1, m_Specific(A), m_Specific(B)))) |
1358 | 1.47M | return nullptr; |
1359 | 1.26k | |
1360 | 1.26k | // We have (icmp Pred0, A, B) | (icmp Pred1, A, B). |
1361 | 1.26k | // If Op1 is always implied true by Op0, then Op0 is a subset of Op1, and we |
1362 | 1.26k | // can eliminate Op0 from this 'or'. |
1363 | 1.26k | if (1.26k ICmpInst::isImpliedTrueByMatchingCmp(Pred0, Pred1)1.26k ) |
1364 | 412 | return Op1; |
1365 | 849 | |
1366 | 849 | // Check for any combination of predicates that cover the entire range of |
1367 | 849 | // possibilities. |
1368 | 849 | if (849 (Pred0 == ICmpInst::getInversePredicate(Pred1)) || |
1369 | 686 | (Pred0 == ICmpInst::ICMP_NE && 686 ICmpInst::isTrueWhenEqual(Pred1)22 ) || |
1370 | 678 | (Pred0 == ICmpInst::ICMP_SLE && 678 Pred1 == ICmpInst::ICMP_SGE17 ) || |
1371 | 676 | (Pred0 == ICmpInst::ICMP_ULE && 676 Pred1 == ICmpInst::ICMP_UGE13 )) |
1372 | 175 | return getTrue(Op0->getType()); |
1373 | 674 | |
1374 | 674 | return nullptr; |
1375 | 674 | } |
1376 | | |
1377 | | /// Test if a pair of compares with a shared operand and 2 constants has an |
1378 | | /// empty set intersection, full set union, or if one compare is a superset of |
1379 | | /// the other. |
1380 | | static Value *simplifyAndOrOfICmpsWithConstants(ICmpInst *Cmp0, ICmpInst *Cmp1, |
1381 | 2.06M | bool IsAnd) { |
1382 | 2.06M | // Look for this pattern: {and/or} (icmp X, C0), (icmp X, C1)). |
1383 | 2.06M | if (Cmp0->getOperand(0) != Cmp1->getOperand(0)) |
1384 | 1.90M | return nullptr; |
1385 | 159k | |
1386 | 159k | const APInt *C0, *C1; |
1387 | 159k | if (!match(Cmp0->getOperand(1), m_APInt(C0)) || |
1388 | 26.7k | !match(Cmp1->getOperand(1), m_APInt(C1))) |
1389 | 142k | return nullptr; |
1390 | 16.6k | |
1391 | 16.6k | auto Range0 = ConstantRange::makeExactICmpRegion(Cmp0->getPredicate(), *C0); |
1392 | 16.6k | auto Range1 = ConstantRange::makeExactICmpRegion(Cmp1->getPredicate(), *C1); |
1393 | 16.6k | |
1394 | 16.6k | // For and-of-compares, check if the intersection is empty: |
1395 | 16.6k | // (icmp X, C0) && (icmp X, C1) --> empty set --> false |
1396 | 16.6k | if (IsAnd && 16.6k Range0.intersectWith(Range1).isEmptySet()6.09k ) |
1397 | 160 | return getFalse(Cmp0->getType()); |
1398 | 16.5k | |
1399 | 16.5k | // For or-of-compares, check if the union is full: |
1400 | 16.5k | // (icmp X, C0) || (icmp X, C1) --> full set --> true |
1401 | 16.5k | if (16.5k !IsAnd && 16.5k Range0.unionWith(Range1).isFullSet()10.5k ) |
1402 | 43 | return getTrue(Cmp0->getType()); |
1403 | 16.4k | |
1404 | 16.4k | // Is one range a superset of the other? |
1405 | 16.4k | // If this is and-of-compares, take the smaller set: |
1406 | 16.4k | // (icmp sgt X, 4) && (icmp sgt X, 42) --> icmp sgt X, 42 |
1407 | 16.4k | // If this is or-of-compares, take the larger set: |
1408 | 16.4k | // (icmp sgt X, 4) || (icmp sgt X, 42) --> icmp sgt X, 4 |
1409 | 16.4k | if (16.4k Range0.contains(Range1)16.4k ) |
1410 | 883 | return IsAnd ? 883 Cmp1714 : Cmp0169 ; |
1411 | 15.5k | if (15.5k Range1.contains(Range0)15.5k ) |
1412 | 1.63k | return IsAnd ? 1.63k Cmp0701 : Cmp1933 ; |
1413 | 13.9k | |
1414 | 13.9k | return nullptr; |
1415 | 13.9k | } |
1416 | | |
1417 | 2.64M | static Value *simplifyAndOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1) { |
1418 | 2.64M | // (icmp (add V, C0), C1) & (icmp V, C0) |
1419 | 2.64M | ICmpInst::Predicate Pred0, Pred1; |
1420 | 2.64M | const APInt *C0, *C1; |
1421 | 2.64M | Value *V; |
1422 | 2.64M | if (!match(Op0, m_ICmp(Pred0, m_Add(m_Value(V), m_APInt(C0)), m_APInt(C1)))) |
1423 | 2.63M | return nullptr; |
1424 | 11.5k | |
1425 | 11.5k | if (11.5k !match(Op1, m_ICmp(Pred1, m_Specific(V), m_Value()))11.5k ) |
1426 | 11.2k | return nullptr; |
1427 | 286 | |
1428 | 286 | auto *AddInst = cast<BinaryOperator>(Op0->getOperand(0)); |
1429 | 286 | if (AddInst->getOperand(1) != Op1->getOperand(1)) |
1430 | 274 | return nullptr; |
1431 | 12 | |
1432 | 12 | Type *ITy = Op0->getType(); |
1433 | 12 | bool isNSW = AddInst->hasNoSignedWrap(); |
1434 | 12 | bool isNUW = AddInst->hasNoUnsignedWrap(); |
1435 | 12 | |
1436 | 12 | const APInt Delta = *C1 - *C0; |
1437 | 12 | if (C0->isStrictlyPositive()12 ) { |
1438 | 12 | if (Delta == 212 ) { |
1439 | 6 | if (Pred0 == ICmpInst::ICMP_ULT && 6 Pred1 == ICmpInst::ICMP_SGT4 ) |
1440 | 2 | return getFalse(ITy); |
1441 | 4 | if (4 Pred0 == ICmpInst::ICMP_SLT && 4 Pred1 == ICmpInst::ICMP_SGT2 && isNSW2 ) |
1442 | 2 | return getFalse(ITy); |
1443 | 8 | } |
1444 | 8 | if (8 Delta == 18 ) { |
1445 | 6 | if (Pred0 == ICmpInst::ICMP_ULE && 6 Pred1 == ICmpInst::ICMP_SGT4 ) |
1446 | 2 | return getFalse(ITy); |
1447 | 4 | if (4 Pred0 == ICmpInst::ICMP_SLE && 4 Pred1 == ICmpInst::ICMP_SGT2 && isNSW2 ) |
1448 | 2 | return getFalse(ITy); |
1449 | 4 | } |
1450 | 12 | } |
1451 | 4 | if (4 C0->getBoolValue() && 4 isNUW4 ) { |
1452 | 4 | if (Delta == 2) |
1453 | 2 | if (2 Pred0 == ICmpInst::ICMP_ULT && 2 Pred1 == ICmpInst::ICMP_UGT2 ) |
1454 | 2 | return getFalse(ITy); |
1455 | 2 | if (2 Delta == 12 ) |
1456 | 2 | if (2 Pred0 == ICmpInst::ICMP_ULE && 2 Pred1 == ICmpInst::ICMP_UGT2 ) |
1457 | 2 | return getFalse(ITy); |
1458 | 0 | } |
1459 | 0 |
|
1460 | 0 | return nullptr; |
1461 | 0 | } |
1462 | | |
1463 | 1.32M | static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) { |
1464 | 1.32M | if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/true)) |
1465 | 4 | return X; |
1466 | 1.32M | if (Value *1.32M X1.32M = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/true)) |
1467 | 64 | return X; |
1468 | 1.32M | |
1469 | 1.32M | if (Value *1.32M X1.32M = simplifyAndOfICmpsWithSameOperands(Op0, Op1)) |
1470 | 118 | return X; |
1471 | 1.32M | if (Value *1.32M X1.32M = simplifyAndOfICmpsWithSameOperands(Op1, Op0)) |
1472 | 79 | return X; |
1473 | 1.32M | |
1474 | 1.32M | if (Value *1.32M X1.32M = simplifyAndOrOfICmpsWithConstants(Op0, Op1, true)) |
1475 | 1.57k | return X; |
1476 | 1.32M | |
1477 | 1.32M | if (Value *1.32M X1.32M = simplifyAndOfICmpsWithAdd(Op0, Op1)) |
1478 | 12 | return X; |
1479 | 1.32M | if (Value *1.32M X1.32M = simplifyAndOfICmpsWithAdd(Op1, Op0)) |
1480 | 0 | return X; |
1481 | 1.32M | |
1482 | 1.32M | return nullptr; |
1483 | 1.32M | } |
1484 | | |
1485 | 1.47M | static Value *simplifyOrOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1) { |
1486 | 1.47M | // (icmp (add V, C0), C1) | (icmp V, C0) |
1487 | 1.47M | ICmpInst::Predicate Pred0, Pred1; |
1488 | 1.47M | const APInt *C0, *C1; |
1489 | 1.47M | Value *V; |
1490 | 1.47M | if (!match(Op0, m_ICmp(Pred0, m_Add(m_Value(V), m_APInt(C0)), m_APInt(C1)))) |
1491 | 1.45M | return nullptr; |
1492 | 15.8k | |
1493 | 15.8k | if (15.8k !match(Op1, m_ICmp(Pred1, m_Specific(V), m_Value()))15.8k ) |
1494 | 14.1k | return nullptr; |
1495 | 1.67k | |
1496 | 1.67k | auto *AddInst = cast<BinaryOperator>(Op0->getOperand(0)); |
1497 | 1.67k | if (AddInst->getOperand(1) != Op1->getOperand(1)) |
1498 | 1.65k | return nullptr; |
1499 | 12 | |
1500 | 12 | Type *ITy = Op0->getType(); |
1501 | 12 | bool isNSW = AddInst->hasNoSignedWrap(); |
1502 | 12 | bool isNUW = AddInst->hasNoUnsignedWrap(); |
1503 | 12 | |
1504 | 12 | const APInt Delta = *C1 - *C0; |
1505 | 12 | if (C0->isStrictlyPositive()12 ) { |
1506 | 12 | if (Delta == 212 ) { |
1507 | 6 | if (Pred0 == ICmpInst::ICMP_UGE && 6 Pred1 == ICmpInst::ICMP_SLE4 ) |
1508 | 2 | return getTrue(ITy); |
1509 | 4 | if (4 Pred0 == ICmpInst::ICMP_SGE && 4 Pred1 == ICmpInst::ICMP_SLE2 && isNSW2 ) |
1510 | 2 | return getTrue(ITy); |
1511 | 8 | } |
1512 | 8 | if (8 Delta == 18 ) { |
1513 | 6 | if (Pred0 == ICmpInst::ICMP_UGT && 6 Pred1 == ICmpInst::ICMP_SLE4 ) |
1514 | 2 | return getTrue(ITy); |
1515 | 4 | if (4 Pred0 == ICmpInst::ICMP_SGT && 4 Pred1 == ICmpInst::ICMP_SLE2 && isNSW2 ) |
1516 | 2 | return getTrue(ITy); |
1517 | 4 | } |
1518 | 12 | } |
1519 | 4 | if (4 C0->getBoolValue() && 4 isNUW4 ) { |
1520 | 4 | if (Delta == 2) |
1521 | 2 | if (2 Pred0 == ICmpInst::ICMP_UGE && 2 Pred1 == ICmpInst::ICMP_ULE2 ) |
1522 | 2 | return getTrue(ITy); |
1523 | 2 | if (2 Delta == 12 ) |
1524 | 2 | if (2 Pred0 == ICmpInst::ICMP_UGT && 2 Pred1 == ICmpInst::ICMP_ULE2 ) |
1525 | 2 | return getTrue(ITy); |
1526 | 0 | } |
1527 | 0 |
|
1528 | 0 | return nullptr; |
1529 | 0 | } |
1530 | | |
1531 | 738k | static Value *simplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) { |
1532 | 738k | if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/false)) |
1533 | 1 | return X; |
1534 | 738k | if (Value *738k X738k = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/false)) |
1535 | 21 | return X; |
1536 | 738k | |
1537 | 738k | if (Value *738k X738k = simplifyOrOfICmpsWithSameOperands(Op0, Op1)) |
1538 | 555 | return X; |
1539 | 737k | if (Value *737k X737k = simplifyOrOfICmpsWithSameOperands(Op1, Op0)) |
1540 | 32 | return X; |
1541 | 737k | |
1542 | 737k | if (Value *737k X737k = simplifyAndOrOfICmpsWithConstants(Op0, Op1, false)) |
1543 | 1.14k | return X; |
1544 | 736k | |
1545 | 736k | if (Value *736k X736k = simplifyOrOfICmpsWithAdd(Op0, Op1)) |
1546 | 12 | return X; |
1547 | 736k | if (Value *736k X736k = simplifyOrOfICmpsWithAdd(Op1, Op0)) |
1548 | 0 | return X; |
1549 | 736k | |
1550 | 736k | return nullptr; |
1551 | 736k | } |
1552 | | |
1553 | 11.0M | static Value *simplifyAndOrOfICmps(Value *Op0, Value *Op1, bool IsAnd) { |
1554 | 11.0M | // Look through casts of the 'and' operands to find compares. |
1555 | 11.0M | auto *Cast0 = dyn_cast<CastInst>(Op0); |
1556 | 11.0M | auto *Cast1 = dyn_cast<CastInst>(Op1); |
1557 | 11.0M | if (Cast0 && 11.0M Cast1794k && Cast0->getOpcode() == Cast1->getOpcode()37.3k && |
1558 | 11.0M | Cast0->getSrcTy() == Cast1->getSrcTy()35.3k ) { |
1559 | 32.6k | Op0 = Cast0->getOperand(0); |
1560 | 32.6k | Op1 = Cast1->getOperand(0); |
1561 | 32.6k | } |
1562 | 11.0M | |
1563 | 11.0M | auto *Cmp0 = dyn_cast<ICmpInst>(Op0); |
1564 | 11.0M | auto *Cmp1 = dyn_cast<ICmpInst>(Op1); |
1565 | 11.0M | if (!Cmp0 || 11.0M !Cmp12.42M ) |
1566 | 8.98M | return nullptr; |
1567 | 2.06M | |
1568 | 2.06M | Value *V = |
1569 | 2.06M | IsAnd ? simplifyAndOfICmps(Cmp0, Cmp1)1.32M : simplifyOrOfICmps(Cmp0, Cmp1)738k ; |
1570 | 2.06M | if (!V) |
1571 | 2.05M | return nullptr; |
1572 | 3.61k | if (3.61k !Cast03.61k ) |
1573 | 3.61k | return V; |
1574 | 8 | |
1575 | 8 | // If we looked through casts, we can only handle a constant simplification |
1576 | 8 | // because we are not allowed to create a cast instruction here. |
1577 | 8 | if (auto *8 C8 = dyn_cast<Constant>(V)) |
1578 | 8 | return ConstantExpr::getCast(Cast0->getOpcode(), C, Cast0->getType()); |
1579 | 0 |
|
1580 | 0 | return nullptr; |
1581 | 0 | } |
1582 | | |
1583 | | /// Given operands for an And, see if we can fold the result. |
1584 | | /// If not, this returns null. |
1585 | | static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, |
1586 | 5.59M | unsigned MaxRecurse) { |
1587 | 5.59M | if (Constant *C = foldOrCommuteConstant(Instruction::And, Op0, Op1, Q)) |
1588 | 286k | return C; |
1589 | 5.30M | |
1590 | 5.30M | // X & undef -> 0 |
1591 | 5.30M | if (5.30M match(Op1, m_Undef())5.30M ) |
1592 | 3 | return Constant::getNullValue(Op0->getType()); |
1593 | 5.30M | |
1594 | 5.30M | // X & X = X |
1595 | 5.30M | if (5.30M Op0 == Op15.30M ) |
1596 | 149 | return Op0; |
1597 | 5.30M | |
1598 | 5.30M | // X & 0 = 0 |
1599 | 5.30M | if (5.30M match(Op1, m_Zero())5.30M ) |
1600 | 3.21k | return Op1; |
1601 | 5.30M | |
1602 | 5.30M | // X & -1 = X |
1603 | 5.30M | if (5.30M match(Op1, m_AllOnes())5.30M ) |
1604 | 64.2k | return Op0; |
1605 | 5.23M | |
1606 | 5.23M | // A & ~A = ~A & A = 0 |
1607 | 5.23M | if (5.23M match(Op0, m_Not(m_Specific(Op1))) || |
1608 | 5.23M | match(Op1, m_Not(m_Specific(Op0)))) |
1609 | 101 | return Constant::getNullValue(Op0->getType()); |
1610 | 5.23M | |
1611 | 5.23M | // (A | ?) & A = A |
1612 | 5.23M | if (5.23M match(Op0, m_c_Or(m_Specific(Op1), m_Value()))5.23M ) |
1613 | 249 | return Op1; |
1614 | 5.23M | |
1615 | 5.23M | // A & (A | ?) = A |
1616 | 5.23M | if (5.23M match(Op1, m_c_Or(m_Specific(Op0), m_Value()))5.23M ) |
1617 | 33 | return Op0; |
1618 | 5.23M | |
1619 | 5.23M | // A mask that only clears known zeros of a shifted value is a no-op. |
1620 | 5.23M | Value *X; |
1621 | 5.23M | const APInt *Mask; |
1622 | 5.23M | const APInt *ShAmt; |
1623 | 5.23M | if (match(Op1, m_APInt(Mask))5.23M ) { |
1624 | 2.70M | // If all bits in the inverted and shifted mask are clear: |
1625 | 2.70M | // and (shl X, ShAmt), Mask --> shl X, ShAmt |
1626 | 2.70M | if (match(Op0, m_Shl(m_Value(X), m_APInt(ShAmt))) && |
1627 | 115k | (~(*Mask)).lshr(*ShAmt).isNullValue()) |
1628 | 2.06k | return Op0; |
1629 | 2.70M | |
1630 | 2.70M | // If all bits in the inverted and shifted mask are clear: |
1631 | 2.70M | // and (lshr X, ShAmt), Mask --> lshr X, ShAmt |
1632 | 2.70M | if (2.70M match(Op0, m_LShr(m_Value(X), m_APInt(ShAmt))) && |
1633 | 196k | (~(*Mask)).shl(*ShAmt).isNullValue()) |
1634 | 1.78k | return Op0; |
1635 | 5.23M | } |
1636 | 5.23M | |
1637 | 5.23M | // A & (-A) = A if A is a power of two or zero. |
1638 | 5.23M | if (5.23M match(Op0, m_Neg(m_Specific(Op1))) || |
1639 | 5.23M | match(Op1, m_Neg(m_Specific(Op0)))5.23M ) { |
1640 | 105 | if (isKnownToBeAPowerOfTwo(Op0, Q.DL, /*OrZero*/ true, 0, Q.AC, Q.CxtI, |
1641 | 105 | Q.DT)) |
1642 | 2 | return Op0; |
1643 | 103 | if (103 isKnownToBeAPowerOfTwo(Op1, Q.DL, /*OrZero*/ true, 0, Q.AC, Q.CxtI, |
1644 | 103 | Q.DT)) |
1645 | 0 | return Op1; |
1646 | 5.23M | } |
1647 | 5.23M | |
1648 | 5.23M | if (Value *5.23M V5.23M = simplifyAndOrOfICmps(Op0, Op1, true)) |
1649 | 1.85k | return V; |
1650 | 5.23M | |
1651 | 5.23M | // Try some generic simplifications for associative operations. |
1652 | 5.23M | if (Value *5.23M V5.23M = SimplifyAssociativeBinOp(Instruction::And, Op0, Op1, Q, |
1653 | 5.23M | MaxRecurse)) |
1654 | 22.1k | return V; |
1655 | 5.20M | |
1656 | 5.20M | // And distributes over Or. Try some generic simplifications based on this. |
1657 | 5.20M | if (Value *5.20M V5.20M = ExpandBinOp(Instruction::And, Op0, Op1, Instruction::Or, |
1658 | 5.20M | Q, MaxRecurse)) |
1659 | 1.13k | return V; |
1660 | 5.20M | |
1661 | 5.20M | // And distributes over Xor. Try some generic simplifications based on this. |
1662 | 5.20M | if (Value *5.20M V5.20M = ExpandBinOp(Instruction::And, Op0, Op1, Instruction::Xor, |
1663 | 5.20M | Q, MaxRecurse)) |
1664 | 234 | return V; |
1665 | 5.20M | |
1666 | 5.20M | // If the operation is with the result of a select instruction, check whether |
1667 | 5.20M | // operating on either branch of the select always yields the same value. |
1668 | 5.20M | if (5.20M isa<SelectInst>(Op0) || 5.20M isa<SelectInst>(Op1)5.02M ) |
1669 | 200k | if (Value *200k V200k = ThreadBinOpOverSelect(Instruction::And, Op0, Op1, Q, |
1670 | 200k | MaxRecurse)) |
1671 | 611 | return V; |
1672 | 5.20M | |
1673 | 5.20M | // If the operation is with the result of a phi instruction, check whether |
1674 | 5.20M | // operating on all incoming values of the phi always yields the same value. |
1675 | 5.20M | if (5.20M isa<PHINode>(Op0) || 5.20M isa<PHINode>(Op1)4.88M ) |
1676 | 380k | if (Value *380k V380k = ThreadBinOpOverPHI(Instruction::And, Op0, Op1, Q, |
1677 | 380k | MaxRecurse)) |
1678 | 94 | return V; |
1679 | 5.20M | |
1680 | 5.20M | return nullptr; |
1681 | 5.20M | } |
1682 | | |
1683 | 3.34M | Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { |
1684 | 3.34M | return ::SimplifyAndInst(Op0, Op1, Q, RecursionLimit); |
1685 | 3.34M | } |
1686 | | |
1687 | | /// Given operands for an Or, see if we can fold the result. |
1688 | | /// If not, this returns null. |
1689 | | static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, |
1690 | 6.08M | unsigned MaxRecurse) { |
1691 | 6.08M | if (Constant *C = foldOrCommuteConstant(Instruction::Or, Op0, Op1, Q)) |
1692 | 211k | return C; |
1693 | 5.87M | |
1694 | 5.87M | // X | undef -> -1 |
1695 | 5.87M | if (5.87M match(Op1, m_Undef())5.87M ) |
1696 | 28 | return Constant::getAllOnesValue(Op0->getType()); |
1697 | 5.87M | |
1698 | 5.87M | // X | X = X |
1699 | 5.87M | if (5.87M Op0 == Op15.87M ) |
1700 | 1.08k | return Op0; |
1701 | 5.87M | |
1702 | 5.87M | // X | 0 = X |
1703 | 5.87M | if (5.87M match(Op1, m_Zero())5.87M ) |
1704 | 22.8k | return Op0; |
1705 | 5.85M | |
1706 | 5.85M | // X | -1 = -1 |
1707 | 5.85M | if (5.85M match(Op1, m_AllOnes())5.85M ) |
1708 | 498 | return Op1; |
1709 | 5.84M | |
1710 | 5.84M | // A | ~A = ~A | A = -1 |
1711 | 5.84M | if (5.84M match(Op0, m_Not(m_Specific(Op1))) || |
1712 | 5.84M | match(Op1, m_Not(m_Specific(Op0)))) |
1713 | 3.14k | return Constant::getAllOnesValue(Op0->getType()); |
1714 | 5.84M | |
1715 | 5.84M | // (A & ?) | A = A |
1716 | 5.84M | if (5.84M match(Op0, m_c_And(m_Specific(Op1), m_Value()))5.84M ) |
1717 | 21.1k | return Op1; |
1718 | 5.82M | |
1719 | 5.82M | // A | (A & ?) = A |
1720 | 5.82M | if (5.82M match(Op1, m_c_And(m_Specific(Op0), m_Value()))5.82M ) |
1721 | 8.54k | return Op0; |
1722 | 5.81M | |
1723 | 5.81M | // ~(A & ?) | A = -1 |
1724 | 5.81M | if (5.81M match(Op0, m_Not(m_c_And(m_Specific(Op1), m_Value())))5.81M ) |
1725 | 2 | return Constant::getAllOnesValue(Op1->getType()); |
1726 | 5.81M | |
1727 | 5.81M | // A | ~(A & ?) = -1 |
1728 | 5.81M | if (5.81M match(Op1, m_Not(m_c_And(m_Specific(Op1), m_Value())))5.81M ) |
1729 | 0 | return Constant::getAllOnesValue(Op0->getType()); |
1730 | 5.81M | |
1731 | 5.81M | Value *A, *B; |
1732 | 5.81M | // (A & ~B) | (A ^ B) -> (A ^ B) |
1733 | 5.81M | // (~B & A) | (A ^ B) -> (A ^ B) |
1734 | 5.81M | // (A & ~B) | (B ^ A) -> (B ^ A) |
1735 | 5.81M | // (~B & A) | (B ^ A) -> (B ^ A) |
1736 | 5.81M | if (match(Op1, m_Xor(m_Value(A), m_Value(B))) && |
1737 | 53.5k | (match(Op0, m_c_And(m_Specific(A), m_Not(m_Specific(B)))) || |
1738 | 53.4k | match(Op0, m_c_And(m_Not(m_Specific(A)), m_Specific(B))))) |
1739 | 4 | return Op1; |
1740 | 5.81M | |
1741 | 5.81M | // Commute the 'or' operands. |
1742 | 5.81M | // (A ^ B) | (A & ~B) -> (A ^ B) |
1743 | 5.81M | // (A ^ B) | (~B & A) -> (A ^ B) |
1744 | 5.81M | // (B ^ A) | (A & ~B) -> (B ^ A) |
1745 | 5.81M | // (B ^ A) | (~B & A) -> (B ^ A) |
1746 | 5.81M | if (5.81M match(Op0, m_Xor(m_Value(A), m_Value(B))) && |
1747 | 34.7k | (match(Op1, m_c_And(m_Specific(A), m_Not(m_Specific(B)))) || |
1748 | 34.7k | match(Op1, m_c_And(m_Not(m_Specific(A)), m_Specific(B))))) |
1749 | 4 | return Op0; |
1750 | 5.81M | |
1751 | 5.81M | // (A & B) | (~A ^ B) -> (~A ^ B) |
1752 | 5.81M | // (B & A) | (~A ^ B) -> (~A ^ B) |
1753 | 5.81M | // (A & B) | (B ^ ~A) -> (B ^ ~A) |
1754 | 5.81M | // (B & A) | (B ^ ~A) -> (B ^ ~A) |
1755 | 5.81M | if (5.81M match(Op0, m_And(m_Value(A), m_Value(B))) && |
1756 | 1.22M | (match(Op1, m_c_Xor(m_Specific(A), m_Not(m_Specific(B)))) || |
1757 | 1.22M | match(Op1, m_c_Xor(m_Not(m_Specific(A)), m_Specific(B))))) |
1758 | 4 | return Op1; |
1759 | 5.81M | |
1760 | 5.81M | // (~A ^ B) | (A & B) -> (~A ^ B) |
1761 | 5.81M | // (~A ^ B) | (B & A) -> (~A ^ B) |
1762 | 5.81M | // (B ^ ~A) | (A & B) -> (B ^ ~A) |
1763 | 5.81M | // (B ^ ~A) | (B & A) -> (B ^ ~A) |
1764 | 5.81M | if (5.81M match(Op1, m_And(m_Value(A), m_Value(B))) && |
1765 | 857k | (match(Op0, m_c_Xor(m_Specific(A), m_Not(m_Specific(B)))) || |
1766 | 857k | match(Op0, m_c_Xor(m_Not(m_Specific(A)), m_Specific(B))))) |
1767 | 4 | return Op0; |
1768 | 5.81M | |
1769 | 5.81M | if (Value *5.81M V5.81M = simplifyAndOrOfICmps(Op0, Op1, false)) |
1770 | 1.76k | return V; |
1771 | 5.81M | |
1772 | 5.81M | // Try some generic simplifications for associative operations. |
1773 | 5.81M | if (Value *5.81M V5.81M = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, Q, |
1774 | 5.81M | MaxRecurse)) |
1775 | 3.08k | return V; |
1776 | 5.81M | |
1777 | 5.81M | // Or distributes over And. Try some generic simplifications based on this. |
1778 | 5.81M | if (Value *5.81M V5.81M = ExpandBinOp(Instruction::Or, Op0, Op1, Instruction::And, Q, |
1779 | 5.81M | MaxRecurse)) |
1780 | 4 | return V; |
1781 | 5.81M | |
1782 | 5.81M | // If the operation is with the result of a select instruction, check whether |
1783 | 5.81M | // operating on either branch of the select always yields the same value. |
1784 | 5.81M | if (5.81M isa<SelectInst>(Op0) || 5.81M isa<SelectInst>(Op1)5.73M ) |
1785 | 81.7k | if (Value *81.7k V81.7k = ThreadBinOpOverSelect(Instruction::Or, Op0, Op1, Q, |
1786 | 81.7k | MaxRecurse)) |
1787 | 20 | return V; |
1788 | 5.81M | |
1789 | 5.81M | // (A & C1)|(B & C2) |
1790 | 5.81M | const APInt *C1, *C2; |
1791 | 5.81M | if (match(Op0, m_And(m_Value(A), m_APInt(C1))) && |
1792 | 5.81M | match(Op1, m_And(m_Value(B), m_APInt(C2)))954k ) { |
1793 | 187k | if (*C1 == ~*C2187k ) { |
1794 | 43.9k | // (A & C1)|(B & C2) |
1795 | 43.9k | // If we have: ((V + N) & C1) | (V & C2) |
1796 | 43.9k | // .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0 |
1797 | 43.9k | // replace with V+N. |
1798 | 43.9k | Value *N; |
1799 | 43.9k | if (C2->isMask() && // C2 == 0+1+ |
1800 | 43.9k | match(A, m_c_Add(m_Specific(B), m_Value(N)))12.2k ) { |
1801 | 13 | // Add commutes, try both ways. |
1802 | 13 | if (MaskedValueIsZero(N, *C2, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) |
1803 | 13 | return A; |
1804 | 43.8k | } |
1805 | 43.8k | // Or commutes, try both ways. |
1806 | 43.8k | if (43.8k C1->isMask() && |
1807 | 43.8k | match(B, m_c_Add(m_Specific(A), m_Value(N)))10.4k ) { |
1808 | 7 | // Add commutes, try both ways. |
1809 | 7 | if (MaskedValueIsZero(N, *C1, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) |
1810 | 7 | return B; |
1811 | 5.81M | } |
1812 | 43.9k | } |
1813 | 187k | } |
1814 | 5.81M | |
1815 | 5.81M | // If the operation is with the result of a phi instruction, check whether |
1816 | 5.81M | // operating on all incoming values of the phi always yields the same value. |
1817 | 5.81M | if (5.81M isa<PHINode>(Op0) || 5.81M isa<PHINode>(Op1)5.37M ) |
1818 | 731k | if (Value *731k V731k = ThreadBinOpOverPHI(Instruction::Or, Op0, Op1, Q, MaxRecurse)) |
1819 | 190 | return V; |
1820 | 5.81M | |
1821 | 5.81M | return nullptr; |
1822 | 5.81M | } |
1823 | | |
1824 | 1.55M | Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { |
1825 | 1.55M | return ::SimplifyOrInst(Op0, Op1, Q, RecursionLimit); |
1826 | 1.55M | } |
1827 | | |
1828 | | /// Given operands for a Xor, see if we can fold the result. |
1829 | | /// If not, this returns null. |
1830 | | static Value *SimplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, |
1831 | 1.25M | unsigned MaxRecurse) { |
1832 | 1.25M | if (Constant *C = foldOrCommuteConstant(Instruction::Xor, Op0, Op1, Q)) |
1833 | 11.9k | return C; |
1834 | 1.24M | |
1835 | 1.24M | // A ^ undef -> undef |
1836 | 1.24M | if (1.24M match(Op1, m_Undef())1.24M ) |
1837 | 0 | return Op1; |
1838 | 1.24M | |
1839 | 1.24M | // A ^ 0 = A |
1840 | 1.24M | if (1.24M match(Op1, m_Zero())1.24M ) |
1841 | 5.63k | return Op0; |
1842 | 1.23M | |
1843 | 1.23M | // A ^ A = 0 |
1844 | 1.23M | if (1.23M Op0 == Op11.23M ) |
1845 | 18 | return Constant::getNullValue(Op0->getType()); |
1846 | 1.23M | |
1847 | 1.23M | // A ^ ~A = ~A ^ A = -1 |
1848 | 1.23M | if (1.23M match(Op0, m_Not(m_Specific(Op1))) || |
1849 | 1.23M | match(Op1, m_Not(m_Specific(Op0)))) |
1850 | 2 | return Constant::getAllOnesValue(Op0->getType()); |
1851 | 1.23M | |
1852 | 1.23M | // Try some generic simplifications for associative operations. |
1853 | 1.23M | if (Value *1.23M V1.23M = SimplifyAssociativeBinOp(Instruction::Xor, Op0, Op1, Q, |
1854 | 1.23M | MaxRecurse)) |
1855 | 5.08k | return V; |
1856 | 1.23M | |
1857 | 1.23M | // Threading Xor over selects and phi nodes is pointless, so don't bother. |
1858 | 1.23M | // Threading over the select in "A ^ select(cond, B, C)" means evaluating |
1859 | 1.23M | // "A^B" and "A^C" and seeing if they are equal; but they are equal if and |
1860 | 1.23M | // only if B and C are equal. If B and C are equal then (since we assume |
1861 | 1.23M | // that operands have already been simplified) "select(cond, B, C)" should |
1862 | 1.23M | // have been simplified to the common value of B and C already. Analysing |
1863 | 1.23M | // "A^B" and "A^C" thus gains nothing, but costs compile time. Similarly |
1864 | 1.23M | // for threading over phi nodes. |
1865 | 1.23M | |
1866 | 1.23M | return nullptr; |
1867 | 1.23M | } |
1868 | | |
1869 | 441k | Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { |
1870 | 441k | return ::SimplifyXorInst(Op0, Op1, Q, RecursionLimit); |
1871 | 441k | } |
1872 | | |
1873 | | |
1874 | 170M | static Type *GetCompareTy(Value *Op) { |
1875 | 170M | return CmpInst::makeCmpResultType(Op->getType()); |
1876 | 170M | } |
1877 | | |
1878 | | /// Rummage around inside V looking for something equivalent to the comparison |
1879 | | /// "LHS Pred RHS". Return such a value if found, otherwise return null. |
1880 | | /// Helper function for analyzing max/min idioms. |
1881 | | static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred, |
1882 | 113k | Value *LHS, Value *RHS) { |
1883 | 113k | SelectInst *SI = dyn_cast<SelectInst>(V); |
1884 | 113k | if (!SI) |
1885 | 53.7k | return nullptr; |
1886 | 60.1k | CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition()); |
1887 | 60.1k | if (!Cmp) |
1888 | 0 | return nullptr; |
1889 | 60.1k | Value *CmpLHS = Cmp->getOperand(0), *CmpRHS = Cmp->getOperand(1); |
1890 | 60.1k | if (Pred == Cmp->getPredicate() && 60.1k LHS == CmpLHS3.37k && RHS == CmpRHS22 ) |
1891 | 22 | return Cmp; |
1892 | 60.0k | if (60.0k Pred == CmpInst::getSwappedPredicate(Cmp->getPredicate()) && |
1893 | 60.0k | LHS == CmpRHS6.39k && RHS == CmpLHS6.27k ) |
1894 | 6.27k | return Cmp; |
1895 | 53.8k | return nullptr; |
1896 | 53.8k | } |
1897 | | |
1898 | | // A significant optimization not implemented here is assuming that alloca |
1899 | | // addresses are not equal to incoming argument values. They don't *alias*, |
1900 | | // as we say, but that doesn't mean they aren't equal, so we take a |
1901 | | // conservative approach. |
1902 | | // |
1903 | | // This is inspired in part by C++11 5.10p1: |
1904 | | // "Two pointers of the same type compare equal if and only if they are both |
1905 | | // null, both point to the same function, or both represent the same |
1906 | | // address." |
1907 | | // |
1908 | | // This is pretty permissive. |
1909 | | // |
1910 | | // It's also partly due to C11 6.5.9p6: |
1911 | | // "Two pointers compare equal if and only if both are null pointers, both are |
1912 | | // pointers to the same object (including a pointer to an object and a |
1913 | | // subobject at its beginning) or function, both are pointers to one past the |
1914 | | // last element of the same array object, or one is a pointer to one past the |
1915 | | // end of one array object and the other is a pointer to the start of a |
1916 | | // different array object that happens to immediately follow the first array |
1917 | | // object in the address space.)" |
1918 | | // |
1919 | | // C11's version is more restrictive, however there's no reason why an argument |
1920 | | // couldn't be a one-past-the-end value for a stack object in the caller and be |
1921 | | // equal to the beginning of a stack object in the callee. |
1922 | | // |
1923 | | // If the C and C++ standards are ever made sufficiently restrictive in this |
1924 | | // area, it may be possible to update LLVM's semantics accordingly and reinstate |
1925 | | // this optimization. |
1926 | | static Constant *
1927 | | computePointerICmp(const DataLayout &DL, const TargetLibraryInfo *TLI,
1928 | | const DominatorTree *DT, CmpInst::Predicate Pred,
1929 | | AssumptionCache *AC, const Instruction *CxtI,
1930 | 8.66M | Value *LHS, Value *RHS) {
1931 | 8.66M | // Folds a pointer icmp to a constant, or returns nullptr when no fold applies. First, skip past any trivial no-ops (pointer casts).
1932 | 8.66M | LHS = LHS->stripPointerCasts();
1933 | 8.66M | RHS = RHS->stripPointerCasts();
1934 | 8.66M |
1935 | 8.66M | // A non-null pointer is not equal to a null pointer.
1936 | 8.66M | if (llvm::isKnownNonZero(LHS, DL) && 8.66M isa<ConstantPointerNull>(RHS)478k &&
1937 | 8.91k | (Pred == CmpInst::ICMP_EQ || 8.91k Pred == CmpInst::ICMP_NE92 ))
1938 | 8.91k | return ConstantInt::get(GetCompareTy(LHS),
1939 | 8.91k | !CmpInst::isTrueWhenEqual(Pred));
1940 | 8.65M |
1941 | 8.65M | // We can only fold certain predicates on pointer comparisons.
1942 | 8.65M | switch (Pred) {
1943 | 11 | default:
1944 | 11 | return nullptr;
1945 | 8.65M |
1946 | 8.65M | // Equality comparisons are easy to fold.
1947 | 8.05M | case CmpInst::ICMP_EQ:
1948 | 8.05M | case CmpInst::ICMP_NE:
1949 | 8.05M | break;
1950 | 8.05M |
1951 | 8.05M | // We can only handle unsigned relational comparisons because 'inbounds' on
1952 | 8.05M | // a GEP only protects against unsigned wrapping.
1953 | 593k | case CmpInst::ICMP_UGT:
1954 | 593k | case CmpInst::ICMP_UGE:
1955 | 593k | case CmpInst::ICMP_ULT:
1956 | 593k | case CmpInst::ICMP_ULE:
1957 | 593k | // However, we have to switch them to their signed variants to handle
1958 | 593k | // negative indices from the base pointer.
1959 | 593k | Pred = ICmpInst::getSignedPredicate(Pred);
1960 | 593k | break;
1961 | 8.65M | }
1962 | 8.65M |
1963 | 8.65M | // Strip off any constant offsets so that we can reason about them.
1964 | 8.65M | // It's tempting to use getUnderlyingObject or even just stripInBoundsOffsets
1965 | 8.65M | // here and compare base addresses like AliasAnalysis does, however there are
1966 | 8.65M | // numerous hazards. AliasAnalysis and its utilities rely on special rules
1967 | 8.65M | // governing loads and stores which don't apply to icmps. Also, AliasAnalysis
1968 | 8.65M | // doesn't need to guarantee pointer inequality when it says NoAlias.
1969 | 8.65M | Constant *LHSOffset = stripAndComputeConstantOffsets(DL, LHS);
1970 | 8.65M | Constant *RHSOffset = stripAndComputeConstantOffsets(DL, RHS);
1971 | 8.65M |
1972 | 8.65M | // If LHS and RHS are related via constant offsets to the same base
1973 | 8.65M | // value, we can replace it with an icmp which just compares the offsets.
1974 | 8.65M | if (LHS == RHS)
1975 | 1.70k | return ConstantExpr::getICmp(Pred, LHSOffset, RHSOffset);
1976 | 8.65M |
1977 | 8.65M | // Various optimizations for (in)equality comparisons.
1978 | 8.65M | if (8.65M Pred == CmpInst::ICMP_EQ || 8.65M Pred == CmpInst::ICMP_NE1.17M ) {
1979 | 8.05M | // Different non-empty allocations that exist at the same time have
1980 | 8.05M | // different addresses (if the program can tell). Global variables always
1981 | 8.05M | // exist, so they always exist during the lifetime of each other and all
1982 | 8.05M | // allocas. Two different allocas usually have different addresses...
1983 | 8.05M | //
1984 | 8.05M | // However, if there's an @llvm.stackrestore dynamically in between two
1985 | 8.05M | // allocas, they may have the same address. It's tempting to reduce the
1986 | 8.05M | // scope of the problem by only looking at *static* allocas here. That would
1987 | 8.05M | // cover the majority of allocas while significantly reducing the likelihood
1988 | 8.05M | // of having an @llvm.stackrestore pop up in the middle. However, it's not
1989 | 8.05M | // actually impossible for an @llvm.stackrestore to pop up in the middle of
1990 | 8.05M | // an entry block. Also, if we have a block that's not attached to a
1991 | 8.05M | // function, we can't tell if it's "static" under the current definition.
1992 | 8.05M | // Theoretically, this problem could be fixed by creating a new kind of
1993 | 8.05M | // instruction specifically for static allocas. Such a new instruction
1994 | 8.05M | // could be required to be at the top of the entry block, thus preventing it
1995 | 8.05M | // from being subject to a @llvm.stackrestore. Instcombine could even
1996 | 8.05M | // convert regular allocas into these special allocas. It'd be nifty.
1997 | 8.05M | // However, until then, this problem remains open.
1998 | 8.05M | //
1999 | 8.05M | // So, we'll assume that two non-empty allocas have different addresses
2000 | 8.05M | // for now.
2001 | 8.05M | //
2002 | 8.05M | // With all that, if the offsets are within the bounds of their allocations
2003 | 8.05M | // (and not one-past-the-end! so we can't use inbounds!), and their
2004 | 8.05M | // allocations aren't the same, the pointers are not equal.
2005 | 8.05M | //
2006 | 8.05M | // Note that it's not necessary to check for LHS being a global variable
2007 | 8.05M | // address, due to canonicalization and constant folding.
2008 | 8.05M | if (isa<AllocaInst>(LHS) &&
2009 | 8.05M | (isa<AllocaInst>(RHS) || 3.07k isa<GlobalVariable>(RHS)2.82k )) {
2010 | 246 | ConstantInt *LHSOffsetCI = dyn_cast<ConstantInt>(LHSOffset);
2011 | 246 | ConstantInt *RHSOffsetCI = dyn_cast<ConstantInt>(RHSOffset);
2012 | 246 | uint64_t LHSSize, RHSSize;
2013 | 246 | if (LHSOffsetCI && 246 RHSOffsetCI246 &&
2014 | 246 | getObjectSize(LHS, LHSSize, DL, TLI) &&
2015 | 246 | getObjectSize(RHS, RHSSize, DL, TLI)246 ) {
2016 | 246 | const APInt &LHSOffsetValue = LHSOffsetCI->getValue();
2017 | 246 | const APInt &RHSOffsetValue = RHSOffsetCI->getValue();
2018 | 246 | if (!LHSOffsetValue.isNegative() &&
2019 | 246 | !RHSOffsetValue.isNegative() &&
2020 | 246 | LHSOffsetValue.ult(LHSSize) &&
2021 | 246 | RHSOffsetValue.ult(RHSSize)246 ) {
2022 | 246 | return ConstantInt::get(GetCompareTy(LHS),
2023 | 246 | !CmpInst::isTrueWhenEqual(Pred));
2024 | 246 | }
2025 | 0 | }
2026 | 0 |

2027 | 0 | // Repeat the above check but this time without depending on DataLayout
2028 | 0 | // or being able to compute a precise size.
2029 | 0 | if (0 !cast<PointerType>(LHS->getType())->isEmptyTy() &&
2030 | 0 | !cast<PointerType>(RHS->getType())->isEmptyTy() &&
2031 | 0 | LHSOffset->isNullValue() &&
2032 | 0 | RHSOffset->isNullValue())
2033 | 0 | return ConstantInt::get(GetCompareTy(LHS),
2034 | 0 | !CmpInst::isTrueWhenEqual(Pred));
2035 | 8.05M | }
2036 | 8.05M |
2037 | 8.05M | // Even if a non-inbounds GEP occurs along the path we can still optimize
2038 | 8.05M | // equality comparisons concerning the result. We avoid walking the whole
2039 | 8.05M | // chain again by starting where the last calls to
2040 | 8.05M | // stripAndComputeConstantOffsets left off and accumulate the offsets.
2041 | 8.05M | Constant *LHSNoBound = stripAndComputeConstantOffsets(DL, LHS, true);
2042 | 8.05M | Constant *RHSNoBound = stripAndComputeConstantOffsets(DL, RHS, true);
2043 | 8.05M | if (LHS == RHS)
2044 | 90 | return ConstantExpr::getICmp(Pred,
2045 | 90 | ConstantExpr::getAdd(LHSOffset, LHSNoBound),
2046 | 90 | ConstantExpr::getAdd(RHSOffset, RHSNoBound));
2047 | 8.05M |
2048 | 8.05M | // If one side of the equality comparison must come from a noalias call
2049 | 8.05M | // (meaning a system memory allocation function), and the other side must
2050 | 8.05M | // come from a pointer that cannot overlap with dynamically-allocated
2051 | 8.05M | // memory within the lifetime of the current function (allocas, byval
2052 | 8.05M | // arguments, globals), then determine the comparison result here.
2053 | 8.05M | SmallVector<Value *, 8> LHSUObjs, RHSUObjs;
2054 | 8.05M | GetUnderlyingObjects(LHS, LHSUObjs, DL);
2055 | 8.05M | GetUnderlyingObjects(RHS, RHSUObjs, DL);
2056 | 8.05M |
2057 | 8.05M | // Is the set of underlying objects all noalias calls?
2058 | 16.1M | auto IsNAC = [](ArrayRef<Value *> Objects) {
2059 | 16.1M | return all_of(Objects, isNoAliasCall);
2060 | 16.1M | };
2061 | 8.05M |
2062 | 8.05M | // Is the set of underlying objects all things which must be disjoint from
2063 | 8.05M | // noalias calls. For allocas, we consider only static ones (dynamic
2064 | 8.05M | // allocas might be transformed into calls to malloc not simultaneously
2065 | 8.05M | // live with the compared-to allocation). For globals, we exclude symbols
2066 | 8.05M | // that might be resolved lazily to symbols in another dynamically-loaded
2067 | 8.05M | // library (and, thus, could be malloc'ed by the implementation).
2068 | 283k | auto IsAllocDisjoint = [](ArrayRef<Value *> Objects) {
2069 | 283k | return all_of(Objects, [](Value *V) {
2070 | 283k | if (const AllocaInst *AI = dyn_cast<AllocaInst>(V))
2071 | 13 | return AI->getParent() && 13 AI->getFunction()13 && AI->isStaticAlloca()13 ;
2072 | 283k | if (const GlobalValue *283k GV283k = dyn_cast<GlobalValue>(V))
2073 | 8 | return (GV->hasLocalLinkage() || 8 GV->hasHiddenVisibility()7 ||
2074 | 8 | GV->hasProtectedVisibility()6 || GV->hasGlobalUnnamedAddr()5 ) &&
2075 | 6 | !GV->isThreadLocal();
2076 | 283k | if (const Argument *283k A283k = dyn_cast<Argument>(V))
2077 | 2 | return A->hasByValAttr();
2078 | 283k | return false;
2079 | 283k | });
2080 | 283k | };
2081 | 8.05M |
2082 | 8.05M | if ((IsNAC(LHSUObjs) && 8.05M IsAllocDisjoint(RHSUObjs)278k ) ||
2083 | 8.05M | (IsNAC(RHSUObjs) && 8.05M IsAllocDisjoint(LHSUObjs)5.11k ))
2084 | 12 | return ConstantInt::get(GetCompareTy(LHS),
2085 | 12 | !CmpInst::isTrueWhenEqual(Pred));
2086 | 8.05M |
2087 | 8.05M | // Fold comparisons for non-escaping pointer even if the allocation call
2088 | 8.05M | // cannot be elided. We cannot fold malloc comparison to null. Also, the
2089 | 8.05M | // dynamic allocation call could be either of the operands.
2090 | 8.05M | Value *MI = nullptr;
2091 | 8.05M | if (isAllocLikeFn(LHS, TLI) &&
2092 | 48.1k | llvm::isKnownNonZero(RHS, DL, 0, nullptr, CxtI, DT))
2093 | 1 | MI = LHS;
2094 | 8.05M | else if (8.05M isAllocLikeFn(RHS, TLI) &&
2095 | 3.99k | llvm::isKnownNonZero(LHS, DL, 0, nullptr, CxtI, DT))
2096 | 1.18k | MI = RHS;
2097 | 8.05M | // FIXME: We should also fold the compare when the pointer escapes, but the
2098 | 8.05M | // compare dominates the pointer escape.
2099 | 8.05M | if (MI && 8.05M !PointerMayBeCaptured(MI, true, true)1.18k )
2100 | 2 | return ConstantInt::get(GetCompareTy(LHS),
2101 | 2 | CmpInst::isFalseWhenEqual(Pred));
2102 | 8.65M | }
2103 | 8.65M |
2104 | 8.65M | // Otherwise, fail: none of the folds above applied.
2105 | 8.65M | return nullptr;
2106 | 8.65M | }
2107 | | |
2108 | | /// Fold an icmp when its operands have i1 (or vector of i1) type. Returns nullptr for any other operand type or when no fold applies.
2109 | | static Value *simplifyICmpOfBools(CmpInst::Predicate Pred, Value *LHS,
2110 | 37.6M | Value *RHS, const SimplifyQuery &Q) {
2111 | 37.6M | Type *ITy = GetCompareTy(LHS); // The return type.
2112 | 37.6M | Type *OpTy = LHS->getType(); // The operand type.
2113 | 37.6M | if (!OpTy->isIntOrIntVectorTy(1))
2114 | 37.5M | return nullptr;
2115 | 87.5k |
2116 | 87.5k | // A boolean compared to true/false can be simplified in 14 out of the 20
2117 | 87.5k | // (10 predicates * 2 constants) possible combinations. Cases not handled here
2118 | 87.5k | // require a 'not' of the LHS, so those must be transformed in InstCombine.
2119 | 87.5k | if (87.5k match(RHS, m_Zero())87.5k ) {
2120 | 86.3k | switch (Pred) {
2121 | 11.8k | case CmpInst::ICMP_NE: // X != 0 -> X
2122 | 11.8k | case CmpInst::ICMP_UGT: // X >u 0 -> X
2123 | 11.8k | case CmpInst::ICMP_SLT: // X <s 0 -> X
2124 | 11.8k | return LHS;
2125 | 11.8k |
2126 | 4 | case CmpInst::ICMP_ULT: // X <u 0 -> false
2127 | 4 | case CmpInst::ICMP_SGT: // X >s 0 -> false
2128 | 4 | return getFalse(ITy);
2129 | 4 |
2130 | 3 | case CmpInst::ICMP_UGE: // X >=u 0 -> true
2131 | 3 | case CmpInst::ICMP_SLE: // X <=s 0 -> true
2132 | 3 | return getTrue(ITy);
2133 | 3 |
2134 | 74.4k | default: break;
2135 | 87.5k | }
2136 | 1.24k | } else if (1.24k match(RHS, m_One())1.24k ) {
2137 | 678 | switch (Pred) {
2138 | 414 | case CmpInst::ICMP_EQ: // X == 1 -> X
2139 | 414 | case CmpInst::ICMP_UGE: // X >=u 1 -> X
2140 | 414 | case CmpInst::ICMP_SLE: // X <=s -1 -> X (as a signed i1, the constant 1 is -1)
2141 | 414 | return LHS;
2142 | 414 |
2143 | 24 | case CmpInst::ICMP_UGT: // X >u 1 -> false
2144 | 24 | case CmpInst::ICMP_SLT: // X <s -1 -> false
2145 | 24 | return getFalse(ITy);
2146 | 24 |
2147 | 3 | case CmpInst::ICMP_ULE: // X <=u 1 -> true
2148 | 3 | case CmpInst::ICMP_SGE: // X >=s -1 -> true
2149 | 3 | return getTrue(ITy);
2150 | 3 |
2151 | 237 | default: break;
2152 | 75.2k | }
2153 | 75.2k | }
2154 | 75.2k |
2155 | 75.2k | switch (Pred) {
2156 | 75.1k | default:
2157 | 75.1k | break;
2158 | 21 | case ICmpInst::ICMP_UGE:
2159 | 21 | if (isImpliedCondition(RHS, LHS, Q.DL).getValueOr(false))
2160 | 1 | return getTrue(ITy);
2161 | 20 | break;
2162 | 23 | case ICmpInst::ICMP_SGE:
2163 | 23 | /// For signed comparison, the values for an i1 are 0 and -1
2164 | 23 | /// respectively. This maps into a truth table of:
2165 | 23 | /// LHS | RHS | LHS >=s RHS | LHS implies RHS
2166 | 23 | /// 0 | 0 | 1 (0 >= 0) | 1
2167 | 23 | /// 0 | 1 | 1 (0 >= -1) | 1
2168 | 23 | /// 1 | 0 | 0 (-1 >= 0) | 0
2169 | 23 | /// 1 | 1 | 1 (-1 >= -1) | 1
2170 | 23 | if (isImpliedCondition(LHS, RHS, Q.DL).getValueOr(false))
2171 | 1 | return getTrue(ITy);
2172 | 22 | break;
2173 | 37 | case ICmpInst::ICMP_ULE:
2174 | 37 | if (isImpliedCondition(LHS, RHS, Q.DL).getValueOr(false))
2175 | 8 | return getTrue(ITy);
2176 | 29 | break;
2177 | 75.2k | }
2178 | 75.2k |
2179 | 75.2k | return nullptr;
2180 | 75.2k | }
2181 | | |
2182 | | /// Try hard to fold icmp with zero RHS because this is a common case. Returns nullptr when RHS is not zero or when nothing useful is known about LHS.
2183 | | static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS,
2184 | 37.6M | Value *RHS, const SimplifyQuery &Q) {
2185 | 37.6M | if (!match(RHS, m_Zero()))
2186 | 18.4M | return nullptr;
2187 | 19.1M |
2188 | 19.1M | Type *ITy = GetCompareTy(LHS); // The return type.
2189 | 19.1M | switch (Pred) {
2190 | 0 | default:
2191 | 0 | llvm_unreachable("Unknown ICmp predicate!");
2192 | 1.30k | case ICmpInst::ICMP_ULT:
2193 | 1.30k | return getFalse(ITy);
2194 | 297 | case ICmpInst::ICMP_UGE:
2195 | 297 | return getTrue(ITy);
2196 | 15.1M | case ICmpInst::ICMP_EQ:
2197 | 15.1M | case ICmpInst::ICMP_ULE:
2198 | 15.1M | if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
2199 | 14.6k | return getFalse(ITy);
2200 | 15.1M | break;
2201 | 1.42M | case ICmpInst::ICMP_NE:
2202 | 1.42M | case ICmpInst::ICMP_UGT:
2203 | 1.42M | if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
2204 | 3.58k | return getTrue(ITy);
2205 | 1.41M | break;
2206 | 380k | case ICmpInst::ICMP_SLT: {
2207 | 380k | KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
2208 | 380k | if (LHSKnown.isNegative())
2209 | 5 | return getTrue(ITy);
2210 | 380k | if (380k LHSKnown.isNonNegative()380k )
2211 | 1.13k | return getFalse(ITy);
2212 | 379k | break;
2213 | 379k | }
2214 | 27.3k | case ICmpInst::ICMP_SLE: {
2215 | 27.3k | KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
2216 | 27.3k | if (LHSKnown.isNegative())
2217 | 0 | return getTrue(ITy);
2218 | 27.3k | if (27.3k LHSKnown.isNonNegative() &&
2219 | 122 | isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
2220 | 0 | return getFalse(ITy);
2221 | 27.3k | break;
2222 | 27.3k | }
2223 | 23.6k | case ICmpInst::ICMP_SGE: {
2224 | 23.6k | KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
2225 | 23.6k | if (LHSKnown.isNegative())
2226 | 0 | return getFalse(ITy);
2227 | 23.6k | if (23.6k LHSKnown.isNonNegative()23.6k )
2228 | 100 | return getTrue(ITy);
2229 | 23.5k | break;
2230 | 23.5k | }
2231 | 2.11M | case ICmpInst::ICMP_SGT: {
2232 | 2.11M | KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
2233 | 2.11M | if (LHSKnown.isNegative())
2234 | 5 | return getFalse(ITy);
2235 | 2.11M | if (2.11M LHSKnown.isNonNegative() &&
2236 | 116k | isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
2237 | 1.23k | return getTrue(ITy);
2238 | 2.11M | break;
2239 | 2.11M | }
2240 | 19.1M | }
2241 | 19.1M |
2242 | 19.1M | return nullptr;
2243 | 19.1M | }
2244 | | |
2245 | | /// Many binary operators with a constant operand have an easy-to-compute
2246 | | /// range of outputs. This can be used to fold a comparison to always true or
2247 | | /// always false. The result is written to Lower/Upper as a half-open range [Lower, Upper); both are left untouched when no limit is known.
2248 | 4.95M | static void setLimitsForBinOp(BinaryOperator &BO, APInt &Lower, APInt &Upper) {
2249 | 4.95M | unsigned Width = Lower.getBitWidth();
2250 | 4.95M | const APInt *C;
2251 | 4.95M | switch (BO.getOpcode()) {
2252 | 1.98M | case Instruction::Add:
2253 | 1.98M | if (match(BO.getOperand(1), m_APInt(C)) && 1.98M !C->isNullValue()1.65M ) {
2254 | 1.65M | // FIXME: If we have both nuw and nsw, we should reduce the range further.
2255 | 1.65M | if (BO.hasNoUnsignedWrap()1.65M ) {
2256 | 726k | // 'add nuw x, C' produces [C, UINT_MAX].
2257 | 726k | Lower = *C;
2258 | 1.65M | } else if (925k BO.hasNoSignedWrap()925k ) {
2259 | 635k | if (C->isNegative()635k ) {
2260 | 220k | // 'add nsw x, -C' produces [SINT_MIN, SINT_MAX - C].
2261 | 220k | Lower = APInt::getSignedMinValue(Width);
2262 | 220k | Upper = APInt::getSignedMaxValue(Width) + *C + 1;
2263 | 635k | } else {
2264 | 414k | // 'add nsw x, +C' produces [SINT_MIN + C, SINT_MAX].
2265 | 414k | Lower = APInt::getSignedMinValue(Width) + *C;
2266 | 414k | Upper = APInt::getSignedMaxValue(Width) + 1;
2267 | 414k | }
2268 | 925k | }
2269 | 1.65M | }
2270 | 1.98M | break;
2271 | 4.95M |
2272 | 1.17M | case Instruction::And:
2273 | 1.17M | if (match(BO.getOperand(1), m_APInt(C)))
2274 | 1.17M | // 'and x, C' produces [0, C].
2275 | 1.06M | Upper = *C + 1;
2276 | 1.17M | break;
2277 | 4.95M |
2278 | 70.8k | case Instruction::Or:
2279 | 70.8k | if (match(BO.getOperand(1), m_APInt(C)))
2280 | 70.8k | // 'or x, C' produces [C, UINT_MAX].
2281 | 25.0k | Lower = *C;
2282 | 70.8k | break;
2283 | 4.95M |
2284 | 92.9k | case Instruction::AShr:
2285 | 92.9k | if (match(BO.getOperand(1), m_APInt(C)) && 92.9k C->ult(Width)86.8k ) {
2286 | 86.8k | // 'ashr x, C' produces [INT_MIN >> C, INT_MAX >> C].
2287 | 86.8k | Lower = APInt::getSignedMinValue(Width).ashr(*C);
2288 | 86.8k | Upper = APInt::getSignedMaxValue(Width).ashr(*C) + 1;
2289 | 92.9k | } else if (6.05k match(BO.getOperand(0), m_APInt(C))6.05k ) {
2290 | 38 | unsigned ShiftAmount = Width - 1;
2291 | 38 | if (!C->isNullValue() && 38 BO.isExact()38 )
2292 | 16 | ShiftAmount = C->countTrailingZeros();
2293 | 38 | if (C->isNegative()38 ) {
2294 | 35 | // 'ashr C, x' produces [C, C >> (Width-1)]
2295 | 35 | Lower = *C;
2296 | 35 | Upper = C->ashr(ShiftAmount) + 1;
2297 | 38 | } else {
2298 | 3 | // 'ashr C, x' produces [C >> (Width-1), C]
2299 | 3 | Lower = C->ashr(ShiftAmount);
2300 | 3 | Upper = *C + 1;
2301 | 3 | }
2302 | 6.05k | }
2303 | 92.9k | break;
2304 | 4.95M |
2305 | 68.6k | case Instruction::LShr:
2306 | 68.6k | if (match(BO.getOperand(1), m_APInt(C)) && 68.6k C->ult(Width)67.8k ) {
2307 | 67.8k | // 'lshr x, C' produces [0, UINT_MAX >> C].
2308 | 67.8k | Upper = APInt::getAllOnesValue(Width).lshr(*C) + 1;
2309 | 68.6k | } else if (778 match(BO.getOperand(0), m_APInt(C))778 ) {
2310 | 79 | // 'lshr C, x' produces [C >> (Width-1), C].
2311 | 79 | unsigned ShiftAmount = Width - 1;
2312 | 79 | if (!C->isNullValue() && 79 BO.isExact()79 )
2313 | 16 | ShiftAmount = C->countTrailingZeros();
2314 | 778 | Lower = C->lshr(ShiftAmount);
2315 | 778 | Upper = *C + 1;
2316 | 778 | }
2317 | 68.6k | break;
2318 | 4.95M |
2319 | 287k | case Instruction::Shl:
2320 | 287k | if (match(BO.getOperand(0), m_APInt(C))287k ) {
2321 | 1.30k | if (BO.hasNoUnsignedWrap()1.30k ) {
2322 | 9 | // 'shl nuw C, x' produces [C, C << CLZ(C)]
2323 | 9 | Lower = *C;
2324 | 9 | Upper = Lower.shl(Lower.countLeadingZeros()) + 1;
2325 | 1.30k | } else if (1.29k BO.hasNoSignedWrap()1.29k ) { // TODO: What if both nuw+nsw?
2326 | 8 | if (C->isNegative()8 ) {
2327 | 6 | // 'shl nsw C, x' produces [C << CLO(C)-1, C]
2328 | 6 | unsigned ShiftAmount = C->countLeadingOnes() - 1;
2329 | 6 | Lower = C->shl(ShiftAmount);
2330 | 6 | Upper = *C + 1;
2331 | 8 | } else {
2332 | 2 | // 'shl nsw C, x' produces [C, C << CLZ(C)-1]
2333 | 2 | unsigned ShiftAmount = C->countLeadingZeros() - 1;
2334 | 2 | Lower = *C;
2335 | 2 | Upper = C->shl(ShiftAmount) + 1;
2336 | 2 | }
2337 | 1.29k | }
2338 | 1.30k | }
2339 | 287k | break;
2340 | 4.95M |
2341 | 21.2k | case Instruction::SDiv:
2342 | 21.2k | if (match(BO.getOperand(1), m_APInt(C))21.2k ) {
2343 | 11.8k | APInt IntMin = APInt::getSignedMinValue(Width);
2344 | 11.8k | APInt IntMax = APInt::getSignedMaxValue(Width);
2345 | 11.8k | if (C->isAllOnesValue()11.8k ) {
2346 | 2 | // 'sdiv x, -1' produces [INT_MIN + 1, INT_MAX]
2347 | 2 | // (this is the C == -1 case)
2348 | 2 | Lower = IntMin + 1;
2349 | 2 | Upper = IntMax + 1;
2350 | 11.8k | } else if (11.8k C->countLeadingZeros() < Width - 111.8k ) {
2351 | 11.8k | // 'sdiv x, C' produces [INT_MIN / C, INT_MAX / C]
2352 | 11.8k | // where C != -1 and C != 0 and C != 1
2353 | 11.8k | Lower = IntMin.sdiv(*C);
2354 | 11.8k | Upper = IntMax.sdiv(*C);
2355 | 11.8k | if (Lower.sgt(Upper))
2356 | 102 | std::swap(Lower, Upper);
2357 | 11.8k | Upper = Upper + 1;
2358 | 11.8k | assert(Upper != Lower && "Upper part of range has wrapped!");
2359 | 11.8k | }
2360 | 21.2k | } else if (9.37k match(BO.getOperand(0), m_APInt(C))9.37k ) {
2361 | 195 | if (C->isMinSignedValue()195 ) {
2362 | 1 | // 'sdiv INT_MIN, x' produces [INT_MIN, INT_MIN / -2].
2363 | 1 | Lower = *C;
2364 | 1 | Upper = Lower.lshr(1) + 1;
2365 | 195 | } else {
2366 | 194 | // 'sdiv C, x' produces [-|C|, |C|].
2367 | 194 | Upper = C->abs() + 1;
2368 | 194 | Lower = (-Upper) + 1;
2369 | 194 | }
2370 | 9.37k | }
2371 | 21.2k | break;
2372 | 4.95M |
2373 | 142k | case Instruction::UDiv:
2374 | 142k | if (match(BO.getOperand(1), m_APInt(C)) && 142k !C->isNullValue()135k ) {
2375 | 135k | // 'udiv x, C' produces [0, UINT_MAX / C].
2376 | 135k | Upper = APInt::getMaxValue(Width).udiv(*C) + 1;
2377 | 142k | } else if (7.04k match(BO.getOperand(0), m_APInt(C))7.04k ) {
2378 | 5.65k | // 'udiv C, x' produces [0, C].
2379 | 5.65k | Upper = *C + 1;
2380 | 5.65k | }
2381 | 142k | break;
2382 | 4.95M |
2383 | 61.6k | case Instruction::SRem:
2384 | 61.6k | if (match(BO.getOperand(1), m_APInt(C))61.6k ) {
2385 | 5.11k | // 'srem x, C' produces (-|C|, |C|).
2386 | 5.11k | Upper = C->abs();
2387 | 5.11k | Lower = (-Upper) + 1;
2388 | 5.11k | }
2389 | 61.6k | break;
2390 | 4.95M |
2391 | 11.3k | case Instruction::URem:
2392 | 11.3k | if (match(BO.getOperand(1), m_APInt(C)))
2393 | 11.3k | // 'urem x, C' produces [0, C).
2394 | 3.80k | Upper = *C;
2395 | 11.3k | break;
2396 | 4.95M |
2397 | 1.03M | default:
2398 | 1.03M | break;
2399 | 4.95M | }
2400 | 4.95M | }
2401 | | |
2402 | | static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS,
2403 | 37.6M | Value *RHS) {
2404 | 37.6M | const APInt *C;
2405 | 37.6M | if (!match(RHS, m_APInt(C)))
2406 | 16.5M | return nullptr;
2407 | 21.1M |
2408 | 21.1M | // Rule out tautological comparisons (e.g., ult 0 or uge 0).
2409 | 21.1M | ConstantRange RHS_CR = ConstantRange::makeExactICmpRegion(Pred, *C);
2410 | 21.1M | if (RHS_CR.isEmptySet())
2411 | 860 | return ConstantInt::getFalse(GetCompareTy(RHS));
2412 | 21.1M | if (21.1M RHS_CR.isFullSet()21.1M )
2413 | 11.1k | return ConstantInt::getTrue(GetCompareTy(RHS));
2414 | 21.0M |
2415 | 21.0M | // Find the range of possible values for binary operators. Lower == Upper afterwards means no limit was found, so the full set is used.
2416 | 21.0M | unsigned Width = C->getBitWidth();
2417 | 21.0M | APInt Lower = APInt(Width, 0);
2418 | 21.0M | APInt Upper = APInt(Width, 0);
2419 | 21.0M | if (auto *BO = dyn_cast<BinaryOperator>(LHS))
2420 | 4.95M | setLimitsForBinOp(*BO, Lower, Upper);
2421 | 21.0M |
2422 | 21.0M | ConstantRange LHS_CR =
2423 | 21.0M | Lower != Upper ? ConstantRange(Lower, Upper)2.76M : ConstantRange(Width, true)18.3M ;
2424 | 21.0M |
2425 | 21.0M | if (auto *I = dyn_cast<Instruction>(LHS))
2426 | 19.8M | if (auto *19.8M Ranges19.8M = I->getMetadata(LLVMContext::MD_range))
2427 | 558k | LHS_CR = LHS_CR.intersectWith(getConstantRangeFromMetadata(*Ranges));
2428 | 21.0M |
2429 | 21.0M | if (!LHS_CR.isFullSet()21.0M ) {
2430 | 3.32M | if (RHS_CR.contains(LHS_CR))
2431 | 3.51k | return ConstantInt::getTrue(GetCompareTy(RHS));
2432 | 3.32M | if (3.32M RHS_CR.inverse().contains(LHS_CR)3.32M )
2433 | 61.3k | return ConstantInt::getFalse(GetCompareTy(RHS));
2434 | 21.0M | }
2435 | 21.0M |
2436 | 21.0M | return nullptr;
2437 | 21.0M | }
2438 | | |
2439 | | /// TODO: A large part of this logic is duplicated in InstCombine's |
2440 | | /// foldICmpBinOp(). We should be able to share that and avoid the code |
2441 | | /// duplication. |
2442 | | static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, |
2443 | | Value *RHS, const SimplifyQuery &Q, |
2444 | 37.5M | unsigned MaxRecurse) { |
2445 | 37.5M | Type *ITy = GetCompareTy(LHS); // The return type. |
2446 | 37.5M | |
2447 | 37.5M | BinaryOperator *LBO = dyn_cast<BinaryOperator>(LHS); |
2448 | 37.5M | BinaryOperator *RBO = dyn_cast<BinaryOperator>(RHS); |
2449 | 37.5M | if (MaxRecurse && 37.5M (LBO || 37.2M RBO28.8M )) { |
2450 | 9.32M | // Analyze the case when either LHS or RHS is an add instruction. |
2451 | 9.32M | Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr; |
2452 | 9.32M | // LHS = A + B (or A and B are null); RHS = C + D (or C and D are null). |
2453 | 9.32M | bool NoLHSWrapProblem = false, NoRHSWrapProblem = false; |
2454 | 9.32M | if (LBO && 9.32M LBO->getOpcode() == Instruction::Add8.39M ) { |
2455 | 4.44M | A = LBO->getOperand(0); |
2456 | 4.44M | B = LBO->getOperand(1); |
2457 | 4.44M | NoLHSWrapProblem = |
2458 | 4.44M | ICmpInst::isEquality(Pred) || |
2459 | 2.81M | (CmpInst::isUnsigned(Pred) && 2.81M LBO->hasNoUnsignedWrap()831k ) || |
2460 | 2.46M | (CmpInst::isSigned(Pred) && 2.46M LBO->hasNoSignedWrap()1.98M ); |
2461 | 4.44M | } |
2462 | 9.32M | if (RBO && 9.32M RBO->getOpcode() == Instruction::Add1.73M ) { |
2463 | 988k | C = RBO->getOperand(0); |
2464 | 988k | D = RBO->getOperand(1); |
2465 | 988k | NoRHSWrapProblem = |
2466 | 988k | ICmpInst::isEquality(Pred) || |
2467 | 823k | (CmpInst::isUnsigned(Pred) && 823k RBO->hasNoUnsignedWrap()393k ) || |
2468 | 597k | (CmpInst::isSigned(Pred) && 597k RBO->hasNoSignedWrap()430k ); |
2469 | 988k | } |
2470 | 9.32M | |
2471 | 9.32M | // icmp (X+Y), X -> icmp Y, 0 for equalities or if there is no overflow. |
2472 | 9.32M | if ((A == RHS || 9.32M B == RHS9.32M ) && NoLHSWrapProblem53.7k ) |
2473 | 42.8k | if (Value *42.8k V42.8k = SimplifyICmpInst(Pred, A == RHS ? B : A, |
2474 | 42.8k | Constant::getNullValue(RHS->getType()), Q, |
2475 | 42.8k | MaxRecurse - 1)) |
2476 | 921 | return V; |
2477 | 9.32M | |
2478 | 9.32M | // icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow. |
2479 | 9.32M | if (9.32M (C == LHS || 9.32M D == LHS9.31M ) && NoRHSWrapProblem15.0k ) |
2480 | 14.7k | if (Value *14.7k V14.7k = |
2481 | 14.7k | SimplifyICmpInst(Pred, Constant::getNullValue(LHS->getType()), |
2482 | 14.7k | C == LHS ? D : C, Q, MaxRecurse - 1)) |
2483 | 2.58k | return V; |
2484 | 9.32M | |
2485 | 9.32M | // icmp (X+Y), (X+Z) -> icmp Y,Z for equalities or if there is no overflow. |
2486 | 9.32M | if (9.32M A && 9.32M C4.44M && (A == C || 129k A == D127k || B == C126k || B == D126k ) && NoLHSWrapProblem10.4k && |
2487 | 9.32M | NoRHSWrapProblem7.15k ) { |
2488 | 6.88k | // Determine Y and Z in the form icmp (X+Y), (X+Z). |
2489 | 6.88k | Value *Y, *Z; |
2490 | 6.88k | if (A == C6.88k ) { |
2491 | 323 | // C + B == C + D -> B == D |
2492 | 323 | Y = B; |
2493 | 323 | Z = D; |
2494 | 6.88k | } else if (6.56k A == D6.56k ) { |
2495 | 268 | // D + B == C + D -> B == C |
2496 | 268 | Y = B; |
2497 | 268 | Z = C; |
2498 | 6.56k | } else if (6.29k B == C6.29k ) { |
2499 | 46 | // A + C == C + D -> A == D |
2500 | 46 | Y = A; |
2501 | 46 | Z = D; |
2502 | 6.29k | } else { |
2503 | 6.25k | assert(B == D); |
2504 | 6.25k | // A + D == C + D -> A == C |
2505 | 6.25k | Y = A; |
2506 | 6.25k | Z = C; |
2507 | 6.25k | } |
2508 | 6.88k | if (Value *V = SimplifyICmpInst(Pred, Y, Z, Q, MaxRecurse - 1)) |
2509 | 251 | return V; |
2510 | 37.5M | } |
2511 | 9.32M | } |
2512 | 37.5M | |
2513 | 37.5M | { |
2514 | 37.5M | Value *Y = nullptr; |
2515 | 37.5M | // icmp pred (or X, Y), X |
2516 | 37.5M | if (LBO && 37.5M match(LBO, m_c_Or(m_Value(Y), m_Specific(RHS)))8.44M ) { |
2517 | 13.9k | if (Pred == ICmpInst::ICMP_ULT) |
2518 | 1 | return getFalse(ITy); |
2519 | 13.9k | if (13.9k Pred == ICmpInst::ICMP_UGE13.9k ) |
2520 | 1 | return getTrue(ITy); |
2521 | 13.9k | |
2522 | 13.9k | if (13.9k Pred == ICmpInst::ICMP_SLT || 13.9k Pred == ICmpInst::ICMP_SGE13.9k ) { |
2523 | 112 | KnownBits RHSKnown = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); |
2524 | 112 | KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); |
2525 | 112 | if (RHSKnown.isNonNegative() && 112 YKnown.isNegative()48 ) |
2526 | 2 | return Pred == ICmpInst::ICMP_SLT ? 2 getTrue(ITy)1 : getFalse(ITy)1 ; |
2527 | 110 | if (110 RHSKnown.isNegative() || 110 YKnown.isNonNegative()104 ) |
2528 | 14 | return Pred == ICmpInst::ICMP_SLT ? 14 getFalse(ITy)9 : getTrue(ITy)5 ; |
2529 | 37.5M | } |
2530 | 13.9k | } |
2531 | 37.5M | // icmp pred X, (or X, Y) |
2532 | 37.5M | if (37.5M RBO && 37.5M match(RBO, m_c_Or(m_Value(Y), m_Specific(LHS)))1.73M ) { |
2533 | 786 | if (Pred == ICmpInst::ICMP_ULE) |
2534 | 1 | return getTrue(ITy); |
2535 | 785 | if (785 Pred == ICmpInst::ICMP_UGT785 ) |
2536 | 1 | return getFalse(ITy); |
2537 | 784 | |
2538 | 784 | if (784 Pred == ICmpInst::ICMP_SGT || 784 Pred == ICmpInst::ICMP_SLE729 ) { |
2539 | 108 | KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); |
2540 | 108 | KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); |
2541 | 108 | if (LHSKnown.isNonNegative() && 108 YKnown.isNegative()48 ) |
2542 | 2 | return Pred == ICmpInst::ICMP_SGT ? 2 getTrue(ITy)1 : getFalse(ITy)1 ; |
2543 | 106 | if (106 LHSKnown.isNegative() || 106 YKnown.isNonNegative()100 ) |
2544 | 12 | return Pred == ICmpInst::ICMP_SGT ? 12 getFalse(ITy)7 : getTrue(ITy)5 ; |
2545 | 37.5M | } |
2546 | 786 | } |
2547 | 37.5M | } |
2548 | 37.5M | |
2549 | 37.5M | // icmp pred (and X, Y), X |
2550 | 37.5M | if (37.5M LBO && 37.5M match(LBO, m_c_And(m_Value(), m_Specific(RHS)))8.44M ) { |
2551 | 157k | if (Pred == ICmpInst::ICMP_UGT) |
2552 | 1 | return getFalse(ITy); |
2553 | 157k | if (157k Pred == ICmpInst::ICMP_ULE157k ) |
2554 | 1 | return getTrue(ITy); |
2555 | 37.5M | } |
2556 | 37.5M | // icmp pred X, (and X, Y) |
2557 | 37.5M | if (37.5M RBO && 37.5M match(RBO, m_c_And(m_Value(), m_Specific(LHS)))1.73M ) { |
2558 | 122k | if (Pred == ICmpInst::ICMP_UGE) |
2559 | 1 | return getTrue(ITy); |
2560 | 122k | if (122k Pred == ICmpInst::ICMP_ULT122k ) |
2561 | 1 | return getFalse(ITy); |
2562 | 37.5M | } |
2563 | 37.5M | |
2564 | 37.5M | // 0 - (zext X) pred C |
2565 | 37.5M | if (37.5M !CmpInst::isUnsigned(Pred) && 37.5M match(LHS, m_Neg(m_ZExt(m_Value())))32.4M ) { |
2566 | 18 | if (ConstantInt *RHSC18 = dyn_cast<ConstantInt>(RHS)) { |
2567 | 6 | if (RHSC->getValue().isStrictlyPositive()6 ) { |
2568 | 4 | if (Pred == ICmpInst::ICMP_SLT) |
2569 | 1 | return ConstantInt::getTrue(RHSC->getContext()); |
2570 | 3 | if (3 Pred == ICmpInst::ICMP_SGE3 ) |
2571 | 1 | return ConstantInt::getFalse(RHSC->getContext()); |
2572 | 2 | if (2 Pred == ICmpInst::ICMP_EQ2 ) |
2573 | 1 | return ConstantInt::getFalse(RHSC->getContext()); |
2574 | 1 | if (1 Pred == ICmpInst::ICMP_NE1 ) |
2575 | 1 | return ConstantInt::getTrue(RHSC->getContext()); |
2576 | 2 | } |
2577 | 2 | if (2 RHSC->getValue().isNonNegative()2 ) { |
2578 | 2 | if (Pred == ICmpInst::ICMP_SLE) |
2579 | 1 | return ConstantInt::getTrue(RHSC->getContext()); |
2580 | 1 | if (1 Pred == ICmpInst::ICMP_SGT1 ) |
2581 | 1 | return ConstantInt::getFalse(RHSC->getContext()); |
2582 | 37.5M | } |
2583 | 6 | } |
2584 | 18 | } |
2585 | 37.5M | |
2586 | 37.5M | // icmp pred (urem X, Y), Y |
2587 | 37.5M | if (37.5M LBO && 37.5M match(LBO, m_URem(m_Value(), m_Specific(RHS)))8.44M ) { |
2588 | 19 | switch (Pred) { |
2589 | 0 | default: |
2590 | 0 | break; |
2591 | 1 | case ICmpInst::ICMP_SGT: |
2592 | 1 | case ICmpInst::ICMP_SGE: { |
2593 | 1 | KnownBits Known = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); |
2594 | 1 | if (!Known.isNonNegative()) |
2595 | 1 | break; |
2596 | 0 | LLVM_FALLTHROUGH0 ; |
2597 | 0 | } |
2598 | 1 | case ICmpInst::ICMP_EQ: |
2599 | 1 | case ICmpInst::ICMP_UGT: |
2600 | 1 | case ICmpInst::ICMP_UGE: |
2601 | 1 | return getFalse(ITy); |
2602 | 1 | case ICmpInst::ICMP_SLT: |
2603 | 1 | case ICmpInst::ICMP_SLE: { |
2604 | 1 | KnownBits Known = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); |
2605 | 1 | if (!Known.isNonNegative()) |
2606 | 1 | break; |
2607 | 0 | LLVM_FALLTHROUGH0 ; |
2608 | 0 | } |
2609 | 16 | case ICmpInst::ICMP_NE: |
2610 | 16 | case ICmpInst::ICMP_ULT: |
2611 | 16 | case ICmpInst::ICMP_ULE: |
2612 | 16 | return getTrue(ITy); |
2613 | 37.5M | } |
2614 | 37.5M | } |
2615 | 37.5M | |
2616 | 37.5M | // icmp pred X, (urem Y, X) |
2617 | 37.5M | if (37.5M RBO && 37.5M match(RBO, m_URem(m_Value(), m_Specific(LHS)))1.73M ) { |
2618 | 549 | switch (Pred) { |
2619 | 0 | default: |
2620 | 0 | break; |
2621 | 0 | case ICmpInst::ICMP_SGT: |
2622 | 0 | case ICmpInst::ICMP_SGE: { |
2623 | 0 | KnownBits Known = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); |
2624 | 0 | if (!Known.isNonNegative()) |
2625 | 0 | break; |
2626 | 0 | LLVM_FALLTHROUGH0 ; |
2627 | 0 | } |
2628 | 1 | case ICmpInst::ICMP_NE: |
2629 | 1 | case ICmpInst::ICMP_UGT: |
2630 | 1 | case ICmpInst::ICMP_UGE: |
2631 | 1 | return getTrue(ITy); |
2632 | 0 | case ICmpInst::ICMP_SLT: |
2633 | 0 | case ICmpInst::ICMP_SLE: { |
2634 | 0 | KnownBits Known = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); |
2635 | 0 | if (!Known.isNonNegative()) |
2636 | 0 | break; |
2637 | 0 | LLVM_FALLTHROUGH0 ; |
2638 | 0 | } |
2639 | 548 | case ICmpInst::ICMP_EQ: |
2640 | 548 | case ICmpInst::ICMP_ULT: |
2641 | 548 | case ICmpInst::ICMP_ULE: |
2642 | 548 | return getFalse(ITy); |
2643 | 37.5M | } |
2644 | 37.5M | } |
2645 | 37.5M | |
2646 | 37.5M | // x >> y <=u x |
2647 | 37.5M | // x udiv y <=u x. |
2648 | 37.5M | if (37.5M LBO && 37.5M (match(LBO, m_LShr(m_Specific(RHS), m_Value())) || |
2649 | 37.5M | match(LBO, m_UDiv(m_Specific(RHS), m_Value()))8.44M )) { |
2650 | 206 | // icmp pred (X op Y), X |
2651 | 206 | if (Pred == ICmpInst::ICMP_UGT) |
2652 | 2 | return getFalse(ITy); |
2653 | 204 | if (204 Pred == ICmpInst::ICMP_ULE204 ) |
2654 | 2 | return getTrue(ITy); |
2655 | 37.5M | } |
2656 | 37.5M | |
2657 | 37.5M | // x >=u x >> y |
2658 | 37.5M | // x >=u x udiv y. |
2659 | 37.5M | if (37.5M RBO && 37.5M (match(RBO, m_LShr(m_Specific(LHS), m_Value())) || |
2660 | 37.5M | match(RBO, m_UDiv(m_Specific(LHS), m_Value()))1.73M )) { |
2661 | 20 | // icmp pred X, (X op Y) |
2662 | 20 | if (Pred == ICmpInst::ICMP_ULT) |
2663 | 2 | return getFalse(ITy); |
2664 | 18 | if (18 Pred == ICmpInst::ICMP_UGE18 ) |
2665 | 2 | return getTrue(ITy); |
2666 | 37.5M | } |
2667 | 37.5M | |
2668 | 37.5M | // handle: |
2669 | 37.5M | // CI2 << X == CI |
2670 | 37.5M | // CI2 << X != CI |
2671 | 37.5M | // |
2672 | 37.5M | // where CI2 is a power of 2 and CI isn't |
2673 | 37.5M | if (auto *37.5M CI37.5M = dyn_cast<ConstantInt>(RHS)) { |
2674 | 20.9M | const APInt *CI2Val, *CIVal = &CI->getValue(); |
2675 | 20.9M | if (LBO && 20.9M match(LBO, m_Shl(m_APInt(CI2Val), m_Value()))4.88M && |
2676 | 20.9M | CI2Val->isPowerOf2()1.28k ) { |
2677 | 1.23k | if (!CIVal->isPowerOf2()1.23k ) { |
2678 | 841 | // CI2 << X can equal zero in some circumstances, |
2679 | 841 | // this simplification is unsafe if CI is zero. |
2680 | 841 | // |
2681 | 841 | // We know it is safe if: |
2682 | 841 | // - The shift is nsw, we can't shift out the one bit. |
2683 | 841 | // - The shift is nuw, we can't shift out the one bit. |
2684 | 841 | // - CI2 is one |
2685 | 841 | // - CI isn't zero |
2686 | 841 | if (LBO->hasNoSignedWrap() || 841 LBO->hasNoUnsignedWrap()841 || |
2687 | 841 | CI2Val->isOneValue()838 || !CI->isZero()67 ) { |
2688 | 774 | if (Pred == ICmpInst::ICMP_EQ) |
2689 | 2 | return ConstantInt::getFalse(RHS->getContext()); |
2690 | 772 | if (772 Pred == ICmpInst::ICMP_NE772 ) |
2691 | 1 | return ConstantInt::getTrue(RHS->getContext()); |
2692 | 1.23k | } |
2693 | 841 | } |
2694 | 1.23k | if (1.23k CIVal->isSignMask() && 1.23k CI2Val->isOneValue()6 ) { |
2695 | 6 | if (Pred == ICmpInst::ICMP_UGT) |
2696 | 3 | return ConstantInt::getFalse(RHS->getContext()); |
2697 | 3 | if (3 Pred == ICmpInst::ICMP_ULE3 ) |
2698 | 1 | return ConstantInt::getTrue(RHS->getContext()); |
2699 | 37.5M | } |
2700 | 1.23k | } |
2701 | 20.9M | } |
2702 | 37.5M | |
2703 | 37.5M | if (37.5M MaxRecurse && 37.5M LBO37.2M && RBO8.39M && LBO->getOpcode() == RBO->getOpcode()800k && |
2704 | 37.5M | LBO->getOperand(1) == RBO->getOperand(1)284k ) { |
2705 | 19.6k | switch (LBO->getOpcode()) { |
2706 | 13.9k | default: |
2707 | 13.9k | break; |
2708 | 1.32k | case Instruction::UDiv: |
2709 | 1.32k | case Instruction::LShr: |
2710 | 1.32k | if (ICmpInst::isSigned(Pred) || 1.32k !LBO->isExact()1.32k || !RBO->isExact()1 ) |
2711 | 1.32k | break; |
2712 | 1 | if (Value *1 V1 = SimplifyICmpInst(Pred, LBO->getOperand(0), |
2713 | 1 | RBO->getOperand(0), Q, MaxRecurse - 1)) |
2714 | 1 | return V; |
2715 | 0 | break; |
2716 | 1.02k | case Instruction::SDiv: |
2717 | 1.02k | if (!ICmpInst::isEquality(Pred) || 1.02k !LBO->isExact()3 || !RBO->isExact()3 ) |
2718 | 1.02k | break; |
2719 | 2 | if (Value *2 V2 = SimplifyICmpInst(Pred, LBO->getOperand(0), |
2720 | 2 | RBO->getOperand(0), Q, MaxRecurse - 1)) |
2721 | 1 | return V; |
2722 | 1 | break; |
2723 | 2.39k | case Instruction::AShr: |
2724 | 2.39k | if (!LBO->isExact() || 2.39k !RBO->isExact()367 ) |
2725 | 2.02k | break; |
2726 | 367 | if (Value *367 V367 = SimplifyICmpInst(Pred, LBO->getOperand(0), |
2727 | 367 | RBO->getOperand(0), Q, MaxRecurse - 1)) |
2728 | 0 | return V; |
2729 | 367 | break; |
2730 | 969 | case Instruction::Shl: { |
2731 | 34 | bool NUW = LBO->hasNoUnsignedWrap() && RBO->hasNoUnsignedWrap(); |
2732 | 174 | bool NSW = LBO->hasNoSignedWrap() && RBO->hasNoSignedWrap(); |
2733 | 969 | if (!NUW && 969 !NSW969 ) |
2734 | 795 | break; |
2735 | 174 | if (174 !NSW && 174 ICmpInst::isSigned(Pred)0 ) |
2736 | 0 | break; |
2737 | 174 | if (Value *174 V174 = SimplifyICmpInst(Pred, LBO->getOperand(0), |
2738 | 174 | RBO->getOperand(0), Q, MaxRecurse - 1)) |
2739 | 0 | return V; |
2740 | 174 | break; |
2741 | 174 | } |
2742 | 19.6k | } |
2743 | 19.6k | } |
2744 | 37.5M | return nullptr; |
2745 | 37.5M | } |
2746 | | |
2747 | | /// Simplify integer comparisons where at least one operand of the compare |
2748 | | /// matches an integer min/max idiom. |
2749 | | static Value *simplifyICmpWithMinMax(CmpInst::Predicate Pred, Value *LHS, |
2750 | | Value *RHS, const SimplifyQuery &Q, |
2751 | 37.5M | unsigned MaxRecurse) { |
2752 | 37.5M | Type *ITy = GetCompareTy(LHS); // The return type. |
2753 | 37.5M | Value *A, *B; |
2754 | 37.5M | CmpInst::Predicate P = CmpInst::BAD_ICMP_PREDICATE; |
2755 | 37.5M | CmpInst::Predicate EqP; // Chosen so that "A == max/min(A,B)" iff "A EqP B". |
2756 | 37.5M | |
2757 | 37.5M | // Signed variants on "max(a,b)>=a -> true". |
2758 | 37.5M | if (match(LHS, m_SMax(m_Value(A), m_Value(B))) && 37.5M (A == RHS || 573k B == RHS573k )) { |
2759 | 109k | if (A != RHS) |
2760 | 109k | std::swap(A, B); // smax(A, B) pred A. |
2761 | 109k | EqP = CmpInst::ICMP_SGE; // "A == smax(A, B)" iff "A sge B". |
2762 | 109k | // We analyze this as smax(A, B) pred A. |
2763 | 109k | P = Pred; |
2764 | 37.5M | } else if (37.3M match(RHS, m_SMax(m_Value(A), m_Value(B))) && |
2765 | 37.3M | (A == LHS || 221k B == LHS221k )) { |
2766 | 329 | if (A != LHS) |
2767 | 306 | std::swap(A, B); // A pred smax(A, B). |
2768 | 329 | EqP = CmpInst::ICMP_SGE; // "A == smax(A, B)" iff "A sge B". |
2769 | 329 | // We analyze this as smax(A, B) swapped-pred A. |
2770 | 329 | P = CmpInst::getSwappedPredicate(Pred); |
2771 | 37.3M | } else if (37.3M match(LHS, m_SMin(m_Value(A), m_Value(B))) && |
2772 | 37.3M | (A == RHS || 19.4k B == RHS19.3k )) { |
2773 | 469 | if (A != RHS) |
2774 | 433 | std::swap(A, B); // smin(A, B) pred A. |
2775 | 469 | EqP = CmpInst::ICMP_SLE; // "A == smin(A, B)" iff "A sle B". |
2776 | 469 | // We analyze this as smax(-A, -B) swapped-pred -A. |
2777 | 469 | // Note that we do not need to actually form -A or -B thanks to EqP. |
2778 | 469 | P = CmpInst::getSwappedPredicate(Pred); |
2779 | 37.3M | } else if (37.3M match(RHS, m_SMin(m_Value(A), m_Value(B))) && |
2780 | 37.3M | (A == LHS || 7.12k B == LHS7.07k )) { |
2781 | 150 | if (A != LHS) |
2782 | 100 | std::swap(A, B); // A pred smin(A, B). |
2783 | 37.3M | EqP = CmpInst::ICMP_SLE; // "A == smin(A, B)" iff "A sle B". |
2784 | 37.3M | // We analyze this as smax(-A, -B) pred -A. |
2785 | 37.3M | // Note that we do not need to actually form -A or -B thanks to EqP. |
2786 | 37.3M | P = Pred; |
2787 | 37.3M | } |
2788 | 37.5M | if (P != CmpInst::BAD_ICMP_PREDICATE37.5M ) { |
2789 | 110k | // Cases correspond to "max(A, B) p A". |
2790 | 110k | switch (P) { |
2791 | 50.0k | default: |
2792 | 50.0k | break; |
2793 | 50.2k | case CmpInst::ICMP_EQ: |
2794 | 50.2k | case CmpInst::ICMP_SLE: |
2795 | 50.2k | // Equivalent to "A EqP B". This may be the same as the condition tested |
2796 | 50.2k | // in the max/min; if so, we can just return that. |
2797 | 50.2k | if (Value *V = ExtractEquivalentCondition(LHS, EqP, A, B)) |
2798 | 1 | return V; |
2799 | 50.2k | if (Value *50.2k V50.2k = ExtractEquivalentCondition(RHS, EqP, A, B)) |
2800 | 1 | return V; |
2801 | 50.2k | // Otherwise, see if "A EqP B" simplifies. |
2802 | 50.2k | if (50.2k MaxRecurse50.2k ) |
2803 | 50.2k | if (Value *50.2k V50.2k = SimplifyICmpInst(EqP, A, B, Q, MaxRecurse - 1)) |
2804 | 0 | return V; |
2805 | 50.2k | break; |
2806 | 9.65k | case CmpInst::ICMP_NE: |
2807 | 9.65k | case CmpInst::ICMP_SGT: { |
2808 | 9.65k | CmpInst::Predicate InvEqP = CmpInst::getInversePredicate(EqP); |
2809 | 9.65k | // Equivalent to "A InvEqP B". This may be the same as the condition |
2810 | 9.65k | // tested in the max/min; if so, we can just return that. |
2811 | 9.65k | if (Value *V = ExtractEquivalentCondition(LHS, InvEqP, A, B)) |
2812 | 6.23k | return V; |
2813 | 3.41k | if (Value *3.41k V3.41k = ExtractEquivalentCondition(RHS, InvEqP, A, B)) |
2814 | 30 | return V; |
2815 | 3.38k | // Otherwise, see if "A InvEqP B" simplifies. |
2816 | 3.38k | if (3.38k MaxRecurse3.38k ) |
2817 | 3.37k | if (Value *3.37k V3.37k = SimplifyICmpInst(InvEqP, A, B, Q, MaxRecurse - 1)) |
2818 | 0 | return V; |
2819 | 3.38k | break; |
2820 | 3.38k | } |
2821 | 8 | case CmpInst::ICMP_SGE: |
2822 | 8 | // Always true. |
2823 | 8 | return getTrue(ITy); |
2824 | 352 | case CmpInst::ICMP_SLT: |
2825 | 352 | // Always false. |
2826 | 352 | return getFalse(ITy); |
2827 | 37.5M | } |
2828 | 37.5M | } |
2829 | 37.5M | |
2830 | 37.5M | // Unsigned variants on "max(a,b)>=a -> true". |
2831 | 37.5M | P = CmpInst::BAD_ICMP_PREDICATE; |
2832 | 37.5M | if (match(LHS, m_UMax(m_Value(A), m_Value(B))) && 37.5M (A == RHS || 113k B == RHS113k )) { |
2833 | 126 | if (A != RHS) |
2834 | 4 | std::swap(A, B); // umax(A, B) pred A. |
2835 | 126 | EqP = CmpInst::ICMP_UGE; // "A == umax(A, B)" iff "A uge B". |
2836 | 126 | // We analyze this as umax(A, B) pred A. |
2837 | 126 | P = Pred; |
2838 | 37.5M | } else if (37.5M match(RHS, m_UMax(m_Value(A), m_Value(B))) && |
2839 | 37.5M | (A == LHS || 103k B == LHS103k )) { |
2840 | 11 | if (A != LHS) |
2841 | 0 | std::swap(A, B); // A pred umax(A, B). |
2842 | 11 | EqP = CmpInst::ICMP_UGE; // "A == umax(A, B)" iff "A uge B". |
2843 | 11 | // We analyze this as umax(A, B) swapped-pred A. |
2844 | 11 | P = CmpInst::getSwappedPredicate(Pred); |
2845 | 37.5M | } else if (37.5M match(LHS, m_UMin(m_Value(A), m_Value(B))) && |
2846 | 37.5M | (A == RHS || 12.3k B == RHS12.2k )) { |
2847 | 75 | if (A != RHS) |
2848 | 63 | std::swap(A, B); // umin(A, B) pred A. |
2849 | 75 | EqP = CmpInst::ICMP_ULE; // "A == umin(A, B)" iff "A ule B". |
2850 | 75 | // We analyze this as umax(-A, -B) swapped-pred -A. |
2851 | 75 | // Note that we do not need to actually form -A or -B thanks to EqP. |
2852 | 75 | P = CmpInst::getSwappedPredicate(Pred); |
2853 | 37.5M | } else if (37.5M match(RHS, m_UMin(m_Value(A), m_Value(B))) && |
2854 | 37.5M | (A == LHS || 5.55k B == LHS5.51k )) { |
2855 | 68 | if (A != LHS) |
2856 | 22 | std::swap(A, B); // A pred umin(A, B). |
2857 | 37.5M | EqP = CmpInst::ICMP_ULE; // "A == umin(A, B)" iff "A ule B". |
2858 | 37.5M | // We analyze this as umax(-A, -B) pred -A. |
2859 | 37.5M | // Note that we do not need to actually form -A or -B thanks to EqP. |
2860 | 37.5M | P = Pred; |
2861 | 37.5M | } |
2862 | 37.5M | if (P != CmpInst::BAD_ICMP_PREDICATE37.5M ) { |
2863 | 280 | // Cases correspond to "max(A, B) p A". |
2864 | 280 | switch (P) { |
2865 | 0 | default: |
2866 | 0 | break; |
2867 | 31 | case CmpInst::ICMP_EQ: |
2868 | 31 | case CmpInst::ICMP_ULE: |
2869 | 31 | // Equivalent to "A EqP B". This may be the same as the condition tested |
2870 | 31 | // in the max/min; if so, we can just return that. |
2871 | 31 | if (Value *V = ExtractEquivalentCondition(LHS, EqP, A, B)) |
2872 | 1 | return V; |
2873 | 30 | if (Value *30 V30 = ExtractEquivalentCondition(RHS, EqP, A, B)) |
2874 | 1 | return V; |
2875 | 29 | // Otherwise, see if "A EqP B" simplifies. |
2876 | 29 | if (29 MaxRecurse29 ) |
2877 | 29 | if (Value *29 V29 = SimplifyICmpInst(EqP, A, B, Q, MaxRecurse - 1)) |
2878 | 0 | return V; |
2879 | 29 | break; |
2880 | 103 | case CmpInst::ICMP_NE: |
2881 | 103 | case CmpInst::ICMP_UGT: { |
2882 | 103 | CmpInst::Predicate InvEqP = CmpInst::getInversePredicate(EqP); |
2883 | 103 | // Equivalent to "A InvEqP B". This may be the same as the condition |
2884 | 103 | // tested in the max/min; if so, we can just return that. |
2885 | 103 | if (Value *V = ExtractEquivalentCondition(LHS, InvEqP, A, B)) |
2886 | 6 | return V; |
2887 | 97 | if (Value *97 V97 = ExtractEquivalentCondition(RHS, InvEqP, A, B)) |
2888 | 22 | return V; |
2889 | 75 | // Otherwise, see if "A InvEqP B" simplifies. |
2890 | 75 | if (75 MaxRecurse75 ) |
2891 | 75 | if (Value *75 V75 = SimplifyICmpInst(InvEqP, A, B, Q, MaxRecurse - 1)) |
2892 | 0 | return V; |
2893 | 75 | break; |
2894 | 75 | } |
2895 | 12 | case CmpInst::ICMP_UGE: |
2896 | 12 | // Always true. |
2897 | 12 | return getTrue(ITy); |
2898 | 134 | case CmpInst::ICMP_ULT: |
2899 | 134 | // Always false. |
2900 | 134 | return getFalse(ITy); |
2901 | 37.5M | } |
2902 | 37.5M | } |
2903 | 37.5M | |
2904 | 37.5M | // Variants on "max(x,y) >= min(x,z)". |
2905 | 37.5M | Value *C, *D; |
2906 | 37.5M | if (match(LHS, m_SMax(m_Value(A), m_Value(B))) && |
2907 | 567k | match(RHS, m_SMin(m_Value(C), m_Value(D))) && |
2908 | 37.5M | (A == C || 698 A == D660 || B == C660 || B == D660 )) { |
2909 | 38 | // max(x, ?) pred min(x, ?). |
2910 | 38 | if (Pred == CmpInst::ICMP_SGE) |
2911 | 38 | // Always true. |
2912 | 1 | return getTrue(ITy); |
2913 | 37 | if (37 Pred == CmpInst::ICMP_SLT37 ) |
2914 | 37 | // Always false. |
2915 | 1 | return getFalse(ITy); |
2916 | 37.5M | } else if (37.5M match(LHS, m_SMin(m_Value(A), m_Value(B))) && |
2917 | 19.2k | match(RHS, m_SMax(m_Value(C), m_Value(D))) && |
2918 | 37.5M | (A == C || 128 A == D102 || B == C102 || B == D102 )) { |
2919 | 60 | // min(x, ?) pred max(x, ?). |
2920 | 60 | if (Pred == CmpInst::ICMP_SLE) |
2921 | 60 | // Always true. |
2922 | 1 | return getTrue(ITy); |
2923 | 59 | if (59 Pred == CmpInst::ICMP_SGT59 ) |
2924 | 59 | // Always false. |
2925 | 17 | return getFalse(ITy); |
2926 | 37.5M | } else if (37.5M match(LHS, m_UMax(m_Value(A), m_Value(B))) && |
2927 | 113k | match(RHS, m_UMin(m_Value(C), m_Value(D))) && |
2928 | 37.5M | (A == C || 38 A == D0 || B == C0 || B == D0 )) { |
2929 | 38 | // max(x, ?) pred min(x, ?). |
2930 | 38 | if (Pred == CmpInst::ICMP_UGE) |
2931 | 38 | // Always true. |
2932 | 1 | return getTrue(ITy); |
2933 | 37 | if (37 Pred == CmpInst::ICMP_ULT37 ) |
2934 | 37 | // Always false. |
2935 | 1 | return getFalse(ITy); |
2936 | 37.5M | } else if (37.5M match(LHS, m_UMin(m_Value(A), m_Value(B))) && |
2937 | 12.2k | match(RHS, m_UMax(m_Value(C), m_Value(D))) && |
2938 | 37.5M | (A == C || 6 A == D0 || B == C0 || B == D0 )) { |
2939 | 6 | // min(x, ?) pred max(x, ?). |
2940 | 6 | if (Pred == CmpInst::ICMP_ULE) |
2941 | 6 | // Always true. |
2942 | 1 | return getTrue(ITy); |
2943 | 5 | if (5 Pred == CmpInst::ICMP_UGT5 ) |
2944 | 5 | // Always false. |
2945 | 5 | return getFalse(ITy); |
2946 | 37.5M | } |
2947 | 37.5M | |
2948 | 37.5M | return nullptr; |
2949 | 37.5M | } |
2950 | | |
2951 | | /// Given operands for an ICmpInst, see if we can fold the result. |
2952 | | /// If not, this returns null. |
2953 | | static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, |
2954 | 39.7M | const SimplifyQuery &Q, unsigned MaxRecurse) { |
2955 | 39.7M | CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate; |
2956 | 39.7M | assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!"); |
2957 | 39.7M | |
2958 | 39.7M | if (Constant *CLHS39.7M = dyn_cast<Constant>(LHS)) { |
2959 | 2.34M | if (Constant *CRHS = dyn_cast<Constant>(RHS)) |
2960 | 2.00M | return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, Q.DL, Q.TLI); |
2961 | 344k | |
2962 | 344k | // If we have a constant, make sure it is on the RHS. |
2963 | 344k | std::swap(LHS, RHS); |
2964 | 344k | Pred = CmpInst::getSwappedPredicate(Pred); |
2965 | 344k | } |
2966 | 39.7M | |
2967 | 37.7M | Type *ITy = GetCompareTy(LHS); // The return type. |
2968 | 37.7M | |
2969 | 37.7M | // icmp X, X -> true/false |
2970 | 37.7M | // X icmp undef -> true/false. For example, icmp ugt %X, undef -> false |
2971 | 37.7M | // because X could be 0. |
2972 | 37.7M | if (LHS == RHS || 37.7M isa<UndefValue>(RHS)37.6M ) |
2973 | 69.6k | return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred)); |
2974 | 37.6M | |
2975 | 37.6M | if (Value *37.6M V37.6M = simplifyICmpOfBools(Pred, LHS, RHS, Q)) |
2976 | 12.3k | return V; |
2977 | 37.6M | |
2978 | 37.6M | if (Value *37.6M V37.6M = simplifyICmpWithZero(Pred, LHS, RHS, Q)) |
2979 | 22.2k | return V; |
2980 | 37.6M | |
2981 | 37.6M | if (Value *37.6M V37.6M = simplifyICmpWithConstant(Pred, LHS, RHS)) |
2982 | 76.9k | return V; |
2983 | 37.5M | |
2984 | 37.5M | // If both operands have range metadata, use the metadata |
2985 | 37.5M | // to simplify the comparison. |
2986 | 37.5M | if (37.5M isa<Instruction>(RHS) && 37.5M isa<Instruction>(LHS)7.95M ) { |
2987 | 7.90M | auto RHS_Instr = cast<Instruction>(RHS); |
2988 | 7.90M | auto LHS_Instr = cast<Instruction>(LHS); |
2989 | 7.90M | |
2990 | 7.90M | if (RHS_Instr->getMetadata(LLVMContext::MD_range) && |
2991 | 7.90M | LHS_Instr->getMetadata(LLVMContext::MD_range)203 ) { |
2992 | 188 | auto RHS_CR = getConstantRangeFromMetadata( |
2993 | 188 | *RHS_Instr->getMetadata(LLVMContext::MD_range)); |
2994 | 188 | auto LHS_CR = getConstantRangeFromMetadata( |
2995 | 188 | *LHS_Instr->getMetadata(LLVMContext::MD_range)); |
2996 | 188 | |
2997 | 188 | auto Satisfied_CR = ConstantRange::makeSatisfyingICmpRegion(Pred, RHS_CR); |
2998 | 188 | if (Satisfied_CR.contains(LHS_CR)) |
2999 | 1 | return ConstantInt::getTrue(RHS->getContext()); |
3000 | 187 | |
3001 | 187 | auto InversedSatisfied_CR = ConstantRange::makeSatisfyingICmpRegion( |
3002 | 187 | CmpInst::getInversePredicate(Pred), RHS_CR); |
3003 | 187 | if (InversedSatisfied_CR.contains(LHS_CR)) |
3004 | 1 | return ConstantInt::getFalse(RHS->getContext()); |
3005 | 37.5M | } |
3006 | 7.90M | } |
3007 | 37.5M | |
3008 | 37.5M | // Compare of cast, for example (zext X) != 0 -> X != 0 |
3009 | 37.5M | if (37.5M isa<CastInst>(LHS) && 37.5M (isa<Constant>(RHS) || 1.55M isa<CastInst>(RHS)459k )) { |
3010 | 1.27M | Instruction *LI = cast<CastInst>(LHS); |
3011 | 1.27M | Value *SrcOp = LI->getOperand(0); |
3012 | 1.27M | Type *SrcTy = SrcOp->getType(); |
3013 | 1.27M | Type *DstTy = LI->getType(); |
3014 | 1.27M | |
3015 | 1.27M | // Turn icmp (ptrtoint x), (ptrtoint/constant) into a compare of the input |
3016 | 1.27M | // if the integer type is the same size as the pointer type. |
3017 | 1.27M | if (MaxRecurse && 1.27M isa<PtrToIntInst>(LI)1.26M && |
3018 | 1.27M | Q.DL.getTypeSizeInBits(SrcTy) == DstTy->getPrimitiveSizeInBits()5.72k ) { |
3019 | 5.48k | if (Constant *RHSC5.48k = dyn_cast<Constant>(RHS)) { |
3020 | 5.18k | // Transfer the cast to the constant. |
3021 | 5.18k | if (Value *V = SimplifyICmpInst(Pred, SrcOp, |
3022 | 5.18k | ConstantExpr::getIntToPtr(RHSC, SrcTy), |
3023 | 5.18k | Q, MaxRecurse-1)) |
3024 | 261 | return V; |
3025 | 298 | } else if (PtrToIntInst *298 RI298 = dyn_cast<PtrToIntInst>(RHS)) { |
3026 | 186 | if (RI->getOperand(0)->getType() == SrcTy) |
3027 | 186 | // Compare without the cast. |
3028 | 88 | if (Value *88 V88 = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0), |
3029 | 88 | Q, MaxRecurse-1)) |
3030 | 2 | return V; |
3031 | 1.27M | } |
3032 | 5.48k | } |
3033 | 1.27M | |
3034 | 1.27M | if (1.27M isa<ZExtInst>(LHS)1.27M ) { |
3035 | 210k | // Turn icmp (zext X), (zext Y) into a compare of X and Y if they have the |
3036 | 210k | // same type. |
3037 | 210k | if (ZExtInst *RI210k = dyn_cast<ZExtInst>(RHS)) { |
3038 | 5.53k | if (MaxRecurse && 5.53k SrcTy == RI->getOperand(0)->getType()5.53k ) |
3039 | 5.53k | // Compare X and Y. Note that signed predicates become unsigned. |
3040 | 5.06k | if (Value *5.06k V5.06k = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred), |
3041 | 5.06k | SrcOp, RI->getOperand(0), Q, |
3042 | 5.06k | MaxRecurse-1)) |
3043 | 1 | return V; |
3044 | 210k | } |
3045 | 210k | // Turn icmp (zext X), Cst into a compare of X and Cst if Cst is extended |
3046 | 210k | // too. If not, then try to deduce the result of the comparison. |
3047 | 204k | else if (ConstantInt *204k CI204k = dyn_cast<ConstantInt>(RHS)) { |
3048 | 204k | // Compute the constant that would happen if we truncated to SrcTy then |
3049 | 204k | // reextended to DstTy. |
3050 | 204k | Constant *Trunc = ConstantExpr::getTrunc(CI, SrcTy); |
3051 | 204k | Constant *RExt = ConstantExpr::getCast(CastInst::ZExt, Trunc, DstTy); |
3052 | 204k | |
3053 | 204k | // If the re-extended constant didn't change then this is effectively |
3054 | 204k | // also a case of comparing two zero-extended values. |
3055 | 204k | if (RExt == CI && 204k MaxRecurse199k ) |
3056 | 198k | if (Value *198k V198k = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred), |
3057 | 198k | SrcOp, Trunc, Q, MaxRecurse-1)) |
3058 | 12.1k | return V; |
3059 | 192k | |
3060 | 192k | // Otherwise the upper bits of LHS are zero while RHS has a non-zero bit |
3061 | 192k | // there. Use this to work out the result of the comparison. |
3062 | 192k | if (192k RExt != CI192k ) { |
3063 | 4.92k | switch (Pred) { |
3064 | 0 | default: 0 llvm_unreachable0 ("Unknown ICmp predicate!"); |
3065 | 4.92k | // LHS <u RHS. |
3066 | 3.17k | case ICmpInst::ICMP_EQ: |
3067 | 3.17k | case ICmpInst::ICMP_UGT: |
3068 | 3.17k | case ICmpInst::ICMP_UGE: |
3069 | 3.17k | return ConstantInt::getFalse(CI->getContext()); |
3070 | 3.17k | |
3071 | 519 | case ICmpInst::ICMP_NE: |
3072 | 519 | case ICmpInst::ICMP_ULT: |
3073 | 519 | case ICmpInst::ICMP_ULE: |
3074 | 519 | return ConstantInt::getTrue(CI->getContext()); |
3075 | 519 | |
3076 | 519 | // LHS is non-negative. If RHS is negative then LHS >s RHS. If RHS |
3077 | 519 | // is non-negative then LHS <s RHS. |
3078 | 932 | case ICmpInst::ICMP_SGT: |
3079 | 932 | case ICmpInst::ICMP_SGE: |
3080 | 932 | return CI->getValue().isNegative() ? |
3081 | 860 | ConstantInt::getTrue(CI->getContext()) : |
3082 | 72 | ConstantInt::getFalse(CI->getContext()); |
3083 | 932 | |
3084 | 297 | case ICmpInst::ICMP_SLT: |
3085 | 297 | case ICmpInst::ICMP_SLE: |
3086 | 297 | return CI->getValue().isNegative() ? |
3087 | 69 | ConstantInt::getFalse(CI->getContext()) : |
3088 | 228 | ConstantInt::getTrue(CI->getContext()); |
3089 | 1.25M | } |
3090 | 1.25M | } |
3091 | 204k | } |
3092 | 210k | } |
3093 | 1.25M | |
3094 | 1.25M | if (1.25M isa<SExtInst>(LHS)1.25M ) { |
3095 | 187k | // Turn icmp (sext X), (sext Y) into a compare of X and Y if they have the |
3096 | 187k | // same type. |
3097 | 187k | if (SExtInst *RI187k = dyn_cast<SExtInst>(RHS)) { |
3098 | 5.18k | if (MaxRecurse && 5.18k SrcTy == RI->getOperand(0)->getType()5.10k ) |
3099 | 5.18k | // Compare X and Y. Note that the predicate does not change. |
3100 | 5.04k | if (Value *5.04k V5.04k = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0), |
3101 | 5.04k | Q, MaxRecurse-1)) |
3102 | 1.01k | return V; |
3103 | 187k | } |
3104 | 187k | // Turn icmp (sext X), Cst into a compare of X and Cst if Cst is extended |
3105 | 187k | // too. If not, then try to deduce the result of the comparison. |
3106 | 182k | else if (ConstantInt *182k CI182k = dyn_cast<ConstantInt>(RHS)) { |
3107 | 181k | // Compute the constant that would happen if we truncated to SrcTy then |
3108 | 181k | // reextended to DstTy. |
3109 | 181k | Constant *Trunc = ConstantExpr::getTrunc(CI, SrcTy); |
3110 | 181k | Constant *RExt = ConstantExpr::getCast(CastInst::SExt, Trunc, DstTy); |
3111 | 181k | |
3112 | 181k | // If the re-extended constant didn't change then this is effectively |
3113 | 181k | // also a case of comparing two sign-extended values. |
3114 | 181k | if (RExt == CI && 181k MaxRecurse177k ) |
3115 | 177k | if (Value *177k V177k = SimplifyICmpInst(Pred, SrcOp, Trunc, Q, MaxRecurse-1)) |
3116 | 12.5k | return V; |
3117 | 168k | |
3118 | 168k | // Otherwise the upper bits of LHS are all equal, while RHS has varying |
3119 | 168k | // bits there. Use this to work out the result of the comparison. |
3120 | 168k | if (168k RExt != CI168k ) { |
3121 | 3.49k | switch (Pred) { |
3122 | 0 | default: 0 llvm_unreachable0 ("Unknown ICmp predicate!"); |
3123 | 1.02k | case ICmpInst::ICMP_EQ: |
3124 | 1.02k | return ConstantInt::getFalse(CI->getContext()); |
3125 | 320 | case ICmpInst::ICMP_NE: |
3126 | 320 | return ConstantInt::getTrue(CI->getContext()); |
3127 | 3.49k | |
3128 | 3.49k | // If RHS is non-negative then LHS <s RHS. If RHS is negative then |
3129 | 3.49k | // LHS >s RHS. |
3130 | 24 | case ICmpInst::ICMP_SGT: |
3131 | 24 | case ICmpInst::ICMP_SGE: |
3132 | 24 | return CI->getValue().isNegative() ? |
3133 | 11 | ConstantInt::getTrue(CI->getContext()) : |
3134 | 13 | ConstantInt::getFalse(CI->getContext()); |
3135 | 26 | case ICmpInst::ICMP_SLT: |
3136 | 26 | case ICmpInst::ICMP_SLE: |
3137 | 26 | return CI->getValue().isNegative() ? |
3138 | 13 | ConstantInt::getFalse(CI->getContext()) : |
3139 | 13 | ConstantInt::getTrue(CI->getContext()); |
3140 | 26 | |
3141 | 26 | // If LHS is non-negative then LHS <u RHS. If LHS is negative then |
3142 | 26 | // LHS >u RHS. |
3143 | 124 | case ICmpInst::ICMP_UGT: |
3144 | 124 | case ICmpInst::ICMP_UGE: |
3145 | 124 | // Comparison is true iff the LHS <s 0. |
3146 | 124 | if (MaxRecurse) |
3147 | 122 | if (Value *122 V122 = SimplifyICmpInst(ICmpInst::ICMP_SLT, SrcOp, |
3148 | 122 | Constant::getNullValue(SrcTy), |
3149 | 122 | Q, MaxRecurse-1)) |
3150 | 0 | return V; |
3151 | 124 | break; |
3152 | 1.96k | case ICmpInst::ICMP_ULT: |
3153 | 1.96k | case ICmpInst::ICMP_ULE: |
3154 | 1.96k | // Comparison is true iff the LHS >=s 0. |
3155 | 1.96k | if (MaxRecurse) |
3156 | 1.96k | if (Value *1.96k V1.96k = SimplifyICmpInst(ICmpInst::ICMP_SGE, SrcOp, |
3157 | 1.96k | Constant::getNullValue(SrcTy), |
3158 | 1.96k | Q, MaxRecurse-1)) |
3159 | 2 | return V; |
3160 | 1.96k | break; |
3161 | 3.49k | } |
3162 | 3.49k | } |
3163 | 182k | } |
3164 | 187k | } |
3165 | 1.27M | } |
3166 | 37.5M | |
3167 | 37.5M | // icmp eq|ne X, Y -> false|true if X != Y |
3168 | 37.5M | if (37.5M ICmpInst::isEquality(Pred) && |
3169 | 37.5M | isKnownNonEqual(LHS, RHS, Q.DL, Q.AC, Q.CxtI, Q.DT)23.5M ) { |
3170 | 6.77k | return Pred == ICmpInst::ICMP_NE ? getTrue(ITy)423 : getFalse(ITy)6.35k ; |
3171 | 6.77k | } |
3172 | 37.5M | |
3173 | 37.5M | if (Value *37.5M V37.5M = simplifyICmpWithBinOp(Pred, LHS, RHS, Q, MaxRecurse)) |
3174 | 4.38k | return V; |
3175 | 37.5M | |
3176 | 37.5M | if (Value *37.5M V37.5M = simplifyICmpWithMinMax(Pred, LHS, RHS, Q, MaxRecurse)) |
3177 | 6.83k | return V; |
3178 | 37.5M | |
3179 | 37.5M | // Simplify comparisons of related pointers using a powerful, recursive |
3180 | 37.5M | // GEP-walk when we have target data available. |
3181 | 37.5M | if (37.5M LHS->getType()->isPointerTy()37.5M ) |
3182 | 8.66M | if (auto *8.66M C8.66M = computePointerICmp(Q.DL, Q.TLI, Q.DT, Pred, Q.AC, Q.CxtI, LHS, |
3183 | 8.66M | RHS)) |
3184 | 10.9k | return C; |
3185 | 37.4M | if (auto *37.4M CLHS37.4M = dyn_cast<PtrToIntOperator>(LHS)) |
3186 | 5.73k | if (auto *5.73k CRHS5.73k = dyn_cast<PtrToIntOperator>(RHS)) |
3187 | 304 | if (304 Q.DL.getTypeSizeInBits(CLHS->getPointerOperandType()) == |
3188 | 304 | Q.DL.getTypeSizeInBits(CLHS->getType()) && |
3189 | 265 | Q.DL.getTypeSizeInBits(CRHS->getPointerOperandType()) == |
3190 | 265 | Q.DL.getTypeSizeInBits(CRHS->getType())) |
3191 | 265 | if (auto *265 C265 = computePointerICmp(Q.DL, Q.TLI, Q.DT, Pred, Q.AC, Q.CxtI, |
3192 | 265 | CLHS->getPointerOperand(), |
3193 | 265 | CRHS->getPointerOperand())) |
3194 | 1 | return C; |
3195 | 37.4M | |
3196 | 37.4M | if (GetElementPtrInst *37.4M GLHS37.4M = dyn_cast<GetElementPtrInst>(LHS)) { |
3197 | 572k | if (GEPOperator *GRHS572k = dyn_cast<GEPOperator>(RHS)) { |
3198 | 261k | if (GLHS->getPointerOperand() == GRHS->getPointerOperand() && |
3199 | 261k | GLHS->hasAllConstantIndices()2.92k && GRHS->hasAllConstantIndices()502 && |
3200 | 18 | (ICmpInst::isEquality(Pred) || |
3201 | 18 | (GLHS->isInBounds() && 18 GRHS->isInBounds()2 && |
3202 | 261k | Pred == ICmpInst::getSignedPredicate(Pred)2 ))) { |
3203 | 2 | // The bases are equal and the indices are constant. Build a constant |
3204 | 2 | // expression GEP with the same indices and a null base pointer to see |
3205 | 2 | // what constant folding can make out of it. |
3206 | 2 | Constant *Null = Constant::getNullValue(GLHS->getPointerOperandType()); |
3207 | 2 | SmallVector<Value *, 4> IndicesLHS(GLHS->idx_begin(), GLHS->idx_end()); |
3208 | 2 | Constant *NewLHS = ConstantExpr::getGetElementPtr( |
3209 | 2 | GLHS->getSourceElementType(), Null, IndicesLHS); |
3210 | 2 | |
3211 | 2 | SmallVector<Value *, 4> IndicesRHS(GRHS->idx_begin(), GRHS->idx_end()); |
3212 | 2 | Constant *NewRHS = ConstantExpr::getGetElementPtr( |
3213 | 2 | GLHS->getSourceElementType(), Null, IndicesRHS); |
3214 | 2 | return ConstantExpr::getICmp(Pred, NewLHS, NewRHS); |
3215 | 2 | } |
3216 | 37.4M | } |
3217 | 572k | } |
3218 | 37.4M | |
3219 | 37.4M | // If the comparison is with the result of a select instruction, check whether |
3220 | 37.4M | // comparing with either branch of the select always yields the same value. |
3221 | 37.4M | if (37.4M isa<SelectInst>(LHS) || 37.4M isa<SelectInst>(RHS)36.3M ) |
3222 | 1.56M | if (Value *1.56M V1.56M = ThreadCmpOverSelect(Pred, LHS, RHS, Q, MaxRecurse)) |
3223 | 6.61k | return V; |
3224 | 37.4M | |
3225 | 37.4M | // If the comparison is with the result of a phi instruction, check whether |
3226 | 37.4M | // doing the compare with each incoming phi value yields a common result. |
3227 | 37.4M | if (37.4M isa<PHINode>(LHS) || 37.4M isa<PHINode>(RHS)33.3M ) |
3228 | 4.84M | if (Value *4.84M V4.84M = ThreadCmpOverPHI(Pred, LHS, RHS, Q, MaxRecurse)) |
3229 | 4.94k | return V; |
3230 | 37.4M | |
3231 | 37.4M | return nullptr; |
3232 | 37.4M | } |
3233 | | |
3234 | | Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, |
3235 | 31.7M | const SimplifyQuery &Q) { |
3236 | 31.7M | return ::SimplifyICmpInst(Predicate, LHS, RHS, Q, RecursionLimit); |
3237 | 31.7M | } |
3238 | | |
3239 | | /// Given operands for an FCmpInst, see if we can fold the result. |
3240 | | /// If not, this returns null. |
3241 | | static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, |
3242 | | FastMathFlags FMF, const SimplifyQuery &Q, |
3243 | 674k | unsigned MaxRecurse) { |
3244 | 674k | CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate; |
3245 | 674k | assert(CmpInst::isFPPredicate(Pred) && "Not an FP compare!"); |
3246 | 674k | |
3247 | 674k | if (Constant *CLHS674k = dyn_cast<Constant>(LHS)) { |
3248 | 9.98k | if (Constant *CRHS = dyn_cast<Constant>(RHS)) |
3249 | 8.45k | return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, Q.DL, Q.TLI); |
3250 | 1.53k | |
3251 | 1.53k | // If we have a constant, make sure it is on the RHS. |
3252 | 1.53k | std::swap(LHS, RHS); |
3253 | 1.53k | Pred = CmpInst::getSwappedPredicate(Pred); |
3254 | 1.53k | } |
3255 | 674k | |
3256 | 674k | // Fold trivial predicates. |
3257 | 665k | Type *RetTy = GetCompareTy(LHS); |
3258 | 665k | if (Pred == FCmpInst::FCMP_FALSE) |
3259 | 35 | return getFalse(RetTy); |
3260 | 665k | if (665k Pred == FCmpInst::FCMP_TRUE665k ) |
3261 | 35 | return getTrue(RetTy); |
3262 | 665k | |
3263 | 665k | // UNO/ORD predicates can be trivially folded if NaNs are ignored. |
3264 | 665k | if (665k FMF.noNaNs()665k ) { |
3265 | 192 | if (Pred == FCmpInst::FCMP_UNO) |
3266 | 1 | return getFalse(RetTy); |
3267 | 191 | if (191 Pred == FCmpInst::FCMP_ORD191 ) |
3268 | 1 | return getTrue(RetTy); |
3269 | 665k | } |
3270 | 665k | |
3271 | 665k | // fcmp pred x, undef and fcmp pred undef, x |
3272 | 665k | // fold to true if unordered, false if ordered |
3273 | 665k | if (665k isa<UndefValue>(LHS) || 665k isa<UndefValue>(RHS)665k ) { |
3274 | 232 | // Choosing NaN for the undef will always make unordered comparison succeed |
3275 | 232 | // and ordered comparison fail. |
3276 | 232 | return ConstantInt::get(RetTy, CmpInst::isUnordered(Pred)); |
3277 | 232 | } |
3278 | 665k | |
3279 | 665k | // fcmp x,x -> true/false. Not all compares are foldable. |
3280 | 665k | if (665k LHS == RHS665k ) { |
3281 | 3.00k | if (CmpInst::isTrueWhenEqual(Pred)) |
3282 | 2 | return getTrue(RetTy); |
3283 | 3.00k | if (3.00k CmpInst::isFalseWhenEqual(Pred)3.00k ) |
3284 | 15 | return getFalse(RetTy); |
3285 | 665k | } |
3286 | 665k | |
3287 | 665k | // Handle fcmp with constant RHS |
3288 | 665k | const ConstantFP *CFP = nullptr; |
3289 | 665k | if (const auto *RHSC665k = dyn_cast<Constant>(RHS)) { |
3290 | 543k | if (RHS->getType()->isVectorTy()) |
3291 | 744 | CFP = dyn_cast_or_null<ConstantFP>(RHSC->getSplatValue()); |
3292 | 543k | else |
3293 | 542k | CFP = dyn_cast<ConstantFP>(RHSC); |
3294 | 543k | } |
3295 | 665k | if (CFP665k ) { |
3296 | 543k | // If the constant is a nan, see if we can fold the comparison based on it. |
3297 | 543k | if (CFP->getValueAPF().isNaN()543k ) { |
3298 | 13 | if (FCmpInst::isOrdered(Pred)) // True "if ordered and foo" |
3299 | 7 | return getFalse(RetTy); |
3300 | 0 | assert(FCmpInst::isUnordered(Pred) && |
3301 | 6 | "Comparison must be either ordered or unordered!"); |
3302 | 6 | // True if unordered. |
3303 | 6 | return getTrue(RetTy); |
3304 | 6 | } |
3305 | 543k | // Check whether the constant is an infinity. |
3306 | 543k | if (543k CFP->getValueAPF().isInfinity()543k ) { |
3307 | 26.1k | if (CFP->getValueAPF().isNegative()26.1k ) { |
3308 | 110 | switch (Pred) { |
3309 | 1 | case FCmpInst::FCMP_OLT: |
3310 | 1 | // No value is ordered and less than negative infinity. |
3311 | 1 | return getFalse(RetTy); |
3312 | 1 | case FCmpInst::FCMP_UGE: |
3313 | 1 | // All values are unordered with or at least negative infinity. |
3314 | 1 | return getTrue(RetTy); |
3315 | 108 | default: |
3316 | 108 | break; |
3317 | 26.1k | } |
3318 | 25.9k | } else { |
3319 | 25.9k | switch (Pred) { |
3320 | 1 | case FCmpInst::FCMP_OGT: |
3321 | 1 | // No value is ordered and greater than infinity. |
3322 | 1 | return getFalse(RetTy); |
3323 | 3 | case FCmpInst::FCMP_ULE: |
3324 | 3 | // All values are unordered with and at most infinity. |
3325 | 3 | return getTrue(RetTy); |
3326 | 25.9k | default: |
3327 | 25.9k | break; |
3328 | 543k | } |
3329 | 543k | } |
3330 | 26.1k | } |
3331 | 543k | if (543k CFP->getValueAPF().isZero()543k ) { |
3332 | 74.8k | switch (Pred) { |
3333 | 28 | case FCmpInst::FCMP_UGE: |
3334 | 28 | if (CannotBeOrderedLessThanZero(LHS, Q.TLI)) |
3335 | 6 | return getTrue(RetTy); |
3336 | 22 | break; |
3337 | 12.5k | case FCmpInst::FCMP_OLT: |
3338 | 12.5k | // X < 0 |
3339 | 12.5k | if (CannotBeOrderedLessThanZero(LHS, Q.TLI)) |
3340 | 7 | return getFalse(RetTy); |
3341 | 12.5k | break; |
3342 | 62.2k | default: |
3343 | 62.2k | break; |
3344 | 665k | } |
3345 | 665k | } |
3346 | 543k | } |
3347 | 665k | |
3348 | 665k | // If the comparison is with the result of a select instruction, check whether |
3349 | 665k | // comparing with either branch of the select always yields the same value. |
3350 | 665k | if (665k isa<SelectInst>(LHS) || 665k isa<SelectInst>(RHS)660k ) |
3351 | 6.67k | if (Value *6.67k V6.67k = ThreadCmpOverSelect(Pred, LHS, RHS, Q, MaxRecurse)) |
3352 | 72 | return V; |
3353 | 665k | |
3354 | 665k | // If the comparison is with the result of a phi instruction, check whether |
3355 | 665k | // doing the compare with each incoming phi value yields a common result. |
3356 | 665k | if (665k isa<PHINode>(LHS) || 665k isa<PHINode>(RHS)544k ) |
3357 | 132k | if (Value *132k V132k = ThreadCmpOverPHI(Pred, LHS, RHS, Q, MaxRecurse)) |
3358 | 133 | return V; |
3359 | 665k | |
3360 | 665k | return nullptr; |
3361 | 665k | } |
3362 | | |
3363 | | Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, |
3364 | 532k | FastMathFlags FMF, const SimplifyQuery &Q) { |
3365 | 532k | return ::SimplifyFCmpInst(Predicate, LHS, RHS, FMF, Q, RecursionLimit); |
3366 | 532k | } |
3367 | | |
3368 | | /// See if V simplifies when its operand Op is replaced with RepOp. |
3369 | | static const Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, |
3370 | | const SimplifyQuery &Q, |
3371 | 5.84M | unsigned MaxRecurse) { |
3372 | 5.84M | // Trivial replacement. |
3373 | 5.84M | if (V == Op) |
3374 | 342k | return RepOp; |
3375 | 5.50M | |
3376 | 5.50M | // We cannot replace a constant, and shouldn't even try. |
3377 | 5.50M | if (5.50M isa<Constant>(Op)5.50M ) |
3378 | 2.52M | return nullptr; |
3379 | 2.97M | |
3380 | 2.97M | auto *I = dyn_cast<Instruction>(V); |
3381 | 2.97M | if (!I) |
3382 | 1.77M | return nullptr; |
3383 | 1.20M | |
3384 | 1.20M | // If this is a binary operator, try to simplify it with the replaced op. |
3385 | 1.20M | if (auto *1.20M B1.20M = dyn_cast<BinaryOperator>(I)) { |
3386 | 196k | // Consider: |
3387 | 196k | // %cmp = icmp eq i32 %x, 2147483647 |
3388 | 196k | // %add = add nsw i32 %x, 1 |
3389 | 196k | // %sel = select i1 %cmp, i32 -2147483648, i32 %add |
3390 | 196k | // |
3391 | 196k | // We can't replace %sel with %add unless we strip away the flags. |
3392 | 196k | if (isa<OverflowingBinaryOperator>(B)) |
3393 | 165k | if (165k B->hasNoSignedWrap() || 165k B->hasNoUnsignedWrap()125k ) |
3394 | 41.0k | return nullptr; |
3395 | 155k | if (155k isa<PossiblyExactOperator>(B)155k ) |
3396 | 2.21k | if (2.21k B->isExact()2.21k ) |
3397 | 98 | return nullptr; |
3398 | 155k | |
3399 | 155k | if (155k MaxRecurse155k ) { |
3400 | 155k | if (B->getOperand(0) == Op) |
3401 | 862 | return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), Q, |
3402 | 862 | MaxRecurse - 1); |
3403 | 154k | if (154k B->getOperand(1) == Op154k ) |
3404 | 4.30k | return SimplifyBinOp(B->getOpcode(), B->getOperand(0), RepOp, Q, |
3405 | 4.30k | MaxRecurse - 1); |
3406 | 1.15M | } |
3407 | 196k | } |
3408 | 1.15M | |
3409 | 1.15M | // Same for CmpInsts. |
3410 | 1.15M | if (CmpInst *1.15M C1.15M = dyn_cast<CmpInst>(I)) { |
3411 | 2.69k | if (MaxRecurse2.69k ) { |
3412 | 2.69k | if (C->getOperand(0) == Op) |
3413 | 913 | return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), Q, |
3414 | 913 | MaxRecurse - 1); |
3415 | 1.78k | if (1.78k C->getOperand(1) == Op1.78k ) |
3416 | 20 | return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, Q, |
3417 | 20 | MaxRecurse - 1); |
3418 | 1.15M | } |
3419 | 2.69k | } |
3420 | 1.15M | |
3421 | 1.15M | // TODO: We could hand off more cases to instsimplify here. |
3422 | 1.15M | |
3423 | 1.15M | // If all operands are constant after substituting Op for RepOp then we can |
3424 | 1.15M | // constant fold the instruction. |
3425 | 1.15M | if (Constant *1.15M CRepOp1.15M = dyn_cast<Constant>(RepOp)) { |
3426 | 955k | // Build a list of all constant operands. |
3427 | 955k | SmallVector<Constant *, 8> ConstOps; |
3428 | 1.19M | for (unsigned i = 0, e = I->getNumOperands(); i != e1.19M ; ++i243k ) { |
3429 | 1.13M | if (I->getOperand(i) == Op) |
3430 | 40.7k | ConstOps.push_back(CRepOp); |
3431 | 1.09M | else if (Constant *1.09M COp1.09M = dyn_cast<Constant>(I->getOperand(i))) |
3432 | 202k | ConstOps.push_back(COp); |
3433 | 1.09M | else |
3434 | 894k | break; |
3435 | 1.13M | } |
3436 | 955k | |
3437 | 955k | // All operands were constants, fold it. |
3438 | 955k | if (ConstOps.size() == I->getNumOperands()955k ) { |
3439 | 61.2k | if (CmpInst *C = dyn_cast<CmpInst>(I)) |
3440 | 0 | return ConstantFoldCompareInstOperands(C->getPredicate(), ConstOps[0], |
3441 | 0 | ConstOps[1], Q.DL, Q.TLI); |
3442 | 61.2k | |
3443 | 61.2k | if (LoadInst *61.2k LI61.2k = dyn_cast<LoadInst>(I)) |
3444 | 16.6k | if (16.6k !LI->isVolatile()16.6k ) |
3445 | 16.6k | return ConstantFoldLoadFromConstPtr(ConstOps[0], LI->getType(), Q.DL); |
3446 | 44.5k | |
3447 | 44.5k | return ConstantFoldInstOperands(I, ConstOps, Q.DL, Q.TLI); |
3448 | 44.5k | } |
3449 | 955k | } |
3450 | 1.09M | |
3451 | 1.09M | return nullptr; |
3452 | 1.09M | } |
3453 | | |
3454 | | /// Try to simplify a select instruction when its condition operand is an |
3455 | | /// integer comparison where one operand of the compare is a constant. |
3456 | | static Value *simplifySelectBitTest(Value *TrueVal, Value *FalseVal, Value *X, |
3457 | 572k | const APInt *Y, bool TrueWhenUnset) { |
3458 | 572k | const APInt *C; |
3459 | 572k | |
3460 | 572k | // (X & Y) == 0 ? X & ~Y : X --> X |
3461 | 572k | // (X & Y) != 0 ? X & ~Y : X --> X & ~Y |
3462 | 572k | if (FalseVal == X && 572k match(TrueVal, m_And(m_Specific(X), m_APInt(C)))161k && |
3463 | 43 | *Y == ~*C) |
3464 | 11 | return TrueWhenUnset ? 11 FalseVal5 : TrueVal6 ; |
3465 | 572k | |
3466 | 572k | // (X & Y) == 0 ? X : X & ~Y --> X & ~Y |
3467 | 572k | // (X & Y) != 0 ? X : X & ~Y --> X |
3468 | 572k | if (572k TrueVal == X && 572k match(FalseVal, m_And(m_Specific(X), m_APInt(C)))79.3k && |
3469 | 11 | *Y == ~*C) |
3470 | 11 | return TrueWhenUnset ? 11 FalseVal6 : TrueVal5 ; |
3471 | 572k | |
3472 | 572k | if (572k Y->isPowerOf2()572k ) { |
3473 | 281k | // (X & Y) == 0 ? X | Y : X --> X | Y |
3474 | 281k | // (X & Y) != 0 ? X | Y : X --> X |
3475 | 281k | if (FalseVal == X && 281k match(TrueVal, m_Or(m_Specific(X), m_APInt(C)))160k && |
3476 | 12 | *Y == *C) |
3477 | 8 | return TrueWhenUnset ? 8 TrueVal5 : FalseVal3 ; |
3478 | 281k | |
3479 | 281k | // (X & Y) == 0 ? X : X | Y --> X |
3480 | 281k | // (X & Y) != 0 ? X : X | Y --> X | Y |
3481 | 281k | if (281k TrueVal == X && 281k match(FalseVal, m_Or(m_Specific(X), m_APInt(C)))8.74k && |
3482 | 7 | *Y == *C) |
3483 | 7 | return TrueWhenUnset ? 7 TrueVal3 : FalseVal4 ; |
3484 | 572k | } |
3485 | 572k | |
3486 | 572k | return nullptr; |
3487 | 572k | } |
3488 | | |
3489 | | /// An alternative way to test if a bit is set or not uses sgt/slt instead of |
3490 | | /// eq/ne. |
3491 | | static Value *simplifySelectWithFakeICmpEq(Value *CmpLHS, Value *CmpRHS, |
3492 | | ICmpInst::Predicate Pred, |
3493 | 4.39M | Value *TrueVal, Value *FalseVal) { |
3494 | 4.39M | Value *X; |
3495 | 4.39M | APInt Mask; |
3496 | 4.39M | if (!decomposeBitTestICmp(CmpLHS, CmpRHS, Pred, X, Mask)) |
3497 | 3.88M | return nullptr; |
3498 | 503k | |
3499 | 503k | return simplifySelectBitTest(TrueVal, FalseVal, X, &Mask, |
3500 | 503k | Pred == ICmpInst::ICMP_EQ); |
3501 | 503k | } |
3502 | | |
3503 | | /// Try to simplify a select instruction when its condition operand is an |
3504 | | /// integer comparison. |
3505 | | static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal, |
3506 | | Value *FalseVal, const SimplifyQuery &Q, |
3507 | 4.79M | unsigned MaxRecurse) { |
3508 | 4.79M | ICmpInst::Predicate Pred; |
3509 | 4.79M | Value *CmpLHS, *CmpRHS; |
3510 | 4.79M | if (!match(CondVal, m_ICmp(Pred, m_Value(CmpLHS), m_Value(CmpRHS)))) |
3511 | 400k | return nullptr; |
3512 | 4.39M | |
3513 | 4.39M | if (4.39M ICmpInst::isEquality(Pred) && 4.39M match(CmpRHS, m_Zero())1.46M ) { |
3514 | 1.14M | Value *X; |
3515 | 1.14M | const APInt *Y; |
3516 | 1.14M | if (match(CmpLHS, m_And(m_Value(X), m_APInt(Y)))) |
3517 | 68.6k | if (Value *68.6k V68.6k = simplifySelectBitTest(TrueVal, FalseVal, X, Y, |
3518 | 68.6k | Pred == ICmpInst::ICMP_EQ)) |
3519 | 12 | return V; |
3520 | 4.39M | } |
3521 | 4.39M | |
3522 | 4.39M | // Check for other compares that behave like bit test. |
3523 | 4.39M | if (Value *4.39M V4.39M = simplifySelectWithFakeICmpEq(CmpLHS, CmpRHS, Pred, |
3524 | 4.39M | TrueVal, FalseVal)) |
3525 | 25 | return V; |
3526 | 4.39M | |
3527 | 4.39M | if (4.39M CondVal->hasOneUse()4.39M ) { |
3528 | 3.89M | const APInt *C; |
3529 | 3.89M | if (match(CmpRHS, m_APInt(C))3.89M ) { |
3530 | 1.92M | // X < MIN ? T : F --> F |
3531 | 1.92M | if (Pred == ICmpInst::ICMP_SLT && 1.92M C->isMinSignedValue()304k ) |
3532 | 0 | return FalseVal; |
3533 | 1.92M | // X < MIN ? T : F --> F |
3534 | 1.92M | if (1.92M Pred == ICmpInst::ICMP_ULT && 1.92M C->isMinValue()125k ) |
3535 | 0 | return FalseVal; |
3536 | 1.92M | // X > MAX ? T : F --> F |
3537 | 1.92M | if (1.92M Pred == ICmpInst::ICMP_SGT && 1.92M C->isMaxSignedValue()356k ) |
3538 | 0 | return FalseVal; |
3539 | 1.92M | // X > MAX ? T : F --> F |
3540 | 1.92M | if (1.92M Pred == ICmpInst::ICMP_UGT && 1.92M C->isMaxValue()230k ) |
3541 | 0 | return FalseVal; |
3542 | 4.39M | } |
3543 | 3.89M | } |
3544 | 4.39M | |
3545 | 4.39M | // If we have an equality comparison, then we know the value in one of the |
3546 | 4.39M | // arms of the select. See if substituting this value into the arm and |
3547 | 4.39M | // simplifying the result yields the same value as the other arm. |
3548 | 4.39M | if (4.39M Pred == ICmpInst::ICMP_EQ4.39M ) { |
3549 | 1.41M | if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q, MaxRecurse) == |
3550 | 1.41M | TrueVal || |
3551 | 1.40M | SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q, MaxRecurse) == |
3552 | 1.40M | TrueVal) |
3553 | 6.36k | return FalseVal; |
3554 | 1.40M | if (1.40M SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q, MaxRecurse) == |
3555 | 1.40M | FalseVal || |
3556 | 1.40M | SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q, MaxRecurse) == |
3557 | 1.40M | FalseVal) |
3558 | 2 | return FalseVal; |
3559 | 2.97M | } else if (2.97M Pred == ICmpInst::ICMP_NE2.97M ) { |
3560 | 53.8k | if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q, MaxRecurse) == |
3561 | 53.8k | FalseVal || |
3562 | 53.7k | SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q, MaxRecurse) == |
3563 | 53.7k | FalseVal) |
3564 | 86 | return TrueVal; |
3565 | 53.7k | if (53.7k SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q, MaxRecurse) == |
3566 | 53.7k | TrueVal || |
3567 | 53.7k | SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q, MaxRecurse) == |
3568 | 53.7k | TrueVal) |
3569 | 0 | return TrueVal; |
3570 | 4.38M | } |
3571 | 4.38M | |
3572 | 4.38M | return nullptr; |
3573 | 4.38M | } |
3574 | | |
3575 | | /// Given operands for a SelectInst, see if we can fold the result. |
3576 | | /// If not, this returns null. |
3577 | | static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal, |
3578 | | Value *FalseVal, const SimplifyQuery &Q, |
3579 | 4.79M | unsigned MaxRecurse) { |
3580 | 4.79M | // select true, X, Y -> X |
3581 | 4.79M | // select false, X, Y -> Y |
3582 | 4.79M | if (Constant *CB4.79M = dyn_cast<Constant>(CondVal)) { |
3583 | 6.77k | if (CB->isAllOnesValue()) |
3584 | 4.27k | return TrueVal; |
3585 | 2.50k | if (2.50k CB->isNullValue()2.50k ) |
3586 | 2.37k | return FalseVal; |
3587 | 4.79M | } |
3588 | 4.79M | |
3589 | 4.79M | // select C, X, X -> X |
3590 | 4.79M | if (4.79M TrueVal == FalseVal4.79M ) |
3591 | 387 | return TrueVal; |
3592 | 4.79M | |
3593 | 4.79M | if (4.79M isa<UndefValue>(CondVal)4.79M ) { // select undef, X, Y -> X or Y |
3594 | 49 | if (isa<Constant>(FalseVal)) |
3595 | 21 | return FalseVal; |
3596 | 28 | return TrueVal; |
3597 | 28 | } |
3598 | 4.79M | if (4.79M isa<UndefValue>(TrueVal)4.79M ) // select C, undef, X -> X |
3599 | 52 | return FalseVal; |
3600 | 4.79M | if (4.79M isa<UndefValue>(FalseVal)4.79M ) // select C, X, undef -> X |
3601 | 45 | return TrueVal; |
3602 | 4.79M | |
3603 | 4.79M | if (Value *4.79M V4.79M = |
3604 | 4.79M | simplifySelectWithICmpCond(CondVal, TrueVal, FalseVal, Q, MaxRecurse)) |
3605 | 6.49k | return V; |
3606 | 4.78M | |
3607 | 4.78M | return nullptr; |
3608 | 4.78M | } |
3609 | | |
3610 | | Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, |
3611 | 4.79M | const SimplifyQuery &Q) { |
3612 | 4.79M | return ::SimplifySelectInst(Cond, TrueVal, FalseVal, Q, RecursionLimit); |
3613 | 4.79M | } |
3614 | | |
3615 | | /// Given operands for an GetElementPtrInst, see if we can fold the result. |
3616 | | /// If not, this returns null. |
3617 | | static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops, |
3618 | 49.7M | const SimplifyQuery &Q, unsigned) { |
3619 | 49.7M | // The type of the GEP pointer operand. |
3620 | 49.7M | unsigned AS = |
3621 | 49.7M | cast<PointerType>(Ops[0]->getType()->getScalarType())->getAddressSpace(); |
3622 | 49.7M | |
3623 | 49.7M | // getelementptr P -> P. |
3624 | 49.7M | if (Ops.size() == 1) |
3625 | 57 | return Ops[0]; |
3626 | 49.7M | |
3627 | 49.7M | // Compute the (pointer) type returned by the GEP instruction. |
3628 | 49.7M | Type *LastType = GetElementPtrInst::getIndexedType(SrcTy, Ops.slice(1)); |
3629 | 49.7M | Type *GEPTy = PointerType::get(LastType, AS); |
3630 | 49.7M | if (VectorType *VT = dyn_cast<VectorType>(Ops[0]->getType())) |
3631 | 2.15k | GEPTy = VectorType::get(GEPTy, VT->getNumElements()); |
3632 | 49.7M | else if (VectorType *49.7M VT49.7M = dyn_cast<VectorType>(Ops[1]->getType())) |
3633 | 856 | GEPTy = VectorType::get(GEPTy, VT->getNumElements()); |
3634 | 49.7M | |
3635 | 49.7M | if (isa<UndefValue>(Ops[0])) |
3636 | 2.41k | return UndefValue::get(GEPTy); |
3637 | 49.7M | |
3638 | 49.7M | if (49.7M Ops.size() == 249.7M ) { |
3639 | 13.7M | // getelementptr P, 0 -> P. |
3640 | 13.7M | if (match(Ops[1], m_Zero())) |
3641 | 182k | return Ops[0]; |
3642 | 13.5M | |
3643 | 13.5M | Type *Ty = SrcTy; |
3644 | 13.5M | if (Ty->isSized()13.5M ) { |
3645 | 13.5M | Value *P; |
3646 | 13.5M | uint64_t C; |
3647 | 13.5M | uint64_t TyAllocSize = Q.DL.getTypeAllocSize(Ty); |
3648 | 13.5M | // getelementptr P, N -> P if P points to a type of zero size. |
3649 | 13.5M | if (TyAllocSize == 0) |
3650 | 2 | return Ops[0]; |
3651 | 13.5M | |
3652 | 13.5M | // The following transforms are only safe if the ptrtoint cast |
3653 | 13.5M | // doesn't truncate the pointers. |
3654 | 13.5M | if (13.5M Ops[1]->getType()->getScalarSizeInBits() == |
3655 | 13.5M | Q.DL.getPointerSizeInBits(AS)) { |
3656 | 32 | auto PtrToIntOrZero = [GEPTy](Value *P) -> Value * { |
3657 | 32 | if (match(P, m_Zero())) |
3658 | 3 | return Constant::getNullValue(GEPTy); |
3659 | 29 | Value *Temp; |
3660 | 29 | if (match(P, m_PtrToInt(m_Value(Temp)))) |
3661 | 29 | if (29 Temp->getType() == GEPTy29 ) |
3662 | 27 | return Temp; |
3663 | 2 | return nullptr; |
3664 | 2 | }; |
3665 | 13.3M | |
3666 | 13.3M | // getelementptr V, (sub P, V) -> P if P points to a type of size 1. |
3667 | 13.3M | if (TyAllocSize == 1 && |
3668 | 7.79M | match(Ops[1], m_Sub(m_Value(P), m_PtrToInt(m_Specific(Ops[0]))))) |
3669 | 27 | if (Value *27 R27 = PtrToIntOrZero(P)) |
3670 | 26 | return R; |
3671 | 13.3M | |
3672 | 13.3M | // getelementptr V, (ashr (sub P, V), C) -> Q |
3673 | 13.3M | // if P points to a type of size 1 << C. |
3674 | 13.3M | if (13.3M match(Ops[1], |
3675 | 13.3M | m_AShr(m_Sub(m_Value(P), m_PtrToInt(m_Specific(Ops[0]))), |
3676 | 13.3M | m_ConstantInt(C))) && |
3677 | 2 | TyAllocSize == 1ULL << C) |
3678 | 2 | if (Value *2 R2 = PtrToIntOrZero(P)) |
3679 | 2 | return R; |
3680 | 13.3M | |
3681 | 13.3M | // getelementptr V, (sdiv (sub P, V), C) -> Q |
3682 | 13.3M | // if P points to a type of size C. |
3683 | 13.3M | if (13.3M match(Ops[1], |
3684 | 13.3M | m_SDiv(m_Sub(m_Value(P), m_PtrToInt(m_Specific(Ops[0]))), |
3685 | 13.3M | m_SpecificInt(TyAllocSize)))) |
3686 | 3 | if (Value *3 R3 = PtrToIntOrZero(P)) |
3687 | 2 | return R; |
3688 | 49.5M | } |
3689 | 13.5M | } |
3690 | 13.7M | } |
3691 | 49.5M | |
3692 | 49.5M | if (49.5M Q.DL.getTypeAllocSize(LastType) == 1 && |
3693 | 12.7M | all_of(Ops.slice(1).drop_back(1), |
3694 | 49.5M | [](Value *Idx) { return match(Idx, m_Zero()); }5.64M )) { |
3695 | 12.1M | unsigned PtrWidth = |
3696 | 12.1M | Q.DL.getPointerSizeInBits(Ops[0]->getType()->getPointerAddressSpace()); |
3697 | 12.1M | if (Q.DL.getTypeSizeInBits(Ops.back()->getType()) == PtrWidth12.1M ) { |
3698 | 11.1M | APInt BasePtrOffset(PtrWidth, 0); |
3699 | 11.1M | Value *StrippedBasePtr = |
3700 | 11.1M | Ops[0]->stripAndAccumulateInBoundsConstantOffsets(Q.DL, |
3701 | 11.1M | BasePtrOffset); |
3702 | 11.1M | |
3703 | 11.1M | // gep (gep V, C), (sub 0, V) -> C |
3704 | 11.1M | if (match(Ops.back(), |
3705 | 11.1M | m_Sub(m_Zero(), m_PtrToInt(m_Specific(StrippedBasePtr))))) { |
3706 | 3 | auto *CI = ConstantInt::get(GEPTy->getContext(), BasePtrOffset); |
3707 | 3 | return ConstantExpr::getIntToPtr(CI, GEPTy); |
3708 | 3 | } |
3709 | 11.1M | // gep (gep V, C), (xor V, -1) -> C-1 |
3710 | 11.1M | if (11.1M match(Ops.back(), |
3711 | 11.1M | m_Xor(m_PtrToInt(m_Specific(StrippedBasePtr)), m_AllOnes()))) { |
3712 | 1 | auto *CI = ConstantInt::get(GEPTy->getContext(), BasePtrOffset - 1); |
3713 | 1 | return ConstantExpr::getIntToPtr(CI, GEPTy); |
3714 | 1 | } |
3715 | 49.5M | } |
3716 | 12.1M | } |
3717 | 49.5M | |
3718 | 49.5M | // Check to see if this is constant foldable. |
3719 | 53.6M | if (49.5M !all_of(Ops, [](Value *V) 49.5M { return isa<Constant>(V); }53.6M )) |
3720 | 49.5M | return nullptr; |
3721 | 70.4k | |
3722 | 70.4k | auto *CE = ConstantExpr::getGetElementPtr(SrcTy, cast<Constant>(Ops[0]), |
3723 | 70.4k | Ops.slice(1)); |
3724 | 70.4k | if (auto *CEFolded = ConstantFoldConstant(CE, Q.DL)) |
3725 | 69.6k | return CEFolded; |
3726 | 794 | return CE; |
3727 | 794 | } |
3728 | | |
3729 | | Value *llvm::SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops, |
3730 | 49.7M | const SimplifyQuery &Q) { |
3731 | 49.7M | return ::SimplifyGEPInst(SrcTy, Ops, Q, RecursionLimit); |
3732 | 49.7M | } |
3733 | | |
3734 | | /// Given operands for an InsertValueInst, see if we can fold the result. |
3735 | | /// If not, this returns null. |
3736 | | static Value *SimplifyInsertValueInst(Value *Agg, Value *Val, |
3737 | | ArrayRef<unsigned> Idxs, const SimplifyQuery &Q, |
3738 | 26.3k | unsigned) { |
3739 | 26.3k | if (Constant *CAgg = dyn_cast<Constant>(Agg)) |
3740 | 13.0k | if (Constant *13.0k CVal13.0k = dyn_cast<Constant>(Val)) |
3741 | 742 | return ConstantFoldInsertValueInstruction(CAgg, CVal, Idxs); |
3742 | 25.6k | |
3743 | 25.6k | // insertvalue x, undef, n -> x |
3744 | 25.6k | if (25.6k match(Val, m_Undef())25.6k ) |
3745 | 13 | return Agg; |
3746 | 25.6k | |
3747 | 25.6k | // insertvalue x, (extractvalue y, n), n |
3748 | 25.6k | if (ExtractValueInst *25.6k EV25.6k = dyn_cast<ExtractValueInst>(Val)) |
3749 | 7.54k | if (7.54k EV->getAggregateOperand()->getType() == Agg->getType() && |
3750 | 7.54k | EV->getIndices() == Idxs6.32k ) { |
3751 | 6.32k | // insertvalue undef, (extractvalue y, n), n -> y |
3752 | 6.32k | if (match(Agg, m_Undef())) |
3753 | 3.12k | return EV->getAggregateOperand(); |
3754 | 3.19k | |
3755 | 3.19k | // insertvalue y, (extractvalue y, n), n -> y |
3756 | 3.19k | if (3.19k Agg == EV->getAggregateOperand()3.19k ) |
3757 | 3.17k | return Agg; |
3758 | 19.3k | } |
3759 | 19.3k | |
3760 | 19.3k | return nullptr; |
3761 | 19.3k | } |
3762 | | |
3763 | | Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val, |
3764 | | ArrayRef<unsigned> Idxs, |
3765 | 26.3k | const SimplifyQuery &Q) { |
3766 | 26.3k | return ::SimplifyInsertValueInst(Agg, Val, Idxs, Q, RecursionLimit); |
3767 | 26.3k | } |
3768 | | |
3769 | | /// Given operands for an ExtractValueInst, see if we can fold the result. |
3770 | | /// If not, this returns null. |
3771 | | static Value *SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs, |
3772 | 604k | const SimplifyQuery &, unsigned) { |
3773 | 604k | if (auto *CAgg = dyn_cast<Constant>(Agg)) |
3774 | 754 | return ConstantFoldExtractValueInstruction(CAgg, Idxs); |
3775 | 603k | |
3776 | 603k | // extractvalue x, (insertvalue y, elt, n), n -> elt |
3777 | 603k | unsigned NumIdxs = Idxs.size(); |
3778 | 604k | for (auto *IVI = dyn_cast<InsertValueInst>(Agg); IVI != nullptr; |
3779 | 603k | IVI = dyn_cast<InsertValueInst>(IVI->getAggregateOperand())1.27k ) { |
3780 | 3.04k | ArrayRef<unsigned> InsertValueIdxs = IVI->getIndices(); |
3781 | 3.04k | unsigned NumInsertValueIdxs = InsertValueIdxs.size(); |
3782 | 3.04k | unsigned NumCommonIdxs = std::min(NumInsertValueIdxs, NumIdxs); |
3783 | 3.04k | if (InsertValueIdxs.slice(0, NumCommonIdxs) == |
3784 | 3.04k | Idxs.slice(0, NumCommonIdxs)) { |
3785 | 1.77k | if (NumIdxs == NumInsertValueIdxs) |
3786 | 1.52k | return IVI->getInsertedValueOperand(); |
3787 | 246 | break; |
3788 | 246 | } |
3789 | 3.04k | } |
3790 | 603k | |
3791 | 602k | return nullptr; |
3792 | 604k | } |
3793 | | |
3794 | | Value *llvm::SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs, |
3795 | 604k | const SimplifyQuery &Q) { |
3796 | 604k | return ::SimplifyExtractValueInst(Agg, Idxs, Q, RecursionLimit); |
3797 | 604k | } |
3798 | | |
3799 | | /// Given operands for an ExtractElementInst, see if we can fold the result. |
3800 | | /// If not, this returns null. |
3801 | | static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const SimplifyQuery &, |
3802 | 255k | unsigned) { |
3803 | 255k | if (auto *CVec255k = dyn_cast<Constant>(Vec)) { |
3804 | 6.59k | if (auto *CIdx = dyn_cast<Constant>(Idx)) |
3805 | 6.28k | return ConstantFoldExtractElementInstruction(CVec, CIdx); |
3806 | 308 | |
3807 | 308 | // The index is not relevant if our vector is a splat. |
3808 | 308 | if (auto *308 Splat308 = CVec->getSplatValue()) |
3809 | 0 | return Splat; |
3810 | 308 | |
3811 | 308 | if (308 isa<UndefValue>(Vec)308 ) |
3812 | 1 | return UndefValue::get(Vec->getType()->getVectorElementType()); |
3813 | 248k | } |
3814 | 248k | |
3815 | 248k | // If extracting a specified index from the vector, see if we can recursively |
3816 | 248k | // find a previously computed scalar that was inserted into the vector. |
3817 | 248k | if (auto *248k IdxC248k = dyn_cast<ConstantInt>(Idx)) |
3818 | 247k | if (Value *247k Elt247k = findScalarElement(Vec, IdxC->getZExtValue())) |
3819 | 343 | return Elt; |
3820 | 248k | |
3821 | 248k | return nullptr; |
3822 | 248k | } |
3823 | | |
3824 | | Value *llvm::SimplifyExtractElementInst(Value *Vec, Value *Idx, |
3825 | 255k | const SimplifyQuery &Q) { |
3826 | 255k | return ::SimplifyExtractElementInst(Vec, Idx, Q, RecursionLimit); |
3827 | 255k | } |
3828 | | |
3829 | | /// See if we can fold the given phi. If not, returns null. |
3830 | 113M | static Value *SimplifyPHINode(PHINode *PN, const SimplifyQuery &Q) { |
3831 | 113M | // If all of the PHI's incoming values are the same then replace the PHI node |
3832 | 113M | // with the common value. |
3833 | 113M | Value *CommonValue = nullptr; |
3834 | 113M | bool HasUndefInput = false; |
3835 | 230M | for (Value *Incoming : PN->incoming_values()) { |
3836 | 230M | // If the incoming value is the phi node itself, it can safely be skipped. |
3837 | 230M | if (Incoming == PN230M ) continue254k ; |
3838 | 230M | if (230M isa<UndefValue>(Incoming)230M ) { |
3839 | 271k | // Remember that we saw an undef value, but otherwise ignore them. |
3840 | 271k | HasUndefInput = true; |
3841 | 271k | continue; |
3842 | 271k | } |
3843 | 230M | if (230M CommonValue && 230M Incoming != CommonValue116M ) |
3844 | 111M | return nullptr; // Not the same, bail out. |
3845 | 118M | CommonValue = Incoming; |
3846 | 118M | } |
3847 | 113M | |
3848 | 113M | // If CommonValue is null then all of the incoming values were either undef or |
3849 | 113M | // equal to the phi node itself. |
3850 | 1.96M | if (1.96M !CommonValue1.96M ) |
3851 | 1.20k | return UndefValue::get(PN->getType()); |
3852 | 1.96M | |
3853 | 1.96M | // If we have a PHI node like phi(X, undef, X), where X is defined by some |
3854 | 1.96M | // instruction, we cannot return X as the result of the PHI node unless it |
3855 | 1.96M | // dominates the PHI block. |
3856 | 1.96M | if (1.96M HasUndefInput1.96M ) |
3857 | 238k | return ValueDominatesPHI(CommonValue, PN, Q.DT) ? 238k CommonValue3.08k : nullptr235k ; |
3858 | 1.72M | |
3859 | 1.72M | return CommonValue; |
3860 | 1.72M | } |
3861 | | |
3862 | | static Value *SimplifyCastInst(unsigned CastOpc, Value *Op, |
3863 | 10.3M | Type *Ty, const SimplifyQuery &Q, unsigned MaxRecurse) { |
3864 | 10.3M | if (auto *C = dyn_cast<Constant>(Op)) |
3865 | 118k | return ConstantFoldCastOperand(CastOpc, C, Ty, Q.DL); |
3866 | 10.2M | |
3867 | 10.2M | if (auto *10.2M CI10.2M = dyn_cast<CastInst>(Op)) { |
3868 | 309k | auto *Src = CI->getOperand(0); |
3869 | 309k | Type *SrcTy = Src->getType(); |
3870 | 309k | Type *MidTy = CI->getType(); |
3871 | 309k | Type *DstTy = Ty; |
3872 | 309k | if (Src->getType() == Ty309k ) { |
3873 | 191k | auto FirstOp = static_cast<Instruction::CastOps>(CI->getOpcode()); |
3874 | 191k | auto SecondOp = static_cast<Instruction::CastOps>(CastOpc); |
3875 | 191k | Type *SrcIntPtrTy = |
3876 | 191k | SrcTy->isPtrOrPtrVectorTy() ? Q.DL.getIntPtrType(SrcTy)9.13k : nullptr182k ; |
3877 | 191k | Type *MidIntPtrTy = |
3878 | 191k | MidTy->isPtrOrPtrVectorTy() ? Q.DL.getIntPtrType(MidTy)6.79k : nullptr184k ; |
3879 | 191k | Type *DstIntPtrTy = |
3880 | 191k | DstTy->isPtrOrPtrVectorTy() ? Q.DL.getIntPtrType(DstTy)9.13k : nullptr182k ; |
3881 | 191k | if (CastInst::isEliminableCastPair(FirstOp, SecondOp, SrcTy, MidTy, DstTy, |
3882 | 191k | SrcIntPtrTy, MidIntPtrTy, |
3883 | 191k | DstIntPtrTy) == Instruction::BitCast) |
3884 | 131k | return Src; |
3885 | 10.1M | } |
3886 | 309k | } |
3887 | 10.1M | |
3888 | 10.1M | // bitcast x -> x |
3889 | 10.1M | if (10.1M CastOpc == Instruction::BitCast10.1M ) |
3890 | 4.11M | if (4.11M Op->getType() == Ty4.11M ) |
3891 | 5.62k | return Op; |
3892 | 10.1M | |
3893 | 10.1M | return nullptr; |
3894 | 10.1M | } |
3895 | | |
3896 | | Value *llvm::SimplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty, |
3897 | 10.3M | const SimplifyQuery &Q) { |
3898 | 10.3M | return ::SimplifyCastInst(CastOpc, Op, Ty, Q, RecursionLimit); |
3899 | 10.3M | } |
3900 | | |
3901 | | /// For the given destination element of a shuffle, peek through shuffles to |
3902 | | /// match a root vector source operand that contains that element in the same |
3903 | | /// vector lane (ie, the same mask index), so we can eliminate the shuffle(s). |
3904 | | static Value *foldIdentityShuffles(int DestElt, Value *Op0, Value *Op1, |
3905 | | int MaskVal, Value *RootVec, |
3906 | 539k | unsigned MaxRecurse) { |
3907 | 539k | if (!MaxRecurse--) |
3908 | 46 | return nullptr; |
3909 | 539k | |
3910 | 539k | // Bail out if any mask value is undefined. That kind of shuffle may be |
3911 | 539k | // simplified further based on demanded bits or other folds. |
3912 | 539k | if (539k MaskVal == -1539k ) |
3913 | 0 | return nullptr; |
3914 | 539k | |
3915 | 539k | // The mask value chooses which source operand we need to look at next. |
3916 | 539k | int InVecNumElts = Op0->getType()->getVectorNumElements(); |
3917 | 539k | int RootElt = MaskVal; |
3918 | 539k | Value *SourceOp = Op0; |
3919 | 539k | if (MaskVal >= InVecNumElts539k ) { |
3920 | 13.2k | RootElt = MaskVal - InVecNumElts; |
3921 | 13.2k | SourceOp = Op1; |
3922 | 13.2k | } |
3923 | 539k | |
3924 | 539k | // If the source operand is a shuffle itself, look through it to find the |
3925 | 539k | // matching root vector. |
3926 | 539k | if (auto *SourceShuf539k = dyn_cast<ShuffleVectorInst>(SourceOp)) { |
3927 | 38.4k | return foldIdentityShuffles( |
3928 | 38.4k | DestElt, SourceShuf->getOperand(0), SourceShuf->getOperand(1), |
3929 | 38.4k | SourceShuf->getMaskValue(RootElt), RootVec, MaxRecurse); |
3930 | 38.4k | } |
3931 | 501k | |
3932 | 501k | // TODO: Look through bitcasts? What if the bitcast changes the vector element |
3933 | 501k | // size? |
3934 | 501k | |
3935 | 501k | // The source operand is not a shuffle. Initialize the root vector value for |
3936 | 501k | // this shuffle if that has not been done yet. |
3937 | 501k | if (501k !RootVec501k ) |
3938 | 433k | RootVec = SourceOp; |
3939 | 501k | |
3940 | 501k | // Give up as soon as a source operand does not match the existing root value. |
3941 | 501k | if (RootVec != SourceOp) |
3942 | 3.55k | return nullptr; |
3943 | 497k | |
3944 | 497k | // The element must be coming from the same lane in the source vector |
3945 | 497k | // (although it may have crossed lanes in intermediate shuffles). |
3946 | 497k | if (497k RootElt != DestElt497k ) |
3947 | 295k | return nullptr; |
3948 | 201k | |
3949 | 201k | return RootVec; |
3950 | 201k | } |
3951 | | |
3952 | | static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask, |
3953 | | Type *RetTy, const SimplifyQuery &Q, |
3954 | 464k | unsigned MaxRecurse) { |
3955 | 464k | if (isa<UndefValue>(Mask)) |
3956 | 9 | return UndefValue::get(RetTy); |
3957 | 464k | |
3958 | 464k | Type *InVecTy = Op0->getType(); |
3959 | 464k | unsigned MaskNumElts = Mask->getType()->getVectorNumElements(); |
3960 | 464k | unsigned InVecNumElts = InVecTy->getVectorNumElements(); |
3961 | 464k | |
3962 | 464k | SmallVector<int, 32> Indices; |
3963 | 464k | ShuffleVectorInst::getShuffleMask(Mask, Indices); |
3964 | 464k | assert(MaskNumElts == Indices.size() && |
3965 | 464k | "Size of Indices not same as number of mask elements?"); |
3966 | 464k | |
3967 | 464k | // Canonicalization: If mask does not select elements from an input vector, |
3968 | 464k | // replace that input vector with undef. |
3969 | 464k | bool MaskSelects0 = false, MaskSelects1 = false; |
3970 | 2.70M | for (unsigned i = 0; i != MaskNumElts2.70M ; ++i2.23M ) { |
3971 | 2.23M | if (Indices[i] == -1) |
3972 | 100k | continue; |
3973 | 2.13M | if (2.13M (unsigned)Indices[i] < InVecNumElts2.13M ) |
3974 | 1.65M | MaskSelects0 = true; |
3975 | 2.13M | else |
3976 | 479k | MaskSelects1 = true; |
3977 | 2.23M | } |
3978 | 464k | if (!MaskSelects0) |
3979 | 2.72k | Op0 = UndefValue::get(InVecTy); |
3980 | 464k | if (!MaskSelects1) |
3981 | 385k | Op1 = UndefValue::get(InVecTy); |
3982 | 464k | |
3983 | 464k | auto *Op0Const = dyn_cast<Constant>(Op0); |
3984 | 464k | auto *Op1Const = dyn_cast<Constant>(Op1); |
3985 | 464k | |
3986 | 464k | // If all operands are constant, constant fold the shuffle. |
3987 | 464k | if (Op0Const && 464k Op1Const12.8k ) |
3988 | 7.49k | return ConstantFoldShuffleVectorInstruction(Op0Const, Op1Const, Mask); |
3989 | 457k | |
3990 | 457k | // Canonicalization: if only one input vector is constant, it shall be the |
3991 | 457k | // second one. |
3992 | 457k | if (457k Op0Const && 457k !Op1Const5.32k ) { |
3993 | 5.32k | std::swap(Op0, Op1); |
3994 | 5.32k | ShuffleVectorInst::commuteShuffleMask(Indices, InVecNumElts); |
3995 | 5.32k | } |
3996 | 457k | |
3997 | 457k | // A shuffle of a splat is always the splat itself. Legal if the shuffle's |
3998 | 457k | // value type is same as the input vectors' type. |
3999 | 457k | if (auto *OpShuf = dyn_cast<ShuffleVectorInst>(Op0)) |
4000 | 21.6k | if (21.6k isa<UndefValue>(Op1) && 21.6k RetTy == InVecTy11.7k && |
4001 | 1.57k | OpShuf->getMask()->getSplatValue()) |
4002 | 11 | return Op0; |
4003 | 457k | |
4004 | 457k | // Don't fold a shuffle with undef mask elements. This may get folded in a |
4005 | 457k | // better way using demanded bits or other analysis. |
4006 | 457k | // TODO: Should we allow this? |
4007 | 457k | if (457k find(Indices, -1) != Indices.end()457k ) |
4008 | 23.5k | return nullptr; |
4009 | 433k | |
4010 | 433k | // Check if every element of this shuffle can be mapped back to the |
4011 | 433k | // corresponding element of a single root vector. If so, we don't need this |
4012 | 433k | // shuffle. This handles simple identity shuffles as well as chains of |
4013 | 433k | // shuffles that may widen/narrow and/or move elements across lanes and back. |
4014 | 433k | Value *RootVec = nullptr; |
4015 | 506k | for (unsigned i = 0; i != MaskNumElts506k ; ++i72.2k ) { |
4016 | 501k | // Note that recursion is limited for each vector element, so if any element |
4017 | 501k | // exceeds the limit, this will fail to simplify. |
4018 | 501k | RootVec = |
4019 | 501k | foldIdentityShuffles(i, Op0, Op1, Indices[i], RootVec, MaxRecurse); |
4020 | 501k | |
4021 | 501k | // We can't replace a widening/narrowing shuffle with one of its operands. |
4022 | 501k | if (!RootVec || 501k RootVec->getType() != RetTy201k ) |
4023 | 428k | return nullptr; |
4024 | 501k | } |
4025 | 4.83k | return RootVec; |
4026 | 464k | } |
4027 | | |
4028 | | /// Given operands for a ShuffleVectorInst, fold the result or return null. |
4029 | | Value *llvm::SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask, |
4030 | 464k | Type *RetTy, const SimplifyQuery &Q) { |
4031 | 464k | return ::SimplifyShuffleVectorInst(Op0, Op1, Mask, RetTy, Q, RecursionLimit); |
4032 | 464k | } |
4033 | | |
4034 | | /// Given operands for an FAdd, see if we can fold the result. If not, this |
4035 | | /// returns null. |
4036 | | static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, |
4037 | 1.02M | const SimplifyQuery &Q, unsigned MaxRecurse) { |
4038 | 1.02M | if (Constant *C = foldOrCommuteConstant(Instruction::FAdd, Op0, Op1, Q)) |
4039 | 555 | return C; |
4040 | 1.02M | |
4041 | 1.02M | // fadd X, -0 ==> X |
4042 | 1.02M | if (1.02M match(Op1, m_NegZero())1.02M ) |
4043 | 3 | return Op0; |
4044 | 1.02M | |
4045 | 1.02M | // fadd X, 0 ==> X, when we know X is not -0 |
4046 | 1.02M | if (1.02M match(Op1, m_Zero()) && |
4047 | 6.99k | (FMF.noSignedZeros() || 6.99k CannotBeNegativeZero(Op0, Q.TLI)6.96k )) |
4048 | 48 | return Op0; |
4049 | 1.02M | |
4050 | 1.02M | // fadd [nnan ninf] X, (fsub [nnan ninf] 0, X) ==> 0 |
4051 | 1.02M | // where nnan and ninf have to occur at least once somewhere in this |
4052 | 1.02M | // expression |
4053 | 1.02M | Value *SubOp = nullptr; |
4054 | 1.02M | if (match(Op1, m_FSub(m_AnyZero(), m_Specific(Op0)))) |
4055 | 0 | SubOp = Op1; |
4056 | 1.02M | else if (1.02M match(Op0, m_FSub(m_AnyZero(), m_Specific(Op1)))1.02M ) |
4057 | 9 | SubOp = Op0; |
4058 | 1.02M | if (SubOp1.02M ) { |
4059 | 9 | Instruction *FSub = cast<Instruction>(SubOp); |
4060 | 9 | if ((FMF.noNaNs() || 9 FSub->hasNoNaNs()2 ) && |
4061 | 9 | (FMF.noInfs() || 9 FSub->hasNoInfs()6 )) |
4062 | 4 | return Constant::getNullValue(Op0->getType()); |
4063 | 1.02M | } |
4064 | 1.02M | |
4065 | 1.02M | return nullptr; |
4066 | 1.02M | } |
4067 | | |
4068 | | /// Given operands for an FSub, see if we can fold the result. If not, this |
4069 | | /// returns null. |
4070 | | static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, |
4071 | 277k | const SimplifyQuery &Q, unsigned MaxRecurse) { |
4072 | 277k | if (Constant *C = foldOrCommuteConstant(Instruction::FSub, Op0, Op1, Q)) |
4073 | 2.12k | return C; |
4074 | 275k | |
4075 | 275k | // fsub X, 0 ==> X |
4076 | 275k | if (275k match(Op1, m_Zero())275k ) |
4077 | 37 | return Op0; |
4078 | 275k | |
4079 | 275k | // fsub X, -0 ==> X, when we know X is not -0 |
4080 | 275k | if (275k match(Op1, m_NegZero()) && |
4081 | 0 | (FMF.noSignedZeros() || 0 CannotBeNegativeZero(Op0, Q.TLI)0 )) |
4082 | 0 | return Op0; |
4083 | 275k | |
4084 | 275k | // fsub -0.0, (fsub -0.0, X) ==> X |
4085 | 275k | Value *X; |
4086 | 275k | if (match(Op0, m_NegZero()) && 275k match(Op1, m_FSub(m_NegZero(), m_Value(X)))76.4k ) |
4087 | 7 | return X; |
4088 | 275k | |
4089 | 275k | // fsub 0.0, (fsub 0.0, X) ==> X if signed zeros are ignored. |
4090 | 275k | if (275k FMF.noSignedZeros() && 275k match(Op0, m_AnyZero())219 && |
4091 | 50 | match(Op1, m_FSub(m_AnyZero(), m_Value(X)))) |
4092 | 1 | return X; |
4093 | 275k | |
4094 | 275k | // fsub nnan x, x ==> 0.0 |
4095 | 275k | if (275k FMF.noNaNs() && 275k Op0 == Op1200 ) |
4096 | 1 | return Constant::getNullValue(Op0->getType()); |
4097 | 275k | |
4098 | 275k | return nullptr; |
4099 | 275k | } |
4100 | | |
4101 | | /// Given the operands for an FMul, see if we can fold the result |
4102 | | static Value *SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, |
4103 | 873k | const SimplifyQuery &Q, unsigned MaxRecurse) { |
4104 | 873k | if (Constant *C = foldOrCommuteConstant(Instruction::FMul, Op0, Op1, Q)) |
4105 | 1.70k | return C; |
4106 | 872k | |
4107 | 872k | // fmul X, 1.0 ==> X |
4108 | 872k | if (872k match(Op1, m_FPOne())872k ) |
4109 | 479 | return Op0; |
4110 | 871k | |
4111 | 871k | // fmul nnan nsz X, 0 ==> 0 |
4112 | 871k | if (871k FMF.noNaNs() && 871k FMF.noSignedZeros()1.57k && match(Op1, m_AnyZero())1.55k ) |
4113 | 6 | return Op1; |
4114 | 871k | |
4115 | 871k | return nullptr; |
4116 | 871k | } |
4117 | | |
4118 | | Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, |
4119 | 1.00M | const SimplifyQuery &Q) { |
4120 | 1.00M | return ::SimplifyFAddInst(Op0, Op1, FMF, Q, RecursionLimit); |
4121 | 1.00M | } |
4122 | | |
4123 | | |
4124 | | Value *llvm::SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, |
4125 | 271k | const SimplifyQuery &Q) { |
4126 | 271k | return ::SimplifyFSubInst(Op0, Op1, FMF, Q, RecursionLimit); |
4127 | 271k | } |
4128 | | |
4129 | | Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, |
4130 | 850k | const SimplifyQuery &Q) { |
4131 | 850k | return ::SimplifyFMulInst(Op0, Op1, FMF, Q, RecursionLimit); |
4132 | 850k | } |
4133 | | |
4134 | | static Value *SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF, |
4135 | 739k | const SimplifyQuery &Q, unsigned) { |
4136 | 739k | if (Constant *C = foldOrCommuteConstant(Instruction::FDiv, Op0, Op1, Q)) |
4137 | 2.13k | return C; |
4138 | 737k | |
4139 | 737k | // undef / X -> undef (the undef could be a snan). |
4140 | 737k | if (737k match(Op0, m_Undef())737k ) |
4141 | 11 | return Op0; |
4142 | 737k | |
4143 | 737k | // X / undef -> undef |
4144 | 737k | if (737k match(Op1, m_Undef())737k ) |
4145 | 1 | return Op1; |
4146 | 737k | |
4147 | 737k | // X / 1.0 -> X |
4148 | 737k | if (737k match(Op1, m_FPOne())737k ) |
4149 | 21 | return Op0; |
4150 | 737k | |
4151 | 737k | // 0 / X -> 0 |
4152 | 737k | // Requires that NaNs are off (X could be zero) and signed zeroes are |
4153 | 737k | // ignored (X could be positive or negative, so the output sign is unknown). |
4154 | 737k | if (737k FMF.noNaNs() && 737k FMF.noSignedZeros()310 && match(Op0, m_AnyZero())291 ) |
4155 | 1 | return Op0; |
4156 | 737k | |
4157 | 737k | if (737k FMF.noNaNs()737k ) { |
4158 | 309 | // X / X -> 1.0 is legal when NaNs are ignored. |
4159 | 309 | if (Op0 == Op1) |
4160 | 1 | return ConstantFP::get(Op0->getType(), 1.0); |
4161 | 308 | |
4162 | 308 | // -X / X -> -1.0 and |
4163 | 308 | // X / -X -> -1.0 are legal when NaNs are ignored. |
4164 | 308 | // We can ignore signed zeros because +-0.0/+-0.0 is NaN and ignored. |
4165 | 308 | if (308 (BinaryOperator::isFNeg(Op0, /*IgnoreZeroSign=*/true) && |
4166 | 3 | BinaryOperator::getFNegArgument(Op0) == Op1) || |
4167 | 306 | (BinaryOperator::isFNeg(Op1, /*IgnoreZeroSign=*/true) && |
4168 | 3 | BinaryOperator::getFNegArgument(Op1) == Op0)) |
4169 | 4 | return ConstantFP::get(Op0->getType(), -1.0); |
4170 | 737k | } |
4171 | 737k | |
4172 | 737k | return nullptr; |
4173 | 737k | } |
4174 | | |
4175 | | Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF, |
4176 | 738k | const SimplifyQuery &Q) { |
4177 | 738k | return ::SimplifyFDivInst(Op0, Op1, FMF, Q, RecursionLimit); |
4178 | 738k | } |
4179 | | |
4180 | | static Value *SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF, |
4181 | 72 | const SimplifyQuery &Q, unsigned) { |
4182 | 72 | if (Constant *C = foldOrCommuteConstant(Instruction::FRem, Op0, Op1, Q)) |
4183 | 1 | return C; |
4184 | 71 | |
4185 | 71 | // undef % X -> undef (the undef could be a snan). |
4186 | 71 | if (71 match(Op0, m_Undef())71 ) |
4187 | 0 | return Op0; |
4188 | 71 | |
4189 | 71 | // X % undef -> undef |
4190 | 71 | if (71 match(Op1, m_Undef())71 ) |
4191 | 0 | return Op1; |
4192 | 71 | |
4193 | 71 | // 0 % X -> 0 |
4194 | 71 | // Requires that NaNs are off (X could be zero) and signed zeroes are |
4195 | 71 | // ignored (X could be positive or negative, so the output sign is unknown). |
4196 | 71 | if (71 FMF.noNaNs() && 71 FMF.noSignedZeros()0 && match(Op0, m_AnyZero())0 ) |
4197 | 0 | return Op0; |
4198 | 71 | |
4199 | 71 | return nullptr; |
4200 | 71 | } |
4201 | | |
4202 | | Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF, |
4203 | 72 | const SimplifyQuery &Q) { |
4204 | 72 | return ::SimplifyFRemInst(Op0, Op1, FMF, Q, RecursionLimit); |
4205 | 72 | } |
4206 | | |
4207 | | //=== Helper functions for higher up the class hierarchy. |
4208 | | |
4209 | | /// Given operands for a BinaryOperator, see if we can fold the result. |
4210 | | /// If not, this returns null. |
4211 | | static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, |
4212 | 33.2M | const SimplifyQuery &Q, unsigned MaxRecurse) { |
4213 | 33.2M | switch (Opcode) { |
4214 | 10.3M | case Instruction::Add: |
4215 | 10.3M | return SimplifyAddInst(LHS, RHS, false, false, Q, MaxRecurse); |
4216 | 1.19M | case Instruction::Sub: |
4217 | 1.19M | return SimplifySubInst(LHS, RHS, false, false, Q, MaxRecurse); |
4218 | 12.3M | case Instruction::Mul: |
4219 | 12.3M | return SimplifyMulInst(LHS, RHS, Q, MaxRecurse); |
4220 | 46.0k | case Instruction::SDiv: |
4221 | 46.0k | return SimplifySDivInst(LHS, RHS, Q, MaxRecurse); |
4222 | 51.4k | case Instruction::UDiv: |
4223 | 51.4k | return SimplifyUDivInst(LHS, RHS, Q, MaxRecurse); |
4224 | 15.1k | case Instruction::SRem: |
4225 | 15.1k | return SimplifySRemInst(LHS, RHS, Q, MaxRecurse); |
4226 | 7.33k | case Instruction::URem: |
4227 | 7.33k | return SimplifyURemInst(LHS, RHS, Q, MaxRecurse); |
4228 | 1.40M | case Instruction::Shl: |
4229 | 1.40M | return SimplifyShlInst(LHS, RHS, false, false, Q, MaxRecurse); |
4230 | 163k | case Instruction::LShr: |
4231 | 163k | return SimplifyLShrInst(LHS, RHS, false, Q, MaxRecurse); |
4232 | 36.1k | case Instruction::AShr: |
4233 | 36.1k | return SimplifyAShrInst(LHS, RHS, false, Q, MaxRecurse); |
4234 | 2.24M | case Instruction::And: |
4235 | 2.24M | return SimplifyAndInst(LHS, RHS, Q, MaxRecurse); |
4236 | 4.52M | case Instruction::Or: |
4237 | 4.52M | return SimplifyOrInst(LHS, RHS, Q, MaxRecurse); |
4238 | 812k | case Instruction::Xor: |
4239 | 812k | return SimplifyXorInst(LHS, RHS, Q, MaxRecurse); |
4240 | 476 | case Instruction::FAdd: |
4241 | 476 | return SimplifyFAddInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse); |
4242 | 14 | case Instruction::FSub: |
4243 | 14 | return SimplifyFSubInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse); |
4244 | 1.79k | case Instruction::FMul: |
4245 | 1.79k | return SimplifyFMulInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse); |
4246 | 9 | case Instruction::FDiv: |
4247 | 9 | return SimplifyFDivInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse); |
4248 | 0 | case Instruction::FRem: |
4249 | 0 | return SimplifyFRemInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse); |
4250 | 0 | default: |
4251 | 0 | llvm_unreachable("Unexpected opcode"); |
4252 | 0 | } |
4253 | 0 | } |
4254 | | |
4255 | | /// Given operands for a BinaryOperator, see if we can fold the result. |
4256 | | /// If not, this returns null. |
4257 | | /// In contrast to SimplifyBinOp, try to use FastMathFlag when folding the |
4258 | | /// result. In case we don't need FastMathFlags, simply fall to SimplifyBinOp. |
4259 | | static Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, |
4260 | | const FastMathFlags &FMF, const SimplifyQuery &Q, |
4261 | 45.9k | unsigned MaxRecurse) { |
4262 | 45.9k | switch (Opcode) { |
4263 | 17.2k | case Instruction::FAdd: |
4264 | 17.2k | return SimplifyFAddInst(LHS, RHS, FMF, Q, MaxRecurse); |
4265 | 6.38k | case Instruction::FSub: |
4266 | 6.38k | return SimplifyFSubInst(LHS, RHS, FMF, Q, MaxRecurse); |
4267 | 21.9k | case Instruction::FMul: |
4268 | 21.9k | return SimplifyFMulInst(LHS, RHS, FMF, Q, MaxRecurse); |
4269 | 426 | case Instruction::FDiv: |
4270 | 426 | return SimplifyFDivInst(LHS, RHS, FMF, Q, MaxRecurse); |
4271 | 0 | default: |
4272 | 0 | return SimplifyBinOp(Opcode, LHS, RHS, Q, MaxRecurse); |
4273 | 0 | } |
4274 | 0 | } |
4275 | | |
4276 | | Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, |
4277 | 4.65M | const SimplifyQuery &Q) { |
4278 | 4.65M | return ::SimplifyBinOp(Opcode, LHS, RHS, Q, RecursionLimit); |
4279 | 4.65M | } |
4280 | | |
4281 | | Value *llvm::SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, |
4282 | 45.9k | FastMathFlags FMF, const SimplifyQuery &Q) { |
4283 | 45.9k | return ::SimplifyFPBinOp(Opcode, LHS, RHS, FMF, Q, RecursionLimit); |
4284 | 45.9k | } |
4285 | | |
4286 | | /// Given operands for a CmpInst, see if we can fold the result. |
4287 | | static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, |
4288 | 7.05M | const SimplifyQuery &Q, unsigned MaxRecurse) { |
4289 | 7.05M | if (CmpInst::isIntPredicate((CmpInst::Predicate)Predicate)) |
4290 | 6.91M | return SimplifyICmpInst(Predicate, LHS, RHS, Q, MaxRecurse); |
4291 | 141k | return SimplifyFCmpInst(Predicate, LHS, RHS, FastMathFlags(), Q, MaxRecurse); |
4292 | 141k | } |
4293 | | |
4294 | | Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, |
4295 | 396k | const SimplifyQuery &Q) { |
4296 | 396k | return ::SimplifyCmpInst(Predicate, LHS, RHS, Q, RecursionLimit); |
4297 | 396k | } |
4298 | | |
4299 | 210k | static bool IsIdempotent(Intrinsic::ID ID) { |
4300 | 210k | switch (ID) { |
4301 | 128k | default: return false; |
4302 | 210k | |
4303 | 210k | // Unary idempotent: f(f(x)) = f(x) |
4304 | 82.3k | case Intrinsic::fabs: |
4305 | 82.3k | case Intrinsic::floor: |
4306 | 82.3k | case Intrinsic::ceil: |
4307 | 82.3k | case Intrinsic::trunc: |
4308 | 82.3k | case Intrinsic::rint: |
4309 | 82.3k | case Intrinsic::nearbyint: |
4310 | 82.3k | case Intrinsic::round: |
4311 | 82.3k | case Intrinsic::canonicalize: |
4312 | 82.3k | return true; |
4313 | 0 | } |
4314 | 0 | } |
4315 | | |
4316 | | static Value *SimplifyRelativeLoad(Constant *Ptr, Constant *Offset, |
4317 | 9 | const DataLayout &DL) { |
4318 | 9 | GlobalValue *PtrSym; |
4319 | 9 | APInt PtrOffset; |
4320 | 9 | if (!IsConstantOffsetFromGlobal(Ptr, PtrSym, PtrOffset, DL)) |
4321 | 1 | return nullptr; |
4322 | 8 | |
4323 | 8 | Type *Int8PtrTy = Type::getInt8PtrTy(Ptr->getContext()); |
4324 | 8 | Type *Int32Ty = Type::getInt32Ty(Ptr->getContext()); |
4325 | 8 | Type *Int32PtrTy = Int32Ty->getPointerTo(); |
4326 | 8 | Type *Int64Ty = Type::getInt64Ty(Ptr->getContext()); |
4327 | 8 | |
4328 | 8 | auto *OffsetConstInt = dyn_cast<ConstantInt>(Offset); |
4329 | 8 | if (!OffsetConstInt || 8 OffsetConstInt->getType()->getBitWidth() > 648 ) |
4330 | 0 | return nullptr; |
4331 | 8 | |
4332 | 8 | uint64_t OffsetInt = OffsetConstInt->getSExtValue(); |
4333 | 8 | if (OffsetInt % 4 != 0) |
4334 | 1 | return nullptr; |
4335 | 7 | |
4336 | 7 | Constant *C = ConstantExpr::getGetElementPtr( |
4337 | 7 | Int32Ty, ConstantExpr::getBitCast(Ptr, Int32PtrTy), |
4338 | 7 | ConstantInt::get(Int64Ty, OffsetInt / 4)); |
4339 | 7 | Constant *Loaded = ConstantFoldLoadFromConstPtr(C, Int32Ty, DL); |
4340 | 7 | if (!Loaded) |
4341 | 0 | return nullptr; |
4342 | 7 | |
4343 | 7 | auto *LoadedCE = dyn_cast<ConstantExpr>(Loaded); |
4344 | 7 | if (!LoadedCE) |
4345 | 0 | return nullptr; |
4346 | 7 | |
4347 | 7 | if (7 LoadedCE->getOpcode() == Instruction::Trunc7 ) { |
4348 | 6 | LoadedCE = dyn_cast<ConstantExpr>(LoadedCE->getOperand(0)); |
4349 | 6 | if (!LoadedCE) |
4350 | 0 | return nullptr; |
4351 | 7 | } |
4352 | 7 | |
4353 | 7 | if (7 LoadedCE->getOpcode() != Instruction::Sub7 ) |
4354 | 1 | return nullptr; |
4355 | 6 | |
4356 | 6 | auto *LoadedLHS = dyn_cast<ConstantExpr>(LoadedCE->getOperand(0)); |
4357 | 6 | if (!LoadedLHS || 6 LoadedLHS->getOpcode() != Instruction::PtrToInt5 ) |
4358 | 1 | return nullptr; |
4359 | 5 | auto *LoadedLHSPtr = LoadedLHS->getOperand(0); |
4360 | 5 | |
4361 | 5 | Constant *LoadedRHS = LoadedCE->getOperand(1); |
4362 | 5 | GlobalValue *LoadedRHSSym; |
4363 | 5 | APInt LoadedRHSOffset; |
4364 | 5 | if (!IsConstantOffsetFromGlobal(LoadedRHS, LoadedRHSSym, LoadedRHSOffset, |
4365 | 5 | DL) || |
4366 | 5 | PtrSym != LoadedRHSSym4 || PtrOffset != LoadedRHSOffset4 ) |
4367 | 1 | return nullptr; |
4368 | 4 | |
4369 | 4 | return ConstantExpr::getBitCast(LoadedLHSPtr, Int8PtrTy); |
4370 | 4 | } |
4371 | | |
4372 | 724 | static bool maskIsAllZeroOrUndef(Value *Mask) { |
4373 | 724 | auto *ConstMask = dyn_cast<Constant>(Mask); |
4374 | 724 | if (!ConstMask) |
4375 | 707 | return false; |
4376 | 17 | if (17 ConstMask->isNullValue() || 17 isa<UndefValue>(ConstMask)15 ) |
4377 | 4 | return true; |
4378 | 29 | for (unsigned I = 0, E = ConstMask->getType()->getVectorNumElements(); 13 I != E29 ; |
4379 | 16 | ++I16 ) { |
4380 | 29 | if (auto *MaskElt = ConstMask->getAggregateElement(I)) |
4381 | 29 | if (29 MaskElt->isNullValue() || 29 isa<UndefValue>(MaskElt)13 ) |
4382 | 16 | continue; |
4383 | 13 | return false; |
4384 | 13 | } |
4385 | 0 | return true; |
4386 | 724 | } |
4387 | | |
4388 | | template <typename IterTy> |
4389 | | static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd, |
4390 | 4.15M | const SimplifyQuery &Q, unsigned MaxRecurse) { |
4391 | 4.15M | Intrinsic::ID IID = F->getIntrinsicID(); |
4392 | 4.15M | unsigned NumOperands = std::distance(ArgBegin, ArgEnd); |
4393 | 4.15M | |
4394 | 4.15M | // Unary Ops |
4395 | 4.15M | if (NumOperands == 14.15M ) { |
4396 | 210k | // Perform idempotent optimizations |
4397 | 210k | if (IsIdempotent(IID)210k ) { |
4398 | 82.3k | if (IntrinsicInst *II82.3k = dyn_cast<IntrinsicInst>(*ArgBegin)) { |
4399 | 569 | if (II->getIntrinsicID() == IID) |
4400 | 18 | return II; |
4401 | 210k | } |
4402 | 82.3k | } |
4403 | 210k | |
4404 | 210k | switch (IID) { |
4405 | 57.6k | case Intrinsic::fabs: { |
4406 | 57.6k | if (SignBitMustBeZero(*ArgBegin, Q.TLI)) |
4407 | 20 | return *ArgBegin; |
4408 | 57.6k | return nullptr; |
4409 | 57.6k | } |
4410 | 152k | default: |
4411 | 152k | return nullptr; |
4412 | 3.94M | } |
4413 | 3.94M | } |
4414 | 3.94M | |
4415 | 3.94M | // Binary Ops |
4416 | 3.94M | if (3.94M NumOperands == 23.94M ) { |
4417 | 2.91M | Value *LHS = *ArgBegin; |
4418 | 2.91M | Value *RHS = *(ArgBegin + 1); |
4419 | 2.91M | Type *ReturnType = F->getReturnType(); |
4420 | 2.91M | |
4421 | 2.91M | switch (IID) { |
4422 | 489 | case Intrinsic::usub_with_overflow: |
4423 | 489 | case Intrinsic::ssub_with_overflow: { |
4424 | 489 | // X - X -> { 0, false } |
4425 | 489 | if (LHS == RHS) |
4426 | 4 | return Constant::getNullValue(ReturnType); |
4427 | 485 | |
4428 | 485 | // X - undef -> undef |
4429 | 485 | // undef - X -> undef |
4430 | 485 | if (485 isa<UndefValue>(LHS) || 485 isa<UndefValue>(RHS)481 ) |
4431 | 8 | return UndefValue::get(ReturnType); |
4432 | 477 | |
4433 | 477 | return nullptr; |
4434 | 477 | } |
4435 | 687 | case Intrinsic::uadd_with_overflow: |
4436 | 687 | case Intrinsic::sadd_with_overflow: { |
4437 | 687 | // X + undef -> undef |
4438 | 687 | if (isa<UndefValue>(LHS) || 687 isa<UndefValue>(RHS)682 ) |
4439 | 9 | return UndefValue::get(ReturnType); |
4440 | 678 | |
4441 | 678 | return nullptr; |
4442 | 678 | } |
4443 | 28.5k | case Intrinsic::umul_with_overflow: |
4444 | 28.5k | case Intrinsic::smul_with_overflow: { |
4445 | 28.5k | // 0 * X -> { 0, false } |
4446 | 28.5k | // X * 0 -> { 0, false } |
4447 | 28.5k | if (match(LHS, m_Zero()) || 28.5k match(RHS, m_Zero())28.5k ) |
4448 | 9 | return Constant::getNullValue(ReturnType); |
4449 | 28.5k | |
4450 | 28.5k | // undef * X -> { 0, false } |
4451 | 28.5k | // X * undef -> { 0, false } |
4452 | 28.5k | if (28.5k match(LHS, m_Undef()) || 28.5k match(RHS, m_Undef())28.4k ) |
4453 | 8 | return Constant::getNullValue(ReturnType); |
4454 | 28.4k | |
4455 | 28.4k | return nullptr; |
4456 | 28.4k | } |
4457 | 9 | case Intrinsic::load_relative: { |
4458 | 9 | Constant *C0 = dyn_cast<Constant>(LHS); |
4459 | 9 | Constant *C1 = dyn_cast<Constant>(RHS); |
4460 | 9 | if (C0 && 9 C19 ) |
4461 | 9 | return SimplifyRelativeLoad(C0, C1, Q.DL); |
4462 | 0 | return nullptr; |
4463 | 0 | } |
4464 | 2.88M | default: |
4465 | 2.88M | return nullptr; |
4466 | 1.03M | } |
4467 | 1.03M | } |
4468 | 1.03M | |
4469 | 1.03M | // Simplify calls to llvm.masked.load.* |
4470 | 1.03M | switch (IID) { |
4471 | 724 | case Intrinsic::masked_load: { |
4472 | 724 | Value *MaskArg = ArgBegin[2]; |
4473 | 724 | Value *PassthruArg = ArgBegin[3]; |
4474 | 724 | // If the mask is all zeros or undef, the "passthru" argument is the result. |
4475 | 724 | if (maskIsAllZeroOrUndef(MaskArg)) |
4476 | 4 | return PassthruArg; |
4477 | 720 | return nullptr; |
4478 | 720 | } |
4479 | 1.03M | default: |
4480 | 1.03M | return nullptr; |
4481 | 0 | } |
4482 | 0 | } InstructionSimplify.cpp:llvm::Value* SimplifyIntrinsic<llvm::Use*>(llvm::Function*, llvm::Use*, llvm::Use*, llvm::SimplifyQuery const&, unsigned int) Line | Count | Source | 4390 | 4.15M | const SimplifyQuery &Q, unsigned MaxRecurse) { | 4391 | 4.15M | Intrinsic::ID IID = F->getIntrinsicID(); | 4392 | 4.15M | unsigned NumOperands = std::distance(ArgBegin, ArgEnd); | 4393 | 4.15M | | 4394 | 4.15M | // Unary Ops | 4395 | 4.15M | if (NumOperands == 14.15M ) { | 4396 | 210k | // Perform idempotent optimizations | 4397 | 210k | if (IsIdempotent(IID)210k ) { | 4398 | 82.3k | if (IntrinsicInst *II82.3k = dyn_cast<IntrinsicInst>(*ArgBegin)) { | 4399 | 569 | if (II->getIntrinsicID() == IID) | 4400 | 18 | return II; | 4401 | 210k | } | 4402 | 82.3k | } | 4403 | 210k | | 4404 | 210k | switch (IID) { | 4405 | 57.6k | case Intrinsic::fabs: { | 4406 | 57.6k | if (SignBitMustBeZero(*ArgBegin, Q.TLI)) | 4407 | 20 | return *ArgBegin; | 4408 | 57.6k | return nullptr; | 4409 | 57.6k | } | 4410 | 152k | default: | 4411 | 152k | return nullptr; | 4412 | 3.94M | } | 4413 | 3.94M | } | 4414 | 3.94M | | 4415 | 3.94M | // Binary Ops | 4416 | 3.94M | if (3.94M NumOperands == 23.94M ) { | 4417 | 2.91M | Value *LHS = *ArgBegin; | 4418 | 2.91M | Value *RHS = *(ArgBegin + 1); | 4419 | 2.91M | Type *ReturnType = F->getReturnType(); | 4420 | 2.91M | | 4421 | 2.91M | switch (IID) { | 4422 | 489 | case Intrinsic::usub_with_overflow: | 4423 | 489 | case Intrinsic::ssub_with_overflow: { | 4424 | 489 | // X - X -> { 0, false } | 4425 | 489 | if (LHS == RHS) | 4426 | 4 | return Constant::getNullValue(ReturnType); | 4427 | 485 | | 4428 | 485 | // X - undef -> undef | 4429 | 485 | // undef - X -> undef | 4430 | 485 | if (485 isa<UndefValue>(LHS) || 485 isa<UndefValue>(RHS)481 ) | 4431 | 8 | return UndefValue::get(ReturnType); | 4432 | 477 | | 4433 | 477 | return nullptr; | 4434 | 477 | } | 4435 | 687 | case Intrinsic::uadd_with_overflow: | 4436 | 687 | case Intrinsic::sadd_with_overflow: { | 
4437 | 687 | // X + undef -> undef | 4438 | 687 | if (isa<UndefValue>(LHS) || 687 isa<UndefValue>(RHS)682 ) | 4439 | 9 | return UndefValue::get(ReturnType); | 4440 | 678 | | 4441 | 678 | return nullptr; | 4442 | 678 | } | 4443 | 28.5k | case Intrinsic::umul_with_overflow: | 4444 | 28.5k | case Intrinsic::smul_with_overflow: { | 4445 | 28.5k | // 0 * X -> { 0, false } | 4446 | 28.5k | // X * 0 -> { 0, false } | 4447 | 28.5k | if (match(LHS, m_Zero()) || 28.5k match(RHS, m_Zero())28.5k ) | 4448 | 9 | return Constant::getNullValue(ReturnType); | 4449 | 28.5k | | 4450 | 28.5k | // undef * X -> { 0, false } | 4451 | 28.5k | // X * undef -> { 0, false } | 4452 | 28.5k | if (28.5k match(LHS, m_Undef()) || 28.5k match(RHS, m_Undef())28.4k ) | 4453 | 8 | return Constant::getNullValue(ReturnType); | 4454 | 28.4k | | 4455 | 28.4k | return nullptr; | 4456 | 28.4k | } | 4457 | 9 | case Intrinsic::load_relative: { | 4458 | 9 | Constant *C0 = dyn_cast<Constant>(LHS); | 4459 | 9 | Constant *C1 = dyn_cast<Constant>(RHS); | 4460 | 9 | if (C0 && 9 C19 ) | 4461 | 9 | return SimplifyRelativeLoad(C0, C1, Q.DL); | 4462 | 0 | return nullptr; | 4463 | 0 | } | 4464 | 2.88M | default: | 4465 | 2.88M | return nullptr; | 4466 | 1.03M | } | 4467 | 1.03M | } | 4468 | 1.03M | | 4469 | 1.03M | // Simplify calls to llvm.masked.load.* | 4470 | 1.03M | switch (IID) { | 4471 | 724 | case Intrinsic::masked_load: { | 4472 | 724 | Value *MaskArg = ArgBegin[2]; | 4473 | 724 | Value *PassthruArg = ArgBegin[3]; | 4474 | 724 | // If the mask is all zeros or undef, the "passthru" argument is the result. | 4475 | 724 | if (maskIsAllZeroOrUndef(MaskArg)) | 4476 | 4 | return PassthruArg; | 4477 | 720 | return nullptr; | 4478 | 720 | } | 4479 | 1.03M | default: | 4480 | 1.03M | return nullptr; | 4481 | 0 | } | 4482 | 0 | } |
Unexecuted instantiation: InstructionSimplify.cpp:llvm::Value* SimplifyIntrinsic<llvm::Value* const*>(llvm::Function*, llvm::Value* const*, llvm::Value* const*, llvm::SimplifyQuery const&, unsigned int) |
4483 | | |
4484 | | template <typename IterTy> |
4485 | | static Value *SimplifyCall(ImmutableCallSite CS, Value *V, IterTy ArgBegin, |
4486 | | IterTy ArgEnd, const SimplifyQuery &Q, |
4487 | 63.7M | unsigned MaxRecurse) { |
4488 | 63.7M | Type *Ty = V->getType(); |
4489 | 63.7M | if (PointerType *PTy = dyn_cast<PointerType>(Ty)) |
4490 | 63.7M | Ty = PTy->getElementType(); |
4491 | 63.7M | FunctionType *FTy = cast<FunctionType>(Ty); |
4492 | 63.7M | |
4493 | 63.7M | // call undef -> undef |
4494 | 63.7M | // call null -> undef |
4495 | 63.7M | if (isa<UndefValue>(V) || 63.7M isa<ConstantPointerNull>(V)63.7M ) |
4496 | 24 | return UndefValue::get(FTy->getReturnType()); |
4497 | 63.7M | |
4498 | 63.7M | Function *F = dyn_cast<Function>(V); |
4499 | 63.7M | if (!F) |
4500 | 2.40M | return nullptr; |
4501 | 61.3M | |
4502 | 61.3M | if (61.3M F->isIntrinsic()61.3M ) |
4503 | 4.15M | if (Value *4.15M Ret4.15M = SimplifyIntrinsic(F, ArgBegin, ArgEnd, Q, MaxRecurse)) |
4504 | 84 | return Ret; |
4505 | 61.3M | |
4506 | 61.3M | if (61.3M !canConstantFoldCallTo(CS, F)61.3M ) |
4507 | 61.0M | return nullptr; |
4508 | 361k | |
4509 | 361k | SmallVector<Constant *, 4> ConstantArgs; |
4510 | 361k | ConstantArgs.reserve(ArgEnd - ArgBegin); |
4511 | 375k | for (IterTy I = ArgBegin, E = ArgEnd; I != E375k ; ++I13.6k ) { |
4512 | 373k | Constant *C = dyn_cast<Constant>(*I); |
4513 | 373k | if (!C) |
4514 | 359k | return nullptr; |
4515 | 13.6k | ConstantArgs.push_back(C); |
4516 | 13.6k | } |
4517 | 361k | |
4518 | 1.79k | return ConstantFoldCall(CS, F, ConstantArgs, Q.TLI); |
4519 | 63.7M | } InstructionSimplify.cpp:llvm::Value* SimplifyCall<llvm::Use*>(llvm::ImmutableCallSite, llvm::Value*, llvm::Use*, llvm::Use*, llvm::SimplifyQuery const&, unsigned int) Line | Count | Source | 4487 | 63.7M | unsigned MaxRecurse) { | 4488 | 63.7M | Type *Ty = V->getType(); | 4489 | 63.7M | if (PointerType *PTy = dyn_cast<PointerType>(Ty)) | 4490 | 63.7M | Ty = PTy->getElementType(); | 4491 | 63.7M | FunctionType *FTy = cast<FunctionType>(Ty); | 4492 | 63.7M | | 4493 | 63.7M | // call undef -> undef | 4494 | 63.7M | // call null -> undef | 4495 | 63.7M | if (isa<UndefValue>(V) || 63.7M isa<ConstantPointerNull>(V)63.7M ) | 4496 | 24 | return UndefValue::get(FTy->getReturnType()); | 4497 | 63.7M | | 4498 | 63.7M | Function *F = dyn_cast<Function>(V); | 4499 | 63.7M | if (!F) | 4500 | 2.40M | return nullptr; | 4501 | 61.3M | | 4502 | 61.3M | if (61.3M F->isIntrinsic()61.3M ) | 4503 | 4.15M | if (Value *4.15M Ret4.15M = SimplifyIntrinsic(F, ArgBegin, ArgEnd, Q, MaxRecurse)) | 4504 | 84 | return Ret; | 4505 | 61.3M | | 4506 | 61.3M | if (61.3M !canConstantFoldCallTo(CS, F)61.3M ) | 4507 | 61.0M | return nullptr; | 4508 | 361k | | 4509 | 361k | SmallVector<Constant *, 4> ConstantArgs; | 4510 | 361k | ConstantArgs.reserve(ArgEnd - ArgBegin); | 4511 | 375k | for (IterTy I = ArgBegin, E = ArgEnd; I != E375k ; ++I13.6k ) { | 4512 | 373k | Constant *C = dyn_cast<Constant>(*I); | 4513 | 373k | if (!C) | 4514 | 359k | return nullptr; | 4515 | 13.6k | ConstantArgs.push_back(C); | 4516 | 13.6k | } | 4517 | 361k | | 4518 | 1.79k | return ConstantFoldCall(CS, F, ConstantArgs, Q.TLI); | 4519 | 63.7M | } |
Unexecuted instantiation: InstructionSimplify.cpp:llvm::Value* SimplifyCall<llvm::Value* const*>(llvm::ImmutableCallSite, llvm::Value*, llvm::Value* const*, llvm::Value* const*, llvm::SimplifyQuery const&, unsigned int) |
4520 | | |
4521 | | Value *llvm::SimplifyCall(ImmutableCallSite CS, Value *V, |
4522 | | User::op_iterator ArgBegin, User::op_iterator ArgEnd, |
4523 | 63.7M | const SimplifyQuery &Q) { |
4524 | 63.7M | return ::SimplifyCall(CS, V, ArgBegin, ArgEnd, Q, RecursionLimit); |
4525 | 63.7M | } |
4526 | | |
4527 | | Value *llvm::SimplifyCall(ImmutableCallSite CS, Value *V, |
4528 | 0 | ArrayRef<Value *> Args, const SimplifyQuery &Q) { |
4529 | 0 | return ::SimplifyCall(CS, V, Args.begin(), Args.end(), Q, RecursionLimit); |
4530 | 0 | } |
4531 | | |
4532 | | /// See if we can compute a simplified version of this instruction. |
4533 | | /// If not, this returns null. |
4534 | | |
4535 | | Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ, |
4536 | 385M | OptimizationRemarkEmitter *ORE) { |
4537 | 385M | const SimplifyQuery Q = SQ.CxtI ? SQ214M : SQ.getWithInstruction(I)171M ; |
4538 | 385M | Value *Result; |
4539 | 385M | |
4540 | 385M | switch (I->getOpcode()) { |
4541 | 170M | default: |
4542 | 170M | Result = ConstantFoldInstruction(I, Q.DL, Q.TLI); |
4543 | 170M | break; |
4544 | 301k | case Instruction::FAdd: |
4545 | 301k | Result = SimplifyFAddInst(I->getOperand(0), I->getOperand(1), |
4546 | 301k | I->getFastMathFlags(), Q); |
4547 | 301k | break; |
4548 | 3.76M | case Instruction::Add: |
4549 | 3.76M | Result = SimplifyAddInst(I->getOperand(0), I->getOperand(1), |
4550 | 3.76M | cast<BinaryOperator>(I)->hasNoSignedWrap(), |
4551 | 3.76M | cast<BinaryOperator>(I)->hasNoUnsignedWrap(), Q); |
4552 | 3.76M | break; |
4553 | 87.9k | case Instruction::FSub: |
4554 | 87.9k | Result = SimplifyFSubInst(I->getOperand(0), I->getOperand(1), |
4555 | 87.9k | I->getFastMathFlags(), Q); |
4556 | 87.9k | break; |
4557 | 792k | case Instruction::Sub: |
4558 | 792k | Result = SimplifySubInst(I->getOperand(0), I->getOperand(1), |
4559 | 792k | cast<BinaryOperator>(I)->hasNoSignedWrap(), |
4560 | 792k | cast<BinaryOperator>(I)->hasNoUnsignedWrap(), Q); |
4561 | 792k | break; |
4562 | 262k | case Instruction::FMul: |
4563 | 262k | Result = SimplifyFMulInst(I->getOperand(0), I->getOperand(1), |
4564 | 262k | I->getFastMathFlags(), Q); |
4565 | 262k | break; |
4566 | 712k | case Instruction::Mul: |
4567 | 712k | Result = SimplifyMulInst(I->getOperand(0), I->getOperand(1), Q); |
4568 | 712k | break; |
4569 | 49.2k | case Instruction::SDiv: |
4570 | 49.2k | Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1), Q); |
4571 | 49.2k | break; |
4572 | 65.0k | case Instruction::UDiv: |
4573 | 65.0k | Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1), Q); |
4574 | 65.0k | break; |
4575 | 197k | case Instruction::FDiv: |
4576 | 197k | Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1), |
4577 | 197k | I->getFastMathFlags(), Q); |
4578 | 197k | break; |
4579 | 24.3k | case Instruction::SRem: |
4580 | 24.3k | Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), Q); |
4581 | 24.3k | break; |
4582 | 11.6k | case Instruction::URem: |
4583 | 11.6k | Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1), Q); |
4584 | 11.6k | break; |
4585 | 69 | case Instruction::FRem: |
4586 | 69 | Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1), |
4587 | 69 | I->getFastMathFlags(), Q); |
4588 | 69 | break; |
4589 | 585k | case Instruction::Shl: |
4590 | 585k | Result = SimplifyShlInst(I->getOperand(0), I->getOperand(1), |
4591 | 585k | cast<BinaryOperator>(I)->hasNoSignedWrap(), |
4592 | 585k | cast<BinaryOperator>(I)->hasNoUnsignedWrap(), Q); |
4593 | 585k | break; |
4594 | 375k | case Instruction::LShr: |
4595 | 375k | Result = SimplifyLShrInst(I->getOperand(0), I->getOperand(1), |
4596 | 375k | cast<BinaryOperator>(I)->isExact(), Q); |
4597 | 375k | break; |
4598 | 157k | case Instruction::AShr: |
4599 | 157k | Result = SimplifyAShrInst(I->getOperand(0), I->getOperand(1), |
4600 | 157k | cast<BinaryOperator>(I)->isExact(), Q); |
4601 | 157k | break; |
4602 | 974k | case Instruction::And: |
4603 | 974k | Result = SimplifyAndInst(I->getOperand(0), I->getOperand(1), Q); |
4604 | 974k | break; |
4605 | 448k | case Instruction::Or: |
4606 | 448k | Result = SimplifyOrInst(I->getOperand(0), I->getOperand(1), Q); |
4607 | 448k | break; |
4608 | 138k | case Instruction::Xor: |
4609 | 138k | Result = SimplifyXorInst(I->getOperand(0), I->getOperand(1), Q); |
4610 | 138k | break; |
4611 | 9.88M | case Instruction::ICmp: |
4612 | 9.88M | Result = SimplifyICmpInst(cast<ICmpInst>(I)->getPredicate(), |
4613 | 9.88M | I->getOperand(0), I->getOperand(1), Q); |
4614 | 9.88M | break; |
4615 | 156k | case Instruction::FCmp: |
4616 | 156k | Result = |
4617 | 156k | SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(), I->getOperand(0), |
4618 | 156k | I->getOperand(1), I->getFastMathFlags(), Q); |
4619 | 156k | break; |
4620 | 3.03M | case Instruction::Select: |
4621 | 3.03M | Result = SimplifySelectInst(I->getOperand(0), I->getOperand(1), |
4622 | 3.03M | I->getOperand(2), Q); |
4623 | 3.03M | break; |
4624 | 15.9M | case Instruction::GetElementPtr: { |
4625 | 15.9M | SmallVector<Value *, 8> Ops(I->op_begin(), I->op_end()); |
4626 | 15.9M | Result = SimplifyGEPInst(cast<GetElementPtrInst>(I)->getSourceElementType(), |
4627 | 15.9M | Ops, Q); |
4628 | 15.9M | break; |
4629 | 385M | } |
4630 | 26.3k | case Instruction::InsertValue: { |
4631 | 26.3k | InsertValueInst *IV = cast<InsertValueInst>(I); |
4632 | 26.3k | Result = SimplifyInsertValueInst(IV->getAggregateOperand(), |
4633 | 26.3k | IV->getInsertedValueOperand(), |
4634 | 26.3k | IV->getIndices(), Q); |
4635 | 26.3k | break; |
4636 | 385M | } |
4637 | 193k | case Instruction::ExtractValue: { |
4638 | 193k | auto *EVI = cast<ExtractValueInst>(I); |
4639 | 193k | Result = SimplifyExtractValueInst(EVI->getAggregateOperand(), |
4640 | 193k | EVI->getIndices(), Q); |
4641 | 193k | break; |
4642 | 385M | } |
4643 | 87.3k | case Instruction::ExtractElement: { |
4644 | 87.3k | auto *EEI = cast<ExtractElementInst>(I); |
4645 | 87.3k | Result = SimplifyExtractElementInst(EEI->getVectorOperand(), |
4646 | 87.3k | EEI->getIndexOperand(), Q); |
4647 | 87.3k | break; |
4648 | 385M | } |
4649 | 159k | case Instruction::ShuffleVector: { |
4650 | 159k | auto *SVI = cast<ShuffleVectorInst>(I); |
4651 | 159k | Result = SimplifyShuffleVectorInst(SVI->getOperand(0), SVI->getOperand(1), |
4652 | 159k | SVI->getMask(), SVI->getType(), Q); |
4653 | 159k | break; |
4654 | 385M | } |
4655 | 113M | case Instruction::PHI: |
4656 | 113M | Result = SimplifyPHINode(cast<PHINode>(I), Q); |
4657 | 113M | break; |
4658 | 35.7M | case Instruction::Call: { |
4659 | 35.7M | CallSite CS(cast<CallInst>(I)); |
4660 | 35.7M | Result = SimplifyCall(CS, CS.getCalledValue(), CS.arg_begin(), CS.arg_end(), |
4661 | 35.7M | Q); |
4662 | 35.7M | break; |
4663 | 385M | } |
4664 | 134M | #define HANDLE_CAST_INST(num, opc, clas) case Instruction::opc: |
4665 | 35.7M | #include "llvm/IR/Instruction.def"35.7M |
4666 | 10.3M | #undef HANDLE_CAST_INST |
4667 | 10.3M | Result = |
4668 | 10.3M | SimplifyCastInst(I->getOpcode(), I->getOperand(0), I->getType(), Q); |
4669 | 10.3M | break; |
4670 | 16.6M | case Instruction::Alloca: |
4671 | 16.6M | // No simplifications for Alloca and it can't be constant folded. |
4672 | 16.6M | Result = nullptr; |
4673 | 16.6M | break; |
4674 | 385M | } |
4675 | 385M | |
4676 | 385M | // In general, it is possible for computeKnownBits to determine all bits in a |
4677 | 385M | // value even when the operands are not all constants. |
4678 | 385M | if (385M !Result && 385M I->getType()->isIntOrIntVectorTy()382M ) { |
4679 | 51.3M | KnownBits Known = computeKnownBits(I, Q.DL, /*Depth*/ 0, Q.AC, I, Q.DT, ORE); |
4680 | 51.3M | if (Known.isConstant()) |
4681 | 527 | Result = ConstantInt::get(I->getType(), Known.getConstant()); |
4682 | 51.3M | } |
4683 | 385M | |
4684 | 385M | /// If called on unreachable code, the above logic may report that the |
4685 | 385M | /// instruction simplified to itself. Make life easier for users by |
4686 | 385M | /// detecting that case here, returning a safe value instead. |
4687 | 385M | return Result == I ? UndefValue::get(I->getType())15 : Result385M ; |
4688 | 385M | } |
4689 | | |
4690 | | /// \brief Implementation of recursive simplification through an instruction's |
4691 | | /// uses. |
4692 | | /// |
4693 | | /// This is the common implementation of the recursive simplification routines. |
4694 | | /// If we have a pre-simplified value in 'SimpleV', that is forcibly used to |
4695 | | /// replace the instruction 'I'. Otherwise, we simply add 'I' to the list of |
4696 | | /// instructions to process and attempt to simplify it using |
4697 | | /// InstructionSimplify. |
4698 | | /// |
4699 | | /// This routine returns 'true' only when *it* simplifies something. The passed |
4700 | | /// in simplified value does not count toward this. |
4701 | | static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV, |
4702 | | const TargetLibraryInfo *TLI, |
4703 | | const DominatorTree *DT, |
4704 | 7.03k | AssumptionCache *AC) { |
4705 | 7.03k | bool Simplified = false; |
4706 | 7.03k | SmallSetVector<Instruction *, 8> Worklist; |
4707 | 7.03k | const DataLayout &DL = I->getModule()->getDataLayout(); |
4708 | 7.03k | |
4709 | 7.03k | // If we have an explicit value to collapse to, do that round of the |
4710 | 7.03k | // simplification loop by hand initially. |
4711 | 7.03k | if (SimpleV7.03k ) { |
4712 | 7.03k | for (User *U : I->users()) |
4713 | 7.25k | if (7.25k U != I7.25k ) |
4714 | 7.25k | Worklist.insert(cast<Instruction>(U)); |
4715 | 7.03k | |
4716 | 7.03k | // Replace the instruction with its simplified value. |
4717 | 7.03k | I->replaceAllUsesWith(SimpleV); |
4718 | 7.03k | |
4719 | 7.03k | // Gracefully handle edge cases where the instruction is not wired into any |
4720 | 7.03k | // parent block. |
4721 | 7.03k | if (I->getParent() && 7.03k !I->isEHPad()7.03k && !isa<TerminatorInst>(I)7.03k && |
4722 | 7.03k | !I->mayHaveSideEffects()) |
4723 | 7.03k | I->eraseFromParent(); |
4724 | 0 | } else { |
4725 | 0 | Worklist.insert(I); |
4726 | 0 | } |
4727 | 7.03k | |
4728 | 7.03k | // Note that we must test the size on each iteration, the worklist can grow. |
4729 | 14.3k | for (unsigned Idx = 0; Idx != Worklist.size()14.3k ; ++Idx7.30k ) { |
4730 | 7.30k | I = Worklist[Idx]; |
4731 | 7.30k | |
4732 | 7.30k | // See if this instruction simplifies. |
4733 | 7.30k | SimpleV = SimplifyInstruction(I, {DL, TLI, DT, AC}); |
4734 | 7.30k | if (!SimpleV) |
4735 | 7.25k | continue; |
4736 | 55 | |
4737 | 55 | Simplified = true; |
4738 | 55 | |
4739 | 55 | // Stash away all the uses of the old instruction so we can check them for |
4740 | 55 | // recursive simplifications after a RAUW. This is cheaper than checking all |
4741 | 55 | // uses of To on the recursive step in most cases. |
4742 | 55 | for (User *U : I->users()) |
4743 | 61 | Worklist.insert(cast<Instruction>(U)); |
4744 | 55 | |
4745 | 55 | // Replace the instruction with its simplified value. |
4746 | 55 | I->replaceAllUsesWith(SimpleV); |
4747 | 55 | |
4748 | 55 | // Gracefully handle edge cases where the instruction is not wired into any |
4749 | 55 | // parent block. |
4750 | 55 | if (I->getParent() && 55 !I->isEHPad()55 && !isa<TerminatorInst>(I)55 && |
4751 | 55 | !I->mayHaveSideEffects()) |
4752 | 55 | I->eraseFromParent(); |
4753 | 7.30k | } |
4754 | 7.03k | return Simplified; |
4755 | 7.03k | } |
4756 | | |
4757 | | bool llvm::recursivelySimplifyInstruction(Instruction *I, |
4758 | | const TargetLibraryInfo *TLI, |
4759 | | const DominatorTree *DT, |
4760 | 0 | AssumptionCache *AC) { |
4761 | 0 | return replaceAndRecursivelySimplifyImpl(I, nullptr, TLI, DT, AC); |
4762 | 0 | } |
4763 | | |
4764 | | bool llvm::replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, |
4765 | | const TargetLibraryInfo *TLI, |
4766 | | const DominatorTree *DT, |
4767 | 7.03k | AssumptionCache *AC) { |
4768 | 7.03k | assert(I != SimpleV && "replaceAndRecursivelySimplify(X,X) is not valid!"); |
4769 | 7.03k | assert(SimpleV && "Must provide a simplified value."); |
4770 | 7.03k | return replaceAndRecursivelySimplifyImpl(I, SimpleV, TLI, DT, AC); |
4771 | 7.03k | } |
4772 | | |
4773 | | namespace llvm { |
4774 | 1.73M | const SimplifyQuery getBestSimplifyQuery(Pass &P, Function &F) { |
4775 | 1.73M | auto *DTWP = P.getAnalysisIfAvailable<DominatorTreeWrapperPass>(); |
4776 | 18.4E | auto *DT = DTWP ? &DTWP->getDomTree()1.73M : nullptr18.4E ; |
4777 | 1.73M | auto *TLIWP = P.getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); |
4778 | 18.4E | auto *TLI = TLIWP ? &TLIWP->getTLI()1.73M : nullptr18.4E ; |
4779 | 1.73M | auto *ACWP = P.getAnalysisIfAvailable<AssumptionCacheTracker>(); |
4780 | 18.4E | auto *AC = ACWP ? &ACWP->getAssumptionCache(F)1.73M : nullptr18.4E ; |
4781 | 1.73M | return {F.getParent()->getDataLayout(), TLI, DT, AC}; |
4782 | 1.73M | } |
4783 | | |
4784 | | const SimplifyQuery getBestSimplifyQuery(LoopStandardAnalysisResults &AR, |
4785 | 59 | const DataLayout &DL) { |
4786 | 59 | return {DL, &AR.TLI, &AR.DT, &AR.AC}; |
4787 | 59 | } |
4788 | | |
4789 | | template <class T, class... TArgs> |
4790 | | const SimplifyQuery getBestSimplifyQuery(AnalysisManager<T, TArgs...> &AM, |
4791 | 220 | Function &F) { |
4792 | 220 | auto *DT = AM.template getCachedResult<DominatorTreeAnalysis>(F); |
4793 | 220 | auto *TLI = AM.template getCachedResult<TargetLibraryAnalysis>(F); |
4794 | 220 | auto *AC = AM.template getCachedResult<AssumptionAnalysis>(F); |
4795 | 220 | return {F.getParent()->getDataLayout(), TLI, DT, AC}; |
4796 | 220 | } |
4797 | | template const SimplifyQuery getBestSimplifyQuery(AnalysisManager<Function> &, |
4798 | | Function &); |
4799 | | } |