/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
Line | Count | Source
1 | | //===- InstCombineAddSub.cpp ------------------------------------*- C++ -*-===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | // This file implements the visit functions for add, fadd, sub, and fsub. |
11 | | // |
12 | | //===----------------------------------------------------------------------===// |
13 | | |
14 | | #include "InstCombineInternal.h" |
15 | | #include "llvm/ADT/STLExtras.h" |
16 | | #include "llvm/Analysis/InstructionSimplify.h" |
17 | | #include "llvm/IR/DataLayout.h" |
18 | | #include "llvm/IR/GetElementPtrTypeIterator.h" |
19 | | #include "llvm/IR/PatternMatch.h" |
20 | | #include "llvm/Support/KnownBits.h" |
21 | | |
22 | | using namespace llvm; |
23 | | using namespace PatternMatch; |
24 | | |
25 | | #define DEBUG_TYPE "instcombine" |
26 | | |
27 | | namespace { |
28 | | |
29 | | /// Class representing the coefficient of a floating-point addend.
30 | | /// This class needs to be highly efficient, which is especially true for
31 | | /// the constructor. As of this writing, the cost of the default
32 | | /// constructor is merely a 4-byte zero store (assuming the compiler is
33 | | /// able to perform write-merging).
34 | | /// |
35 | | class FAddendCoef { |
36 | | public: |
37 | | // The constructor has to initialize an APFloat, which is unnecessary for
38 | | // most addends, whose coefficient is either 1 or -1. So the constructor
39 | | // is expensive. In order to avoid this cost, we should reuse some
40 | | // instances whenever possible. The pre-created instances
41 | | // FAddCombine::Add[0-5] embody this idea.
42 | | // |
43 | 3.44k | FAddendCoef() : IsFp(false), BufHasFpVal(false), IntVal(0) {} |
44 | | ~FAddendCoef(); |
45 | | |
46 | 933 | void set(short C) { |
47 | 933 | assert(!insaneIntVal(C) && "Insane coefficient"); |
48 | 933 | IsFp = false; IntVal = C; |
49 | 933 | } |
50 | | |
51 | | void set(const APFloat& C); |
52 | | |
53 | | void negate(); |
54 | | |
55 | 16 | bool isZero() const { return isInt() ? !IntVal : getFpVal().isZero(); }
56 | | Value *getValue(Type *) const; |
57 | | |
58 | | // If possible, don't define operator+/operator- etc because these |
59 | | // operators inevitably call FAddendCoef's constructor which is not cheap. |
60 | | void operator=(const FAddendCoef &A); |
61 | | void operator+=(const FAddendCoef &A); |
62 | | void operator*=(const FAddendCoef &S); |
63 | | |
64 | 539 | bool isOne() const { return isInt() && IntVal == 1; }
65 | 7 | bool isTwo() const { return isInt() && IntVal == 2; }
66 | 728 | bool isMinusOne() const { return isInt() && IntVal == -1; }
67 | 329 | bool isMinusTwo() const { return isInt() && IntVal == -2; }
68 | | |
69 | | private: |
70 | 0 | bool insaneIntVal(int V) { return V > 4 || V < -4; } |
71 | | APFloat *getFpValPtr() |
72 | 571 | { return reinterpret_cast<APFloat*>(&FpValBuf.buffer[0]); } |
73 | | const APFloat *getFpValPtr() const |
74 | 14 | { return reinterpret_cast<const APFloat*>(&FpValBuf.buffer[0]); } |
75 | | |
76 | 14 | const APFloat &getFpVal() const { |
77 | 14 | assert(IsFp && BufHasFpVal && "Incorrect state");
78 | 14 | return *getFpValPtr(); |
79 | 14 | } |
80 | | |
81 | 9 | APFloat &getFpVal() { |
82 | 9 | assert(IsFp && BufHasFpVal && "Incorrect state");
83 | 9 | return *getFpValPtr(); |
84 | 9 | } |
85 | | |
86 | 2.12k | bool isInt() const { return !IsFp; } |
87 | | |
88 | | // If the coefficient is represented by an integer, promote it to a |
89 | | // floating point. |
90 | | void convertToFpType(const fltSemantics &Sem); |
91 | | |
92 | | // Construct an APFloat from a signed integer. |
93 | | // TODO: We should get rid of this function when APFloat can be constructed |
94 | | // from a *SIGNED* integer.
95 | | APFloat createAPFloatFromInt(const fltSemantics &Sem, int Val); |
96 | | |
97 | | private: |
98 | | bool IsFp; |
99 | | |
100 | | // True iff FpValBuf contains an instance of APFloat. |
101 | | bool BufHasFpVal; |
102 | | |
103 | | // The integer coefficient of an individual addend is either 1 or -1,
104 | | // and we try to simplify at most 4 addends from at most two
105 | | // neighboring instructions. So the range of <IntVal> falls in [-4, 4].
106 | | // APInt would be overkill for this purpose.
107 | | short IntVal; |
108 | | |
109 | | AlignedCharArrayUnion<APFloat> FpValBuf; |
110 | | }; |
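// Illustrative sketch (not part of this file): the lazy-construction idiom
// FAddendCoef relies on above, shown with a hypothetical LazySlot<T> and
// std::aligned_storage standing in for LLVM's AlignedCharArrayUnion. The
// default constructor only zeroes a flag; the expensive payload object is
// placement-new'ed into the raw buffer on demand and destroyed manually.
#include <new>
#include <type_traits>
#include <utility>

template <typename T> class LazySlot {
  bool HasVal = false; // cheap default construction: just a flag store
  typename std::aligned_storage<sizeof(T), alignof(T)>::type Buf;

  T *ptr() { return reinterpret_cast<T *>(&Buf); }

public:
  LazySlot() = default;
  ~LazySlot() {
    if (HasVal)
      ptr()->~T(); // mirrors FAddendCoef::~FAddendCoef()
  }
  template <typename... Args> T &emplace(Args &&...A) {
    if (HasVal)
      ptr()->~T();
    ::new (static_cast<void *>(&Buf)) T(std::forward<Args>(A)...);
    HasVal = true;
    return *ptr();
  }
};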
111 | | |
112 | | /// FAddend is used to represent a floating-point addend. An addend is
113 | | /// represented as <C, V>, where V is a symbolic value, and C is a
114 | | /// constant coefficient. A constant addend is represented as <C, 0>.
115 | | /// |
116 | | class FAddend { |
117 | | public: |
118 | 3.44k | FAddend() : Val(nullptr) {} |
119 | | |
120 | 1.22k | Value *getSymVal() const { return Val; } |
121 | 387 | const FAddendCoef &getCoef() const { return Coeff; } |
122 | | |
123 | 2.17k | bool isConstant() const { return Val == nullptr; } |
124 | 16 | bool isZero() const { return Coeff.isZero(); } |
125 | | |
126 | 918 | void set(short Coefficient, Value *V) { |
127 | 918 | Coeff.set(Coefficient); |
128 | 918 | Val = V; |
129 | 918 | } |
130 | 0 | void set(const APFloat &Coefficient, Value *V) { |
131 | 0 | Coeff.set(Coefficient); |
132 | 0 | Val = V; |
133 | 0 | } |
134 | 276 | void set(const ConstantFP *Coefficient, Value *V) { |
135 | 276 | Coeff.set(Coefficient->getValueAPF()); |
136 | 276 | Val = V; |
137 | 276 | } |
138 | | |
139 | 120 | void negate() { Coeff.negate(); } |
140 | | |
141 | | /// Drill down the U-D chain one step to find the definition of V, and |
142 | | /// try to break the definition into one or two addends. |
143 | | static unsigned drillValueDownOneStep(Value* V, FAddend &A0, FAddend &A1); |
144 | | |
145 | | /// Similar to FAddend::drillDownOneStep() except that the value being |
146 | | /// split is the addend itself.
147 | | unsigned drillAddendDownOneStep(FAddend &Addend0, FAddend &Addend1) const; |
148 | | |
149 | 23 | void operator+=(const FAddend &T) { |
150 | 23 | assert((Val == T.Val) && "Symbolic-values disagree"); |
151 | 23 | Coeff += T.Coeff; |
152 | 23 | } |
153 | | |
154 | | private: |
155 | 15 | void Scale(const FAddendCoef& ScaleAmt) { Coeff *= ScaleAmt; } |
156 | | |
157 | | // This addend has the value of "Coeff * Val". |
158 | | Value *Val; |
159 | | FAddendCoef Coeff; |
160 | | }; |
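// Illustrative sketch (not part of this file): how the <C, V> encoding above
// reads in practice, using a hypothetical ToyAddend. "8.0 * x - y" decomposes
// into <8.0, x> and <-1, y>; a lone constant such as 42.0 becomes
// <42.0, nullptr>.
#include <cstdio>

struct ToyAddend {
  double Coef;     // the constant coefficient C
  const char *Sym; // the symbolic value V; nullptr marks a constant addend
};

int main() {
  const ToyAddend Addends[] = {{8.0, "x"}, {-1.0, "y"}, {42.0, nullptr}};
  for (const ToyAddend &A : Addends)
    std::printf("<%g, %s>\n", A.Coef, A.Sym ? A.Sym : "0");
}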
161 | | |
162 | | /// FAddCombine is the class for optimizing an unsafe fadd/fsub along |
163 | | /// with at most two neighboring instructions.
164 | | /// |
165 | | class FAddCombine { |
166 | | public: |
167 | 754 | FAddCombine(InstCombiner::BuilderTy &B) : Builder(B), Instr(nullptr) {} |
168 | | Value *simplify(Instruction *FAdd); |
169 | | |
170 | | private: |
171 | | typedef SmallVector<const FAddend*, 4> AddendVect; |
172 | | |
173 | | Value *simplifyFAdd(AddendVect& V, unsigned InstrQuota); |
174 | | |
175 | | Value *performFactorization(Instruction *I); |
176 | | |
177 | | /// Convert given addend to a Value |
178 | | Value *createAddendVal(const FAddend &A, bool& NeedNeg); |
179 | | |
180 | | /// Return the number of instructions needed to emit the N-ary addition. |
181 | | unsigned calcInstrNumber(const AddendVect& Vect); |
182 | | Value *createFSub(Value *Opnd0, Value *Opnd1); |
183 | | Value *createFAdd(Value *Opnd0, Value *Opnd1); |
184 | | Value *createFMul(Value *Opnd0, Value *Opnd1); |
185 | | Value *createFDiv(Value *Opnd0, Value *Opnd1); |
186 | | Value *createFNeg(Value *V); |
187 | | Value *createNaryFAdd(const AddendVect& Opnds, unsigned InstrQuota); |
188 | | void createInstPostProc(Instruction *NewInst, bool NoNumber = false); |
189 | | |
190 | | InstCombiner::BuilderTy &Builder; |
191 | | Instruction *Instr; |
192 | | |
193 | | // Debugging facilities are clustered here.
194 | | #ifndef NDEBUG |
195 | | unsigned CreateInstrNum; |
196 | | void initCreateInstNum() { CreateInstrNum = 0; } |
197 | | void incCreateInstNum() { CreateInstrNum++; } |
198 | | #else |
199 | 10 | void initCreateInstNum() {} |
200 | 26 | void incCreateInstNum() {} |
201 | | #endif |
202 | | }; |
203 | | |
204 | | } // anonymous namespace |
205 | | |
206 | | //===----------------------------------------------------------------------===// |
207 | | // |
208 | | // Implementation of |
209 | | // {FAddendCoef, FAddend, FAddition, FAddCombine}. |
210 | | // |
211 | | //===----------------------------------------------------------------------===// |
212 | 3.44k | FAddendCoef::~FAddendCoef() { |
213 | 3.44k | if (BufHasFpVal) |
214 | 281 | getFpValPtr()->~APFloat(); |
215 | 3.44k | } |
216 | | |
217 | 279 | void FAddendCoef::set(const APFloat& C) { |
218 | 279 | APFloat *P = getFpValPtr(); |
219 | 279 | |
220 | 279 | if (isInt()) {
221 | 279 | // As the buffer is a meaningless byte stream, we cannot call
222 | 279 | // APFloat::operator=().
223 | 279 | new(P) APFloat(C); |
224 | 279 | } else |
225 | 0 | *P = C; |
226 | 279 | |
227 | 279 | IsFp = BufHasFpVal = true; |
228 | 279 | } |
229 | | |
230 | 2 | void FAddendCoef::convertToFpType(const fltSemantics &Sem) { |
231 | 2 | if (!isInt()) |
232 | 0 | return; |
233 | 2 | |
234 | 2 | APFloat *P = getFpValPtr(); |
235 | 2 | if (IntVal > 0) |
236 | 1 | new(P) APFloat(Sem, IntVal); |
237 | 1 | else { |
238 | 1 | new(P) APFloat(Sem, 0 - IntVal); |
239 | 1 | P->changeSign(); |
240 | 1 | } |
241 | 2 | IsFp = BufHasFpVal = true; |
242 | 2 | } |
243 | | |
244 | 2 | APFloat FAddendCoef::createAPFloatFromInt(const fltSemantics &Sem, int Val) { |
245 | 2 | if (Val >= 0) |
246 | 2 | return APFloat(Sem, Val); |
247 | 0 |
248 | 0 | APFloat T(Sem, 0 - Val); |
249 | 0 | T.changeSign(); |
250 | 0 |
251 | 0 | return T; |
252 | 0 | } |
253 | | |
254 | 18 | void FAddendCoef::operator=(const FAddendCoef &That) { |
255 | 18 | if (That.isInt()) |
256 | 15 | set(That.IntVal); |
257 | 18 | else |
258 | 3 | set(That.getFpVal()); |
259 | 18 | } |
260 | | |
261 | 23 | void FAddendCoef::operator+=(const FAddendCoef &That) { |
262 | 23 | enum APFloat::roundingMode RndMode = APFloat::rmNearestTiesToEven; |
263 | 23 | if (isInt() == That.isInt()) {
264 | 19 | if (isInt()) |
265 | 17 | IntVal += That.IntVal; |
266 | 19 | else |
267 | 2 | getFpVal().add(That.getFpVal(), RndMode); |
268 | 19 | return; |
269 | 19 | } |
270 | 4 | |
271 | 4 | if (isInt()) {
272 | 2 | const APFloat &T = That.getFpVal(); |
273 | 2 | convertToFpType(T.getSemantics()); |
274 | 2 | getFpVal().add(T, RndMode); |
275 | 2 | return; |
276 | 2 | } |
277 | 2 | |
278 | 2 | APFloat &T = getFpVal(); |
279 | 2 | T.add(createAPFloatFromInt(T.getSemantics(), That.IntVal), RndMode); |
280 | 2 | } |
281 | | |
282 | 15 | void FAddendCoef::operator*=(const FAddendCoef &That) { |
283 | 15 | if (That.isOne()) |
284 | 0 | return; |
285 | 15 | |
286 | 15 | if (That.isMinusOne()) {
287 | 15 | negate(); |
288 | 15 | return; |
289 | 15 | } |
290 | 0 |
291 | 0 | if (isInt() && That.isInt()) {
292 | 0 | int Res = IntVal * (int)That.IntVal; |
293 | 0 | assert(!insaneIntVal(Res) && "Insane int value"); |
294 | 0 | IntVal = Res; |
295 | 0 | return; |
296 | 0 | } |
297 | 0 |
298 | 0 | const fltSemantics &Semantic =
299 | 0 | isInt() ? That.getFpVal().getSemantics() : getFpVal().getSemantics();
300 | 0 |
301 | 0 | if (isInt()) |
302 | 0 | convertToFpType(Semantic); |
303 | 0 | APFloat &F0 = getFpVal(); |
304 | 0 |
305 | 0 | if (That.isInt()) |
306 | 0 | F0.multiply(createAPFloatFromInt(Semantic, That.IntVal), |
307 | 0 | APFloat::rmNearestTiesToEven); |
308 | 0 | else |
309 | 0 | F0.multiply(That.getFpVal(), APFloat::rmNearestTiesToEven); |
310 | 15 | } |
311 | | |
312 | 135 | void FAddendCoef::negate() { |
313 | 135 | if (isInt()) |
314 | 132 | IntVal = 0 - IntVal; |
315 | 135 | else |
316 | 3 | getFpVal().changeSign(); |
317 | 135 | } |
318 | | |
319 | 7 | Value *FAddendCoef::getValue(Type *Ty) const { |
320 | 7 | return isInt() ? |
321 | 3 | ConstantFP::get(Ty, float(IntVal)) : |
322 | 4 | ConstantFP::get(Ty->getContext(), getFpVal()); |
323 | 7 | } |
324 | | |
325 | | // The definition of <Val> Addends |
326 | | // ========================================= |
327 | | // A + B <1, A>, <1,B> |
328 | | // A - B <1, A>, <-1, B>
329 | | // 0 - B <-1, B>
330 | | // C * A <C, A>
331 | | // A + C <1, A>, <C, NULL>
332 | | // 0 +/- 0 <0, NULL> (corner case) |
333 | | // |
334 | | // Legend: A and B are not constant, C is constant |
335 | | // |
336 | | unsigned FAddend::drillValueDownOneStep |
337 | 1.25k | (Value *Val, FAddend &Addend0, FAddend &Addend1) { |
338 | 1.25k | Instruction *I = nullptr; |
339 | 1.25k | if (!Val || !(I = dyn_cast<Instruction>(Val)))
340 | 272 | return 0; |
341 | 979 | |
342 | 979 | unsigned Opcode = I->getOpcode(); |
343 | 979 | |
344 | 979 | if (Opcode == Instruction::FAdd || Opcode == Instruction::FSub) {
345 | 572 | ConstantFP *C0, *C1; |
346 | 572 | Value *Opnd0 = I->getOperand(0); |
347 | 572 | Value *Opnd1 = I->getOperand(1); |
348 | 572 | if ((C0 = dyn_cast<ConstantFP>(Opnd0)) && C0->isZero())
349 | 31 | Opnd0 = nullptr; |
350 | 572 | |
351 | 572 | if ((C1 = dyn_cast<ConstantFP>(Opnd1)) && C1->isZero())
352 | 0 | Opnd1 = nullptr; |
353 | 572 | |
354 | 572 | if (Opnd0) {
355 | 541 | if (!C0) |
356 | 535 | Addend0.set(1, Opnd0); |
357 | 541 | else |
358 | 6 | Addend0.set(C0, nullptr); |
359 | 541 | } |
360 | 572 | |
361 | 572 | if (Opnd1) {
362 | 572 | FAddend &Addend = Opnd0 ? Addend1 : Addend0;
363 | 572 | if (!C1) |
364 | 383 | Addend.set(1, Opnd1); |
365 | 572 | else |
366 | 189 | Addend.set(C1, nullptr); |
367 | 572 | if (Opcode == Instruction::FSub) |
368 | 120 | Addend.negate(); |
369 | 572 | } |
370 | 572 | |
371 | 572 | if (Opnd0 || Opnd1)
372 | 572 | return Opnd0 && Opnd1 ? 2 : 1;
373 | 0 |
374 | 0 | // Both operands are zero. Weird! |
375 | 0 | Addend0.set(APFloat(C0->getValueAPF().getSemantics()), nullptr); |
376 | 0 | return 1; |
377 | 0 | } |
378 | 407 | |
379 | 407 | if (I->getOpcode() == Instruction::FMul) {
380 | 143 | Value *V0 = I->getOperand(0); |
381 | 143 | Value *V1 = I->getOperand(1); |
382 | 143 | if (ConstantFP *C = dyn_cast<ConstantFP>(V0)) {
383 | 1 | Addend0.set(C, V1); |
384 | 1 | return 1; |
385 | 1 | } |
386 | 142 | |
387 | 142 | if (ConstantFP *C = dyn_cast<ConstantFP>(V1)) {
388 | 80 | Addend0.set(C, V0); |
389 | 80 | return 1; |
390 | 80 | } |
391 | 326 | } |
392 | 326 | |
393 | 326 | return 0; |
394 | 326 | } |
395 | | |
396 | | // Try to break *this* addend into two addends. E.g., suppose this addend is
397 | | // <2.3, V> and V = X + Y; by calling this function we obtain two addends,
398 | | // namely <2.3, X> and <2.3, Y>.
399 | | // |
400 | | unsigned FAddend::drillAddendDownOneStep |
401 | 768 | (FAddend &Addend0, FAddend &Addend1) const { |
402 | 768 | if (isConstant()) |
403 | 0 | return 0; |
404 | 768 | |
405 | 768 | unsigned BreakNum = FAddend::drillValueDownOneStep(Val, Addend0, Addend1); |
406 | 768 | if (!BreakNum || Coeff.isOne())
407 | 759 | return BreakNum; |
408 | 9 | |
409 | 9 | Addend0.Scale(Coeff); |
410 | 9 | |
411 | 9 | if (BreakNum == 2) |
412 | 6 | Addend1.Scale(Coeff); |
413 | 768 | |
414 | 768 | return BreakNum; |
415 | 768 | } |
416 | | |
417 | | // Try to perform the following optimization on the input instruction I.
418 | | // Return the simplified expression if successful; otherwise, return nullptr.
419 | | // |
420 | | // Instruction "I" is Simplified into |
421 | | // ------------------------------------------------------- |
422 | | // (x * y) +/- (x * z) x * (y +/- z) |
423 | | // (y / x) +/- (z / x) (y +/- z) / x |
424 | | // |
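// Illustrative sketch (not part of this file): the shape of the rewrite
// performed below. It is algebraically exact but not IEEE-exact (the two
// forms can round differently), which is why this combine only runs under
// unsafe/fast-math flags; the sketch just shows the before/after forms.
#include <cstdio>

static double unfactored(double x, double y, double z) { return x * y + x * z; }
static double factored(double x, double y, double z) { return x * (y + z); }

int main() {
  // One fewer multiply after factoring; results may differ in the last ulp
  // for inputs where the intermediate roundings diverge.
  std::printf("%g vs %g\n", unfactored(2.0, 0.5, 0.25), factored(2.0, 0.5, 0.25));
}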
425 | 447 | Value *FAddCombine::performFactorization(Instruction *I) { |
426 | 447 | assert((I->getOpcode() == Instruction::FAdd || |
427 | 447 | I->getOpcode() == Instruction::FSub) && "Expect add/sub"); |
428 | 447 | |
429 | 447 | Instruction *I0 = dyn_cast<Instruction>(I->getOperand(0)); |
430 | 447 | Instruction *I1 = dyn_cast<Instruction>(I->getOperand(1)); |
431 | 447 | |
432 | 447 | if (!I0 || !I1 || I0->getOpcode() != I1->getOpcode())
433 | 418 | return nullptr; |
434 | 29 | |
435 | 29 | bool isMpy = false; |
436 | 29 | if (I0->getOpcode() == Instruction::FMul) |
437 | 5 | isMpy = true; |
438 | 24 | else if (I0->getOpcode() != Instruction::FDiv)
439 | 18 | return nullptr; |
440 | 11 | |
441 | 11 | Value *Opnd0_0 = I0->getOperand(0); |
442 | 11 | Value *Opnd0_1 = I0->getOperand(1); |
443 | 11 | Value *Opnd1_0 = I1->getOperand(0); |
444 | 11 | Value *Opnd1_1 = I1->getOperand(1); |
445 | 11 | |
446 | 11 | // Input Instr I Factor AddSub0 AddSub1 |
447 | 11 | // ---------------------------------------------- |
448 | 11 | // (x*y) +/- (x*z) x y z |
449 | 11 | // (y/x) +/- (z/x) x y z |
450 | 11 | // |
451 | 11 | Value *Factor = nullptr; |
452 | 11 | Value *AddSub0 = nullptr, *AddSub1 = nullptr; |
453 | 11 | |
454 | 11 | if (isMpy) {
455 | 5 | if (Opnd0_0 == Opnd1_0 || Opnd0_0 == Opnd1_1)
456 | 2 | Factor = Opnd0_0; |
457 | 3 | else if (Opnd0_1 == Opnd1_0 || Opnd0_1 == Opnd1_1)
458 | 3 | Factor = Opnd0_1; |
459 | 5 | |
460 | 5 | if (Factor) {
461 | 5 | AddSub0 = (Factor == Opnd0_0) ? Opnd0_1 : Opnd0_0;
462 | 5 | AddSub1 = (Factor == Opnd1_0) ? Opnd1_1 : Opnd1_0;
463 | 5 | } |
464 | 11 | } else if (Opnd0_1 == Opnd1_1) {
465 | 4 | Factor = Opnd0_1; |
466 | 4 | AddSub0 = Opnd0_0; |
467 | 4 | AddSub1 = Opnd1_0; |
468 | 4 | } |
469 | 11 | |
470 | 11 | if (!Factor) |
471 | 2 | return nullptr; |
472 | 9 | |
473 | 9 | FastMathFlags Flags; |
474 | 9 | Flags.setUnsafeAlgebra(); |
475 | 9 | if (I0) Flags &= I->getFastMathFlags();
476 | 9 | if (I1) Flags &= I->getFastMathFlags();
477 | 9 | |
478 | 9 | // Create expression "NewAddSub = AddSub0 +/- AddSub1"
479 | 9 | Value *NewAddSub = (I->getOpcode() == Instruction::FAdd) ? |
480 | 3 | createFAdd(AddSub0, AddSub1) : |
481 | 6 | createFSub(AddSub0, AddSub1); |
482 | 9 | if (ConstantFP *CFP = dyn_cast<ConstantFP>(NewAddSub)) {
483 | 2 | const APFloat &F = CFP->getValueAPF(); |
484 | 2 | if (!F.isNormal()) |
485 | 1 | return nullptr; |
486 | 7 | } else if (Instruction *II = dyn_cast<Instruction>(NewAddSub))
487 | 7 | II->setFastMathFlags(Flags); |
488 | 9 | |
489 | 8 | if (isMpy) {
490 | 5 | Value *RI = createFMul(Factor, NewAddSub); |
491 | 5 | if (Instruction *II = dyn_cast<Instruction>(RI)) |
492 | 5 | II->setFastMathFlags(Flags); |
493 | 5 | return RI; |
494 | 5 | } |
495 | 3 | |
496 | 3 | Value *RI = createFDiv(NewAddSub, Factor); |
497 | 3 | if (Instruction *II = dyn_cast<Instruction>(RI)) |
498 | 3 | II->setFastMathFlags(Flags); |
499 | 447 | return RI; |
500 | 447 | } |
501 | | |
502 | 754 | Value *FAddCombine::simplify(Instruction *I) { |
503 | 754 | assert(I->hasUnsafeAlgebra() && "Should be in unsafe mode"); |
504 | 754 | |
505 | 754 | // Currently we are not able to handle vector types.
506 | 754 | if (I->getType()->isVectorTy()) |
507 | 271 | return nullptr; |
508 | 483 | |
509 | 754 | assert((I->getOpcode() == Instruction::FAdd || |
510 | 483 | I->getOpcode() == Instruction::FSub) && "Expect add/sub"); |
511 | 483 | |
512 | 483 | // Save the instruction before calling other member-functions. |
513 | 483 | Instr = I; |
514 | 483 | |
515 | 483 | FAddend Opnd0, Opnd1, Opnd0_0, Opnd0_1, Opnd1_0, Opnd1_1; |
516 | 483 | |
517 | 483 | unsigned OpndNum = FAddend::drillValueDownOneStep(I, Opnd0, Opnd1); |
518 | 483 | |
519 | 483 | // Step 1: Expand the 1st addend into Opnd0_0 and Opnd0_1. |
520 | 483 | unsigned Opnd0_ExpNum = 0; |
521 | 483 | unsigned Opnd1_ExpNum = 0; |
522 | 483 | |
523 | 483 | if (!Opnd0.isConstant()) |
524 | 479 | Opnd0_ExpNum = Opnd0.drillAddendDownOneStep(Opnd0_0, Opnd0_1); |
525 | 483 | |
526 | 483 | // Step 2: Expand the 2nd addend into Opnd1_0 and Opnd1_1. |
527 | 483 | if (OpndNum == 2 && !Opnd1.isConstant())
528 | 289 | Opnd1_ExpNum = Opnd1.drillAddendDownOneStep(Opnd1_0, Opnd1_1); |
529 | 483 | |
530 | 483 | // Step 3: Try to optimize Opnd0_0 + Opnd0_1 + Opnd1_0 + Opnd1_1 |
531 | 483 | if (Opnd0_ExpNum && Opnd1_ExpNum) {
532 | 21 | AddendVect AllOpnds; |
533 | 21 | AllOpnds.push_back(&Opnd0_0); |
534 | 21 | AllOpnds.push_back(&Opnd1_0); |
535 | 21 | if (Opnd0_ExpNum == 2) |
536 | 19 | AllOpnds.push_back(&Opnd0_1); |
537 | 21 | if (Opnd1_ExpNum == 2) |
538 | 20 | AllOpnds.push_back(&Opnd1_1); |
539 | 21 | |
540 | 21 | // Compute instruction quota. We should save at least one instruction. |
541 | 21 | unsigned InstQuota = 0; |
542 | 21 | |
543 | 21 | Value *V0 = I->getOperand(0); |
544 | 21 | Value *V1 = I->getOperand(1); |
545 | 21 | InstQuota = ((!isa<Constant>(V0) && V0->hasOneUse()) && |
546 | 21 | (!isa<Constant>(V1) && V1->hasOneUse())) ? 2 : 1;
547 | 21 | |
548 | 21 | if (Value *R = simplifyFAdd(AllOpnds, InstQuota)) |
549 | 4 | return R; |
550 | 479 | } |
551 | 479 | |
552 | 479 | if (OpndNum != 2) {
553 | 26 | // The input instruction is: "I = 0.0 +/- V". If V could be
554 | 26 | // split into two addends, say "V = X - Y", the instruction would have
555 | 26 | // been optimized into "I = Y - X" in the previous steps.
556 | 26 | // |
557 | 26 | const FAddendCoef &CE = Opnd0.getCoef(); |
558 | 26 | return CE.isOne() ? Opnd0.getSymVal() : nullptr;
559 | 26 | } |
560 | 453 | |
561 | 453 | // Step 4: Try to optimize Opnd0 + Opnd1_0 [+ Opnd1_1]
562 | 453 | if (Opnd1_ExpNum) {
563 | 25 | AddendVect AllOpnds; |
564 | 25 | AllOpnds.push_back(&Opnd0); |
565 | 25 | AllOpnds.push_back(&Opnd1_0); |
566 | 25 | if (Opnd1_ExpNum == 2) |
567 | 22 | AllOpnds.push_back(&Opnd1_1); |
568 | 25 | |
569 | 25 | if (Value *R = simplifyFAdd(AllOpnds, 1)) |
570 | 1 | return R; |
571 | 452 | } |
572 | 452 | |
573 | 452 | // Step 5: Try to optimize Opnd1 + Opnd0_0 [+ Opnd0_1]
574 | 452 | if (Opnd0_ExpNum) {
575 | 135 | AddendVect AllOpnds; |
576 | 135 | AllOpnds.push_back(&Opnd1); |
577 | 135 | AllOpnds.push_back(&Opnd0_0); |
578 | 135 | if (Opnd0_ExpNum == 2) |
579 | 53 | AllOpnds.push_back(&Opnd0_1); |
580 | 135 | |
581 | 135 | if (Value *R = simplifyFAdd(AllOpnds, 1)) |
582 | 5 | return R; |
583 | 447 | } |
584 | 447 | |
585 | 447 | // Step 6: Try factorization as the last resort.
586 | 447 | return performFactorization(I); |
587 | 447 | } |
588 | | |
589 | 181 | Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { |
590 | 181 | unsigned AddendNum = Addends.size(); |
591 | 181 | assert(AddendNum <= 4 && "Too many addends"); |
592 | 181 | |
593 | 181 | // For saving intermediate results; |
594 | 181 | unsigned NextTmpIdx = 0; |
595 | 181 | FAddend TmpResult[3]; |
596 | 181 | |
597 | 181 | // Points to the constant addend of the resulting simplified expression. |
598 | 181 | // If the resulting expression has a constant addend, it is desirable for
599 | 181 | // that constant to reside at the top of the resulting expression tree.
600 | 181 | // Placing constants close to super-expr(s) will potentially reveal some
601 | 181 | // optimization opportunities in super-expr(s).
602 | 181 | // |
603 | 181 | const FAddend *ConstAdd = nullptr; |
604 | 181 | |
605 | 181 | // Simplified addends are placed in <SimpVect>.
606 | 181 | AddendVect SimpVect; |
607 | 181 | |
608 | 181 | // The outer loop works on one symbolic-value at a time. Suppose the input |
609 | 181 | // addends are : <a1, x>, <b1, y>, <a2, x>, <c1, z>, <b2, y>, ... |
610 | 181 | // The symbolic-values will be processed in this order: x, y, z. |
611 | 181 | // |
612 | 657 | for (unsigned SymIdx = 0; SymIdx < AddendNum; SymIdx++) {
613 | 476 | |
614 | 476 | const FAddend *ThisAddend = Addends[SymIdx]; |
615 | 476 | if (!ThisAddend) {
616 | 23 | // This addend was processed before. |
617 | 23 | continue; |
618 | 23 | } |
619 | 453 | |
620 | 453 | Value *Val = ThisAddend->getSymVal(); |
621 | 453 | unsigned StartIdx = SimpVect.size(); |
622 | 453 | SimpVect.push_back(ThisAddend); |
623 | 453 | |
624 | 453 | // The inner loop collects addends sharing same symbolic-value, and these |
625 | 453 | // addends will be later on folded into a single addend. Following above |
626 | 453 | // example, if the symbolic value "y" is being processed, the inner loop |
627 | 453 | // will collect two addends "<b1,y>" and "<b2,y>". These two addends will
628 | 453 | // be later on folded into "<b1+b2, y>". |
629 | 453 | // |
630 | 453 | for (unsigned SameSymIdx = SymIdx + 1; |
631 | 866 | SameSymIdx < AddendNum; SameSymIdx++) {
632 | 413 | const FAddend *T = Addends[SameSymIdx]; |
633 | 413 | if (T && T->getSymVal() == Val) {
634 | 23 | // Set null such that next iteration of the outer loop will not process |
635 | 23 | // this addend again. |
636 | 23 | Addends[SameSymIdx] = nullptr; |
637 | 23 | SimpVect.push_back(T); |
638 | 23 | } |
639 | 413 | } |
640 | 453 | |
641 | 453 | // If multiple addends share same symbolic value, fold them together. |
642 | 453 | if (StartIdx + 1 != SimpVect.size()) {
643 | 18 | FAddend &R = TmpResult[NextTmpIdx ++]; |
644 | 18 | R = *SimpVect[StartIdx]; |
645 | 41 | for (unsigned Idx = StartIdx + 1; Idx < SimpVect.size(); Idx++)
646 | 23 | R += *SimpVect[Idx]; |
647 | 18 | |
648 | 18 | // Pop all addends being folded and push the resulting folded addend. |
649 | 18 | SimpVect.resize(StartIdx); |
650 | 18 | if (Val) {
651 | 16 | if (!R.isZero()) {
652 | 13 | SimpVect.push_back(&R); |
653 | 13 | } |
654 | 18 | } else { |
655 | 2 | // Don't push constant addend at this time. It will be the last element |
656 | 2 | // of <SimpVect>. |
657 | 2 | ConstAdd = &R; |
658 | 2 | } |
659 | 18 | } |
660 | 476 | } |
661 | 181 | |
662 | 181 | assert((NextTmpIdx <= array_lengthof(TmpResult) + 1) && |
663 | 181 | "out-of-bound access"); |
664 | 181 | |
665 | 181 | if (ConstAdd) |
666 | 2 | SimpVect.push_back(ConstAdd); |
667 | 181 | |
668 | 181 | Value *Result; |
669 | 181 | if (!SimpVect.empty()) |
670 | 181 | Result = createNaryFAdd(SimpVect, InstrQuota); |
671 | 0 | else { |
672 | 0 | // The addition is folded to 0.0. |
673 | 0 | Result = ConstantFP::get(Instr->getType(), 0.0); |
674 | 0 | } |
675 | 181 | |
676 | 181 | return Result; |
677 | 181 | } |
678 | | |
679 | | Value *FAddCombine::createNaryFAdd |
680 | 181 | (const AddendVect &Opnds, unsigned InstrQuota) { |
681 | 181 | assert(!Opnds.empty() && "Expect at least one addend"); |
682 | 181 | |
683 | 181 | // Step 1: Check if the # of instructions needed exceeds the quota. |
684 | 181 | // |
685 | 181 | unsigned InstrNeeded = calcInstrNumber(Opnds); |
686 | 181 | if (InstrNeeded > InstrQuota) |
687 | 171 | return nullptr; |
688 | 10 | |
689 | 10 | initCreateInstNum(); |
690 | 10 | |
691 | 10 | // Step 2: Emit the N-ary addition.
692 | 10 | // Note that at most three instructions are involved in Fadd-InstCombine: the |
693 | 10 | // addition in question, and at most two neighboring instructions. |
694 | 10 | // The resulting optimized addition should have at least one less instruction |
695 | 10 | // than the original addition expression tree. This implies that the resulting |
696 | 10 | // N-ary addition has at most two instructions, and we don't need to worry |
697 | 10 | // about tree-height when constructing the N-ary addition. |
698 | 10 | |
699 | 10 | Value *LastVal = nullptr; |
700 | 10 | bool LastValNeedNeg = false; |
701 | 10 | |
702 | 10 | // Iterate the addends, creating fadd/fsub using adjacent two addends. |
703 | 12 | for (const FAddend *Opnd : Opnds) { |
704 | 12 | bool NeedNeg; |
705 | 12 | Value *V = createAddendVal(*Opnd, NeedNeg); |
706 | 12 | if (!LastVal) {
707 | 10 | LastVal = V; |
708 | 10 | LastValNeedNeg = NeedNeg; |
709 | 10 | continue; |
710 | 10 | } |
711 | 2 | |
712 | 2 | if (LastValNeedNeg == NeedNeg) {
713 | 1 | LastVal = createFAdd(LastVal, V); |
714 | 1 | continue; |
715 | 1 | } |
716 | 1 | |
717 | 1 | if (LastValNeedNeg)
718 | 1 | LastVal = createFSub(V, LastVal); |
719 | 1 | else |
720 | 0 | LastVal = createFSub(LastVal, V); |
721 | 12 | |
722 | 12 | LastValNeedNeg = false; |
723 | 12 | } |
724 | 10 | |
725 | 10 | if (LastValNeedNeg) {
726 | 3 | LastVal = createFNeg(LastVal); |
727 | 3 | } |
728 | 10 | |
729 | | #ifndef NDEBUG |
730 | | assert(CreateInstrNum == InstrNeeded && |
731 | | "Inconsistent in instruction numbers"); |
732 | | #endif |
733 | | |
734 | 181 | return LastVal; |
735 | 181 | } |
736 | | |
737 | 10 | Value *FAddCombine::createFSub(Value *Opnd0, Value *Opnd1) { |
738 | 10 | Value *V = Builder.CreateFSub(Opnd0, Opnd1); |
739 | 10 | if (Instruction *I = dyn_cast<Instruction>(V)) |
740 | 8 | createInstPostProc(I); |
741 | 10 | return V; |
742 | 10 | } |
743 | | |
744 | 3 | Value *FAddCombine::createFNeg(Value *V) { |
745 | 3 | Value *Zero = cast<Value>(ConstantFP::getZeroValueForNegation(V->getType())); |
746 | 3 | Value *NewV = createFSub(Zero, V); |
747 | 3 | if (Instruction *I = dyn_cast<Instruction>(NewV)) |
748 | 2 | createInstPostProc(I, true); // fneg's don't receive instruction numbers. |
749 | 3 | return NewV; |
750 | 3 | } |
751 | | |
752 | 5 | Value *FAddCombine::createFAdd(Value *Opnd0, Value *Opnd1) { |
753 | 5 | Value *V = Builder.CreateFAdd(Opnd0, Opnd1); |
754 | 5 | if (Instruction *I = dyn_cast<Instruction>(V)) |
755 | 4 | createInstPostProc(I); |
756 | 5 | return V; |
757 | 5 | } |
758 | | |
759 | 11 | Value *FAddCombine::createFMul(Value *Opnd0, Value *Opnd1) { |
760 | 11 | Value *V = Builder.CreateFMul(Opnd0, Opnd1); |
761 | 11 | if (Instruction *I = dyn_cast<Instruction>(V)) |
762 | 11 | createInstPostProc(I); |
763 | 11 | return V; |
764 | 11 | } |
765 | | |
766 | 3 | Value *FAddCombine::createFDiv(Value *Opnd0, Value *Opnd1) { |
767 | 3 | Value *V = Builder.CreateFDiv(Opnd0, Opnd1); |
768 | 3 | if (Instruction *I = dyn_cast<Instruction>(V)) |
769 | 3 | createInstPostProc(I); |
770 | 3 | return V; |
771 | 3 | } |
772 | | |
773 | 28 | void FAddCombine::createInstPostProc(Instruction *NewInstr, bool NoNumber) { |
774 | 28 | NewInstr->setDebugLoc(Instr->getDebugLoc()); |
775 | 28 | |
776 | 28 | // Keep track of the number of instruction created. |
777 | 28 | if (!NoNumber) |
778 | 26 | incCreateInstNum(); |
779 | 28 | |
780 | 28 | // Propagate fast-math flags |
781 | 28 | NewInstr->setFastMathFlags(Instr->getFastMathFlags()); |
782 | 28 | } |
783 | | |
784 | | // Return the number of instructions needed to emit the N-ary addition.
785 | | // NOTE: Keep this function in sync with createAddendVal(). |
786 | 181 | unsigned FAddCombine::calcInstrNumber(const AddendVect &Opnds) { |
787 | 181 | unsigned OpndNum = Opnds.size(); |
788 | 181 | unsigned InstrNeeded = OpndNum - 1; |
789 | 181 | |
790 | 181 | // The number of addends in the form of "(-1)*x". |
791 | 181 | unsigned NegOpndNum = 0; |
792 | 181 | |
793 | 181 | // Adjust the number of instructions needed to emit the N-ary add. |
794 | 450 | for (const FAddend *Opnd : Opnds) { |
795 | 450 | if (Opnd->isConstant()) |
796 | 100 | continue; |
797 | 350 | |
798 | 350 | // The constant check above is really for a few special constant |
799 | 350 | // coefficients. |
800 | 350 | if (isa<UndefValue>(Opnd->getSymVal()))
801 | 1 | continue; |
802 | 349 | |
803 | 349 | const FAddendCoef &CE = Opnd->getCoef(); |
804 | 349 | if (CE.isMinusOne() || CE.isMinusTwo())
805 | 29 | NegOpndNum++; |
806 | 349 | |
807 | 349 | // Let the addend be "c * x". If "c == +/-1", the value of the addend |
808 | 349 | // is immediately available; otherwise, it needs exactly one instruction |
809 | 349 | // to evaluate the value. |
810 | 349 | if (!CE.isMinusOne() && !CE.isOne())
811 | 93 | InstrNeeded++; |
812 | 450 | } |
813 | 181 | if (NegOpndNum == OpndNum) |
814 | 7 | InstrNeeded++; |
815 | 181 | return InstrNeeded; |
816 | 181 | } |
817 | | |
818 | | // Input Addend Value NeedNeg(output) |
819 | | // ================================================================ |
820 | | // Constant C C false |
821 | | // <+/-1, V> V true iff coefficient is -1
822 | | // <2/-2, V> "fadd V, V" true iff coefficient is -2
823 | | // <C, V> "fmul V, C" false |
824 | | // |
825 | | // NOTE: Keep this function in sync with FAddCombine::calcInstrNumber. |
826 | 12 | Value *FAddCombine::createAddendVal(const FAddend &Opnd, bool &NeedNeg) { |
827 | 12 | const FAddendCoef &Coeff = Opnd.getCoef(); |
828 | 12 | |
829 | 12 | if (Opnd.isConstant()) {
830 | 1 | NeedNeg = false; |
831 | 1 | return Coeff.getValue(Instr->getType()); |
832 | 1 | } |
833 | 11 | |
834 | 11 | Value *OpndVal = Opnd.getSymVal(); |
835 | 11 | |
836 | 11 | if (Coeff.isMinusOne() || Coeff.isOne()) {
837 | 4 | NeedNeg = Coeff.isMinusOne(); |
838 | 4 | return OpndVal; |
839 | 4 | } |
840 | 7 | |
841 | 7 | if (Coeff.isTwo() || Coeff.isMinusTwo()) {
842 | 1 | NeedNeg = Coeff.isMinusTwo(); |
843 | 1 | return createFAdd(OpndVal, OpndVal); |
844 | 1 | } |
845 | 6 | |
846 | 6 | NeedNeg = false; |
847 | 6 | return createFMul(OpndVal, Coeff.getValue(Instr->getType())); |
848 | 6 | } |
849 | | |
850 | | /// \brief Return true if we can prove that: |
851 | | /// (sub LHS, RHS) === (sub nsw LHS, RHS) |
852 | | /// This basically requires proving that the sub in the original type would not
853 | | /// overflow to change the sign bit or have a carry out. |
854 | | /// TODO: Handle this for Vectors. |
855 | | bool InstCombiner::willNotOverflowSignedSub(const Value *LHS, |
856 | | const Value *RHS, |
857 | 910k | const Instruction &CxtI) const { |
858 | 910k | // If LHS and RHS each have at least two sign bits, the subtraction |
859 | 910k | // cannot overflow. |
860 | 910k | if (ComputeNumSignBits(LHS, 0, &CxtI) > 1 && |
861 | 135k | ComputeNumSignBits(RHS, 0, &CxtI) > 1) |
862 | 18.6k | return true; |
863 | 892k | |
864 | 892k | KnownBits LHSKnown = computeKnownBits(LHS, 0, &CxtI); |
865 | 892k | |
866 | 892k | KnownBits RHSKnown = computeKnownBits(RHS, 0, &CxtI); |
867 | 892k | |
868 | 892k | // Subtraction of two 2's complement numbers having identical signs will |
869 | 892k | // never overflow. |
870 | 892k | if ((LHSKnown.isNegative() && RHSKnown.isNegative()) ||
871 | 892k | (LHSKnown.isNonNegative() && RHSKnown.isNonNegative()))
872 | 251 | return true; |
873 | 891k | |
874 | 891k | // TODO: implement logic similar to checkRippleForAdd |
875 | 891k | return false; |
876 | 891k | } |
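// Illustrative sketch (not part of this file): an exhaustive i8 check of the
// "two sign bits" argument above. Values with at least two sign bits lie in
// [-64, 63], so their difference lies in [-127, 127] and always fits in i8;
// the subtraction can never signed-overflow.
#include <cassert>
#include <cstdint>

int main() {
  for (int lhs = -64; lhs <= 63; ++lhs)
    for (int rhs = -64; rhs <= 63; ++rhs)
      assert(static_cast<int8_t>(lhs - rhs) == lhs - rhs); // sub nsw is safe
}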
877 | | |
878 | | /// \brief Return true if we can prove that: |
879 | | /// (sub LHS, RHS) === (sub nuw LHS, RHS) |
880 | | bool InstCombiner::willNotOverflowUnsignedSub(const Value *LHS, |
881 | | const Value *RHS, |
882 | 1.85M | const Instruction &CxtI) const { |
883 | 1.85M | // If the LHS is negative and the RHS is non-negative, no unsigned wrap. |
884 | 1.85M | KnownBits LHSKnown = computeKnownBits(LHS, /*Depth=*/0, &CxtI); |
885 | 1.85M | KnownBits RHSKnown = computeKnownBits(RHS, /*Depth=*/0, &CxtI); |
886 | 1.85M | if (LHSKnown.isNegative() && RHSKnown.isNonNegative())
887 | 46 | return true; |
888 | 1.85M | |
889 | 1.85M | return false; |
890 | 1.85M | } |
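// Illustrative sketch (not part of this file): the rule above, checked
// exhaustively on u8. If LHS has its top ("sign") bit known set and RHS has
// it known clear, then LHS >= 128 > 127 >= RHS, so LHS - RHS cannot wrap.
#include <cassert>

int main() {
  for (unsigned lhs = 128; lhs <= 255; ++lhs) // "negative": top bit set
    for (unsigned rhs = 0; rhs <= 127; ++rhs) // non-negative: top bit clear
      assert(lhs >= rhs); // no borrow, hence sub nuw is safe
}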
891 | | |
892 | | // Checks if any operand is negative and we can convert add to sub. |
893 | | // This function checks for the following negative patterns:
894 | | // ADD(XOR(OR(Z, NOT(C)), C), 1) == NEG(AND(Z, C))
895 | | // ADD(XOR(AND(Z, C), C), 1) == NEG(OR(Z, ~C)) |
896 | | // XOR(AND(Z, C), (C + 1)) == NEG(OR(Z, ~C)) if C is even |
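// Illustrative sketch (not part of this file): exhaustive u8 verification of
// the three identities above, with the even-C restriction on the third one.
// All arithmetic is taken modulo 256, matching LLVM's wrapping semantics.
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned zi = 0; zi <= 255; ++zi)
    for (unsigned ci = 0; ci <= 255; ++ci) {
      uint8_t z = zi, c = ci, notc = ~c;
      // ADD(XOR(OR(Z, NOT(C)), C), 1) == NEG(AND(Z, C))
      assert(uint8_t(((z | notc) ^ c) + 1) == uint8_t(-(z & c)));
      // ADD(XOR(AND(Z, C), C), 1) == NEG(OR(Z, ~C))
      assert(uint8_t(((z & c) ^ c) + 1) == uint8_t(-(z | notc)));
      // XOR(AND(Z, C), C + 1) == NEG(OR(Z, ~C)), provided C is even
      if ((c & 1) == 0)
        assert(uint8_t((z & c) ^ uint8_t(c + 1)) == uint8_t(-(z | notc)));
    }
}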
897 | | static Value *checkForNegativeOperand(BinaryOperator &I, |
898 | 8.21M | InstCombiner::BuilderTy &Builder) { |
899 | 8.21M | Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); |
900 | 8.21M | |
901 | 8.21M | // This function creates 2 instructions to replace ADD, we need at least one |
902 | 8.21M | // of LHS or RHS to have one use to ensure benefit in transform. |
903 | 8.21M | if (!LHS->hasOneUse() && !RHS->hasOneUse())
904 | 4.52M | return nullptr; |
905 | 3.68M | |
906 | 3.68M | Value *X = nullptr, *Y = nullptr, *Z = nullptr; |
907 | 3.68M | const APInt *C1 = nullptr, *C2 = nullptr; |
908 | 3.68M | |
909 | 3.68M | // if ONE is on other side, swap |
910 | 3.68M | if (match(RHS, m_Add(m_Value(X), m_One()))) |
911 | 2.00k | std::swap(LHS, RHS); |
912 | 3.68M | |
913 | 3.68M | if (match(LHS, m_Add(m_Value(X), m_One()))) {
914 | 45.4k | // if XOR on other side, swap |
915 | 45.4k | if (match(RHS, m_Xor(m_Value(Y), m_APInt(C1)))) |
916 | 96 | std::swap(X, RHS); |
917 | 45.4k | |
918 | 45.4k | if (match(X, m_Xor(m_Value(Y), m_APInt(C1)))) {
919 | 120 | // X = XOR(Y, C1), Y = OR(Z, C2), C2 = NOT(C1) ==> X == NOT(AND(Z, C1)) |
920 | 120 | // ADD(ADD(X, 1), RHS) == ADD(X, ADD(RHS, 1)) == SUB(RHS, AND(Z, C1)) |
921 | 120 | if (match(Y, m_Or(m_Value(Z), m_APInt(C2))) && (*C2 == ~(*C1))) {
922 | 5 | Value *NewAnd = Builder.CreateAnd(Z, *C1); |
923 | 5 | return Builder.CreateSub(RHS, NewAnd, "sub"); |
924 | 115 | } else if (match(Y, m_And(m_Value(Z), m_APInt(C2))) && (*C1 == *C2)) {
925 | 3 | // X = XOR(Y, C1), Y = AND(Z, C2), C2 == C1 ==> X == NOT(OR(Z, ~C1)) |
926 | 3 | // ADD(ADD(X, 1), RHS) == ADD(X, ADD(RHS, 1)) == SUB(RHS, OR(Z, ~C1)) |
927 | 3 | Value *NewOr = Builder.CreateOr(Z, ~(*C1)); |
928 | 3 | return Builder.CreateSub(RHS, NewOr, "sub"); |
929 | 3 | } |
930 | 3.68M | } |
931 | 45.4k | } |
932 | 3.68M | |
933 | 3.68M | // Restore LHS and RHS |
934 | 3.68M | LHS = I.getOperand(0); |
935 | 3.68M | RHS = I.getOperand(1); |
936 | 3.68M | |
937 | 3.68M | // if XOR is on other side, swap |
938 | 3.68M | if (match(RHS, m_Xor(m_Value(Y), m_APInt(C1)))) |
939 | 3.62k | std::swap(LHS, RHS); |
940 | 3.68M | |
941 | 3.68M | // C1 is ODD (so C2 == C1 - 1 below is even)
942 | 3.68M | // LHS = XOR(Y, C1), Y = AND(Z, C2), C1 == (C2 + 1) => LHS == NEG(OR(Z, ~C2)) |
943 | 3.68M | // ADD(LHS, RHS) == SUB(RHS, OR(Z, ~C2)) |
944 | 3.68M | if (match(LHS, m_Xor(m_Value(Y), m_APInt(C1)))) |
945 | 7.93k | if (C1->countTrailingZeros() == 0)
946 | 5.13k | if (match(Y, m_And(m_Value(Z), m_APInt(C2))) && *C1 == (*C2 + 1)) {
947 | 1 | Value *NewOr = Builder.CreateOr(Z, ~(*C2)); |
948 | 1 | return Builder.CreateSub(RHS, NewOr, "sub"); |
949 | 1 | } |
950 | 3.68M | return nullptr; |
951 | 3.68M | } |
952 | | |
953 | | static Instruction *foldAddWithConstant(BinaryOperator &Add, |
954 | 8.22M | InstCombiner::BuilderTy &Builder) { |
955 | 8.22M | Value *Op0 = Add.getOperand(0), *Op1 = Add.getOperand(1); |
956 | 8.22M | const APInt *C; |
957 | 8.22M | if (!match(Op1, m_APInt(C))) |
958 | 2.37M | return nullptr; |
959 | 5.85M | |
960 | 5.85M | if (C->isSignMask()) {
961 | 218 | // If wrapping is not allowed, then the addition must set the sign bit: |
962 | 218 | // X + (signmask) --> X | signmask |
963 | 218 | if (Add.hasNoSignedWrap() || Add.hasNoUnsignedWrap())
964 | 4 | return BinaryOperator::CreateOr(Op0, Op1); |
965 | 214 | |
966 | 214 | // If wrapping is allowed, then the addition flips the sign bit of LHS: |
967 | 214 | // X + (signmask) --> X ^ signmask |
968 | 214 | return BinaryOperator::CreateXor(Op0, Op1); |
969 | 214 | } |
970 | 5.85M | |
971 | 5.85M | Value *X; |
972 | 5.85M | const APInt *C2; |
973 | 5.85M | Type *Ty = Add.getType(); |
974 | 5.85M | |
975 | 5.85M | // Is this add the last step in a convoluted sext? |
976 | 5.85M | // add(zext(xor i16 X, -32768), -32768) --> sext X |
977 | 5.85M | if (match(Op0, m_ZExt(m_Xor(m_Value(X), m_APInt(C2)))) && |
978 | 5.85M | C2->isMinSignedValue() && C2->sext(Ty->getScalarSizeInBits()) == *C2)
979 | 4 | return CastInst::Create(Instruction::SExt, X, Ty); |
980 | 5.85M | |
981 | 5.85M | // (add (zext (add nuw X, C2)), C) --> (zext (add nuw X, C2 + C)) |
982 | 5.85M | // FIXME: This should check hasOneUse to not increase the instruction count? |
983 | 5.85M | if (C->isNegative() &&
984 | 1.23M | match(Op0, m_ZExt(m_NUWAdd(m_Value(X), m_APInt(C2)))) && |
985 | 5.85M | C->sge(-C2->sext(C->getBitWidth()))) {
986 | 8 | Constant *NewC = |
987 | 8 | ConstantInt::get(X->getType(), *C2 + C->trunc(C2->getBitWidth())); |
988 | 8 | return new ZExtInst(Builder.CreateNUWAdd(X, NewC), Ty); |
989 | 8 | } |
990 | 5.85M | |
991 | 5.85M | if (C->isOneValue() && Op0->hasOneUse()) {
992 | 807k | // add (sext i1 X), 1 --> zext (not X) |
993 | 807k | // TODO: The smallest IR representation is (select X, 0, 1), and that would |
994 | 807k | // not require the one-use check. But we need to remove a transform in |
995 | 807k | // visitSelect and make sure that IR value tracking for select is equal or |
996 | 807k | // better than for these ops. |
997 | 807k | if (match(Op0, m_SExt(m_Value(X))) && |
998 | 1.26k | X->getType()->getScalarSizeInBits() == 1) |
999 | 4 | return new ZExtInst(Builder.CreateNot(X), Ty); |
1000 | 807k | |
1001 | 807k | // Shifts and add used to flip and mask off the low bit: |
1002 | 807k | // add (ashr (shl i32 X, 31), 31), 1 --> and (not X), 1 |
1003 | 807k | const APInt *C3; |
1004 | 807k | if (match(Op0, m_AShr(m_Shl(m_Value(X), m_APInt(C2)), m_APInt(C3))) && |
1005 | 807k | C2 == C3 && *C2 == Ty->getScalarSizeInBits() - 1) {
1006 | 2 | Value *NotX = Builder.CreateNot(X); |
1007 | 2 | return BinaryOperator::CreateAnd(NotX, ConstantInt::get(Ty, 1)); |
1008 | 2 | } |
1009 | 5.85M | } |
1010 | 5.85M | |
1011 | 5.85M | return nullptr; |
1012 | 5.85M | } |
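// Illustrative sketch (not part of this file): the sign-mask fold above,
// checked exhaustively on u8 (sign mask 0x80). Adding the sign mask can only
// flip the top bit, so "X + 0x80" equals "X ^ 0x80"; and if the add is known
// not to wrap, the top bit of X must be clear, so it also equals "X | 0x80".
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned xi = 0; xi <= 255; ++xi) {
    uint8_t x = xi;
    assert(uint8_t(x + 0x80) == uint8_t(x ^ 0x80)); // wrapping case: xor
    if (x < 0x80)                                   // no-wrap case: or
      assert(uint8_t(x + 0x80) == uint8_t(x | 0x80));
  }
}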
1013 | | |
1014 | 8.25M | Instruction *InstCombiner::visitAdd(BinaryOperator &I) { |
1015 | 8.25M | bool Changed = SimplifyAssociativeOrCommutative(I); |
1016 | 8.25M | Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); |
1017 | 8.25M | |
1018 | 8.25M | if (Value *V = SimplifyVectorOp(I)) |
1019 | 33 | return replaceInstUsesWith(I, V); |
1020 | 8.25M | |
1021 | 8.25M | if (Value *V =
1022 | 8.25M | SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), |
1023 | 8.25M | SQ.getWithInstruction(&I))) |
1024 | 30.3k | return replaceInstUsesWith(I, V); |
1025 | 8.22M | |
1026 | 8.22M | // (A*B)+(A*C) -> A*(B+C) etc |
1027 | 8.22M | if (Value *V = SimplifyUsingDistributiveLaws(I))
1028 | 410 | return replaceInstUsesWith(I, V); |
1029 | 8.22M | |
1030 | 8.22M | if (Instruction *X = foldAddWithConstant(I, Builder))
1031 | 236 | return X; |
1032 | 8.22M | |
1033 | 8.22M | // FIXME: This should be moved into the above helper function to allow these |
1034 | 8.22M | // transforms for splat vectors. |
1035 | 8.22M | if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
1036 | 5.70M | // zext(bool) + C -> bool ? C + 1 : C |
1037 | 5.70M | if (ZExtInst *ZI = dyn_cast<ZExtInst>(LHS)) |
1038 | 65.0k | if (ZI->getSrcTy()->isIntegerTy(1))
1039 | 108 | return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI); |
1040 | 5.70M | |
1041 | 5.70M | Value *XorLHS = nullptr; ConstantInt *XorRHS = nullptr; |
1042 | 5.70M | if (match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) {
1043 | 3.27k | uint32_t TySizeBits = I.getType()->getScalarSizeInBits(); |
1044 | 3.27k | const APInt &RHSVal = CI->getValue(); |
1045 | 3.27k | unsigned ExtendAmt = 0; |
1046 | 3.27k | // If we have ADD(XOR(AND(X, 0xFF), 0x80), 0xF..F80), it's a sext. |
1047 | 3.27k | // If we have ADD(XOR(AND(X, 0xFF), 0xF..F80), 0x80), it's a sext. |
1048 | 3.27k | if (XorRHS->getValue() == -RHSVal) {
1049 | 31 | if (RHSVal.isPowerOf2()) |
1050 | 6 | ExtendAmt = TySizeBits - RHSVal.logBase2() - 1; |
1051 | 25 | else if (XorRHS->getValue().isPowerOf2())
1052 | 25 | ExtendAmt = TySizeBits - XorRHS->getValue().logBase2() - 1; |
1053 | 31 | } |
1054 | 3.27k | |
1055 | 3.27k | if (ExtendAmt) {
1056 | 31 | APInt Mask = APInt::getHighBitsSet(TySizeBits, ExtendAmt); |
1057 | 31 | if (!MaskedValueIsZero(XorLHS, Mask, 0, &I)) |
1058 | 27 | ExtendAmt = 0; |
1059 | 31 | } |
1060 | 3.27k | |
1061 | 3.27k | if (ExtendAmt) {
1062 | 4 | Constant *ShAmt = ConstantInt::get(I.getType(), ExtendAmt); |
1063 | 4 | Value *NewShl = Builder.CreateShl(XorLHS, ShAmt, "sext"); |
1064 | 4 | return BinaryOperator::CreateAShr(NewShl, ShAmt); |
1065 | 4 | } |
1066 | 3.27k | |
1067 | 3.27k | // If this is a xor that was canonicalized from a sub, turn it back into |
1068 | 3.27k | // a sub and fuse this add with it. |
1069 | 3.27k | if (LHS->hasOneUse() && (XorRHS->getValue()+1).isPowerOf2()) {
1070 | 43 | KnownBits LHSKnown = computeKnownBits(XorLHS, 0, &I); |
1071 | 43 | if ((XorRHS->getValue() | LHSKnown.Zero).isAllOnesValue()) |
1072 | 21 | return BinaryOperator::CreateSub(ConstantExpr::getAdd(XorRHS, CI), |
1073 | 21 | XorLHS); |
1074 | 3.25k | } |
1075 | 3.25k | // (X + signmask) + C could have gotten canonicalized to (X^signmask) + C, |
1076 | 3.25k | // transform them into (X + (signmask ^ C)) |
1077 | 3.25k | if (XorRHS->getValue().isSignMask())
1078 | 1 | return BinaryOperator::CreateAdd(XorLHS, |
1079 | 1 | ConstantExpr::getXor(XorRHS, CI)); |
1080 | 8.22M | } |
1081 | 5.70M | } |
1082 | 8.22M | |
1083 | 8.22M | if (isa<Constant>(RHS))
1084 | 5.85M | if (Instruction *NV = foldOpWithConstantIntoOperand(I))
1085 | 1.06k | return NV; |
1086 | 8.22M | |
1087 | 8.22M | if (I.getType()->isIntOrIntVectorTy(1))
1088 | 3 | return BinaryOperator::CreateXor(LHS, RHS); |
1089 | 8.22M | |
1090 | 8.22M | // X + X --> X << 1 |
1091 | 8.22M | if (LHS == RHS) {
1092 | 216 | BinaryOperator *New = |
1093 | 216 | BinaryOperator::CreateShl(LHS, ConstantInt::get(I.getType(), 1)); |
1094 | 216 | New->setHasNoSignedWrap(I.hasNoSignedWrap()); |
1095 | 216 | New->setHasNoUnsignedWrap(I.hasNoUnsignedWrap()); |
1096 | 216 | return New; |
1097 | 216 | } |
1098 | 8.22M | |
1099 | 8.22M | // -A + B --> B - A |
1100 | 8.22M | // -A + -B --> -(A + B) |
1101 | 8.22M | if (Value *LHSV = dyn_castNegVal(LHS)) {
1102 | 1.76k | if (!isa<Constant>(RHS)) |
1103 | 416 | if (Value *RHSV = dyn_castNegVal(RHS)) {
1104 | 236 | Value *NewAdd = Builder.CreateAdd(LHSV, RHSV, "sum"); |
1105 | 236 | return BinaryOperator::CreateNeg(NewAdd); |
1106 | 236 | } |
1107 | 1.52k | |
1108 | 1.52k | return BinaryOperator::CreateSub(RHS, LHSV); |
1109 | 1.52k | } |
1110 | 8.22M | |
1111 | 8.22M | // A + -B --> A - B |
1112 | 8.22M | if (!isa<Constant>(RHS))
1113 | 2.36M | if (Value *V = dyn_castNegVal(RHS))
1114 | 8.46k | return BinaryOperator::CreateSub(LHS, V); |
1115 | 8.21M | |
1116 | 8.21M | if (Value *V = checkForNegativeOperand(I, Builder))
1117 | 9 | return replaceInstUsesWith(I, V); |
1118 | 8.21M | |
1119 | 8.21M | // A+B --> A|B iff A and B have no bits set in common. |
1120 | 8.21M | if (haveNoCommonBitsSet(LHS, RHS, DL, &AC, &I, &DT))
1121 | 29.4k | return BinaryOperator::CreateOr(LHS, RHS); |
1122 | 8.18M | |
1123 | 8.18M | if (Constant *CRHS = dyn_cast<Constant>(RHS)) {
1124 | 5.82M | Value *X; |
1125 | 5.82M | if (match(LHS, m_Not(m_Value(X)))) // ~X + C --> (C-1) - X |
1126 | 141 | return BinaryOperator::CreateSub(SubOne(CRHS), X); |
1127 | 8.18M | } |
1128 | 8.18M | |
1129 | 8.18M | // FIXME: We already did a check for ConstantInt RHS above this. |
1130 | 8.18M | // FIXME: Is this pattern covered by another fold? No regression tests fail on |
1131 | 8.18M | // removal. |
1132 | 8.18M | if (ConstantInt *CRHS = dyn_cast<ConstantInt>(RHS)) {
1133 | 5.67M | // (X & FF00) + xx00 -> (X+xx00) & FF00 |
1134 | 5.67M | Value *X; |
1135 | 5.67M | ConstantInt *C2; |
1136 | 5.67M | if (LHS->hasOneUse() && |
1137 | 1.65M | match(LHS, m_And(m_Value(X), m_ConstantInt(C2))) && |
1138 | 5.67M | CRHS->getValue() == (CRHS->getValue() & C2->getValue())) {
1139 | 4.64k | // See if all bits from the first bit set in the Add RHS up are included |
1140 | 4.64k | // in the mask. First, get the rightmost bit. |
1141 | 4.64k | const APInt &AddRHSV = CRHS->getValue(); |
1142 | 4.64k | |
1143 | 4.64k | // Form a mask of all bits from the lowest bit added through the top. |
1144 | 4.64k | APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1)); |
1145 | 4.64k | |
1146 | 4.64k | // See if the and mask includes all of these bits. |
1147 | 4.64k | APInt AddRHSHighBitsAnd(AddRHSHighBits & C2->getValue()); |
1148 | 4.64k | |
1149 | 4.64k | if (AddRHSHighBits == AddRHSHighBitsAnd) {
1150 | 179 | // Okay, the xform is safe. Insert the new add pronto. |
1151 | 179 | Value *NewAdd = Builder.CreateAdd(X, CRHS, LHS->getName()); |
1152 | 179 | return BinaryOperator::CreateAnd(NewAdd, C2); |
1153 | 179 | } |
1154 | 8.18M | } |
1155 | 5.67M | } |
1156 | 8.18M | |
1157 | 8.18M | // add (select X 0 (sub n A)) A --> select X A n |
1158 | 8.18M | { |
1159 | 8.18M | SelectInst *SI = dyn_cast<SelectInst>(LHS); |
1160 | 8.18M | Value *A = RHS; |
1161 | 8.18M | if (!SI) {
1162 | 8.00M | SI = dyn_cast<SelectInst>(RHS); |
1163 | 8.00M | A = LHS; |
1164 | 8.00M | } |
1165 | 8.18M | if (SI && SI->hasOneUse()) {
1166 | 77.8k | Value *TV = SI->getTrueValue(); |
1167 | 77.8k | Value *FV = SI->getFalseValue(); |
1168 | 77.8k | Value *N; |
1169 | 77.8k | |
1170 | 77.8k | // Can we fold the add into the argument of the select? |
1171 | 77.8k | // We check both true and false select arguments for a matching subtract. |
1172 | 77.8k | if (match(FV, m_Zero()) && match(TV, m_Sub(m_Value(N), m_Specific(A))))
1173 | 77.8k | // Fold the add into the true select value. |
1174 | 116 | return SelectInst::Create(SI->getCondition(), N, A); |
1175 | 77.7k | |
1176 | 77.7k | if (match(TV, m_Zero()) && match(FV, m_Sub(m_Value(N), m_Specific(A))))
1177 | 77.7k | // Fold the add into the false select value. |
1178 | 1 | return SelectInst::Create(SI->getCondition(), A, N); |
1179 | 8.18M | } |
1180 | 8.18M | } |
1181 | 8.18M | |
1182 | 8.18M | // Check for (add (sext x), y), see if we can merge this into an |
1183 | 8.18M | // integer add followed by a sext. |
1184 | 8.18M | if (SExtInst *LHSConv = dyn_cast<SExtInst>(LHS)) {
1185 | 127k | // (add (sext x), cst) --> (sext (add x, cst')) |
1186 | 127k | if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) {
1187 | 79.9k | if (LHSConv->hasOneUse()) {
1188 | 14.9k | Constant *CI = |
1189 | 14.9k | ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType()); |
1190 | 14.9k | if (ConstantExpr::getSExt(CI, I.getType()) == RHSC && |
1191 | 14.9k | willNotOverflowSignedAdd(LHSConv->getOperand(0), CI, I)) {
1192 | 62 | // Insert the new, smaller add. |
1193 | 62 | Value *NewAdd = |
1194 | 62 | Builder.CreateNSWAdd(LHSConv->getOperand(0), CI, "addconv"); |
1195 | 62 | return new SExtInst(NewAdd, I.getType()); |
1196 | 62 | } |
1197 | 127k | } |
1198 | 79.9k | } |
1199 | 127k | |
1200 | 127k | // (add (sext x), (sext y)) --> (sext (add int x, y)) |
1201 | 127k | if (SExtInst *RHSConv = dyn_cast<SExtInst>(RHS)) {
1202 | 7.64k | // Only do this if x/y have the same type, if at least one of them has a |
1203 | 7.64k | // single use (so we don't increase the number of sexts), and if the |
1204 | 7.64k | // integer add will not overflow. |
1205 | 7.64k | if (LHSConv->getOperand(0)->getType() == |
1206 | 7.64k | RHSConv->getOperand(0)->getType() && |
1207 | 7.18k | (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
1208 | 4.29k | willNotOverflowSignedAdd(LHSConv->getOperand(0), |
1209 | 7.64k | RHSConv->getOperand(0), I)) { |
1210 | 17 | // Insert the new integer add. |
1211 | 17 | Value *NewAdd = Builder.CreateNSWAdd(LHSConv->getOperand(0), |
1212 | 17 | RHSConv->getOperand(0), "addconv"); |
1213 | 17 | return new SExtInst(NewAdd, I.getType()); |
1214 | 17 | } |
1215 | 8.18M | } |
1216 | 127k | } |
1217 | 8.18M | |
1218 | 8.18M | // Check for (add (zext x), y), see if we can merge this into an |
1219 | 8.18M | // integer add followed by a zext. |
1220 | 8.18M | if (auto *LHSConv = dyn_cast<ZExtInst>(LHS)) {
1221 | 90.1k | // (add (zext x), cst) --> (zext (add x, cst')) |
1222 | 90.1k | if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) {
1223 | 64.6k | if (LHSConv->hasOneUse()) {
1224 | 35.3k | Constant *CI = |
1225 | 35.3k | ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType()); |
1226 | 35.3k | if (ConstantExpr::getZExt(CI, I.getType()) == RHSC && |
1227 | 35.3k | willNotOverflowUnsignedAdd(LHSConv->getOperand(0), CI, I)) {
1228 | 90 | // Insert the new, smaller add. |
1229 | 90 | Value *NewAdd = |
1230 | 90 | Builder.CreateNUWAdd(LHSConv->getOperand(0), CI, "addconv"); |
1231 | 90 | return new ZExtInst(NewAdd, I.getType()); |
1232 | 90 | } |
1233 | 90.0k | } |
1234 | 64.6k | } |
1235 | 90.0k | |
1236 | 90.0k | // (add (zext x), (zext y)) --> (zext (add int x, y)) |
1237 | 90.0k | if (auto *RHSConv = dyn_cast<ZExtInst>(RHS)) {
1238 | 19.6k | // Only do this if x/y have the same type, if at least one of them has a |
1239 | 19.6k | // single use (so we don't increase the number of zexts), and if the |
1240 | 19.6k | // integer add will not overflow. |
1241 | 19.6k | if (LHSConv->getOperand(0)->getType() == |
1242 | 19.6k | RHSConv->getOperand(0)->getType() && |
1243 | 18.9k | (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
1244 | 16.2k | willNotOverflowUnsignedAdd(LHSConv->getOperand(0), |
1245 | 19.6k | RHSConv->getOperand(0), I)) { |
1246 | 21 | // Insert the new integer add. |
1247 | 21 | Value *NewAdd = Builder.CreateNUWAdd( |
1248 | 21 | LHSConv->getOperand(0), RHSConv->getOperand(0), "addconv"); |
1249 | 21 | return new ZExtInst(NewAdd, I.getType()); |
1250 | 21 | } |
1251 | 8.18M | } |
1252 | 90.1k | } |
1253 | 8.18M | |
1254 | 8.18M | // (add (xor A, B) (and A, B)) --> (or A, B) |
1255 | 8.18M | { |
1256 | 8.18M | Value *A = nullptr, *B = nullptr; |
1257 | 8.18M | if (match(RHS, m_Xor(m_Value(A), m_Value(B))) && |
1258 | 11.4k | match(LHS, m_c_And(m_Specific(A), m_Specific(B)))) |
1259 | 1 | return BinaryOperator::CreateOr(A, B); |
1260 | 8.18M | |
1261 | 8.18M | if (match(LHS, m_Xor(m_Value(A), m_Value(B))) &&
1262 | 9.56k | match(RHS, m_c_And(m_Specific(A), m_Specific(B)))) |
1263 | 0 | return BinaryOperator::CreateOr(A, B); |
1264 | 8.18M | } |
1265 | 8.18M | |
1266 | 8.18M | // (add (or A, B) (and A, B)) --> (add A, B) |
1267 | 8.18M | { |
1268 | 8.18M | Value *A = nullptr, *B = nullptr; |
1270 | 8.18M | match(LHS, m_c_And(m_Specific(A), m_Specific(B)))) {
1270 | 8.18M | match(LHS, m_c_And(m_Specific(A), m_Specific(B)))6.68k ) { |
1271 | 0 | auto *New = BinaryOperator::CreateAdd(A, B); |
1272 | 0 | New->setHasNoSignedWrap(I.hasNoSignedWrap()); |
1273 | 0 | New->setHasNoUnsignedWrap(I.hasNoUnsignedWrap()); |
1274 | 0 | return New; |
1275 | 0 | } |
1276 | 8.18M | |
1277 | 8.18M | if (match(LHS, m_Or(m_Value(A), m_Value(B))) &&
1278 | 8.18M | match(RHS, m_c_And(m_Specific(A), m_Specific(B)))) {
1279 | 4 | auto *New = BinaryOperator::CreateAdd(A, B); |
1280 | 4 | New->setHasNoSignedWrap(I.hasNoSignedWrap()); |
1281 | 4 | New->setHasNoUnsignedWrap(I.hasNoUnsignedWrap()); |
1282 | 4 | return New; |
1283 | 4 | } |
1284 | 8.18M | } |
1285 | 8.18M | |
1286 | 8.18M | // TODO(jingyue): Consider willNotOverflowSignedAdd and |
1287 | 8.18M | // willNotOverflowUnsignedAdd to reduce the number of invocations of |
1288 | 8.18M | // computeKnownBits. |
1289 | 8.18M | if (!I.hasNoSignedWrap() && willNotOverflowSignedAdd(LHS, RHS, I)) {
1290 | 33.2k | Changed = true; |
1291 | 33.2k | I.setHasNoSignedWrap(true); |
1292 | 33.2k | } |
1293 | 8.18M | if (!I.hasNoUnsignedWrap() && willNotOverflowUnsignedAdd(LHS, RHS, I)) {
1294 | 20.4k | Changed = true; |
1295 | 20.4k | I.setHasNoUnsignedWrap(true); |
1296 | 20.4k | } |
1297 | 8.18M | |
1298 | 8.18M | return Changed ? &I : nullptr;
1299 | 8.25M | } |
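// Illustrative sketch (not part of this file): the xor/add sign-extension
// idiom recognized in visitAdd above, checked exhaustively for an 8-bit
// payload in a 32-bit register. "((x & 0xFF) ^ 0x80) + 0xFFFFFF80" equals
// sext(i8 x) to i32, which the combine re-expresses as a shl/ashr pair.
#include <cassert>
#include <cstdint>

int main() {
  for (int v = -128; v <= 127; ++v) {
    uint32_t x = static_cast<uint32_t>(v);
    uint32_t viaXorAdd = ((x & 0xFF) ^ 0x80) + 0xFFFFFF80u;
    // shl 24 then ashr 24 (arithmetic shift on the signed type):
    uint32_t viaShifts =
        static_cast<uint32_t>(static_cast<int32_t>(x << 24) >> 24);
    assert(viaXorAdd == viaShifts);
    assert(static_cast<int32_t>(viaXorAdd) == v);
  }
}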
1300 | | |
1301 | 703k | Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { |
1302 | 703k | bool Changed = SimplifyAssociativeOrCommutative(I); |
1303 | 703k | Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); |
1304 | 703k | |
1305 | 703k | if (Value *V = SimplifyVectorOp(I)) |
1306 | 49 | return replaceInstUsesWith(I, V); |
1307 | 703k | |
1308 | 703k | if (Value *V = SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(),
1309 | 703k | SQ.getWithInstruction(&I))) |
1310 | 21 | return replaceInstUsesWith(I, V); |
1311 | 703k | |
1312 | 703k | if (isa<Constant>(RHS))
1313 | 404k | if (Instruction *FoldedFAdd = foldOpWithConstantIntoOperand(I))
1314 | 21 | return FoldedFAdd; |
1315 | 703k | |
1316 | 703k | // -A + B --> B - A |
1317 | 703k | // -A + -B --> -(A + B) |
1318 | 703k | if (Value *LHSV = dyn_castFNegVal(LHS)) {
1319 | 22 | Instruction *RI = BinaryOperator::CreateFSub(RHS, LHSV); |
1320 | 22 | RI->copyFastMathFlags(&I); |
1321 | 22 | return RI; |
1322 | 22 | } |
1323 | 703k | |
1324 | 703k | // A + -B --> A - B |
1325 | 703k | if (!isa<Constant>(RHS))
1326 | 299k | if (Value *V = dyn_castFNegVal(RHS)) {
1327 | 99 | Instruction *RI = BinaryOperator::CreateFSub(LHS, V); |
1328 | 99 | RI->copyFastMathFlags(&I); |
1329 | 99 | return RI; |
1330 | 99 | } |
1331 | 703k | |
1332 | 703k | // Check for (fadd double (sitofp x), y), see if we can merge this into an |
1333 | 703k | // integer add followed by a promotion. |
1334 | 703k | if (SIToFPInst *LHSConv = dyn_cast<SIToFPInst>(LHS)) {
1335 | 3.45k | Value *LHSIntVal = LHSConv->getOperand(0); |
1336 | 3.45k | Type *FPType = LHSConv->getType(); |
1337 | 3.45k | |
1338 | 3.45k | // TODO: This check is overly conservative. In many cases known bits |
1339 | 3.45k | // analysis can tell us that the result of the addition has fewer
1340 | 3.45k | // significant bits than the integer type can hold.
1341 | 3.21k | auto IsValidPromotion = [](Type *FTy, Type *ITy) { |
1342 | 3.21k | Type *FScalarTy = FTy->getScalarType(); |
1343 | 3.21k | Type *IScalarTy = ITy->getScalarType(); |
1344 | 3.21k | |
1345 | 3.21k | // Do we have enough bits in the significand to represent the result of |
1346 | 3.21k | // the integer addition? |
1347 | 3.21k | unsigned MaxRepresentableBits = |
1348 | 3.21k | APFloat::semanticsPrecision(FScalarTy->getFltSemantics()); |
1349 | 3.21k | return IScalarTy->getIntegerBitWidth() <= MaxRepresentableBits; |
1350 | 3.21k | }; |
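
The precision bound is easy to see with concrete numbers: float has a 24-bit significand, so it cannot represent 2^24 + 1, while double's 53-bit significand converts any i32 exactly. A standalone illustration (assumed test code, not part of this file):

  #include <cassert>

  int main() {
    // 2^24 + 1 is not representable in float; the sum rounds back down,
    // so promoting i32 arithmetic into float math could change results.
    assert(16777216.0f + 1.0f == 16777216.0f);
    // Every i32 fits in double's 53-bit significand, so this sum is exact.
    assert(2147483647.0 + 1.0 == 2147483648.0);
  }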
1351 | 3.45k | |
1352 | 3.45k | // (fadd double (sitofp x), fpcst) --> (sitofp (add int x, intcst)) |
1353 | 3.45k | // ... if the constant fits in the integer value. This is useful for things |
1354 | 3.45k | // like (double)(x & 1234) + 4.0 -> (double)((X & 1234)+4) which no longer |
1355 | 3.45k | // requires a constant pool load, and generally allows the add to be better |
1356 | 3.45k | // instcombined. |
1357 | 3.45k | if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) |
1358 | 2.93k | if (IsValidPromotion(FPType, LHSIntVal->getType())) {
1359 | 2.60k | Constant *CI = |
1360 | 2.60k | ConstantExpr::getFPToSI(CFP, LHSIntVal->getType()); |
1361 | 2.60k | if (LHSConv->hasOneUse() && |
1362 | 1.86k | ConstantExpr::getSIToFP(CI, I.getType()) == CFP && |
1363 | 2.60k | willNotOverflowSignedAdd(LHSIntVal, CI, I)) {
1364 | 4 | // Insert the new integer add. |
1365 | 4 | Value *NewAdd = Builder.CreateNSWAdd(LHSIntVal, CI, "addconv"); |
1366 | 4 | return new SIToFPInst(NewAdd, I.getType()); |
1367 | 4 | } |
1368 | 3.45k | } |
1369 | 3.45k | |
1370 | 3.45k | // (fadd double (sitofp x), (sitofp y)) --> (sitofp (add int x, y)) |
1371 | 3.45k | if (SIToFPInst *RHSConv = dyn_cast<SIToFPInst>(RHS)) {
1372 | 281 | Value *RHSIntVal = RHSConv->getOperand(0); |
1373 | 281 | // It's enough to check LHS types only because we require int types to |
1374 | 281 | // be the same for this transform. |
1375 | 281 | if (IsValidPromotion(FPType, LHSIntVal->getType())) {
1376 | 251 | // Only do this if x/y have the same type, if at least one of them has a |
1377 | 251 | // single use (so we don't increase the number of int->fp conversions), |
1378 | 251 | // and if the integer add will not overflow. |
1379 | 251 | if (LHSIntVal->getType() == RHSIntVal->getType() && |
1380 | 251 | (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
1381 | 251 | willNotOverflowSignedAdd(LHSIntVal, RHSIntVal, I)) {
1382 | 7 | // Insert the new integer add. |
1383 | 7 | Value *NewAdd = Builder.CreateNSWAdd(LHSIntVal, RHSIntVal, "addconv"); |
1384 | 7 | return new SIToFPInst(NewAdd, I.getType()); |
1385 | 7 | } |
1386 | 703k | } |
1387 | 281 | } |
1388 | 3.45k | } |
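
Given the precision guard, the sitofp fold is value-preserving: when the integer add does not overflow and the integer width fits the significand, converting before or after the add yields the same double. A small standalone check (hypothetical sample values):

  #include <cassert>

  int main() {
    int X = 123456, Y = 654321; // X + Y does not overflow i32
    // sitofp(X) + sitofp(Y) == sitofp(X + Y): all three values are exact
    // in double's 53-bit significand.
    assert(double(X) + double(Y) == double(X + Y));
  }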
1389 | 703k | |
1390 | 703k | // Handle special cases for FAdd with selects feeding the operation
1391 | 703k | if (Value *V = SimplifySelectsFeedingBinaryOp(I, LHS, RHS))
1392 | 8 | return replaceInstUsesWith(I, V); |
1393 | 703k | |
1394 | 703k | if (I.hasUnsafeAlgebra()) {
1395 | 625 | if (Value *V = FAddCombine(Builder).simplify(&I)) |
1396 | 9 | return replaceInstUsesWith(I, V); |
1397 | 703k | } |
1398 | 703k | |
1399 | 703k | return Changed ? &I : nullptr;
1400 | 703k | } |
1401 | | |
1402 | | /// Optimize pointer differences over the same array into a size. Consider:
1403 | | /// &A[10] - &A[0]: we should compile this to "10". LHS/RHS are the pointer |
1404 | | /// operands to the ptrtoint instructions for the LHS/RHS of the subtract. |
1405 | | /// |
1406 | | Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS, |
1407 | 449k | Type *Ty) { |
1408 | 449k | // If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize |
1409 | 449k | // this. |
1410 | 449k | bool Swapped = false; |
1411 | 449k | GEPOperator *GEP1 = nullptr, *GEP2 = nullptr; |
1412 | 449k | |
1413 | 449k | // For now we require one side to be the base pointer "A" or a constant |
1414 | 449k | // GEP derived from it. |
1415 | 449k | if (GEPOperator *LHSGEP = dyn_cast<GEPOperator>(LHS)) {
1416 | 2.89k | // (gep X, ...) - X |
1417 | 2.89k | if (LHSGEP->getOperand(0) == RHS) {
1418 | 35 | GEP1 = LHSGEP; |
1419 | 35 | Swapped = false; |
1420 | 2.89k | } else if (GEPOperator *RHSGEP = dyn_cast<GEPOperator>(RHS)) {
1421 | 439 | // (gep X, ...) - (gep X, ...) |
1422 | 439 | if (LHSGEP->getOperand(0)->stripPointerCasts() == |
1423 | 439 | RHSGEP->getOperand(0)->stripPointerCasts()) { |
1424 | 25 | GEP2 = RHSGEP; |
1425 | 25 | GEP1 = LHSGEP; |
1426 | 25 | Swapped = false; |
1427 | 25 | } |
1428 | 2.85k | } |
1429 | 2.89k | } |
1430 | 449k | |
1431 | 449k | if (GEPOperator *RHSGEP = dyn_cast<GEPOperator>(RHS)) {
1432 | 2.02k | // X - (gep X, ...) |
1433 | 2.02k | if (RHSGEP->getOperand(0) == LHS) {
1434 | 2 | GEP1 = RHSGEP; |
1435 | 2 | Swapped = true; |
1436 | 2.02k | } else if (GEPOperator *LHSGEP = dyn_cast<GEPOperator>(LHS)) {
1437 | 439 | // (gep X, ...) - (gep X, ...) |
1438 | 439 | if (RHSGEP->getOperand(0)->stripPointerCasts() == |
1439 | 439 | LHSGEP->getOperand(0)->stripPointerCasts()) { |
1440 | 25 | GEP2 = LHSGEP; |
1441 | 25 | GEP1 = RHSGEP; |
1442 | 25 | Swapped = true; |
1443 | 25 | } |
1444 | 2.02k | } |
1445 | 2.02k | } |
1446 | 449k | |
1447 | 449k | if (!GEP1) |
1448 | 449k | // No GEP found. |
1449 | 449k | return nullptr; |
1450 | 62 | |
1451 | 62 | if (GEP2) {
1452 | 25 | // (gep X, ...) - (gep X, ...) |
1453 | 25 | // |
1454 | 25 | // Avoid duplicating the arithmetic if there is more than one non-constant
1455 | 25 | // index between the two GEPs and either GEP has a non-constant index with
1456 | 25 | // multiple users. With zero non-constant indices, the result is a constant
1457 | 25 | // and there is no duplication. With one non-constant index, the result is
1458 | 25 | // an add or sub with a constant, which is no larger than the original
1459 | 25 | // code, and no arithmetic is duplicated, even if either GEP has multiple
1460 | 25 | // users. With more than one non-constant index combined, there is still no
1461 | 25 | // duplication as long as the GEP with at least one non-constant index does
1462 | 25 | // not have multiple users.
1463 | 25 | unsigned NumNonConstantIndices1 = GEP1->countNonConstantIndices(); |
1464 | 25 | unsigned NumNonConstantIndices2 = GEP2->countNonConstantIndices(); |
1465 | 25 | if (NumNonConstantIndices1 + NumNonConstantIndices2 > 1 && |
1466 | 20 | ((NumNonConstantIndices1 > 0 && !GEP1->hasOneUse()) ||
1467 | 25 | (NumNonConstantIndices2 > 0 && !GEP2->hasOneUse()))) {
1468 | 12 | return nullptr; |
1469 | 12 | } |
1470 | 50 | } |
1471 | 50 | |
1472 | 50 | // Emit the offset of the GEP as an intptr_t.
1473 | 50 | Value *Result = EmitGEPOffset(GEP1); |
1474 | 50 | |
1475 | 50 | // If we had a constant expression GEP on the other side offsetting the |
1476 | 50 | // pointer, subtract it from the offset we have. |
1477 | 50 | if (GEP2) {
1478 | 13 | Value *Offset = EmitGEPOffset(GEP2); |
1479 | 13 | Result = Builder.CreateSub(Result, Offset); |
1480 | 13 | } |
1481 | 50 | |
1482 | 50 | // If we have p - gep(p, ...) then we have to negate the result. |
1483 | 50 | if (Swapped) |
1484 | 15 | Result = Builder.CreateNeg(Result, "diff.neg"); |
1485 | 449k | |
1486 | 449k | return Builder.CreateIntCast(Result, Ty, true); |
1487 | 449k | } |
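
At the source level this is the classic pointer-difference simplification: the two address computations cancel and only the element-count delta remains. A standalone illustration of the pattern the routine targets (assumed example, not LLVM code):

  #include <cassert>
  #include <cstddef>

  int main() {
    int A[16];
    // Both operands are GEPs off the same base A, so the difference
    // reduces to the constant offset delta: 10 elements.
    ptrdiff_t D = &A[10] - &A[0];
    assert(D == 10);
  }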
1488 | | |
1489 | 1.88M | Instruction *InstCombiner::visitSub(BinaryOperator &I) { |
1490 | 1.88M | Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); |
1491 | 1.88M | |
1492 | 1.88M | if (Value *V = SimplifyVectorOp(I)) |
1493 | 0 | return replaceInstUsesWith(I, V); |
1494 | 1.88M | |
1495 | 1.88M | if (Value *V =
1496 | 1.88M | SimplifySubInst(Op0, Op1, I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), |
1497 | 1.88M | SQ.getWithInstruction(&I))) |
1498 | 219 | return replaceInstUsesWith(I, V); |
1499 | 1.88M | |
1500 | 1.88M | // (A*B)-(A*C) -> A*(B-C) etc |
1501 | 1.88M | if (Value *V = SimplifyUsingDistributiveLaws(I))
1502 | 151 | return replaceInstUsesWith(I, V); |
1503 | 1.88M | |
1504 | 1.88M | // If this is a 'B = x-(-A)', change to B = x+A. |
1505 | 1.88M | if (Value *V = dyn_castNegVal(Op1)) {
1506 | 20.7k | BinaryOperator *Res = BinaryOperator::CreateAdd(Op0, V); |
1507 | 20.7k | |
1508 | 20.7k | if (const auto *BO = dyn_cast<BinaryOperator>(Op1)) {
1509 | 99 | assert(BO->getOpcode() == Instruction::Sub && |
1510 | 99 | "Expected a subtraction operator!"); |
1511 | 99 | if (BO->hasNoSignedWrap() && I.hasNoSignedWrap())
1512 | 80 | Res->setHasNoSignedWrap(true); |
1513 | 20.7k | } else { |
1514 | 20.6k | if (cast<Constant>(Op1)->isNotMinSignedValue() && I.hasNoSignedWrap())
1515 | 12.0k | Res->setHasNoSignedWrap(true); |
1516 | 20.6k | } |
1517 | 20.7k | |
1518 | 20.7k | return Res; |
1519 | 20.7k | } |
1520 | 1.86M | |
1521 | 1.86M | if (I.getType()->isIntOrIntVectorTy(1))
1522 | 2 | return BinaryOperator::CreateXor(Op0, Op1); |
1523 | 1.86M | |
1524 | 1.86M | // Replace (-1 - A) with (~A). |
1525 | 1.86M | if (match(Op0, m_AllOnes()))
1526 | 1.67k | return BinaryOperator::CreateNot(Op1); |
1527 | 1.86M | |
1528 | 1.86M | if (Constant *C = dyn_cast<Constant>(Op0)) {
1529 | 672k | // C - ~X == X + (1+C) |
1530 | 672k | Value *X = nullptr; |
1531 | 672k | if (match(Op1, m_Not(m_Value(X)))) |
1532 | 102 | return BinaryOperator::CreateAdd(X, AddOne(C)); |
1533 | 672k | |
1534 | 672k | // Try to fold constant sub into select arguments. |
1535 | 672k | if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
1536 | 8.68k | if (Instruction *R = FoldOpIntoSelect(I, SI))
1537 | 99 | return R; |
1538 | 671k | |
1539 | 671k | // Try to fold constant sub into PHI values. |
1540 | 671k | if (PHINode *PN = dyn_cast<PHINode>(Op1))
1541 | 110k | if (Instruction *R = foldOpIntoPhi(I, PN))
1542 | 117 | return R; |
1543 | 671k | |
1544 | 671k | // C-(X+C2) --> (C-C2)-X |
1545 | 671k | Constant *C2; |
1546 | 671k | if (match(Op1, m_Add(m_Value(X), m_Constant(C2)))) |
1547 | 1.24k | return BinaryOperator::CreateSub(ConstantExpr::getSub(C, C2), X); |
1548 | 670k | |
1549 | 670k | // Fold (sub 0, (zext bool to B)) --> (sext bool to B) |
1550 | 670k | if (C->isNullValue() && match(Op1, m_ZExt(m_Value(X))))
1551 | 18.8k | if (X->getType()->isIntOrIntVectorTy(1))
1552 | 168 | return CastInst::CreateSExtOrBitCast(X, Op1->getType()); |
1553 | 670k | |
1554 | 670k | // Fold (sub 0, (sext bool to B)) --> (zext bool to B) |
1555 | 670k | if (C->isNullValue() && match(Op1, m_SExt(m_Value(X))))
1556 | 10.5k | if (X->getType()->isIntOrIntVectorTy(1))
1557 | 2 | return CastInst::CreateZExtOrBitCast(X, Op1->getType()); |
1558 | 1.86M | } |
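
The C - ~X fold follows from two's complement: ~X == -X - 1, hence C - ~X == X + (C + 1). Exhaustive 8-bit verification (standalone sketch, not part of this file):

  #include <cassert>
  #include <cstdint>

  int main() {
    for (unsigned C = 0; C < 256; ++C)
      for (unsigned X = 0; X < 256; ++X)
        // ~X == -X - 1, so C - ~X == X + (C + 1) modulo 2^8.
        assert(uint8_t(C - uint8_t(~X)) == uint8_t(X + C + 1));
  }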
1559 | 1.86M | |
1560 | 1.86M | const APInt *Op0C; |
1561 | 1.86M | if (match(Op0, m_APInt(Op0C))) {
1562 | 670k | unsigned BitWidth = I.getType()->getScalarSizeInBits(); |
1563 | 670k | |
1564 | 670k | // -(X >>u 31) -> (X >>s 31) |
1565 | 670k | // -(X >>s 31) -> (X >>u 31) |
1566 | 670k | if (Op0C->isNullValue()) {
1567 | 296k | Value *X; |
1568 | 296k | const APInt *ShAmt; |
1569 | 296k | if (match(Op1, m_LShr(m_Value(X), m_APInt(ShAmt))) && |
1570 | 296k | *ShAmt == BitWidth - 1) {
1571 | 29 | Value *ShAmtOp = cast<Instruction>(Op1)->getOperand(1); |
1572 | 29 | return BinaryOperator::CreateAShr(X, ShAmtOp); |
1573 | 29 | } |
1574 | 296k | if (match(Op1, m_AShr(m_Value(X), m_APInt(ShAmt))) &&
1575 | 296k | *ShAmt == BitWidth - 1) {
1576 | 5 | Value *ShAmtOp = cast<Instruction>(Op1)->getOperand(1); |
1577 | 5 | return BinaryOperator::CreateLShr(X, ShAmtOp); |
1578 | 5 | } |
1579 | 670k | } |
1580 | 670k | |
1581 | 670k | // Turn this into a xor if LHS is 2^n-1 and the remaining bits are known |
1582 | 670k | // zero. |
1583 | 670k | if (Op0C->isMask()) {
1584 | 24.8k | KnownBits RHSKnown = computeKnownBits(Op1, 0, &I); |
1585 | 24.8k | if ((*Op0C | RHSKnown.Zero).isAllOnesValue()) |
1586 | 313 | return BinaryOperator::CreateXor(Op1, Op0); |
1587 | 1.86M | } |
1588 | 670k | } |
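
Both APInt-driven folds above are plain bit identities: negating a sign-bit extraction swaps logical and arithmetic shifts, and subtracting from a low-bit mask borrows nothing when the high bits are known zero, so the sub degenerates to xor. A standalone spot check (assumes the usual two's-complement arithmetic right shift for signed types):

  #include <cassert>
  #include <cstdint>

  int main() {
    for (uint32_t X : {0u, 1u, 0x7fffffffu, 0x80000000u, 0xffffffffu})
      // -(X >>u 31) and (X >>s 31) both yield 0 or all-ones.
      assert(-(X >> 31) == uint32_t(int32_t(X) >> 31));
    for (uint32_t X = 0; X < 256; ++X)
      // 0xFF is a mask and X's bits above bit 7 are zero: no borrows,
      // so mask - X == mask ^ X.
      assert(0xFFu - X == (0xFFu ^ X));
  }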
1589 | 1.86M | |
1590 | 1.86M | { |
1591 | 1.86M | Value *Y; |
1592 | 1.86M | // X-(X+Y) == -Y X-(Y+X) == -Y |
1593 | 1.86M | if (match(Op1, m_c_Add(m_Specific(Op0), m_Value(Y)))) |
1594 | 78 | return BinaryOperator::CreateNeg(Y); |
1595 | 1.86M | |
1596 | 1.86M | // (X-Y)-X == -Y |
1597 | 1.86M | if (match(Op0, m_Sub(m_Specific(Op1), m_Value(Y))))
1598 | 1 | return BinaryOperator::CreateNeg(Y); |
1599 | 1.86M | } |
1600 | 1.86M | |
1601 | 1.86M | // (sub (or A, B), (xor A, B)) --> (and A, B) |
1602 | 1.86M | { |
1603 | 1.86M | Value *A, *B; |
1604 | 1.86M | if (match(Op1, m_Xor(m_Value(A), m_Value(B))) && |
1605 | 29.7k | match(Op0, m_c_Or(m_Specific(A), m_Specific(B)))) |
1606 | 2 | return BinaryOperator::CreateAnd(A, B); |
1607 | 1.86M | } |
1608 | 1.86M | |
1609 | 1.86M | { |
1610 | 1.86M | Value *Y; |
1611 | 1.86M | // ((X | Y) - X) --> (~X & Y) |
1612 | 1.86M | if (match(Op0, m_OneUse(m_c_Or(m_Value(Y), m_Specific(Op1))))) |
1613 | 4 | return BinaryOperator::CreateAnd( |
1614 | 4 | Y, Builder.CreateNot(Op1, Op1->getName() + ".not")); |
1615 | 1.86M | } |
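
((X | Y) - X) keeps exactly the bits Y contributes beyond X, i.e. ~X & Y, because X | Y == X + (~X & Y) with no carries. Exhaustive 8-bit check (standalone sketch):

  #include <cassert>
  #include <cstdint>

  int main() {
    for (unsigned X = 0; X < 256; ++X)
      for (unsigned Y = 0; Y < 256; ++Y)
        // X | Y == X + (~X & Y), so (X | Y) - X == ~X & Y.
        assert(uint8_t((X | Y) - X) == uint8_t(~X & Y));
  }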
1616 | 1.86M | |
1617 | 1.86M | if (Op1->hasOneUse()) {
1618 | 504k | Value *X = nullptr, *Y = nullptr, *Z = nullptr; |
1619 | 504k | Constant *C = nullptr; |
1620 | 504k | |
1621 | 504k | // (X - (Y - Z)) --> (X + (Z - Y)). |
1622 | 504k | if (match(Op1, m_Sub(m_Value(Y), m_Value(Z)))) |
1623 | 336 | return BinaryOperator::CreateAdd(Op0, |
1624 | 336 | Builder.CreateSub(Z, Y, Op1->getName())); |
1625 | 503k | |
1626 | 503k | // (X - (X & Y)) --> (X & ~Y) |
1627 | 503k | // |
1628 | 503k | if (match(Op1, m_c_And(m_Value(Y), m_Specific(Op0))))
1629 | 4.27k | return BinaryOperator::CreateAnd(Op0, |
1630 | 4.27k | Builder.CreateNot(Y, Y->getName() + ".not")); |
1631 | 499k | |
1632 | 499k | // 0 - (X sdiv C) -> (X sdiv -C) provided the negation doesn't overflow. |
1633 | 499k | if (match(Op1, m_SDiv(m_Value(X), m_Constant(C))) && match(Op0, m_Zero()) &&
1634 | 499k | C->isNotMinSignedValue() && !C->isOneValue())
1635 | 28 | return BinaryOperator::CreateSDiv(X, ConstantExpr::getNeg(C)); |
1636 | 499k | |
1637 | 499k | // 0 - (X << Y) -> (-X << Y) when X is freely negatable. |
1638 | 499k | if (match(Op1, m_Shl(m_Value(X), m_Value(Y))) && match(Op0, m_Zero()))
1639 | 778 | if (Value *XNeg = dyn_castNegVal(X))
1640 | 15 | return BinaryOperator::CreateShl(XNeg, Y); |
1641 | 499k | |
1642 | 499k | // Subtracting -1/0 is the same as adding 1/0: |
1643 | 499k | // sub [nsw] Op0, sext(bool Y) -> add [nsw] Op0, zext(bool Y) |
1644 | 499k | // 'nuw' is dropped in favor of the canonical form. |
1645 | 499k | if (match(Op1, m_SExt(m_Value(Y))) &&
1646 | 499k | Y->getType()->getScalarSizeInBits() == 1) {
1647 | 5 | Value *Zext = Builder.CreateZExt(Y, I.getType()); |
1648 | 5 | BinaryOperator *Add = BinaryOperator::CreateAdd(Op0, Zext); |
1649 | 5 | Add->setHasNoSignedWrap(I.hasNoSignedWrap()); |
1650 | 5 | return Add; |
1651 | 5 | } |
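
sext of an i1 is 0 or -1 and zext is 0 or 1, so subtracting the sext equals adding the zext; only the nsw flag survives the rewrite. A standalone check over both boolean values (hypothetical sample operands):

  #include <cassert>
  #include <cstdint>

  int main() {
    for (int32_t Op0 : {-5, 0, 7})
      for (bool Y : {false, true}) {
        int32_t Sext = Y ? -1 : 0; // sext i1 Y to i32
        int32_t Zext = Y ? 1 : 0;  // zext i1 Y to i32
        assert(Op0 - Sext == Op0 + Zext);
      }
  }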
1652 | 499k | |
1653 | 499k | // X - A*-B -> X + A*B |
1654 | 499k | // X - -A*B -> X + A*B |
1655 | 499k | Value *A, *B; |
1656 | 499k | Constant *CI; |
1657 | 499k | if (match(Op1, m_c_Mul(m_Value(A), m_Neg(m_Value(B))))) |
1658 | 2 | return BinaryOperator::CreateAdd(Op0, Builder.CreateMul(A, B)); |
1659 | 499k | |
1660 | 499k | // X - A*CI -> X + A*-CI |
1661 | 499k | // No need to handle commuted multiply because multiply handling will |
1662 | 499k | // ensure the constant will be moved to the right hand side.
1663 | 499k | if (match(Op1, m_Mul(m_Value(A), m_Constant(CI)))) {
1664 | 261 | Value *NewMul = Builder.CreateMul(A, ConstantExpr::getNeg(CI)); |
1665 | 261 | return BinaryOperator::CreateAdd(Op0, NewMul); |
1666 | 261 | } |
1667 | 1.85M | } |
1668 | 1.85M | |
1669 | 1.85M | // Optimize pointer differences into the same array into a size. Consider: |
1670 | 1.85M | // &A[10] - &A[0]: we should compile this to "10". |
1671 | 1.85M | Value *LHSOp, *RHSOp; |
1672 | 1.85M | if (match(Op0, m_PtrToInt(m_Value(LHSOp))) && |
1673 | 456k | match(Op1, m_PtrToInt(m_Value(RHSOp)))) |
1674 | 449k | if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
1675 | 48 | return replaceInstUsesWith(I, Res); |
1676 | 1.85M | |
1677 | 1.85M | // trunc(p)-trunc(q) -> trunc(p-q) |
1678 | 1.85M | if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) &&
1679 | 20 | match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp))))) |
1680 | 2 | if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
1681 | 2 | return replaceInstUsesWith(I, Res); |
1682 | 1.85M | |
1683 | 1.85M | bool Changed = false; |
1684 | 1.85M | if (!I.hasNoSignedWrap() && willNotOverflowSignedSub(Op0, Op1, I)) {
1685 | 18.8k | Changed = true; |
1686 | 18.8k | I.setHasNoSignedWrap(true); |
1687 | 18.8k | } |
1688 | 1.85M | if (!I.hasNoUnsignedWrap() && willNotOverflowUnsignedSub(Op0, Op1, I)) {
1689 | 45 | Changed = true; |
1690 | 45 | I.setHasNoUnsignedWrap(true); |
1691 | 45 | } |
1692 | 1.85M | |
1693 | 1.85M | return Changed ? &I : nullptr;
1694 | 1.88M | } |
1695 | | |
1696 | 183k | Instruction *InstCombiner::visitFSub(BinaryOperator &I) { |
1697 | 183k | Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); |
1698 | 183k | |
1699 | 183k | if (Value *V = SimplifyVectorOp(I)) |
1700 | 25 | return replaceInstUsesWith(I, V); |
1701 | 183k | |
1702 | 183k | if (Value *V = SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(),
1703 | 183k | SQ.getWithInstruction(&I))) |
1704 | 11 | return replaceInstUsesWith(I, V); |
1705 | 183k | |
1706 | 183k | // fsub nsz 0, X ==> fsub nsz -0.0, X |
1707 | 183k | if (I.getFastMathFlags().noSignedZeros() && match(Op0, m_Zero())) {
1708 | 6 | // Subtraction from -0.0 is the canonical form of fneg. |
1709 | 6 | Instruction *NewI = BinaryOperator::CreateFNeg(Op1); |
1710 | 6 | NewI->copyFastMathFlags(&I); |
1711 | 6 | return NewI; |
1712 | 6 | } |
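
The nsz requirement exists because 0.0 - X is not a faithful negation at X == +0.0: it produces +0.0 where fneg produces -0.0. A standalone demonstration of why -0.0 is the canonical fneg form (assumed test code):

  #include <cassert>
  #include <cmath>

  int main() {
    double X = 0.0;
    assert(!std::signbit(0.0 - X));  // 0.0 - (+0.0) == +0.0: sign lost
    assert(std::signbit(-0.0 - X));  // -0.0 - (+0.0) == -0.0: true fneg
  }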
1713 | 183k | |
1714 | 183k | if (isa<Constant>(Op0))
1715 | 75.7k | if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
1716 | 468 | if (Instruction *NV = FoldOpIntoSelect(I, SI))
1717 | 2 | return NV; |
1718 | 183k | |
1719 | 183k | // If this is a 'B = x-(-A)', change to B = x+A, potentially looking |
1720 | 183k | // through FP extensions/truncations along the way. |
1721 | 183k | if (Value *V = dyn_castFNegVal(Op1)) {
1722 | 504 | Instruction *NewI = BinaryOperator::CreateFAdd(Op0, V); |
1723 | 504 | NewI->copyFastMathFlags(&I); |
1724 | 504 | return NewI; |
1725 | 504 | } |
1726 | 182k | if (FPTruncInst *FPTI = dyn_cast<FPTruncInst>(Op1)) {
1727 | 1.62k | if (Value *V = dyn_castFNegVal(FPTI->getOperand(0))) {
1728 | 0 | Value *NewTrunc = Builder.CreateFPTrunc(V, I.getType()); |
1729 | 0 | Instruction *NewI = BinaryOperator::CreateFAdd(Op0, NewTrunc); |
1730 | 0 | NewI->copyFastMathFlags(&I); |
1731 | 0 | return NewI; |
1732 | 0 | } |
1733 | 180k | } else if (FPExtInst *FPEI = dyn_cast<FPExtInst>(Op1)) {
1734 | 4.78k | if (Value *V = dyn_castFNegVal(FPEI->getOperand(0))) {
1735 | 2 | Value *NewExt = Builder.CreateFPExt(V, I.getType()); |
1736 | 2 | Instruction *NewI = BinaryOperator::CreateFAdd(Op0, NewExt); |
1737 | 2 | NewI->copyFastMathFlags(&I); |
1738 | 2 | return NewI; |
1739 | 2 | } |
1740 | 182k | } |
1741 | 182k | |
1742 | 182k | // Handle special cases for FSub with selects feeding the operation
1743 | 182k | if (Value *V = SimplifySelectsFeedingBinaryOp(I, Op0, Op1))
1744 | 1 | return replaceInstUsesWith(I, V); |
1745 | 182k | |
1746 | 182k | if (I.hasUnsafeAlgebra()) {
1747 | 129 | if (Value *V = FAddCombine(Builder).simplify(&I)) |
1748 | 9 | return replaceInstUsesWith(I, V); |
1749 | 182k | } |
1750 | 182k | |
1751 | 182k | return nullptr; |
1752 | 182k | } |