Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- InstCombineAddSub.cpp ------------------------------------*- C++ -*-===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
// This file implements the visit functions for add, fadd, sub, and fsub.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#include "InstCombineInternal.h"
15
#include "llvm/ADT/STLExtras.h"
16
#include "llvm/Analysis/InstructionSimplify.h"
17
#include "llvm/IR/DataLayout.h"
18
#include "llvm/IR/GetElementPtrTypeIterator.h"
19
#include "llvm/IR/PatternMatch.h"
20
#include "llvm/Support/KnownBits.h"
21
22
using namespace llvm;
23
using namespace PatternMatch;
24
25
#define DEBUG_TYPE "instcombine"
26
27
namespace {
28
29
  /// Class representing coefficient of floating-point addend.
30
  /// This class needs to be highly efficient, which is especially true for
31
  /// the constructor. As of I write this comment, the cost of the default
32
  /// constructor is merely 4-byte-store-zero (Assuming compiler is able to
33
  /// perform write-merging).
34
  ///
35
  class FAddendCoef {
36
  public:
37
    // The constructor has to initialize a APFloat, which is unnecessary for
38
    // most addends which have coefficient either 1 or -1. So, the constructor
39
    // is expensive. In order to avoid the cost of the constructor, we should
40
    // reuse some instances whenever possible. The pre-created instances
41
    // FAddCombine::Add[0-5] embodies this idea.
42
    //
43
3.44k
    FAddendCoef() : IsFp(false), BufHasFpVal(false), IntVal(0) {}
44
    ~FAddendCoef();
45
46
933
    void set(short C) {
47
933
      assert(!insaneIntVal(C) && "Insane coefficient");
48
933
      IsFp = false; IntVal = C;
49
933
    }
50
51
    void set(const APFloat& C);
52
53
    void negate();
54
55
16
    bool isZero() const 
{ return isInt() ? 16
!IntVal13
:
getFpVal().isZero()3
; }
56
    Value *getValue(Type *) const;
57
58
    // If possible, don't define operator+/operator- etc because these
59
    // operators inevitably call FAddendCoef's constructor which is not cheap.
60
    void operator=(const FAddendCoef &A);
61
    void operator+=(const FAddendCoef &A);
62
    void operator*=(const FAddendCoef &S);
63
64
539
    bool isOne() const 
{ return isInt() && 539
IntVal == 1453
; }
65
7
    bool isTwo() const 
{ return isInt() && 7
IntVal == 24
; }
66
728
    bool isMinusOne() const 
{ return isInt() && 728
IntVal == -1559
; }
67
329
    bool isMinusTwo() const 
{ return isInt() && 329
IntVal == -2243
; }
68
69
  private:
70
0
    bool insaneIntVal(int V) { return V > 4 || V < -4; }
71
    APFloat *getFpValPtr()
72
571
      { return reinterpret_cast<APFloat*>(&FpValBuf.buffer[0]); }
73
    const APFloat *getFpValPtr() const
74
14
      { return reinterpret_cast<const APFloat*>(&FpValBuf.buffer[0]); }
75
76
14
    const APFloat &getFpVal() const {
77
14
      assert(IsFp && BufHasFpVal && "Incorret state");
78
14
      return *getFpValPtr();
79
14
    }
80
81
9
    APFloat &getFpVal() {
82
9
      assert(IsFp && BufHasFpVal && "Incorret state");
83
9
      return *getFpValPtr();
84
9
    }
85
86
2.12k
    bool isInt() const { return !IsFp; }
87
88
    // If the coefficient is represented by an integer, promote it to a
89
    // floating point.
90
    void convertToFpType(const fltSemantics &Sem);
91
92
    // Construct an APFloat from a signed integer.
93
    // TODO: We should get rid of this function when APFloat can be constructed
94
    //       from an *SIGNED* integer.
95
    APFloat createAPFloatFromInt(const fltSemantics &Sem, int Val);
96
97
  private:
98
    bool IsFp;
99
100
    // True iff FpValBuf contains an instance of APFloat.
101
    bool BufHasFpVal;
102
103
    // The integer coefficient of an individual addend is either 1 or -1,
104
    // and we try to simplify at most 4 addends from neighboring at most
105
    // two instructions. So the range of <IntVal> falls in [-4, 4]. APInt
106
    // is overkill of this end.
107
    short IntVal;
108
109
    AlignedCharArrayUnion<APFloat> FpValBuf;
110
  };
111
112
  /// FAddend is used to represent floating-point addend. An addend is
113
  /// represented as <C, V>, where the V is a symbolic value, and C is a
114
  /// constant coefficient. A constant addend is represented as <C, 0>.
115
  ///
116
  class FAddend {
117
  public:
118
3.44k
    FAddend() : Val(nullptr) {}
119
120
1.22k
    Value *getSymVal() const { return Val; }
121
387
    const FAddendCoef &getCoef() const { return Coeff; }
122
123
2.17k
    bool isConstant() const { return Val == nullptr; }
124
16
    bool isZero() const { return Coeff.isZero(); }
125
126
918
    void set(short Coefficient, Value *V) {
127
918
      Coeff.set(Coefficient);
128
918
      Val = V;
129
918
    }
130
0
    void set(const APFloat &Coefficient, Value *V) {
131
0
      Coeff.set(Coefficient);
132
0
      Val = V;
133
0
    }
134
276
    void set(const ConstantFP *Coefficient, Value *V) {
135
276
      Coeff.set(Coefficient->getValueAPF());
136
276
      Val = V;
137
276
    }
138
139
120
    void negate() { Coeff.negate(); }
140
141
    /// Drill down the U-D chain one step to find the definition of V, and
142
    /// try to break the definition into one or two addends.
143
    static unsigned drillValueDownOneStep(Value* V, FAddend &A0, FAddend &A1);
144
145
    /// Similar to FAddend::drillDownOneStep() except that the value being
146
    /// splitted is the addend itself.
147
    unsigned drillAddendDownOneStep(FAddend &Addend0, FAddend &Addend1) const;
148
149
23
    void operator+=(const FAddend &T) {
150
23
      assert((Val == T.Val) && "Symbolic-values disagree");
151
23
      Coeff += T.Coeff;
152
23
    }
153
154
  private:
155
15
    void Scale(const FAddendCoef& ScaleAmt) { Coeff *= ScaleAmt; }
156
157
    // This addend has the value of "Coeff * Val".
158
    Value *Val;
159
    FAddendCoef Coeff;
160
  };
161
162
  /// FAddCombine is the class for optimizing an unsafe fadd/fsub along
163
  /// with its neighboring at most two instructions.
164
  ///
165
  class FAddCombine {
166
  public:
167
754
    FAddCombine(InstCombiner::BuilderTy &B) : Builder(B), Instr(nullptr) {}
168
    Value *simplify(Instruction *FAdd);
169
170
  private:
171
    typedef SmallVector<const FAddend*, 4> AddendVect;
172
173
    Value *simplifyFAdd(AddendVect& V, unsigned InstrQuota);
174
175
    Value *performFactorization(Instruction *I);
176
177
    /// Convert given addend to a Value
178
    Value *createAddendVal(const FAddend &A, bool& NeedNeg);
179
180
    /// Return the number of instructions needed to emit the N-ary addition.
181
    unsigned calcInstrNumber(const AddendVect& Vect);
182
    Value *createFSub(Value *Opnd0, Value *Opnd1);
183
    Value *createFAdd(Value *Opnd0, Value *Opnd1);
184
    Value *createFMul(Value *Opnd0, Value *Opnd1);
185
    Value *createFDiv(Value *Opnd0, Value *Opnd1);
186
    Value *createFNeg(Value *V);
187
    Value *createNaryFAdd(const AddendVect& Opnds, unsigned InstrQuota);
188
    void createInstPostProc(Instruction *NewInst, bool NoNumber = false);
189
190
    InstCombiner::BuilderTy &Builder;
191
    Instruction *Instr;
192
193
     // Debugging stuff are clustered here.
194
    #ifndef NDEBUG
195
      unsigned CreateInstrNum;
196
      void initCreateInstNum() { CreateInstrNum = 0; }
197
      void incCreateInstNum() { CreateInstrNum++; }
198
    #else
199
10
      void initCreateInstNum() {}
200
26
      void incCreateInstNum() {}
201
    #endif
202
  };
203
204
} // anonymous namespace
205
206
//===----------------------------------------------------------------------===//
207
//
208
// Implementation of
209
//    {FAddendCoef, FAddend, FAddition, FAddCombine}.
210
//
211
//===----------------------------------------------------------------------===//
212
3.44k
FAddendCoef::~FAddendCoef() {
213
3.44k
  if (BufHasFpVal)
214
281
    getFpValPtr()->~APFloat();
215
3.44k
}
216
217
279
void FAddendCoef::set(const APFloat& C) {
218
279
  APFloat *P = getFpValPtr();
219
279
220
279
  if (
isInt()279
) {
221
279
    // As the buffer is meanless byte stream, we cannot call
222
279
    // APFloat::operator=().
223
279
    new(P) APFloat(C);
224
279
  } else
225
0
    *P = C;
226
279
227
279
  IsFp = BufHasFpVal = true;
228
279
}
229
230
2
void FAddendCoef::convertToFpType(const fltSemantics &Sem) {
231
2
  if (!isInt())
232
0
    return;
233
2
234
2
  APFloat *P = getFpValPtr();
235
2
  if (IntVal > 0)
236
1
    new(P) APFloat(Sem, IntVal);
237
1
  else {
238
1
    new(P) APFloat(Sem, 0 - IntVal);
239
1
    P->changeSign();
240
1
  }
241
2
  IsFp = BufHasFpVal = true;
242
2
}
243
244
2
APFloat FAddendCoef::createAPFloatFromInt(const fltSemantics &Sem, int Val) {
245
2
  if (Val >= 0)
246
2
    return APFloat(Sem, Val);
247
0
248
0
  APFloat T(Sem, 0 - Val);
249
0
  T.changeSign();
250
0
251
0
  return T;
252
0
}
253
254
18
void FAddendCoef::operator=(const FAddendCoef &That) {
255
18
  if (That.isInt())
256
15
    set(That.IntVal);
257
18
  else
258
3
    set(That.getFpVal());
259
18
}
260
261
23
void FAddendCoef::operator+=(const FAddendCoef &That) {
262
23
  enum APFloat::roundingMode RndMode = APFloat::rmNearestTiesToEven;
263
23
  if (
isInt() == That.isInt()23
) {
264
19
    if (isInt())
265
17
      IntVal += That.IntVal;
266
19
    else
267
2
      getFpVal().add(That.getFpVal(), RndMode);
268
19
    return;
269
19
  }
270
4
271
4
  
if (4
isInt()4
) {
272
2
    const APFloat &T = That.getFpVal();
273
2
    convertToFpType(T.getSemantics());
274
2
    getFpVal().add(T, RndMode);
275
2
    return;
276
2
  }
277
2
278
2
  APFloat &T = getFpVal();
279
2
  T.add(createAPFloatFromInt(T.getSemantics(), That.IntVal), RndMode);
280
2
}
281
282
15
void FAddendCoef::operator*=(const FAddendCoef &That) {
283
15
  if (That.isOne())
284
0
    return;
285
15
286
15
  
if (15
That.isMinusOne()15
) {
287
15
    negate();
288
15
    return;
289
15
  }
290
0
291
0
  
if (0
isInt() && 0
That.isInt()0
) {
292
0
    int Res = IntVal * (int)That.IntVal;
293
0
    assert(!insaneIntVal(Res) && "Insane int value");
294
0
    IntVal = Res;
295
0
    return;
296
0
  }
297
0
298
0
  const fltSemantics &Semantic =
299
0
    isInt() ? 
That.getFpVal().getSemantics()0
:
getFpVal().getSemantics()0
;
300
0
301
0
  if (isInt())
302
0
    convertToFpType(Semantic);
303
0
  APFloat &F0 = getFpVal();
304
0
305
0
  if (That.isInt())
306
0
    F0.multiply(createAPFloatFromInt(Semantic, That.IntVal),
307
0
                APFloat::rmNearestTiesToEven);
308
0
  else
309
0
    F0.multiply(That.getFpVal(), APFloat::rmNearestTiesToEven);
310
15
}
311
312
135
void FAddendCoef::negate() {
313
135
  if (isInt())
314
132
    IntVal = 0 - IntVal;
315
135
  else
316
3
    getFpVal().changeSign();
317
135
}
318
319
7
Value *FAddendCoef::getValue(Type *Ty) const {
320
7
  return isInt() ?
321
3
    ConstantFP::get(Ty, float(IntVal)) :
322
4
    ConstantFP::get(Ty->getContext(), getFpVal());
323
7
}
324
325
// The definition of <Val>     Addends
326
// =========================================
327
//  A + B                     <1, A>, <1,B>
328
//  A - B                     <1, A>, <1,B>
329
//  0 - B                     <-1, B>
330
//  C * A,                    <C, A>
331
//  A + C                     <1, A> <C, NULL>
332
//  0 +/- 0                   <0, NULL> (corner case)
333
//
334
// Legend: A and B are not constant, C is constant
335
//
336
unsigned FAddend::drillValueDownOneStep
337
1.25k
  (Value *Val, FAddend &Addend0, FAddend &Addend1) {
338
1.25k
  Instruction *I = nullptr;
339
1.25k
  if (
!Val || 1.25k
!(I = dyn_cast<Instruction>(Val))1.25k
)
340
272
    return 0;
341
979
342
979
  unsigned Opcode = I->getOpcode();
343
979
344
979
  if (
Opcode == Instruction::FAdd || 979
Opcode == Instruction::FSub527
) {
345
572
    ConstantFP *C0, *C1;
346
572
    Value *Opnd0 = I->getOperand(0);
347
572
    Value *Opnd1 = I->getOperand(1);
348
572
    if (
(C0 = dyn_cast<ConstantFP>(Opnd0)) && 572
C0->isZero()37
)
349
31
      Opnd0 = nullptr;
350
572
351
572
    if (
(C1 = dyn_cast<ConstantFP>(Opnd1)) && 572
C1->isZero()189
)
352
0
      Opnd1 = nullptr;
353
572
354
572
    if (
Opnd0572
) {
355
541
      if (!C0)
356
535
        Addend0.set(1, Opnd0);
357
541
      else
358
6
        Addend0.set(C0, nullptr);
359
541
    }
360
572
361
572
    if (
Opnd1572
) {
362
572
      FAddend &Addend = Opnd0 ? 
Addend1541
:
Addend031
;
363
572
      if (!C1)
364
383
        Addend.set(1, Opnd1);
365
572
      else
366
189
        Addend.set(C1, nullptr);
367
572
      if (Opcode == Instruction::FSub)
368
120
        Addend.negate();
369
572
    }
370
572
371
572
    if (
Opnd0 || 572
Opnd131
)
372
572
      
return Opnd0 && 572
Opnd1541
?
2541
:
131
;
373
0
374
0
    // Both operands are zero. Weird!
375
0
    Addend0.set(APFloat(C0->getValueAPF().getSemantics()), nullptr);
376
0
    return 1;
377
0
  }
378
407
379
407
  
if (407
I->getOpcode() == Instruction::FMul407
) {
380
143
    Value *V0 = I->getOperand(0);
381
143
    Value *V1 = I->getOperand(1);
382
143
    if (ConstantFP *
C143
= dyn_cast<ConstantFP>(V0)) {
383
1
      Addend0.set(C, V1);
384
1
      return 1;
385
1
    }
386
142
387
142
    
if (ConstantFP *142
C142
= dyn_cast<ConstantFP>(V1)) {
388
80
      Addend0.set(C, V0);
389
80
      return 1;
390
80
    }
391
326
  }
392
326
393
326
  return 0;
394
326
}
395
396
// Try to break *this* addend into two addends. e.g. Suppose this addend is
397
// <2.3, V>, and V = X + Y, by calling this function, we obtain two addends,
398
// i.e. <2.3, X> and <2.3, Y>.
399
//
400
unsigned FAddend::drillAddendDownOneStep
401
768
  (FAddend &Addend0, FAddend &Addend1) const {
402
768
  if (isConstant())
403
0
    return 0;
404
768
405
768
  unsigned BreakNum = FAddend::drillValueDownOneStep(Val, Addend0, Addend1);
406
768
  if (
!BreakNum || 768
Coeff.isOne()170
)
407
759
    return BreakNum;
408
9
409
9
  Addend0.Scale(Coeff);
410
9
411
9
  if (BreakNum == 2)
412
6
    Addend1.Scale(Coeff);
413
768
414
768
  return BreakNum;
415
768
}
416
417
// Try to perform following optimization on the input instruction I. Return the
418
// simplified expression if was successful; otherwise, return 0.
419
//
420
//   Instruction "I" is                Simplified into
421
// -------------------------------------------------------
422
//   (x * y) +/- (x * z)               x * (y +/- z)
423
//   (y / x) +/- (z / x)               (y +/- z) / x
424
//
425
447
Value *FAddCombine::performFactorization(Instruction *I) {
426
447
  assert((I->getOpcode() == Instruction::FAdd ||
427
447
          I->getOpcode() == Instruction::FSub) && "Expect add/sub");
428
447
429
447
  Instruction *I0 = dyn_cast<Instruction>(I->getOperand(0));
430
447
  Instruction *I1 = dyn_cast<Instruction>(I->getOperand(1));
431
447
432
447
  if (
!I0 || 447
!I1357
||
I0->getOpcode() != I1->getOpcode()100
)
433
418
    return nullptr;
434
29
435
29
  bool isMpy = false;
436
29
  if (I0->getOpcode() == Instruction::FMul)
437
5
    isMpy = true;
438
24
  else 
if (24
I0->getOpcode() != Instruction::FDiv24
)
439
18
    return nullptr;
440
11
441
11
  Value *Opnd0_0 = I0->getOperand(0);
442
11
  Value *Opnd0_1 = I0->getOperand(1);
443
11
  Value *Opnd1_0 = I1->getOperand(0);
444
11
  Value *Opnd1_1 = I1->getOperand(1);
445
11
446
11
  //  Input Instr I       Factor   AddSub0  AddSub1
447
11
  //  ----------------------------------------------
448
11
  // (x*y) +/- (x*z)        x        y         z
449
11
  // (y/x) +/- (z/x)        x        y         z
450
11
  //
451
11
  Value *Factor = nullptr;
452
11
  Value *AddSub0 = nullptr, *AddSub1 = nullptr;
453
11
454
11
  if (
isMpy11
) {
455
5
    if (
Opnd0_0 == Opnd1_0 || 5
Opnd0_0 == Opnd1_14
)
456
2
      Factor = Opnd0_0;
457
3
    else 
if (3
Opnd0_1 == Opnd1_0 || 3
Opnd0_1 == Opnd1_12
)
458
3
      Factor = Opnd0_1;
459
5
460
5
    if (
Factor5
) {
461
5
      AddSub0 = (Factor == Opnd0_0) ? 
Opnd0_12
:
Opnd0_03
;
462
5
      AddSub1 = (Factor == Opnd1_0) ? 
Opnd1_12
:
Opnd1_03
;
463
5
    }
464
11
  } else 
if (6
Opnd0_1 == Opnd1_16
) {
465
4
    Factor = Opnd0_1;
466
4
    AddSub0 = Opnd0_0;
467
4
    AddSub1 = Opnd1_0;
468
4
  }
469
11
470
11
  if (!Factor)
471
2
    return nullptr;
472
9
473
9
  FastMathFlags Flags;
474
9
  Flags.setUnsafeAlgebra();
475
9
  if (
I09
)
Flags &= I->getFastMathFlags()9
;
476
9
  if (
I19
)
Flags &= I->getFastMathFlags()9
;
477
9
478
9
  // Create expression "NewAddSub = AddSub0 +/- AddsSub1"
479
9
  Value *NewAddSub = (I->getOpcode() == Instruction::FAdd) ?
480
3
                      createFAdd(AddSub0, AddSub1) :
481
6
                      createFSub(AddSub0, AddSub1);
482
9
  if (ConstantFP *
CFP9
= dyn_cast<ConstantFP>(NewAddSub)) {
483
2
    const APFloat &F = CFP->getValueAPF();
484
2
    if (!F.isNormal())
485
1
      return nullptr;
486
7
  } else 
if (Instruction *7
II7
= dyn_cast<Instruction>(NewAddSub))
487
7
    II->setFastMathFlags(Flags);
488
9
489
8
  
if (8
isMpy8
) {
490
5
    Value *RI = createFMul(Factor, NewAddSub);
491
5
    if (Instruction *II = dyn_cast<Instruction>(RI))
492
5
      II->setFastMathFlags(Flags);
493
5
    return RI;
494
5
  }
495
3
496
3
  Value *RI = createFDiv(NewAddSub, Factor);
497
3
  if (Instruction *II = dyn_cast<Instruction>(RI))
498
3
    II->setFastMathFlags(Flags);
499
447
  return RI;
500
447
}
501
502
754
Value *FAddCombine::simplify(Instruction *I) {
503
754
  assert(I->hasUnsafeAlgebra() && "Should be in unsafe mode");
504
754
505
754
  // Currently we are not able to handle vector type.
506
754
  if (I->getType()->isVectorTy())
507
271
    return nullptr;
508
483
509
754
  assert((I->getOpcode() == Instruction::FAdd ||
510
483
          I->getOpcode() == Instruction::FSub) && "Expect add/sub");
511
483
512
483
  // Save the instruction before calling other member-functions.
513
483
  Instr = I;
514
483
515
483
  FAddend Opnd0, Opnd1, Opnd0_0, Opnd0_1, Opnd1_0, Opnd1_1;
516
483
517
483
  unsigned OpndNum = FAddend::drillValueDownOneStep(I, Opnd0, Opnd1);
518
483
519
483
  // Step 1: Expand the 1st addend into Opnd0_0 and Opnd0_1.
520
483
  unsigned Opnd0_ExpNum = 0;
521
483
  unsigned Opnd1_ExpNum = 0;
522
483
523
483
  if (!Opnd0.isConstant())
524
479
    Opnd0_ExpNum = Opnd0.drillAddendDownOneStep(Opnd0_0, Opnd0_1);
525
483
526
483
  // Step 2: Expand the 2nd addend into Opnd1_0 and Opnd1_1.
527
483
  if (
OpndNum == 2 && 483
!Opnd1.isConstant()457
)
528
289
    Opnd1_ExpNum = Opnd1.drillAddendDownOneStep(Opnd1_0, Opnd1_1);
529
483
530
483
  // Step 3: Try to optimize Opnd0_0 + Opnd0_1 + Opnd1_0 + Opnd1_1
531
483
  if (
Opnd0_ExpNum && 483
Opnd1_ExpNum141
) {
532
21
    AddendVect AllOpnds;
533
21
    AllOpnds.push_back(&Opnd0_0);
534
21
    AllOpnds.push_back(&Opnd1_0);
535
21
    if (Opnd0_ExpNum == 2)
536
19
      AllOpnds.push_back(&Opnd0_1);
537
21
    if (Opnd1_ExpNum == 2)
538
20
      AllOpnds.push_back(&Opnd1_1);
539
21
540
21
    // Compute instruction quota. We should save at least one instruction.
541
21
    unsigned InstQuota = 0;
542
21
543
21
    Value *V0 = I->getOperand(0);
544
21
    Value *V1 = I->getOperand(1);
545
21
    InstQuota = ((!isa<Constant>(V0) && V0->hasOneUse()) &&
546
21
                 
(!isa<Constant>(V1) && 6
V1->hasOneUse()6
)) ?
25
:
116
;
547
21
548
21
    if (Value *R = simplifyFAdd(AllOpnds, InstQuota))
549
4
      return R;
550
479
  }
551
479
552
479
  
if (479
OpndNum != 2479
) {
553
26
    // The input instruction is : "I=0.0 +/- V". If the "V" were able to be
554
26
    // splitted into two addends, say "V = X - Y", the instruction would have
555
26
    // been optimized into "I = Y - X" in the previous steps.
556
26
    //
557
26
    const FAddendCoef &CE = Opnd0.getCoef();
558
26
    return CE.isOne() ? 
Opnd0.getSymVal()0
:
nullptr26
;
559
26
  }
560
453
561
453
  // step 4: Try to optimize Opnd0 + Opnd1_0 [+ Opnd1_1]
562
453
  
if (453
Opnd1_ExpNum453
) {
563
25
    AddendVect AllOpnds;
564
25
    AllOpnds.push_back(&Opnd0);
565
25
    AllOpnds.push_back(&Opnd1_0);
566
25
    if (Opnd1_ExpNum == 2)
567
22
      AllOpnds.push_back(&Opnd1_1);
568
25
569
25
    if (Value *R = simplifyFAdd(AllOpnds, 1))
570
1
      return R;
571
452
  }
572
452
573
452
  // step 5: Try to optimize Opnd1 + Opnd0_0 [+ Opnd0_1]
574
452
  
if (452
Opnd0_ExpNum452
) {
575
135
    AddendVect AllOpnds;
576
135
    AllOpnds.push_back(&Opnd1);
577
135
    AllOpnds.push_back(&Opnd0_0);
578
135
    if (Opnd0_ExpNum == 2)
579
53
      AllOpnds.push_back(&Opnd0_1);
580
135
581
135
    if (Value *R = simplifyFAdd(AllOpnds, 1))
582
5
      return R;
583
447
  }
584
447
585
447
  // step 6: Try factorization as the last resort,
586
447
  return performFactorization(I);
587
447
}
588
589
181
Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
590
181
  unsigned AddendNum = Addends.size();
591
181
  assert(AddendNum <= 4 && "Too many addends");
592
181
593
181
  // For saving intermediate results;
594
181
  unsigned NextTmpIdx = 0;
595
181
  FAddend TmpResult[3];
596
181
597
181
  // Points to the constant addend of the resulting simplified expression.
598
181
  // If the resulting expr has constant-addend, this constant-addend is
599
181
  // desirable to reside at the top of the resulting expression tree. Placing
600
181
  // constant close to supper-expr(s) will potentially reveal some optimization
601
181
  // opportunities in super-expr(s).
602
181
  //
603
181
  const FAddend *ConstAdd = nullptr;
604
181
605
181
  // Simplified addends are placed <SimpVect>.
606
181
  AddendVect SimpVect;
607
181
608
181
  // The outer loop works on one symbolic-value at a time. Suppose the input
609
181
  // addends are : <a1, x>, <b1, y>, <a2, x>, <c1, z>, <b2, y>, ...
610
181
  // The symbolic-values will be processed in this order: x, y, z.
611
181
  //
612
657
  for (unsigned SymIdx = 0; 
SymIdx < AddendNum657
;
SymIdx++476
) {
613
476
614
476
    const FAddend *ThisAddend = Addends[SymIdx];
615
476
    if (
!ThisAddend476
) {
616
23
      // This addend was processed before.
617
23
      continue;
618
23
    }
619
453
620
453
    Value *Val = ThisAddend->getSymVal();
621
453
    unsigned StartIdx = SimpVect.size();
622
453
    SimpVect.push_back(ThisAddend);
623
453
624
453
    // The inner loop collects addends sharing same symbolic-value, and these
625
453
    // addends will be later on folded into a single addend. Following above
626
453
    // example, if the symbolic value "y" is being processed, the inner loop
627
453
    // will collect two addends "<b1,y>" and "<b2,Y>". These two addends will
628
453
    // be later on folded into "<b1+b2, y>".
629
453
    //
630
453
    for (unsigned SameSymIdx = SymIdx + 1;
631
866
         
SameSymIdx < AddendNum866
;
SameSymIdx++413
) {
632
413
      const FAddend *T = Addends[SameSymIdx];
633
413
      if (
T && 413
T->getSymVal() == Val408
) {
634
23
        // Set null such that next iteration of the outer loop will not process
635
23
        // this addend again.
636
23
        Addends[SameSymIdx] = nullptr;
637
23
        SimpVect.push_back(T);
638
23
      }
639
413
    }
640
453
641
453
    // If multiple addends share same symbolic value, fold them together.
642
453
    if (
StartIdx + 1 != SimpVect.size()453
) {
643
18
      FAddend &R = TmpResult[NextTmpIdx ++];
644
18
      R = *SimpVect[StartIdx];
645
41
      for (unsigned Idx = StartIdx + 1; 
Idx < SimpVect.size()41
;
Idx++23
)
646
23
        R += *SimpVect[Idx];
647
18
648
18
      // Pop all addends being folded and push the resulting folded addend.
649
18
      SimpVect.resize(StartIdx);
650
18
      if (
Val18
) {
651
16
        if (
!R.isZero()16
) {
652
13
          SimpVect.push_back(&R);
653
13
        }
654
18
      } else {
655
2
        // Don't push constant addend at this time. It will be the last element
656
2
        // of <SimpVect>.
657
2
        ConstAdd = &R;
658
2
      }
659
18
    }
660
476
  }
661
181
662
181
  assert((NextTmpIdx <= array_lengthof(TmpResult) + 1) &&
663
181
         "out-of-bound access");
664
181
665
181
  if (ConstAdd)
666
2
    SimpVect.push_back(ConstAdd);
667
181
668
181
  Value *Result;
669
181
  if (!SimpVect.empty())
670
181
    Result = createNaryFAdd(SimpVect, InstrQuota);
671
0
  else {
672
0
    // The addition is folded to 0.0.
673
0
    Result = ConstantFP::get(Instr->getType(), 0.0);
674
0
  }
675
181
676
181
  return Result;
677
181
}
678
679
Value *FAddCombine::createNaryFAdd
680
181
  (const AddendVect &Opnds, unsigned InstrQuota) {
681
181
  assert(!Opnds.empty() && "Expect at least one addend");
682
181
683
181
  // Step 1: Check if the # of instructions needed exceeds the quota.
684
181
  //
685
181
  unsigned InstrNeeded = calcInstrNumber(Opnds);
686
181
  if (InstrNeeded > InstrQuota)
687
171
    return nullptr;
688
10
689
10
  initCreateInstNum();
690
10
691
10
  // step 2: Emit the N-ary addition.
692
10
  // Note that at most three instructions are involved in Fadd-InstCombine: the
693
10
  // addition in question, and at most two neighboring instructions.
694
10
  // The resulting optimized addition should have at least one less instruction
695
10
  // than the original addition expression tree. This implies that the resulting
696
10
  // N-ary addition has at most two instructions, and we don't need to worry
697
10
  // about tree-height when constructing the N-ary addition.
698
10
699
10
  Value *LastVal = nullptr;
700
10
  bool LastValNeedNeg = false;
701
10
702
10
  // Iterate the addends, creating fadd/fsub using adjacent two addends.
703
12
  for (const FAddend *Opnd : Opnds) {
704
12
    bool NeedNeg;
705
12
    Value *V = createAddendVal(*Opnd, NeedNeg);
706
12
    if (
!LastVal12
) {
707
10
      LastVal = V;
708
10
      LastValNeedNeg = NeedNeg;
709
10
      continue;
710
10
    }
711
2
712
2
    
if (2
LastValNeedNeg == NeedNeg2
) {
713
1
      LastVal = createFAdd(LastVal, V);
714
1
      continue;
715
1
    }
716
1
717
1
    
if (1
LastValNeedNeg1
)
718
1
      LastVal = createFSub(V, LastVal);
719
1
    else
720
0
      LastVal = createFSub(LastVal, V);
721
12
722
12
    LastValNeedNeg = false;
723
12
  }
724
10
725
10
  if (
LastValNeedNeg10
) {
726
3
    LastVal = createFNeg(LastVal);
727
3
  }
728
10
729
  #ifndef NDEBUG
730
    assert(CreateInstrNum == InstrNeeded &&
731
           "Inconsistent in instruction numbers");
732
  #endif
733
734
181
  return LastVal;
735
181
}
736
737
10
Value *FAddCombine::createFSub(Value *Opnd0, Value *Opnd1) {
738
10
  Value *V = Builder.CreateFSub(Opnd0, Opnd1);
739
10
  if (Instruction *I = dyn_cast<Instruction>(V))
740
8
    createInstPostProc(I);
741
10
  return V;
742
10
}
743
744
3
Value *FAddCombine::createFNeg(Value *V) {
745
3
  Value *Zero = cast<Value>(ConstantFP::getZeroValueForNegation(V->getType()));
746
3
  Value *NewV = createFSub(Zero, V);
747
3
  if (Instruction *I = dyn_cast<Instruction>(NewV))
748
2
    createInstPostProc(I, true); // fneg's don't receive instruction numbers.
749
3
  return NewV;
750
3
}
751
752
5
Value *FAddCombine::createFAdd(Value *Opnd0, Value *Opnd1) {
753
5
  Value *V = Builder.CreateFAdd(Opnd0, Opnd1);
754
5
  if (Instruction *I = dyn_cast<Instruction>(V))
755
4
    createInstPostProc(I);
756
5
  return V;
757
5
}
758
759
11
Value *FAddCombine::createFMul(Value *Opnd0, Value *Opnd1) {
760
11
  Value *V = Builder.CreateFMul(Opnd0, Opnd1);
761
11
  if (Instruction *I = dyn_cast<Instruction>(V))
762
11
    createInstPostProc(I);
763
11
  return V;
764
11
}
765
766
3
Value *FAddCombine::createFDiv(Value *Opnd0, Value *Opnd1) {
767
3
  Value *V = Builder.CreateFDiv(Opnd0, Opnd1);
768
3
  if (Instruction *I = dyn_cast<Instruction>(V))
769
3
    createInstPostProc(I);
770
3
  return V;
771
3
}
772
773
28
void FAddCombine::createInstPostProc(Instruction *NewInstr, bool NoNumber) {
774
28
  NewInstr->setDebugLoc(Instr->getDebugLoc());
775
28
776
28
  // Keep track of the number of instruction created.
777
28
  if (!NoNumber)
778
26
    incCreateInstNum();
779
28
780
28
  // Propagate fast-math flags
781
28
  NewInstr->setFastMathFlags(Instr->getFastMathFlags());
782
28
}
783
784
// Return the number of instruction needed to emit the N-ary addition.
785
// NOTE: Keep this function in sync with createAddendVal().
786
181
unsigned FAddCombine::calcInstrNumber(const AddendVect &Opnds) {
787
181
  unsigned OpndNum = Opnds.size();
788
181
  unsigned InstrNeeded = OpndNum - 1;
789
181
790
181
  // The number of addends in the form of "(-1)*x".
791
181
  unsigned NegOpndNum = 0;
792
181
793
181
  // Adjust the number of instructions needed to emit the N-ary add.
794
450
  for (const FAddend *Opnd : Opnds) {
795
450
    if (Opnd->isConstant())
796
100
      continue;
797
350
798
350
    // The constant check above is really for a few special constant
799
350
    // coefficients.
800
350
    
if (350
isa<UndefValue>(Opnd->getSymVal())350
)
801
1
      continue;
802
349
803
349
    const FAddendCoef &CE = Opnd->getCoef();
804
349
    if (
CE.isMinusOne() || 349
CE.isMinusTwo()321
)
805
29
      NegOpndNum++;
806
349
807
349
    // Let the addend be "c * x". If "c == +/-1", the value of the addend
808
349
    // is immediately available; otherwise, it needs exactly one instruction
809
349
    // to evaluate the value.
810
349
    if (
!CE.isMinusOne() && 349
!CE.isOne()321
)
811
93
      InstrNeeded++;
812
450
  }
813
181
  if (NegOpndNum == OpndNum)
814
7
    InstrNeeded++;
815
181
  return InstrNeeded;
816
181
}
817
818
// Input Addend        Value           NeedNeg(output)
819
// ================================================================
820
// Constant C          C               false
821
// <+/-1, V>           V               coefficient is -1
822
// <2/-2, V>          "fadd V, V"      coefficient is -2
823
// <C, V>             "fmul V, C"      false
824
//
825
// NOTE: Keep this function in sync with FAddCombine::calcInstrNumber.
826
12
Value *FAddCombine::createAddendVal(const FAddend &Opnd, bool &NeedNeg) {
827
12
  const FAddendCoef &Coeff = Opnd.getCoef();
828
12
829
12
  if (
Opnd.isConstant()12
) {
830
1
    NeedNeg = false;
831
1
    return Coeff.getValue(Instr->getType());
832
1
  }
833
11
834
11
  Value *OpndVal = Opnd.getSymVal();
835
11
836
11
  if (
Coeff.isMinusOne() || 11
Coeff.isOne()7
) {
837
4
    NeedNeg = Coeff.isMinusOne();
838
4
    return OpndVal;
839
4
  }
840
7
841
7
  
if (7
Coeff.isTwo() || 7
Coeff.isMinusTwo()7
) {
842
1
    NeedNeg = Coeff.isMinusTwo();
843
1
    return createFAdd(OpndVal, OpndVal);
844
1
  }
845
6
846
6
  NeedNeg = false;
847
6
  return createFMul(OpndVal, Coeff.getValue(Instr->getType()));
848
6
}
849
850
/// \brief Return true if we can prove that:
851
///    (sub LHS, RHS)  === (sub nsw LHS, RHS)
852
/// This basically requires proving that the add in the original type would not
853
/// overflow to change the sign bit or have a carry out.
854
/// TODO: Handle this for Vectors.
855
bool InstCombiner::willNotOverflowSignedSub(const Value *LHS,
856
                                            const Value *RHS,
857
910k
                                            const Instruction &CxtI) const {
858
910k
  // If LHS and RHS each have at least two sign bits, the subtraction
859
910k
  // cannot overflow.
860
910k
  if (ComputeNumSignBits(LHS, 0, &CxtI) > 1 &&
861
135k
      ComputeNumSignBits(RHS, 0, &CxtI) > 1)
862
18.6k
    return true;
863
892k
864
892k
  KnownBits LHSKnown = computeKnownBits(LHS, 0, &CxtI);
865
892k
866
892k
  KnownBits RHSKnown = computeKnownBits(RHS, 0, &CxtI);
867
892k
868
892k
  // Subtraction of two 2's complement numbers having identical signs will
869
892k
  // never overflow.
870
892k
  if (
(LHSKnown.isNegative() && 892k
RHSKnown.isNegative()4.28k
) ||
871
892k
      
(LHSKnown.isNonNegative() && 892k
RHSKnown.isNonNegative()115k
))
872
251
    return true;
873
891k
874
891k
  // TODO: implement logic similar to checkRippleForAdd
875
891k
  return false;
876
891k
}
877
878
/// \brief Return true if we can prove that:
879
///    (sub LHS, RHS)  === (sub nuw LHS, RHS)
880
bool InstCombiner::willNotOverflowUnsignedSub(const Value *LHS,
881
                                              const Value *RHS,
882
1.85M
                                              const Instruction &CxtI) const {
883
1.85M
  // If the LHS is negative and the RHS is non-negative, no unsigned wrap.
884
1.85M
  KnownBits LHSKnown = computeKnownBits(LHS, /*Depth=*/0, &CxtI);
885
1.85M
  KnownBits RHSKnown = computeKnownBits(RHS, /*Depth=*/0, &CxtI);
886
1.85M
  if (
LHSKnown.isNegative() && 1.85M
RHSKnown.isNonNegative()5.61k
)
887
46
    return true;
888
1.85M
889
1.85M
  return false;
890
1.85M
}
891
892
// Checks if any operand is negative and we can convert add to sub.
893
// This function checks for following negative patterns
894
//   ADD(XOR(OR(Z, NOT(C)), C)), 1) == NEG(AND(Z, C))
895
//   ADD(XOR(AND(Z, C), C), 1) == NEG(OR(Z, ~C))
896
//   XOR(AND(Z, C), (C + 1)) == NEG(OR(Z, ~C)) if C is even
897
static Value *checkForNegativeOperand(BinaryOperator &I,
898
8.21M
                                      InstCombiner::BuilderTy &Builder) {
899
8.21M
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
900
8.21M
901
8.21M
  // This function creates 2 instructions to replace ADD, we need at least one
902
8.21M
  // of LHS or RHS to have one use to ensure benefit in transform.
903
8.21M
  if (
!LHS->hasOneUse() && 8.21M
!RHS->hasOneUse()4.88M
)
904
4.52M
    return nullptr;
905
3.68M
906
3.68M
  Value *X = nullptr, *Y = nullptr, *Z = nullptr;
907
3.68M
  const APInt *C1 = nullptr, *C2 = nullptr;
908
3.68M
909
3.68M
  // if ONE is on other side, swap
910
3.68M
  if (match(RHS, m_Add(m_Value(X), m_One())))
911
2.00k
    std::swap(LHS, RHS);
912
3.68M
913
3.68M
  if (
match(LHS, m_Add(m_Value(X), m_One()))3.68M
) {
914
45.4k
    // if XOR on other side, swap
915
45.4k
    if (match(RHS, m_Xor(m_Value(Y), m_APInt(C1))))
916
96
      std::swap(X, RHS);
917
45.4k
918
45.4k
    if (
match(X, m_Xor(m_Value(Y), m_APInt(C1)))45.4k
) {
919
120
      // X = XOR(Y, C1), Y = OR(Z, C2), C2 = NOT(C1) ==> X == NOT(AND(Z, C1))
920
120
      // ADD(ADD(X, 1), RHS) == ADD(X, ADD(RHS, 1)) == SUB(RHS, AND(Z, C1))
921
120
      if (
match(Y, m_Or(m_Value(Z), m_APInt(C2))) && 120
(*C2 == ~(*C1))5
) {
922
5
        Value *NewAnd = Builder.CreateAnd(Z, *C1);
923
5
        return Builder.CreateSub(RHS, NewAnd, "sub");
924
115
      } else 
if (115
match(Y, m_And(m_Value(Z), m_APInt(C2))) && 115
(*C1 == *C2)3
) {
925
3
        // X = XOR(Y, C1), Y = AND(Z, C2), C2 == C1 ==> X == NOT(OR(Z, ~C1))
926
3
        // ADD(ADD(X, 1), RHS) == ADD(X, ADD(RHS, 1)) == SUB(RHS, OR(Z, ~C1))
927
3
        Value *NewOr = Builder.CreateOr(Z, ~(*C1));
928
3
        return Builder.CreateSub(RHS, NewOr, "sub");
929
3
      }
930
3.68M
    }
931
45.4k
  }
932
3.68M
933
3.68M
  // Restore LHS and RHS
934
3.68M
  LHS = I.getOperand(0);
935
3.68M
  RHS = I.getOperand(1);
936
3.68M
937
3.68M
  // if XOR is on other side, swap
938
3.68M
  if (match(RHS, m_Xor(m_Value(Y), m_APInt(C1))))
939
3.62k
    std::swap(LHS, RHS);
940
3.68M
941
3.68M
  // C2 is ODD
942
3.68M
  // LHS = XOR(Y, C1), Y = AND(Z, C2), C1 == (C2 + 1) => LHS == NEG(OR(Z, ~C2))
943
3.68M
  // ADD(LHS, RHS) == SUB(RHS, OR(Z, ~C2))
944
3.68M
  if (match(LHS, m_Xor(m_Value(Y), m_APInt(C1))))
945
7.93k
    
if (7.93k
C1->countTrailingZeros() == 07.93k
)
946
5.13k
      
if (5.13k
match(Y, m_And(m_Value(Z), m_APInt(C2))) && 5.13k
*C1 == (*C2 + 1)288
) {
947
1
        Value *NewOr = Builder.CreateOr(Z, ~(*C2));
948
1
        return Builder.CreateSub(RHS, NewOr, "sub");
949
1
      }
950
3.68M
  return nullptr;
951
3.68M
}
952
953
static Instruction *foldAddWithConstant(BinaryOperator &Add,
954
8.22M
                                        InstCombiner::BuilderTy &Builder) {
955
8.22M
  Value *Op0 = Add.getOperand(0), *Op1 = Add.getOperand(1);
956
8.22M
  const APInt *C;
957
8.22M
  if (!match(Op1, m_APInt(C)))
958
2.37M
    return nullptr;
959
5.85M
960
5.85M
  
if (5.85M
C->isSignMask()5.85M
) {
961
218
    // If wrapping is not allowed, then the addition must set the sign bit:
962
218
    // X + (signmask) --> X | signmask
963
218
    if (
Add.hasNoSignedWrap() || 218
Add.hasNoUnsignedWrap()216
)
964
4
      return BinaryOperator::CreateOr(Op0, Op1);
965
214
966
214
    // If wrapping is allowed, then the addition flips the sign bit of LHS:
967
214
    // X + (signmask) --> X ^ signmask
968
214
    return BinaryOperator::CreateXor(Op0, Op1);
969
214
  }
970
5.85M
971
5.85M
  Value *X;
972
5.85M
  const APInt *C2;
973
5.85M
  Type *Ty = Add.getType();
974
5.85M
975
5.85M
  // Is this add the last step in a convoluted sext?
976
5.85M
  // add(zext(xor i16 X, -32768), -32768) --> sext X
977
5.85M
  if (match(Op0, m_ZExt(m_Xor(m_Value(X), m_APInt(C2)))) &&
978
5.85M
      
C2->isMinSignedValue()123
&&
C2->sext(Ty->getScalarSizeInBits()) == *C20
)
979
4
    return CastInst::Create(Instruction::SExt, X, Ty);
980
5.85M
981
5.85M
  // (add (zext (add nuw X, C2)), C) --> (zext (add nuw X, C2 + C))
982
5.85M
  // FIXME: This should check hasOneUse to not increase the instruction count?
983
5.85M
  
if (5.85M
C->isNegative() &&
984
1.23M
      match(Op0, m_ZExt(m_NUWAdd(m_Value(X), m_APInt(C2)))) &&
985
5.85M
      
C->sge(-C2->sext(C->getBitWidth()))8
) {
986
8
    Constant *NewC =
987
8
        ConstantInt::get(X->getType(), *C2 + C->trunc(C2->getBitWidth()));
988
8
    return new ZExtInst(Builder.CreateNUWAdd(X, NewC), Ty);
989
8
  }
990
5.85M
991
5.85M
  
if (5.85M
C->isOneValue() && 5.85M
Op0->hasOneUse()3.10M
) {
992
807k
    // add (sext i1 X), 1 --> zext (not X)
993
807k
    // TODO: The smallest IR representation is (select X, 0, 1), and that would
994
807k
    // not require the one-use check. But we need to remove a transform in
995
807k
    // visitSelect and make sure that IR value tracking for select is equal or
996
807k
    // better than for these ops.
997
807k
    if (match(Op0, m_SExt(m_Value(X))) &&
998
1.26k
        X->getType()->getScalarSizeInBits() == 1)
999
4
      return new ZExtInst(Builder.CreateNot(X), Ty);
1000
807k
1001
807k
    // Shifts and add used to flip and mask off the low bit:
1002
807k
    // add (ashr (shl i32 X, 31), 31), 1 --> and (not X), 1
1003
807k
    const APInt *C3;
1004
807k
    if (match(Op0, m_AShr(m_Shl(m_Value(X), m_APInt(C2)), m_APInt(C3))) &&
1005
807k
        
C2 == C3182
&&
*C2 == Ty->getScalarSizeInBits() - 1182
) {
1006
2
      Value *NotX = Builder.CreateNot(X);
1007
2
      return BinaryOperator::CreateAnd(NotX, ConstantInt::get(Ty, 1));
1008
2
    }
1009
5.85M
  }
1010
5.85M
1011
5.85M
  return nullptr;
1012
5.85M
}
1013
1014
8.25M
Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
1015
8.25M
  bool Changed = SimplifyAssociativeOrCommutative(I);
1016
8.25M
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1017
8.25M
1018
8.25M
  if (Value *V = SimplifyVectorOp(I))
1019
33
    return replaceInstUsesWith(I, V);
1020
8.25M
1021
8.25M
  
if (Value *8.25M
V8.25M
=
1022
8.25M
          SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(), I.hasNoUnsignedWrap(),
1023
8.25M
                          SQ.getWithInstruction(&I)))
1024
30.3k
    return replaceInstUsesWith(I, V);
1025
8.22M
1026
8.22M
   // (A*B)+(A*C) -> A*(B+C) etc
1027
8.22M
  
if (Value *8.22M
V8.22M
= SimplifyUsingDistributiveLaws(I))
1028
410
    return replaceInstUsesWith(I, V);
1029
8.22M
1030
8.22M
  
if (Instruction *8.22M
X8.22M
= foldAddWithConstant(I, Builder))
1031
236
    return X;
1032
8.22M
1033
8.22M
  // FIXME: This should be moved into the above helper function to allow these
1034
8.22M
  // transforms for splat vectors.
1035
8.22M
  
if (ConstantInt *8.22M
CI8.22M
= dyn_cast<ConstantInt>(RHS)) {
1036
5.70M
    // zext(bool) + C -> bool ? C + 1 : C
1037
5.70M
    if (ZExtInst *ZI = dyn_cast<ZExtInst>(LHS))
1038
65.0k
      
if (65.0k
ZI->getSrcTy()->isIntegerTy(1)65.0k
)
1039
108
        return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI);
1040
5.70M
1041
5.70M
    Value *XorLHS = nullptr; ConstantInt *XorRHS = nullptr;
1042
5.70M
    if (
match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))5.70M
) {
1043
3.27k
      uint32_t TySizeBits = I.getType()->getScalarSizeInBits();
1044
3.27k
      const APInt &RHSVal = CI->getValue();
1045
3.27k
      unsigned ExtendAmt = 0;
1046
3.27k
      // If we have ADD(XOR(AND(X, 0xFF), 0x80), 0xF..F80), it's a sext.
1047
3.27k
      // If we have ADD(XOR(AND(X, 0xFF), 0xF..F80), 0x80), it's a sext.
1048
3.27k
      if (
XorRHS->getValue() == -RHSVal3.27k
) {
1049
31
        if (RHSVal.isPowerOf2())
1050
6
          ExtendAmt = TySizeBits - RHSVal.logBase2() - 1;
1051
25
        else 
if (25
XorRHS->getValue().isPowerOf2()25
)
1052
25
          ExtendAmt = TySizeBits - XorRHS->getValue().logBase2() - 1;
1053
31
      }
1054
3.27k
1055
3.27k
      if (
ExtendAmt3.27k
) {
1056
31
        APInt Mask = APInt::getHighBitsSet(TySizeBits, ExtendAmt);
1057
31
        if (!MaskedValueIsZero(XorLHS, Mask, 0, &I))
1058
27
          ExtendAmt = 0;
1059
31
      }
1060
3.27k
1061
3.27k
      if (
ExtendAmt3.27k
) {
1062
4
        Constant *ShAmt = ConstantInt::get(I.getType(), ExtendAmt);
1063
4
        Value *NewShl = Builder.CreateShl(XorLHS, ShAmt, "sext");
1064
4
        return BinaryOperator::CreateAShr(NewShl, ShAmt);
1065
4
      }
1066
3.27k
1067
3.27k
      // If this is a xor that was canonicalized from a sub, turn it back into
1068
3.27k
      // a sub and fuse this add with it.
1069
3.27k
      
if (3.27k
LHS->hasOneUse() && 3.27k
(XorRHS->getValue()+1).isPowerOf2()1.21k
) {
1070
43
        KnownBits LHSKnown = computeKnownBits(XorLHS, 0, &I);
1071
43
        if ((XorRHS->getValue() | LHSKnown.Zero).isAllOnesValue())
1072
21
          return BinaryOperator::CreateSub(ConstantExpr::getAdd(XorRHS, CI),
1073
21
                                           XorLHS);
1074
3.25k
      }
1075
3.25k
      // (X + signmask) + C could have gotten canonicalized to (X^signmask) + C,
1076
3.25k
      // transform them into (X + (signmask ^ C))
1077
3.25k
      
if (3.25k
XorRHS->getValue().isSignMask()3.25k
)
1078
1
        return BinaryOperator::CreateAdd(XorLHS,
1079
1
                                         ConstantExpr::getXor(XorRHS, CI));
1080
8.22M
    }
1081
5.70M
  }
1082
8.22M
1083
8.22M
  
if (8.22M
isa<Constant>(RHS)8.22M
)
1084
5.85M
    
if (Instruction *5.85M
NV5.85M
= foldOpWithConstantIntoOperand(I))
1085
1.06k
      return NV;
1086
8.22M
1087
8.22M
  
if (8.22M
I.getType()->isIntOrIntVectorTy(1)8.22M
)
1088
3
    return BinaryOperator::CreateXor(LHS, RHS);
1089
8.22M
1090
8.22M
  // X + X --> X << 1
1091
8.22M
  
if (8.22M
LHS == RHS8.22M
) {
1092
216
    BinaryOperator *New =
1093
216
      BinaryOperator::CreateShl(LHS, ConstantInt::get(I.getType(), 1));
1094
216
    New->setHasNoSignedWrap(I.hasNoSignedWrap());
1095
216
    New->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
1096
216
    return New;
1097
216
  }
1098
8.22M
1099
8.22M
  // -A + B  -->  B - A
1100
8.22M
  // -A + -B  -->  -(A + B)
1101
8.22M
  
if (Value *8.22M
LHSV8.22M
= dyn_castNegVal(LHS)) {
1102
1.76k
    if (!isa<Constant>(RHS))
1103
416
      
if (Value *416
RHSV416
= dyn_castNegVal(RHS)) {
1104
236
        Value *NewAdd = Builder.CreateAdd(LHSV, RHSV, "sum");
1105
236
        return BinaryOperator::CreateNeg(NewAdd);
1106
236
      }
1107
1.52k
1108
1.52k
    return BinaryOperator::CreateSub(RHS, LHSV);
1109
1.52k
  }
1110
8.22M
1111
8.22M
  // A + -B  -->  A - B
1112
8.22M
  
if (8.22M
!isa<Constant>(RHS)8.22M
)
1113
2.36M
    
if (Value *2.36M
V2.36M
= dyn_castNegVal(RHS))
1114
8.46k
      return BinaryOperator::CreateSub(LHS, V);
1115
8.21M
1116
8.21M
  
if (Value *8.21M
V8.21M
= checkForNegativeOperand(I, Builder))
1117
9
    return replaceInstUsesWith(I, V);
1118
8.21M
1119
8.21M
  // A+B --> A|B iff A and B have no bits set in common.
1120
8.21M
  
if (8.21M
haveNoCommonBitsSet(LHS, RHS, DL, &AC, &I, &DT)8.21M
)
1121
29.4k
    return BinaryOperator::CreateOr(LHS, RHS);
1122
8.18M
1123
8.18M
  
if (Constant *8.18M
CRHS8.18M
= dyn_cast<Constant>(RHS)) {
1124
5.82M
    Value *X;
1125
5.82M
    if (match(LHS, m_Not(m_Value(X)))) // ~X + C --> (C-1) - X
1126
141
      return BinaryOperator::CreateSub(SubOne(CRHS), X);
1127
8.18M
  }
1128
8.18M
1129
8.18M
  // FIXME: We already did a check for ConstantInt RHS above this.
1130
8.18M
  // FIXME: Is this pattern covered by another fold? No regression tests fail on
1131
8.18M
  // removal.
1132
8.18M
  
if (ConstantInt *8.18M
CRHS8.18M
= dyn_cast<ConstantInt>(RHS)) {
1133
5.67M
    // (X & FF00) + xx00  -> (X+xx00) & FF00
1134
5.67M
    Value *X;
1135
5.67M
    ConstantInt *C2;
1136
5.67M
    if (LHS->hasOneUse() &&
1137
1.65M
        match(LHS, m_And(m_Value(X), m_ConstantInt(C2))) &&
1138
5.67M
        
CRHS->getValue() == (CRHS->getValue() & C2->getValue())7.87k
) {
1139
4.64k
      // See if all bits from the first bit set in the Add RHS up are included
1140
4.64k
      // in the mask.  First, get the rightmost bit.
1141
4.64k
      const APInt &AddRHSV = CRHS->getValue();
1142
4.64k
1143
4.64k
      // Form a mask of all bits from the lowest bit added through the top.
1144
4.64k
      APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1));
1145
4.64k
1146
4.64k
      // See if the and mask includes all of these bits.
1147
4.64k
      APInt AddRHSHighBitsAnd(AddRHSHighBits & C2->getValue());
1148
4.64k
1149
4.64k
      if (
AddRHSHighBits == AddRHSHighBitsAnd4.64k
) {
1150
179
        // Okay, the xform is safe.  Insert the new add pronto.
1151
179
        Value *NewAdd = Builder.CreateAdd(X, CRHS, LHS->getName());
1152
179
        return BinaryOperator::CreateAnd(NewAdd, C2);
1153
179
      }
1154
8.18M
    }
1155
5.67M
  }
1156
8.18M
1157
8.18M
  // add (select X 0 (sub n A)) A  -->  select X A n
1158
8.18M
  {
1159
8.18M
    SelectInst *SI = dyn_cast<SelectInst>(LHS);
1160
8.18M
    Value *A = RHS;
1161
8.18M
    if (
!SI8.18M
) {
1162
8.00M
      SI = dyn_cast<SelectInst>(RHS);
1163
8.00M
      A = LHS;
1164
8.00M
    }
1165
8.18M
    if (
SI && 8.18M
SI->hasOneUse()206k
) {
1166
77.8k
      Value *TV = SI->getTrueValue();
1167
77.8k
      Value *FV = SI->getFalseValue();
1168
77.8k
      Value *N;
1169
77.8k
1170
77.8k
      // Can we fold the add into the argument of the select?
1171
77.8k
      // We check both true and false select arguments for a matching subtract.
1172
77.8k
      if (
match(FV, m_Zero()) && 77.8k
match(TV, m_Sub(m_Value(N), m_Specific(A)))5.82k
)
1173
77.8k
        // Fold the add into the true select value.
1174
116
        return SelectInst::Create(SI->getCondition(), N, A);
1175
77.7k
1176
77.7k
      
if (77.7k
match(TV, m_Zero()) && 77.7k
match(FV, m_Sub(m_Value(N), m_Specific(A)))5.74k
)
1177
77.7k
        // Fold the add into the false select value.
1178
1
        return SelectInst::Create(SI->getCondition(), A, N);
1179
8.18M
    }
1180
8.18M
  }
1181
8.18M
1182
8.18M
  // Check for (add (sext x), y), see if we can merge this into an
1183
8.18M
  // integer add followed by a sext.
1184
8.18M
  
if (SExtInst *8.18M
LHSConv8.18M
= dyn_cast<SExtInst>(LHS)) {
1185
127k
    // (add (sext x), cst) --> (sext (add x, cst'))
1186
127k
    if (ConstantInt *
RHSC127k
= dyn_cast<ConstantInt>(RHS)) {
1187
79.9k
      if (
LHSConv->hasOneUse()79.9k
) {
1188
14.9k
        Constant *CI =
1189
14.9k
            ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType());
1190
14.9k
        if (ConstantExpr::getSExt(CI, I.getType()) == RHSC &&
1191
14.9k
            
willNotOverflowSignedAdd(LHSConv->getOperand(0), CI, I)14.8k
) {
1192
62
          // Insert the new, smaller add.
1193
62
          Value *NewAdd =
1194
62
              Builder.CreateNSWAdd(LHSConv->getOperand(0), CI, "addconv");
1195
62
          return new SExtInst(NewAdd, I.getType());
1196
62
        }
1197
127k
      }
1198
79.9k
    }
1199
127k
1200
127k
    // (add (sext x), (sext y)) --> (sext (add int x, y))
1201
127k
    
if (SExtInst *127k
RHSConv127k
= dyn_cast<SExtInst>(RHS)) {
1202
7.64k
      // Only do this if x/y have the same type, if at least one of them has a
1203
7.64k
      // single use (so we don't increase the number of sexts), and if the
1204
7.64k
      // integer add will not overflow.
1205
7.64k
      if (LHSConv->getOperand(0)->getType() ==
1206
7.64k
              RHSConv->getOperand(0)->getType() &&
1207
7.18k
          
(LHSConv->hasOneUse() || 7.18k
RHSConv->hasOneUse()2.95k
) &&
1208
4.29k
          willNotOverflowSignedAdd(LHSConv->getOperand(0),
1209
7.64k
                                   RHSConv->getOperand(0), I)) {
1210
17
        // Insert the new integer add.
1211
17
        Value *NewAdd = Builder.CreateNSWAdd(LHSConv->getOperand(0),
1212
17
                                             RHSConv->getOperand(0), "addconv");
1213
17
        return new SExtInst(NewAdd, I.getType());
1214
17
      }
1215
8.18M
    }
1216
127k
  }
1217
8.18M
1218
8.18M
  // Check for (add (zext x), y), see if we can merge this into an
1219
8.18M
  // integer add followed by a zext.
1220
8.18M
  
if (auto *8.18M
LHSConv8.18M
= dyn_cast<ZExtInst>(LHS)) {
1221
90.1k
    // (add (zext x), cst) --> (zext (add x, cst'))
1222
90.1k
    if (ConstantInt *
RHSC90.1k
= dyn_cast<ConstantInt>(RHS)) {
1223
64.6k
      if (
LHSConv->hasOneUse()64.6k
) {
1224
35.3k
        Constant *CI =
1225
35.3k
            ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType());
1226
35.3k
        if (ConstantExpr::getZExt(CI, I.getType()) == RHSC &&
1227
35.3k
            
willNotOverflowUnsignedAdd(LHSConv->getOperand(0), CI, I)25.6k
) {
1228
90
          // Insert the new, smaller add.
1229
90
          Value *NewAdd =
1230
90
              Builder.CreateNUWAdd(LHSConv->getOperand(0), CI, "addconv");
1231
90
          return new ZExtInst(NewAdd, I.getType());
1232
90
        }
1233
90.0k
      }
1234
64.6k
    }
1235
90.0k
1236
90.0k
    // (add (zext x), (zext y)) --> (zext (add int x, y))
1237
90.0k
    
if (auto *90.0k
RHSConv90.0k
= dyn_cast<ZExtInst>(RHS)) {
1238
19.6k
      // Only do this if x/y have the same type, if at least one of them has a
1239
19.6k
      // single use (so we don't increase the number of zexts), and if the
1240
19.6k
      // integer add will not overflow.
1241
19.6k
      if (LHSConv->getOperand(0)->getType() ==
1242
19.6k
              RHSConv->getOperand(0)->getType() &&
1243
18.9k
          
(LHSConv->hasOneUse() || 18.9k
RHSConv->hasOneUse()3.89k
) &&
1244
16.2k
          willNotOverflowUnsignedAdd(LHSConv->getOperand(0),
1245
19.6k
                                     RHSConv->getOperand(0), I)) {
1246
21
        // Insert the new integer add.
1247
21
        Value *NewAdd = Builder.CreateNUWAdd(
1248
21
            LHSConv->getOperand(0), RHSConv->getOperand(0), "addconv");
1249
21
        return new ZExtInst(NewAdd, I.getType());
1250
21
      }
1251
8.18M
    }
1252
90.1k
  }
1253
8.18M
1254
8.18M
  // (add (xor A, B) (and A, B)) --> (or A, B)
1255
8.18M
  {
1256
8.18M
    Value *A = nullptr, *B = nullptr;
1257
8.18M
    if (match(RHS, m_Xor(m_Value(A), m_Value(B))) &&
1258
11.4k
        match(LHS, m_c_And(m_Specific(A), m_Specific(B))))
1259
1
      return BinaryOperator::CreateOr(A, B);
1260
8.18M
1261
8.18M
    
if (8.18M
match(LHS, m_Xor(m_Value(A), m_Value(B))) &&
1262
9.56k
        match(RHS, m_c_And(m_Specific(A), m_Specific(B))))
1263
0
      return BinaryOperator::CreateOr(A, B);
1264
8.18M
  }
1265
8.18M
1266
8.18M
  // (add (or A, B) (and A, B)) --> (add A, B)
1267
8.18M
  {
1268
8.18M
    Value *A = nullptr, *B = nullptr;
1269
8.18M
    if (match(RHS, m_Or(m_Value(A), m_Value(B))) &&
1270
8.18M
        
match(LHS, m_c_And(m_Specific(A), m_Specific(B)))6.68k
) {
1271
0
      auto *New = BinaryOperator::CreateAdd(A, B);
1272
0
      New->setHasNoSignedWrap(I.hasNoSignedWrap());
1273
0
      New->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
1274
0
      return New;
1275
0
    }
1276
8.18M
1277
8.18M
    
if (8.18M
match(LHS, m_Or(m_Value(A), m_Value(B))) &&
1278
8.18M
        
match(RHS, m_c_And(m_Specific(A), m_Specific(B)))74.3k
) {
1279
4
      auto *New = BinaryOperator::CreateAdd(A, B);
1280
4
      New->setHasNoSignedWrap(I.hasNoSignedWrap());
1281
4
      New->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
1282
4
      return New;
1283
4
    }
1284
8.18M
  }
1285
8.18M
1286
8.18M
  // TODO(jingyue): Consider willNotOverflowSignedAdd and
1287
8.18M
  // willNotOverflowUnsignedAdd to reduce the number of invocations of
1288
8.18M
  // computeKnownBits.
1289
8.18M
  
if (8.18M
!I.hasNoSignedWrap() && 8.18M
willNotOverflowSignedAdd(LHS, RHS, I)3.12M
) {
1290
33.2k
    Changed = true;
1291
33.2k
    I.setHasNoSignedWrap(true);
1292
33.2k
  }
1293
8.18M
  if (
!I.hasNoUnsignedWrap() && 8.18M
willNotOverflowUnsignedAdd(LHS, RHS, I)6.20M
) {
1294
20.4k
    Changed = true;
1295
20.4k
    I.setHasNoUnsignedWrap(true);
1296
20.4k
  }
1297
8.18M
1298
8.18M
  return Changed ? 
&I96.7k
:
nullptr8.08M
;
1299
8.25M
}
1300
1301
703k
Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
1302
703k
  bool Changed = SimplifyAssociativeOrCommutative(I);
1303
703k
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1304
703k
1305
703k
  if (Value *V = SimplifyVectorOp(I))
1306
49
    return replaceInstUsesWith(I, V);
1307
703k
1308
703k
  
if (Value *703k
V703k
= SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(),
1309
703k
                                  SQ.getWithInstruction(&I)))
1310
21
    return replaceInstUsesWith(I, V);
1311
703k
1312
703k
  
if (703k
isa<Constant>(RHS)703k
)
1313
404k
    
if (Instruction *404k
FoldedFAdd404k
= foldOpWithConstantIntoOperand(I))
1314
21
      return FoldedFAdd;
1315
703k
1316
703k
  // -A + B  -->  B - A
1317
703k
  // -A + -B  -->  -(A + B)
1318
703k
  
if (Value *703k
LHSV703k
= dyn_castFNegVal(LHS)) {
1319
22
    Instruction *RI = BinaryOperator::CreateFSub(RHS, LHSV);
1320
22
    RI->copyFastMathFlags(&I);
1321
22
    return RI;
1322
22
  }
1323
703k
1324
703k
  // A + -B  -->  A - B
1325
703k
  
if (703k
!isa<Constant>(RHS)703k
)
1326
299k
    
if (Value *299k
V299k
= dyn_castFNegVal(RHS)) {
1327
99
      Instruction *RI = BinaryOperator::CreateFSub(LHS, V);
1328
99
      RI->copyFastMathFlags(&I);
1329
99
      return RI;
1330
99
    }
1331
703k
1332
703k
  // Check for (fadd double (sitofp x), y), see if we can merge this into an
1333
703k
  // integer add followed by a promotion.
1334
703k
  
if (SIToFPInst *703k
LHSConv703k
= dyn_cast<SIToFPInst>(LHS)) {
1335
3.45k
    Value *LHSIntVal = LHSConv->getOperand(0);
1336
3.45k
    Type *FPType = LHSConv->getType();
1337
3.45k
1338
3.45k
    // TODO: This check is overly conservative. In many cases known bits
1339
3.45k
    // analysis can tell us that the result of the addition has less significant
1340
3.45k
    // bits than the integer type can hold.
1341
3.21k
    auto IsValidPromotion = [](Type *FTy, Type *ITy) {
1342
3.21k
      Type *FScalarTy = FTy->getScalarType();
1343
3.21k
      Type *IScalarTy = ITy->getScalarType();
1344
3.21k
1345
3.21k
      // Do we have enough bits in the significand to represent the result of
1346
3.21k
      // the integer addition?
1347
3.21k
      unsigned MaxRepresentableBits =
1348
3.21k
          APFloat::semanticsPrecision(FScalarTy->getFltSemantics());
1349
3.21k
      return IScalarTy->getIntegerBitWidth() <= MaxRepresentableBits;
1350
3.21k
    };
1351
3.45k
1352
3.45k
    // (fadd double (sitofp x), fpcst) --> (sitofp (add int x, intcst))
1353
3.45k
    // ... if the constant fits in the integer value.  This is useful for things
1354
3.45k
    // like (double)(x & 1234) + 4.0 -> (double)((X & 1234)+4) which no longer
1355
3.45k
    // requires a constant pool load, and generally allows the add to be better
1356
3.45k
    // instcombined.
1357
3.45k
    if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS))
1358
2.93k
      
if (2.93k
IsValidPromotion(FPType, LHSIntVal->getType())2.93k
) {
1359
2.60k
        Constant *CI =
1360
2.60k
          ConstantExpr::getFPToSI(CFP, LHSIntVal->getType());
1361
2.60k
        if (LHSConv->hasOneUse() &&
1362
1.86k
            ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
1363
2.60k
            
willNotOverflowSignedAdd(LHSIntVal, CI, I)294
) {
1364
4
          // Insert the new integer add.
1365
4
          Value *NewAdd = Builder.CreateNSWAdd(LHSIntVal, CI, "addconv");
1366
4
          return new SIToFPInst(NewAdd, I.getType());
1367
4
        }
1368
3.45k
      }
1369
3.45k
1370
3.45k
    // (fadd double (sitofp x), (sitofp y)) --> (sitofp (add int x, y))
1371
3.45k
    
if (SIToFPInst *3.45k
RHSConv3.45k
= dyn_cast<SIToFPInst>(RHS)) {
1372
281
      Value *RHSIntVal = RHSConv->getOperand(0);
1373
281
      // It's enough to check LHS types only because we require int types to
1374
281
      // be the same for this transform.
1375
281
      if (
IsValidPromotion(FPType, LHSIntVal->getType())281
) {
1376
251
        // Only do this if x/y have the same type, if at least one of them has a
1377
251
        // single use (so we don't increase the number of int->fp conversions),
1378
251
        // and if the integer add will not overflow.
1379
251
        if (LHSIntVal->getType() == RHSIntVal->getType() &&
1380
251
            
(LHSConv->hasOneUse() || 251
RHSConv->hasOneUse()126
) &&
1381
251
            
willNotOverflowSignedAdd(LHSIntVal, RHSIntVal, I)181
) {
1382
7
          // Insert the new integer add.
1383
7
          Value *NewAdd = Builder.CreateNSWAdd(LHSIntVal, RHSIntVal, "addconv");
1384
7
          return new SIToFPInst(NewAdd, I.getType());
1385
7
        }
1386
703k
      }
1387
281
    }
1388
3.45k
  }
1389
703k
1390
703k
  // Handle specials cases for FAdd with selects feeding the operation
1391
703k
  
if (Value *703k
V703k
= SimplifySelectsFeedingBinaryOp(I, LHS, RHS))
1392
8
    return replaceInstUsesWith(I, V);
1393
703k
1394
703k
  
if (703k
I.hasUnsafeAlgebra()703k
) {
1395
625
    if (Value *V = FAddCombine(Builder).simplify(&I))
1396
9
      return replaceInstUsesWith(I, V);
1397
703k
  }
1398
703k
1399
703k
  
return Changed ? 703k
&I4.27k
:
nullptr699k
;
1400
703k
}
1401
1402
/// Optimize pointer differences into the same array into a size.  Consider:
1403
///  &A[10] - &A[0]: we should compile this to "10".  LHS/RHS are the pointer
1404
/// operands to the ptrtoint instructions for the LHS/RHS of the subtract.
1405
///
1406
Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
1407
449k
                                               Type *Ty) {
1408
449k
  // If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize
1409
449k
  // this.
1410
449k
  bool Swapped = false;
1411
449k
  GEPOperator *GEP1 = nullptr, *GEP2 = nullptr;
1412
449k
1413
449k
  // For now we require one side to be the base pointer "A" or a constant
1414
449k
  // GEP derived from it.
1415
449k
  if (GEPOperator *
LHSGEP449k
= dyn_cast<GEPOperator>(LHS)) {
1416
2.89k
    // (gep X, ...) - X
1417
2.89k
    if (
LHSGEP->getOperand(0) == RHS2.89k
) {
1418
35
      GEP1 = LHSGEP;
1419
35
      Swapped = false;
1420
2.89k
    } else 
if (GEPOperator *2.85k
RHSGEP2.85k
= dyn_cast<GEPOperator>(RHS)) {
1421
439
      // (gep X, ...) - (gep X, ...)
1422
439
      if (LHSGEP->getOperand(0)->stripPointerCasts() ==
1423
439
            RHSGEP->getOperand(0)->stripPointerCasts()) {
1424
25
        GEP2 = RHSGEP;
1425
25
        GEP1 = LHSGEP;
1426
25
        Swapped = false;
1427
25
      }
1428
2.85k
    }
1429
2.89k
  }
1430
449k
1431
449k
  if (GEPOperator *
RHSGEP449k
= dyn_cast<GEPOperator>(RHS)) {
1432
2.02k
    // X - (gep X, ...)
1433
2.02k
    if (
RHSGEP->getOperand(0) == LHS2.02k
) {
1434
2
      GEP1 = RHSGEP;
1435
2
      Swapped = true;
1436
2.02k
    } else 
if (GEPOperator *2.02k
LHSGEP2.02k
= dyn_cast<GEPOperator>(LHS)) {
1437
439
      // (gep X, ...) - (gep X, ...)
1438
439
      if (RHSGEP->getOperand(0)->stripPointerCasts() ==
1439
439
            LHSGEP->getOperand(0)->stripPointerCasts()) {
1440
25
        GEP2 = LHSGEP;
1441
25
        GEP1 = RHSGEP;
1442
25
        Swapped = true;
1443
25
      }
1444
2.02k
    }
1445
2.02k
  }
1446
449k
1447
449k
  if (!GEP1)
1448
449k
    // No GEP found.
1449
449k
    return nullptr;
1450
62
1451
62
  
if (62
GEP262
) {
1452
25
    // (gep X, ...) - (gep X, ...)
1453
25
    //
1454
25
    // Avoid duplicating the arithmetic if there are more than one non-constant
1455
25
    // indices between the two GEPs and either GEP has a non-constant index and
1456
25
    // multiple users. If zero non-constant index, the result is a constant and
1457
25
    // there is no duplication. If one non-constant index, the result is an add
1458
25
    // or sub with a constant, which is no larger than the original code, and
1459
25
    // there's no duplicated arithmetic, even if either GEP has multiple
1460
25
    // users. If more than one non-constant indices combined, as long as the GEP
1461
25
    // with at least one non-constant index doesn't have multiple users, there
1462
25
    // is no duplication.
1463
25
    unsigned NumNonConstantIndices1 = GEP1->countNonConstantIndices();
1464
25
    unsigned NumNonConstantIndices2 = GEP2->countNonConstantIndices();
1465
25
    if (NumNonConstantIndices1 + NumNonConstantIndices2 > 1 &&
1466
20
        
((NumNonConstantIndices1 > 0 && 20
!GEP1->hasOneUse()19
) ||
1467
25
         
(NumNonConstantIndices2 > 0 && 9
!GEP2->hasOneUse()9
))) {
1468
12
      return nullptr;
1469
12
    }
1470
50
  }
1471
50
1472
50
  // Emit the offset of the GEP and an intptr_t.
1473
50
  Value *Result = EmitGEPOffset(GEP1);
1474
50
1475
50
  // If we had a constant expression GEP on the other side offsetting the
1476
50
  // pointer, subtract it from the offset we have.
1477
50
  if (
GEP250
) {
1478
13
    Value *Offset = EmitGEPOffset(GEP2);
1479
13
    Result = Builder.CreateSub(Result, Offset);
1480
13
  }
1481
50
1482
50
  // If we have p - gep(p, ...)  then we have to negate the result.
1483
50
  if (Swapped)
1484
15
    Result = Builder.CreateNeg(Result, "diff.neg");
1485
449k
1486
449k
  return Builder.CreateIntCast(Result, Ty, true);
1487
449k
}
1488
1489
1.88M
Instruction *InstCombiner::visitSub(BinaryOperator &I) {
1490
1.88M
  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
1491
1.88M
1492
1.88M
  if (Value *V = SimplifyVectorOp(I))
1493
0
    return replaceInstUsesWith(I, V);
1494
1.88M
1495
1.88M
  
if (Value *1.88M
V1.88M
=
1496
1.88M
          SimplifySubInst(Op0, Op1, I.hasNoSignedWrap(), I.hasNoUnsignedWrap(),
1497
1.88M
                          SQ.getWithInstruction(&I)))
1498
219
    return replaceInstUsesWith(I, V);
1499
1.88M
1500
1.88M
  // (A*B)-(A*C) -> A*(B-C) etc
1501
1.88M
  
if (Value *1.88M
V1.88M
= SimplifyUsingDistributiveLaws(I))
1502
151
    return replaceInstUsesWith(I, V);
1503
1.88M
1504
1.88M
  // If this is a 'B = x-(-A)', change to B = x+A.
1505
1.88M
  
if (Value *1.88M
V1.88M
= dyn_castNegVal(Op1)) {
1506
20.7k
    BinaryOperator *Res = BinaryOperator::CreateAdd(Op0, V);
1507
20.7k
1508
20.7k
    if (const auto *
BO20.7k
= dyn_cast<BinaryOperator>(Op1)) {
1509
99
      assert(BO->getOpcode() == Instruction::Sub &&
1510
99
             "Expected a subtraction operator!");
1511
99
      if (
BO->hasNoSignedWrap() && 99
I.hasNoSignedWrap()88
)
1512
80
        Res->setHasNoSignedWrap(true);
1513
20.7k
    } else {
1514
20.6k
      if (
cast<Constant>(Op1)->isNotMinSignedValue() && 20.6k
I.hasNoSignedWrap()20.6k
)
1515
12.0k
        Res->setHasNoSignedWrap(true);
1516
20.6k
    }
1517
20.7k
1518
20.7k
    return Res;
1519
20.7k
  }
1520
1.86M
1521
1.86M
  
if (1.86M
I.getType()->isIntOrIntVectorTy(1)1.86M
)
1522
2
    return BinaryOperator::CreateXor(Op0, Op1);
1523
1.86M
1524
1.86M
  // Replace (-1 - A) with (~A).
1525
1.86M
  
if (1.86M
match(Op0, m_AllOnes())1.86M
)
1526
1.67k
    return BinaryOperator::CreateNot(Op1);
1527
1.86M
1528
1.86M
  
if (Constant *1.86M
C1.86M
= dyn_cast<Constant>(Op0)) {
1529
672k
    // C - ~X == X + (1+C)
1530
672k
    Value *X = nullptr;
1531
672k
    if (match(Op1, m_Not(m_Value(X))))
1532
102
      return BinaryOperator::CreateAdd(X, AddOne(C));
1533
672k
1534
672k
    // Try to fold constant sub into select arguments.
1535
672k
    
if (SelectInst *672k
SI672k
= dyn_cast<SelectInst>(Op1))
1536
8.68k
      
if (Instruction *8.68k
R8.68k
= FoldOpIntoSelect(I, SI))
1537
99
        return R;
1538
671k
1539
671k
    // Try to fold constant sub into PHI values.
1540
671k
    
if (PHINode *671k
PN671k
= dyn_cast<PHINode>(Op1))
1541
110k
      
if (Instruction *110k
R110k
= foldOpIntoPhi(I, PN))
1542
117
        return R;
1543
671k
1544
671k
    // C-(X+C2) --> (C-C2)-X
1545
671k
    Constant *C2;
1546
671k
    if (match(Op1, m_Add(m_Value(X), m_Constant(C2))))
1547
1.24k
      return BinaryOperator::CreateSub(ConstantExpr::getSub(C, C2), X);
1548
670k
1549
670k
    // Fold (sub 0, (zext bool to B)) --> (sext bool to B)
1550
670k
    
if (670k
C->isNullValue() && 670k
match(Op1, m_ZExt(m_Value(X)))296k
)
1551
18.8k
      
if (18.8k
X->getType()->isIntOrIntVectorTy(1)18.8k
)
1552
168
        return CastInst::CreateSExtOrBitCast(X, Op1->getType());
1553
670k
1554
670k
    // Fold (sub 0, (sext bool to B)) --> (zext bool to B)
1555
670k
    
if (670k
C->isNullValue() && 670k
match(Op1, m_SExt(m_Value(X)))296k
)
1556
10.5k
      
if (10.5k
X->getType()->isIntOrIntVectorTy(1)10.5k
)
1557
2
        return CastInst::CreateZExtOrBitCast(X, Op1->getType());
1558
1.86M
  }
1559
1.86M
1560
1.86M
  const APInt *Op0C;
1561
1.86M
  if (
match(Op0, m_APInt(Op0C))1.86M
) {
1562
670k
    unsigned BitWidth = I.getType()->getScalarSizeInBits();
1563
670k
1564
670k
    // -(X >>u 31) -> (X >>s 31)
1565
670k
    // -(X >>s 31) -> (X >>u 31)
1566
670k
    if (
Op0C->isNullValue()670k
) {
1567
296k
      Value *X;
1568
296k
      const APInt *ShAmt;
1569
296k
      if (match(Op1, m_LShr(m_Value(X), m_APInt(ShAmt))) &&
1570
296k
          
*ShAmt == BitWidth - 11.26k
) {
1571
29
        Value *ShAmtOp = cast<Instruction>(Op1)->getOperand(1);
1572
29
        return BinaryOperator::CreateAShr(X, ShAmtOp);
1573
29
      }
1574
296k
      
if (296k
match(Op1, m_AShr(m_Value(X), m_APInt(ShAmt))) &&
1575
296k
          
*ShAmt == BitWidth - 137.2k
) {
1576
5
        Value *ShAmtOp = cast<Instruction>(Op1)->getOperand(1);
1577
5
        return BinaryOperator::CreateLShr(X, ShAmtOp);
1578
5
      }
1579
670k
    }
1580
670k
1581
670k
    // Turn this into a xor if LHS is 2^n-1 and the remaining bits are known
1582
670k
    // zero.
1583
670k
    
if (670k
Op0C->isMask()670k
) {
1584
24.8k
      KnownBits RHSKnown = computeKnownBits(Op1, 0, &I);
1585
24.8k
      if ((*Op0C | RHSKnown.Zero).isAllOnesValue())
1586
313
        return BinaryOperator::CreateXor(Op1, Op0);
1587
1.86M
    }
1588
670k
  }
1589
1.86M
1590
1.86M
  {
1591
1.86M
    Value *Y;
1592
1.86M
    // X-(X+Y) == -Y    X-(Y+X) == -Y
1593
1.86M
    if (match(Op1, m_c_Add(m_Specific(Op0), m_Value(Y))))
1594
78
      return BinaryOperator::CreateNeg(Y);
1595
1.86M
1596
1.86M
    // (X-Y)-X == -Y
1597
1.86M
    
if (1.86M
match(Op0, m_Sub(m_Specific(Op1), m_Value(Y)))1.86M
)
1598
1
      return BinaryOperator::CreateNeg(Y);
1599
1.86M
  }
1600
1.86M
1601
1.86M
  // (sub (or A, B), (xor A, B)) --> (and A, B)
1602
1.86M
  {
1603
1.86M
    Value *A, *B;
1604
1.86M
    if (match(Op1, m_Xor(m_Value(A), m_Value(B))) &&
1605
29.7k
        match(Op0, m_c_Or(m_Specific(A), m_Specific(B))))
1606
2
      return BinaryOperator::CreateAnd(A, B);
1607
1.86M
  }
1608
1.86M
1609
1.86M
  {
1610
1.86M
    Value *Y;
1611
1.86M
    // ((X | Y) - X) --> (~X & Y)
1612
1.86M
    if (match(Op0, m_OneUse(m_c_Or(m_Value(Y), m_Specific(Op1)))))
1613
4
      return BinaryOperator::CreateAnd(
1614
4
          Y, Builder.CreateNot(Op1, Op1->getName() + ".not"));
1615
1.86M
  }
1616
1.86M
1617
1.86M
  
if (1.86M
Op1->hasOneUse()1.86M
) {
1618
504k
    Value *X = nullptr, *Y = nullptr, *Z = nullptr;
1619
504k
    Constant *C = nullptr;
1620
504k
1621
504k
    // (X - (Y - Z))  -->  (X + (Z - Y)).
1622
504k
    if (match(Op1, m_Sub(m_Value(Y), m_Value(Z))))
1623
336
      return BinaryOperator::CreateAdd(Op0,
1624
336
                                      Builder.CreateSub(Z, Y, Op1->getName()));
1625
503k
1626
503k
    // (X - (X & Y))   -->   (X & ~Y)
1627
503k
    //
1628
503k
    
if (503k
match(Op1, m_c_And(m_Value(Y), m_Specific(Op0)))503k
)
1629
4.27k
      return BinaryOperator::CreateAnd(Op0,
1630
4.27k
                                  Builder.CreateNot(Y, Y->getName() + ".not"));
1631
499k
1632
499k
    // 0 - (X sdiv C)  -> (X sdiv -C)  provided the negation doesn't overflow.
1633
499k
    
if (499k
match(Op1, m_SDiv(m_Value(X), m_Constant(C))) && 499k
match(Op0, m_Zero())498
&&
1634
499k
        
C->isNotMinSignedValue()28
&&
!C->isOneValue()28
)
1635
28
      return BinaryOperator::CreateSDiv(X, ConstantExpr::getNeg(C));
1636
499k
1637
499k
    // 0 - (X << Y)  -> (-X << Y)   when X is freely negatable.
1638
499k
    
if (499k
match(Op1, m_Shl(m_Value(X), m_Value(Y))) && 499k
match(Op0, m_Zero())68.3k
)
1639
778
      
if (Value *778
XNeg778
= dyn_castNegVal(X))
1640
15
        return BinaryOperator::CreateShl(XNeg, Y);
1641
499k
1642
499k
    // Subtracting -1/0 is the same as adding 1/0:
1643
499k
    // sub [nsw] Op0, sext(bool Y) -> add [nsw] Op0, zext(bool Y)
1644
499k
    // 'nuw' is dropped in favor of the canonical form.
1645
499k
    
if (499k
match(Op1, m_SExt(m_Value(Y))) &&
1646
499k
        
Y->getType()->getScalarSizeInBits() == 121.2k
) {
1647
5
      Value *Zext = Builder.CreateZExt(Y, I.getType());
1648
5
      BinaryOperator *Add = BinaryOperator::CreateAdd(Op0, Zext);
1649
5
      Add->setHasNoSignedWrap(I.hasNoSignedWrap());
1650
5
      return Add;
1651
5
    }
1652
499k
1653
499k
    // X - A*-B -> X + A*B
1654
499k
    // X - -A*B -> X + A*B
1655
499k
    Value *A, *B;
1656
499k
    Constant *CI;
1657
499k
    if (match(Op1, m_c_Mul(m_Value(A), m_Neg(m_Value(B)))))
1658
2
      return BinaryOperator::CreateAdd(Op0, Builder.CreateMul(A, B));
1659
499k
1660
499k
    // X - A*CI -> X + A*-CI
1661
499k
    // No need to handle commuted multiply because multiply handling will
1662
499k
    // ensure constant will be move to the right hand side.
1663
499k
    
if (499k
match(Op1, m_Mul(m_Value(A), m_Constant(CI)))499k
) {
1664
261
      Value *NewMul = Builder.CreateMul(A, ConstantExpr::getNeg(CI));
1665
261
      return BinaryOperator::CreateAdd(Op0, NewMul);
1666
261
    }
1667
1.85M
  }
1668
1.85M
1669
1.85M
  // Optimize pointer differences into the same array into a size.  Consider:
1670
1.85M
  //  &A[10] - &A[0]: we should compile this to "10".
1671
1.85M
  Value *LHSOp, *RHSOp;
1672
1.85M
  if (match(Op0, m_PtrToInt(m_Value(LHSOp))) &&
1673
456k
      match(Op1, m_PtrToInt(m_Value(RHSOp))))
1674
449k
    
if (Value *449k
Res449k
= OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
1675
48
      return replaceInstUsesWith(I, Res);
1676
1.85M
1677
1.85M
  // trunc(p)-trunc(q) -> trunc(p-q)
1678
1.85M
  
if (1.85M
match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) &&
1679
20
      match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp)))))
1680
2
    
if (Value *2
Res2
= OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
1681
2
      return replaceInstUsesWith(I, Res);
1682
1.85M
1683
1.85M
  bool Changed = false;
1684
1.85M
  if (
!I.hasNoSignedWrap() && 1.85M
willNotOverflowSignedSub(Op0, Op1, I)910k
) {
1685
18.8k
    Changed = true;
1686
18.8k
    I.setHasNoSignedWrap(true);
1687
18.8k
  }
1688
1.85M
  if (
!I.hasNoUnsignedWrap() && 1.85M
willNotOverflowUnsignedSub(Op0, Op1, I)1.85M
) {
1689
45
    Changed = true;
1690
45
    I.setHasNoUnsignedWrap(true);
1691
45
  }
1692
1.85M
1693
1.85M
  return Changed ? 
&I18.8k
:
nullptr1.83M
;
1694
1.88M
}
1695
1696
183k
Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
1697
183k
  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
1698
183k
1699
183k
  if (Value *V = SimplifyVectorOp(I))
1700
25
    return replaceInstUsesWith(I, V);
1701
183k
1702
183k
  
if (Value *183k
V183k
= SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(),
1703
183k
                                  SQ.getWithInstruction(&I)))
1704
11
    return replaceInstUsesWith(I, V);
1705
183k
1706
183k
  // fsub nsz 0, X ==> fsub nsz -0.0, X
1707
183k
  
if (183k
I.getFastMathFlags().noSignedZeros() && 183k
match(Op0, m_Zero())147
) {
1708
6
    // Subtraction from -0.0 is the canonical form of fneg.
1709
6
    Instruction *NewI = BinaryOperator::CreateFNeg(Op1);
1710
6
    NewI->copyFastMathFlags(&I);
1711
6
    return NewI;
1712
6
  }
1713
183k
1714
183k
  
if (183k
isa<Constant>(Op0)183k
)
1715
75.7k
    
if (SelectInst *75.7k
SI75.7k
= dyn_cast<SelectInst>(Op1))
1716
468
      
if (Instruction *468
NV468
= FoldOpIntoSelect(I, SI))
1717
2
        return NV;
1718
183k
1719
183k
  // If this is a 'B = x-(-A)', change to B = x+A, potentially looking
1720
183k
  // through FP extensions/truncations along the way.
1721
183k
  
if (Value *183k
V183k
= dyn_castFNegVal(Op1)) {
1722
504
    Instruction *NewI = BinaryOperator::CreateFAdd(Op0, V);
1723
504
    NewI->copyFastMathFlags(&I);
1724
504
    return NewI;
1725
504
  }
1726
182k
  
if (FPTruncInst *182k
FPTI182k
= dyn_cast<FPTruncInst>(Op1)) {
1727
1.62k
    if (Value *
V1.62k
= dyn_castFNegVal(FPTI->getOperand(0))) {
1728
0
      Value *NewTrunc = Builder.CreateFPTrunc(V, I.getType());
1729
0
      Instruction *NewI = BinaryOperator::CreateFAdd(Op0, NewTrunc);
1730
0
      NewI->copyFastMathFlags(&I);
1731
0
      return NewI;
1732
0
    }
1733
180k
  } else 
if (FPExtInst *180k
FPEI180k
= dyn_cast<FPExtInst>(Op1)) {
1734
4.78k
    if (Value *
V4.78k
= dyn_castFNegVal(FPEI->getOperand(0))) {
1735
2
      Value *NewExt = Builder.CreateFPExt(V, I.getType());
1736
2
      Instruction *NewI = BinaryOperator::CreateFAdd(Op0, NewExt);
1737
2
      NewI->copyFastMathFlags(&I);
1738
2
      return NewI;
1739
2
    }
1740
182k
  }
1741
182k
1742
182k
  // Handle specials cases for FSub with selects feeding the operation
1743
182k
  
if (Value *182k
V182k
= SimplifySelectsFeedingBinaryOp(I, Op0, Op1))
1744
1
    return replaceInstUsesWith(I, V);
1745
182k
1746
182k
  
if (182k
I.hasUnsafeAlgebra()182k
) {
1747
129
    if (Value *V = FAddCombine(Builder).simplify(&I))
1748
9
      return replaceInstUsesWith(I, V);
1749
182k
  }
1750
182k
1751
182k
  return nullptr;
1752
182k
}