Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp

//===- InstructionCombining.cpp - Combine multiple instructions -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// InstructionCombining - Combine instructions to form fewer, simple
// instructions.  This pass does not modify the CFG.  This pass is where
// algebraic simplification happens.
//
// This pass combines things like:
//    %Y = add i32 %X, 1
//    %Z = add i32 %Y, 1
// into:
//    %Z = add i32 %X, 2
//
// This is a simple worklist driven algorithm.
//
// This pass guarantees that the following canonicalizations are performed on
// the program:
//    1. If a binary operator has a constant operand, it is moved to the RHS
//    2. Bitwise operators with constant operands are always grouped so that
//       shifts are performed first, then or's, then and's, then xor's.
//    3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible
//    4. All cmp instructions on boolean values are replaced with logical ops
//    5. add X, X is represented as (X*2) => (X << 1)
//    6. Multiplies with a power-of-two constant argument are transformed into
//       shifts.
//   ... etc.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm-c/Initialization.h"
#include "llvm-c/Transforms/InstCombine.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetFolder.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/CBindingWrapping.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/InstCombine/InstCombine.h"
#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <memory>
#include <string>
#include <utility>

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "instcombine"

STATISTIC(NumCombined , "Number of insts combined");
STATISTIC(NumConstProp, "Number of constant folds");
STATISTIC(NumDeadInst , "Number of dead inst eliminated");
STATISTIC(NumSunkInst , "Number of instructions sunk");
STATISTIC(NumExpand,    "Number of expansions");
STATISTIC(NumFactor   , "Number of factorizations");
STATISTIC(NumReassoc  , "Number of reassociations");
DEBUG_COUNTER(VisitCounter, "instcombine-visit",
              "Controls which instructions are visited");

static cl::opt<bool>
EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"),
                                              cl::init(true));

static cl::opt<bool>
EnableExpensiveCombines("expensive-combines",
                        cl::desc("Enable expensive instruction combines"));

static cl::opt<unsigned>
MaxArraySize("instcombine-maxarray-size", cl::init(1024),
             cl::desc("Maximum array size considered when doing a combine"));

// FIXME: Remove this flag when it is no longer necessary to convert
// llvm.dbg.declare to avoid inaccurate debug info. Setting this to false
// increases variable availability at the cost of accuracy. Variables that
// cannot be promoted by mem2reg or SROA will be described as living in memory
// for their entire lifetime. However, passes like DSE and instcombine can
// delete stores to the alloca, leading to misleading and inaccurate debug
// information. This flag can be removed when those passes are fixed.
static cl::opt<unsigned> ShouldLowerDbgDeclare("instcombine-lower-dbg-declare",
                                               cl::Hidden, cl::init(true));

Value *InstCombiner::EmitGEPOffset(User *GEP) {
  return llvm::EmitGEPOffset(&Builder, DL, GEP);
}

/// Return true if it is desirable to convert an integer computation from a
/// given bit width to a new bit width.
/// We don't want to convert from a legal to an illegal type or from a smaller
/// to a larger illegal type. A width of '1' is always treated as a legal type
/// because i1 is a fundamental type in IR, and there are many specialized
/// optimizations for i1 types. Widths of 8, 16 or 32 are equally treated as
/// legal to convert to, in order to open up more combining opportunities.
/// NOTE: this treats i8, i16 and i32 specially, due to them being so common
/// from frontend languages.
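/// For example (an illustrative scenario assuming a datalayout where i32 is
/// legal and i17 is not): shouldChangeType(17, 8) returns true because we
/// shrink toward a common width, while shouldChangeType(32, 17) returns false
/// because it would turn a legal type into an illegal one.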
bool InstCombiner::shouldChangeType(unsigned FromWidth,
                                    unsigned ToWidth) const {
  bool FromLegal = FromWidth == 1 || DL.isLegalInteger(FromWidth);
  bool ToLegal = ToWidth == 1 || DL.isLegalInteger(ToWidth);

  // Convert to widths of 8, 16 or 32 even if they are not legal types. Only
  // shrink types, to prevent infinite loops.
  if (ToWidth < FromWidth && (ToWidth == 8 || ToWidth == 16 || ToWidth == 32))
    return true;

  // If this is a legal integer from type, and the result would be an illegal
  // type, don't do the transformation.
  if (FromLegal && !ToLegal)
    return false;

  // Otherwise, if both are illegal, do not increase the size of the result. We
  // do allow things like i160 -> i64, but not i64 -> i160.
  if (!FromLegal && !ToLegal && ToWidth > FromWidth)
    return false;

  return true;
}

/// Return true if it is desirable to convert a computation from 'From' to 'To'.
/// We don't want to convert from a legal to an illegal type or from a smaller
/// to a larger illegal type. i1 is always treated as a legal type because it is
/// a fundamental type in IR, and there are many specialized optimizations for
/// i1 types.
bool InstCombiner::shouldChangeType(Type *From, Type *To) const {
  // TODO: This could be extended to allow vectors. Datalayout changes might be
  // needed to properly support that.
  if (!From->isIntegerTy() || !To->isIntegerTy())
    return false;

  unsigned FromWidth = From->getPrimitiveSizeInBits();
  unsigned ToWidth = To->getPrimitiveSizeInBits();
  return shouldChangeType(FromWidth, ToWidth);
}

// Return true if No Signed Wrap should be maintained for I.
// The No Signed Wrap flag can be kept if the operation "B (I.getOpcode) C",
// where both B and C should be ConstantInts, results in a constant that does
// not overflow. This function only handles the Add and Sub opcodes. For
// all other opcodes, the function conservatively returns false.
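// For example (an illustrative case): when reassociating
// "(add nsw (add nsw %X, 3), 4)", B = 3 and C = 4 fold to 7 with no signed
// overflow, so the nsw flag may be kept on the resulting add.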
static bool MaintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) {
  OverflowingBinaryOperator *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
  if (!OBO || !OBO->hasNoSignedWrap())
    return false;

  // We reason about Add and Sub only.
  Instruction::BinaryOps Opcode = I.getOpcode();
  if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
    return false;

  const APInt *BVal, *CVal;
  if (!match(B, m_APInt(BVal)) || !match(C, m_APInt(CVal)))
    return false;

  bool Overflow = false;
  if (Opcode == Instruction::Add)
    (void)BVal->sadd_ov(*CVal, Overflow);
  else
    (void)BVal->ssub_ov(*CVal, Overflow);

  return !Overflow;
}

static bool hasNoUnsignedWrap(BinaryOperator &I) {
  OverflowingBinaryOperator *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
  return OBO && OBO->hasNoUnsignedWrap();
}

/// Conservatively clears subclassOptionalData after a reassociation or
/// commutation. We preserve fast-math flags when applicable, as they can
/// safely be kept across these rewrites.
static void ClearSubclassDataAfterReassociation(BinaryOperator &I) {
  FPMathOperator *FPMO = dyn_cast<FPMathOperator>(&I);
  if (!FPMO) {
    I.clearSubclassOptionalData();
    return;
  }

  FastMathFlags FMF = I.getFastMathFlags();
  I.clearSubclassOptionalData();
  I.setFastMathFlags(FMF);
}

/// Combine constant operands of associative operations either before or after a
/// cast to eliminate one of the associative operations:
/// (op (cast (op X, C2)), C1) --> (cast (op X, op (C1, C2)))
/// (op (cast (op X, C2)), C1) --> (op (cast X), op (C1, C2))
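/// For instance (an illustrative IR instance):
///   and (zext (and i8 %x, 3) to i32), 7 --> and (zext i8 %x to i32), 3
/// because "zext(3) & 7" constant-folds to 3.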
static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1) {
  auto *Cast = dyn_cast<CastInst>(BinOp1->getOperand(0));
  if (!Cast || !Cast->hasOneUse())
    return false;

  // TODO: Enhance logic for other casts and remove this check.
  auto CastOpcode = Cast->getOpcode();
  if (CastOpcode != Instruction::ZExt)
    return false;

  // TODO: Enhance logic for other BinOps and remove this check.
  if (!BinOp1->isBitwiseLogicOp())
    return false;

  auto AssocOpcode = BinOp1->getOpcode();
  auto *BinOp2 = dyn_cast<BinaryOperator>(Cast->getOperand(0));
  if (!BinOp2 || !BinOp2->hasOneUse() || BinOp2->getOpcode() != AssocOpcode)
    return false;

  Constant *C1, *C2;
  if (!match(BinOp1->getOperand(1), m_Constant(C1)) ||
      !match(BinOp2->getOperand(1), m_Constant(C2)))
    return false;

  // TODO: This assumes a zext cast.
  // Eg, if it was a trunc, we'd cast C1 to the source type because casting C2
  // to the destination type might lose bits.

  // Fold the constants together in the destination type:
  // (op (cast (op X, C2)), C1) --> (op (cast X), FoldedC)
  Type *DestTy = C1->getType();
  Constant *CastC2 = ConstantExpr::getCast(CastOpcode, C2, DestTy);
  Constant *FoldedC = ConstantExpr::get(AssocOpcode, C1, CastC2);
  Cast->setOperand(0, BinOp2->getOperand(0));
  BinOp1->setOperand(1, FoldedC);
  return true;
}

/// This performs a few simplifications for operators that are associative or
/// commutative:
///
///  Commutative operators:
///
///  1. Order operands such that they are listed from right (least complex) to
///     left (most complex).  This puts constants before unary operators before
///     binary operators.
///
///  Associative operators:
///
///  2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
///  3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
///
///  Associative and commutative operators:
///
///  4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
///  5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
///  6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
///     if C1 and C2 are constants.
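///
/// As an illustrative instance of transform 2: in "(X + 5) + -5", the inner
/// "5 + -5" simplifies to 0, so the expression is rewritten to "X + 0" and
/// left for later combines to clean up.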
bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
  Instruction::BinaryOps Opcode = I.getOpcode();
  bool Changed = false;

  do {
    // Order operands such that they are listed from right (least complex) to
    // left (most complex).  This puts constants before unary operators before
    // binary operators.
    if (I.isCommutative() && getComplexity(I.getOperand(0)) <
        getComplexity(I.getOperand(1)))
      Changed = !I.swapOperands();

    BinaryOperator *Op0 = dyn_cast<BinaryOperator>(I.getOperand(0));
    BinaryOperator *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1));

    if (I.isAssociative()) {
      // Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
      if (Op0 && Op0->getOpcode() == Opcode) {
        Value *A = Op0->getOperand(0);
        Value *B = Op0->getOperand(1);
        Value *C = I.getOperand(1);

        // Does "B op C" simplify?
        if (Value *V = SimplifyBinOp(Opcode, B, C, SQ.getWithInstruction(&I))) {
          // It simplifies to V.  Form "A op V".
          I.setOperand(0, A);
          I.setOperand(1, V);
          // Conservatively clear the optional flags, since they may not be
          // preserved by the reassociation.
          bool IsNUW = hasNoUnsignedWrap(I) && hasNoUnsignedWrap(*Op0);
          bool IsNSW = MaintainNoSignedWrap(I, B, C);

          ClearSubclassDataAfterReassociation(I);

          if (IsNUW)
            I.setHasNoUnsignedWrap(true);

          if (IsNSW &&
              (!Op0 || (isa<BinaryOperator>(Op0) && Op0->hasNoSignedWrap()))) {
            // Note: this is only valid because SimplifyBinOp doesn't look at
            // the operands to Op0.
            I.setHasNoSignedWrap(true);
          }

          Changed = true;
          ++NumReassoc;
          continue;
        }
      }

      // Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
      if (Op1 && Op1->getOpcode() == Opcode) {
        Value *A = I.getOperand(0);
        Value *B = Op1->getOperand(0);
        Value *C = Op1->getOperand(1);

        // Does "A op B" simplify?
        if (Value *V = SimplifyBinOp(Opcode, A, B, SQ.getWithInstruction(&I))) {
          // It simplifies to V.  Form "V op C".
          I.setOperand(0, V);
          I.setOperand(1, C);
          // Conservatively clear the optional flags, since they may not be
          // preserved by the reassociation.
          ClearSubclassDataAfterReassociation(I);
          Changed = true;
          ++NumReassoc;
          continue;
        }
      }
    }

    if (I.isAssociative() && I.isCommutative()) {
      if (simplifyAssocCastAssoc(&I)) {
        Changed = true;
        ++NumReassoc;
        continue;
      }

      // Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
      if (Op0 && Op0->getOpcode() == Opcode) {
        Value *A = Op0->getOperand(0);
        Value *B = Op0->getOperand(1);
        Value *C = I.getOperand(1);

        // Does "C op A" simplify?
        if (Value *V = SimplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
          // It simplifies to V.  Form "V op B".
          I.setOperand(0, V);
          I.setOperand(1, B);
          // Conservatively clear the optional flags, since they may not be
          // preserved by the reassociation.
          ClearSubclassDataAfterReassociation(I);
          Changed = true;
          ++NumReassoc;
          continue;
        }
      }

      // Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
      if (Op1 && Op1->getOpcode() == Opcode) {
        Value *A = I.getOperand(0);
        Value *B = Op1->getOperand(0);
        Value *C = Op1->getOperand(1);

        // Does "C op A" simplify?
        if (Value *V = SimplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
          // It simplifies to V.  Form "B op V".
          I.setOperand(0, B);
          I.setOperand(1, V);
          // Conservatively clear the optional flags, since they may not be
          // preserved by the reassociation.
          ClearSubclassDataAfterReassociation(I);
          Changed = true;
          ++NumReassoc;
          continue;
        }
      }

      // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
      // if C1 and C2 are constants.
      Value *A, *B;
      Constant *C1, *C2;
      if (Op0 && Op1 &&
          Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode &&
          match(Op0, m_OneUse(m_BinOp(m_Value(A), m_Constant(C1)))) &&
          match(Op1, m_OneUse(m_BinOp(m_Value(B), m_Constant(C2))))) {
        bool IsNUW = hasNoUnsignedWrap(I) &&
                     hasNoUnsignedWrap(*Op0) &&
                     hasNoUnsignedWrap(*Op1);
        BinaryOperator *NewBO = (IsNUW && Opcode == Instruction::Add) ?
            BinaryOperator::CreateNUW(Opcode, A, B) :
            BinaryOperator::Create(Opcode, A, B);

        if (isa<FPMathOperator>(NewBO)) {
          FastMathFlags Flags = I.getFastMathFlags();
          Flags &= Op0->getFastMathFlags();
          Flags &= Op1->getFastMathFlags();
          NewBO->setFastMathFlags(Flags);
        }
        InsertNewInstWith(NewBO, I);
        NewBO->takeName(Op1);
        I.setOperand(0, NewBO);
        I.setOperand(1, ConstantExpr::get(Opcode, C1, C2));
        // Conservatively clear the optional flags, since they may not be
        // preserved by the reassociation.
        ClearSubclassDataAfterReassociation(I);
        if (IsNUW)
          I.setHasNoUnsignedWrap(true);

        Changed = true;
        continue;
      }
    }

    // No further simplifications.
    return Changed;
  } while (true);
}

/// Return whether "X LOp (Y ROp Z)" is always equal to
/// "(X LOp Y) ROp (X LOp Z)".
static bool leftDistributesOverRight(Instruction::BinaryOps LOp,
                                     Instruction::BinaryOps ROp) {
  // X & (Y | Z) <--> (X & Y) | (X & Z)
  // X & (Y ^ Z) <--> (X & Y) ^ (X & Z)
  if (LOp == Instruction::And)
    return ROp == Instruction::Or || ROp == Instruction::Xor;

  // X | (Y & Z) <--> (X | Y) & (X | Z)
  if (LOp == Instruction::Or)
    return ROp == Instruction::And;

  // X * (Y + Z) <--> (X * Y) + (X * Z)
  // X * (Y - Z) <--> (X * Y) - (X * Z)
  if (LOp == Instruction::Mul)
    return ROp == Instruction::Add || ROp == Instruction::Sub;

  return false;
}

/// Return whether "(X LOp Y) ROp Z" is always equal to
/// "(X ROp Z) LOp (Y ROp Z)".
static bool rightDistributesOverLeft(Instruction::BinaryOps LOp,
                                     Instruction::BinaryOps ROp) {
  if (Instruction::isCommutative(ROp))
    return leftDistributesOverRight(ROp, LOp);

  // (X {&|^} Y) >> Z <--> (X >> Z) {&|^} (Y >> Z) for all shifts.
  return Instruction::isBitwiseLogicOp(LOp) && Instruction::isShift(ROp);

  // TODO: It would be nice to handle division, aka "(X + Y)/Z = X/Z + Y/Z",
  // but this requires knowing that the addition does not overflow and other
  // such subtleties.
}

/// This function returns the identity value for the given opcode, which can be
/// used to factor patterns like (X * 2) + X ==> (X * 2) + (X * 1) ==> X * (2 + 1).
static Value *getIdentityValue(Instruction::BinaryOps Opcode, Value *V) {
  if (isa<Constant>(V))
    return nullptr;

  return ConstantExpr::getBinOpIdentity(Opcode, V->getType());
}

/// This function predicates factorization using distributive laws. By default,
/// it just returns the 'Op' inputs. But for special-cases like
/// 'add(shl(X, 5), ...)', this function will have TopOpcode == Instruction::Add
/// and Op = shl(X, 5). The 'shl' is treated as the more general 'mul X, 32' to
/// allow more factorization opportunities.
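/// For instance (illustrative): under TopOpcode == Instruction::Add, an Op of
/// "shl i32 %x, 5" is reported as Instruction::Mul with RHS rewritten to the
/// constant 32 (1 << 5), so it can be factorized together with explicit
/// multiplies of %x.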
static Instruction::BinaryOps
getBinOpsForFactorization(Instruction::BinaryOps TopOpcode, BinaryOperator *Op,
                          Value *&LHS, Value *&RHS) {
  assert(Op && "Expected a binary operator");
  LHS = Op->getOperand(0);
  RHS = Op->getOperand(1);
  if (TopOpcode == Instruction::Add || TopOpcode == Instruction::Sub) {
    Constant *C;
    if (match(Op, m_Shl(m_Value(), m_Constant(C)))) {
      // X << C --> X * (1 << C)
      RHS = ConstantExpr::getShl(ConstantInt::get(Op->getType(), 1), C);
      return Instruction::Mul;
    }
    // TODO: We can add other conversions e.g. shr => div etc.
  }
  return Op->getOpcode();
}

/// This tries to simplify binary operations by factorizing out common terms
/// (e.g. "(A*B)+(A*C)" -> "A*(B+C)").
Value *InstCombiner::tryFactorization(BinaryOperator &I,
                                      Instruction::BinaryOps InnerOpcode,
                                      Value *A, Value *B, Value *C, Value *D) {
  assert(A && B && C && D && "All values must be provided");

  Value *V = nullptr;
  Value *SimplifiedInst = nullptr;
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  Instruction::BinaryOps TopLevelOpcode = I.getOpcode();

  // Does "X op' Y" always equal "Y op' X"?
  bool InnerCommutative = Instruction::isCommutative(InnerOpcode);

  // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"?
  if (leftDistributesOverRight(InnerOpcode, TopLevelOpcode))
    // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
    // commutative case, "(A op' B) op (C op' A)"?
    if (A == C || (InnerCommutative && A == D)) {
      if (A != C)
        std::swap(C, D);
      // Consider forming "A op' (B op D)".
      // If "B op D" simplifies then it can be formed with no cost.
      V = SimplifyBinOp(TopLevelOpcode, B, D, SQ.getWithInstruction(&I));
      // If "B op D" doesn't simplify then only go on if both of the existing
      // operations "A op' B" and "C op' D" will be zapped as no longer used.
      if (!V && LHS->hasOneUse() && RHS->hasOneUse())
        V = Builder.CreateBinOp(TopLevelOpcode, B, D, RHS->getName());
      if (V) {
        SimplifiedInst = Builder.CreateBinOp(InnerOpcode, A, V);
      }
    }

  // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"?
  if (!SimplifiedInst && rightDistributesOverLeft(TopLevelOpcode, InnerOpcode))
    // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
    // commutative case, "(A op' B) op (B op' D)"?
    if (B == D || (InnerCommutative && B == C)) {
      if (B != D)
        std::swap(C, D);
      // Consider forming "(A op C) op' B".
      // If "A op C" simplifies then it can be formed with no cost.
      V = SimplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I));

      // If "A op C" doesn't simplify then only go on if both of the existing
      // operations "A op' B" and "C op' D" will be zapped as no longer used.
      if (!V && LHS->hasOneUse() && RHS->hasOneUse())
        V = Builder.CreateBinOp(TopLevelOpcode, A, C, LHS->getName());
      if (V) {
        SimplifiedInst = Builder.CreateBinOp(InnerOpcode, V, B);
      }
    }

  if (SimplifiedInst) {
    ++NumFactor;
    SimplifiedInst->takeName(&I);

    // Check if we can add NSW/NUW flags to SimplifiedInst. If so, set them.
    if (BinaryOperator *BO = dyn_cast<BinaryOperator>(SimplifiedInst)) {
      if (isa<OverflowingBinaryOperator>(SimplifiedInst)) {
        bool HasNSW = false;
        bool HasNUW = false;
        if (isa<OverflowingBinaryOperator>(&I)) {
          HasNSW = I.hasNoSignedWrap();
          HasNUW = I.hasNoUnsignedWrap();
        }

        if (auto *LOBO = dyn_cast<OverflowingBinaryOperator>(LHS)) {
          HasNSW &= LOBO->hasNoSignedWrap();
          HasNUW &= LOBO->hasNoUnsignedWrap();
        }

        if (auto *ROBO = dyn_cast<OverflowingBinaryOperator>(RHS)) {
          HasNSW &= ROBO->hasNoSignedWrap();
          HasNUW &= ROBO->hasNoUnsignedWrap();
        }

        const APInt *CInt;
        if (TopLevelOpcode == Instruction::Add &&
            InnerOpcode == Instruction::Mul) {
          // We can propagate 'nsw' if we know that
          //  %Y = mul nsw i16 %X, C
          //  %Z = add nsw i16 %Y, %X
          // =>
          //  %Z = mul nsw i16 %X, C+1
          //
          // iff C+1 isn't INT_MIN
          if (match(V, m_APInt(CInt))) {
            if (!CInt->isMinSignedValue())
              BO->setHasNoSignedWrap(HasNSW);
          }

          // nuw can be propagated with any constant or nuw value.
          BO->setHasNoUnsignedWrap(HasNUW);
        }
      }
    }
  }
  return SimplifiedInst;
}

/// This tries to simplify binary operations which some other binary operation
/// distributes over, either by factorizing out common terms
/// (e.g. "(A*B)+(A*C)" -> "A*(B+C)") or by expanding out if this results in
/// simplifications (e.g. "A & (B | C) -> (A&B) | (A&C)" if this is a win).
/// Returns the simplified value, or null if it didn't simplify.
Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
  BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
  Instruction::BinaryOps TopLevelOpcode = I.getOpcode();

  {
    // Factorization.
    Value *A, *B, *C, *D;
    Instruction::BinaryOps LHSOpcode, RHSOpcode;
    if (Op0)
      LHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op0, A, B);
    if (Op1)
      RHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op1, C, D);

    // The instruction has the form "(A op' B) op (C op' D)".  Try to factorize
    // a common term.
    if (Op0 && Op1 && LHSOpcode == RHSOpcode)
      if (Value *V = tryFactorization(I, LHSOpcode, A, B, C, D))
        return V;

    // The instruction has the form "(A op' B) op (C)".  Try to factorize a
    // common term.
    if (Op0)
      if (Value *Ident = getIdentityValue(LHSOpcode, RHS))
        if (Value *V = tryFactorization(I, LHSOpcode, A, B, RHS, Ident))
          return V;

    // The instruction has the form "(B) op (C op' D)".  Try to factorize a
    // common term.
    if (Op1)
      if (Value *Ident = getIdentityValue(RHSOpcode, LHS))
        if (Value *V = tryFactorization(I, RHSOpcode, LHS, Ident, C, D))
          return V;
  }

  // Expansion.
  if (Op0 && rightDistributesOverLeft(Op0->getOpcode(), TopLevelOpcode)) {
    // The instruction has the form "(A op' B) op C".  See if expanding it out
    // to "(A op C) op' (B op C)" results in simplifications.
    Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS;
    Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'

    Value *L = SimplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I));
    Value *R = SimplifyBinOp(TopLevelOpcode, B, C, SQ.getWithInstruction(&I));

    // Do "A op C" and "B op C" both simplify?
    if (L && R) {
      // They do! Return "L op' R".
      ++NumExpand;
      C = Builder.CreateBinOp(InnerOpcode, L, R);
      C->takeName(&I);
      return C;
    }

    // Does "A op C" simplify to the identity value for the inner opcode?
    if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) {
      // It does! Return "B op C".
      ++NumExpand;
      C = Builder.CreateBinOp(TopLevelOpcode, B, C);
      C->takeName(&I);
      return C;
    }

    // Does "B op C" simplify to the identity value for the inner opcode?
    if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) {
      // It does! Return "A op C".
      ++NumExpand;
      C = Builder.CreateBinOp(TopLevelOpcode, A, C);
      C->takeName(&I);
      return C;
    }
  }

  if (Op1 && leftDistributesOverRight(TopLevelOpcode, Op1->getOpcode())) {
    // The instruction has the form "A op (B op' C)".  See if expanding it out
    // to "(A op B) op' (A op C)" results in simplifications.
    Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1);
    Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op'

    Value *L = SimplifyBinOp(TopLevelOpcode, A, B, SQ.getWithInstruction(&I));
    Value *R = SimplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I));

    // Do "A op B" and "A op C" both simplify?
    if (L && R) {
      // They do! Return "L op' R".
      ++NumExpand;
      A = Builder.CreateBinOp(InnerOpcode, L, R);
      A->takeName(&I);
      return A;
    }

    // Does "A op B" simplify to the identity value for the inner opcode?
    if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) {
      // It does! Return "A op C".
      ++NumExpand;
      A = Builder.CreateBinOp(TopLevelOpcode, A, C);
      A->takeName(&I);
      return A;
    }

    // Does "A op C" simplify to the identity value for the inner opcode?
    if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) {
      // It does! Return "A op B".
      ++NumExpand;
      A = Builder.CreateBinOp(TopLevelOpcode, A, B);
      A->takeName(&I);
      return A;
    }
  }

  return SimplifySelectsFeedingBinaryOp(I, LHS, RHS);
}
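
// Illustrative IR for the transform below: given
//   %s = select i1 %a, i32 %b, i32 %c
//   %t = select i1 %a, i32 %d, i32 %e
// "add %s, %t" can become "select i1 %a, (add %b, %d), (add %c, %e)" when both
// arm-wise adds simplify, or when one simplifies and both selects are
// single-use.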
Value *InstCombiner::SimplifySelectsFeedingBinaryOp(BinaryOperator &I,
                                                    Value *LHS, Value *RHS) {
  Instruction::BinaryOps Opcode = I.getOpcode();
  // (op (select (a, b, c)), (select (a, d, e)))
  //   -> (select (a, (op b, d), (op c, e)))
  Value *A, *B, *C, *D, *E;
  Value *SI = nullptr;
  if (match(LHS, m_Select(m_Value(A), m_Value(B), m_Value(C))) &&
      match(RHS, m_Select(m_Specific(A), m_Value(D), m_Value(E)))) {
    bool SelectsHaveOneUse = LHS->hasOneUse() && RHS->hasOneUse();
    BuilderTy::FastMathFlagGuard Guard(Builder);
    if (isa<FPMathOperator>(&I))
      Builder.setFastMathFlags(I.getFastMathFlags());

    Value *V1 = SimplifyBinOp(Opcode, C, E, SQ.getWithInstruction(&I));
    Value *V2 = SimplifyBinOp(Opcode, B, D, SQ.getWithInstruction(&I));
    if (V1 && V2)
      SI = Builder.CreateSelect(A, V2, V1);
    else if (V2 && SelectsHaveOneUse)
      SI = Builder.CreateSelect(A, V2, Builder.CreateBinOp(Opcode, C, E));
    else if (V1 && SelectsHaveOneUse)
      SI = Builder.CreateSelect(A, Builder.CreateBinOp(Opcode, B, D), V1);

    if (SI)
      SI->takeName(&I);
  }

  return SI;
}

/// Given a 'sub' instruction, return the RHS of the instruction if the LHS is
/// a constant zero (which is the 'negate' form).
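/// For example (illustrative): for V = "sub i32 0, %x" this returns %x, and
/// for the constant i32 -5 it returns the folded constant i32 5.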
Value *InstCombiner::dyn_castNegVal(Value *V) const {
  Value *NegV;
  if (match(V, m_Neg(m_Value(NegV))))
    return NegV;

  // Constants can be considered to be negated values if they can be folded.
  if (ConstantInt *C = dyn_cast<ConstantInt>(V))
    return ConstantExpr::getNeg(C);

  if (ConstantDataVector *C = dyn_cast<ConstantDataVector>(V))
    if (C->getType()->getElementType()->isIntegerTy())
      return ConstantExpr::getNeg(C);

  if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) {
    for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
      Constant *Elt = CV->getAggregateElement(i);
      if (!Elt)
        return nullptr;

      if (isa<UndefValue>(Elt))
        continue;

      if (!isa<ConstantInt>(Elt))
        return nullptr;
    }
    return ConstantExpr::getNeg(CV);
  }

  return nullptr;
}
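
// Helper for FoldOpIntoSelect below: apply the cast or binary operation I to
// a single select arm SO, constant-folding when the arm is itself a constant.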
static Value *foldOperationIntoSelectOperand(Instruction &I, Value *SO,
                                             InstCombiner::BuilderTy &Builder) {
  if (auto *Cast = dyn_cast<CastInst>(&I))
    return Builder.CreateCast(Cast->getOpcode(), SO, I.getType());

  assert(I.isBinaryOp() && "Unexpected opcode for select folding");

  // Figure out if the constant is the left or the right argument.
  bool ConstIsRHS = isa<Constant>(I.getOperand(1));
  Constant *ConstOperand = cast<Constant>(I.getOperand(ConstIsRHS));

  if (auto *SOC = dyn_cast<Constant>(SO)) {
    if (ConstIsRHS)
      return ConstantExpr::get(I.getOpcode(), SOC, ConstOperand);
    return ConstantExpr::get(I.getOpcode(), ConstOperand, SOC);
  }

  Value *Op0 = SO, *Op1 = ConstOperand;
  if (!ConstIsRHS)
    std::swap(Op0, Op1);

  auto *BO = cast<BinaryOperator>(&I);
  Value *RI = Builder.CreateBinOp(BO->getOpcode(), Op0, Op1,
                                  SO->getName() + ".op");
  auto *FPInst = dyn_cast<Instruction>(RI);
  if (FPInst && isa<FPMathOperator>(FPInst))
    FPInst->copyFastMathFlags(BO);
  return RI;
}
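
// Fold an operation with a constant operand into a single-use select by
// applying it to both arms, unless the select forms a min/max idiom that
// other analyses prefer to see intact.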
Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
  // Don't modify shared select instructions.
  if (!SI->hasOneUse())
    return nullptr;

  Value *TV = SI->getTrueValue();
  Value *FV = SI->getFalseValue();
  if (!(isa<Constant>(TV) || isa<Constant>(FV)))
    return nullptr;

  // Bool selects with constant operands can be folded to logical ops.
  if (SI->getType()->isIntOrIntVectorTy(1))
    return nullptr;

  // If it's a bitcast involving vectors, make sure it has the same number of
  // elements on both sides.
  if (auto *BC = dyn_cast<BitCastInst>(&Op)) {
    VectorType *DestTy = dyn_cast<VectorType>(BC->getDestTy());
    VectorType *SrcTy = dyn_cast<VectorType>(BC->getSrcTy());

    // Verify that either both or neither are vectors.
    if ((SrcTy == nullptr) != (DestTy == nullptr))
      return nullptr;

    // If vectors, verify that they have the same number of elements.
    if (SrcTy && SrcTy->getNumElements() != DestTy->getNumElements())
      return nullptr;
  }

  // Test if a CmpInst instruction is used exclusively by a select as
  // part of a minimum or maximum operation. If so, refrain from doing
  // any other folding. This helps out other analyses which understand
  // non-obfuscated minimum and maximum idioms, such as ScalarEvolution
  // and CodeGen. And in this case, at least one of the comparison
  // operands has at least one user besides the compare (the select),
  // which would often largely negate the benefit of folding anyway.
  if (auto *CI = dyn_cast<CmpInst>(SI->getCondition())) {
    if (CI->hasOneUse()) {
      Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
      if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) ||
          (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1))
        return nullptr;
    }
  }

  Value *NewTV = foldOperationIntoSelectOperand(Op, TV, Builder);
  Value *NewFV = foldOperationIntoSelectOperand(Op, FV, Builder);
  return SelectInst::Create(SI->getCondition(), NewTV, NewFV, "", nullptr, SI);
}

static Value *foldOperationIntoPhiValue(BinaryOperator *I, Value *InV,
                                        InstCombiner::BuilderTy &Builder) {
  bool ConstIsRHS = isa<Constant>(I->getOperand(1));
  Constant *C = cast<Constant>(I->getOperand(ConstIsRHS));

  if (auto *InC = dyn_cast<Constant>(InV)) {
    if (ConstIsRHS)
      return ConstantExpr::get(I->getOpcode(), InC, C);
    return ConstantExpr::get(I->getOpcode(), C, InC);
  }

  Value *Op0 = InV, *Op1 = C;
  if (!ConstIsRHS)
    std::swap(Op0, Op1);

  Value *RI = Builder.CreateBinOp(I->getOpcode(), Op0, Op1, "phitmp");
  auto *FPInst = dyn_cast<Instruction>(RI);
  if (FPInst && isa<FPMathOperator>(FPInst))
    FPInst->copyFastMathFlags(I);
  return RI;
}
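
// Fold an operation whose operand is a phi into the phi by evaluating the
// operation on each incoming value; at most one incoming value may be
// non-constant, and it is handled by inserting the computation in its
// predecessor block.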
Instruction *InstCombiner::foldOpIntoPhi(Instruction &I, PHINode *PN) {
  unsigned NumPHIValues = PN->getNumIncomingValues();
  if (NumPHIValues == 0)
    return nullptr;

  // We normally only transform phis with a single use.  However, if a PHI has
  // multiple uses and they are all the same operation, we can fold *all* of the
  // uses into the PHI.
  if (!PN->hasOneUse()) {
    // Walk the use list for the instruction, comparing them to I.
    for (User *U : PN->users()) {
      Instruction *UI = cast<Instruction>(U);
      if (UI != &I && !I.isIdenticalTo(UI))
        return nullptr;
    }
    // Otherwise, we can replace *all* users with the new PHI we form.
  }

  // Check to see if all of the operands of the PHI are simple constants
  // (constantint/constantfp/undef).  If there is one non-constant value,
  // remember the BB it is in.  If there is more than one or if *it* is a PHI,
  // bail out.  We don't do arbitrary constant expressions here because moving
  // their computation can be expensive without a cost model.
  BasicBlock *NonConstBB = nullptr;
  for (unsigned i = 0; i != NumPHIValues; ++i) {
    Value *InVal = PN->getIncomingValue(i);
    if (isa<Constant>(InVal) && !isa<ConstantExpr>(InVal))
      continue;

    if (isa<PHINode>(InVal)) return nullptr;  // Itself a phi.
    if (NonConstBB) return nullptr;  // More than one non-const value.

    NonConstBB = PN->getIncomingBlock(i);

    // If the InVal is an invoke at the end of the pred block, then we can't
    // insert a computation after it without breaking the edge.
    if (isa<InvokeInst>(InVal))
      if (cast<Instruction>(InVal)->getParent() == NonConstBB)
        return nullptr;

    // If the incoming non-constant value is in I's block, we will remove one
    // instruction, but insert another equivalent one, leading to infinite
    // instcombine.
    if (isPotentiallyReachable(I.getParent(), NonConstBB, &DT, LI))
      return nullptr;
  }

  // If there is exactly one non-constant value, we can insert a copy of the
  // operation in that block.  However, if this is a critical edge, we would be
  // inserting the computation on some other paths (e.g. inside a loop).  Only
  // do this if the pred block is unconditionally branching into the phi block.
  if (NonConstBB != nullptr) {
    BranchInst *BI = dyn_cast<BranchInst>(NonConstBB->getTerminator());
    if (!BI || !BI->isUnconditional()) return nullptr;
  }

  // Okay, we can do the transformation: create the new PHI node.
  PHINode *NewPN = PHINode::Create(I.getType(), PN->getNumIncomingValues());
  InsertNewInstBefore(NewPN, *PN);
  NewPN->takeName(PN);

  // If we are going to have to insert a new computation, do so right before the
  // predecessor's terminator.
  if (NonConstBB)
    Builder.SetInsertPoint(NonConstBB->getTerminator());

  // Next, add all of the operands to the PHI.
  if (SelectInst *SI = dyn_cast<SelectInst>(&I)) {
    // We only currently try to fold the condition of a select when it is a phi,
    // not the true/false values.
    Value *TrueV = SI->getTrueValue();
    Value *FalseV = SI->getFalseValue();
    BasicBlock *PhiTransBB = PN->getParent();
    for (unsigned i = 0; i != NumPHIValues; ++i) {
      BasicBlock *ThisBB = PN->getIncomingBlock(i);
      Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB);
      Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB);
      Value *InV = nullptr;
      // Beware of ConstantExpr:  it may eventually evaluate to getNullValue,
      // even if currently isNullValue gives false.
      Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i));
      // For vector constants, we cannot use isNullValue to fold into
      // FalseVInPred versus TrueVInPred. When we have individual nonzero
      // elements in the vector, we will incorrectly fold InC to
      // `TrueVInPred`.
      if (InC && !isa<ConstantExpr>(InC) && isa<ConstantInt>(InC))
        InV = InC->isNullValue() ? FalseVInPred : TrueVInPred;
      else {
        // Generate the select in the same block as PN's current incoming block.
        // Note: ThisBB need not be the NonConstBB because vector constants
        // which are constants by definition are handled here.
        // FIXME: This can lead to an increase in IR generation because we might
        // generate selects for vector constant phi operand, that could not be
        // folded to TrueVInPred or FalseVInPred as done for ConstantInt. For
        // non-vector phis, this transformation was always profitable because
        // the select would be generated exactly once in the NonConstBB.
        Builder.SetInsertPoint(ThisBB->getTerminator());
        InV = Builder.CreateSelect(PN->getIncomingValue(i), TrueVInPred,
                                   FalseVInPred, "phitmp");
      }
      NewPN->addIncoming(InV, ThisBB);
    }
  } else if (CmpInst *CI = dyn_cast<CmpInst>(&I)) {
    Constant *C = cast<Constant>(I.getOperand(1));
    for (unsigned i = 0; i != NumPHIValues; ++i) {
      Value *InV = nullptr;
      if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
        InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C);
      else if (isa<ICmpInst>(CI))
        InV = Builder.CreateICmp(CI->getPredicate(), PN->getIncomingValue(i),
                                 C, "phitmp");
      else
        InV = Builder.CreateFCmp(CI->getPredicate(), PN->getIncomingValue(i),
                                 C, "phitmp");
      NewPN->addIncoming(InV, PN->getIncomingBlock(i));
    }
  } else if (auto *BO = dyn_cast<BinaryOperator>(&I)) {
    for (unsigned i = 0; i != NumPHIValues; ++i) {
      Value *InV = foldOperationIntoPhiValue(BO, PN->getIncomingValue(i),
                                             Builder);
      NewPN->addIncoming(InV, PN->getIncomingBlock(i));
    }
  } else {
    CastInst *CI = cast<CastInst>(&I);
    Type *RetTy = CI->getType();
    for (unsigned i = 0; i != NumPHIValues; ++i) {
      Value *InV;
      if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
        InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy);
      else
        InV = Builder.CreateCast(CI->getOpcode(), PN->getIncomingValue(i),
                                 I.getType(), "phitmp");
      NewPN->addIncoming(InV, PN->getIncomingBlock(i));
    }
  }

  for (auto UI = PN->user_begin(), E = PN->user_end(); UI != E;) {
    Instruction *User = cast<Instruction>(*UI++);
    if (User == &I) continue;
    replaceInstUsesWith(*User, NewPN);
    eraseInstFromFunction(*User);
  }
  return replaceInstUsesWith(I, NewPN);
}

Instruction *InstCombiner::foldBinOpIntoSelectOrPhi(BinaryOperator &I) {
  if (!isa<Constant>(I.getOperand(1)))
    return nullptr;

  if (auto *Sel = dyn_cast<SelectInst>(I.getOperand(0))) {
    if (Instruction *NewSel = FoldOpIntoSelect(I, Sel))
      return NewSel;
  } else if (auto *PN = dyn_cast<PHINode>(I.getOperand(0))) {
    if (Instruction *NewPhi = foldOpIntoPhi(I, PN))
      return NewPhi;
  }
  return nullptr;
}

/// Given a pointer type and a constant offset, determine whether or not there
/// is a sequence of GEP indices into the pointed-to type that will land us at
/// the specified offset. If so, fill them into NewIndices and return the
/// resultant element type, otherwise return null.
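/// For example (illustrative): for PtrTy = "{ i32, i32 }*" and Offset = 4,
/// NewIndices becomes [0, 1] and the returned element type is i32.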
Type *InstCombiner::FindElementAtOffset(PointerType *PtrTy, int64_t Offset,
                                        SmallVectorImpl<Value *> &NewIndices) {
  Type *Ty = PtrTy->getElementType();
  if (!Ty->isSized())
    return nullptr;

  // Start with the index over the outer type.  Note that the type size
  // might be zero (even if the offset isn't zero) if the indexed type
  // is something like [0 x {int, int}]
  Type *IndexTy = DL.getIndexType(PtrTy);
  int64_t FirstIdx = 0;
  if (int64_t TySize = DL.getTypeAllocSize(Ty)) {
    FirstIdx = Offset/TySize;
    Offset -= FirstIdx*TySize;

    // Handle hosts where % returns negative instead of values [0..TySize).
    if (Offset < 0) {
      --FirstIdx;
      Offset += TySize;
      assert(Offset >= 0);
    }
    assert((uint64_t)Offset < (uint64_t)TySize && "Out of range offset");
  }

  NewIndices.push_back(ConstantInt::get(IndexTy, FirstIdx));

  // Index into the types.  If we fail, bail out by returning null.
  while (Offset) {
    // Indexing into tail padding between struct/array elements.
    if (uint64_t(Offset * 8) >= DL.getTypeSizeInBits(Ty))
      return nullptr;

    if (StructType *STy = dyn_cast<StructType>(Ty)) {
      const StructLayout *SL = DL.getStructLayout(STy);
      assert(Offset < (int64_t)SL->getSizeInBytes() &&
             "Offset must stay within the indexed type");

      unsigned Elt = SL->getElementContainingOffset(Offset);
      NewIndices.push_back(ConstantInt::get(Type::getInt32Ty(Ty->getContext()),
                                            Elt));

      Offset -= SL->getElementOffset(Elt);
      Ty = STy->getElementType(Elt);
    } else if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
      uint64_t EltSize = DL.getTypeAllocSize(AT->getElementType());
      assert(EltSize && "Cannot index into a zero-sized array");
      NewIndices.push_back(ConstantInt::get(IndexTy, Offset/EltSize));
      Offset %= EltSize;
      Ty = AT->getElementType();
    } else {
      // Otherwise, we can't index into the middle of this atomic type, bail.
      return nullptr;
    }
  }

  return Ty;
}

static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) {
  // If this GEP has only 0 indices, it is the same pointer as
  // Src. If Src is not a trivial GEP too, don't combine
  // the indices.
  if (GEP.hasAllZeroIndices() && !Src.hasAllZeroIndices() &&
      !Src.hasOneUse())
    return false;
  return true;
}

/// Return a value X such that Val = X * Scale, or null if none.
/// If the multiplication is known not to overflow, then NoSignedWrap is set.
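/// For example (illustrative): descaling Val = "mul i64 %x, 12" by Scale = 4
/// drills into the constant operand and yields "mul i64 %x, 3".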
Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
  assert(isa<IntegerType>(Val->getType()) && "Can only descale integers!");
  assert(cast<IntegerType>(Val->getType())->getBitWidth() ==
         Scale.getBitWidth() && "Scale not compatible with value!");

  // If Val is zero or Scale is one then Val = Val * Scale.
  if (match(Val, m_Zero()) || Scale == 1) {
    NoSignedWrap = true;
    return Val;
  }

  // If Scale is zero then it does not divide Val.
  if (Scale.isMinValue())
    return nullptr;

  // Look through chains of multiplications, searching for a constant that is
  // divisible by Scale.  For example, descaling X*(Y*(Z*4)) by a factor of 4
  // will find the constant factor 4 and produce X*(Y*Z).  Descaling X*(Y*8) by
  // a factor of 4 will produce X*(Y*2).  The principle of operation is to bore
  // down from Val:
  //
  //     Val = M1 * X          ||   Analysis starts here and works down
  //      M1 = M2 * Y          ||   Doesn't descend into terms with more
  //      M2 =  Z * 4          \/   than one use
  //
  // Then to modify a term at the bottom:
  //
  //     Val = M1 * X
  //      M1 =  Z * Y          ||   Replaced M2 with Z
  //
  // Then to work back up correcting nsw flags.

  // Op - the term we are currently analyzing.  Starts at Val then drills down.
  // Replaced with its descaled value before exiting from the drill down loop.
  Value *Op = Val;

  // Parent - initially null, but after drilling down notes where Op came from.
  // In the example above, Parent is (Val, 0) when Op is M1, because M1 is the
  // 0'th operand of Val.
  std::pair<Instruction *, unsigned> Parent;

  // Set if the transform requires a descaling at deeper levels that doesn't
  // overflow.
  bool RequireNoSignedWrap = false;

  // Log base 2 of the scale. Negative if not a power of 2.
  int32_t logScale = Scale.exactLogBase2();

  for (;; Op = Parent.first->getOperand(Parent.second)) { // Drill down
    if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
      // If Op is a constant divisible by Scale then descale to the quotient.
      APInt Quotient(Scale), Remainder(Scale); // Init ensures right bitwidth.
      APInt::sdivrem(CI->getValue(), Scale, Quotient, Remainder);
      if (!Remainder.isMinValue())
        // Not divisible by Scale.
        return nullptr;
      // Replace with the quotient in the parent.
      Op = ConstantInt::get(CI->getType(), Quotient);
      NoSignedWrap = true;
      break;
    }

    if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op)) {
      if (BO->getOpcode() == Instruction::Mul) {
        // Multiplication.
        NoSignedWrap = BO->hasNoSignedWrap();
        if (RequireNoSignedWrap && !NoSignedWrap)
          return nullptr;

        // There are three cases for multiplication: multiplication by exactly
        // the scale, multiplication by a constant different to the scale, and
        // multiplication by something else.
        Value *LHS = BO->getOperand(0);
        Value *RHS = BO->getOperand(1);

        if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
          // Multiplication by a constant.
          if (CI->getValue() == Scale) {
            // Multiplication by exactly the scale, replace the multiplication
            // by its left-hand side in the parent.
            Op = LHS;
            break;
          }

          // Otherwise drill down into the constant.
          if (!Op->hasOneUse())
            return nullptr;

          Parent = std::make_pair(BO, 1);
          continue;
        }

        // Multiplication by something else. Drill down into the left-hand side
        // since that's where the reassociate pass puts the good stuff.
        if (!Op->hasOneUse())
          return nullptr;

        Parent = std::make_pair(BO, 0);
        continue;
      }

      if (logScale > 0 && BO->getOpcode() == Instruction::Shl &&
          isa<ConstantInt>(BO->getOperand(1))) {
        // Multiplication by a power of 2.
        NoSignedWrap = BO->hasNoSignedWrap();
        if (RequireNoSignedWrap && !NoSignedWrap)
          return nullptr;

        Value *LHS = BO->getOperand(0);
        int32_t Amt = cast<ConstantInt>(BO->getOperand(1))->
          getLimitedValue(Scale.getBitWidth());
        // Op = LHS << Amt.

        if (Amt == logScale) {
          // Multiplication by exactly the scale, replace the multiplication
          // by its left-hand side in the parent.
          Op = LHS;
          break;
        }
        if (Amt < logScale || !Op->hasOneUse())
          return nullptr;

        // Multiplication by more than the scale.  Reduce the multiplying amount
        // by the scale in the parent.
        Parent = std::make_pair(BO, 1);
1279
1
        Op = ConstantInt::get(BO->getType(), Amt - logScale);
1280
1
        break;
1281
1
      }
1282
7.71k
    }
1283
56.5k
1284
56.5k
    if (!Op->hasOneUse())
1285
4.62k
      return nullptr;
1286
51.8k
1287
51.8k
    if (CastInst *Cast = dyn_cast<CastInst>(Op)) {
1288
2.41k
      if (Cast->getOpcode() == Instruction::SExt) {
1289
445
        // Op is sign-extended from a smaller type, descale in the smaller type.
1290
445
        unsigned SmallSize = Cast->getSrcTy()->getPrimitiveSizeInBits();
1291
445
        APInt SmallScale = Scale.trunc(SmallSize);
1292
445
        // Suppose Op = sext X, and we descale X as Y * SmallScale.  We want to
1293
445
        // descale Op as (sext Y) * Scale.  In order to have
1294
445
        //   sext (Y * SmallScale) = (sext Y) * Scale
1295
445
        // some conditions need to hold however: SmallScale must sign-extend to
1296
445
        // Scale and the multiplication Y * SmallScale should not overflow.
1297
445
        if (SmallScale.sext(Scale.getBitWidth()) != Scale)
1298
0
          // SmallScale does not sign-extend to Scale.
1299
0
          return nullptr;
1300
445
        assert(SmallScale.exactLogBase2() == logScale);
1301
445
        // Require that Y * SmallScale must not overflow.
1302
445
        RequireNoSignedWrap = true;
1303
445
1304
445
        // Drill down through the cast.
1305
445
        Parent = std::make_pair(Cast, 0);
1306
445
        Scale = SmallScale;
1307
445
        continue;
1308
445
      }
1309
1.97k
1310
1.97k
      if (Cast->getOpcode() == Instruction::Trunc) {
1311
153
        // Op is truncated from a larger type, descale in the larger type.
1312
153
        // Suppose Op = trunc X, and we descale X as Y * sext Scale.  Then
1313
153
        //   trunc (Y * sext Scale) = (trunc Y) * Scale
1314
153
        // always holds.  However (trunc Y) * Scale may overflow even if
1315
153
        // trunc (Y * sext Scale) does not, so nsw flags need to be cleared
1316
153
        // from this point up in the expression (see later).
1317
153
        if (RequireNoSignedWrap)
1318
0
          return nullptr;
1319
153
1320
153
        // Drill down through the cast.
1321
153
        unsigned LargeSize = Cast->getSrcTy()->getPrimitiveSizeInBits();
1322
153
        Parent = std::make_pair(Cast, 0);
1323
153
        Scale = Scale.sext(LargeSize);
1324
153
        if (logScale + 1 == (int32_t)Cast->getType()->getPrimitiveSizeInBits())
1325
0
          logScale = -1;
1326
153
        assert(Scale.exactLogBase2() == logScale);
1327
153
        continue;
1328
153
      }
1329
1.97k
    }
1330
51.2k
1331
51.2k
    // Unsupported expression, bail out.
1332
51.2k
    return nullptr;
1333
51.2k
  }
1334
112k
1335
112k
  // If Op is zero then Val = Op * Scale.
1336
112k
  
if (7.32k
match(Op, m_Zero())7.32k
) {
1337
0
    NoSignedWrap = true;
1338
0
    return Op;
1339
0
  }
1340
7.32k
1341
7.32k
  // We know that we can successfully descale, so from here on we can safely
1342
7.32k
  // modify the IR.  Op holds the descaled version of the deepest term in the
1343
7.32k
  // expression.  NoSignedWrap is 'true' if multiplying Op by Scale is known
1344
7.32k
  // not to overflow.
1345
7.32k
1346
7.32k
  if (!Parent.first)
1347
7.30k
    // The expression only had one term.
1348
7.30k
    return Op;
1349
18
1350
18
  // Rewrite the parent using the descaled version of its operand.
1351
18
  assert(Parent.first->hasOneUse() && "Drilled down when more than one use!");
1352
18
  assert(Op != Parent.first->getOperand(Parent.second) &&
1353
18
         "Descaling was a no-op?");
1354
18
  Parent.first->setOperand(Parent.second, Op);
1355
18
  Worklist.Add(Parent.first);
1356
18
1357
18
  // Now work back up the expression correcting nsw flags.  The logic is based
1358
18
  // on the following observation: if X * Y is known not to overflow as a signed
1359
18
  // multiplication, and Y is replaced by a value Z with smaller absolute value,
1360
18
  // then X * Z will not overflow as a signed multiplication either.  As we work
1361
18
  // our way up, having NoSignedWrap 'true' means that the descaled value at the
1362
18
  // current level has strictly smaller absolute value than the original.
1363
18
  Instruction *Ancestor = Parent.first;
1364
28
  do {
1365
28
    if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Ancestor)) {
1366
14
      // If the multiplication wasn't nsw then we can't say anything about the
1367
14
      // value of the descaled multiplication, and we have to clear nsw flags
1368
14
      // from this point on up.
1369
14
      bool OpNoSignedWrap = BO->hasNoSignedWrap();
1370
14
      NoSignedWrap &= OpNoSignedWrap;
1371
14
      if (NoSignedWrap != OpNoSignedWrap) {
1372
3
        BO->setHasNoSignedWrap(NoSignedWrap);
1373
3
        Worklist.Add(Ancestor);
1374
3
      }
1375
14
    } else if (Ancestor->getOpcode() == Instruction::Trunc) {
1376
1
      // The fact that the descaled input to the trunc has smaller absolute
1377
1
      // value than the original input doesn't tell us anything useful about
1378
1
      // the absolute values of the truncations.
1379
1
      NoSignedWrap = false;
1380
1
    }
1381
28
    assert((Ancestor->getOpcode() != Instruction::SExt || NoSignedWrap) &&
1382
28
           "Failed to keep proper track of nsw flags while drilling down?");
1383
28
1384
28
    if (Ancestor == Val)
1385
18
      // Got to the top, all done!
1386
18
      return Val;
1387
10
1388
10
    // Move up one level in the expression.
1389
10
    assert(Ancestor->hasOneUse() && "Drilled down when more than one use!");
1390
10
    Ancestor = Ancestor->user_back();
1391
10
  } while (true);
1392
18
}
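
// Editorial note (illustrative sketch, not part of the original source): one
// concrete run of Descale. Asked to descale %v below by Scale = 4
// (logScale = 2):
//   %t = shl nsw i32 %x, 3          ; %t = %x * 8, single use
//   %v = mul nsw i32 %t, %y
// the drill down passes through the mul into its left-hand side, finds the
// shl with Amt = 3 > logScale, and reduces the shift amount by logScale:
//   %t = shl nsw i32 %x, 1          ; %t = %x * 2
//   %v = mul nsw i32 %t, %y         ; original value = (new %v) * 4
// before re-validating the nsw flags on the walk back up to Val.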

Instruction *InstCombiner::foldVectorBinop(BinaryOperator &Inst) {
  if (!Inst.getType()->isVectorTy()) return nullptr;

  BinaryOperator::BinaryOps Opcode = Inst.getOpcode();
  unsigned NumElts = cast<VectorType>(Inst.getType())->getNumElements();
  Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1);
  assert(cast<VectorType>(LHS->getType())->getNumElements() == NumElts);
  assert(cast<VectorType>(RHS->getType())->getNumElements() == NumElts);

  // If both operands of the binop are vector concatenations, then perform the
  // narrow binop on each pair of the source operands followed by concatenation
  // of the results.
  Value *L0, *L1, *R0, *R1;
  Constant *Mask;
  if (match(LHS, m_ShuffleVector(m_Value(L0), m_Value(L1), m_Constant(Mask))) &&
      match(RHS, m_ShuffleVector(m_Value(R0), m_Value(R1), m_Specific(Mask))) &&
      LHS->hasOneUse() && RHS->hasOneUse() &&
      cast<ShuffleVectorInst>(LHS)->isConcat() &&
      cast<ShuffleVectorInst>(RHS)->isConcat()) {
    // This transform does not have the speculative execution constraint as
    // below because the shuffle is a concatenation. The new binops are
    // operating on exactly the same elements as the existing binop.
    // TODO: We could ease the mask requirement to allow different undef lanes,
    //       but that requires an analysis of the binop-with-undef output value.
    Value *NewBO0 = Builder.CreateBinOp(Opcode, L0, R0);
    if (auto *BO = dyn_cast<BinaryOperator>(NewBO0))
      BO->copyIRFlags(&Inst);
    Value *NewBO1 = Builder.CreateBinOp(Opcode, L1, R1);
    if (auto *BO = dyn_cast<BinaryOperator>(NewBO1))
      BO->copyIRFlags(&Inst);
    return new ShuffleVectorInst(NewBO0, NewBO1, Mask);
  }

  // It may not be safe to reorder shuffles and things like div, urem, etc.
  // because we may trap when executing those ops on unknown vector elements.
  // See PR20059.
  if (!isSafeToSpeculativelyExecute(&Inst))
    return nullptr;

  auto createBinOpShuffle = [&](Value *X, Value *Y, Constant *M) {
    Value *XY = Builder.CreateBinOp(Opcode, X, Y);
    if (auto *BO = dyn_cast<BinaryOperator>(XY))
      BO->copyIRFlags(&Inst);
    return new ShuffleVectorInst(XY, UndefValue::get(XY->getType()), M);
  };

  // If both arguments of the binary operation are shuffles that use the same
  // mask and shuffle within a single vector, move the shuffle after the binop.
  Value *V1, *V2;
  if (match(LHS, m_ShuffleVector(m_Value(V1), m_Undef(), m_Constant(Mask))) &&
      match(RHS, m_ShuffleVector(m_Value(V2), m_Undef(), m_Specific(Mask))) &&
      V1->getType() == V2->getType() &&
      (LHS->hasOneUse() || RHS->hasOneUse() || LHS == RHS)) {
    // Op(shuffle(V1, Mask), shuffle(V2, Mask)) -> shuffle(Op(V1, V2), Mask)
    return createBinOpShuffle(V1, V2, Mask);
  }

  // If both arguments of a commutative binop are select-shuffles that use the
  // same mask with commuted operands, the shuffles are unnecessary.
  if (Inst.isCommutative() &&
      match(LHS, m_ShuffleVector(m_Value(V1), m_Value(V2), m_Constant(Mask))) &&
      match(RHS, m_ShuffleVector(m_Specific(V2), m_Specific(V1),
                                 m_Specific(Mask)))) {
    auto *LShuf = cast<ShuffleVectorInst>(LHS);
    auto *RShuf = cast<ShuffleVectorInst>(RHS);
    // TODO: Allow shuffles that contain undefs in the mask?
    //       That is legal, but it reduces undef knowledge.
    // TODO: Allow arbitrary shuffles by shuffling after binop?
    //       That might be legal, but we have to deal with poison.
    if (LShuf->isSelect() && !LShuf->getMask()->containsUndefElement() &&
        RShuf->isSelect() && !RShuf->getMask()->containsUndefElement()) {
      // Example:
      // LHS = shuffle V1, V2, <0, 5, 6, 3>
      // RHS = shuffle V2, V1, <0, 5, 6, 3>
      // LHS + RHS --> (V10+V20, V21+V11, V22+V12, V13+V23) --> V1 + V2
      Instruction *NewBO = BinaryOperator::Create(Opcode, V1, V2);
      NewBO->copyIRFlags(&Inst);
      return NewBO;
    }
  }

  // If one argument is a shuffle within one vector and the other is a constant,
  // try moving the shuffle after the binary operation. This canonicalization
  // intends to move shuffles closer to other shuffles and binops closer to
  // other binops, so they can be folded. It may also enable demanded elements
  // transforms.
  Constant *C;
  if (match(&Inst, m_c_BinOp(
          m_OneUse(m_ShuffleVector(m_Value(V1), m_Undef(), m_Constant(Mask))),
          m_Constant(C))) &&
      V1->getType()->getVectorNumElements() <= NumElts) {
    assert(Inst.getType()->getScalarType() == V1->getType()->getScalarType() &&
           "Shuffle should not change scalar type");

    // Find constant NewC that has property:
    //   shuffle(NewC, ShMask) = C
    // If such constant does not exist (example: ShMask=<0,0> and C=<1,2>)
    // reorder is not possible. A 1-to-1 mapping is not required. Example:
    // ShMask = <1,1,2,2> and C = <5,5,6,6> --> NewC = <undef,5,6,undef>
    bool ConstOp1 = isa<Constant>(RHS);
    SmallVector<int, 16> ShMask;
    ShuffleVectorInst::getShuffleMask(Mask, ShMask);
    unsigned SrcVecNumElts = V1->getType()->getVectorNumElements();
    UndefValue *UndefScalar = UndefValue::get(C->getType()->getScalarType());
    SmallVector<Constant *, 16> NewVecC(SrcVecNumElts, UndefScalar);
    bool MayChange = true;
    for (unsigned I = 0; I < NumElts; ++I) {
      Constant *CElt = C->getAggregateElement(I);
      if (ShMask[I] >= 0) {
        assert(ShMask[I] < (int)NumElts && "Not expecting narrowing shuffle");
        Constant *NewCElt = NewVecC[ShMask[I]];
        // Bail out if:
        // 1. The constant vector contains a constant expression.
        // 2. The shuffle needs an element of the constant vector that can't
        //    be mapped to a new constant vector.
        // 3. This is a widening shuffle that copies elements of V1 into the
        //    extended elements (extending with undef is allowed).
        if (!CElt || (!isa<UndefValue>(NewCElt) && NewCElt != CElt) ||
            I >= SrcVecNumElts) {
          MayChange = false;
          break;
        }
        NewVecC[ShMask[I]] = CElt;
      }
      // If this is a widening shuffle, we must be able to extend with undef
      // elements. If the original binop does not produce an undef in the high
      // lanes, then this transform is not safe.
      // TODO: We could shuffle those non-undef constant values into the
      //       result by using a constant vector (rather than an undef vector)
      //       as operand 1 of the new binop, but that might be too aggressive
      //       for target-independent shuffle creation.
      if (I >= SrcVecNumElts) {
        Constant *MaybeUndef =
            ConstOp1 ? ConstantExpr::get(Opcode, UndefScalar, CElt)
                     : ConstantExpr::get(Opcode, CElt, UndefScalar);
        if (!isa<UndefValue>(MaybeUndef)) {
          MayChange = false;
          break;
        }
      }
    }
    if (MayChange) {
      Constant *NewC = ConstantVector::get(NewVecC);
      // It may not be safe to execute a binop on a vector with undef elements
      // because the entire instruction can be folded to undef or create poison
      // that did not exist in the original code.
      if (Inst.isIntDivRem() || (Inst.isShift() && ConstOp1))
        NewC = getSafeVectorConstantForBinop(Opcode, NewC, ConstOp1);

      // Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask)
      // Op(C, shuffle(V1, Mask)) -> shuffle(Op(NewC, V1), Mask)
      Value *NewLHS = ConstOp1 ? V1 : NewC;
      Value *NewRHS = ConstOp1 ? NewC : V1;
      return createBinOpShuffle(NewLHS, NewRHS, Mask);
    }
  }

  return nullptr;
}
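
// Editorial note (illustrative sketch, not part of the original source): the
// single-vector shuffle case above, shown on IR. Assuming both shuffles have
// one use and share the same mask:
//   %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
//   %b = shufflevector <4 x i32> %y, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
//   %r = add <4 x i32> %a, %b
// becomes
//   %s = add <4 x i32> %x, %y
//   %r = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
// dropping one shuffle and moving the add next to other arithmetic on %x/%y.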

/// Try to narrow the width of a binop if at least 1 operand is an extend of
/// a value. This requires a potentially expensive known bits check to make
/// sure the narrow op does not overflow.
Instruction *InstCombiner::narrowMathIfNoOverflow(BinaryOperator &BO) {
  // We need at least one extended operand.
  Value *Op0 = BO.getOperand(0), *Op1 = BO.getOperand(1);

  // If this is a sub, we swap the operands since we always want an extension
  // on the RHS. The LHS can be an extension or a constant.
  if (BO.getOpcode() == Instruction::Sub)
    std::swap(Op0, Op1);

  Value *X;
  bool IsSext = match(Op0, m_SExt(m_Value(X)));
  if (!IsSext && !match(Op0, m_ZExt(m_Value(X))))
    return nullptr;

  // If both operands are the same extension from the same source type and we
  // can eliminate at least one (hasOneUse), this might work.
  CastInst::CastOps CastOpc = IsSext ? Instruction::SExt : Instruction::ZExt;
  Value *Y;
  if (!(match(Op1, m_ZExtOrSExt(m_Value(Y))) && X->getType() == Y->getType() &&
        cast<Operator>(Op1)->getOpcode() == CastOpc &&
        (Op0->hasOneUse() || Op1->hasOneUse()))) {
    // If that did not match, see if we have a suitable constant operand.
    // Truncating and extending must produce the same constant.
    Constant *WideC;
    if (!Op0->hasOneUse() || !match(Op1, m_Constant(WideC)))
      return nullptr;
    Constant *NarrowC = ConstantExpr::getTrunc(WideC, X->getType());
    if (ConstantExpr::getCast(CastOpc, NarrowC, BO.getType()) != WideC)
      return nullptr;
    Y = NarrowC;
  }

  // Swap back now that we found our operands.
  if (BO.getOpcode() == Instruction::Sub)
    std::swap(X, Y);

  // Both operands have narrow versions. Last step: the math must not overflow
  // in the narrow width.
  if (!willNotOverflow(BO.getOpcode(), X, Y, BO, IsSext))
    return nullptr;

  // bo (ext X), (ext Y) --> ext (bo X, Y)
  // bo (ext X), C       --> ext (bo X, C')
  Value *NarrowBO = Builder.CreateBinOp(BO.getOpcode(), X, Y, "narrow");
  if (auto *NewBinOp = dyn_cast<BinaryOperator>(NarrowBO)) {
    if (IsSext)
      NewBinOp->setHasNoSignedWrap();
    else
      NewBinOp->setHasNoUnsignedWrap();
  }
  return CastInst::Create(CastOpc, NarrowBO, BO.getType());
}
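
// Editorial note (illustrative sketch, not part of the original source): the
// constant-operand path above, on IR. Given
//   %xw = zext i16 %x to i64
//   %r  = add i64 %xw, 100
// the constant 100 survives the trunc/zext round trip back to i16, so if
// willNotOverflow can prove that %x + 100 cannot wrap in 16 bits (say, known
// bits bound %x), the add is rebuilt narrow and then extended:
//   %narrow = add nuw i16 %x, 100
//   %r      = zext i16 %narrow to i64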

Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
  SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end());
  Type *GEPType = GEP.getType();
  Type *GEPEltType = GEP.getSourceElementType();
  if (Value *V = SimplifyGEPInst(GEPEltType, Ops, SQ.getWithInstruction(&GEP)))
    return replaceInstUsesWith(GEP, V);

  // For vector geps, use the generic demanded vector support.
  if (GEP.getType()->isVectorTy()) {
    auto VWidth = GEP.getType()->getVectorNumElements();
    APInt UndefElts(VWidth, 0);
    APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
    if (Value *V = SimplifyDemandedVectorElts(&GEP, AllOnesEltMask,
                                              UndefElts)) {
      if (V != &GEP)
        return replaceInstUsesWith(GEP, V);
      return &GEP;
    }

    // TODO: 1) Scalarize splat operands, 2) scalarize entire instruction if
    // possible (decide on canonical form for pointer broadcast), 3) exploit
    // undef elements to decrease demanded bits
  }

  Value *PtrOp = GEP.getOperand(0);

  // Eliminate unneeded casts for indices, and replace indices which displace
  // by multiples of a zero size type with zero.
  bool MadeChange = false;

  // Index width may not be the same width as pointer width.
  // Data layout chooses the right type based on supported integer types.
  Type *NewScalarIndexTy =
      DL.getIndexType(GEP.getPointerOperandType()->getScalarType());

  gep_type_iterator GTI = gep_type_begin(GEP);
  for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E;
       ++I, ++GTI) {
    // Skip indices into struct types.
    if (GTI.isStruct())
      continue;

    Type *IndexTy = (*I)->getType();
    Type *NewIndexType =
        IndexTy->isVectorTy()
            ? VectorType::get(NewScalarIndexTy, IndexTy->getVectorNumElements())
            : NewScalarIndexTy;

    // If the element type has zero size then any index over it is equivalent
    // to an index of zero, so replace it with zero if it is not zero already.
    Type *EltTy = GTI.getIndexedType();
    if (EltTy->isSized() && DL.getTypeAllocSize(EltTy) == 0)
      if (!isa<Constant>(*I) || !cast<Constant>(*I)->isNullValue()) {
        *I = Constant::getNullValue(NewIndexType);
        MadeChange = true;
      }

    if (IndexTy != NewIndexType) {
      // If we are using a wider index than needed for this platform, shrink
      // it to what we need.  If narrower, sign-extend it to what we need.
      // This explicit cast can make subsequent optimizations more obvious.
      *I = Builder.CreateIntCast(*I, NewIndexType, true);
      MadeChange = true;
    }
  }
  if (MadeChange)
    return &GEP;

  // Check to see if the inputs to the PHI node are getelementptr instructions.
  if (auto *PN = dyn_cast<PHINode>(PtrOp)) {
    auto *Op1 = dyn_cast<GetElementPtrInst>(PN->getOperand(0));
    if (!Op1)
      return nullptr;

    // Don't fold a GEP into itself through a PHI node. This can only happen
    // through the back-edge of a loop. Folding a GEP into itself means that
    // the value of the previous iteration needs to be stored in the meantime,
    // thus requiring an additional register variable to be live, but not
    // actually achieving anything (the GEP still needs to be executed once per
    // loop iteration).
    if (Op1 == &GEP)
      return nullptr;

    int DI = -1;

    for (auto I = PN->op_begin()+1, E = PN->op_end(); I != E; ++I) {
      auto *Op2 = dyn_cast<GetElementPtrInst>(*I);
      if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands())
        return nullptr;

      // As for Op1 above, don't try to fold a GEP into itself.
      if (Op2 == &GEP)
        return nullptr;

      // Keep track of the type as we walk the GEP.
      Type *CurTy = nullptr;

      for (unsigned J = 0, F = Op1->getNumOperands(); J != F; ++J) {
        if (Op1->getOperand(J)->getType() != Op2->getOperand(J)->getType())
          return nullptr;

        if (Op1->getOperand(J) != Op2->getOperand(J)) {
          if (DI == -1) {
            // We have not seen any differences in the GEPs feeding the PHI
            // yet, so we record this one if it is allowed to be a variable.

            // The first two arguments can vary for any GEP, the rest have to be
            // static for struct slots
            if (J > 1 && CurTy->isStructTy())
              return nullptr;

            DI = J;
          } else {
            // The GEP is different by more than one input. While this could be
            // extended to support GEPs that vary by more than one variable it
            // doesn't make sense since it greatly increases the complexity and
            // would result in an R+R+R addressing mode which no backend
            // directly supports and would need to be broken into several
            // simpler instructions anyway.
            return nullptr;
          }
        }

        // Sink down a layer of the type for the next iteration.
        if (J > 0) {
          if (J == 1) {
            CurTy = Op1->getSourceElementType();
          } else if (auto *CT = dyn_cast<CompositeType>(CurTy)) {
            CurTy = CT->getTypeAtIndex(Op1->getOperand(J));
          } else {
            CurTy = nullptr;
          }
        }
      }
    }

    // If not all GEPs are identical we'll have to create a new PHI node.
    // Check that the old PHI node has only one use so that it will get
    // removed.
    if (DI != -1 && !PN->hasOneUse())
      return nullptr;

    auto *NewGEP = cast<GetElementPtrInst>(Op1->clone());
    if (DI == -1) {
      // All the GEPs feeding the PHI are identical. Clone one down into our
      // BB so that it can be merged with the current GEP.
      GEP.getParent()->getInstList().insert(
          GEP.getParent()->getFirstInsertionPt(), NewGEP);
    } else {
      // All the GEPs feeding the PHI differ at a single offset. Clone a GEP
      // into the current block so it can be merged, and create a new PHI to
      // set that index.
      PHINode *NewPN;
      {
        IRBuilderBase::InsertPointGuard Guard(Builder);
        Builder.SetInsertPoint(PN);
        NewPN = Builder.CreatePHI(Op1->getOperand(DI)->getType(),
                                  PN->getNumOperands());
      }

      for (auto &I : PN->operands())
        NewPN->addIncoming(cast<GEPOperator>(I)->getOperand(DI),
                           PN->getIncomingBlock(I));

      NewGEP->setOperand(DI, NewPN);
      GEP.getParent()->getInstList().insert(
          GEP.getParent()->getFirstInsertionPt(), NewGEP);
      NewGEP->setOperand(DI, NewPN);
    }

    GEP.setOperand(0, NewGEP);
    PtrOp = NewGEP;
  }

  // Combine Indices - If the source pointer to this getelementptr instruction
  // is a getelementptr instruction, combine the indices of the two
  // getelementptr instructions into a single instruction.
  if (auto *Src = dyn_cast<GEPOperator>(PtrOp)) {
    if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src))
      return nullptr;

    // Try to reassociate loop invariant GEP chains to enable LICM.
    if (LI && Src->getNumOperands() == 2 && GEP.getNumOperands() == 2 &&
        Src->hasOneUse()) {
      if (Loop *L = LI->getLoopFor(GEP.getParent())) {
        Value *GO1 = GEP.getOperand(1);
        Value *SO1 = Src->getOperand(1);
        // Reassociate the two GEPs if SO1 is variant in the loop and GO1 is
        // invariant: this breaks the dependence between GEPs and allows LICM
        // to hoist the invariant part out of the loop.
        if (L->isLoopInvariant(GO1) && !L->isLoopInvariant(SO1)) {
          // We have to be careful here.
          // We have something like:
          //  %src = getelementptr <ty>, <ty>* %base, <ty> %idx
          //  %gep = getelementptr <ty>, <ty>* %src, <ty> %idx2
          // If we just swap idx & idx2 then we could inadvertently
          // change %src from a vector to a scalar, or vice versa.
          // Cases:
          //  1) %base a scalar & idx a scalar & idx2 a vector
          //      => Swapping idx & idx2 turns %src into a vector type.
          //  2) %base a scalar & idx a vector & idx2 a scalar
          //      => Swapping idx & idx2 turns %src into a scalar type
          //  3) %base, %idx, and %idx2 are scalars
          //      => %src & %gep are scalars
          //      => swapping idx & idx2 is safe
          //  4) %base a vector
          //      => %src is a vector
          //      => swapping idx & idx2 is safe.
          auto *SO0 = Src->getOperand(0);
          auto *SO0Ty = SO0->getType();
          if (!isa<VectorType>(GEPType) || // case 3
              isa<VectorType>(SO0Ty)) {    // case 4
            Src->setOperand(1, GO1);
            GEP.setOperand(1, SO1);
            return &GEP;
          } else {
            // Case 1 or 2
            // -- have to recreate %src & %gep
            // put NewSrc at same location as %src
            Builder.SetInsertPoint(cast<Instruction>(PtrOp));
            auto *NewSrc = cast<GetElementPtrInst>(
                Builder.CreateGEP(GEPEltType, SO0, GO1, Src->getName()));
            NewSrc->setIsInBounds(Src->isInBounds());
            auto *NewGEP = GetElementPtrInst::Create(GEPEltType, NewSrc, {SO1});
            NewGEP->setIsInBounds(GEP.isInBounds());
            return NewGEP;
          }
        }
      }
    }

    // Note that if our source is a gep chain itself then we wait for that
    // chain to be resolved before we perform this transformation.  This
    // avoids us creating a TON of code in some cases.
    if (auto *SrcGEP = dyn_cast<GEPOperator>(Src->getOperand(0)))
      if (SrcGEP->getNumOperands() == 2 && shouldMergeGEPs(*Src, *SrcGEP))
        return nullptr;   // Wait until our source is folded to completion.

    SmallVector<Value*, 8> Indices;

    // Find out whether the last index in the source GEP is a sequential idx.
    bool EndsWithSequential = false;
    for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src);
         I != E; ++I)
      EndsWithSequential = I.isSequential();

    // Can we combine the two pointer arithmetics offsets?
    if (EndsWithSequential) {
      // Replace: gep (gep %P, long B), long A, ...
      // With:    T = long A+B; gep %P, T, ...
      Value *SO1 = Src->getOperand(Src->getNumOperands()-1);
      Value *GO1 = GEP.getOperand(1);

      // If they aren't the same type, then the input hasn't been processed
      // by the loop above yet (which canonicalizes sequential index types to
      // intptr_t).  Just avoid transforming this until the input has been
      // normalized.
      if (SO1->getType() != GO1->getType())
        return nullptr;

      Value *Sum =
          SimplifyAddInst(GO1, SO1, false, false, SQ.getWithInstruction(&GEP));
      // Only do the combine when we are sure the cost after the
      // merge is never more than that before the merge.
      if (Sum == nullptr)
        return nullptr;

      // Update the GEP in place if possible.
      if (Src->getNumOperands() == 2) {
        GEP.setOperand(0, Src->getOperand(0));
        GEP.setOperand(1, Sum);
        return &GEP;
      }
      Indices.append(Src->op_begin()+1, Src->op_end()-1);
      Indices.push_back(Sum);
      Indices.append(GEP.op_begin()+2, GEP.op_end());
    } else if (isa<Constant>(*GEP.idx_begin()) &&
               cast<Constant>(*GEP.idx_begin())->isNullValue() &&
               Src->getNumOperands() != 1) {
      // Otherwise we can do the fold if the first index of the GEP is a zero
      Indices.append(Src->op_begin()+1, Src->op_end());
      Indices.append(GEP.idx_begin()+1, GEP.idx_end());
    }

    if (!Indices.empty())
      return GEP.isInBounds() && Src->isInBounds()
                 ? GetElementPtrInst::CreateInBounds(
                       Src->getSourceElementType(), Src->getOperand(0), Indices,
                       GEP.getName())
                 : GetElementPtrInst::Create(Src->getSourceElementType(),
                                             Src->getOperand(0), Indices,
                                             GEP.getName());
  }

  if (GEP.getNumIndices() == 1) {
    unsigned AS = GEP.getPointerAddressSpace();
    if (GEP.getOperand(1)->getType()->getScalarSizeInBits() ==
        DL.getIndexSizeInBits(AS)) {
      uint64_t TyAllocSize = DL.getTypeAllocSize(GEPEltType);

      bool Matched = false;
      uint64_t C;
      Value *V = nullptr;
      if (TyAllocSize == 1) {
        V = GEP.getOperand(1);
        Matched = true;
      } else if (match(GEP.getOperand(1),
                       m_AShr(m_Value(V), m_ConstantInt(C)))) {
        if (TyAllocSize == 1ULL << C)
          Matched = true;
      } else if (match(GEP.getOperand(1),
                       m_SDiv(m_Value(V), m_ConstantInt(C)))) {
        if (TyAllocSize == C)
          Matched = true;
      }

      if (Matched) {
        // Canonicalize (gep i8* X, -(ptrtoint Y))
        // to (inttoptr (sub (ptrtoint X), (ptrtoint Y)))
        // The GEP pattern is emitted by the SCEV expander for certain kinds of
        // pointer arithmetic.
        if (match(V, m_Neg(m_PtrToInt(m_Value())))) {
          Operator *Index = cast<Operator>(V);
          Value *PtrToInt = Builder.CreatePtrToInt(PtrOp, Index->getType());
          Value *NewSub = Builder.CreateSub(PtrToInt, Index->getOperand(1));
          return CastInst::Create(Instruction::IntToPtr, NewSub, GEPType);
        }
        // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X))
        // to (bitcast Y)
        Value *Y;
        if (match(V, m_Sub(m_PtrToInt(m_Value(Y)),
                           m_PtrToInt(m_Specific(GEP.getOperand(0))))))
          return CastInst::CreatePointerBitCastOrAddrSpaceCast(Y, GEPType);
      }
    }
  }

  // We do not handle pointer-vector geps here.
  if (GEPType->isVectorTy())
    return nullptr;

  // Handle gep(bitcast x) and gep(gep x, 0, 0, 0).
  Value *StrippedPtr = PtrOp->stripPointerCasts();
  PointerType *StrippedPtrTy = cast<PointerType>(StrippedPtr->getType());

  if (StrippedPtr != PtrOp) {
    bool HasZeroPointerIndex = false;
    Type *StrippedPtrEltTy = StrippedPtrTy->getElementType();

    if (auto *C = dyn_cast<ConstantInt>(GEP.getOperand(1)))
      HasZeroPointerIndex = C->isZero();

    // Transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ...
    // into     : GEP [10 x i8]* X, i32 0, ...
    //
    // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ...
    //           into     : GEP i8* X, ...
    //
    // This occurs when the program declares an array extern like "int X[];"
    if (HasZeroPointerIndex) {
      if (auto *CATy = dyn_cast<ArrayType>(GEPEltType)) {
        // GEP (bitcast i8* X to [0 x i8]*), i32 0, ... ?
        if (CATy->getElementType() == StrippedPtrEltTy) {
          // -> GEP i8* X, ...
          SmallVector<Value*, 8> Idx(GEP.idx_begin()+1, GEP.idx_end());
          GetElementPtrInst *Res = GetElementPtrInst::Create(
              StrippedPtrEltTy, StrippedPtr, Idx, GEP.getName());
          Res->setIsInBounds(GEP.isInBounds());
          if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace())
            return Res;
          // Insert Res, and create an addrspacecast.
          // e.g.,
          // GEP (addrspacecast i8 addrspace(1)* X to [0 x i8]*), i32 0, ...
          // ->
          // %0 = GEP i8 addrspace(1)* X, ...
          // addrspacecast i8 addrspace(1)* %0 to i8*
          return new AddrSpaceCastInst(Builder.Insert(Res), GEPType);
        }

        if (auto *XATy = dyn_cast<ArrayType>(StrippedPtrEltTy)) {
          // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ?
          if (CATy->getElementType() == XATy->getElementType()) {
            // -> GEP [10 x i8]* X, i32 0, ...
            // At this point, we know that the cast source type is a pointer
            // to an array of the same type as the destination pointer
            // array.  Because the array type is never stepped over (there
            // is a leading zero) we can fold the cast into this GEP.
            if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace()) {
              GEP.setOperand(0, StrippedPtr);
              GEP.setSourceElementType(XATy);
              return &GEP;
            }
            // Cannot replace the base pointer directly because StrippedPtr's
            // address space is different. Instead, create a new GEP followed by
            // an addrspacecast.
            // e.g.,
            // GEP (addrspacecast [10 x i8] addrspace(1)* X to [0 x i8]*),
            //   i32 0, ...
            // ->
            // %0 = GEP [10 x i8] addrspace(1)* X, ...
            // addrspacecast i8 addrspace(1)* %0 to i8*
            SmallVector<Value*, 8> Idx(GEP.idx_begin(), GEP.idx_end());
            Value *NewGEP =
                GEP.isInBounds()
                    ? Builder.CreateInBoundsGEP(StrippedPtrEltTy, StrippedPtr,
                                                Idx, GEP.getName())
                    : Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Idx,
                                        GEP.getName());
            return new AddrSpaceCastInst(NewGEP, GEPType);
          }
        }
      }
    } else if (GEP.getNumOperands() == 2) {
      // Transform things like:
      // %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V
      // into:  %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast
      if (StrippedPtrEltTy->isArrayTy() &&
          DL.getTypeAllocSize(StrippedPtrEltTy->getArrayElementType()) ==
              DL.getTypeAllocSize(GEPEltType)) {
        Type *IdxType = DL.getIndexType(GEPType);
        Value *Idx[2] = { Constant::getNullValue(IdxType), GEP.getOperand(1) };
        Value *NewGEP =
            GEP.isInBounds()
                ? Builder.CreateInBoundsGEP(StrippedPtrEltTy, StrippedPtr, Idx,
                                            GEP.getName())
                : Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Idx,
                                    GEP.getName());

        // V and GEP are both pointer types --> BitCast
        return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, GEPType);
      }

      // Transform things like:
      // %V = mul i64 %N, 4
      // %t = getelementptr i8* bitcast (i32* %arr to i8*), i32 %V
      // into:  %t1 = getelementptr i32* %arr, i32 %N; bitcast
      if (GEPEltType->isSized() && StrippedPtrEltTy->isSized()) {
        // Check that changing the type amounts to dividing the index by a scale
        // factor.
        uint64_t ResSize = DL.getTypeAllocSize(GEPEltType);
        uint64_t SrcSize = DL.getTypeAllocSize(StrippedPtrEltTy);
        if (ResSize && SrcSize % ResSize == 0) {
          Value *Idx = GEP.getOperand(1);
          unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
          uint64_t Scale = SrcSize / ResSize;

          // Earlier transforms ensure that the index has the right type
          // according to Data Layout, which considerably simplifies the
          // logic by eliminating implicit casts.
          assert(Idx->getType() == DL.getIndexType(GEPType) &&
                 "Index type does not match the Data Layout preferences");

          bool NSW;
          if (Value *NewIdx = Descale(Idx, APInt(BitWidth, Scale), NSW)) {
            // Successfully decomposed Idx as NewIdx * Scale, form a new GEP.
            // If the multiplication NewIdx * Scale may overflow then the new
            // GEP may not be "inbounds".
            Value *NewGEP =
                GEP.isInBounds() && NSW
                    ? Builder.CreateInBoundsGEP(StrippedPtrEltTy, StrippedPtr,
                                                NewIdx, GEP.getName())
                    : Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, NewIdx,
                                        GEP.getName());

            // The NewGEP must be pointer typed, so must the old one -> BitCast
            return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
                                                                 GEPType);
          }
        }
      }

      // Similarly, transform things like:
      // getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp
      //   (where tmp = 8*tmp2) into:
      // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast
      if (GEPEltType->isSized() && StrippedPtrEltTy->isSized() &&
          StrippedPtrEltTy->isArrayTy()) {
        // Check that changing to the array element type amounts to dividing the
        // index by a scale factor.
        uint64_t ResSize = DL.getTypeAllocSize(GEPEltType);
        uint64_t ArrayEltSize =
            DL.getTypeAllocSize(StrippedPtrEltTy->getArrayElementType());
        if (ResSize && ArrayEltSize % ResSize == 0) {
          Value *Idx = GEP.getOperand(1);
          unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
          uint64_t Scale = ArrayEltSize / ResSize;

          // Earlier transforms ensure that the index has the right type
          // according to the Data Layout, which considerably simplifies
          // the logic by eliminating implicit casts.
          assert(Idx->getType() == DL.getIndexType(GEPType) &&
                 "Index type does not match the Data Layout preferences");

          bool NSW;
          if (Value *NewIdx = Descale(Idx, APInt(BitWidth, Scale), NSW)) {
            // Successfully decomposed Idx as NewIdx * Scale, form a new GEP.
            // If the multiplication NewIdx * Scale may overflow then the new
            // GEP may not be "inbounds".
            Type *IndTy = DL.getIndexType(GEPType);
            Value *Off[2] = {Constant::getNullValue(IndTy), NewIdx};

            Value *NewGEP =
                GEP.isInBounds() && NSW
                    ? Builder.CreateInBoundsGEP(StrippedPtrEltTy, StrippedPtr,
                                                Off, GEP.getName())
                    : Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Off,
                                        GEP.getName());
            // The NewGEP must be pointer typed, so must the old one -> BitCast
            return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
                                                                 GEPType);
          }
        }
      }
    }
  }

  // addrspacecast between types is canonicalized as a bitcast, then an
  // addrspacecast. To take advantage of the below bitcast + struct GEP, look
  // through the addrspacecast.
  Value *ASCStrippedPtrOp = PtrOp;
  if (auto *ASC = dyn_cast<AddrSpaceCastInst>(PtrOp)) {
    //   X = bitcast A addrspace(1)* to B addrspace(1)*
    //   Y = addrspacecast A addrspace(1)* to B addrspace(2)*
    //   Z = gep Y, <...constant indices...>
    // Into an addrspacecasted GEP of the struct.
    if (auto *BC = dyn_cast<BitCastInst>(ASC->getOperand(0)))
      ASCStrippedPtrOp = BC;
  }

  if (auto *BCI = dyn_cast<BitCastInst>(ASCStrippedPtrOp)) {
    Value *SrcOp = BCI->getOperand(0);
    PointerType *SrcType = cast<PointerType>(BCI->getSrcTy());
    Type *SrcEltType = SrcType->getElementType();

    // GEP directly using the source operand if this GEP is accessing an element
    // of a bitcasted pointer to vector or array of the same dimensions:
    // gep (bitcast <c x ty>* X to [c x ty]*), Y, Z --> gep X, Y, Z
    // gep (bitcast [c x ty]* X to <c x ty>*), Y, Z --> gep X, Y, Z
    auto areMatchingArrayAndVecTypes = [](Type *ArrTy, Type *VecTy) {
      return ArrTy->getArrayElementType() == VecTy->getVectorElementType() &&
             ArrTy->getArrayNumElements() == VecTy->getVectorNumElements();
    };
    if (GEP.getNumOperands() == 3 &&
        ((GEPEltType->isArrayTy() && SrcEltType->isVectorTy() &&
          areMatchingArrayAndVecTypes(GEPEltType, SrcEltType)) ||
         (GEPEltType->isVectorTy() && SrcEltType->isArrayTy() &&
          areMatchingArrayAndVecTypes(SrcEltType, GEPEltType)))) {

      // Create a new GEP here, as using `setOperand()` followed by
      // `setSourceElementType()` won't actually update the type of the
      // existing GEP Value, which causes issues if this Value is accessed
      // when constructing an AddrSpaceCastInst.
      Value *NGEP =
          GEP.isInBounds()
              ? Builder.CreateInBoundsGEP(SrcEltType, SrcOp, {Ops[1], Ops[2]})
              : Builder.CreateGEP(SrcEltType, SrcOp, {Ops[1], Ops[2]});
      NGEP->takeName(&GEP);

      // Preserve GEP address space to satisfy users
      if (NGEP->getType()->getPointerAddressSpace() != GEP.getAddressSpace())
        return new AddrSpaceCastInst(NGEP, GEPType);

      return replaceInstUsesWith(GEP, NGEP);
    }

    // See if we can simplify:
    //   X = bitcast A* to B*
    //   Y = gep X, <...constant indices...>
    // into a gep of the original struct. This is important for SROA and alias
    // analysis of unions. If "A" is also a bitcast, wait for A/X to be merged.
    unsigned OffsetBits = DL.getIndexTypeSizeInBits(GEPType);
    APInt Offset(OffsetBits, 0);
    if (!isa<BitCastInst>(SrcOp) && GEP.accumulateConstantOffset(DL, Offset)) {
      // If this GEP instruction doesn't move the pointer, just replace the GEP
      // with a bitcast of the real input to the dest type.
      if (!Offset) {
        // If the bitcast is of an allocation, and the allocation will be
        // converted to match the type of the cast, don't touch this.
        if (isa<AllocaInst>(SrcOp) || isAllocationFn(SrcOp, &TLI)) {
          // See if the bitcast simplifies, if so, don't nuke this GEP yet.
          if (Instruction *I = visitBitCast(*BCI)) {
            if (I != BCI) {
              I->takeName(BCI);
              BCI->getParent()->getInstList().insert(BCI->getIterator(), I);
              replaceInstUsesWith(*BCI, I);
            }
            return &GEP;
          }
        }

        if (SrcType->getPointerAddressSpace() != GEP.getAddressSpace())
          return new AddrSpaceCastInst(SrcOp, GEPType);
        return new BitCastInst(SrcOp, GEPType);
      }

      // Otherwise, if the offset is non-zero, we need to find out if there is a
      // field at Offset in 'A's type.  If so, we can pull the cast through the
      // GEP.
      SmallVector<Value*, 8> NewIndices;
      if (FindElementAtOffset(SrcType, Offset.getSExtValue(), NewIndices)) {
        Value *NGEP =
            GEP.isInBounds()
                ? Builder.CreateInBoundsGEP(SrcEltType, SrcOp, NewIndices)
                : Builder.CreateGEP(SrcEltType, SrcOp, NewIndices);

        if (NGEP->getType() == GEPType)
          return replaceInstUsesWith(GEP, NGEP);
        NGEP->takeName(&GEP);

        if (NGEP->getType()->getPointerAddressSpace() != GEP.getAddressSpace())
          return new AddrSpaceCastInst(NGEP, GEPType);
        return new BitCastInst(NGEP, GEPType);
      }
    }
  }

  if (!GEP.isInBounds()) {
    unsigned IdxWidth =
        DL.getIndexSizeInBits(PtrOp->getType()->getPointerAddressSpace());
    APInt BasePtrOffset(IdxWidth, 0);
    Value *UnderlyingPtrOp =
            PtrOp->stripAndAccumulateInBoundsConstantOffsets(DL,
                                                             BasePtrOffset);
    if (auto *AI = dyn_cast<AllocaInst>(UnderlyingPtrOp)) {
      if (GEP.accumulateConstantOffset(DL, BasePtrOffset) &&
          BasePtrOffset.isNonNegative()) {
        APInt AllocSize(IdxWidth, DL.getTypeAllocSize(AI->getAllocatedType()));
        if (BasePtrOffset.ule(AllocSize)) {
          return GetElementPtrInst::CreateInBounds(
              GEP.getSourceElementType(), PtrOp, makeArrayRef(Ops).slice(1),
              GEP.getName());
        }
      }
    }
  }

  return nullptr;
}
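
// Editorial note (illustrative sketch, not part of the original source): the
// gep-of-gep merge near the top of visitGetElementPtrInst, on IR:
//   %p1 = getelementptr inbounds i32, i32* %base, i64 5
//   %p2 = getelementptr inbounds i32, i32* %p1, i64 7
// -->
//   %p2 = getelementptr inbounds i32, i32* %base, i64 12
// The merge only fires when SimplifyAddInst can produce the summed index
// without emitting a new instruction (here, constant folding 5 + 7).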

static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo *TLI,
                                         Instruction *AI) {
  if (isa<ConstantPointerNull>(V))
    return true;
  if (auto *LI = dyn_cast<LoadInst>(V))
    return isa<GlobalVariable>(LI->getPointerOperand());
  // Two distinct allocations will never be equal.
  // We rely on LookThroughBitCast in isAllocLikeFn being false, since looking
  // through bitcasts of V can cause the result statement below to be true,
  // even when AI and V (ex: i8* ->i32* ->i8* of AI) are the same allocations.
  return isAllocLikeFn(V, TLI) && V != AI;
}
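
// Editorial note (illustrative sketch, not part of the original source):
// values the predicate above accepts as provably unequal to a non-escaping
// allocation: null, a pointer loaded directly from a global variable, and a
// distinct allocation call, e.g.
//   %c = icmp eq i8* %alloc, null           ; null never equals the alloc
//   %d = icmp eq i8* %alloc, %other_malloc  ; two allocations never compare equal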

static bool isAllocSiteRemovable(Instruction *AI,
                                 SmallVectorImpl<WeakTrackingVH> &Users,
                                 const TargetLibraryInfo *TLI) {
  SmallVector<Instruction*, 4> Worklist;
  Worklist.push_back(AI);

  do {
    Instruction *PI = Worklist.pop_back_val();
    for (User *U : PI->users()) {
      Instruction *I = cast<Instruction>(U);
      switch (I->getOpcode()) {
      default:
        // Give up the moment we see something we can't handle.
        return false;

      case Instruction::AddrSpaceCast:
      case Instruction::BitCast:
      case Instruction::GetElementPtr:
        Users.emplace_back(I);
        Worklist.push_back(I);
        continue;

      case Instruction::ICmp: {
        ICmpInst *ICI = cast<ICmpInst>(I);
        // We can fold eq/ne comparisons with null to false/true, respectively.
        // We also fold comparisons in some conditions provided the alloc has
        // not escaped (see isNeverEqualToUnescapedAlloc).
        if (!ICI->isEquality())
          return false;
        unsigned OtherIndex = (ICI->getOperand(0) == PI) ? 1 : 0;
        if (!isNeverEqualToUnescapedAlloc(ICI->getOperand(OtherIndex), TLI, AI))
          return false;
        Users.emplace_back(I);
        continue;
      }

      case Instruction::Call:
        // Ignore no-op and store intrinsics.
        if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
          switch (II->getIntrinsicID()) {
          default:
            return false;

          case Intrinsic::memmove:
          case Intrinsic::memcpy:
          case Intrinsic::memset: {
            MemIntrinsic *MI = cast<MemIntrinsic>(II);
            if (MI->isVolatile() || MI->getRawDest() != PI)
              return false;
            LLVM_FALLTHROUGH;
          }
          case Intrinsic::invariant_start:
          case Intrinsic::invariant_end:
          case Intrinsic::lifetime_start:
          case Intrinsic::lifetime_end:
          case Intrinsic::objectsize:
            Users.emplace_back(I);
            continue;
          }
        }

        if (isFreeCall(I, TLI)) {
          Users.emplace_back(I);
          continue;
        }
        return false;

      case Instruction::Store: {
        StoreInst *SI = cast<StoreInst>(I);
        if (SI->isVolatile() || SI->getPointerOperand() != PI)
          return false;
        Users.emplace_back(I);
        continue;
      }
      }
      llvm_unreachable("missing a return?");
    }
  } while (!Worklist.empty());
  return true;
}
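
The scan above transitively walks the users of an allocation and succeeds only when every user is harmless. An illustrative source-level pattern it would accept (plain C++, not LLVM code; function name invented):

#include <cstdlib>
#include <cstring>

// Every user of `p` below falls into a category the scan accepts: a null
// compare, a memset whose destination is the allocation itself, a plain
// store into it, and the matching free. Nothing ever reads `p`, so the
// allocation and all of these users can be deleted.
int removable() {
  char *p = static_cast<char *>(std::malloc(16));
  if (p == nullptr)       // ICmp against null: foldable
    return 0;
  std::memset(p, 0, 16);  // memset to the allocation
  p[0] = 42;              // store into the allocation
  std::free(p);           // free of the allocation
  return 1;
}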

Instruction *InstCombiner::visitAllocSite(Instruction &MI) {
  // If we have a malloc call which is used only in comparisons to null and in
  // free calls, delete the calls and replace the comparisons with true or
  // false as appropriate.

  // This is based on the principle that we can substitute our own allocation
  // function (which will never return null) rather than knowledge of the
  // specific function being called. In some sense this can change the permitted
  // outputs of a program (when we convert a malloc to an alloca, the fact that
  // the allocation is now on the stack is potentially visible, for example),
  // but we believe it does so in a permissible manner.
  SmallVector<WeakTrackingVH, 64> Users;

  // If we are removing an alloca with a dbg.declare, insert dbg.value calls
  // before each store.
  TinyPtrVector<DbgVariableIntrinsic *> DIIs;
  std::unique_ptr<DIBuilder> DIB;
  if (isa<AllocaInst>(MI)) {
    DIIs = FindDbgAddrUses(&MI);
    DIB.reset(new DIBuilder(*MI.getModule(), /*AllowUnresolved=*/false));
  }

  if (isAllocSiteRemovable(&MI, Users, &TLI)) {
    for (unsigned i = 0, e = Users.size(); i != e; ++i) {
      // Lower all @llvm.objectsize calls first because they may use a
      // bitcast/GEP of the alloca we are removing.
      if (!Users[i])
        continue;

      Instruction *I = cast<Instruction>(&*Users[i]);

      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
        if (II->getIntrinsicID() == Intrinsic::objectsize) {
          Value *Result =
              lowerObjectSizeCall(II, DL, &TLI, /*MustSucceed=*/true);
          replaceInstUsesWith(*I, Result);
          eraseInstFromFunction(*I);
          Users[i] = nullptr; // Skip examining in the next loop.
        }
      }
    }
    for (unsigned i = 0, e = Users.size(); i != e; ++i) {
      if (!Users[i])
        continue;

      Instruction *I = cast<Instruction>(&*Users[i]);

      if (ICmpInst *C = dyn_cast<ICmpInst>(I)) {
        replaceInstUsesWith(*C,
                            ConstantInt::get(Type::getInt1Ty(C->getContext()),
                                             C->isFalseWhenEqual()));
      } else if (isa<BitCastInst>(I) || isa<GetElementPtrInst>(I) ||
                 isa<AddrSpaceCastInst>(I)) {
        replaceInstUsesWith(*I, UndefValue::get(I->getType()));
      } else if (auto *SI = dyn_cast<StoreInst>(I)) {
        for (auto *DII : DIIs)
          ConvertDebugDeclareToDebugValue(DII, SI, *DIB);
      }
      eraseInstFromFunction(*I);
    }

    if (InvokeInst *II = dyn_cast<InvokeInst>(&MI)) {
      // Replace the invoke with a NOP intrinsic to maintain the original CFG.
      Module *M = II->getModule();
      Function *F = Intrinsic::getDeclaration(M, Intrinsic::donothing);
      InvokeInst::Create(F, II->getNormalDest(), II->getUnwindDest(),
                         None, "", II->getParent());
    }

    for (auto *DII : DIIs)
      eraseInstFromFunction(*DII);

    return eraseInstFromFunction(MI);
  }
  return nullptr;
}
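
The icmp folding in the second loop uses isFalseWhenEqual(): once the allocation may be assumed non-null and unequal to anything it is compared against, `p == null` folds to false and `p != null` folds to true. A minimal sketch of that constant choice (plain C++, names invented):

#include <iostream>

// For an EQ compare of two values known unequal the result is false
// ("false when equal"); for NE it is true.
enum class Pred { EQ, NE };

bool foldCompareOfKnownUnequal(Pred P) { return P == Pred::NE; }

int main() {
  std::cout << foldCompareOfKnownUnequal(Pred::EQ) << ' '
            << foldCompareOfKnownUnequal(Pred::NE) << '\n'; // prints "0 1"
}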

/// Move the call to free before a NULL test.
///
/// Check if this free is reached only after its argument has been tested
/// against NULL (property 0).
/// If yes, it is legal to move this call to its predecessor block.
///
/// The move is performed only if the block containing the call to free
/// will be removed, i.e.:
/// 1. it has only one predecessor P, and P has two successors
/// 2. it contains the call, noops, and an unconditional branch
/// 3. its successor is the same as its predecessor's successor
///
/// Profitability is not a concern here; this function should be called only
/// if the caller knows the transformation would be profitable (e.g., for
/// code size).
static Instruction *tryToMoveFreeBeforeNullTest(CallInst &FI,
                                                const DataLayout &DL) {
  Value *Op = FI.getArgOperand(0);
  BasicBlock *FreeInstrBB = FI.getParent();
  BasicBlock *PredBB = FreeInstrBB->getSinglePredecessor();

  // Validate part of constraint #1: Only one predecessor.
  // FIXME: We can extend the number of predecessors, but in that case, we
  //        would duplicate the call to free in each predecessor and it may
  //        not be profitable even for code size.
  if (!PredBB)
    return nullptr;

  // Validate constraint #2: Does this block contain only the call to
  //                         free, noops, and an unconditional branch?
  BasicBlock *SuccBB;
  Instruction *FreeInstrBBTerminator = FreeInstrBB->getTerminator();
  if (!match(FreeInstrBBTerminator, m_UnconditionalBr(SuccBB)))
    return nullptr;

  // If there are only 2 instructions in the block, at this point they are
  // the call to free and the unconditional branch.
  // If there are more than 2 instructions, check that the others are noops,
  // i.e., they won't hurt the performance of the generated code.
  if (FreeInstrBB->size() != 2) {
    for (const Instruction &Inst : *FreeInstrBB) {
      if (&Inst == &FI || &Inst == FreeInstrBBTerminator)
        continue;
      auto *Cast = dyn_cast<CastInst>(&Inst);
      if (!Cast || !Cast->isNoopCast(DL))
        return nullptr;
    }
  }
  // Validate the rest of constraint #1 by matching on the pred branch.
  Instruction *TI = PredBB->getTerminator();
  BasicBlock *TrueBB, *FalseBB;
  ICmpInst::Predicate Pred;
  if (!match(TI, m_Br(m_ICmp(Pred,
                             m_CombineOr(m_Specific(Op),
                                         m_Specific(Op->stripPointerCasts())),
                             m_Zero()),
                      TrueBB, FalseBB)))
    return nullptr;
  if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)
    return nullptr;

  // Validate constraint #3: Ensure the null case just falls through.
  if (SuccBB != (Pred == ICmpInst::ICMP_EQ ? TrueBB : FalseBB))
    return nullptr;
  assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) &&
         "Broken CFG: missing edge from predecessor to successor");

  // At this point, we know that everything in FreeInstrBB can be moved
  // before TI.
  for (BasicBlock::iterator It = FreeInstrBB->begin(), End = FreeInstrBB->end();
       It != End;) {
    Instruction &Instr = *It++;
    if (&Instr == FreeInstrBBTerminator)
      break;
    Instr.moveBefore(TI);
  }
  assert(FreeInstrBB->size() == 1 &&
         "Only the branch instruction should remain");
  return &FI;
}
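
The motion is legal at the source level because free(NULL) is defined to be a no-op, so a null guard around a free is redundant. A plain-C++ sketch of the before/after shapes (not LLVM code):

#include <cstdlib>

// "if (p) free(p);" places the free in its own guarded block;
// the rewrite makes it unconditional, letting SimplifyCFG remove the
// now-empty block and the branch.
void releaseGuarded(void *p) {
  if (p)
    std::free(p); // guarded form: branch + free in a separate block
}

void releaseUnguarded(void *p) {
  std::free(p);   // equivalent: free(NULL) does nothing
}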

Instruction *InstCombiner::visitFree(CallInst &FI) {
  Value *Op = FI.getArgOperand(0);

  // free undef -> unreachable.
  if (isa<UndefValue>(Op)) {
    // Leave a marker since we can't modify the CFG here.
    CreateNonTerminatorUnreachable(&FI);
    return eraseInstFromFunction(FI);
  }

  // If we have 'free null', delete the instruction. This can happen in STL
  // code when lots of inlining happens.
  if (isa<ConstantPointerNull>(Op))
    return eraseInstFromFunction(FI);

  // If we optimize for code size, try to move the call to free before the null
  // test so that SimplifyCFG can remove the empty block and dead code
  // elimination can remove the branch. I.e., this helps to turn something like:
  //   if (foo) free(foo);
  // into
  //   free(foo);
  if (MinimizeSize)
    if (Instruction *I = tryToMoveFreeBeforeNullTest(FI, DL))
      return I;

  return nullptr;
}

Instruction *InstCombiner::visitReturnInst(ReturnInst &RI) {
  if (RI.getNumOperands() == 0) // ret void
    return nullptr;

  Value *ResultOp = RI.getOperand(0);
  Type *VTy = ResultOp->getType();
  if (!VTy->isIntegerTy())
    return nullptr;

  // There might be assume intrinsics dominating this return that completely
  // determine the value. If so, constant fold it.
  KnownBits Known = computeKnownBits(ResultOp, 0, &RI);
  if (Known.isConstant())
    RI.setOperand(0, Constant::getIntegerValue(VTy, Known.getConstant()));

  return nullptr;
}
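
The fold above fires when known-bits analysis pins down every bit of the returned value. A minimal known-bits model (an assumption-level sketch, not LLVM's KnownBits class; all names invented):

#include <cstdint>
#include <iostream>

// Track which bits are known zero and known one; a value is fully known,
// and thus foldable to a constant as in visitReturnInst, when every bit is
// in exactly one of the two masks.
struct KnownBits8 {
  uint8_t Zero = 0, One = 0; // known-0 / known-1 masks
  bool isConstant() const { return uint8_t(Zero | One) == 0xFF; }
  uint8_t getConstant() const { return One; }
};

int main() {
  // Model "(x & 0xF0) | 0x0F": low nibble known one, high nibble unknown...
  KnownBits8 K{0x00, 0x0F};
  std::cout << K.isConstant() << '\n';                    // 0: not fully known
  // ...but a value with every bit determined folds to 42.
  KnownBits8 C{uint8_t(~0x2A), 0x2A};
  std::cout << C.isConstant() << ' ' << int(C.getConstant()) << '\n'; // "1 42"
}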

Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
  // Change br (not X), label True, label False to: br X, label False, True
  Value *X = nullptr;
  BasicBlock *TrueDest;
  BasicBlock *FalseDest;
  if (match(&BI, m_Br(m_Not(m_Value(X)), TrueDest, FalseDest)) &&
      !isa<Constant>(X)) {
    // Swap destinations and condition...
    BI.setCondition(X);
    BI.swapSuccessors();
    return &BI;
  }

  // If the condition is irrelevant, remove the use so that other
  // transforms on the condition become more effective.
  if (BI.isConditional() && !isa<ConstantInt>(BI.getCondition()) &&
      BI.getSuccessor(0) == BI.getSuccessor(1)) {
    BI.setCondition(ConstantInt::getFalse(BI.getCondition()->getType()));
    return &BI;
  }

  // Canonicalize, for example, icmp_ne -> icmp_eq or fcmp_one -> fcmp_oeq.
  CmpInst::Predicate Pred;
  if (match(&BI, m_Br(m_OneUse(m_Cmp(Pred, m_Value(), m_Value())), TrueDest,
                      FalseDest)) &&
      !isCanonicalPredicate(Pred)) {
    // Swap destinations and condition.
    CmpInst *Cond = cast<CmpInst>(BI.getCondition());
    Cond->setPredicate(CmpInst::getInversePredicate(Pred));
    BI.swapSuccessors();
    Worklist.Add(Cond);
    return &BI;
  }

  return nullptr;
}
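
The first rewrite relies on a simple identity: branching on !X to (T, F) behaves exactly like branching on X to (F, T), so the `not` can be dropped by swapping successors. A standalone C++ sketch (bools standing in for the two branch targets; not LLVM code):

#include <cassert>

enum Target { TakeTrue, TakeFalse };

Target brOnNot(bool X) { return !X ? TakeTrue : TakeFalse; } // br (not X), T, F
Target brSwapped(bool X) { return X ? TakeFalse : TakeTrue; } // br X, F, T

int main() {
  assert(brOnNot(true) == brSwapped(true));   // identical for every input
  assert(brOnNot(false) == brSwapped(false));
}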

Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
  Value *Cond = SI.getCondition();
  Value *Op0;
  ConstantInt *AddRHS;
  if (match(Cond, m_Add(m_Value(Op0), m_ConstantInt(AddRHS)))) {
    // Change 'switch (X+4) case 1:' into 'switch (X) case -3'.
    for (auto Case : SI.cases()) {
      Constant *NewCase = ConstantExpr::getSub(Case.getCaseValue(), AddRHS);
      assert(isa<ConstantInt>(NewCase) &&
             "Result of expression should be constant");
      Case.setValue(cast<ConstantInt>(NewCase));
    }
    SI.setCondition(Op0);
    return &SI;
  }

  KnownBits Known = computeKnownBits(Cond, 0, &SI);
  unsigned LeadingKnownZeros = Known.countMinLeadingZeros();
  unsigned LeadingKnownOnes = Known.countMinLeadingOnes();

  // Compute the number of leading bits we can ignore.
  // TODO: A better way to determine this would use ComputeNumSignBits().
  for (auto &C : SI.cases()) {
    LeadingKnownZeros = std::min(
        LeadingKnownZeros, C.getCaseValue()->getValue().countLeadingZeros());
    LeadingKnownOnes = std::min(
        LeadingKnownOnes, C.getCaseValue()->getValue().countLeadingOnes());
  }

  unsigned NewWidth = Known.getBitWidth() -
                      std::max(LeadingKnownZeros, LeadingKnownOnes);

  // Shrink the condition operand if the new type is smaller than the old type.
  // But do not shrink to a non-standard type, because the backend can't
  // generate good code for that yet.
  // TODO: We can make this aggressive again after fixing PR39569.
  if (NewWidth > 0 && NewWidth < Known.getBitWidth() &&
      shouldChangeType(Known.getBitWidth(), NewWidth)) {
    IntegerType *Ty = IntegerType::get(SI.getContext(), NewWidth);
    Builder.SetInsertPoint(&SI);
    Value *NewCond = Builder.CreateTrunc(Cond, Ty, "trunc");
    SI.setCondition(NewCond);

    for (auto Case : SI.cases()) {
      APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(NewWidth);
      Case.setValue(ConstantInt::get(SI.getContext(), TruncatedCase));
    }
    return &SI;
  }

  return nullptr;
}
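
The first transform folds a constant add in the switch condition into the case labels: X + 4 == 1 exactly when X == 1 - 4 (in wrapping two's-complement arithmetic, which is what LLVM's add performs). A standalone C++ sketch of the equivalence (not LLVM code):

#include <cassert>

int dispatchOriginal(int X) {
  switch (X + 4) {          // condition carries the add
  case 1:  return 10;
  case 7:  return 70;
  default: return -1;
  }
}

int dispatchRewritten(int X) {
  switch (X) {              // add folded into the case labels
  case 1 - 4: return 10;    // i.e. case -3
  case 7 - 4: return 70;    // i.e. case 3
  default:    return -1;
  }
}

int main() {
  for (int X = -10; X <= 10; ++X)
    assert(dispatchOriginal(X) == dispatchRewritten(X));
}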

Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
  Value *Agg = EV.getAggregateOperand();

  if (!EV.hasIndices())
    return replaceInstUsesWith(EV, Agg);

  if (Value *V = SimplifyExtractValueInst(Agg, EV.getIndices(),
                                          SQ.getWithInstruction(&EV)))
    return replaceInstUsesWith(EV, V);

  if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) {
    // We're extracting from an insertvalue instruction, compare the indices
    const unsigned *exti, *exte, *insi, *inse;
    for (exti = EV.idx_begin(), insi = IV->idx_begin(),
         exte = EV.idx_end(), inse = IV->idx_end();
         exti != exte && insi != inse;
         ++exti, ++insi) {
      if (*insi != *exti)
        // The insert and extract both reference distinctly different elements.
        // This means the extract is not influenced by the insert, and we can
        // replace the aggregate operand of the extract with the aggregate
        // operand of the insert. i.e., replace
        // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
        // %E = extractvalue { i32, { i32 } } %I, 0
        // with
        // %E = extractvalue { i32, { i32 } } %A, 0
        return ExtractValueInst::Create(IV->getAggregateOperand(),
                                        EV.getIndices());
    }
    if (exti == exte && insi == inse)
      // Both iterators are at the end: Index lists are identical. Replace
      // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
      // %C = extractvalue { i32, { i32 } } %B, 1, 0
      // with "i32 42"
      return replaceInstUsesWith(EV, IV->getInsertedValueOperand());
    if (exti == exte) {
      // The extract list is a prefix of the insert list. i.e. replace
      // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
      // %E = extractvalue { i32, { i32 } } %I, 1
      // with
      // %X = extractvalue { i32, { i32 } } %A, 1
      // %E = insertvalue { i32 } %X, i32 42, 0
      // by switching the order of the insert and extract (though the
      // insertvalue should be left in, since it may have other uses).
      Value *NewEV = Builder.CreateExtractValue(IV->getAggregateOperand(),
                                                EV.getIndices());
      return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
                                     makeArrayRef(insi, inse));
    }
    if (insi == inse)
      // The insert list is a prefix of the extract list.
      // We can simply remove the common indices from the extract and make it
      // operate on the inserted value instead of the insertvalue result.
      // i.e., replace
      // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
      // %E = extractvalue { i32, { i32 } } %I, 1, 0
      // with
      // %E = extractvalue { i32 } { i32 42 }, 0
      return ExtractValueInst::Create(IV->getInsertedValueOperand(),
                                      makeArrayRef(exti, exte));
  }
  if (WithOverflowInst *WO = dyn_cast<WithOverflowInst>(Agg)) {
    // We're extracting from an overflow intrinsic, see if we're the only user,
    // which allows us to simplify multiple result intrinsics to simpler
    // things that just get one value.
    if (WO->hasOneUse()) {
      // Check if we're grabbing only the result of a 'with overflow' intrinsic
      // and replace it with a traditional binary instruction.
      if (*EV.idx_begin() == 0) {
        Instruction::BinaryOps BinOp = WO->getBinaryOp();
        Value *LHS = WO->getLHS(), *RHS = WO->getRHS();
        replaceInstUsesWith(*WO, UndefValue::get(WO->getType()));
        eraseInstFromFunction(*WO);
        return BinaryOperator::Create(BinOp, LHS, RHS);
      }

      // If the normal result of the add is dead, and the RHS is a constant,
      // we can transform this into a range comparison.
      // overflow = uadd a, -4  -->  overflow = icmp ugt a, 3
      if (WO->getIntrinsicID() == Intrinsic::uadd_with_overflow)
        if (ConstantInt *CI = dyn_cast<ConstantInt>(WO->getRHS()))
          return new ICmpInst(ICmpInst::ICMP_UGT, WO->getLHS(),
                              ConstantExpr::getNot(CI));
    }
  }
  if (LoadInst *L = dyn_cast<LoadInst>(Agg))
    // If the (non-volatile) load only has one use, we can rewrite this to a
    // load from a GEP. This reduces the size of the load. If a load is used
    // only by extractvalue instructions then this either must have been
    // optimized before, or it is a struct with padding, in which case we
    // don't want to do the transformation as it loses padding knowledge.
    if (L->isSimple() && L->hasOneUse()) {
      // extractvalue has integer indices, getelementptr has Value*s. Convert.
      SmallVector<Value*, 4> Indices;
      // Prefix an i32 0 since we need the first element.
      Indices.push_back(Builder.getInt32(0));
      for (ExtractValueInst::idx_iterator I = EV.idx_begin(), E = EV.idx_end();
            I != E; ++I)
        Indices.push_back(Builder.getInt32(*I));

      // We need to insert these at the location of the old load, not at that of
      // the extractvalue.
      Builder.SetInsertPoint(L);
      Value *GEP = Builder.CreateInBoundsGEP(L->getType(),
                                             L->getPointerOperand(), Indices);
      Instruction *NL = Builder.CreateLoad(EV.getType(), GEP);
      // Whatever aliasing information we had for the original load must also
      // hold for the smaller load, so propagate the annotations.
      AAMDNodes Nodes;
      L->getAAMetadata(Nodes);
      NL->setAAMetadata(Nodes);
      // Returning the load directly will cause the main loop to insert it in
      // the wrong spot, so use replaceInstUsesWith().
      return replaceInstUsesWith(EV, NL);
    }
  // We could simplify extracts from other values. Note that nested extracts may
  // already be simplified implicitly by the above: extract (extract (insert) )
  // will be translated into extract ( insert ( extract ) ) first and then just
  // the value inserted, if appropriate. Similarly for extracts from single-use
  // loads: extract (extract (load)) will be translated to extract (load (gep))
  // and if again single-use then via load (gep (gep)) to load (gep).
  // However, double extracts from e.g. function arguments or return values
  // aren't handled yet.
  return nullptr;
}
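
The uadd.with.overflow rewrite above uses a range identity: for unsigned a and constant c, a + c overflows exactly when a > ~c (that is, a > UINT_MAX - c), so when only the overflow bit is used the intrinsic becomes an icmp ugt. A standalone C++ sketch of the equivalence (not LLVM code):

#include <cassert>
#include <cstdint>

bool uaddOverflows(uint32_t A, uint32_t C) {
  return A + C < A; // wrap-around detection: the real overflow bit
}

bool rewritten(uint32_t A, uint32_t C) {
  return A > ~C;    // the icmp ugt form InstCombine emits
}

int main() {
  const uint32_t C = uint32_t(-4); // the comment's "uadd a, -4" example
  const uint32_t Tests[] = {0, 1, 2, 3, 4, 5, 0xFFFFFFFFu};
  for (uint32_t A : Tests)
    assert(uaddOverflows(A, C) == rewritten(A, C)); // both mean "a > 3" here
}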

/// Return 'true' if the given typeinfo will match anything.
static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) {
  switch (Personality) {
  case EHPersonality::GNU_C:
  case EHPersonality::GNU_C_SjLj:
  case EHPersonality::Rust:
    // The GCC C EH and Rust personalities only exist to support cleanups, so
    // it's not clear what the semantics of catch clauses are.
    return false;
  case EHPersonality::Unknown:
    return false;
  case EHPersonality::GNU_Ada:
    // While __gnat_all_others_value will match any Ada exception, it doesn't
    // match foreign exceptions (or didn't, before gcc-4.7).
    return false;
  case EHPersonality::GNU_CXX:
  case EHPersonality::GNU_CXX_SjLj:
  case EHPersonality::GNU_ObjC:
  case EHPersonality::MSVC_X86SEH:
  case EHPersonality::MSVC_Win64SEH:
  case EHPersonality::MSVC_CXX:
  case EHPersonality::CoreCLR:
  case EHPersonality::Wasm_CXX:
    return TypeInfo->isNullValue();
  }
  llvm_unreachable("invalid enum");
}

static bool shorter_filter(const Value *LHS, const Value *RHS) {
  return cast<ArrayType>(LHS->getType())->getNumElements() <
         cast<ArrayType>(RHS->getType())->getNumElements();
}

Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) {
  // The logic here should be correct for any real-world personality function.
  // However if that turns out not to be true, the offending logic can always
  // be conditioned on the personality function, like the catch-all logic is.
  EHPersonality Personality =
      classifyEHPersonality(LI.getParent()->getParent()->getPersonalityFn());

  // Simplify the list of clauses, e.g. by removing repeated catch clauses
  // (these are often created by inlining).
  bool MakeNewInstruction = false; // If true, recreate using the following:
  SmallVector<Constant *, 16> NewClauses; // - Clauses for the new instruction;
  bool CleanupFlag = LI.isCleanup();   // - The new instruction is a cleanup.

  SmallPtrSet<Value *, 16> AlreadyCaught; // Typeinfos known caught already.
  for (unsigned i = 0, e = LI.getNumClauses(); i != e; ++i) {
    bool isLastClause = i + 1 == e;
    if (LI.isCatch(i)) {
      // A catch clause.
      Constant *CatchClause = LI.getClause(i);
      Constant *TypeInfo = CatchClause->stripPointerCasts();

      // If we already saw this clause, there is no point in having a second
      // copy of it.
      if (AlreadyCaught.insert(TypeInfo).second) {
        // This catch clause was not already seen.
        NewClauses.push_back(CatchClause);
      } else {
        // Repeated catch clause - drop the redundant copy.
        MakeNewInstruction = true;
      }

      // If this is a catch-all then there is no point in keeping any following
      // clauses or marking the landingpad as having a cleanup.
      if (isCatchAll(Personality, TypeInfo)) {
        if (!isLastClause)
          MakeNewInstruction = true;
        CleanupFlag = false;
        break;
      }
    } else {
      // A filter clause.  If any of the filter elements were already caught
      // then they can be dropped from the filter.  It is tempting to try to
      // exploit the filter further by saying that any typeinfo that does not
      // occur in the filter can't be caught later (and thus can be dropped).
      // However this would be wrong, since typeinfos can match without being
      // equal (for example if one represents a C++ class, and the other some
      // class derived from it).
      assert(LI.isFilter(i) && "Unsupported landingpad clause!");
      Constant *FilterClause = LI.getClause(i);
      ArrayType *FilterType = cast<ArrayType>(FilterClause->getType());
      unsigned NumTypeInfos = FilterType->getNumElements();

      // An empty filter catches everything, so there is no point in keeping any
      // following clauses or marking the landingpad as having a cleanup.  By
      // dealing with this case here the following code is made a bit simpler.
      if (!NumTypeInfos) {
        NewClauses.push_back(FilterClause);
        if (!isLastClause)
          MakeNewInstruction = true;
        CleanupFlag = false;
        break;
      }

      bool MakeNewFilter = false; // If true, make a new filter.
      SmallVector<Constant *, 16> NewFilterElts; // New elements.
      if (isa<ConstantAggregateZero>(FilterClause)) {
        // Not an empty filter - it contains at least one null typeinfo.
        assert(NumTypeInfos > 0 && "Should have handled empty filter already!");
        Constant *TypeInfo =
          Constant::getNullValue(FilterType->getElementType());
        // If this typeinfo is a catch-all then the filter can never match.
        if (isCatchAll(Personality, TypeInfo)) {
          // Throw the filter away.
          MakeNewInstruction = true;
          continue;
        }

        // There is no point in having multiple copies of this typeinfo, so
        // discard all but the first copy if there is more than one.
        NewFilterElts.push_back(TypeInfo);
        if (NumTypeInfos > 1)
          MakeNewFilter = true;
      } else {
        ConstantArray *Filter = cast<ConstantArray>(FilterClause);
        SmallPtrSet<Value *, 16> SeenInFilter; // For uniquing the elements.
        NewFilterElts.reserve(NumTypeInfos);

        // Remove any filter elements that were already caught or that already
        // occurred in the filter.  While there, see if any of the elements are
        // catch-alls.  If so, the filter can be discarded.
        bool SawCatchAll = false;
        for (unsigned j = 0; j != NumTypeInfos; ++j) {
          Constant *Elt = Filter->getOperand(j);
          Constant *TypeInfo = Elt->stripPointerCasts();
          if (isCatchAll(Personality, TypeInfo)) {
            // This element is a catch-all.  Bail out, noting this fact.
            SawCatchAll = true;
            break;
          }

          // Even if we've seen a type in a catch clause, we don't want to
          // remove it from the filter.  An unexpected type handler may be
          // set up for a call site which throws an exception of the same
          // type caught.  In order for the exception thrown by the unexpected
          // handler to propagate correctly, the filter must be correctly
          // described for the call site.
          //
          // Example:
          //
          // void unexpected() { throw 1;}
          // void foo() throw (int) {
          //   std::set_unexpected(unexpected);
          //   try {
          //     throw 2.0;
          //   } catch (int i) {}
          // }

          // There is no point in having multiple copies of the same typeinfo in
          // a filter, so only add it if we didn't already.
          if (SeenInFilter.insert(TypeInfo).second)
            NewFilterElts.push_back(cast<Constant>(Elt));
        }
        // A filter containing a catch-all cannot match anything by definition.
        if (SawCatchAll) {
          // Throw the filter away.
          MakeNewInstruction = true;
          continue;
        }

        // If we dropped something from the filter, make a new one.
        if (NewFilterElts.size() < NumTypeInfos)
          MakeNewFilter = true;
      }
      if (MakeNewFilter) {
        FilterType = ArrayType::get(FilterType->getElementType(),
                                    NewFilterElts.size());
        FilterClause = ConstantArray::get(FilterType, NewFilterElts);
        MakeNewInstruction = true;
      }

      NewClauses.push_back(FilterClause);

      // If the new filter is empty then it will catch everything so there is
      // no point in keeping any following clauses or marking the landingpad
      // as having a cleanup.  The case of the original filter being empty was
      // already handled above.
      if (MakeNewFilter && !NewFilterElts.size()) {
        assert(MakeNewInstruction && "New filter but not a new instruction!");
        CleanupFlag = false;
        break;
      }
    }
  }

  // If several filters occur in a row then reorder them so that the shortest
  // filters come first (those with the smallest number of elements).  This is
  // advantageous because shorter filters are more likely to match, speeding up
  // unwinding, but mostly because it increases the effectiveness of the other
  // filter optimizations below.
  for (unsigned i = 0, e = NewClauses.size(); i + 1 < e; ) {
    unsigned j;
    // Find the maximal 'j' s.t. the range [i, j) consists entirely of filters.
    for (j = i; j != e; ++j)
      if (!isa<ArrayType>(NewClauses[j]->getType()))
        break;

    // Check whether the filters are already sorted by length.  We need to know
    // if sorting them is actually going to do anything so that we only make a
    // new landingpad instruction if it does.
    for (unsigned k = i; k + 1 < j; ++k)
      if (shorter_filter(NewClauses[k+1], NewClauses[k])) {
        // Not sorted, so sort the filters now.  Doing an unstable sort would be
        // correct too but reordering filters pointlessly might confuse users.
        std::stable_sort(NewClauses.begin() + i, NewClauses.begin() + j,
                         shorter_filter);
        MakeNewInstruction = true;
        break;
      }

    // Look for the next batch of filters.
    i = j + 1;
  }

  // If typeinfos matched if and only if equal, then the elements of a filter L
  // that occurs later than a filter F could be replaced by the intersection of
  // the elements of F and L.  In reality two typeinfos can match without being
  // equal (for example if one represents a C++ class, and the other some class
  // derived from it) so it would be wrong to perform this transform in general.
  // However the transform is correct and useful if F is a subset of L.  In that
  // case L can be replaced by F, and thus removed altogether since repeating a
  // filter is pointless.  So here we look at all pairs of filters F and L where
  // L follows F in the list of clauses, and remove L if every element of F is
  // an element of L.  This can occur when inlining C++ functions with exception
  // specifications.
  for (unsigned i = 0; i + 1 < NewClauses.size(); ++i) {
    // Examine each filter in turn.
    Value *Filter = NewClauses[i];
    ArrayType *FTy = dyn_cast<ArrayType>(Filter->getType());
    if (!FTy)
      // Not a filter - skip it.
      continue;
    unsigned FElts = FTy->getNumElements();
    // Examine each filter following this one.  Doing this backwards means that
    // we don't have to worry about filters disappearing under us when removed.
    for (unsigned j = NewClauses.size() - 1; j != i; --j) {
      Value *LFilter = NewClauses[j];
      ArrayType *LTy = dyn_cast<ArrayType>(LFilter->getType());
      if (!LTy)
        // Not a filter - skip it.
        continue;
      // If Filter is a subset of LFilter, i.e. every element of Filter is also
      // an element of LFilter, then discard LFilter.
      SmallVectorImpl<Constant *>::iterator J = NewClauses.begin() + j;
      // If Filter is empty then it is a subset of LFilter.
      if (!FElts) {
        // Discard LFilter.
        NewClauses.erase(J);
        MakeNewInstruction = true;
        // Move on to the next filter.
        continue;
      }
      unsigned LElts = LTy->getNumElements();
      // If Filter is longer than LFilter then it cannot be a subset of it.
      if (FElts > LElts)
        // Move on to the next filter.
        continue;
      // At this point we know that LFilter has at least one element.
      if (isa<ConstantAggregateZero>(LFilter)) { // LFilter only contains zeros.
        // Filter is a subset of LFilter iff Filter contains only zeros (as we
        // already know that Filter is not longer than LFilter).
        if (isa<ConstantAggregateZero>(Filter)) {
          assert(FElts <= LElts && "Should have handled this case earlier!");
          // Discard LFilter.
          NewClauses.erase(J);
          MakeNewInstruction = true;
        }
        // Move on to the next filter.
        continue;
      }
      ConstantArray *LArray = cast<ConstantArray>(LFilter);
      if (isa<ConstantAggregateZero>(Filter)) { // Filter only contains zeros.
        // Since Filter is non-empty and contains only zeros, it is a subset of
        // LFilter iff LFilter contains a zero.
        assert(FElts > 0 && "Should have eliminated the empty filter earlier!");
        for (unsigned l = 0; l != LElts; ++l)
          if (LArray->getOperand(l)->isNullValue()) {
            // LFilter contains a zero - discard it.
            NewClauses.erase(J);
            MakeNewInstruction = true;
            break;
          }
        // Move on to the next filter.
        continue;
      }
      // At this point we know that both filters are ConstantArrays.  Loop over
      // operands to see whether every element of Filter is also an element of
      // LFilter.  Since filters tend to be short this is probably faster than
      // using a method that scales nicely.
      ConstantArray *FArray = cast<ConstantArray>(Filter);
      bool AllFound = true;
      for (unsigned f = 0; f != FElts; ++f) {
        Value *FTypeInfo = FArray->getOperand(f)->stripPointerCasts();
        AllFound = false;
        for (unsigned l = 0; l != LElts; ++l) {
          Value *LTypeInfo = LArray->getOperand(l)->stripPointerCasts();
          if (LTypeInfo == FTypeInfo) {
            AllFound = true;
            break;
          }
        }
        if (!AllFound)
          break;
      }
      if (AllFound) {
        // Discard LFilter.
        NewClauses.erase(J);
        MakeNewInstruction = true;
      }
      // Move on to the next filter.
    }
  }

  // If we changed any of the clauses, replace the old landingpad instruction
  // with a new one.
  if (MakeNewInstruction) {
    LandingPadInst *NLI = LandingPadInst::Create(LI.getType(),
                                                 NewClauses.size());
    for (unsigned i = 0, e = NewClauses.size(); i != e; ++i)
      NLI->addClause(NewClauses[i]);
    // A landing pad with no clauses must have the cleanup flag set.  It is
    // theoretically possible, though highly unlikely, that we eliminated all
    // clauses.  If so, force the cleanup flag to true.
    if (NewClauses.empty())
      CleanupFlag = true;
    NLI->setCleanup(CleanupFlag);
    return NLI;
  }

  // Even if none of the clauses changed, we may nonetheless have understood
  // that the cleanup flag is pointless.  Clear it if so.
  if (LI.isCleanup() != CleanupFlag) {
    assert(!CleanupFlag && "Adding a cleanup, not removing one?!");
    LI.setCleanup(CleanupFlag);
    return &LI;
  }

  return nullptr;
}
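
The last clause optimization uses a subset test: if every element of an earlier filter F also appears in a later filter L, then L cannot reject anything F did not already reject, so L is redundant. A standalone C++ sketch of that test (typeinfos modeled as strings; not LLVM code):

#include <cassert>
#include <string>
#include <vector>

using Filter = std::vector<std::string>;

bool isSubset(const Filter &F, const Filter &L) {
  if (F.size() > L.size())
    return false; // a longer filter cannot be a subset
  for (const auto &FTypeInfo : F) {
    bool Found = false;
    for (const auto &LTypeInfo : L)
      if (LTypeInfo == FTypeInfo) { Found = true; break; }
    if (!Found)
      return false;
  }
  return true;
}

int main() {
  Filter F = {"int"}, L = {"int", "double"};
  assert(isSubset(F, L));  // the later filter L is redundant: drop it
  assert(!isSubset(L, F));
}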

/// Try to move the specified instruction from its current block into the
/// beginning of DestBlock, which can only happen if it's safe to move the
/// instruction past all of the instructions between it and the end of its
/// block.
static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
  assert(I->hasOneUse() && "Invariants didn't hold!");
  BasicBlock *SrcBlock = I->getParent();

  // Cannot move control-flow-involving instructions, volatile loads, vaarg,
  // etc.
  if (isa<PHINode>(I) || I->isEHPad() || I->mayHaveSideEffects() ||
      I->isTerminator())
    return false;

  // Do not sink static or dynamic alloca instructions. Static allocas must
  // remain in the entry block, and dynamic allocas must not be sunk in between
  // a stacksave / stackrestore pair, which would incorrectly shorten its
  // lifetime.
  if (isa<AllocaInst>(I))
    return false;

  // Do not sink into catchswitch blocks.
  if (isa<CatchSwitchInst>(DestBlock->getTerminator()))
    return false;

  // Do not sink convergent call instructions.
  if (auto *CI = dyn_cast<CallInst>(I)) {
    if (CI->isConvergent())
      return false;
  }
  // We can only sink load instructions if there is nothing between the load and
  // the end of block that could change the value.
  if (I->mayReadFromMemory()) {
    for (BasicBlock::iterator Scan = I->getIterator(),
                              E = I->getParent()->end();
         Scan != E; ++Scan)
      if (Scan->mayWriteToMemory())
        return false;
  }
  BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
  I->moveBefore(&*InsertPos);
  ++NumSunkInst;

  // Also sink all related debug uses from the source basic block. Otherwise we
  // get debug uses before the def. Attempt to salvage debug uses first, to
  // maximise the range variables have a location for. If we cannot salvage,
  // then mark the location undef: we know it was supposed to receive a new
  // location here, but that computation has been sunk.
  SmallVector<DbgVariableIntrinsic *, 2> DbgUsers;
  findDbgUsers(DbgUsers, I);
  for (auto *DII : reverse(DbgUsers)) {
    if (DII->getParent() == SrcBlock) {
      // dbg.value is in the same basic block as the sunk inst, see if we can
      // salvage it. Clone a new copy of the instruction: on success we need
      // both salvaged and unsalvaged copies.
      SmallVector<DbgVariableIntrinsic *, 1> TmpUser{
          cast<DbgVariableIntrinsic>(DII->clone())};

      if (!salvageDebugInfoForDbgValues(*I, TmpUser)) {
        // We are unable to salvage: sink the cloned dbg.value, and mark the
        // original as undef, terminating any earlier variable location.
        LLVM_DEBUG(dbgs() << "SINK: " << *DII << '\n');
        TmpUser[0]->insertBefore(&*InsertPos);
        Value *Undef = UndefValue::get(I->getType());
        DII->setOperand(0, MetadataAsValue::get(DII->getContext(),
                                                ValueAsMetadata::get(Undef)));
      } else {
        // We successfully salvaged: place the salvaged dbg.value in the
        // original location, and move the unmodified dbg.value to sink with
        // the sunk inst.
        TmpUser[0]->insertBefore(DII);
        DII->moveBefore(&*InsertPos);
      }
    }
  }
  return true;
}
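
The memory-safety scan above allows a load to sink only when no instruction between it and the end of its block can write memory, since an intervening write could change the loaded value. A minimal sketch of that scan (instructions modeled as strings, with "store"/"call" standing in for mayWriteToMemory(); not LLVM code):

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

bool safeToSinkLoad(const std::vector<std::string> &Block, std::size_t LoadIdx) {
  for (std::size_t i = LoadIdx; i < Block.size(); ++i)
    if (Block[i] == "store" || Block[i] == "call")
      return false; // a potential write intervenes: keep the load in place
  return true;
}

int main() {
  std::vector<std::string> BB1 = {"load", "add", "store", "br"};
  std::vector<std::string> BB2 = {"load", "add", "br"};
  std::cout << safeToSinkLoad(BB1, 0) << '\n'; // 0: the store blocks sinking
  std::cout << safeToSinkLoad(BB2, 0) << '\n'; // 1: nothing writes memory
}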

bool InstCombiner::run() {
  while (!Worklist.isEmpty()) {
    Instruction *I = Worklist.RemoveOne();
    if (I == nullptr) continue; // skip null values.

    // Check to see if we can DCE the instruction.
    if (isInstructionTriviallyDead(I, &TLI)) {
      LLVM_DEBUG(dbgs() << "IC: DCE: " << *I << '\n');
      eraseInstFromFunction(*I);
      ++NumDeadInst;
      MadeIRChange = true;
      continue;
    }

    if (!DebugCounter::shouldExecute(VisitCounter))
      continue;

    // Instruction isn't dead, see if we can constant propagate it.
    if (!I->use_empty() &&
        (I->getNumOperands() == 0 || isa<Constant>(I->getOperand(0)))) {
      if (Constant *C = ConstantFoldInstruction(I, DL, &TLI)) {
        LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << *I
                          << '\n');

        // Add operands to the worklist.
        replaceInstUsesWith(*I, C);
        ++NumConstProp;
        if (isInstructionTriviallyDead(I, &TLI))
          eraseInstFromFunction(*I);
        MadeIRChange = true;
        continue;
      }
    }

    // In general, it is possible for computeKnownBits to determine all bits in
    // a value even when the operands are not all constants.
    Type *Ty = I->getType();
    if (ExpensiveCombines && !I->use_empty() && Ty->isIntOrIntVectorTy()) {
      KnownBits Known = computeKnownBits(I, /*Depth*/0, I);
      if (Known.isConstant()) {
        Constant *C = ConstantInt::get(Ty, Known.getConstant());
        LLVM_DEBUG(dbgs() << "IC: ConstFold (all bits known) to: " << *C
                          << " from: " << *I << '\n');

        // Add operands to the worklist.
        replaceInstUsesWith(*I, C);
        ++NumConstProp;
        if (isInstructionTriviallyDead(I, &TLI))
          eraseInstFromFunction(*I);
        MadeIRChange = true;
        continue;
      }
    }

    // See if we can trivially sink this instruction to a successor basic block.
    if (EnableCodeSinking && I->hasOneUse()) {
      BasicBlock *BB = I->getParent();
      Instruction *UserInst = cast<Instruction>(*I->user_begin());
      BasicBlock *UserParent;

      // Get the block the use occurs in.
      if (PHINode *PN = dyn_cast<PHINode>(UserInst))
        UserParent = PN->getIncomingBlock(*I->use_begin());
      else
        UserParent = UserInst->getParent();

      if (UserParent != BB) {
        bool UserIsSuccessor = false;
        // See if the user is one of our successors.
        for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI)
          if (*SI == UserParent) {
            UserIsSuccessor = true;
            break;
          }

        // If the user is one of our immediate successors, and if that successor
        // only has us as a predecessor (we'd have to split the critical edge
        // otherwise), we can keep going.
        if (UserIsSuccessor && UserParent->getUniquePredecessor()) {
          // Okay, the CFG is simple enough, try to sink this instruction.
          if (TryToSinkInstruction(I, UserParent)) {
            LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
            MadeIRChange = true;
            // We'll add uses of the sunk instruction below, but since sinking
            // can expose opportunities for its *operands*, add them to the
            // worklist.
            for (Use &U : I->operands())
              if (Instruction *OpI = dyn_cast<Instruction>(U.get()))
                Worklist.Add(OpI);
          }
        }
      }
    }

    // Now that we have an instruction, try combining it to simplify it.
    Builder.SetInsertPoint(I);
    Builder.SetCurrentDebugLocation(I->getDebugLoc());

#ifndef NDEBUG
    std::string OrigI;
#endif
    LLVM_DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str(););
    LLVM_DEBUG(dbgs() << "IC: Visiting: " << OrigI << '\n');

    if (Instruction *Result = visit(*I)) {
      ++NumCombined;
      // Should we replace the old instruction with a new one?
      if (Result != I) {
        LLVM_DEBUG(dbgs() << "IC: Old = " << *I << '\n'
                          << "    New = " << *Result << '\n');

        if (I->getDebugLoc())
          Result->setDebugLoc(I->getDebugLoc());
        // Everything uses the new instruction now.
        I->replaceAllUsesWith(Result);

        // Move the name to the new instruction first.
        Result->takeName(I);

        // Push the new instruction and any users onto the worklist.
        Worklist.AddUsersToWorkList(*Result);
        Worklist.Add(Result);

        // Insert the new instruction into the basic block...
        BasicBlock *InstParent = I->getParent();
        BasicBlock::iterator InsertPos = I->getIterator();

        // If we replace a PHI with something that isn't a PHI, fix up the
        // insertion point.
        if (!isa<PHINode>(Result) && isa<PHINode>(InsertPos))
          InsertPos = InstParent->getFirstInsertionPt();

        InstParent->getInstList().insert(InsertPos, Result);

        eraseInstFromFunction(*I);
      } else {
        LLVM_DEBUG(dbgs() << "IC: Mod = " << OrigI << '\n'
                          << "    New = " << *I << '\n');

        // If the instruction was modified, it's possible that it is now dead.
        // If so, remove it.
        if (isInstructionTriviallyDead(I, &TLI)) {
          eraseInstFromFunction(*I);
        } else {
          Worklist.AddUsersToWorkList(*I);
          Worklist.Add(I);
        }
      }
      MadeIRChange = true;
    }
  }

  Worklist.Zap();
  return MadeIRChange;
}
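
run() is a worklist-driven fixpoint loop: pop an instruction, try to simplify it, and whenever a simplification fires, requeue everything it might have affected until nothing changes. A generic sketch of that discipline (the "instructions" are just ints and the "combine" halves even numbers; not LLVM code):

#include <deque>
#include <iostream>

int main() {
  std::deque<int> Worklist = {8, 3, 20};
  bool Changed = false;
  while (!Worklist.empty()) {
    int I = Worklist.front();
    Worklist.pop_front();
    if (I % 2 == 0 && I != 0) {  // a "combinable" instruction
      Worklist.push_back(I / 2); // requeue the simplified form
      Changed = true;
      std::cout << I << " -> " << I / 2 << '\n';
    }
  }
  std::cout << "changed: " << Changed << '\n';
}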
3340
3341
/// Walk the function in depth-first order, adding all reachable code to the
3342
/// worklist.
3343
///
3344
/// This has a couple of tricks to make the code faster and more powerful.  In
3345
/// particular, we constant fold and DCE instructions as we go, to avoid adding
3346
/// them to the worklist (this significantly speeds up instcombine on code where
3347
/// many instructions are dead or constant).  Additionally, if we find a branch
3348
/// whose condition is a known constant, we only visit the reachable successors.
3349
static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
3350
                                       SmallPtrSetImpl<BasicBlock *> &Visited,
3351
                                       InstCombineWorklist &ICWorklist,
3352
3.72M
                                       const TargetLibraryInfo *TLI) {
3353
3.72M
  bool MadeIRChange = false;
3354
3.72M
  SmallVector<BasicBlock*, 256> Worklist;
3355
3.72M
  Worklist.push_back(BB);
3356
3.72M
3357
3.72M
  SmallVector<Instruction*, 128> InstrsForInstCombineWorklist;
3358
3.72M
  DenseMap<Constant *, Constant *> FoldedConstants;
3359
3.72M
3360
43.1M
  do {
3361
43.1M
    BB = Worklist.pop_back_val();
3362
43.1M
3363
43.1M
    // We have now visited this block!  If we've already been here, ignore it.
3364
43.1M
    if (!Visited.insert(BB).second)
3365
14.3M
      continue;
3366
28.8M
3367
189M
    
for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); 28.8M
BBI != E; ) {
3368
160M
      Instruction *Inst = &*BBI++;
3369
160M
3370
160M
      // DCE instruction if trivially dead.
3371
160M
      if (isInstructionTriviallyDead(Inst, TLI)) {
3372
1.20M
        ++NumDeadInst;
3373
1.20M
        LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
3374
1.20M
        if (!salvageDebugInfo(*Inst))
3375
1.20M
          replaceDbgUsesWithUndef(Inst);
3376
1.20M
        Inst->eraseFromParent();
3377
1.20M
        MadeIRChange = true;
3378
1.20M
        continue;
3379
1.20M
      }
3380
159M
3381
159M
      // ConstantProp instruction if trivially constant.
3382
159M
      if (!Inst->use_empty() &&
3383
159M
          
(107M
Inst->getNumOperands() == 0107M
||
isa<Constant>(Inst->getOperand(0))106M
))
3384
9.35M
        if (Constant *C = ConstantFoldInstruction(Inst, DL, TLI)) {
3385
15.5k
          LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << *Inst
3386
15.5k
                            << '\n');
3387
15.5k
          Inst->replaceAllUsesWith(C);
3388
15.5k
          ++NumConstProp;
3389
15.5k
          if (isInstructionTriviallyDead(Inst, TLI))
3390
15.5k
            Inst->eraseFromParent();
3391
15.5k
          MadeIRChange = true;
3392
15.5k
          continue;
3393
15.5k
        }
3394
159M
3395
159M
      // See if we can constant fold its operands.
3396
344M
      
for (Use &U : Inst->operands())159M
{
3397
344M
        if (!isa<ConstantVector>(U) && 
!isa<ConstantExpr>(U)344M
)
3398
336M
          continue;
3399
8.18M
3400
8.18M
        auto *C = cast<Constant>(U);
3401
8.18M
        Constant *&FoldRes = FoldedConstants[C];
3402
8.18M
        if (!FoldRes)
3403
5.91M
          FoldRes = ConstantFoldConstant(C, DL, TLI);
3404
8.18M
        if (!FoldRes)
3405
0
          FoldRes = C;
3406
8.18M
3407
8.18M
        if (FoldRes != C) {
3408
144k
          LLVM_DEBUG(dbgs() << "IC: ConstFold operand of: " << *Inst
3409
144k
                            << "\n    Old = " << *C
3410
144k
                            << "\n    New = " << *FoldRes << '\n');
3411
144k
          U = FoldRes;
3412
144k
          MadeIRChange = true;
3413
144k
        }
3414
8.18M
      }
3415
159M
3416
159M
      // Skip processing debug intrinsics in InstCombine. Processing these call instructions
3417
159M
      // consumes non-trivial amount of time and provides no value for the optimization.
3418
159M
      if (!isa<DbgInfoIntrinsic>(Inst))
3419
159M
        InstrsForInstCombineWorklist.push_back(Inst);
3420
159M
    }
3421
28.8M
3422
28.8M
    // Recursively visit successors.  If this is a branch or switch on a
3423
28.8M
    // constant, only visit the reachable successor.
3424
28.8M
    Instruction *TI = BB->getTerminator();
3425
28.8M
    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
3426
23.9M
    if (BI->isConditional() && isa<ConstantInt>(BI->getCondition())) {
3427
64.6k
        bool CondVal = cast<ConstantInt>(BI->getCondition())->getZExtValue();
3428
64.6k
        BasicBlock *ReachableBB = BI->getSuccessor(!CondVal);
3429
64.6k
        Worklist.push_back(ReachableBB);
3430
64.6k
        continue;
3431
64.6k
      }
3432
4.87M
    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
3433
233k
      if (ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
3434
135
        Worklist.push_back(SI->findCaseValue(Cond)->getCaseSuccessor());
3435
135
        continue;
3436
135
      }
3437
28.7M
    }
3438
28.7M
3439
28.7M
    for (BasicBlock *SuccBB : successors(TI))
3440
39.3M
      Worklist.push_back(SuccBB);
3441
43.1M
  } while (!Worklist.empty());
3442
3.72M
3443
3.72M
  // Once we've found all of the instructions to add to instcombine's worklist,
3444
3.72M
  // add them in reverse order.  This way instcombine will visit from the top
3445
3.72M
  // of the function down.  This jibes well with the way that it adds all uses
3446
3.72M
  // of instructions to the worklist after doing a transformation, thus avoiding
3447
3.72M
  // some N^2 behavior in pathological cases.
3448
3.72M
  ICWorklist.AddInitialGroup(InstrsForInstCombineWorklist);
3449
3.72M
3450
3.72M
  return MadeIRChange;
3451
3.72M
}
3452
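The traversal above is a depth-first walk with an explicit stack that prunes successors a constant branch or switch can never take. A minimal standalone sketch of that shape, assuming a simplified Block type (hypothetical; not LLVM's BasicBlock API):

#include <unordered_set>
#include <vector>

// Hypothetical CFG node. ConstCondSucc models a terminator that branches
// on a known constant: it holds the index of the only live successor.
struct Block {
  std::vector<Block *> Succs;
  int ConstCondSucc = -1;
};

// Collect every block reachable from Entry, depth first, visiting only
// the one live successor when the branch condition is a constant.
std::vector<Block *> reachableFrom(Block *Entry) {
  std::vector<Block *> Order;
  std::vector<Block *> Worklist{Entry};
  std::unordered_set<Block *> Visited;
  do {
    Block *B = Worklist.back();
    Worklist.pop_back();
    if (!Visited.insert(B).second)
      continue;                        // already visited this block
    Order.push_back(B);
    if (B->ConstCondSucc >= 0) {       // constant condition: prune the rest
      Worklist.push_back(B->Succs[B->ConstCondSucc]);
      continue;
    }
    for (Block *S : B->Succs)
      Worklist.push_back(S);
  } while (!Worklist.empty());
  return Order;
}

Separately, note the FoldedConstants pattern in the same function: Constant *&FoldRes = FoldedConstants[C] takes a reference into the map, so each distinct constant is folded at most once no matter how many operands mention it.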
3453
/// Populate the IC worklist from a function, and prune any dead basic
3454
/// blocks discovered in the process.
3455
///
3456
/// This also does basic constant propagation and other forward fixes to make
3457
/// the combiner itself run much faster.
3458
static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
3459
                                          TargetLibraryInfo *TLI,
3460
3.72M
                                          InstCombineWorklist &ICWorklist) {
3461
3.72M
  bool MadeIRChange = false;
3462
3.72M
3463
3.72M
  // Do a depth-first traversal of the function, populate the worklist with
3464
3.72M
  // the reachable instructions.  Ignore blocks that are not reachable.  Keep
3465
3.72M
  // track of which blocks we visit.
3466
3.72M
  SmallPtrSet<BasicBlock *, 32> Visited;
3467
3.72M
  MadeIRChange |=
3468
3.72M
      AddReachableCodeToWorklist(&F.front(), DL, Visited, ICWorklist, TLI);
3469
3.72M
3470
3.72M
  // Do a quick scan over the function.  If we find any blocks that are
3471
3.72M
  // unreachable, remove any instructions inside of them.  This prevents
3472
3.72M
  // the instcombine code from having to deal with some bad special cases.
3473
28.9M
  for (BasicBlock &BB : F) {
3474
28.9M
    if (Visited.count(&BB))
3475
28.8M
      continue;
3476
84.9k
3477
84.9k
    unsigned NumDeadInstInBB = removeAllNonTerminatorAndEHPadInstructions(&BB);
3478
84.9k
    MadeIRChange |= NumDeadInstInBB > 0;
3479
84.9k
    NumDeadInst += NumDeadInstInBB;
3480
84.9k
  }
3481
3.72M
3482
3.72M
  return MadeIRChange;
3483
3.72M
}
3484
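The sweep at the end of prepareICWorklistFromFunction needs only set membership: any block the reachability walk never visited must be dead. A stripped-down sketch, where Block and clearBody() are hypothetical stand-ins for BasicBlock and removeAllNonTerminatorAndEHPadInstructions:

#include <cstddef>
#include <unordered_set>
#include <vector>

struct Block {
  std::vector<int> Insts;            // stand-in instruction list
  std::size_t clearBody() {          // drop the body, report how many died
    std::size_t N = Insts.size();
    Insts.clear();
    return N;
  }
};

// Empty every block the walk never reached and report whether any IR
// actually changed, mirroring the loop above.
bool sweepUnreachable(std::vector<Block> &Blocks,
                      const std::unordered_set<const Block *> &Visited) {
  bool MadeIRChange = false;
  for (Block &B : Blocks) {
    if (Visited.count(&B))
      continue;                      // reachable: keep it
    MadeIRChange |= B.clearBody() > 0;
  }
  return MadeIRChange;
}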
3485
static bool combineInstructionsOverFunction(
3486
    Function &F, InstCombineWorklist &Worklist, AliasAnalysis *AA,
3487
    AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT,
3488
    OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
3489
    ProfileSummaryInfo *PSI, bool ExpensiveCombines = true,
3490
3.18M
    LoopInfo *LI = nullptr) {
3491
3.18M
  auto &DL = F.getParent()->getDataLayout();
3492
3.18M
  ExpensiveCombines |= EnableExpensiveCombines;
3493
3.18M
3494
3.18M
  /// Builder - This is an IRBuilder that automatically inserts new
3495
3.18M
  /// instructions into the worklist when they are created.
3496
3.18M
  IRBuilder<TargetFolder, IRBuilderCallbackInserter> Builder(
3497
3.18M
      F.getContext(), TargetFolder(DL),
3498
3.18M
      IRBuilderCallbackInserter([&Worklist, &AC](Instruction *I) {
3499
477k
        Worklist.Add(I);
3500
477k
        if (match(I, m_Intrinsic<Intrinsic::assume>()))
3501
6
          AC.registerAssumption(cast<CallInst>(I));
3502
477k
      }));
3503
3.18M
3504
3.18M
  // Lower dbg.declare intrinsics; otherwise their value may be clobbered
3505
3.18M
  // by instcombiner.
3506
3.18M
  bool MadeIRChange = false;
3507
3.18M
  if (ShouldLowerDbgDeclare)
3508
3.18M
    MadeIRChange = LowerDbgDeclare(F);
3509
3.18M
3510
3.18M
  // Iterate while there is work to do.
3511
3.18M
  int Iteration = 0;
3512
3.72M
  while (true) {
3513
3.72M
    ++Iteration;
3514
3.72M
    LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
3515
3.72M
                      << F.getName() << "\n");
3516
3.72M
3517
3.72M
    MadeIRChange |= prepareICWorklistFromFunction(F, DL, &TLI, Worklist);
3518
3.72M
3519
3.72M
    InstCombiner IC(Worklist, Builder, F.hasMinSize(), ExpensiveCombines, AA,
3520
3.72M
                    AC, TLI, DT, ORE, BFI, PSI, DL, LI);
3521
3.72M
    IC.MaxArraySizeForCombine = MaxArraySize;
3522
3.72M
3523
3.72M
    if (!IC.run())
3524
3.18M
      break;
3525
3.72M
  }
3526
3.18M
3527
3.18M
  return MadeIRChange || Iteration > 1;
3528
3.18M
}
3529
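combineInstructionsOverFunction repeats a prepare-then-combine cycle until a full pass changes nothing. A stripped-down sketch of that driver, with prepare and combine as hypothetical stand-ins for prepareICWorklistFromFunction and InstCombiner::run:

#include <functional>

// Run the combiner to a fixed point. `prepare` returns true if the cheap
// pre-pass changed the IR; `combine` returns true if any combine fired,
// meaning another iteration is needed.
bool combineToFixedPoint(const std::function<bool()> &prepare,
                         const std::function<bool()> &combine) {
  bool MadeIRChange = false;
  int Iteration = 0;
  while (true) {
    ++Iteration;
    MadeIRChange |= prepare();
    if (!combine())
      break;                         // fixed point reached
  }
  // More than one iteration implies an earlier round changed the IR even
  // if the final, no-op round did not, hence the `Iteration > 1` term.
  return MadeIRChange || Iteration > 1;
}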
3530
PreservedAnalyses InstCombinePass::run(Function &F,
3531
8.62k
                                       FunctionAnalysisManager &AM) {
3532
8.62k
  auto &AC = AM.getResult<AssumptionAnalysis>(F);
3533
8.62k
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
3534
8.62k
  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
3535
8.62k
  auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
3536
8.62k
3537
8.62k
  auto *LI = AM.getCachedResult<LoopAnalysis>(F);
3538
8.62k
3539
8.62k
  auto *AA = &AM.getResult<AAManager>(F);
3540
8.62k
  const ModuleAnalysisManager &MAM =
3541
8.62k
      AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager();
3542
8.62k
  ProfileSummaryInfo *PSI =
3543
8.62k
      MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
3544
8.62k
  auto *BFI = (PSI && PSI->hasProfileSummary()) ?
3545
8.27k
      &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
3546
8.62k
3547
8.62k
  if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, ORE,
3548
8.62k
                                       BFI, PSI, ExpensiveCombines, LI))
3549
7.90k
    // No changes, all analyses are preserved.
3550
7.90k
    return PreservedAnalyses::all();
3551
726
3552
726
  // Mark all the analyses that instcombine updates as preserved.
3553
726
  PreservedAnalyses PA;
3554
726
  PA.preserveSet<CFGAnalyses>();
3555
726
  PA.preserve<AAManager>();
3556
726
  PA.preserve<BasicAA>();
3557
726
  PA.preserve<GlobalsAA>();
3558
726
  return PA;
3559
726
}
3560
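For context, InstCombinePass::run above is normally invoked through the new pass manager. A plausible standalone driver looks like the sketch below (error handling and target configuration omitted; this is not the in-tree setup):

#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Transforms/InstCombine/InstCombine.h"

using namespace llvm;

// Register the standard analyses, then run just InstCombinePass over F.
void runInstCombineNewPM(Function &F) {
  PassBuilder PB;
  LoopAnalysisManager LAM;
  FunctionAnalysisManager FAM;
  CGSCCAnalysisManager CGAM;
  ModuleAnalysisManager MAM;
  PB.registerModuleAnalyses(MAM);
  PB.registerCGSCCAnalyses(CGAM);
  PB.registerFunctionAnalyses(FAM);
  PB.registerLoopAnalyses(LAM);
  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

  FunctionPassManager FPM;
  FPM.addPass(InstCombinePass());
  FPM.run(F, FAM);
}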
3561
107k
void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const {
3562
107k
  AU.setPreservesCFG();
3563
107k
  AU.addRequired<AAResultsWrapperPass>();
3564
107k
  AU.addRequired<AssumptionCacheTracker>();
3565
107k
  AU.addRequired<TargetLibraryInfoWrapperPass>();
3566
107k
  AU.addRequired<DominatorTreeWrapperPass>();
3567
107k
  AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
3568
107k
  AU.addPreserved<DominatorTreeWrapperPass>();
3569
107k
  AU.addPreserved<AAResultsWrapperPass>();
3570
107k
  AU.addPreserved<BasicAAWrapperPass>();
3571
107k
  AU.addPreserved<GlobalsAAWrapperPass>();
3572
107k
  AU.addRequired<ProfileSummaryInfoWrapperPass>();
3573
107k
  LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
3574
107k
}
3575
3576
3.17M
bool InstructionCombiningPass::runOnFunction(Function &F) {
3577
3.17M
  if (skipFunction(F))
3578
351
    return false;
3579
3.17M
3580
3.17M
  // Required analyses.
3581
3.17M
  auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
3582
3.17M
  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
3583
3.17M
  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
3584
3.17M
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
3585
3.17M
  auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
3586
3.17M
3587
3.17M
  // Optional analyses.
3588
3.17M
  auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
3589
18.4E
  auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
3590
3.17M
  ProfileSummaryInfo *PSI =
3591
3.17M
      &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
3592
3.17M
  BlockFrequencyInfo *BFI =
3593
3.17M
      (PSI && PSI->hasProfileSummary()) ?
3594
384
      &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
3595
3.17M
      nullptr;
3596
3.17M
3597
3.17M
  return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, ORE,
3598
3.17M
                                         BFI, PSI, ExpensiveCombines, LI);
3599
3.17M
}
3600
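The legacy path through runOnFunction above is typically scheduled via the factory at the bottom of this file. A usage sketch, assuming the declarations from llvm/Transforms/InstCombine/InstCombine.h:

#include "llvm/IR/Function.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Transforms/InstCombine/InstCombine.h"

using namespace llvm;

// Run the legacy InstructionCombiningPass on one function; the legacy
// pass manager materializes the analyses listed in getAnalysisUsage.
void runInstCombineLegacyPM(Function &F) {
  legacy::FunctionPassManager FPM(F.getParent());
  FPM.add(createInstructionCombiningPass());
  FPM.doInitialization();
  FPM.runOnFunction(F);
  FPM.doFinalization();
}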
3601
char InstructionCombiningPass::ID = 0;
3602
3603
23.9k
INITIALIZE_PASS_BEGIN(InstructionCombiningPass, "instcombine",
3604
23.9k
                      "Combine redundant instructions", false, false)
3605
23.9k
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
3606
23.9k
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
3607
23.9k
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
3608
23.9k
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
3609
23.9k
INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
3610
23.9k
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
3611
23.9k
INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass)
3612
23.9k
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
3613
23.9k
INITIALIZE_PASS_END(InstructionCombiningPass, "instcombine",
3614
                    "Combine redundant instructions", false, false)
3615
3616
// Initialization Routines
3617
11.0k
void llvm::initializeInstCombine(PassRegistry &Registry) {
3618
11.0k
  initializeInstructionCombiningPassPass(Registry);
3619
11.0k
}
3620
3621
0
void LLVMInitializeInstCombine(LLVMPassRegistryRef R) {
3622
0
  initializeInstructionCombiningPassPass(*unwrap(R));
3623
0
}
3624
3625
105k
FunctionPass *llvm::createInstructionCombiningPass(bool ExpensiveCombines) {
3626
105k
  return new InstructionCombiningPass(ExpensiveCombines);
3627
105k
}
3628
3629
7
void LLVMAddInstructionCombiningPass(LLVMPassManagerRef PM) {
3630
7
  unwrap(PM)->add(createInstructionCombiningPass());
3631
7
}
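The C API entry points above are enough to drive the pass from C. A minimal usage sketch (the iteration helpers are standard llvm-c calls from llvm-c/Core.h):

#include "llvm-c/Core.h"
#include "llvm-c/Transforms/InstCombine.h"

// Create a function pass manager for M, add instcombine, and run it over
// every function in the module. The legacy manager skips declarations.
static void runInstCombineC(LLVMModuleRef M) {
  LLVMPassManagerRef PM = LLVMCreateFunctionPassManagerForModule(M);
  LLVMAddInstructionCombiningPass(PM);
  LLVMInitializeFunctionPassManager(PM);
  for (LLVMValueRef F = LLVMGetFirstFunction(M); F;
       F = LLVMGetNextFunction(F))
    LLVMRunFunctionPassManager(PM, F);
  LLVMFinalizeFunctionPassManager(PM);
  LLVMDisposePassManager(PM);
}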