Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- InstCombineSimplifyDemanded.cpp ------------------------------------===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
// This file contains logic for simplifying instructions based on information
11
// about how they are used.
12
//
13
//===----------------------------------------------------------------------===//
14
15
#include "InstCombineInternal.h"
16
#include "llvm/Analysis/ValueTracking.h"
17
#include "llvm/IR/IntrinsicInst.h"
18
#include "llvm/IR/PatternMatch.h"
19
#include "llvm/Support/KnownBits.h"
20
21
using namespace llvm;
22
using namespace llvm::PatternMatch;
23
24
#define DEBUG_TYPE "instcombine"
25
26
/// Check to see if the specified operand of the specified instruction is a
27
/// constant integer. If so, check to see if there are any bits set in the
28
/// constant that are not demanded. If so, shrink the constant and return true.
29
static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
30
15.8M
                                   const APInt &Demanded) {
31
15.8M
  assert(I && "No instruction?");
32
15.8M
  assert(OpNo < I->getNumOperands() && "Operand index too large");
33
15.8M
34
15.8M
  // The operand must be a constant integer or splat integer.
35
15.8M
  Value *Op = I->getOperand(OpNo);
36
15.8M
  const APInt *C;
37
15.8M
  if (!match(Op, m_APInt(C)))
38
9.48M
    return false;
39
6.39M
40
6.39M
  // If there are no bits set that aren't demanded, nothing to do.
41
6.39M
  
if (6.39M
C->isSubsetOf(Demanded)6.39M
)
42
6.38M
    return false;
43
7.60k
44
7.60k
  // This instruction is producing bits that are not demanded. Shrink the RHS.
45
7.60k
  I->setOperand(OpNo, ConstantInt::get(Op->getType(), *C & Demanded));
46
7.60k
47
7.60k
  return true;
48
7.60k
}
49
50
51
52
/// Inst is an integer instruction that SimplifyDemandedBits knows about. See if
53
/// the instruction has any properties that allow us to simplify its operands.
54
8.99M
bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
55
8.99M
  unsigned BitWidth = Inst.getType()->getScalarSizeInBits();
56
8.99M
  KnownBits Known(BitWidth);
57
8.99M
  APInt DemandedMask(APInt::getAllOnesValue(BitWidth));
58
8.99M
59
8.99M
  Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, Known,
60
8.99M
                                     0, &Inst);
61
8.99M
  if (
!V8.99M
)
return false8.97M
;
62
28.2k
  
if (28.2k
V == &Inst28.2k
)
return true21.3k
;
63
6.89k
  replaceInstUsesWith(Inst, V);
64
6.89k
  return true;
65
6.89k
}
66
67
/// This form of SimplifyDemandedBits simplifies the specified instruction
68
/// operand if possible, updating it in place. It returns true if it made any
69
/// change and false otherwise.
70
bool InstCombiner::SimplifyDemandedBits(Instruction *I, unsigned OpNo,
71
                                        const APInt &DemandedMask,
72
                                        KnownBits &Known,
73
75.6M
                                        unsigned Depth) {
74
75.6M
  Use &U = I->getOperandUse(OpNo);
75
75.6M
  Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask, Known,
76
75.6M
                                          Depth, I);
77
75.6M
  if (
!NewVal75.6M
)
return false75.6M
;
78
30.3k
  U = NewVal;
79
30.3k
  return true;
80
30.3k
}
81
82
83
/// This function attempts to replace V with a simpler value based on the
84
/// demanded bits. When this function is called, it is known that only the bits
85
/// set in DemandedMask of the result of V are ever used downstream.
86
/// Consequently, depending on the mask and V, it may be possible to replace V
87
/// with a constant or one of its operands. In such cases, this function does
88
/// the replacement and returns true. In all other cases, it returns false after
89
/// analyzing the expression and setting KnownOne and known to be one in the
90
/// expression. Known.Zero contains all the bits that are known to be zero in
91
/// the expression. These are provided to potentially allow the caller (which
92
/// might recursively be SimplifyDemandedBits itself) to simplify the
93
/// expression.
94
/// Known.One and Known.Zero always follow the invariant that:
95
///   Known.One & Known.Zero == 0.
96
/// That is, a bit can't be both 1 and 0. Note that the bits in Known.One and
97
/// Known.Zero may only be accurate for those bits set in DemandedMask. Note
98
/// also that the bitwidth of V, DemandedMask, Known.Zero and Known.One must all
99
/// be the same.
100
///
101
/// This returns null if it did not change anything and it permits no
102
/// simplification.  This returns V itself if it did some simplification of V's
103
/// operands based on the information about what bits are demanded. This returns
104
/// some other non-null value if it found out that V is equal to another value
105
/// in the context where the specified bits are demanded, but not for all users.
106
Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
107
                                             KnownBits &Known, unsigned Depth,
108
84.6M
                                             Instruction *CxtI) {
109
84.6M
  assert(V != nullptr && "Null pointer of Value???");
110
84.6M
  assert(Depth <= 6 && "Limit Search Depth");
111
84.6M
  uint32_t BitWidth = DemandedMask.getBitWidth();
112
84.6M
  Type *VTy = V->getType();
113
84.6M
  assert(
114
84.6M
      (!VTy->isIntOrIntVectorTy() || VTy->getScalarSizeInBits() == BitWidth) &&
115
84.6M
      Known.getBitWidth() == BitWidth &&
116
84.6M
      "Value *V, DemandedMask and Known must have same BitWidth");
117
84.6M
118
84.6M
  if (
isa<Constant>(V)84.6M
) {
119
22.4M
    computeKnownBits(V, Known, Depth, CxtI);
120
22.4M
    return nullptr;
121
22.4M
  }
122
62.2M
123
62.2M
  Known.resetAll();
124
62.2M
  if (DemandedMask.isNullValue())     // Not demanding any bits from V.
125
299
    return UndefValue::get(VTy);
126
62.2M
127
62.2M
  
if (62.2M
Depth == 662.2M
) // Limit search depth.
128
84.6k
    return nullptr;
129
62.1M
130
62.1M
  Instruction *I = dyn_cast<Instruction>(V);
131
62.1M
  if (
!I62.1M
) {
132
4.55M
    computeKnownBits(V, Known, Depth, CxtI);
133
4.55M
    return nullptr;        // Only analyze instructions.
134
4.55M
  }
135
57.6M
136
57.6M
  // If there are multiple uses of this value and we aren't at the root, then
137
57.6M
  // we can't do any simplifications of the operands, because DemandedMask
138
57.6M
  // only reflects the bits demanded by *one* of the users.
139
57.6M
  
if (57.6M
Depth != 0 && 57.6M
!I->hasOneUse()24.6M
)
140
12.4M
    return SimplifyMultipleUseDemandedBits(I, DemandedMask, Known, Depth, CxtI);
141
45.1M
142
45.1M
  KnownBits LHSKnown(BitWidth), RHSKnown(BitWidth);
143
45.1M
144
45.1M
  // If this is the root being simplified, allow it to have multiple uses,
145
45.1M
  // just set the DemandedMask to all bits so that we can try to simplify the
146
45.1M
  // operands.  This allows visitTruncInst (for example) to simplify the
147
45.1M
  // operand of a trunc without duplicating all the logic below.
148
45.1M
  if (
Depth == 0 && 45.1M
!V->hasOneUse()32.9M
)
149
16.8M
    DemandedMask.setAllBits();
150
45.1M
151
45.1M
  switch (I->getOpcode()) {
152
20.0M
  default:
153
20.0M
    computeKnownBits(I, Known, Depth, CxtI);
154
20.0M
    break;
155
3.92M
  case Instruction::And: {
156
3.92M
    // If either the LHS or the RHS are Zero, the result is zero.
157
3.92M
    if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1) ||
158
3.92M
        SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnown.Zero, LHSKnown,
159
3.92M
                             Depth + 1))
160
5.99k
      return I;
161
3.92M
    assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?");
162
3.91M
    assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?");
163
3.91M
164
3.91M
    // Output known-0 are known to be clear if zero in either the LHS | RHS.
165
3.91M
    APInt IKnownZero = RHSKnown.Zero | LHSKnown.Zero;
166
3.91M
    // Output known-1 bits are only known if set in both the LHS & RHS.
167
3.91M
    APInt IKnownOne = RHSKnown.One & LHSKnown.One;
168
3.91M
169
3.91M
    // If the client is only demanding bits that we know, return the known
170
3.91M
    // constant.
171
3.91M
    if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne))
172
42
      return Constant::getIntegerValue(VTy, IKnownOne);
173
3.91M
174
3.91M
    // If all of the demanded bits are known 1 on one side, return the other.
175
3.91M
    // These bits cannot contribute to the result of the 'and'.
176
3.91M
    
if (3.91M
DemandedMask.isSubsetOf(LHSKnown.Zero | RHSKnown.One)3.91M
)
177
9.79k
      return I->getOperand(0);
178
3.90M
    
if (3.90M
DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.One)3.90M
)
179
815
      return I->getOperand(1);
180
3.90M
181
3.90M
    // If the RHS is a constant, see if we can simplify it.
182
3.90M
    
if (3.90M
ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnown.Zero)3.90M
)
183
5.02k
      return I;
184
3.89M
185
3.89M
    Known.Zero = std::move(IKnownZero);
186
3.89M
    Known.One  = std::move(IKnownOne);
187
3.89M
    break;
188
3.89M
  }
189
1.48M
  case Instruction::Or: {
190
1.48M
    // If either the LHS or the RHS are One, the result is One.
191
1.48M
    if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1) ||
192
1.48M
        SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnown.One, LHSKnown,
193
1.48M
                             Depth + 1))
194
3.15k
      return I;
195
1.48M
    assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?");
196
1.48M
    assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?");
197
1.48M
198
1.48M
    // Output known-0 bits are only known if clear in both the LHS & RHS.
199
1.48M
    APInt IKnownZero = RHSKnown.Zero & LHSKnown.Zero;
200
1.48M
    // Output known-1 are known. to be set if s.et in either the LHS | RHS.
201
1.48M
    APInt IKnownOne = RHSKnown.One | LHSKnown.One;
202
1.48M
203
1.48M
    // If the client is only demanding bits that we know, return the known
204
1.48M
    // constant.
205
1.48M
    if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne))
206
9
      return Constant::getIntegerValue(VTy, IKnownOne);
207
1.48M
208
1.48M
    // If all of the demanded bits are known zero on one side, return the other.
209
1.48M
    // These bits cannot contribute to the result of the 'or'.
210
1.48M
    
if (1.48M
DemandedMask.isSubsetOf(LHSKnown.One | RHSKnown.Zero)1.48M
)
211
254
      return I->getOperand(0);
212
1.48M
    
if (1.48M
DemandedMask.isSubsetOf(RHSKnown.One | LHSKnown.Zero)1.48M
)
213
75
      return I->getOperand(1);
214
1.48M
215
1.48M
    // If the RHS is a constant, see if we can simplify it.
216
1.48M
    
if (1.48M
ShrinkDemandedConstant(I, 1, DemandedMask)1.48M
)
217
7
      return I;
218
1.48M
219
1.48M
    Known.Zero = std::move(IKnownZero);
220
1.48M
    Known.One  = std::move(IKnownOne);
221
1.48M
    break;
222
1.48M
  }
223
533k
  case Instruction::Xor: {
224
533k
    if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1) ||
225
533k
        SimplifyDemandedBits(I, 0, DemandedMask, LHSKnown, Depth + 1))
226
331
      return I;
227
533k
    assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?");
228
533k
    assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?");
229
533k
230
533k
    // Output known-0 bits are known if clear or set in both the LHS & RHS.
231
533k
    APInt IKnownZero = (RHSKnown.Zero & LHSKnown.Zero) |
232
533k
                       (RHSKnown.One & LHSKnown.One);
233
533k
    // Output known-1 are known to be set if set in only one of the LHS, RHS.
234
533k
    APInt IKnownOne =  (RHSKnown.Zero & LHSKnown.One) |
235
533k
                       (RHSKnown.One & LHSKnown.Zero);
236
533k
237
533k
    // If the client is only demanding bits that we know, return the known
238
533k
    // constant.
239
533k
    if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne))
240
6
      return Constant::getIntegerValue(VTy, IKnownOne);
241
533k
242
533k
    // If all of the demanded bits are known zero on one side, return the other.
243
533k
    // These bits cannot contribute to the result of the 'xor'.
244
533k
    
if (533k
DemandedMask.isSubsetOf(RHSKnown.Zero)533k
)
245
4
      return I->getOperand(0);
246
533k
    
if (533k
DemandedMask.isSubsetOf(LHSKnown.Zero)533k
)
247
0
      return I->getOperand(1);
248
533k
249
533k
    // If all of the demanded bits are known to be zero on one side or the
250
533k
    // other, turn this into an *inclusive* or.
251
533k
    //    e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
252
533k
    
if (533k
DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.Zero)533k
) {
253
50
      Instruction *Or =
254
50
        BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
255
50
                                 I->getName());
256
50
      return InsertNewInstWith(Or, *I);
257
50
    }
258
533k
259
533k
    // If all of the demanded bits on one side are known, and all of the set
260
533k
    // bits on that side are also known to be set on the other side, turn this
261
533k
    // into an AND, as we know the bits will be cleared.
262
533k
    //    e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
263
533k
    
if (533k
DemandedMask.isSubsetOf(RHSKnown.Zero|RHSKnown.One) &&
264
533k
        
RHSKnown.One.isSubsetOf(LHSKnown.One)301k
) {
265
8
      Constant *AndC = Constant::getIntegerValue(VTy,
266
8
                                                 ~RHSKnown.One & DemandedMask);
267
8
      Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
268
8
      return InsertNewInstWith(And, *I);
269
8
    }
270
533k
271
533k
    // If the RHS is a constant, see if we can simplify it.
272
533k
    // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1.
273
533k
    
if (533k
ShrinkDemandedConstant(I, 1, DemandedMask)533k
)
274
549
      return I;
275
532k
276
532k
    // If our LHS is an 'and' and if it has one use, and if any of the bits we
277
532k
    // are flipping are known to be set, then the xor is just resetting those
278
532k
    // bits to zero.  We can just knock out bits from the 'and' and the 'xor',
279
532k
    // simplifying both of them.
280
532k
    
if (Instruction *532k
LHSInst532k
= dyn_cast<Instruction>(I->getOperand(0)))
281
523k
      
if (523k
LHSInst->getOpcode() == Instruction::And && 523k
LHSInst->hasOneUse()42.9k
&&
282
38.9k
          isa<ConstantInt>(I->getOperand(1)) &&
283
27.0k
          isa<ConstantInt>(LHSInst->getOperand(1)) &&
284
523k
          
(LHSKnown.One & RHSKnown.One & DemandedMask) != 015.3k
) {
285
0
        ConstantInt *AndRHS = cast<ConstantInt>(LHSInst->getOperand(1));
286
0
        ConstantInt *XorRHS = cast<ConstantInt>(I->getOperand(1));
287
0
        APInt NewMask = ~(LHSKnown.One & RHSKnown.One & DemandedMask);
288
0
289
0
        Constant *AndC =
290
0
          ConstantInt::get(I->getType(), NewMask & AndRHS->getValue());
291
0
        Instruction *NewAnd = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
292
0
        InsertNewInstWith(NewAnd, *I);
293
0
294
0
        Constant *XorC =
295
0
          ConstantInt::get(I->getType(), NewMask & XorRHS->getValue());
296
0
        Instruction *NewXor = BinaryOperator::CreateXor(NewAnd, XorC);
297
0
        return InsertNewInstWith(NewXor, *I);
298
0
      }
299
532k
300
532k
    // Output known-0 bits are known if clear or set in both the LHS & RHS.
301
532k
    Known.Zero = std::move(IKnownZero);
302
532k
    // Output known-1 are known to be set if set in only one of the LHS, RHS.
303
532k
    Known.One  = std::move(IKnownOne);
304
532k
    break;
305
532k
  }
306
934k
  case Instruction::Select:
307
934k
    // If this is a select as part of a min/max pattern, don't simplify any
308
934k
    // further in case we break the structure.
309
934k
    Value *LHS, *RHS;
310
934k
    if (matchSelectPattern(I, LHS, RHS).Flavor != SPF_UNKNOWN)
311
538k
      return nullptr;
312
395k
313
395k
    
if (395k
SimplifyDemandedBits(I, 2, DemandedMask, RHSKnown, Depth + 1) ||
314
395k
        SimplifyDemandedBits(I, 1, DemandedMask, LHSKnown, Depth + 1))
315
383
      return I;
316
395k
    assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?");
317
394k
    assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?");
318
394k
319
394k
    // If the operands are constants, see if we can simplify them.
320
394k
    if (ShrinkDemandedConstant(I, 1, DemandedMask) ||
321
394k
        ShrinkDemandedConstant(I, 2, DemandedMask))
322
314
      return I;
323
394k
324
394k
    // Only known if known in both the LHS and RHS.
325
394k
    Known.One = RHSKnown.One & LHSKnown.One;
326
394k
    Known.Zero = RHSKnown.Zero & LHSKnown.Zero;
327
394k
    break;
328
4.47M
  case Instruction::ZExt:
329
4.47M
  case Instruction::Trunc: {
330
4.47M
    unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
331
4.47M
332
4.47M
    APInt InputDemandedMask = DemandedMask.zextOrTrunc(SrcBitWidth);
333
4.47M
    KnownBits InputKnown(SrcBitWidth);
334
4.47M
    if (SimplifyDemandedBits(I, 0, InputDemandedMask, InputKnown, Depth + 1))
335
9.08k
      return I;
336
4.46M
    Known = Known.zextOrTrunc(BitWidth);
337
4.46M
    // Any top bits are known to be zero.
338
4.46M
    if (BitWidth > SrcBitWidth)
339
1.21M
      Known.Zero.setBitsFrom(SrcBitWidth);
340
4.46M
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
341
4.46M
    break;
342
4.46M
  }
343
46.9k
  case Instruction::BitCast:
344
46.9k
    if (!I->getOperand(0)->getType()->isIntOrIntVectorTy())
345
46.7k
      return nullptr;  // vector->int or fp->int?
346
181
347
181
    
if (VectorType *181
DstVTy181
= dyn_cast<VectorType>(I->getType())) {
348
109
      if (VectorType *SrcVTy =
349
86
            dyn_cast<VectorType>(I->getOperand(0)->getType())) {
350
86
        if (DstVTy->getNumElements() != SrcVTy->getNumElements())
351
86
          // Don't touch a bitcast between vectors of different element counts.
352
86
          return nullptr;
353
109
      } else
354
109
        // Don't touch a scalar-to-vector bitcast.
355
23
        return nullptr;
356
72
    } else 
if (72
I->getOperand(0)->getType()->isVectorTy()72
)
357
72
      // Don't touch a vector-to-scalar bitcast.
358
72
      return nullptr;
359
0
360
0
    
if (0
SimplifyDemandedBits(I, 0, DemandedMask, Known, Depth + 1)0
)
361
0
      return I;
362
0
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
363
0
    break;
364
902k
  case Instruction::SExt: {
365
902k
    // Compute the bits in the result that are not present in the input.
366
902k
    unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
367
902k
368
902k
    APInt InputDemandedBits = DemandedMask.trunc(SrcBitWidth);
369
902k
370
902k
    // If any of the sign extended bits are demanded, we know that the sign
371
902k
    // bit is demanded.
372
902k
    if (DemandedMask.getActiveBits() > SrcBitWidth)
373
899k
      InputDemandedBits.setBit(SrcBitWidth-1);
374
902k
375
902k
    KnownBits InputKnown(SrcBitWidth);
376
902k
    if (SimplifyDemandedBits(I, 0, InputDemandedBits, InputKnown, Depth + 1))
377
20
      return I;
378
902k
379
902k
    // If the input sign bit is known zero, or if the NewBits are not demanded
380
902k
    // convert this into a zero extension.
381
902k
    
if (902k
InputKnown.isNonNegative() ||
382
902k
        
DemandedMask.getActiveBits() <= SrcBitWidth902k
) {
383
2.31k
      // Convert to ZExt cast.
384
2.31k
      CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName());
385
2.31k
      return InsertNewInstWith(NewCast, *I);
386
2.31k
     }
387
899k
388
899k
    // If the sign bit of the input is known set or clear, then we know the
389
899k
    // top bits of the result.
390
899k
    Known = InputKnown.sext(BitWidth);
391
899k
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
392
899k
    break;
393
899k
  }
394
4.58M
  case Instruction::Add:
395
4.58M
  case Instruction::Sub: {
396
4.58M
    /// If the high-bits of an ADD/SUB are not demanded, then we do not care
397
4.58M
    /// about the high bits of the operands.
398
4.58M
    unsigned NLZ = DemandedMask.countLeadingZeros();
399
4.58M
    // Right fill the mask of bits for this ADD/SUB to demand the most
400
4.58M
    // significant bit and all those below it.
401
4.58M
    APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
402
4.58M
    if (ShrinkDemandedConstant(I, 0, DemandedFromOps) ||
403
4.58M
        SimplifyDemandedBits(I, 0, DemandedFromOps, LHSKnown, Depth + 1) ||
404
4.58M
        ShrinkDemandedConstant(I, 1, DemandedFromOps) ||
405
4.58M
        
SimplifyDemandedBits(I, 1, DemandedFromOps, RHSKnown, Depth + 1)4.58M
) {
406
4.19k
      if (
NLZ > 04.19k
) {
407
4.01k
        // Disable the nsw and nuw flags here: We can no longer guarantee that
408
4.01k
        // we won't wrap after simplification. Removing the nsw/nuw flags is
409
4.01k
        // legal here because the top bit is not demanded.
410
4.01k
        BinaryOperator &BinOP = *cast<BinaryOperator>(I);
411
4.01k
        BinOP.setHasNoSignedWrap(false);
412
4.01k
        BinOP.setHasNoUnsignedWrap(false);
413
4.01k
      }
414
4.19k
      return I;
415
4.19k
    }
416
4.58M
417
4.58M
    // If we are known to be adding/subtracting zeros to every bit below
418
4.58M
    // the highest demanded bit, we just return the other side.
419
4.58M
    
if (4.58M
DemandedFromOps.isSubsetOf(RHSKnown.Zero)4.58M
)
420
395
      return I->getOperand(0);
421
4.58M
    // We can't do this with the LHS for subtraction, unless we are only
422
4.58M
    // demanding the LSB.
423
4.58M
    
if (4.58M
(I->getOpcode() == Instruction::Add ||
424
920k
         DemandedFromOps.isOneValue()) &&
425
3.66M
        DemandedFromOps.isSubsetOf(LHSKnown.Zero))
426
257
      return I->getOperand(1);
427
4.58M
428
4.58M
    // Otherwise just compute the known bits of the result.
429
4.58M
    bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
430
4.58M
    Known = KnownBits::computeForAddSub(I->getOpcode() == Instruction::Add,
431
4.58M
                                        NSW, LHSKnown, RHSKnown);
432
4.58M
    break;
433
4.58M
  }
434
2.64M
  case Instruction::Shl: {
435
2.64M
    const APInt *SA;
436
2.64M
    if (
match(I->getOperand(1), m_APInt(SA))2.64M
) {
437
2.25M
      const APInt *ShrAmt;
438
2.25M
      if (
match(I->getOperand(0), m_Shr(m_Value(), m_APInt(ShrAmt)))2.25M
) {
439
35.7k
        Instruction *Shr = cast<Instruction>(I->getOperand(0));
440
35.7k
        if (Value *R = simplifyShrShlDemandedBits(
441
35.7k
                Shr, *ShrAmt, I, *SA, DemandedMask, Known))
442
414
          return R;
443
2.25M
      }
444
2.25M
445
2.25M
      uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
446
2.25M
      APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt));
447
2.25M
448
2.25M
      // If the shift is NUW/NSW, then it does demand the high bits.
449
2.25M
      ShlOperator *IOp = cast<ShlOperator>(I);
450
2.25M
      if (IOp->hasNoSignedWrap())
451
994k
        DemandedMaskIn.setHighBits(ShiftAmt+1);
452
1.25M
      else 
if (1.25M
IOp->hasNoUnsignedWrap()1.25M
)
453
184k
        DemandedMaskIn.setHighBits(ShiftAmt);
454
2.25M
455
2.25M
      if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1))
456
1.68k
        return I;
457
2.25M
      assert(!Known.hasConflict() && "Bits known to be one AND zero?");
458
2.25M
      Known.Zero <<= ShiftAmt;
459
2.25M
      Known.One  <<= ShiftAmt;
460
2.25M
      // low bits known zero.
461
2.25M
      if (ShiftAmt)
462
2.25M
        Known.Zero.setLowBits(ShiftAmt);
463
2.25M
    }
464
2.64M
    break;
465
2.64M
  }
466
2.26M
  case Instruction::LShr: {
467
2.26M
    const APInt *SA;
468
2.26M
    if (
match(I->getOperand(1), m_APInt(SA))2.26M
) {
469
2.13M
      uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
470
2.13M
471
2.13M
      // Unsigned shift right.
472
2.13M
      APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
473
2.13M
474
2.13M
      // If the shift is exact, then it does demand the low bits (and knows that
475
2.13M
      // they are zero).
476
2.13M
      if (cast<LShrOperator>(I)->isExact())
477
23.7k
        DemandedMaskIn.setLowBits(ShiftAmt);
478
2.13M
479
2.13M
      if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1))
480
2.47k
        return I;
481
2.13M
      assert(!Known.hasConflict() && "Bits known to be one AND zero?");
482
2.12M
      Known.Zero.lshrInPlace(ShiftAmt);
483
2.12M
      Known.One.lshrInPlace(ShiftAmt);
484
2.12M
      if (ShiftAmt)
485
2.12M
        Known.Zero.setHighBits(ShiftAmt);  // high bits known zero.
486
2.13M
    }
487
2.26M
    break;
488
2.26M
  }
489
483k
  case Instruction::AShr: {
490
483k
    // If this is an arithmetic shift right and only the low-bit is set, we can
491
483k
    // always convert this into a logical shr, even if the shift amount is
492
483k
    // variable.  The low bit of the shift cannot be an input sign bit unless
493
483k
    // the shift amount is >= the size of the datatype, which is undefined.
494
483k
    if (
DemandedMask.isOneValue()483k
) {
495
211
      // Perform the logical shift right.
496
211
      Instruction *NewVal = BinaryOperator::CreateLShr(
497
211
                        I->getOperand(0), I->getOperand(1), I->getName());
498
211
      return InsertNewInstWith(NewVal, *I);
499
211
    }
500
482k
501
482k
    // If the sign bit is the only bit demanded by this ashr, then there is no
502
482k
    // need to do it, the shift doesn't change the high bit.
503
482k
    
if (482k
DemandedMask.isSignMask()482k
)
504
47
      return I->getOperand(0);
505
482k
506
482k
    const APInt *SA;
507
482k
    if (
match(I->getOperand(1), m_APInt(SA))482k
) {
508
427k
      uint32_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
509
427k
510
427k
      // Signed shift right.
511
427k
      APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
512
427k
      // If any of the high bits are demanded, we should set the sign bit as
513
427k
      // demanded.
514
427k
      if (DemandedMask.countLeadingZeros() <= ShiftAmt)
515
422k
        DemandedMaskIn.setSignBit();
516
427k
517
427k
      // If the shift is exact, then it does demand the low bits (and knows that
518
427k
      // they are zero).
519
427k
      if (cast<AShrOperator>(I)->isExact())
520
223k
        DemandedMaskIn.setLowBits(ShiftAmt);
521
427k
522
427k
      if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1))
523
1.09k
        return I;
524
426k
525
426k
      unsigned SignBits = ComputeNumSignBits(I->getOperand(0), Depth + 1, CxtI);
526
426k
527
426k
      assert(!Known.hasConflict() && "Bits known to be one AND zero?");
528
426k
      // Compute the new bits that are at the top now plus sign bits.
529
426k
      APInt HighBits(APInt::getHighBitsSet(
530
426k
          BitWidth, std::min(SignBits + ShiftAmt - 1, BitWidth)));
531
426k
      Known.Zero.lshrInPlace(ShiftAmt);
532
426k
      Known.One.lshrInPlace(ShiftAmt);
533
426k
534
426k
      // If the input sign bit is known to be zero, or if none of the top bits
535
426k
      // are demanded, turn this into an unsigned shift right.
536
426k
      assert(BitWidth > ShiftAmt && "Shift amount not saturated?");
537
426k
      if (Known.Zero[BitWidth-ShiftAmt-1] ||
538
426k
          
!DemandedMask.intersects(HighBits)423k
) {
539
5.66k
        BinaryOperator *LShr = BinaryOperator::CreateLShr(I->getOperand(0),
540
5.66k
                                                          I->getOperand(1));
541
5.66k
        LShr->setIsExact(cast<BinaryOperator>(I)->isExact());
542
5.66k
        return InsertNewInstWith(LShr, *I);
543
420k
      } else 
if (420k
Known.One[BitWidth-ShiftAmt-1]420k
) { // New bits are known one.
544
10
        Known.One |= HighBits;
545
10
      }
546
427k
    }
547
476k
    break;
548
482k
  }
549
60.5k
  case Instruction::SRem:
550
60.5k
    if (ConstantInt *
Rem60.5k
= dyn_cast<ConstantInt>(I->getOperand(1))) {
551
17.0k
      // X % -1 demands all the bits because we don't want to introduce
552
17.0k
      // INT_MIN % -1 (== undef) by accident.
553
17.0k
      if (Rem->isMinusOne())
554
0
        break;
555
17.0k
      APInt RA = Rem->getValue().abs();
556
17.0k
      if (
RA.isPowerOf2()17.0k
) {
557
6.26k
        if (DemandedMask.ult(RA))    // srem won't affect demanded bits
558
1
          return I->getOperand(0);
559
6.26k
560
6.26k
        APInt LowBits = RA - 1;
561
6.26k
        APInt Mask2 = LowBits | APInt::getSignMask(BitWidth);
562
6.26k
        if (SimplifyDemandedBits(I, 0, Mask2, LHSKnown, Depth + 1))
563
2
          return I;
564
6.26k
565
6.26k
        // The low bits of LHS are unchanged by the srem.
566
6.26k
        Known.Zero = LHSKnown.Zero & LowBits;
567
6.26k
        Known.One = LHSKnown.One & LowBits;
568
6.26k
569
6.26k
        // If LHS is non-negative or has all low bits zero, then the upper bits
570
6.26k
        // are all zero.
571
6.26k
        if (
LHSKnown.isNonNegative() || 6.26k
LowBits.isSubsetOf(LHSKnown.Zero)6.24k
)
572
18
          Known.Zero |= ~LowBits;
573
6.26k
574
6.26k
        // If LHS is negative and not all low bits are zero, then the upper bits
575
6.26k
        // are all one.
576
6.26k
        if (
LHSKnown.isNegative() && 6.26k
LowBits.intersects(LHSKnown.One)0
)
577
0
          Known.One |= ~LowBits;
578
6.26k
579
6.26k
        assert(!Known.hasConflict() && "Bits known to be one AND zero?");
580
6.26k
        break;
581
6.26k
      }
582
17.0k
    }
583
54.3k
584
54.3k
    // The sign bit is the LHS's sign bit, except when the result of the
585
54.3k
    // remainder is zero.
586
54.3k
    
if (54.3k
DemandedMask.isSignBitSet()54.3k
) {
587
51.5k
      computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI);
588
51.5k
      // If it's known zero, our sign bit is also zero.
589
51.5k
      if (LHSKnown.isNonNegative())
590
76
        Known.makeNonNegative();
591
51.5k
    }
592
54.3k
    break;
593
28.7k
  case Instruction::URem: {
594
28.7k
    KnownBits Known2(BitWidth);
595
28.7k
    APInt AllOnes = APInt::getAllOnesValue(BitWidth);
596
28.7k
    if (SimplifyDemandedBits(I, 0, AllOnes, Known2, Depth + 1) ||
597
28.7k
        SimplifyDemandedBits(I, 1, AllOnes, Known2, Depth + 1))
598
0
      return I;
599
28.7k
600
28.7k
    unsigned Leaders = Known2.countMinLeadingZeros();
601
28.7k
    Known.Zero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask;
602
28.7k
    break;
603
28.7k
  }
604
2.65M
  case Instruction::Call:
605
2.65M
    if (IntrinsicInst *
II2.65M
= dyn_cast<IntrinsicInst>(I)) {
606
53.3k
      switch (II->getIntrinsicID()) {
607
51.9k
      default: break;
608
1.44k
      case Intrinsic::bswap: {
609
1.44k
        // If the only bits demanded come from one byte of the bswap result,
610
1.44k
        // just shift the input byte into position to eliminate the bswap.
611
1.44k
        unsigned NLZ = DemandedMask.countLeadingZeros();
612
1.44k
        unsigned NTZ = DemandedMask.countTrailingZeros();
613
1.44k
614
1.44k
        // Round NTZ down to the next byte.  If we have 11 trailing zeros, then
615
1.44k
        // we need all the bits down to bit 8.  Likewise, round NLZ.  If we
616
1.44k
        // have 14 leading zeros, round to 8.
617
1.44k
        NLZ &= ~7;
618
1.44k
        NTZ &= ~7;
619
1.44k
        // If we need exactly one byte, we can do this transformation.
620
1.44k
        if (
BitWidth-NLZ-NTZ == 81.44k
) {
621
10
          unsigned ResultBit = NTZ;
622
10
          unsigned InputBit = BitWidth-NTZ-8;
623
10
624
10
          // Replace this with either a left or right shift to get the byte into
625
10
          // the right place.
626
10
          Instruction *NewVal;
627
10
          if (InputBit > ResultBit)
628
7
            NewVal = BinaryOperator::CreateLShr(II->getArgOperand(0),
629
7
                    ConstantInt::get(I->getType(), InputBit-ResultBit));
630
10
          else
631
3
            NewVal = BinaryOperator::CreateShl(II->getArgOperand(0),
632
3
                    ConstantInt::get(I->getType(), ResultBit-InputBit));
633
10
          NewVal->takeName(I);
634
10
          return InsertNewInstWith(NewVal, *I);
635
10
        }
636
1.43k
637
1.43k
        // TODO: Could compute known zero/one bits based on the input.
638
1.43k
        break;
639
1.43k
      }
640
14
      case Intrinsic::x86_mmx_pmovmskb:
641
14
      case Intrinsic::x86_sse_movmsk_ps:
642
14
      case Intrinsic::x86_sse2_movmsk_pd:
643
14
      case Intrinsic::x86_sse2_pmovmskb_128:
644
14
      case Intrinsic::x86_avx_movmsk_ps_256:
645
14
      case Intrinsic::x86_avx_movmsk_pd_256:
646
14
      case Intrinsic::x86_avx2_pmovmskb: {
647
14
        // MOVMSK copies the vector elements' sign bits to the low bits
648
14
        // and zeros the high bits.
649
14
        unsigned ArgWidth;
650
14
        if (
II->getIntrinsicID() == Intrinsic::x86_mmx_pmovmskb14
) {
651
2
          ArgWidth = 8; // Arg is x86_mmx, but treated as <8 x i8>.
652
14
        } else {
653
12
          auto Arg = II->getArgOperand(0);
654
12
          auto ArgType = cast<VectorType>(Arg->getType());
655
12
          ArgWidth = ArgType->getNumElements();
656
12
        }
657
14
658
14
        // If we don't need any of low bits then return zero,
659
14
        // we know that DemandedMask is non-zero already.
660
14
        APInt DemandedElts = DemandedMask.zextOrTrunc(ArgWidth);
661
14
        if (DemandedElts.isNullValue())
662
6
          return ConstantInt::getNullValue(VTy);
663
8
664
8
        // We know that the upper bits are set to zero.
665
8
        Known.Zero.setBitsFrom(ArgWidth);
666
8
        return nullptr;
667
8
      }
668
1
      case Intrinsic::x86_sse42_crc32_64_64:
669
1
        Known.Zero.setBitsFrom(32);
670
1
        return nullptr;
671
2.65M
      }
672
2.65M
    }
673
2.65M
    computeKnownBits(V, Known, Depth, CxtI);
674
2.65M
    break;
675
44.4M
  }
676
44.4M
677
44.4M
  // If the client is only demanding bits that we know, return the known
678
44.4M
  // constant.
679
44.4M
  
if (44.4M
DemandedMask.isSubsetOf(Known.Zero|Known.One)44.4M
)
680
317
    return Constant::getIntegerValue(VTy, Known.One);
681
44.4M
  return nullptr;
682
44.4M
}
683
684
/// Helper routine of SimplifyDemandedUseBits. It computes Known
685
/// bits. It also tries to handle simplifications that can be done based on
686
/// DemandedMask, but without modifying the Instruction.
687
Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I,
688
                                                     const APInt &DemandedMask,
689
                                                     KnownBits &Known,
690
                                                     unsigned Depth,
691
12.4M
                                                     Instruction *CxtI) {
692
12.4M
  unsigned BitWidth = DemandedMask.getBitWidth();
693
12.4M
  Type *ITy = I->getType();
694
12.4M
695
12.4M
  KnownBits LHSKnown(BitWidth);
696
12.4M
  KnownBits RHSKnown(BitWidth);
697
12.4M
698
12.4M
  // Despite the fact that we can't simplify this instruction in all User's
699
12.4M
  // context, we can at least compute the known bits, and we can
700
12.4M
  // do simplifications that apply to *just* the one user if we know that
701
12.4M
  // this instruction has a simpler value in that context.
702
12.4M
  switch (I->getOpcode()) {
703
376k
  case Instruction::And: {
704
376k
    // If either the LHS or the RHS are Zero, the result is zero.
705
376k
    computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI);
706
376k
    computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1,
707
376k
                     CxtI);
708
376k
709
376k
    // Output known-0 are known to be clear if zero in either the LHS | RHS.
710
376k
    APInt IKnownZero = RHSKnown.Zero | LHSKnown.Zero;
711
376k
    // Output known-1 bits are only known if set in both the LHS & RHS.
712
376k
    APInt IKnownOne = RHSKnown.One & LHSKnown.One;
713
376k
714
376k
    // If the client is only demanding bits that we know, return the known
715
376k
    // constant.
716
376k
    if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne))
717
5
      return Constant::getIntegerValue(ITy, IKnownOne);
718
376k
719
376k
    // If all of the demanded bits are known 1 on one side, return the other.
720
376k
    // These bits cannot contribute to the result of the 'and' in this
721
376k
    // context.
722
376k
    
if (376k
DemandedMask.isSubsetOf(LHSKnown.Zero | RHSKnown.One)376k
)
723
1.00k
      return I->getOperand(0);
724
375k
    
if (375k
DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.One)375k
)
725
0
      return I->getOperand(1);
726
375k
727
375k
    Known.Zero = std::move(IKnownZero);
728
375k
    Known.One  = std::move(IKnownOne);
729
375k
    break;
730
375k
  }
731
302k
  case Instruction::Or: {
732
302k
    // We can simplify (X|Y) -> X or Y in the user's context if we know that
733
302k
    // only bits from X or Y are demanded.
734
302k
735
302k
    // If either the LHS or the RHS are One, the result is One.
736
302k
    computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI);
737
302k
    computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1,
738
302k
                     CxtI);
739
302k
740
302k
    // Output known-0 bits are only known if clear in both the LHS & RHS.
741
302k
    APInt IKnownZero = RHSKnown.Zero & LHSKnown.Zero;
742
302k
    // Output known-1 are known to be set if set in either the LHS | RHS.
743
302k
    APInt IKnownOne = RHSKnown.One | LHSKnown.One;
744
302k
745
302k
    // If the client is only demanding bits that we know, return the known
746
302k
    // constant.
747
302k
    if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne))
748
5
      return Constant::getIntegerValue(ITy, IKnownOne);
749
302k
750
302k
    // If all of the demanded bits are known zero on one side, return the
751
302k
    // other.  These bits cannot contribute to the result of the 'or' in this
752
302k
    // context.
753
302k
    
if (302k
DemandedMask.isSubsetOf(LHSKnown.One | RHSKnown.Zero)302k
)
754
1.26k
      return I->getOperand(0);
755
301k
    
if (301k
DemandedMask.isSubsetOf(RHSKnown.One | LHSKnown.Zero)301k
)
756
677
      return I->getOperand(1);
757
300k
758
300k
    Known.Zero = std::move(IKnownZero);
759
300k
    Known.One  = std::move(IKnownOne);
760
300k
    break;
761
300k
  }
762
132k
  case Instruction::Xor: {
763
132k
    // We can simplify (X^Y) -> X or Y in the user's context if we know that
764
132k
    // only bits from X or Y are demanded.
765
132k
766
132k
    computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI);
767
132k
    computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1,
768
132k
                     CxtI);
769
132k
770
132k
    // Output known-0 bits are known if clear or set in both the LHS & RHS.
771
132k
    APInt IKnownZero = (RHSKnown.Zero & LHSKnown.Zero) |
772
132k
                       (RHSKnown.One & LHSKnown.One);
773
132k
    // Output known-1 are known to be set if set in only one of the LHS, RHS.
774
132k
    APInt IKnownOne =  (RHSKnown.Zero & LHSKnown.One) |
775
132k
                       (RHSKnown.One & LHSKnown.Zero);
776
132k
777
132k
    // If the client is only demanding bits that we know, return the known
778
132k
    // constant.
779
132k
    if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne))
780
1
      return Constant::getIntegerValue(ITy, IKnownOne);
781
132k
782
132k
    // If all of the demanded bits are known zero on one side, return the
783
132k
    // other.
784
132k
    
if (132k
DemandedMask.isSubsetOf(RHSKnown.Zero)132k
)
785
47
      return I->getOperand(0);
786
132k
    
if (132k
DemandedMask.isSubsetOf(LHSKnown.Zero)132k
)
787
97
      return I->getOperand(1);
788
132k
789
132k
    // Output known-0 bits are known if clear or set in both the LHS & RHS.
790
132k
    Known.Zero = std::move(IKnownZero);
791
132k
    // Output known-1 are known to be set if set in only one of the LHS, RHS.
792
132k
    Known.One  = std::move(IKnownOne);
793
132k
    break;
794
132k
  }
795
11.6M
  default:
796
11.6M
    // Compute the Known bits to simplify things downstream.
797
11.6M
    computeKnownBits(I, Known, Depth, CxtI);
798
11.6M
799
11.6M
    // If this user is only demanding bits that we know, return the known
800
11.6M
    // constant.
801
11.6M
    if (DemandedMask.isSubsetOf(Known.Zero|Known.One))
802
126
      return Constant::getIntegerValue(ITy, Known.One);
803
11.6M
804
11.6M
    break;
805
12.4M
  }
806
12.4M
807
12.4M
  return nullptr;
808
12.4M
}
809
810
811
/// Helper routine of SimplifyDemandedUseBits. It tries to simplify
812
/// "E1 = (X lsr C1) << C2", where the C1 and C2 are constant, into
813
/// "E2 = X << (C2 - C1)" or "E2 = X >> (C1 - C2)", depending on the sign
814
/// of "C2-C1".
815
///
816
/// Suppose E1 and E2 are generally different in bits S={bm, bm+1,
817
/// ..., bn}, without considering the specific value X is holding.
818
/// This transformation is legal iff one of following conditions is hold:
819
///  1) All the bit in S are 0, in this case E1 == E2.
820
///  2) We don't care those bits in S, per the input DemandedMask.
821
///  3) Combination of 1) and 2). Some bits in S are 0, and we don't care the
822
///     rest bits.
823
///
824
/// Currently we only test condition 2).
825
///
826
/// As with SimplifyDemandedUseBits, it returns NULL if the simplification was
827
/// not successful.
828
Value *
829
InstCombiner::simplifyShrShlDemandedBits(Instruction *Shr, const APInt &ShrOp1,
830
                                         Instruction *Shl, const APInt &ShlOp1,
831
                                         const APInt &DemandedMask,
832
35.7k
                                         KnownBits &Known) {
833
35.7k
  if (
!ShlOp1 || 35.7k
!ShrOp135.7k
)
834
0
    return nullptr; // No-op.
835
35.7k
836
35.7k
  Value *VarX = Shr->getOperand(0);
837
35.7k
  Type *Ty = VarX->getType();
838
35.7k
  unsigned BitWidth = Ty->getScalarSizeInBits();
839
35.7k
  if (
ShlOp1.uge(BitWidth) || 35.7k
ShrOp1.uge(BitWidth)35.7k
)
840
0
    return nullptr; // Undef.
841
35.7k
842
35.7k
  unsigned ShlAmt = ShlOp1.getZExtValue();
843
35.7k
  unsigned ShrAmt = ShrOp1.getZExtValue();
844
35.7k
845
35.7k
  Known.One.clearAllBits();
846
35.7k
  Known.Zero.setLowBits(ShlAmt - 1);
847
35.7k
  Known.Zero &= DemandedMask;
848
35.7k
849
35.7k
  APInt BitMask1(APInt::getAllOnesValue(BitWidth));
850
35.7k
  APInt BitMask2(APInt::getAllOnesValue(BitWidth));
851
35.7k
852
35.7k
  bool isLshr = (Shr->getOpcode() == Instruction::LShr);
853
32.5k
  BitMask1 = isLshr ? (BitMask1.lshr(ShrAmt) << ShlAmt) :
854
3.13k
                      (BitMask1.ashr(ShrAmt) << ShlAmt);
855
35.7k
856
35.7k
  if (
ShrAmt <= ShlAmt35.7k
) {
857
24.7k
    BitMask2 <<= (ShlAmt - ShrAmt);
858
35.7k
  } else {
859
8.60k
    BitMask2 = isLshr ? BitMask2.lshr(ShrAmt - ShlAmt):
860
2.35k
                        BitMask2.ashr(ShrAmt - ShlAmt);
861
10.9k
  }
862
35.7k
863
35.7k
  // Check if condition-2 (see the comment to this function) is satified.
864
35.7k
  if (
(BitMask1 & DemandedMask) == (BitMask2 & DemandedMask)35.7k
) {
865
883
    if (ShrAmt == ShlAmt)
866
114
      return VarX;
867
769
868
769
    
if (769
!Shr->hasOneUse()769
)
869
469
      return nullptr;
870
300
871
300
    BinaryOperator *New;
872
300
    if (
ShrAmt < ShlAmt300
) {
873
232
      Constant *Amt = ConstantInt::get(VarX->getType(), ShlAmt - ShrAmt);
874
232
      New = BinaryOperator::CreateShl(VarX, Amt);
875
232
      BinaryOperator *Orig = cast<BinaryOperator>(Shl);
876
232
      New->setHasNoSignedWrap(Orig->hasNoSignedWrap());
877
232
      New->setHasNoUnsignedWrap(Orig->hasNoUnsignedWrap());
878
300
    } else {
879
68
      Constant *Amt = ConstantInt::get(VarX->getType(), ShrAmt - ShlAmt);
880
66
      New = isLshr ? BinaryOperator::CreateLShr(VarX, Amt) :
881
2
                     BinaryOperator::CreateAShr(VarX, Amt);
882
68
      if (cast<BinaryOperator>(Shr)->isExact())
883
2
        New->setIsExact(true);
884
68
    }
885
883
886
883
    return InsertNewInstWith(New, *Shl);
887
883
  }
888
34.8k
889
34.8k
  return nullptr;
890
34.8k
}
891
892
/// The specified value produces a vector with any number of elements.
893
/// DemandedElts contains the set of elements that are actually used by the
894
/// caller. This method analyzes which elements of the operand are undef and
895
/// returns that information in UndefElts.
896
///
897
/// If the information about demanded elements can be used to simplify the
898
/// operation, the operation is simplified, then the resultant value is
899
/// returned.  This returns null if no change was made.
900
Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
901
                                                APInt &UndefElts,
902
1.41M
                                                unsigned Depth) {
903
1.41M
  unsigned VWidth = V->getType()->getVectorNumElements();
904
1.41M
  APInt EltMask(APInt::getAllOnesValue(VWidth));
905
1.41M
  assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!");
906
1.41M
907
1.41M
  if (
isa<UndefValue>(V)1.41M
) {
908
405k
    // If the entire vector is undefined, just return this info.
909
405k
    UndefElts = EltMask;
910
405k
    return nullptr;
911
405k
  }
912
1.00M
913
1.00M
  
if (1.00M
DemandedElts.isNullValue()1.00M
) { // If nothing is demanded, provide undef.
914
3.88k
    UndefElts = EltMask;
915
3.88k
    return UndefValue::get(V->getType());
916
3.88k
  }
917
1.00M
918
1.00M
  UndefElts = 0;
919
1.00M
920
1.00M
  // Handle ConstantAggregateZero, ConstantVector, ConstantDataSequential.
921
1.00M
  if (Constant *
C1.00M
= dyn_cast<Constant>(V)) {
922
14.4k
    // Check if this is identity. If so, return 0 since we are not simplifying
923
14.4k
    // anything.
924
14.4k
    if (DemandedElts.isAllOnesValue())
925
8.76k
      return nullptr;
926
5.68k
927
5.68k
    Type *EltTy = cast<VectorType>(V->getType())->getElementType();
928
5.68k
    Constant *Undef = UndefValue::get(EltTy);
929
5.68k
930
5.68k
    SmallVector<Constant*, 16> Elts;
931
29.9k
    for (unsigned i = 0; 
i != VWidth29.9k
;
++i24.2k
) {
932
24.2k
      if (
!DemandedElts[i]24.2k
) { // If not demanded, set to undef.
933
10.0k
        Elts.push_back(Undef);
934
10.0k
        UndefElts.setBit(i);
935
10.0k
        continue;
936
10.0k
      }
937
14.2k
938
14.2k
      Constant *Elt = C->getAggregateElement(i);
939
14.2k
      if (
!Elt14.2k
)
return nullptr0
;
940
14.2k
941
14.2k
      
if (14.2k
isa<UndefValue>(Elt)14.2k
) { // Already undef.
942
814
        Elts.push_back(Undef);
943
814
        UndefElts.setBit(i);
944
14.2k
      } else {                               // Otherwise, defined.
945
13.4k
        Elts.push_back(Elt);
946
13.4k
      }
947
24.2k
    }
948
5.68k
949
5.68k
    // If we changed the constant, return it.
950
5.68k
    Constant *NewCV = ConstantVector::get(Elts);
951
5.68k
    return NewCV != C ? 
NewCV662
:
nullptr5.02k
;
952
991k
  }
953
991k
954
991k
  // Limit search depth.
955
991k
  
if (991k
Depth == 10991k
)
956
332
    return nullptr;
957
991k
958
991k
  // If multiple users are using the root value, proceed with
959
991k
  // simplification conservatively assuming that all elements
960
991k
  // are needed.
961
991k
  
if (991k
!V->hasOneUse()991k
) {
962
272k
    // Quit if we find multiple users of a non-root value though.
963
272k
    // They'll be handled when it's their turn to be visited by
964
272k
    // the main instcombine process.
965
272k
    if (Depth != 0)
966
272k
      // TODO: Just compute the UndefElts information recursively.
967
249k
      return nullptr;
968
22.8k
969
22.8k
    // Conservatively assume that all elements are needed.
970
22.8k
    DemandedElts = EltMask;
971
22.8k
  }
972
991k
973
741k
  Instruction *I = dyn_cast<Instruction>(V);
974
741k
  if (
!I741k
)
return nullptr9.82k
; // Only analyze instructions.
975
732k
976
732k
  bool MadeChange = false;
977
732k
  APInt UndefElts2(VWidth, 0);
978
732k
  APInt UndefElts3(VWidth, 0);
979
732k
  Value *TmpV;
980
732k
  switch (I->getOpcode()) {
981
83.5k
  default: break;
982
732k
983
277k
  case Instruction::InsertElement: {
984
277k
    // If this is a variable index, we don't know which element it overwrites.
985
277k
    // demand exactly the same input as we produce.
986
277k
    ConstantInt *Idx = dyn_cast<ConstantInt>(I->getOperand(2));
987
277k
    if (
!Idx277k
) {
988
444
      // Note that we can't propagate undef elt info, because we don't know
989
444
      // which elt is getting updated.
990
444
      TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
991
444
                                        UndefElts2, Depth + 1);
992
444
      if (
TmpV444
)
{ I->setOperand(0, TmpV); MadeChange = true; }0
993
444
      break;
994
444
    }
995
277k
996
277k
    // The element inserted overwrites whatever was there, so the input demanded
997
277k
    // set is simpler than the output set.
998
277k
    unsigned IdxNo = Idx->getZExtValue();
999
277k
    APInt PreInsertDemandedElts = DemandedElts;
1000
277k
    if (IdxNo < VWidth)
1001
277k
      PreInsertDemandedElts.clearBit(IdxNo);
1002
277k
    TmpV = SimplifyDemandedVectorElts(I->getOperand(0), PreInsertDemandedElts,
1003
277k
                                      UndefElts, Depth + 1);
1004
277k
    if (
TmpV277k
)
{ I->setOperand(0, TmpV); MadeChange = true; }8.85k
1005
277k
1006
277k
    // If this is inserting an element that isn't demanded, remove this
1007
277k
    // insertelement.
1008
277k
    if (
IdxNo >= VWidth || 277k
!DemandedElts[IdxNo]277k
) {
1009
142
      Worklist.Add(I);
1010
142
      return I->getOperand(0);
1011
142
    }
1012
276k
1013
276k
    // The inserted element is defined.
1014
276k
    UndefElts.clearBit(IdxNo);
1015
276k
    break;
1016
276k
  }
1017
328k
  case Instruction::ShuffleVector: {
1018
328k
    ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
1019
328k
    unsigned LHSVWidth =
1020
328k
      Shuffle->getOperand(0)->getType()->getVectorNumElements();
1021
328k
    APInt LeftDemanded(LHSVWidth, 0), RightDemanded(LHSVWidth, 0);
1022
2.02M
    for (unsigned i = 0; 
i < VWidth2.02M
;
i++1.69M
) {
1023
1.69M
      if (
DemandedElts[i]1.69M
) {
1024
1.62M
        unsigned MaskVal = Shuffle->getMaskValue(i);
1025
1.62M
        if (
MaskVal != -1u1.62M
) {
1026
1.55M
          assert(MaskVal < LHSVWidth * 2 &&
1027
1.55M
                 "shufflevector mask index out of range!");
1028
1.55M
          if (MaskVal < LHSVWidth)
1029
1.22M
            LeftDemanded.setBit(MaskVal);
1030
1.55M
          else
1031
325k
            RightDemanded.setBit(MaskVal - LHSVWidth);
1032
1.55M
        }
1033
1.62M
      }
1034
1.69M
    }
1035
328k
1036
328k
    APInt LHSUndefElts(LHSVWidth, 0);
1037
328k
    TmpV = SimplifyDemandedVectorElts(I->getOperand(0), LeftDemanded,
1038
328k
                                      LHSUndefElts, Depth + 1);
1039
328k
    if (
TmpV328k
)
{ I->setOperand(0, TmpV); MadeChange = true; }1.36k
1040
328k
1041
328k
    APInt RHSUndefElts(LHSVWidth, 0);
1042
328k
    TmpV = SimplifyDemandedVectorElts(I->getOperand(1), RightDemanded,
1043
328k
                                      RHSUndefElts, Depth + 1);
1044
328k
    if (
TmpV328k
)
{ I->setOperand(1, TmpV); MadeChange = true; }1.32k
1045
328k
1046
328k
    bool NewUndefElts = false;
1047
328k
    unsigned LHSIdx = -1u, LHSValIdx = -1u;
1048
328k
    unsigned RHSIdx = -1u, RHSValIdx = -1u;
1049
328k
    bool LHSUniform = true;
1050
328k
    bool RHSUniform = true;
1051
2.02M
    for (unsigned i = 0; 
i < VWidth2.02M
;
i++1.69M
) {
1052
1.69M
      unsigned MaskVal = Shuffle->getMaskValue(i);
1053
1.69M
      if (
MaskVal == -1u1.69M
) {
1054
133k
        UndefElts.setBit(i);
1055
1.69M
      } else 
if (1.56M
!DemandedElts[i]1.56M
) {
1056
11.2k
        NewUndefElts = true;
1057
11.2k
        UndefElts.setBit(i);
1058
1.56M
      } else 
if (1.55M
MaskVal < LHSVWidth1.55M
) {
1059
1.22M
        if (
LHSUndefElts[MaskVal]1.22M
) {
1060
253
          NewUndefElts = true;
1061
253
          UndefElts.setBit(i);
1062
1.22M
        } else {
1063
1.22M
          LHSIdx = LHSIdx == -1u ? 
i326k
:
LHSVWidth899k
;
1064
1.22M
          LHSValIdx = LHSValIdx == -1u ? 
MaskVal326k
:
LHSVWidth899k
;
1065
813k
          LHSUniform = LHSUniform && (MaskVal == i);
1066
1.22M
        }
1067
1.55M
      } else {
1068
325k
        if (
RHSUndefElts[MaskVal - LHSVWidth]325k
) {
1069
7
          NewUndefElts = true;
1070
7
          UndefElts.setBit(i);
1071
325k
        } else {
1072
325k
          RHSIdx = RHSIdx == -1u ? 
i55.0k
:
LHSVWidth270k
;
1073
325k
          RHSValIdx = RHSValIdx == -1u ? 
MaskVal - LHSVWidth55.0k
:
LHSVWidth270k
;
1074
58.7k
          RHSUniform = RHSUniform && (MaskVal - LHSVWidth == i);
1075
325k
        }
1076
1.56M
      }
1077
1.69M
    }
1078
328k
1079
328k
    // Try to transform shuffle with constant vector and single element from
1080
328k
    // this constant vector to single insertelement instruction.
1081
328k
    // shufflevector V, C, <v1, v2, .., ci, .., vm> ->
1082
328k
    // insertelement V, C[ci], ci-n
1083
328k
    if (
LHSVWidth == Shuffle->getType()->getNumElements()328k
) {
1084
47.5k
      Value *Op = nullptr;
1085
47.5k
      Constant *Value = nullptr;
1086
47.5k
      unsigned Idx = -1u;
1087
47.5k
1088
47.5k
      // Find constant vector with the single element in shuffle (LHS or RHS).
1089
47.5k
      if (
LHSIdx < LHSVWidth && 47.5k
RHSUniform4.75k
) {
1090
3.94k
        if (auto *
CV3.94k
= dyn_cast<ConstantVector>(Shuffle->getOperand(0))) {
1091
1
          Op = Shuffle->getOperand(1);
1092
1
          Value = CV->getOperand(LHSValIdx);
1093
1
          Idx = LHSIdx;
1094
1
        }
1095
3.94k
      }
1096
47.5k
      if (
RHSIdx < LHSVWidth && 47.5k
LHSUniform1.56k
) {
1097
961
        if (auto *
CV961
= dyn_cast<ConstantVector>(Shuffle->getOperand(1))) {
1098
14
          Op = Shuffle->getOperand(0);
1099
14
          Value = CV->getOperand(RHSValIdx);
1100
14
          Idx = RHSIdx;
1101
14
        }
1102
961
      }
1103
47.5k
      // Found constant vector with single element - convert to insertelement.
1104
47.5k
      if (
Op && 47.5k
Value15
) {
1105
15
        Instruction *New = InsertElementInst::Create(
1106
15
            Op, Value, ConstantInt::get(Type::getInt32Ty(I->getContext()), Idx),
1107
15
            Shuffle->getName());
1108
15
        InsertNewInstWith(New, *Shuffle);
1109
15
        return New;
1110
15
      }
1111
328k
    }
1112
328k
    
if (328k
NewUndefElts328k
) {
1113
1.14k
      // Add additional discovered undefs.
1114
1.14k
      SmallVector<Constant*, 16> Elts;
1115
18.1k
      for (unsigned i = 0; 
i < VWidth18.1k
;
++i17.0k
) {
1116
17.0k
        if (UndefElts[i])
1117
11.6k
          Elts.push_back(UndefValue::get(Type::getInt32Ty(I->getContext())));
1118
17.0k
        else
1119
5.41k
          Elts.push_back(ConstantInt::get(Type::getInt32Ty(I->getContext()),
1120
5.41k
                                          Shuffle->getMaskValue(i)));
1121
17.0k
      }
1122
1.14k
      I->setOperand(2, ConstantVector::get(Elts));
1123
1.14k
      MadeChange = true;
1124
1.14k
    }
1125
328k
    break;
1126
328k
  }
1127
7.37k
  case Instruction::Select: {
1128
7.37k
    APInt LeftDemanded(DemandedElts), RightDemanded(DemandedElts);
1129
7.37k
    if (ConstantVector* 
CV7.37k
= dyn_cast<ConstantVector>(I->getOperand(0))) {
1130
10
      for (unsigned i = 0; 
i < VWidth10
;
i++8
) {
1131
8
        Constant *CElt = CV->getAggregateElement(i);
1132
8
        // Method isNullValue always returns false when called on a
1133
8
        // ConstantExpr. If CElt is a ConstantExpr then skip it in order to
1134
8
        // to avoid propagating incorrect information.
1135
8
        if (isa<ConstantExpr>(CElt))
1136
1
          continue;
1137
7
        
if (7
CElt->isNullValue()7
)
1138
2
          LeftDemanded.clearBit(i);
1139
7
        else
1140
5
          RightDemanded.clearBit(i);
1141
8
      }
1142
2
    }
1143
7.37k
1144
7.37k
    TmpV = SimplifyDemandedVectorElts(I->getOperand(1), LeftDemanded, UndefElts,
1145
7.37k
                                      Depth + 1);
1146
7.37k
    if (
TmpV7.37k
)
{ I->setOperand(1, TmpV); MadeChange = true; }1
1147
7.37k
1148
7.37k
    TmpV = SimplifyDemandedVectorElts(I->getOperand(2), RightDemanded,
1149
7.37k
                                      UndefElts2, Depth + 1);
1150
7.37k
    if (
TmpV7.37k
)
{ I->setOperand(2, TmpV); MadeChange = true; }0
1151
7.37k
1152
7.37k
    // Output elements are undefined if both are undefined.
1153
7.37k
    UndefElts &= UndefElts2;
1154
7.37k
    break;
1155
328k
  }
1156
1.90k
  case Instruction::BitCast: {
1157
1.90k
    // Vector->vector casts only.
1158
1.90k
    VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType());
1159
1.90k
    if (
!VTy1.90k
)
break745
;
1160
1.15k
    unsigned InVWidth = VTy->getNumElements();
1161
1.15k
    APInt InputDemandedElts(InVWidth, 0);
1162
1.15k
    UndefElts2 = APInt(InVWidth, 0);
1163
1.15k
    unsigned Ratio;
1164
1.15k
1165
1.15k
    if (
VWidth == InVWidth1.15k
) {
1166
70
      // If we are converting from <4 x i32> -> <4 x f32>, we demand the same
1167
70
      // elements as are demanded of us.
1168
70
      Ratio = 1;
1169
70
      InputDemandedElts = DemandedElts;
1170
1.15k
    } else 
if (1.08k
(VWidth % InVWidth) == 01.08k
) {
1171
610
      // If the number of elements in the output is a multiple of the number of
1172
610
      // elements in the input then an input element is live if any of the
1173
610
      // corresponding output elements are live.
1174
610
      Ratio = VWidth / InVWidth;
1175
6.38k
      for (unsigned OutIdx = 0; 
OutIdx != VWidth6.38k
;
++OutIdx5.77k
)
1176
5.77k
        
if (5.77k
DemandedElts[OutIdx]5.77k
)
1177
3.62k
          InputDemandedElts.setBit(OutIdx / Ratio);
1178
1.08k
    } else 
if (478
(InVWidth % VWidth) == 0478
) {
1179
478
      // If the number of elements in the input is a multiple of the number of
1180
478
      // elements in the output then an input element is live if the
1181
478
      // corresponding output element is live.
1182
478
      Ratio = InVWidth / VWidth;
1183
6.65k
      for (unsigned InIdx = 0; 
InIdx != InVWidth6.65k
;
++InIdx6.17k
)
1184
6.17k
        
if (6.17k
DemandedElts[InIdx / Ratio]6.17k
)
1185
3.04k
          InputDemandedElts.setBit(InIdx);
1186
0
    } else {
1187
0
      // Unsupported so far.
1188
0
      break;
1189
0
    }
1190
1.15k
1191
1.15k
    // div/rem demand all inputs, because they don't want divide by zero.
1192
1.15k
    TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts,
1193
1.15k
                                      UndefElts2, Depth + 1);
1194
1.15k
    if (
TmpV1.15k
) {
1195
11
      I->setOperand(0, TmpV);
1196
11
      MadeChange = true;
1197
11
    }
1198
1.15k
1199
1.15k
    if (
VWidth == InVWidth1.15k
) {
1200
70
      UndefElts = UndefElts2;
1201
1.15k
    } else 
if (1.08k
(VWidth % InVWidth) == 01.08k
) {
1202
610
      // If the number of elements in the output is a multiple of the number of
1203
610
      // elements in the input then an output element is undef if the
1204
610
      // corresponding input element is undef.
1205
6.38k
      for (unsigned OutIdx = 0; 
OutIdx != VWidth6.38k
;
++OutIdx5.77k
)
1206
5.77k
        
if (5.77k
UndefElts2[OutIdx / Ratio]5.77k
)
1207
28
          UndefElts.setBit(OutIdx);
1208
1.08k
    } else 
if (478
(InVWidth % VWidth) == 0478
) {
1209
478
      // If the number of elements in the input is a multiple of the number of
1210
478
      // elements in the output then an output element is undef if all of the
1211
478
      // corresponding input elements are undef.
1212
1.54k
      for (unsigned OutIdx = 0; 
OutIdx != VWidth1.54k
;
++OutIdx1.06k
) {
1213
1.06k
        APInt SubUndef = UndefElts2.lshr(OutIdx * Ratio).zextOrTrunc(Ratio);
1214
1.06k
        if (SubUndef.countPopulation() == Ratio)
1215
29
          UndefElts.setBit(OutIdx);
1216
1.06k
      }
1217
0
    } else {
1218
0
      llvm_unreachable("Unimp");
1219
1.08k
    }
1220
1.15k
    break;
1221
1.15k
  }
1222
3.43k
  case Instruction::And:
1223
3.43k
  case Instruction::Or:
1224
3.43k
  case Instruction::Xor:
1225
3.43k
  case Instruction::Add:
1226
3.43k
  case Instruction::Sub:
1227
3.43k
  case Instruction::Mul:
1228
3.43k
    // div/rem demand all inputs, because they don't want divide by zero.
1229
3.43k
    TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, UndefElts,
1230
3.43k
                                      Depth + 1);
1231
3.43k
    if (
TmpV3.43k
)
{ I->setOperand(0, TmpV); MadeChange = true; }5
1232
3.43k
    TmpV = SimplifyDemandedVectorElts(I->getOperand(1), DemandedElts,
1233
3.43k
                                      UndefElts2, Depth + 1);
1234
3.43k
    if (
TmpV3.43k
)
{ I->setOperand(1, TmpV); MadeChange = true; }8
1235
3.43k
1236
3.43k
    // Output elements are undefined if both are undefined.  Consider things
1237
3.43k
    // like undef&0.  The result is known zero, not undef.
1238
3.43k
    UndefElts &= UndefElts2;
1239
3.43k
    break;
1240
130
  case Instruction::FPTrunc:
1241
130
  case Instruction::FPExt:
1242
130
    TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, UndefElts,
1243
130
                                      Depth + 1);
1244
130
    if (
TmpV130
)
{ I->setOperand(0, TmpV); MadeChange = true; }2
1245
130
    break;
1246
130
1247
29.9k
  case Instruction::Call: {
1248
29.9k
    IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
1249
29.9k
    if (
!II29.9k
)
break12.9k
;
1250
16.9k
    switch (II->getIntrinsicID()) {
1251
16.0k
    default: break;
1252
16.9k
1253
23
    case Intrinsic::x86_xop_vfrcz_ss:
1254
23
    case Intrinsic::x86_xop_vfrcz_sd:
1255
23
      // The instructions for these intrinsics are speced to zero upper bits not
1256
23
      // pass them through like other scalar intrinsics. So we shouldn't just
1257
23
      // use Arg0 if DemandedElts[0] is clear like we do for other intrinsics.
1258
23
      // Instead we should return a zero vector.
1259
23
      if (
!DemandedElts[0]23
) {
1260
2
        Worklist.Add(II);
1261
2
        return ConstantAggregateZero::get(II->getType());
1262
2
      }
1263
21
1264
21
      // Only the lower element is used.
1265
21
      DemandedElts = 1;
1266
21
      TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
1267
21
                                        UndefElts, Depth + 1);
1268
21
      if (
TmpV21
)
{ II->setArgOperand(0, TmpV); MadeChange = true; }6
1269
21
1270
21
      // Only the lower element is undefined. The high elements are zero.
1271
21
      UndefElts = UndefElts[0];
1272
21
      break;
1273
21
1274
21
    // Unary scalar-as-vector operations that work column-wise.
1275
16
    case Intrinsic::x86_sse_rcp_ss:
1276
16
    case Intrinsic::x86_sse_rsqrt_ss:
1277
16
    case Intrinsic::x86_sse_sqrt_ss:
1278
16
    case Intrinsic::x86_sse2_sqrt_sd:
1279
16
      TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
1280
16
                                        UndefElts, Depth + 1);
1281
16
      if (
TmpV16
)
{ II->setArgOperand(0, TmpV); MadeChange = true; }8
1282
16
1283
16
      // If lowest element of a scalar op isn't used then use Arg0.
1284
16
      if (
!DemandedElts[0]16
) {
1285
4
        Worklist.Add(II);
1286
4
        return II->getArgOperand(0);
1287
4
      }
1288
12
      // TODO: If only low elt lower SQRT to FSQRT (with rounding/exceptions
1289
12
      // checks).
1290
12
      break;
1291
12
1292
12
    // Binary scalar-as-vector operations that work column-wise. The high
1293
12
    // elements come from operand 0. The low element is a function of both
1294
12
    // operands.
1295
311
    case Intrinsic::x86_sse_min_ss:
1296
311
    case Intrinsic::x86_sse_max_ss:
1297
311
    case Intrinsic::x86_sse_cmp_ss:
1298
311
    case Intrinsic::x86_sse2_min_sd:
1299
311
    case Intrinsic::x86_sse2_max_sd:
1300
311
    case Intrinsic::x86_sse2_cmp_sd: {
1301
311
      TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
1302
311
                                        UndefElts, Depth + 1);
1303
311
      if (
TmpV311
)
{ II->setArgOperand(0, TmpV); MadeChange = true; }14
1304
311
1305
311
      // If lowest element of a scalar op isn't used then use Arg0.
1306
311
      if (
!DemandedElts[0]311
) {
1307
6
        Worklist.Add(II);
1308
6
        return II->getArgOperand(0);
1309
6
      }
1310
305
1311
305
      // Only lower element is used for operand 1.
1312
305
      DemandedElts = 1;
1313
305
      TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
1314
305
                                        UndefElts2, Depth + 1);
1315
305
      if (
TmpV305
)
{ II->setArgOperand(1, TmpV); MadeChange = true; }17
1316
305
1317
305
      // Lower element is undefined if both lower elements are undefined.
1318
305
      // Consider things like undef&0.  The result is known zero, not undef.
1319
305
      if (!UndefElts2[0])
1320
305
        UndefElts.clearBit(0);
1321
305
1322
305
      break;
1323
305
    }
1324
305
1325
305
    // Binary scalar-as-vector operations that work column-wise. The high
1326
305
    // elements come from operand 0 and the low element comes from operand 1.
1327
25
    case Intrinsic::x86_sse41_round_ss:
1328
25
    case Intrinsic::x86_sse41_round_sd: {
1329
25
      // Don't use the low element of operand 0.
1330
25
      APInt DemandedElts2 = DemandedElts;
1331
25
      DemandedElts2.clearBit(0);
1332
25
      TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts2,
1333
25
                                        UndefElts, Depth + 1);
1334
25
      if (
TmpV25
)
{ II->setArgOperand(0, TmpV); MadeChange = true; }9
1335
25
1336
25
      // If lowest element of a scalar op isn't used then use Arg0.
1337
25
      if (
!DemandedElts[0]25
) {
1338
2
        Worklist.Add(II);
1339
2
        return II->getArgOperand(0);
1340
2
      }
1341
23
1342
23
      // Only lower element is used for operand 1.
1343
23
      DemandedElts = 1;
1344
23
      TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
1345
23
                                        UndefElts2, Depth + 1);
1346
23
      if (
TmpV23
)
{ II->setArgOperand(1, TmpV); MadeChange = true; }6
1347
23
1348
23
      // Take the high undef elements from operand 0 and take the lower element
1349
23
      // from operand 1.
1350
23
      UndefElts.clearBit(0);
1351
23
      UndefElts |= UndefElts2[0];
1352
23
      break;
1353
23
    }
1354
23
1355
23
    // Three input scalar-as-vector operations that work column-wise. The high
1356
23
    // elements come from operand 0 and the low element is a function of all
1357
23
    // three inputs.
1358
258
    case Intrinsic::x86_avx512_mask_add_ss_round:
1359
258
    case Intrinsic::x86_avx512_mask_div_ss_round:
1360
258
    case Intrinsic::x86_avx512_mask_mul_ss_round:
1361
258
    case Intrinsic::x86_avx512_mask_sub_ss_round:
1362
258
    case Intrinsic::x86_avx512_mask_max_ss_round:
1363
258
    case Intrinsic::x86_avx512_mask_min_ss_round:
1364
258
    case Intrinsic::x86_avx512_mask_add_sd_round:
1365
258
    case Intrinsic::x86_avx512_mask_div_sd_round:
1366
258
    case Intrinsic::x86_avx512_mask_mul_sd_round:
1367
258
    case Intrinsic::x86_avx512_mask_sub_sd_round:
1368
258
    case Intrinsic::x86_avx512_mask_max_sd_round:
1369
258
    case Intrinsic::x86_avx512_mask_min_sd_round:
1370
258
    case Intrinsic::x86_fma_vfmadd_ss:
1371
258
    case Intrinsic::x86_fma_vfmsub_ss:
1372
258
    case Intrinsic::x86_fma_vfnmadd_ss:
1373
258
    case Intrinsic::x86_fma_vfnmsub_ss:
1374
258
    case Intrinsic::x86_fma_vfmadd_sd:
1375
258
    case Intrinsic::x86_fma_vfmsub_sd:
1376
258
    case Intrinsic::x86_fma_vfnmadd_sd:
1377
258
    case Intrinsic::x86_fma_vfnmsub_sd:
1378
258
    case Intrinsic::x86_avx512_mask_vfmadd_ss:
1379
258
    case Intrinsic::x86_avx512_mask_vfmadd_sd:
1380
258
    case Intrinsic::x86_avx512_maskz_vfmadd_ss:
1381
258
    case Intrinsic::x86_avx512_maskz_vfmadd_sd:
1382
258
      TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
1383
258
                                        UndefElts, Depth + 1);
1384
258
      if (
TmpV258
)
{ II->setArgOperand(0, TmpV); MadeChange = true; }36
1385
258
1386
258
      // If lowest element of a scalar op isn't used then use Arg0.
1387
258
      if (
!DemandedElts[0]258
) {
1388
24
        Worklist.Add(II);
1389
24
        return II->getArgOperand(0);
1390
24
      }
1391
234
1392
234
      // Only lower element is used for operand 1 and 2.
1393
234
      DemandedElts = 1;
1394
234
      TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
1395
234
                                        UndefElts2, Depth + 1);
1396
234
      if (
TmpV234
)
{ II->setArgOperand(1, TmpV); MadeChange = true; }36
1397
234
      TmpV = SimplifyDemandedVectorElts(II->getArgOperand(2), DemandedElts,
1398
234
                                        UndefElts3, Depth + 1);
1399
234
      if (
TmpV234
)
{ II->setArgOperand(2, TmpV); MadeChange = true; }24
1400
234
1401
234
      // Lower element is undefined if all three lower elements are undefined.
1402
234
      // Consider things like undef&0.  The result is known zero, not undef.
1403
234
      if (
!UndefElts2[0] || 234
!UndefElts3[0]0
)
1404
234
        UndefElts.clearBit(0);
1405
234
1406
234
      break;
1407
234
1408
69
    case Intrinsic::x86_avx512_mask3_vfmadd_ss:
1409
69
    case Intrinsic::x86_avx512_mask3_vfmadd_sd:
1410
69
    case Intrinsic::x86_avx512_mask3_vfmsub_ss:
1411
69
    case Intrinsic::x86_avx512_mask3_vfmsub_sd:
1412
69
    case Intrinsic::x86_avx512_mask3_vfnmsub_ss:
1413
69
    case Intrinsic::x86_avx512_mask3_vfnmsub_sd:
1414
69
      // These intrinsics get the passthru bits from operand 2.
1415
69
      TmpV = SimplifyDemandedVectorElts(II->getArgOperand(2), DemandedElts,
1416
69
                                        UndefElts, Depth + 1);
1417
69
      if (
TmpV69
)
{ II->setArgOperand(2, TmpV); MadeChange = true; }12
1418
69
1419
69
      // If lowest element of a scalar op isn't used then use Arg2.
1420
69
      if (
!DemandedElts[0]69
) {
1421
6
        Worklist.Add(II);
1422
6
        return II->getArgOperand(2);
1423
6
      }
1424
63
1425
63
      // Only lower element is used for operand 0 and 1.
1426
63
      DemandedElts = 1;
1427
63
      TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
1428
63
                                        UndefElts2, Depth + 1);
1429
63
      if (
TmpV63
)
{ II->setArgOperand(0, TmpV); MadeChange = true; }6
1430
63
      TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
1431
63
                                        UndefElts3, Depth + 1);
1432
63
      if (
TmpV63
)
{ II->setArgOperand(1, TmpV); MadeChange = true; }6
1433
63
1434
63
      // Lower element is undefined if all three lower elements are undefined.
1435
63
      // Consider things like undef&0.  The result is known zero, not undef.
1436
63
      if (
!UndefElts2[0] || 63
!UndefElts3[0]0
)
1437
63
        UndefElts.clearBit(0);
1438
63
1439
63
      break;
1440
63
1441
36
    case Intrinsic::x86_sse2_pmulu_dq:
1442
36
    case Intrinsic::x86_sse41_pmuldq:
1443
36
    case Intrinsic::x86_avx2_pmul_dq:
1444
36
    case Intrinsic::x86_avx2_pmulu_dq:
1445
36
    case Intrinsic::x86_avx512_pmul_dq_512:
1446
36
    case Intrinsic::x86_avx512_pmulu_dq_512: {
1447
36
      Value *Op0 = II->getArgOperand(0);
1448
36
      Value *Op1 = II->getArgOperand(1);
1449
36
      unsigned InnerVWidth = Op0->getType()->getVectorNumElements();
1450
36
      assert((VWidth * 2) == InnerVWidth && "Unexpected input size");
1451
36
1452
36
      APInt InnerDemandedElts(InnerVWidth, 0);
1453
204
      for (unsigned i = 0; 
i != VWidth204
;
++i168
)
1454
168
        
if (168
DemandedElts[i]168
)
1455
140
          InnerDemandedElts.setBit(i * 2);
1456
36
1457
36
      UndefElts2 = APInt(InnerVWidth, 0);
1458
36
      TmpV = SimplifyDemandedVectorElts(Op0, InnerDemandedElts, UndefElts2,
1459
36
                                        Depth + 1);
1460
36
      if (
TmpV36
)
{ II->setArgOperand(0, TmpV); MadeChange = true; }9
1461
36
1462
36
      UndefElts3 = APInt(InnerVWidth, 0);
1463
36
      TmpV = SimplifyDemandedVectorElts(Op1, InnerDemandedElts, UndefElts3,
1464
36
                                        Depth + 1);
1465
36
      if (
TmpV36
)
{ II->setArgOperand(1, TmpV); MadeChange = true; }9
1466
36
1467
36
      break;
1468
36
    }
1469
36
1470
25
    case Intrinsic::x86_sse2_packssdw_128:
1471
25
    case Intrinsic::x86_sse2_packsswb_128:
1472
25
    case Intrinsic::x86_sse2_packuswb_128:
1473
25
    case Intrinsic::x86_sse41_packusdw:
1474
25
    case Intrinsic::x86_avx2_packssdw:
1475
25
    case Intrinsic::x86_avx2_packsswb:
1476
25
    case Intrinsic::x86_avx2_packusdw:
1477
25
    case Intrinsic::x86_avx2_packuswb:
1478
25
    case Intrinsic::x86_avx512_packssdw_512:
1479
25
    case Intrinsic::x86_avx512_packsswb_512:
1480
25
    case Intrinsic::x86_avx512_packusdw_512:
1481
25
    case Intrinsic::x86_avx512_packuswb_512: {
1482
25
      auto *Ty0 = II->getArgOperand(0)->getType();
1483
25
      unsigned InnerVWidth = Ty0->getVectorNumElements();
1484
25
      assert(VWidth == (InnerVWidth * 2) && "Unexpected input size");
1485
25
1486
25
      unsigned NumLanes = Ty0->getPrimitiveSizeInBits() / 128;
1487
25
      unsigned VWidthPerLane = VWidth / NumLanes;
1488
25
      unsigned InnerVWidthPerLane = InnerVWidth / NumLanes;
1489
25
1490
25
      // Per lane, pack the elements of the first input and then the second.
1491
25
      // e.g.
1492
25
      // v8i16 PACK(v4i32 X, v4i32 Y) - (X[0..3],Y[0..3])
1493
25
      // v32i8 PACK(v16i16 X, v16i16 Y) - (X[0..7],Y[0..7]),(X[8..15],Y[8..15])
1494
75
      for (int OpNum = 0; 
OpNum != 275
;
++OpNum50
) {
1495
50
        APInt OpDemandedElts(InnerVWidth, 0);
1496
164
        for (unsigned Lane = 0; 
Lane != NumLanes164
;
++Lane114
) {
1497
114
          unsigned LaneIdx = Lane * VWidthPerLane;
1498
682
          for (unsigned Elt = 0; 
Elt != InnerVWidthPerLane682
;
++Elt568
) {
1499
568
            unsigned Idx = LaneIdx + Elt + InnerVWidthPerLane * OpNum;
1500
568
            if (DemandedElts[Idx])
1501
154
              OpDemandedElts.setBit((Lane * InnerVWidthPerLane) + Elt);
1502
568
          }
1503
114
        }
1504
50
1505
50
        // Demand elements from the operand.
1506
50
        auto *Op = II->getArgOperand(OpNum);
1507
50
        APInt OpUndefElts(InnerVWidth, 0);
1508
50
        TmpV = SimplifyDemandedVectorElts(Op, OpDemandedElts, OpUndefElts,
1509
50
                                          Depth + 1);
1510
50
        if (
TmpV50
) {
1511
19
          II->setArgOperand(OpNum, TmpV);
1512
19
          MadeChange = true;
1513
19
        }
1514
50
1515
50
        // Pack the operand's UNDEF elements, one lane at a time.
1516
50
        OpUndefElts = OpUndefElts.zext(VWidth);
1517
164
        for (unsigned Lane = 0; 
Lane != NumLanes164
;
++Lane114
) {
1518
114
          APInt LaneElts = OpUndefElts.lshr(InnerVWidthPerLane * Lane);
1519
114
          LaneElts = LaneElts.getLoBits(InnerVWidthPerLane);
1520
114
          LaneElts <<= InnerVWidthPerLane * (2 * Lane + OpNum);
1521
114
          UndefElts |= LaneElts;
1522
114
        }
1523
50
      }
1524
25
      break;
1525
25
    }
1526
25
1527
25
    // PSHUFB
1528
26
    case Intrinsic::x86_ssse3_pshuf_b_128:
1529
26
    case Intrinsic::x86_avx2_pshuf_b:
1530
26
    case Intrinsic::x86_avx512_pshuf_b_512:
1531
26
    // PERMILVAR
1532
26
    case Intrinsic::x86_avx_vpermilvar_ps:
1533
26
    case Intrinsic::x86_avx_vpermilvar_ps_256:
1534
26
    case Intrinsic::x86_avx512_vpermilvar_ps_512:
1535
26
    case Intrinsic::x86_avx_vpermilvar_pd:
1536
26
    case Intrinsic::x86_avx_vpermilvar_pd_256:
1537
26
    case Intrinsic::x86_avx512_vpermilvar_pd_512:
1538
26
    // PERMV
1539
26
    case Intrinsic::x86_avx2_permd:
1540
26
    case Intrinsic::x86_avx2_permps: {
1541
26
      Value *Op1 = II->getArgOperand(1);
1542
26
      TmpV = SimplifyDemandedVectorElts(Op1, DemandedElts, UndefElts,
1543
26
                                        Depth + 1);
1544
26
      if (
TmpV26
)
{ II->setArgOperand(1, TmpV); MadeChange = true; }11
1545
26
      break;
1546
26
    }
1547
26
1548
26
    // SSE4A instructions leave the upper 64-bits of the 128-bit result
1549
26
    // in an undefined state.
1550
4
    case Intrinsic::x86_sse4a_extrq:
1551
4
    case Intrinsic::x86_sse4a_extrqi:
1552
4
    case Intrinsic::x86_sse4a_insertq:
1553
4
    case Intrinsic::x86_sse4a_insertqi:
1554
4
      UndefElts.setHighBits(VWidth / 2);
1555
4
      break;
1556
97
    case Intrinsic::amdgcn_buffer_load:
1557
97
    case Intrinsic::amdgcn_buffer_load_format:
1558
97
    case Intrinsic::amdgcn_image_sample:
1559
97
    case Intrinsic::amdgcn_image_sample_cl:
1560
97
    case Intrinsic::amdgcn_image_sample_d:
1561
97
    case Intrinsic::amdgcn_image_sample_d_cl:
1562
97
    case Intrinsic::amdgcn_image_sample_l:
1563
97
    case Intrinsic::amdgcn_image_sample_b:
1564
97
    case Intrinsic::amdgcn_image_sample_b_cl:
1565
97
    case Intrinsic::amdgcn_image_sample_lz:
1566
97
    case Intrinsic::amdgcn_image_sample_cd:
1567
97
    case Intrinsic::amdgcn_image_sample_cd_cl:
1568
97
1569
97
    case Intrinsic::amdgcn_image_sample_c:
1570
97
    case Intrinsic::amdgcn_image_sample_c_cl:
1571
97
    case Intrinsic::amdgcn_image_sample_c_d:
1572
97
    case Intrinsic::amdgcn_image_sample_c_d_cl:
1573
97
    case Intrinsic::amdgcn_image_sample_c_l:
1574
97
    case Intrinsic::amdgcn_image_sample_c_b:
1575
97
    case Intrinsic::amdgcn_image_sample_c_b_cl:
1576
97
    case Intrinsic::amdgcn_image_sample_c_lz:
1577
97
    case Intrinsic::amdgcn_image_sample_c_cd:
1578
97
    case Intrinsic::amdgcn_image_sample_c_cd_cl:
1579
97
1580
97
    case Intrinsic::amdgcn_image_sample_o:
1581
97
    case Intrinsic::amdgcn_image_sample_cl_o:
1582
97
    case Intrinsic::amdgcn_image_sample_d_o:
1583
97
    case Intrinsic::amdgcn_image_sample_d_cl_o:
1584
97
    case Intrinsic::amdgcn_image_sample_l_o:
1585
97
    case Intrinsic::amdgcn_image_sample_b_o:
1586
97
    case Intrinsic::amdgcn_image_sample_b_cl_o:
1587
97
    case Intrinsic::amdgcn_image_sample_lz_o:
1588
97
    case Intrinsic::amdgcn_image_sample_cd_o:
1589
97
    case Intrinsic::amdgcn_image_sample_cd_cl_o:
1590
97
1591
97
    case Intrinsic::amdgcn_image_sample_c_o:
1592
97
    case Intrinsic::amdgcn_image_sample_c_cl_o:
1593
97
    case Intrinsic::amdgcn_image_sample_c_d_o:
1594
97
    case Intrinsic::amdgcn_image_sample_c_d_cl_o:
1595
97
    case Intrinsic::amdgcn_image_sample_c_l_o:
1596
97
    case Intrinsic::amdgcn_image_sample_c_b_o:
1597
97
    case Intrinsic::amdgcn_image_sample_c_b_cl_o:
1598
97
    case Intrinsic::amdgcn_image_sample_c_lz_o:
1599
97
    case Intrinsic::amdgcn_image_sample_c_cd_o:
1600
97
    case Intrinsic::amdgcn_image_sample_c_cd_cl_o:
1601
97
1602
97
    case Intrinsic::amdgcn_image_getlod: {
1603
97
      if (
VWidth == 1 || 97
!DemandedElts.isMask()97
)
1604
11
        return nullptr;
1605
86
1606
86
      // TODO: Handle 3 vectors when supported in code gen.
1607
86
      unsigned NewNumElts = PowerOf2Ceil(DemandedElts.countTrailingOnes());
1608
86
      if (NewNumElts == VWidth)
1609
10
        return nullptr;
1610
76
1611
76
      Module *M = II->getParent()->getParent()->getParent();
1612
76
      Type *EltTy = V->getType()->getVectorElementType();
1613
76
1614
67
      Type *NewTy = (NewNumElts == 1) ? EltTy :
1615
9
        VectorType::get(EltTy, NewNumElts);
1616
76
1617
76
      auto IID = II->getIntrinsicID();
1618
76
1619
76
      bool IsBuffer = IID == Intrinsic::amdgcn_buffer_load ||
1620
70
                      IID == Intrinsic::amdgcn_buffer_load_format;
1621
76
1622
76
      Function *NewIntrin = IsBuffer ?
1623
12
        Intrinsic::getDeclaration(M, IID, NewTy) :
1624
76
        // Samplers have 3 mangled types.
1625
64
        Intrinsic::getDeclaration(M, IID,
1626
64
                                  { NewTy, II->getArgOperand(0)->getType(),
1627
64
                                      II->getArgOperand(1)->getType()});
1628
76
1629
76
      SmallVector<Value *, 5> Args;
1630
712
      for (unsigned I = 0, E = II->getNumArgOperands(); 
I != E712
;
++I636
)
1631
636
        Args.push_back(II->getArgOperand(I));
1632
76
1633
76
      IRBuilderBase::InsertPointGuard Guard(Builder);
1634
76
      Builder.SetInsertPoint(II);
1635
76
1636
76
      CallInst *NewCall = Builder.CreateCall(NewIntrin, Args);
1637
76
      NewCall->takeName(II);
1638
76
      NewCall->copyMetadata(*II);
1639
76
1640
76
      if (
!IsBuffer76
) {
1641
64
        ConstantInt *DMask = dyn_cast<ConstantInt>(NewCall->getArgOperand(3));
1642
64
        if (
DMask64
) {
1643
63
          unsigned DMaskVal = DMask->getZExtValue() & 0xf;
1644
63
1645
63
          unsigned PopCnt = 0;
1646
63
          unsigned NewDMask = 0;
1647
153
          for (unsigned I = 0; 
I < 4153
;
++I90
) {
1648
145
            const unsigned Bit = 1 << I;
1649
145
            if (
!!(DMaskVal & Bit)145
) {
1650
120
              if (++PopCnt > NewNumElts)
1651
55
                break;
1652
65
1653
65
              NewDMask |= Bit;
1654
65
            }
1655
145
          }
1656
63
1657
63
          NewCall->setArgOperand(3, ConstantInt::get(DMask->getType(), NewDMask));
1658
63
        }
1659
64
      }
1660
76
1661
76
1662
76
      if (
NewNumElts == 176
) {
1663
67
        return Builder.CreateInsertElement(UndefValue::get(V->getType()),
1664
67
                                           NewCall, static_cast<uint64_t>(0));
1665
67
      }
1666
9
1667
9
      SmallVector<uint32_t, 8> EltMask;
1668
43
      for (unsigned I = 0; 
I < VWidth43
;
++I34
)
1669
34
        EltMask.push_back(I);
1670
23
1671
23
      Value *Shuffle = Builder.CreateShuffleVector(
1672
23
        NewCall, UndefValue::get(NewTy), EltMask);
1673
23
1674
23
      MadeChange = true;
1675
23
      return Shuffle;
1676
23
    }
1677
16.8k
    }
1678
16.8k
    break;
1679
16.8k
  }
1680
731k
  }
1681
731k
  
return MadeChange ? 731k
I12.2k
:
nullptr719k
;
1682
1.41M
}