Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- InstCombineVectorOps.cpp -------------------------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file implements instcombine for ExtractElement, InsertElement and
10
// ShuffleVector.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#include "InstCombineInternal.h"
15
#include "llvm/ADT/APInt.h"
16
#include "llvm/ADT/ArrayRef.h"
17
#include "llvm/ADT/DenseMap.h"
18
#include "llvm/ADT/STLExtras.h"
19
#include "llvm/ADT/SmallVector.h"
20
#include "llvm/Analysis/InstructionSimplify.h"
21
#include "llvm/Analysis/VectorUtils.h"
22
#include "llvm/IR/BasicBlock.h"
23
#include "llvm/IR/Constant.h"
24
#include "llvm/IR/Constants.h"
25
#include "llvm/IR/DerivedTypes.h"
26
#include "llvm/IR/InstrTypes.h"
27
#include "llvm/IR/Instruction.h"
28
#include "llvm/IR/Instructions.h"
29
#include "llvm/IR/Operator.h"
30
#include "llvm/IR/PatternMatch.h"
31
#include "llvm/IR/Type.h"
32
#include "llvm/IR/User.h"
33
#include "llvm/IR/Value.h"
34
#include "llvm/Support/Casting.h"
35
#include "llvm/Support/ErrorHandling.h"
36
#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
37
#include <cassert>
38
#include <cstdint>
39
#include <iterator>
40
#include <utility>
41
42
using namespace llvm;
43
using namespace PatternMatch;
44
45
#define DEBUG_TYPE "instcombine"
46
47
/// Return true if the value is cheaper to scalarize than it is to leave as a
48
/// vector operation. IsConstantExtractIndex indicates whether we are extracting
49
/// one known element from a vector constant.
50
///
51
/// FIXME: It's possible to create more instructions than previously existed.
52
45.8k
static bool cheapToScalarize(Value *V, bool IsConstantExtractIndex) {
53
45.8k
  // If we can pick a scalar constant value out of a vector, that is free.
54
45.8k
  if (auto *C = dyn_cast<Constant>(V))
55
61
    return IsConstantExtractIndex || 
C->getSplatValue()6
;
56
45.7k
57
45.7k
  // An insertelement to the same constant index as our extract will simplify
58
45.7k
  // to the scalar inserted element. An insertelement to a different constant
59
45.7k
  // index is irrelevant to our extract.
60
45.7k
  if (match(V, m_InsertElement(m_Value(), m_Value(), m_ConstantInt())))
61
14
    return IsConstantExtractIndex;
62
45.7k
63
45.7k
  if (match(V, m_OneUse(m_Load(m_Value()))))
64
1
    return true;
65
45.7k
66
45.7k
  Value *V0, *V1;
67
45.7k
  if (match(V, m_OneUse(m_BinOp(m_Value(V0), m_Value(V1)))))
68
1.32k
    if (cheapToScalarize(V0, IsConstantExtractIndex) ||
69
1.32k
        
cheapToScalarize(V1, IsConstantExtractIndex)1.29k
)
70
72
      return true;
71
45.6k
72
45.6k
  CmpInst::Predicate UnusedPred;
73
45.6k
  if (match(V, m_OneUse(m_Cmp(UnusedPred, m_Value(V0), m_Value(V1)))))
74
12
    if (cheapToScalarize(V0, IsConstantExtractIndex) ||
75
12
        
cheapToScalarize(V1, IsConstantExtractIndex)10
)
76
10
      return true;
77
45.6k
78
45.6k
  return false;
79
45.6k
}
80
81
// If we have a PHI node with a vector type that is only used to feed
82
// itself and be an operand of extractelement at a constant location,
83
// try to replace the PHI of the vector type with a PHI of a scalar type.
84
827
Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
85
827
  SmallVector<Instruction *, 2> Extracts;
86
827
  // The users we want the PHI to have are:
87
827
  // 1) The EI ExtractElement (we already know this)
88
827
  // 2) Possibly more ExtractElements with the same index.
89
827
  // 3) Another operand, which will feed back into the PHI.
90
827
  Instruction *PHIUser = nullptr;
91
1.48k
  for (auto U : PN->users()) {
92
1.48k
    if (ExtractElementInst *EU = dyn_cast<ExtractElementInst>(U)) {
93
1.33k
      if (EI.getIndexOperand() == EU->getIndexOperand())
94
599
        Extracts.push_back(EU);
95
739
      else
96
739
        return nullptr;
97
149
    } else if (!PHIUser) {
98
110
      PHIUser = cast<Instruction>(U);
99
110
    } else {
100
39
      return nullptr;
101
39
    }
102
1.48k
  }
103
827
104
827
  
if (49
!PHIUser49
)
105
19
    return nullptr;
106
30
107
30
  // Verify that this PHI user has one use, which is the PHI itself,
108
30
  // and that it is a binary operation which is cheap to scalarize.
109
30
  // otherwise return nullptr.
110
30
  if (!PHIUser->hasOneUse() || 
!(PHIUser->user_back() == PN)13
||
111
30
      
!(isa<BinaryOperator>(PHIUser))8
||
!cheapToScalarize(PHIUser, true)8
)
112
22
    return nullptr;
113
8
114
8
  // Create a scalar PHI node that will replace the vector PHI node
115
8
  // just before the current PHI node.
116
8
  PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith(
117
8
      PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""), *PN));
118
8
  // Scalarize each PHI operand.
119
24
  for (unsigned i = 0; i < PN->getNumIncomingValues(); 
i++16
) {
120
16
    Value *PHIInVal = PN->getIncomingValue(i);
121
16
    BasicBlock *inBB = PN->getIncomingBlock(i);
122
16
    Value *Elt = EI.getIndexOperand();
123
16
    // If the operand is the PHI induction variable:
124
16
    if (PHIInVal == PHIUser) {
125
8
      // Scalarize the binary operation. Its first operand is the
126
8
      // scalar PHI, and the second operand is extracted from the other
127
8
      // vector operand.
128
8
      BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
129
8
      unsigned opId = (B0->getOperand(0) == PN) ? 1 : 
00
;
130
8
      Value *Op = InsertNewInstWith(
131
8
          ExtractElementInst::Create(B0->getOperand(opId), Elt,
132
8
                                     B0->getOperand(opId)->getName() + ".Elt"),
133
8
          *B0);
134
8
      Value *newPHIUser = InsertNewInstWith(
135
8
          BinaryOperator::CreateWithCopiedFlags(B0->getOpcode(),
136
8
                                                scalarPHI, Op, B0), *B0);
137
8
      scalarPHI->addIncoming(newPHIUser, inBB);
138
8
    } else {
139
8
      // Scalarize PHI input:
140
8
      Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, "");
141
8
      // Insert the new instruction into the predecessor basic block.
142
8
      Instruction *pos = dyn_cast<Instruction>(PHIInVal);
143
8
      BasicBlock::iterator InsertPos;
144
8
      if (pos && 
!isa<PHINode>(pos)4
) {
145
4
        InsertPos = ++pos->getIterator();
146
4
      } else {
147
4
        InsertPos = inBB->getFirstInsertionPt();
148
4
      }
149
8
150
8
      InsertNewInstWith(newEI, *InsertPos);
151
8
152
8
      scalarPHI->addIncoming(newEI, inBB);
153
8
    }
154
16
  }
155
8
156
8
  for (auto E : Extracts)
157
9
    replaceInstUsesWith(*E, scalarPHI);
158
8
159
8
  return &EI;
160
8
}
161
162
static Instruction *foldBitcastExtElt(ExtractElementInst &Ext,
163
                                      InstCombiner::BuilderTy &Builder,
164
100k
                                      bool IsBigEndian) {
165
100k
  Value *X;
166
100k
  uint64_t ExtIndexC;
167
100k
  if (!match(Ext.getVectorOperand(), m_BitCast(m_Value(X))) ||
168
100k
      
!X->getType()->isVectorTy()19.8k
||
169
100k
      
!match(Ext.getIndexOperand(), m_ConstantInt(ExtIndexC))19.7k
)
170
80.4k
    return nullptr;
171
19.7k
172
19.7k
  // If this extractelement is using a bitcast from a vector of the same number
173
19.7k
  // of elements, see if we can find the source element from the source vector:
174
19.7k
  // extelt (bitcast VecX), IndexC --> bitcast X[IndexC]
175
19.7k
  Type *SrcTy = X->getType();
176
19.7k
  Type *DestTy = Ext.getType();
177
19.7k
  unsigned NumSrcElts = SrcTy->getVectorNumElements();
178
19.7k
  unsigned NumElts = Ext.getVectorOperandType()->getNumElements();
179
19.7k
  if (NumSrcElts == NumElts)
180
527
    if (Value *Elt = findScalarElement(X, ExtIndexC))
181
12
      return new BitCastInst(Elt, DestTy);
182
19.7k
183
19.7k
  // If the source elements are wider than the destination, try to shift and
184
19.7k
  // truncate a subset of scalar bits of an insert op.
185
19.7k
  if (NumSrcElts < NumElts) {
186
249
    Value *Scalar;
187
249
    uint64_t InsIndexC;
188
249
    if (!match(X, m_InsertElement(m_Value(), m_Value(Scalar),
189
249
                                  m_ConstantInt(InsIndexC))))
190
218
      return nullptr;
191
31
192
31
    // The extract must be from the subset of vector elements that we inserted
193
31
    // into. Example: if we inserted element 1 of a <2 x i64> and we are
194
31
    // extracting an i16 (narrowing ratio = 4), then this extract must be from 1
195
31
    // of elements 4-7 of the bitcasted vector.
196
31
    unsigned NarrowingRatio = NumElts / NumSrcElts;
197
31
    if (ExtIndexC / NarrowingRatio != InsIndexC)
198
0
      return nullptr;
199
31
200
31
    // We are extracting part of the original scalar. How that scalar is
201
31
    // inserted into the vector depends on the endian-ness. Example:
202
31
    //              Vector Byte Elt Index:    0  1  2  3  4  5  6  7
203
31
    //                                       +--+--+--+--+--+--+--+--+
204
31
    // inselt <2 x i32> V, <i32> S, 1:       |V0|V1|V2|V3|S0|S1|S2|S3|
205
31
    // extelt <4 x i16> V', 3:               |                 |S2|S3|
206
31
    //                                       +--+--+--+--+--+--+--+--+
207
31
    // If this is little-endian, S2|S3 are the MSB of the 32-bit 'S' value.
208
31
    // If this is big-endian, S2|S3 are the LSB of the 32-bit 'S' value.
209
31
    // In this example, we must right-shift little-endian. Big-endian is just a
210
31
    // truncate.
211
31
    unsigned Chunk = ExtIndexC % NarrowingRatio;
212
31
    if (IsBigEndian)
213
15
      Chunk = NarrowingRatio - 1 - Chunk;
214
31
215
31
    // Bail out if this is an FP vector to FP vector sequence. That would take
216
31
    // more instructions than we started with unless there is no shift, and it
217
31
    // may not be handled as well in the backend.
218
31
    bool NeedSrcBitcast = SrcTy->getScalarType()->isFloatingPointTy();
219
31
    bool NeedDestBitcast = DestTy->isFloatingPointTy();
220
31
    if (NeedSrcBitcast && 
NeedDestBitcast13
)
221
6
      return nullptr;
222
25
223
25
    unsigned SrcWidth = SrcTy->getScalarSizeInBits();
224
25
    unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
225
25
    unsigned ShAmt = Chunk * DestWidth;
226
25
227
25
    // TODO: This limitation is more strict than necessary. We could sum the
228
25
    // number of new instructions and subtract the number eliminated to know if
229
25
    // we can proceed.
230
25
    if (!X->hasOneUse() || 
!Ext.getVectorOperand()->hasOneUse()21
)
231
10
      if (NeedSrcBitcast || 
NeedDestBitcast6
)
232
8
        return nullptr;
233
17
234
17
    if (NeedSrcBitcast) {
235
3
      Type *SrcIntTy = IntegerType::getIntNTy(Scalar->getContext(), SrcWidth);
236
3
      Scalar = Builder.CreateBitCast(Scalar, SrcIntTy);
237
3
    }
238
17
239
17
    if (ShAmt) {
240
10
      // Bail out if we could end with more instructions than we started with.
241
10
      if (!Ext.getVectorOperand()->hasOneUse())
242
1
        return nullptr;
243
9
      Scalar = Builder.CreateLShr(Scalar, ShAmt);
244
9
    }
245
17
246
17
    
if (16
NeedDestBitcast16
) {
247
2
      Type *DestIntTy = IntegerType::getIntNTy(Scalar->getContext(), DestWidth);
248
2
      return new BitCastInst(Builder.CreateTrunc(Scalar, DestIntTy), DestTy);
249
2
    }
250
14
    return new TruncInst(Scalar, DestTy);
251
14
  }
252
19.5k
253
19.5k
  return nullptr;
254
19.5k
}
255
256
101k
Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
257
101k
  Value *SrcVec = EI.getVectorOperand();
258
101k
  Value *Index = EI.getIndexOperand();
259
101k
  if (Value *V = SimplifyExtractElementInst(SrcVec, Index,
260
221
                                            SQ.getWithInstruction(&EI)))
261
221
    return replaceInstUsesWith(EI, V);
262
101k
263
101k
  // If extracting a specified index from the vector, see if we can recursively
264
101k
  // find a previously computed scalar that was inserted into the vector.
265
101k
  auto *IndexC = dyn_cast<ConstantInt>(Index);
266
101k
  if (IndexC) {
267
100k
    unsigned NumElts = EI.getVectorOperandType()->getNumElements();
268
100k
269
100k
    // InstSimplify should handle cases where the index is invalid.
270
100k
    if (!IndexC->getValue().ule(NumElts))
271
0
      return nullptr;
272
100k
273
100k
    // This instruction only demands the single element from the input vector.
274
100k
    // If the input vector has a single use, simplify it based on this use
275
100k
    // property.
276
100k
    if (SrcVec->hasOneUse() && 
NumElts != 122.8k
) {
277
11.9k
      APInt UndefElts(NumElts, 0);
278
11.9k
      APInt DemandedElts(NumElts, 0);
279
11.9k
      DemandedElts.setBit(IndexC->getZExtValue());
280
11.9k
      if (Value *V = SimplifyDemandedVectorElts(SrcVec, DemandedElts,
281
336
                                                UndefElts)) {
282
336
        EI.setOperand(0, V);
283
336
        return &EI;
284
336
      }
285
100k
    }
286
100k
287
100k
    if (Instruction *I = foldBitcastExtElt(EI, Builder, DL.isBigEndian()))
288
28
      return I;
289
100k
290
100k
    // If there's a vector PHI feeding a scalar use through this extractelement
291
100k
    // instruction, try to scalarize the PHI.
292
100k
    if (auto *Phi = dyn_cast<PHINode>(SrcVec))
293
827
      if (Instruction *ScalarPHI = scalarizePHI(EI, Phi))
294
8
        return ScalarPHI;
295
101k
  }
296
101k
297
101k
  BinaryOperator *BO;
298
101k
  if (match(SrcVec, m_BinOp(BO)) && 
cheapToScalarize(SrcVec, IndexC)41.4k
) {
299
58
    // extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index)
300
58
    Value *X = BO->getOperand(0), *Y = BO->getOperand(1);
301
58
    Value *E0 = Builder.CreateExtractElement(X, Index);
302
58
    Value *E1 = Builder.CreateExtractElement(Y, Index);
303
58
    return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(), E0, E1, BO);
304
58
  }
305
100k
306
100k
  Value *X, *Y;
307
100k
  CmpInst::Predicate Pred;
308
100k
  if (match(SrcVec, m_Cmp(Pred, m_Value(X), m_Value(Y))) &&
309
100k
      
cheapToScalarize(SrcVec, IndexC)1.70k
) {
310
8
    // extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index)
311
8
    Value *E0 = Builder.CreateExtractElement(X, Index);
312
8
    Value *E1 = Builder.CreateExtractElement(Y, Index);
313
8
    return CmpInst::Create(cast<CmpInst>(SrcVec)->getOpcode(), Pred, E0, E1);
314
8
  }
315
100k
316
100k
  if (auto *I = dyn_cast<Instruction>(SrcVec)) {
317
95.8k
    if (auto *IE = dyn_cast<InsertElementInst>(I)) {
318
6.02k
      // Extracting the inserted element?
319
6.02k
      if (IE->getOperand(2) == Index)
320
0
        return replaceInstUsesWith(EI, IE->getOperand(1));
321
6.02k
      // If the inserted and extracted elements are constants, they must not
322
6.02k
      // be the same value, extract from the pre-inserted value instead.
323
6.02k
      if (isa<Constant>(IE->getOperand(2)) && 
IndexC6.02k
) {
324
6.02k
        Worklist.AddValue(SrcVec);
325
6.02k
        EI.setOperand(0, IE->getOperand(0));
326
6.02k
        return &EI;
327
6.02k
      }
328
89.8k
    } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) {
329
637
      // If this is extracting an element from a shufflevector, figure out where
330
637
      // it came from and extract from the appropriate input element instead.
331
637
      if (auto *Elt = dyn_cast<ConstantInt>(Index)) {
332
636
        int SrcIdx = SVI->getMaskValue(Elt->getZExtValue());
333
636
        Value *Src;
334
636
        unsigned LHSWidth =
335
636
          SVI->getOperand(0)->getType()->getVectorNumElements();
336
636
337
636
        if (SrcIdx < 0)
338
0
          return replaceInstUsesWith(EI, UndefValue::get(EI.getType()));
339
636
        if (SrcIdx < (int)LHSWidth)
340
415
          Src = SVI->getOperand(0);
341
221
        else {
342
221
          SrcIdx -= LHSWidth;
343
221
          Src = SVI->getOperand(1);
344
221
        }
345
636
        Type *Int32Ty = Type::getInt32Ty(EI.getContext());
346
636
        return ExtractElementInst::Create(Src,
347
636
                                          ConstantInt::get(Int32Ty,
348
636
                                                           SrcIdx, false));
349
636
      }
350
89.1k
    } else if (auto *CI = dyn_cast<CastInst>(I)) {
351
20.3k
      // Canonicalize extractelement(cast) -> cast(extractelement).
352
20.3k
      // Bitcasts can change the number of vector elements, and they cost
353
20.3k
      // nothing.
354
20.3k
      if (CI->hasOneUse() && 
(CI->getOpcode() != Instruction::BitCast)7.06k
) {
355
5
        Value *EE = Builder.CreateExtractElement(CI->getOperand(0), Index);
356
5
        Worklist.AddValue(EE);
357
5
        return CastInst::Create(CI->getOpcode(), EE, EI.getType());
358
5
      }
359
94.3k
    }
360
95.8k
  }
361
94.3k
  return nullptr;
362
94.3k
}
363
364
/// If V is a shuffle of values that ONLY returns elements from either LHS or
365
/// RHS, return the shuffle mask and true. Otherwise, return false.
366
static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
367
2.29k
                                         SmallVectorImpl<Constant*> &Mask) {
368
2.29k
  assert(LHS->getType() == RHS->getType() &&
369
2.29k
         "Invalid CollectSingleShuffleElements");
370
2.29k
  unsigned NumElts = V->getType()->getVectorNumElements();
371
2.29k
372
2.29k
  if (isa<UndefValue>(V)) {
373
472
    Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
374
472
    return true;
375
472
  }
376
1.81k
377
1.81k
  if (V == LHS) {
378
0
    for (unsigned i = 0; i != NumElts; ++i)
379
0
      Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
380
0
    return true;
381
0
  }
382
1.81k
383
1.81k
  if (V == RHS) {
384
0
    for (unsigned i = 0; i != NumElts; ++i)
385
0
      Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()),
386
0
                                      i+NumElts));
387
0
    return true;
388
0
  }
389
1.81k
390
1.81k
  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
391
1.81k
    // If this is an insert of an extract from some other vector, include it.
392
1.81k
    Value *VecOp    = IEI->getOperand(0);
393
1.81k
    Value *ScalarOp = IEI->getOperand(1);
394
1.81k
    Value *IdxOp    = IEI->getOperand(2);
395
1.81k
396
1.81k
    if (!isa<ConstantInt>(IdxOp))
397
0
      return false;
398
1.81k
    unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
399
1.81k
400
1.81k
    if (isa<UndefValue>(ScalarOp)) {  // inserting undef into vector.
401
0
      // We can handle this if the vector we are inserting into is
402
0
      // transitively ok.
403
0
      if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
404
0
        // If so, update the mask to reflect the inserted undef.
405
0
        Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(V->getContext()));
406
0
        return true;
407
0
      }
408
1.81k
    } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
409
1.81k
      if (isa<ConstantInt>(EI->getOperand(1))) {
410
1.81k
        unsigned ExtractedIdx =
411
1.81k
        cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
412
1.81k
        unsigned NumLHSElts = LHS->getType()->getVectorNumElements();
413
1.81k
414
1.81k
        // This must be extracting from either LHS or RHS.
415
1.81k
        if (EI->getOperand(0) == LHS || 
EI->getOperand(0) == RHS393
) {
416
1.80k
          // We can handle this if the vector we are inserting into is
417
1.80k
          // transitively ok.
418
1.80k
          if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
419
1.77k
            // If so, update the mask to reflect the inserted value.
420
1.77k
            if (EI->getOperand(0) == LHS) {
421
1.39k
              Mask[InsertedIdx % NumElts] =
422
1.39k
              ConstantInt::get(Type::getInt32Ty(V->getContext()),
423
1.39k
                               ExtractedIdx);
424
1.39k
            } else {
425
380
              assert(EI->getOperand(0) == RHS);
426
380
              Mask[InsertedIdx % NumElts] =
427
380
              ConstantInt::get(Type::getInt32Ty(V->getContext()),
428
380
                               ExtractedIdx + NumLHSElts);
429
380
            }
430
1.77k
            return true;
431
1.77k
          }
432
42
        }
433
1.81k
      }
434
1.81k
    }
435
1.81k
  }
436
42
437
42
  return false;
438
42
}
439
440
/// If we have insertion into a vector that is wider than the vector that we
441
/// are extracting from, try to widen the source vector to allow a single
442
/// shufflevector to replace one or more insert/extract pairs.
443
static void replaceExtractElements(InsertElementInst *InsElt,
444
                                   ExtractElementInst *ExtElt,
445
69
                                   InstCombiner &IC) {
446
69
  VectorType *InsVecType = InsElt->getType();
447
69
  VectorType *ExtVecType = ExtElt->getVectorOperandType();
448
69
  unsigned NumInsElts = InsVecType->getVectorNumElements();
449
69
  unsigned NumExtElts = ExtVecType->getVectorNumElements();
450
69
451
69
  // The inserted-to vector must be wider than the extracted-from vector.
452
69
  if (InsVecType->getElementType() != ExtVecType->getElementType() ||
453
69
      NumExtElts >= NumInsElts)
454
19
    return;
455
50
456
50
  // Create a shuffle mask to widen the extended-from vector using undefined
457
50
  // values. The mask selects all of the values of the original vector followed
458
50
  // by as many undefined values as needed to create a vector of the same length
459
50
  // as the inserted-to vector.
460
50
  SmallVector<Constant *, 16> ExtendMask;
461
50
  IntegerType *IntType = Type::getInt32Ty(InsElt->getContext());
462
206
  for (unsigned i = 0; i < NumExtElts; 
++i156
)
463
156
    ExtendMask.push_back(ConstantInt::get(IntType, i));
464
204
  for (unsigned i = NumExtElts; i < NumInsElts; 
++i154
)
465
154
    ExtendMask.push_back(UndefValue::get(IntType));
466
50
467
50
  Value *ExtVecOp = ExtElt->getVectorOperand();
468
50
  auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp);
469
50
  BasicBlock *InsertionBlock = (ExtVecOpInst && 
!isa<PHINode>(ExtVecOpInst)29
)
470
50
                                   ? 
ExtVecOpInst->getParent()28
471
50
                                   : 
ExtElt->getParent()22
;
472
50
473
50
  // TODO: This restriction matches the basic block check below when creating
474
50
  // new extractelement instructions. If that limitation is removed, this one
475
50
  // could also be removed. But for now, we just bail out to ensure that we
476
50
  // will replace the extractelement instruction that is feeding our
477
50
  // insertelement instruction. This allows the insertelement to then be
478
50
  // replaced by a shufflevector. If the insertelement is not replaced, we can
479
50
  // induce infinite looping because there's an optimization for extractelement
480
50
  // that will delete our widening shuffle. This would trigger another attempt
481
50
  // here to create that shuffle, and we spin forever.
482
50
  if (InsertionBlock != InsElt->getParent())
483
8
    return;
484
42
485
42
  // TODO: This restriction matches the check in visitInsertElementInst() and
486
42
  // prevents an infinite loop caused by not turning the extract/insert pair
487
42
  // into a shuffle. We really should not need either check, but we're lacking
488
42
  // folds for shufflevectors because we're afraid to generate shuffle masks
489
42
  // that the backend can't handle.
490
42
  if (InsElt->hasOneUse() && 
isa<InsertElementInst>(InsElt->user_back())29
)
491
11
    return;
492
31
493
31
  auto *WideVec = new ShuffleVectorInst(ExtVecOp, UndefValue::get(ExtVecType),
494
31
                                        ConstantVector::get(ExtendMask));
495
31
496
31
  // Insert the new shuffle after the vector operand of the extract is defined
497
31
  // (as long as it's not a PHI) or at the start of the basic block of the
498
31
  // extract, so any subsequent extracts in the same basic block can use it.
499
31
  // TODO: Insert before the earliest ExtractElementInst that is replaced.
500
31
  if (ExtVecOpInst && 
!isa<PHINode>(ExtVecOpInst)21
)
501
20
    WideVec->insertAfter(ExtVecOpInst);
502
11
  else
503
11
    IC.InsertNewInstWith(WideVec, *ExtElt->getParent()->getFirstInsertionPt());
504
31
505
31
  // Replace extracts from the original narrow vector with extracts from the new
506
31
  // wide vector.
507
77
  for (User *U : ExtVecOp->users()) {
508
77
    ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U);
509
77
    if (!OldExt || 
OldExt->getParent() != WideVec->getParent()45
)
510
33
      continue;
511
44
    auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
512
44
    NewExt->insertAfter(OldExt);
513
44
    IC.replaceInstUsesWith(*OldExt, NewExt);
514
44
  }
515
31
}
516
517
/// We are building a shuffle to create V, which is a sequence of insertelement,
518
/// extractelement pairs. If PermittedRHS is set, then we must either use it or
519
/// not rely on the second vector source. Return a std::pair containing the
520
/// left and right vectors of the proposed shuffle (or 0), and set the Mask
521
/// parameter as required.
522
///
523
/// Note: we intentionally don't try to fold earlier shuffles since they have
524
/// often been chosen carefully to be efficiently implementable on the target.
525
using ShuffleOps = std::pair<Value *, Value *>;
526
527
static ShuffleOps collectShuffleElements(Value *V,
528
                                         SmallVectorImpl<Constant *> &Mask,
529
                                         Value *PermittedRHS,
530
9.25k
                                         InstCombiner &IC) {
531
9.25k
  assert(V->getType()->isVectorTy() && "Invalid shuffle!");
532
9.25k
  unsigned NumElts = V->getType()->getVectorNumElements();
533
9.25k
534
9.25k
  if (isa<UndefValue>(V)) {
535
1.54k
    Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
536
1.54k
    return std::make_pair(
537
1.54k
        PermittedRHS ? UndefValue::get(PermittedRHS->getType()) : 
V0
, nullptr);
538
1.54k
  }
539
7.70k
540
7.70k
  if (isa<ConstantAggregateZero>(V)) {
541
2
    Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(V->getContext()),0));
542
2
    return std::make_pair(V, nullptr);
543
2
  }
544
7.70k
545
7.70k
  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
546
7.59k
    // If this is an insert of an extract from some other vector, include it.
547
7.59k
    Value *VecOp    = IEI->getOperand(0);
548
7.59k
    Value *ScalarOp = IEI->getOperand(1);
549
7.59k
    Value *IdxOp    = IEI->getOperand(2);
550
7.59k
551
7.59k
    if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
552
7.55k
      if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) {
553
7.55k
        unsigned ExtractedIdx =
554
7.55k
          cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
555
7.55k
        unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
556
7.55k
557
7.55k
        // Either the extracted from or inserted into vector must be RHSVec,
558
7.55k
        // otherwise we'd end up with a shuffle of three inputs.
559
7.55k
        if (EI->getOperand(0) == PermittedRHS || 
PermittedRHS == nullptr2.68k
) {
560
7.06k
          Value *RHS = EI->getOperand(0);
561
7.06k
          ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS, IC);
562
7.06k
          assert(LR.second == nullptr || LR.second == RHS);
563
7.06k
564
7.06k
          if (LR.first->getType() != RHS->getType()) {
565
69
            // Although we are giving up for now, see if we can create extracts
566
69
            // that match the inserts for another round of combining.
567
69
            replaceExtractElements(IEI, EI, IC);
568
69
569
69
            // We tried our best, but we can't find anything compatible with RHS
570
69
            // further up the chain. Return a trivial shuffle.
571
463
            for (unsigned i = 0; i < NumElts; 
++i394
)
572
394
              Mask[i] = ConstantInt::get(Type::getInt32Ty(V->getContext()), i);
573
69
            return std::make_pair(V, nullptr);
574
69
          }
575
6.99k
576
6.99k
          unsigned NumLHSElts = RHS->getType()->getVectorNumElements();
577
6.99k
          Mask[InsertedIdx % NumElts] =
578
6.99k
            ConstantInt::get(Type::getInt32Ty(V->getContext()),
579
6.99k
                             NumLHSElts+ExtractedIdx);
580
6.99k
          return std::make_pair(LR.first, RHS);
581
6.99k
        }
582
494
583
494
        if (VecOp == PermittedRHS) {
584
2
          // We've gone as far as we can: anything on the other side of the
585
2
          // extractelement will already have been converted into a shuffle.
586
2
          unsigned NumLHSElts =
587
2
              EI->getOperand(0)->getType()->getVectorNumElements();
588
10
          for (unsigned i = 0; i != NumElts; 
++i8
)
589
8
            Mask.push_back(ConstantInt::get(
590
8
                Type::getInt32Ty(V->getContext()),
591
8
                i == InsertedIdx ? 
ExtractedIdx2
:
NumLHSElts + i6
));
592
2
          return std::make_pair(EI->getOperand(0), PermittedRHS);
593
2
        }
594
492
595
492
        // If this insertelement is a chain that comes from exactly these two
596
492
        // vectors, return the vector and the effective shuffle.
597
492
        if (EI->getOperand(0)->getType() == PermittedRHS->getType() &&
598
492
            collectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS,
599
484
                                         Mask))
600
472
          return std::make_pair(EI->getOperand(0), PermittedRHS);
601
171
      }
602
7.55k
    }
603
7.59k
  }
604
171
605
171
  // Otherwise, we can't do anything fancy. Return an identity vector.
606
981
  
for (unsigned i = 0; 171
i != NumElts;
++i810
)
607
810
    Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
608
171
  return std::make_pair(V, nullptr);
609
171
}
610
611
/// Try to find redundant insertvalue instructions, like the following ones:
612
///  %0 = insertvalue { i8, i32 } undef, i8 %x, 0
613
///  %1 = insertvalue { i8, i32 } %0,    i8 %y, 0
614
/// Here the second instruction inserts values at the same indices, as the
615
/// first one, making the first one redundant.
616
/// It should be transformed to:
617
///  %0 = insertvalue { i8, i32 } undef, i8 %y, 0
618
142k
Instruction *InstCombiner::visitInsertValueInst(InsertValueInst &I) {
619
142k
  bool IsRedundant = false;
620
142k
  ArrayRef<unsigned int> FirstIndices = I.getIndices();
621
142k
622
142k
  // If there is a chain of insertvalue instructions (each of them except the
623
142k
  // last one has only one use and it's another insertvalue insn from this
624
142k
  // chain), check if any of the 'children' uses the same indices as the first
625
142k
  // instruction. In this case, the first one is redundant.
626
142k
  Value *V = &I;
627
142k
  unsigned Depth = 0;
628
254k
  while (V->hasOneUse() && 
Depth < 10232k
) {
629
232k
    User *U = V->user_back();
630
232k
    auto UserInsInst = dyn_cast<InsertValueInst>(U);
631
232k
    if (!UserInsInst || 
U->getOperand(0) != V112k
)
632
120k
      break;
633
112k
    if (UserInsInst->getIndices() == FirstIndices) {
634
12
      IsRedundant = true;
635
12
      break;
636
12
    }
637
112k
    V = UserInsInst;
638
112k
    Depth++;
639
112k
  }
640
142k
641
142k
  if (IsRedundant)
642
12
    return replaceInstUsesWith(I, I.getOperand(0));
643
142k
  return nullptr;
644
142k
}
645
646
83
static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) {
647
83
  int MaskSize = Shuf.getMask()->getType()->getVectorNumElements();
648
83
  int VecSize = Shuf.getOperand(0)->getType()->getVectorNumElements();
649
83
650
83
  // A vector select does not change the size of the operands.
651
83
  if (MaskSize != VecSize)
652
4
    return false;
653
79
654
79
  // Each mask element must be undefined or choose a vector element from one of
655
79
  // the source operands without crossing vector lanes.
656
434
  
for (int i = 0; 79
i != MaskSize;
++i355
) {
657
362
    int Elt = Shuf.getMaskValue(i);
658
362
    if (Elt != -1 && 
Elt != i288
&&
Elt != i + VecSize159
)
659
7
      return false;
660
362
  }
661
79
662
79
  
return true72
;
663
79
}
664
665
/// Turn a chain of inserts that splats a value into an insert + shuffle:
666
/// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... ->
667
/// shufflevector(insertelt(X, %k, 0), undef, zero)
668
127k
static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) {
669
127k
  // We are interested in the last insert in a chain. So if this insert has a
670
127k
  // single user and that user is an insert, bail.
671
127k
  if (InsElt.hasOneUse() && 
isa<InsertElementInst>(InsElt.user_back())122k
)
672
40.4k
    return nullptr;
673
86.8k
674
86.8k
  auto *VecTy = cast<VectorType>(InsElt.getType());
675
86.8k
  unsigned NumElements = VecTy->getNumElements();
676
86.8k
677
86.8k
  // Do not try to do this for a one-element vector, since that's a nop,
678
86.8k
  // and will cause an inf-loop.
679
86.8k
  if (NumElements == 1)
680
68
    return nullptr;
681
86.8k
682
86.8k
  Value *SplatVal = InsElt.getOperand(1);
683
86.8k
  InsertElementInst *CurrIE = &InsElt;
684
86.8k
  SmallVector<bool, 16> ElementPresent(NumElements, false);
685
86.8k
  InsertElementInst *FirstIE = nullptr;
686
86.8k
687
86.8k
  // Walk the chain backwards, keeping track of which indices we inserted into,
688
86.8k
  // until we hit something that isn't an insert of the splatted value.
689
176k
  while (CurrIE) {
690
103k
    auto *Idx = dyn_cast<ConstantInt>(CurrIE->getOperand(2));
691
103k
    if (!Idx || 
CurrIE->getOperand(1) != SplatVal102k
)
692
13.8k
      return nullptr;
693
89.7k
694
89.7k
    auto *NextIE = dyn_cast<InsertElementInst>(CurrIE->getOperand(0));
695
89.7k
    // Check none of the intermediate steps have any additional uses, except
696
89.7k
    // for the root insertelement instruction, which can be re-used, if it
697
89.7k
    // inserts at position 0.
698
89.7k
    if (CurrIE != &InsElt &&
699
89.7k
        
(3.79k
!CurrIE->hasOneUse()3.79k
&&
(96
NextIE != nullptr96
||
!Idx->isZero()96
)))
700
1
      return nullptr;
701
89.7k
702
89.7k
    ElementPresent[Idx->getZExtValue()] = true;
703
89.7k
    FirstIE = CurrIE;
704
89.7k
    CurrIE = NextIE;
705
89.7k
  }
706
86.8k
707
86.8k
  // If this is just a single insertelement (not a sequence), we are done.
708
86.8k
  
if (72.9k
FirstIE == &InsElt72.9k
)
709
70.2k
    return nullptr;
710
2.64k
711
2.64k
  // If we are not inserting into an undef vector, make sure we've seen an
712
2.64k
  // insert into every element.
713
2.64k
  // TODO: If the base vector is not undef, it might be better to create a splat
714
2.64k
  //       and then a select-shuffle (blend) with the base vector.
715
2.64k
  if (!isa<UndefValue>(FirstIE->getOperand(0)))
716
44
    
if (22
any_of(ElementPresent, [](bool Present) 22
{ return !Present; }))
717
22
      return nullptr;
718
2.62k
719
2.62k
  // Create the insert + shuffle.
720
2.62k
  Type *Int32Ty = Type::getInt32Ty(InsElt.getContext());
721
2.62k
  UndefValue *UndefVec = UndefValue::get(VecTy);
722
2.62k
  Constant *Zero = ConstantInt::get(Int32Ty, 0);
723
2.62k
  if (!cast<ConstantInt>(FirstIE->getOperand(2))->isZero())
724
2
    FirstIE = InsertElementInst::Create(UndefVec, SplatVal, Zero, "", &InsElt);
725
2.62k
726
2.62k
  // Splat from element 0, but replace absent elements with undef in the mask.
727
2.62k
  SmallVector<Constant *, 16> Mask(NumElements, Zero);
728
9.01k
  for (unsigned i = 0; i != NumElements; 
++i6.39k
)
729
6.39k
    if (!ElementPresent[i])
730
8
      Mask[i] = UndefValue::get(Int32Ty);
731
2.62k
732
2.62k
  return new ShuffleVectorInst(FirstIE, UndefVec, ConstantVector::get(Mask));
733
2.62k
}
734
735
/// Try to fold an insert element into an existing splat shuffle by changing
736
/// the shuffle's mask to include the index of this insert element.
737
124k
static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) {
738
124k
  // Check if the vector operand of this insert is a canonical splat shuffle.
739
124k
  auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
740
124k
  if (!Shuf || 
!Shuf->isZeroEltSplat()55
)
741
124k
    return nullptr;
742
22
743
22
  // Check for a constant insertion index.
744
22
  uint64_t IdxC;
745
22
  if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
746
1
    return nullptr;
747
21
748
21
  // Check if the splat shuffle's input is the same as this insert's scalar op.
749
21
  Value *X = InsElt.getOperand(1);
750
21
  Value *Op0 = Shuf->getOperand(0);
751
21
  if (!match(Op0, m_InsertElement(m_Undef(), m_Specific(X), m_ZeroInt())))
752
13
    return nullptr;
753
8
754
8
  // Replace the shuffle mask element at the index of this insert with a zero.
755
8
  // For example:
756
8
  // inselt (shuf (inselt undef, X, 0), undef, <0,undef,0,undef>), X, 1
757
8
  //   --> shuf (inselt undef, X, 0), undef, <0,0,0,undef>
758
8
  unsigned NumMaskElts = Shuf->getType()->getVectorNumElements();
759
8
  SmallVector<Constant *, 16> NewMaskVec(NumMaskElts);
760
8
  Type *I32Ty = IntegerType::getInt32Ty(Shuf->getContext());
761
8
  Constant *Zero = ConstantInt::getNullValue(I32Ty);
762
40
  for (unsigned i = 0; i != NumMaskElts; 
++i32
)
763
32
    NewMaskVec[i] = i == IdxC ? 
Zero8
:
Shuf->getMask()->getAggregateElement(i)24
;
764
8
765
8
  Constant *NewMask = ConstantVector::get(NewMaskVec);
766
8
  return new ShuffleVectorInst(Op0, UndefValue::get(Op0->getType()), NewMask);
767
8
}
768
769
/// If we have an insertelement instruction feeding into another insertelement
770
/// and the 2nd is inserting a constant into the vector, canonicalize that
771
/// constant insertion before the insertion of a variable:
772
///
773
/// insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 -->
774
/// insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1
775
///
776
/// This has the potential of eliminating the 2nd insertelement instruction
777
/// via constant folding of the scalar constant into a vector constant.
778
static Instruction *hoistInsEltConst(InsertElementInst &InsElt2,
779
127k
                                     InstCombiner::BuilderTy &Builder) {
780
127k
  auto *InsElt1 = dyn_cast<InsertElementInst>(InsElt2.getOperand(0));
781
127k
  if (!InsElt1 || 
!InsElt1->hasOneUse()38.9k
)
782
89.3k
    return nullptr;
783
38.3k
784
38.3k
  Value *X, *Y;
785
38.3k
  Constant *ScalarC;
786
38.3k
  ConstantInt *IdxC1, *IdxC2;
787
38.3k
  if (match(InsElt1->getOperand(0), m_Value(X)) &&
788
38.3k
      match(InsElt1->getOperand(1), m_Value(Y)) && !isa<Constant>(Y) &&
789
38.3k
      
match(InsElt1->getOperand(2), m_ConstantInt(IdxC1))38.3k
&&
790
38.3k
      
match(InsElt2.getOperand(1), m_Constant(ScalarC))38.3k
&&
791
38.3k
      
match(InsElt2.getOperand(2), m_ConstantInt(IdxC2))440
&&
IdxC1 != IdxC2440
) {
792
440
    Value *NewInsElt1 = Builder.CreateInsertElement(X, ScalarC, IdxC2);
793
440
    return InsertElementInst::Create(NewInsElt1, Y, IdxC1);
794
440
  }
795
37.9k
796
37.9k
  return nullptr;
797
37.9k
}
798
799
/// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex
800
/// --> shufflevector X, CVec', Mask'
801
127k
static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) {
802
127k
  auto *Inst = dyn_cast<Instruction>(InsElt.getOperand(0));
803
127k
  // Bail out if the parent has more than one use. In that case, we'd be
804
127k
  // replacing the insertelt with a shuffle, and that's not a clear win.
805
127k
  if (!Inst || 
!Inst->hasOneUse()44.4k
)
806
88.2k
    return nullptr;
807
39.6k
  if (auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0))) {
808
107
    // The shuffle must have a constant vector operand. The insertelt must have
809
107
    // a constant scalar being inserted at a constant position in the vector.
810
107
    Constant *ShufConstVec, *InsEltScalar;
811
107
    uint64_t InsEltIndex;
812
107
    if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) ||
813
107
        
!match(InsElt.getOperand(1), m_Constant(InsEltScalar))103
||
814
107
        
!match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex))85
)
815
24
      return nullptr;
816
83
817
83
    // Adding an element to an arbitrary shuffle could be expensive, but a
818
83
    // shuffle that selects elements from vectors without crossing lanes is
819
83
    // assumed cheap.
820
83
    // If we're just adding a constant into that shuffle, it will still be
821
83
    // cheap.
822
83
    if (!isShuffleEquivalentToSelect(*Shuf))
823
11
      return nullptr;
824
72
825
72
    // From the above 'select' check, we know that the mask has the same number
826
72
    // of elements as the vector input operands. We also know that each constant
827
72
    // input element is used in its lane and can not be used more than once by
828
72
    // the shuffle. Therefore, replace the constant in the shuffle's constant
829
72
    // vector with the insertelt constant. Replace the constant in the shuffle's
830
72
    // mask vector with the insertelt index plus the length of the vector
831
72
    // (because the constant vector operand of a shuffle is always the 2nd
832
72
    // operand).
833
72
    Constant *Mask = Shuf->getMask();
834
72
    unsigned NumElts = Mask->getType()->getVectorNumElements();
835
72
    SmallVector<Constant *, 16> NewShufElts(NumElts);
836
72
    SmallVector<Constant *, 16> NewMaskElts(NumElts);
837
424
    for (unsigned I = 0; I != NumElts; 
++I352
) {
838
352
      if (I == InsEltIndex) {
839
72
        NewShufElts[I] = InsEltScalar;
840
72
        Type *Int32Ty = Type::getInt32Ty(Shuf->getContext());
841
72
        NewMaskElts[I] = ConstantInt::get(Int32Ty, InsEltIndex + NumElts);
842
280
      } else {
843
280
        // Copy over the existing values.
844
280
        NewShufElts[I] = ShufConstVec->getAggregateElement(I);
845
280
        NewMaskElts[I] = Mask->getAggregateElement(I);
846
280
      }
847
352
    }
848
72
849
72
    // Create new operands for a shuffle that includes the constant of the
850
72
    // original insertelt. The old shuffle will be dead now.
851
72
    return new ShuffleVectorInst(Shuf->getOperand(0),
852
72
                                 ConstantVector::get(NewShufElts),
853
72
                                 ConstantVector::get(NewMaskElts));
854
39.5k
  } else if (auto *IEI = dyn_cast<InsertElementInst>(Inst)) {
855
38.4k
    // Transform sequences of insertelements ops with constant data/indexes into
856
38.4k
    // a single shuffle op.
857
38.4k
    unsigned NumElts = InsElt.getType()->getNumElements();
858
38.4k
859
38.4k
    uint64_t InsertIdx[2];
860
38.4k
    Constant *Val[2];
861
38.4k
    if (!match(InsElt.getOperand(2), m_ConstantInt(InsertIdx[0])) ||
862
38.4k
        
!match(InsElt.getOperand(1), m_Constant(Val[0]))38.4k
||
863
38.4k
        
!match(IEI->getOperand(2), m_ConstantInt(InsertIdx[1]))521
||
864
38.4k
        
!match(IEI->getOperand(1), m_Constant(Val[1]))520
)
865
38.3k
      return nullptr;
866
80
    SmallVector<Constant *, 16> Values(NumElts);
867
80
    SmallVector<Constant *, 16> Mask(NumElts);
868
80
    auto ValI = std::begin(Val);
869
80
    // Generate new constant vector and mask.
870
80
    // We have 2 values/masks from the insertelements instructions. Insert them
871
80
    // into new value/mask vectors.
872
160
    for (uint64_t I : InsertIdx) {
873
160
      if (!Values[I]) {
874
160
        assert(!Mask[I]);
875
160
        Values[I] = *ValI;
876
160
        Mask[I] = ConstantInt::get(Type::getInt32Ty(InsElt.getContext()),
877
160
                                   NumElts + I);
878
160
      }
879
160
      ++ValI;
880
160
    }
881
80
    // Remaining values are filled with 'undef' values.
882
488
    for (unsigned I = 0; I < NumElts; 
++I408
) {
883
408
      if (!Values[I]) {
884
248
        assert(!Mask[I]);
885
248
        Values[I] = UndefValue::get(InsElt.getType()->getElementType());
886
248
        Mask[I] = ConstantInt::get(Type::getInt32Ty(InsElt.getContext()), I);
887
248
      }
888
408
    }
889
80
    // Create new operands for a shuffle that includes the constant of the
890
80
    // original insertelt.
891
80
    return new ShuffleVectorInst(IEI->getOperand(0),
892
80
                                 ConstantVector::get(Values),
893
80
                                 ConstantVector::get(Mask));
894
80
  }
895
1.11k
  return nullptr;
896
1.11k
}
897
898
131k
Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
899
131k
  Value *VecOp    = IE.getOperand(0);
900
131k
  Value *ScalarOp = IE.getOperand(1);
901
131k
  Value *IdxOp    = IE.getOperand(2);
902
131k
903
131k
  if (auto *V = SimplifyInsertElementInst(
904
15
          VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE)))
905
15
    return replaceInstUsesWith(IE, V);
906
131k
907
131k
  // If the vector and scalar are both bitcast from the same element type, do
908
131k
  // the insert in that source type followed by bitcast.
909
131k
  Value *VecSrc, *ScalarSrc;
910
131k
  if (match(VecOp, m_BitCast(m_Value(VecSrc))) &&
911
131k
      
match(ScalarOp, m_BitCast(m_Value(ScalarSrc)))38
&&
912
131k
      
(6
VecOp->hasOneUse()6
||
ScalarOp->hasOneUse()2
) &&
913
131k
      
VecSrc->getType()->isVectorTy()5
&&
!ScalarSrc->getType()->isVectorTy()4
&&
914
131k
      
VecSrc->getType()->getVectorElementType() == ScalarSrc->getType()3
) {
915
3
    // inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp -->
916
3
    //   bitcast (inselt VecSrc, ScalarSrc, IdxOp)
917
3
    Value *NewInsElt = Builder.CreateInsertElement(VecSrc, ScalarSrc, IdxOp);
918
3
    return new BitCastInst(NewInsElt, IE.getType());
919
3
  }
920
131k
921
131k
  // If the inserted element was extracted from some other vector and both
922
131k
  // indexes are valid constants, try to turn this into a shuffle.
923
131k
  uint64_t InsertedIdx, ExtractedIdx;
924
131k
  Value *ExtVecOp;
925
131k
  if (match(IdxOp, m_ConstantInt(InsertedIdx)) &&
926
131k
      match(ScalarOp, m_ExtractElement(m_Value(ExtVecOp),
927
130k
                                       m_ConstantInt(ExtractedIdx))) &&
928
131k
      
ExtractedIdx < ExtVecOp->getType()->getVectorNumElements()9.16k
) {
929
9.16k
    // TODO: Looking at the user(s) to determine if this insert is a
930
9.16k
    // fold-to-shuffle opportunity does not match the usual instcombine
931
9.16k
    // constraints. We should decide if the transform is worthy based only
932
9.16k
    // on this instruction and its operands, but that may not work currently.
933
9.16k
    //
934
9.16k
    // Here, we are trying to avoid creating shuffles before reaching
935
9.16k
    // the end of a chain of extract-insert pairs. This is complicated because
936
9.16k
    // we do not generally form arbitrary shuffle masks in instcombine
937
9.16k
    // (because those may codegen poorly), but collectShuffleElements() does
938
9.16k
    // exactly that.
939
9.16k
    //
940
9.16k
    // The rules for determining what is an acceptable target-independent
941
9.16k
    // shuffle mask are fuzzy because they evolve based on the backend's
942
9.16k
    // capabilities and real-world impact.
943
9.16k
    auto isShuffleRootCandidate = [](InsertElementInst &Insert) {
944
9.16k
      if (!Insert.hasOneUse())
945
102
        return true;
946
9.06k
      auto *InsertUser = dyn_cast<InsertElementInst>(Insert.user_back());
947
9.06k
      if (!InsertUser)
948
2.09k
        return true;
949
6.96k
      return false;
950
6.96k
    };
951
9.16k
952
9.16k
    // Try to form a shuffle from a chain of extract-insert ops.
953
9.16k
    if (isShuffleRootCandidate(IE)) {
954
2.19k
      SmallVector<Constant*, 16> Mask;
955
2.19k
      ShuffleOps LR = collectShuffleElements(&IE, Mask, nullptr, *this);
956
2.19k
957
2.19k
      // The proposed shuffle may be trivial, in which case we shouldn't
958
2.19k
      // perform the combine.
959
2.19k
      if (LR.first != &IE && 
LR.second != &IE2.13k
) {
960
2.13k
        // We now have a shuffle of LHS, RHS, Mask.
961
2.13k
        if (LR.second == nullptr)
962
0
          LR.second = UndefValue::get(LR.first->getType());
963
2.13k
        return new ShuffleVectorInst(LR.first, LR.second,
964
2.13k
                                     ConstantVector::get(Mask));
965
2.13k
      }
966
129k
    }
967
9.16k
  }
968
129k
969
129k
  unsigned VWidth = VecOp->getType()->getVectorNumElements();
970
129k
  APInt UndefElts(VWidth, 0);
971
129k
  APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
972
129k
  if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) {
973
1.50k
    if (V != &IE)
974
0
      return replaceInstUsesWith(IE, V);
975
1.50k
    return &IE;
976
1.50k
  }
977
127k
978
127k
  if (Instruction *Shuf = foldConstantInsEltIntoShuffle(IE))
979
152
    return Shuf;
980
127k
981
127k
  if (Instruction *NewInsElt = hoistInsEltConst(IE, Builder))
982
440
    return NewInsElt;
983
127k
984
127k
  if (Instruction *Broadcast = foldInsSequenceIntoSplat(IE))
985
2.62k
    return Broadcast;
986
124k
987
124k
  if (Instruction *Splat = foldInsEltIntoSplat(IE))
988
8
    return Splat;
989
124k
990
124k
  return nullptr;
991
124k
}
992
993
/// Return true if we can evaluate the specified expression tree if the vector
994
/// elements were shuffled in a different order.
995
static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask,
996
176k
                                unsigned Depth = 5) {
997
176k
  // We can always reorder the elements of a constant.
998
176k
  if (isa<Constant>(V))
999
59
    return true;
1000
176k
1001
176k
  // We won't reorder vector arguments. No IPO here.
1002
176k
  Instruction *I = dyn_cast<Instruction>(V);
1003
176k
  if (!I) 
return false2.21k
;
1004
174k
1005
174k
  // Two users may expect different orders of the elements. Don't try it.
1006
174k
  if (!I->hasOneUse())
1007
104k
    return false;
1008
70.5k
1009
70.5k
  if (Depth == 0) 
return false0
;
1010
70.5k
1011
70.5k
  switch (I->getOpcode()) {
1012
70.5k
    case Instruction::Add:
1013
1.46k
    case Instruction::FAdd:
1014
1.46k
    case Instruction::Sub:
1015
1.46k
    case Instruction::FSub:
1016
1.46k
    case Instruction::Mul:
1017
1.46k
    case Instruction::FMul:
1018
1.46k
    case Instruction::UDiv:
1019
1.46k
    case Instruction::SDiv:
1020
1.46k
    case Instruction::FDiv:
1021
1.46k
    case Instruction::URem:
1022
1.46k
    case Instruction::SRem:
1023
1.46k
    case Instruction::FRem:
1024
1.46k
    case Instruction::Shl:
1025
1.46k
    case Instruction::LShr:
1026
1.46k
    case Instruction::AShr:
1027
1.46k
    case Instruction::And:
1028
1.46k
    case Instruction::Or:
1029
1.46k
    case Instruction::Xor:
1030
1.46k
    case Instruction::ICmp:
1031
1.46k
    case Instruction::FCmp:
1032
1.46k
    case Instruction::Trunc:
1033
1.46k
    case Instruction::ZExt:
1034
1.46k
    case Instruction::SExt:
1035
1.46k
    case Instruction::FPToUI:
1036
1.46k
    case Instruction::FPToSI:
1037
1.46k
    case Instruction::UIToFP:
1038
1.46k
    case Instruction::SIToFP:
1039
1.46k
    case Instruction::FPTrunc:
1040
1.46k
    case Instruction::FPExt:
1041
1.46k
    case Instruction::GetElementPtr: {
1042
1.46k
      // Bail out if we would create longer vector ops. We could allow creating
1043
1.46k
      // longer vector ops, but that may result in more expensive codegen. We
1044
1.46k
      // would also need to limit the transform to avoid undefined behavior for
1045
1.46k
      // integer div/rem.
1046
1.46k
      Type *ITy = I->getType();
1047
1.46k
      if (ITy->isVectorTy() && Mask.size() > ITy->getVectorNumElements())
1048
461
        return false;
1049
1.04k
      
for (Value *Operand : I->operands())999
{
1050
1.04k
        if (!canEvaluateShuffled(Operand, Mask, Depth - 1))
1051
990
          return false;
1052
1.04k
      }
1053
999
      
return true9
;
1054
999
    }
1055
55.3k
    case Instruction::InsertElement: {
1056
55.3k
      ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2));
1057
55.3k
      if (!CI) 
return false1
;
1058
55.3k
      int ElementNumber = CI->getLimitedValue();
1059
55.3k
1060
55.3k
      // Verify that 'CI' does not occur twice in Mask. A single 'insertelement'
1061
55.3k
      // can't put an element into multiple indices.
1062
55.3k
      bool SeenOnce = false;
1063
110k
      for (int i = 0, e = Mask.size(); i != e; 
++i55.4k
) {
1064
110k
        if (Mask[i] == ElementNumber) {
1065
110k
          if (SeenOnce)
1066
55.3k
            return false;
1067
55.3k
          SeenOnce = true;
1068
55.3k
        }
1069
110k
      }
1070
55.3k
      
return canEvaluateShuffled(I->getOperand(0), Mask, Depth - 1)17
;
1071
13.6k
    }
1072
13.6k
  }
1073
13.6k
  return false;
1074
13.6k
}
1075
1076
/// Rebuild a new instruction just like 'I' but with the new operands given.
1077
/// In the event of type mismatch, the type of the operands is correct.
1078
9
static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps) {
1079
9
  // We don't want to use the IRBuilder here because we want the replacement
1080
9
  // instructions to appear next to 'I', not the builder's insertion point.
1081
9
  switch (I->getOpcode()) {
1082
9
    case Instruction::Add:
1083
6
    case Instruction::FAdd:
1084
6
    case Instruction::Sub:
1085
6
    case Instruction::FSub:
1086
6
    case Instruction::Mul:
1087
6
    case Instruction::FMul:
1088
6
    case Instruction::UDiv:
1089
6
    case Instruction::SDiv:
1090
6
    case Instruction::FDiv:
1091
6
    case Instruction::URem:
1092
6
    case Instruction::SRem:
1093
6
    case Instruction::FRem:
1094
6
    case Instruction::Shl:
1095
6
    case Instruction::LShr:
1096
6
    case Instruction::AShr:
1097
6
    case Instruction::And:
1098
6
    case Instruction::Or:
1099
6
    case Instruction::Xor: {
1100
6
      BinaryOperator *BO = cast<BinaryOperator>(I);
1101
6
      assert(NewOps.size() == 2 && "binary operator with #ops != 2");
1102
6
      BinaryOperator *New =
1103
6
          BinaryOperator::Create(cast<BinaryOperator>(I)->getOpcode(),
1104
6
                                 NewOps[0], NewOps[1], "", BO);
1105
6
      if (isa<OverflowingBinaryOperator>(BO)) {
1106
3
        New->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
1107
3
        New->setHasNoSignedWrap(BO->hasNoSignedWrap());
1108
3
      }
1109
6
      if (isa<PossiblyExactOperator>(BO)) {
1110
1
        New->setIsExact(BO->isExact());
1111
1
      }
1112
6
      if (isa<FPMathOperator>(BO))
1113
1
        New->copyFastMathFlags(I);
1114
6
      return New;
1115
6
    }
1116
6
    case Instruction::ICmp:
1117
0
      assert(NewOps.size() == 2 && "icmp with #ops != 2");
1118
0
      return new ICmpInst(I, cast<ICmpInst>(I)->getPredicate(),
1119
0
                          NewOps[0], NewOps[1]);
1120
6
    case Instruction::FCmp:
1121
0
      assert(NewOps.size() == 2 && "fcmp with #ops != 2");
1122
0
      return new FCmpInst(I, cast<FCmpInst>(I)->getPredicate(),
1123
0
                          NewOps[0], NewOps[1]);
1124
6
    case Instruction::Trunc:
1125
2
    case Instruction::ZExt:
1126
2
    case Instruction::SExt:
1127
2
    case Instruction::FPToUI:
1128
2
    case Instruction::FPToSI:
1129
2
    case Instruction::UIToFP:
1130
2
    case Instruction::SIToFP:
1131
2
    case Instruction::FPTrunc:
1132
2
    case Instruction::FPExt: {
1133
2
      // It's possible that the mask has a different number of elements from
1134
2
      // the original cast. We recompute the destination type to match the mask.
1135
2
      Type *DestTy =
1136
2
          VectorType::get(I->getType()->getScalarType(),
1137
2
                          NewOps[0]->getType()->getVectorNumElements());
1138
2
      assert(NewOps.size() == 1 && "cast with #ops != 1");
1139
2
      return CastInst::Create(cast<CastInst>(I)->getOpcode(), NewOps[0], DestTy,
1140
2
                              "", I);
1141
2
    }
1142
2
    case Instruction::GetElementPtr: {
1143
1
      Value *Ptr = NewOps[0];
1144
1
      ArrayRef<Value*> Idx = NewOps.slice(1);
1145
1
      GetElementPtrInst *GEP = GetElementPtrInst::Create(
1146
1
          cast<GetElementPtrInst>(I)->getSourceElementType(), Ptr, Idx, "", I);
1147
1
      GEP->setIsInBounds(cast<GetElementPtrInst>(I)->isInBounds());
1148
1
      return GEP;
1149
0
    }
1150
0
  }
1151
0
  llvm_unreachable("failed to rebuild vector instructions");
1152
0
}
1153
1154
43
static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
1155
43
  // Mask.size() does not need to be equal to the number of vector elements.
1156
43
1157
43
  assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
1158
43
  Type *EltTy = V->getType()->getScalarType();
1159
43
  Type *I32Ty = IntegerType::getInt32Ty(V->getContext());
1160
43
  if (isa<UndefValue>(V))
1161
8
    return UndefValue::get(VectorType::get(EltTy, Mask.size()));
1162
35
1163
35
  if (isa<ConstantAggregateZero>(V))
1164
0
    return ConstantAggregateZero::get(VectorType::get(EltTy, Mask.size()));
1165
35
1166
35
  if (Constant *C = dyn_cast<Constant>(V)) {
1167
11
    SmallVector<Constant *, 16> MaskValues;
1168
41
    for (int i = 0, e = Mask.size(); i != e; 
++i30
) {
1169
30
      if (Mask[i] == -1)
1170
3
        MaskValues.push_back(UndefValue::get(I32Ty));
1171
27
      else
1172
27
        MaskValues.push_back(ConstantInt::get(I32Ty, Mask[i]));
1173
30
    }
1174
11
    return ConstantExpr::getShuffleVector(C, UndefValue::get(C->getType()),
1175
11
                                          ConstantVector::get(MaskValues));
1176
11
  }
1177
24
1178
24
  Instruction *I = cast<Instruction>(V);
1179
24
  switch (I->getOpcode()) {
1180
24
    case Instruction::Add:
1181
9
    case Instruction::FAdd:
1182
9
    case Instruction::Sub:
1183
9
    case Instruction::FSub:
1184
9
    case Instruction::Mul:
1185
9
    case Instruction::FMul:
1186
9
    case Instruction::UDiv:
1187
9
    case Instruction::SDiv:
1188
9
    case Instruction::FDiv:
1189
9
    case Instruction::URem:
1190
9
    case Instruction::SRem:
1191
9
    case Instruction::FRem:
1192
9
    case Instruction::Shl:
1193
9
    case Instruction::LShr:
1194
9
    case Instruction::AShr:
1195
9
    case Instruction::And:
1196
9
    case Instruction::Or:
1197
9
    case Instruction::Xor:
1198
9
    case Instruction::ICmp:
1199
9
    case Instruction::FCmp:
1200
9
    case Instruction::Trunc:
1201
9
    case Instruction::ZExt:
1202
9
    case Instruction::SExt:
1203
9
    case Instruction::FPToUI:
1204
9
    case Instruction::FPToSI:
1205
9
    case Instruction::UIToFP:
1206
9
    case Instruction::SIToFP:
1207
9
    case Instruction::FPTrunc:
1208
9
    case Instruction::FPExt:
1209
9
    case Instruction::Select:
1210
9
    case Instruction::GetElementPtr: {
1211
9
      SmallVector<Value*, 8> NewOps;
1212
9
      bool NeedsRebuild = (Mask.size() != I->getType()->getVectorNumElements());
1213
26
      for (int i = 0, e = I->getNumOperands(); i != e; 
++i17
) {
1214
17
        Value *V;
1215
17
        // Recursively call evaluateInDifferentElementOrder on vector arguments
1216
17
        // as well. E.g. GetElementPtr may have scalar operands even if the
1217
17
        // return value is a vector, so we need to examine the operand type.
1218
17
        if (I->getOperand(i)->getType()->isVectorTy())
1219
15
          V = evaluateInDifferentElementOrder(I->getOperand(i), Mask);
1220
2
        else
1221
2
          V = I->getOperand(i);
1222
17
        NewOps.push_back(V);
1223
17
        NeedsRebuild |= (V != I->getOperand(i));
1224
17
      }
1225
9
      if (NeedsRebuild) {
1226
9
        return buildNew(I, NewOps);
1227
9
      }
1228
0
      return I;
1229
0
    }
1230
15
    case Instruction::InsertElement: {
1231
15
      int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue();
1232
15
1233
15
      // The insertelement was inserting at Element. Figure out which element
1234
15
      // that becomes after shuffling. The answer is guaranteed to be unique
1235
15
      // by CanEvaluateShuffled.
1236
15
      bool Found = false;
1237
15
      int Index = 0;
1238
28
      for (int e = Mask.size(); Index != e; 
++Index13
) {
1239
25
        if (Mask[Index] == Element) {
1240
12
          Found = true;
1241
12
          break;
1242
12
        }
1243
25
      }
1244
15
1245
15
      // If element is not in Mask, no need to handle the operand 1 (element to
1246
15
      // be inserted). Just evaluate values in operand 0 according to Mask.
1247
15
      if (!Found)
1248
3
        return evaluateInDifferentElementOrder(I->getOperand(0), Mask);
1249
12
1250
12
      Value *V = evaluateInDifferentElementOrder(I->getOperand(0), Mask);
1251
12
      return InsertElementInst::Create(V, I->getOperand(1),
1252
12
                                       ConstantInt::get(I32Ty, Index), "", I);
1253
12
    }
1254
0
  }
1255
0
  llvm_unreachable("failed to reorder elements of vector instruction!");
1256
0
}
1257
1258
static void recognizeIdentityMask(const SmallVectorImpl<int> &Mask,
1259
75.4k
                                  bool &isLHSID, bool &isRHSID) {
1260
75.4k
  isLHSID = isRHSID = true;
1261
75.4k
1262
415k
  for (unsigned i = 0, e = Mask.size(); i != e; 
++i339k
) {
1263
339k
    if (Mask[i] < 0) 
continue8.38k
; // Ignore undef values.
1264
331k
    // Is this an identity shuffle of the LHS value?
1265
331k
    isLHSID &= (Mask[i] == (int)i);
1266
331k
1267
331k
    // Is this an identity shuffle of the RHS value?
1268
331k
    isRHSID &= (Mask[i]-e == i);
1269
331k
  }
1270
75.4k
}
1271
1272
// Returns true if the shuffle is extracting a contiguous range of values from
1273
// LHS, for example:
1274
//                 +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
1275
//   Input:        |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP|
1276
//   Shuffles to:  |EE|FF|GG|HH|
1277
//                 +--+--+--+--+
1278
static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI,
1279
204k
                                       SmallVector<int, 16> &Mask) {
1280
204k
  unsigned LHSElems = SVI.getOperand(0)->getType()->getVectorNumElements();
1281
204k
  unsigned MaskElems = Mask.size();
1282
204k
  unsigned BegIdx = Mask.front();
1283
204k
  unsigned EndIdx = Mask.back();
1284
204k
  if (BegIdx > EndIdx || 
EndIdx >= LHSElems200k
||
EndIdx - BegIdx != MaskElems - 1163k
)
1285
109k
    return false;
1286
298k
  
for (unsigned I = 0; 94.8k
I != MaskElems;
++I204k
)
1287
205k
    if (static_cast<unsigned>(Mask[I]) != BegIdx + I)
1288
944
      return false;
1289
94.8k
  
return true93.8k
;
1290
94.8k
}
1291
1292
/// These are the ingredients in an alternate form binary operator as described
1293
/// below.
1294
struct BinopElts {
1295
  BinaryOperator::BinaryOps Opcode;
1296
  Value *Op0;
1297
  Value *Op1;
1298
  BinopElts(BinaryOperator::BinaryOps Opc = (BinaryOperator::BinaryOps)0,
1299
            Value *V0 = nullptr, Value *V1 = nullptr) :
1300
136
      Opcode(Opc), Op0(V0), Op1(V1) {}
1301
136
  operator bool() const { return Opcode != 0; }
1302
};
1303
1304
/// Binops may be transformed into binops with different opcodes and operands.
1305
/// Reverse the usual canonicalization to enable folds with the non-canonical
1306
/// form of the binop. If a transform is possible, return the elements of the
1307
/// new binop. If not, return invalid elements.
1308
136
static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL) {
1309
136
  Value *BO0 = BO->getOperand(0), *BO1 = BO->getOperand(1);
1310
136
  Type *Ty = BO->getType();
1311
136
  switch (BO->getOpcode()) {
1312
136
    case Instruction::Shl: {
1313
35
      // shl X, C --> mul X, (1 << C)
1314
35
      Constant *C;
1315
35
      if (match(BO1, m_Constant(C))) {
1316
35
        Constant *ShlOne = ConstantExpr::getShl(ConstantInt::get(Ty, 1), C);
1317
35
        return { Instruction::Mul, BO0, ShlOne };
1318
35
      }
1319
0
      break;
1320
0
    }
1321
17
    case Instruction::Or: {
1322
17
      // or X, C --> add X, C (when X and C have no common bits set)
1323
17
      const APInt *C;
1324
17
      if (match(BO1, m_APInt(C)) && 
MaskedValueIsZero(BO0, *C, DL)7
)
1325
4
        return { Instruction::Add, BO0, BO1 };
1326
13
      break;
1327
13
    }
1328
84
    default:
1329
84
      break;
1330
97
  }
1331
97
  return {};
1332
97
}
1333
1334
1.54k
static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf) {
1335
1.54k
  assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
1336
1.54k
1337
1.54k
  // Are we shuffling together some value and that same value after it has been
1338
1.54k
  // modified by a binop with a constant?
1339
1.54k
  Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
1340
1.54k
  Constant *C;
1341
1.54k
  bool Op0IsBinop;
1342
1.54k
  if (match(Op0, m_BinOp(m_Specific(Op1), m_Constant(C))))
1343
14
    Op0IsBinop = true;
1344
1.53k
  else if (match(Op1, m_BinOp(m_Specific(Op0), m_Constant(C))))
1345
10
    Op0IsBinop = false;
1346
1.52k
  else
1347
1.52k
    return nullptr;
1348
24
1349
24
  // The identity constant for a binop leaves a variable operand unchanged. For
1350
24
  // a vector, this is a splat of something like 0, -1, or 1.
1351
24
  // If there's no identity constant for this binop, we're done.
1352
24
  auto *BO = cast<BinaryOperator>(Op0IsBinop ? 
Op014
:
Op110
);
1353
24
  BinaryOperator::BinaryOps BOpcode = BO->getOpcode();
1354
24
  Constant *IdC = ConstantExpr::getBinOpIdentity(BOpcode, Shuf.getType(), true);
1355
24
  if (!IdC)
1356
0
    return nullptr;
1357
24
1358
24
  // Shuffle identity constants into the lanes that return the original value.
1359
24
  // Example: shuf (mul X, {-1,-2,-3,-4}), X, {0,5,6,3} --> mul X, {-1,1,1,-4}
1360
24
  // Example: shuf X, (add X, {-1,-2,-3,-4}), {0,1,6,7} --> add X, {0,0,-3,-4}
1361
24
  // The existing binop constant vector remains in the same operand position.
1362
24
  Constant *Mask = Shuf.getMask();
1363
24
  Constant *NewC = Op0IsBinop ? 
ConstantExpr::getShuffleVector(C, IdC, Mask)14
:
1364
24
                                
ConstantExpr::getShuffleVector(IdC, C, Mask)10
;
1365
24
1366
24
  bool MightCreatePoisonOrUB =
1367
24
      Mask->containsUndefElement() &&
1368
24
      
(11
Instruction::isIntDivRem(BOpcode)11
||
Instruction::isShift(BOpcode)9
);
1369
24
  if (MightCreatePoisonOrUB)
1370
6
    NewC = getSafeVectorConstantForBinop(BOpcode, NewC, true);
1371
24
1372
24
  // shuf (bop X, C), X, M --> bop X, C'
1373
24
  // shuf X, (bop X, C), M --> bop X, C'
1374
24
  Value *X = Op0IsBinop ? 
Op114
:
Op010
;
1375
24
  Instruction *NewBO = BinaryOperator::Create(BOpcode, X, NewC);
1376
24
  NewBO->copyIRFlags(BO);
1377
24
1378
24
  // An undef shuffle mask element may propagate as an undef constant element in
1379
24
  // the new binop. That would produce poison where the original code might not.
1380
24
  // If we already made a safe constant, then there's no danger.
1381
24
  if (Mask->containsUndefElement() && 
!MightCreatePoisonOrUB11
)
1382
5
    NewBO->dropPoisonGeneratingFlags();
1383
24
  return NewBO;
1384
24
}
1385
1386
/// If we have an insert of a scalar to a non-zero element of an undefined
1387
/// vector and then shuffle that value, that's the same as inserting to the zero
1388
/// element and shuffling. Splatting from the zero element is recognized as the
1389
/// canonical form of splat.
1390
static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf,
1391
205k
                                            InstCombiner::BuilderTy &Builder) {
1392
205k
  Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
1393
205k
  Constant *Mask = Shuf.getMask();
1394
205k
  Value *X;
1395
205k
  uint64_t IndexC;
1396
205k
1397
205k
  // Match a shuffle that is a splat to a non-zero element.
1398
205k
  if (!match(Op0, m_OneUse(m_InsertElement(m_Undef(), m_Value(X),
1399
205k
                                           m_ConstantInt(IndexC)))) ||
1400
205k
      
!match(Op1, m_Undef())55.3k
||
match(Mask, m_ZeroInt())55.2k
||
IndexC == 07
)
1401
205k
    return nullptr;
1402
5
1403
5
  // Insert into element 0 of an undef vector.
1404
5
  UndefValue *UndefVec = UndefValue::get(Shuf.getType());
1405
5
  Constant *Zero = Builder.getInt32(0);
1406
5
  Value *NewIns = Builder.CreateInsertElement(UndefVec, X, Zero);
1407
5
1408
5
  // Splat from element 0. Any mask element that is undefined remains undefined.
1409
5
  // For example:
1410
5
  // shuf (inselt undef, X, 2), undef, <2,2,undef>
1411
5
  //   --> shuf (inselt undef, X, 0), undef, <0,0,undef>
1412
5
  unsigned NumMaskElts = Shuf.getType()->getVectorNumElements();
1413
5
  SmallVector<Constant *, 16> NewMask(NumMaskElts, Zero);
1414
25
  for (unsigned i = 0; i != NumMaskElts; 
++i20
)
1415
20
    if (isa<UndefValue>(Mask->getAggregateElement(i)))
1416
5
      NewMask[i] = Mask->getAggregateElement(i);
1417
5
1418
5
  return new ShuffleVectorInst(NewIns, UndefVec, ConstantVector::get(NewMask));
1419
5
}
1420
1421
/// Try to fold shuffles that are the equivalent of a vector select.
1422
static Instruction *foldSelectShuffle(ShuffleVectorInst &Shuf,
1423
                                      InstCombiner::BuilderTy &Builder,
1424
205k
                                      const DataLayout &DL) {
1425
205k
  if (!Shuf.isSelect())
1426
204k
    return nullptr;
1427
1.64k
1428
1.64k
  // Canonicalize to choose from operand 0 first.
1429
1.64k
  unsigned NumElts = Shuf.getType()->getVectorNumElements();
1430
1.64k
  if (Shuf.getMaskValue(0) >= (int)NumElts) {
1431
99
    // TODO: Can we assert that both operands of a shuffle-select are not undef
1432
99
    // (otherwise, it would have been folded by instsimplify?
1433
99
    Shuf.commute();
1434
99
    return &Shuf;
1435
99
  }
1436
1.54k
1437
1.54k
  if (Instruction *I = foldSelectShuffleWith1Binop(Shuf))
1438
24
    return I;
1439
1.52k
1440
1.52k
  BinaryOperator *B0, *B1;
1441
1.52k
  if (!match(Shuf.getOperand(0), m_BinOp(B0)) ||
1442
1.52k
      
!match(Shuf.getOperand(1), m_BinOp(B1))741
)
1443
797
    return nullptr;
1444
728
1445
728
  Value *X, *Y;
1446
728
  Constant *C0, *C1;
1447
728
  bool ConstantsAreOp1;
1448
728
  if (match(B0, m_BinOp(m_Value(X), m_Constant(C0))) &&
1449
728
      
match(B1, m_BinOp(m_Value(Y), m_Constant(C1)))116
)
1450
113
    ConstantsAreOp1 = true;
1451
615
  else if (match(B0, m_BinOp(m_Constant(C0), m_Value(X))) &&
1452
615
           
match(B1, m_BinOp(m_Constant(C1), m_Value(Y)))34
)
1453
28
    ConstantsAreOp1 = false;
1454
587
  else
1455
587
    return nullptr;
1456
141
1457
141
  // We need matching binops to fold the lanes together.
1458
141
  BinaryOperator::BinaryOps Opc0 = B0->getOpcode();
1459
141
  BinaryOperator::BinaryOps Opc1 = B1->getOpcode();
1460
141
  bool DropNSW = false;
1461
141
  if (ConstantsAreOp1 && 
Opc0 != Opc1113
) {
1462
70
    // TODO: We drop "nsw" if shift is converted into multiply because it may
1463
70
    // not be correct when the shift amount is BitWidth - 1. We could examine
1464
70
    // each vector element to determine if it is safe to keep that flag.
1465
70
    if (Opc0 == Instruction::Shl || 
Opc1 == Instruction::Shl68
)
1466
35
      DropNSW = true;
1467
70
    if (BinopElts AltB0 = getAlternateBinop(B0, DL)) {
1468
4
      assert(isa<Constant>(AltB0.Op1) && "Expecting constant with alt binop");
1469
4
      Opc0 = AltB0.Opcode;
1470
4
      C0 = cast<Constant>(AltB0.Op1);
1471
66
    } else if (BinopElts AltB1 = getAlternateBinop(B1, DL)) {
1472
35
      assert(isa<Constant>(AltB1.Op1) && "Expecting constant with alt binop");
1473
35
      Opc1 = AltB1.Opcode;
1474
35
      C1 = cast<Constant>(AltB1.Op1);
1475
35
    }
1476
70
  }
1477
141
1478
141
  if (Opc0 != Opc1)
1479
61
    return nullptr;
1480
80
1481
80
  // The opcodes must be the same. Use a new name to make that clear.
1482
80
  BinaryOperator::BinaryOps BOpc = Opc0;
1483
80
1484
80
  // Select the constant elements needed for the single binop.
1485
80
  Constant *Mask = Shuf.getMask();
1486
80
  Constant *NewC = ConstantExpr::getShuffleVector(C0, C1, Mask);
1487
80
1488
80
  // We are moving a binop after a shuffle. When a shuffle has an undefined
1489
80
  // mask element, the result is undefined, but it is not poison or undefined
1490
80
  // behavior. That is not necessarily true for div/rem/shift.
1491
80
  bool MightCreatePoisonOrUB =
1492
80
      Mask->containsUndefElement() &&
1493
80
      
(32
Instruction::isIntDivRem(BOpc)32
||
Instruction::isShift(BOpc)23
);
1494
80
  if (MightCreatePoisonOrUB)
1495
15
    NewC = getSafeVectorConstantForBinop(BOpc, NewC, ConstantsAreOp1);
1496
80
1497
80
  Value *V;
1498
80
  if (X == Y) {
1499
41
    // Remove a binop and the shuffle by rearranging the constant:
1500
41
    // shuffle (op V, C0), (op V, C1), M --> op V, C'
1501
41
    // shuffle (op C0, V), (op C1, V), M --> op C', V
1502
41
    V = X;
1503
41
  } else {
1504
39
    // If there are 2 different variable operands, we must create a new shuffle
1505
39
    // (select) first, so check uses to ensure that we don't end up with more
1506
39
    // instructions than we started with.
1507
39
    if (!B0->hasOneUse() && 
!B1->hasOneUse()1
)
1508
1
      return nullptr;
1509
38
1510
38
    // If we use the original shuffle mask and op1 is *variable*, we would be
1511
38
    // putting an undef into operand 1 of div/rem/shift. This is either UB or
1512
38
    // poison. We do not have to guard against UB when *constants* are op1
1513
38
    // because safe constants guarantee that we do not overflow sdiv/srem (and
1514
38
    // there's no danger for other opcodes).
1515
38
    // TODO: To allow this case, create a new shuffle mask with no undefs.
1516
38
    if (MightCreatePoisonOrUB && 
!ConstantsAreOp19
)
1517
5
      return nullptr;
1518
33
1519
33
    // Note: In general, we do not create new shuffles in InstCombine because we
1520
33
    // do not know if a target can lower an arbitrary shuffle optimally. In this
1521
33
    // case, the shuffle uses the existing mask, so there is no additional risk.
1522
33
1523
33
    // Select the variable vectors first, then perform the binop:
1524
33
    // shuffle (op X, C0), (op Y, C1), M --> op (shuffle X, Y, M), C'
1525
33
    // shuffle (op C0, X), (op C1, Y), M --> op C', (shuffle X, Y, M)
1526
33
    V = Builder.CreateShuffleVector(X, Y, Mask);
1527
33
  }
1528
80
1529
80
  
Instruction *NewBO = ConstantsAreOp1 74
?
BinaryOperator::Create(BOpc, V, NewC)51
:
1530
74
                                         
BinaryOperator::Create(BOpc, NewC, V)23
;
1531
74
1532
74
  // Flags are intersected from the 2 source binops. But there are 2 exceptions:
1533
74
  // 1. If we changed an opcode, poison conditions might have changed.
1534
74
  // 2. If the shuffle had undef mask elements, the new binop might have undefs
1535
74
  //    where the original code did not. But if we already made a safe constant,
1536
74
  //    then there's no danger.
1537
74
  NewBO->copyIRFlags(B0);
1538
74
  NewBO->andIRFlags(B1);
1539
74
  if (DropNSW)
1540
5
    NewBO->setHasNoSignedWrap(false);
1541
74
  if (Mask->containsUndefElement() && 
!MightCreatePoisonOrUB27
)
1542
17
    NewBO->dropPoisonGeneratingFlags();
1543
74
  return NewBO;
1544
80
}
1545
1546
/// Match a shuffle-select-shuffle pattern where the shuffles are widening and
1547
/// narrowing (concatenating with undef and extracting back to the original
1548
/// length). This allows replacing the wide select with a narrow select.
1549
static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf,
1550
205k
                                       InstCombiner::BuilderTy &Builder) {
1551
205k
  // This must be a narrowing identity shuffle. It extracts the 1st N elements
1552
205k
  // of the 1st vector operand of a shuffle.
1553
205k
  if (!match(Shuf.getOperand(1), m_Undef()) || 
!Shuf.isIdentityWithExtract()176k
)
1554
190k
    return nullptr;
1555
15.2k
1556
15.2k
  // The vector being shuffled must be a vector select that we can eliminate.
1557
15.2k
  // TODO: The one-use requirement could be eased if X and/or Y are constants.
1558
15.2k
  Value *Cond, *X, *Y;
1559
15.2k
  if (!match(Shuf.getOperand(0),
1560
15.2k
             m_OneUse(m_Select(m_Value(Cond), m_Value(X), m_Value(Y)))))
1561
15.2k
    return nullptr;
1562
11
1563
11
  // We need a narrow condition value. It must be extended with undef elements
1564
11
  // and have the same number of elements as this shuffle.
1565
11
  unsigned NarrowNumElts = Shuf.getType()->getVectorNumElements();
1566
11
  Value *NarrowCond;
1567
11
  if (!match(Cond, m_OneUse(m_ShuffleVector(m_Value(NarrowCond), m_Undef(),
1568
11
                                            m_Constant()))) ||
1569
11
      
NarrowCond->getType()->getVectorNumElements() != NarrowNumElts10
||
1570
11
      
!cast<ShuffleVectorInst>(Cond)->isIdentityWithPadding()6
)
1571
6
    return nullptr;
1572
5
1573
5
  // shuf (sel (shuf NarrowCond, undef, WideMask), X, Y), undef, NarrowMask) -->
1574
5
  // sel NarrowCond, (shuf X, undef, NarrowMask), (shuf Y, undef, NarrowMask)
1575
5
  Value *Undef = UndefValue::get(X->getType());
1576
5
  Value *NarrowX = Builder.CreateShuffleVector(X, Undef, Shuf.getMask());
1577
5
  Value *NarrowY = Builder.CreateShuffleVector(Y, Undef, Shuf.getMask());
1578
5
  return SelectInst::Create(NarrowCond, NarrowX, NarrowY);
1579
5
}
1580
1581
/// Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask.
1582
204k
static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
1583
204k
  Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
1584
204k
  if (!Shuf.isIdentityWithExtract() || 
!isa<UndefValue>(Op1)15.1k
)
1585
189k
    return nullptr;
1586
15.1k
1587
15.1k
  Value *X, *Y;
1588
15.1k
  Constant *Mask;
1589
15.1k
  if (!match(Op0, m_ShuffleVector(m_Value(X), m_Value(Y), m_Constant(Mask))))
1590
14.9k
    return nullptr;
1591
279
1592
279
  // Be conservative with shuffle transforms. If we can't kill the 1st shuffle,
1593
279
  // then combining may result in worse codegen.
1594
279
  if (!Op0->hasOneUse())
1595
197
    return nullptr;
1596
82
1597
82
  // We are extracting a subvector from a shuffle. Remove excess elements from
1598
82
  // the 1st shuffle mask to eliminate the extract.
1599
82
  //
1600
82
  // This transform is conservatively limited to identity extracts because we do
1601
82
  // not allow arbitrary shuffle mask creation as a target-independent transform
1602
82
  // (because we can't guarantee that will lower efficiently).
1603
82
  //
1604
82
  // If the extracting shuffle has an undef mask element, it transfers to the
1605
82
  // new shuffle mask. Otherwise, copy the original mask element. Example:
1606
82
  //   shuf (shuf X, Y, <C0, C1, C2, undef, C4>), undef, <0, undef, 2, 3> -->
1607
82
  //   shuf X, Y, <C0, undef, C2, undef>
1608
82
  unsigned NumElts = Shuf.getType()->getVectorNumElements();
1609
82
  SmallVector<Constant *, 16> NewMask(NumElts);
1610
82
  assert(NumElts < Mask->getType()->getVectorNumElements() &&
1611
82
         "Identity with extract must have less elements than its inputs");
1612
82
1613
1.13k
  for (unsigned i = 0; i != NumElts; 
++i1.05k
) {
1614
1.05k
    Constant *ExtractMaskElt = Shuf.getMask()->getAggregateElement(i);
1615
1.05k
    Constant *MaskElt = Mask->getAggregateElement(i);
1616
1.05k
    NewMask[i] = isa<UndefValue>(ExtractMaskElt) ? 
ExtractMaskElt4
:
MaskElt1.05k
;
1617
1.05k
  }
1618
82
  return new ShuffleVectorInst(X, Y, ConstantVector::get(NewMask));
1619
82
}
1620
1621
/// Try to replace a shuffle with an insertelement.
1622
204k
static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf) {
1623
204k
  Value *V0 = Shuf.getOperand(0), *V1 = Shuf.getOperand(1);
1624
204k
  SmallVector<int, 16> Mask = Shuf.getShuffleMask();
1625
204k
1626
204k
  // The shuffle must not change vector sizes.
1627
204k
  // TODO: This restriction could be removed if the insert has only one use
1628
204k
  //       (because the transform would require a new length-changing shuffle).
1629
204k
  int NumElts = Mask.size();
1630
204k
  if (NumElts != (int)(V0->getType()->getVectorNumElements()))
1631
130k
    return nullptr;
1632
74.4k
1633
74.4k
  // shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'
1634
148k
  
auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) 74.4k
{
1635
148k
    // We need an insertelement with a constant index.
1636
148k
    if (!match(V0, m_InsertElement(m_Value(), m_Value(Scalar),
1637
148k
                                   m_ConstantInt(IndexC))))
1638
92.7k
      return false;
1639
56.0k
1640
56.0k
    // Test the shuffle mask to see if it splices the inserted scalar into the
1641
56.0k
    // operand 1 vector of the shuffle.
1642
56.0k
    int NewInsIndex = -1;
1643
112k
    for (int i = 0; i != NumElts; 
++i56.1k
) {
1644
112k
      // Ignore undef mask elements.
1645
112k
      if (Mask[i] == -1)
1646
62
        continue;
1647
112k
1648
112k
      // The shuffle takes elements of operand 1 without lane changes.
1649
112k
      if (Mask[i] == NumElts + i)
1650
31
        continue;
1651
112k
1652
112k
      // The shuffle must choose the inserted scalar exactly once.
1653
112k
      if (NewInsIndex != -1 || 
Mask[i] != IndexC->getSExtValue()56.0k
)
1654
56.0k
        return false;
1655
56.0k
1656
56.0k
      // The shuffle is placing the inserted scalar into element i.
1657
56.0k
      NewInsIndex = i;
1658
56.0k
    }
1659
56.0k
1660
56.0k
    assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?");
1661
28
1662
28
    // Index is updated to the potentially translated insertion lane.
1663
28
    IndexC = ConstantInt::get(IndexC->getType(), NewInsIndex);
1664
28
    return true;
1665
56.0k
  };
1666
74.4k
1667
74.4k
  // If the shuffle is unnecessary, insert the scalar operand directly into
1668
74.4k
  // operand 1 of the shuffle. Example:
1669
74.4k
  // shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0
1670
74.4k
  Value *Scalar;
1671
74.4k
  ConstantInt *IndexC;
1672
74.4k
  if (isShufflingScalarIntoOp1(Scalar, IndexC))
1673
25
    return InsertElementInst::Create(V1, Scalar, IndexC);
1674
74.4k
1675
74.4k
  // Try again after commuting shuffle. Example:
1676
74.4k
  // shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> -->
1677
74.4k
  // shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3
1678
74.4k
  std::swap(V0, V1);
1679
74.4k
  ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
1680
74.4k
  if (isShufflingScalarIntoOp1(Scalar, IndexC))
1681
3
    return InsertElementInst::Create(V1, Scalar, IndexC);
1682
74.4k
1683
74.4k
  return nullptr;
1684
74.4k
}
1685
1686
204k
static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {
1687
204k
  // Match the operands as identity with padding (also known as concatenation
1688
204k
  // with undef) shuffles of the same source type. The backend is expected to
1689
204k
  // recreate these concatenations from a shuffle of narrow operands.
1690
204k
  auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(0));
1691
204k
  auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(1));
1692
204k
  if (!Shuffle0 || 
!Shuffle0->isIdentityWithPadding()6.77k
||
1693
204k
      
!Shuffle1258
||
!Shuffle1->isIdentityWithPadding()17
)
1694
204k
    return nullptr;
1695
8
1696
8
  // We limit this transform to power-of-2 types because we expect that the
1697
8
  // backend can convert the simplified IR patterns to identical nodes as the
1698
8
  // original IR.
1699
8
  // TODO: If we can verify the same behavior for arbitrary types, the
1700
8
  //       power-of-2 checks can be removed.
1701
8
  Value *X = Shuffle0->getOperand(0);
1702
8
  Value *Y = Shuffle1->getOperand(0);
1703
8
  if (X->getType() != Y->getType() ||
1704
8
      
!isPowerOf2_32(Shuf.getType()->getVectorNumElements())7
||
1705
8
      
!isPowerOf2_32(Shuffle0->getType()->getVectorNumElements())6
||
1706
8
      
!isPowerOf2_32(X->getType()->getVectorNumElements())4
||
1707
8
      
isa<UndefValue>(X)4
||
isa<UndefValue>(Y)4
)
1708
4
    return nullptr;
1709
4
  assert(isa<UndefValue>(Shuffle0->getOperand(1)) &&
1710
4
         isa<UndefValue>(Shuffle1->getOperand(1)) &&
1711
4
         "Unexpected operand for identity shuffle");
1712
4
1713
4
  // This is a shuffle of 2 widening shuffles. We can shuffle the narrow source
1714
4
  // operands directly by adjusting the shuffle mask to account for the narrower
1715
4
  // types:
1716
4
  // shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'
1717
4
  int NarrowElts = X->getType()->getVectorNumElements();
1718
4
  int WideElts = Shuffle0->getType()->getVectorNumElements();
1719
4
  assert(WideElts > NarrowElts && "Unexpected types for identity with padding");
1720
4
1721
4
  Type *I32Ty = IntegerType::getInt32Ty(Shuf.getContext());
1722
4
  SmallVector<int, 16> Mask = Shuf.getShuffleMask();
1723
4
  SmallVector<Constant *, 16> NewMask(Mask.size(), UndefValue::get(I32Ty));
1724
22
  for (int i = 0, e = Mask.size(); i != e; 
++i18
) {
1725
18
    if (Mask[i] == -1)
1726
3
      continue;
1727
15
1728
15
    // If this shuffle is choosing an undef element from 1 of the sources, that
1729
15
    // element is undef.
1730
15
    if (Mask[i] < WideElts) {
1731
7
      if (Shuffle0->getMaskValue(Mask[i]) == -1)
1732
0
        continue;
1733
8
    } else {
1734
8
      if (Shuffle1->getMaskValue(Mask[i] - WideElts) == -1)
1735
2
        continue;
1736
13
    }
1737
13
1738
13
    // If this shuffle is choosing from the 1st narrow op, the mask element is
1739
13
    // the same. If this shuffle is choosing from the 2nd narrow op, the mask
1740
13
    // element is offset down to adjust for the narrow vector widths.
1741
13
    if (Mask[i] < WideElts) {
1742
7
      assert(Mask[i] < NarrowElts && "Unexpected shuffle mask");
1743
7
      NewMask[i] = ConstantInt::get(I32Ty, Mask[i]);
1744
7
    } else {
1745
6
      assert(Mask[i] < (WideElts + NarrowElts) && "Unexpected shuffle mask");
1746
6
      NewMask[i] = ConstantInt::get(I32Ty, Mask[i] - (WideElts - NarrowElts));
1747
6
    }
1748
13
  }
1749
4
  return new ShuffleVectorInst(X, Y, ConstantVector::get(NewMask));
1750
4
}
1751
1752
208k
Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
1753
208k
  Value *LHS = SVI.getOperand(0);
1754
208k
  Value *RHS = SVI.getOperand(1);
1755
208k
  if (auto *V = SimplifyShuffleVectorInst(
1756
1.97k
          LHS, RHS, SVI.getMask(), SVI.getType(), SQ.getWithInstruction(&SVI)))
1757
1.97k
    return replaceInstUsesWith(SVI, V);
1758
206k
1759
206k
  // Canonicalize shuffle(x    ,x,mask) -> shuffle(x, undef,mask')
1760
206k
  // Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask').
1761
206k
  unsigned VWidth = SVI.getType()->getVectorNumElements();
1762
206k
  unsigned LHSWidth = LHS->getType()->getVectorNumElements();
1763
206k
  SmallVector<int, 16> Mask = SVI.getShuffleMask();
1764
206k
  Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
1765
206k
  if (LHS == RHS || 
isa<UndefValue>(LHS)206k
) {
1766
1.01k
    // Remap any references to RHS to use LHS.
1767
1.01k
    SmallVector<Constant*, 16> Elts;
1768
6.44k
    for (unsigned i = 0, e = LHSWidth; i != VWidth; 
++i5.42k
) {
1769
5.42k
      if (Mask[i] < 0) {
1770
1.08k
        Elts.push_back(UndefValue::get(Int32Ty));
1771
1.08k
        continue;
1772
1.08k
      }
1773
4.33k
1774
4.33k
      if ((Mask[i] >= (int)e && 
isa<UndefValue>(RHS)2.52k
) ||
1775
4.33k
          (Mask[i] <  (int)e && 
isa<UndefValue>(LHS)1.81k
)) {
1776
117
        Mask[i] = -1;     // Turn into undef.
1777
117
        Elts.push_back(UndefValue::get(Int32Ty));
1778
4.22k
      } else {
1779
4.22k
        Mask[i] = Mask[i] % e;  // Force to LHS.
1780
4.22k
        Elts.push_back(ConstantInt::get(Int32Ty, Mask[i]));
1781
4.22k
      }
1782
4.33k
    }
1783
1.01k
    SVI.setOperand(0, SVI.getOperand(1));
1784
1.01k
    SVI.setOperand(1, UndefValue::get(RHS->getType()));
1785
1.01k
    SVI.setOperand(2, ConstantVector::get(Elts));
1786
1.01k
    return &SVI;
1787
1.01k
  }
1788
205k
1789
205k
  if (Instruction *I = canonicalizeInsertSplat(SVI, Builder))
1790
5
    return I;
1791
205k
1792
205k
  if (Instruction *I = foldSelectShuffle(SVI, Builder, DL))
1793
197
    return I;
1794
205k
1795
205k
  if (Instruction *I = narrowVectorSelect(SVI, Builder))
1796
5
    return I;
1797
205k
1798
205k
  APInt UndefElts(VWidth, 0);
1799
205k
  APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
1800
205k
  if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
1801
873
    if (V != &SVI)
1802
16
      return replaceInstUsesWith(SVI, V);
1803
857
    return &SVI;
1804
857
  }
1805
204k
1806
204k
  if (Instruction *I = foldIdentityExtractShuffle(SVI))
1807
82
    return I;
1808
204k
1809
204k
  // These transforms have the potential to lose undef knowledge, so they are
1810
204k
  // intentionally placed after SimplifyDemandedVectorElts().
1811
204k
  if (Instruction *I = foldShuffleWithInsert(SVI))
1812
28
    return I;
1813
204k
  if (Instruction *I = foldIdentityPaddedShuffles(SVI))
1814
4
    return I;
1815
204k
1816
204k
  if (VWidth == LHSWidth) {
1817
74.4k
    // Analyze the shuffle, are the LHS or RHS and identity shuffles?
1818
74.4k
    bool isLHSID, isRHSID;
1819
74.4k
    recognizeIdentityMask(Mask, isLHSID, isRHSID);
1820
74.4k
1821
74.4k
    // Eliminate identity shuffles.
1822
74.4k
    if (isLHSID) 
return replaceInstUsesWith(SVI, LHS)281
;
1823
74.1k
    if (isRHSID) 
return replaceInstUsesWith(SVI, RHS)0
;
1824
204k
  }
1825
204k
1826
204k
  if (isa<UndefValue>(RHS) && 
canEvaluateShuffled(LHS, Mask)175k
) {
1827
13
    Value *V = evaluateInDifferentElementOrder(LHS, Mask);
1828
13
    return replaceInstUsesWith(SVI, V);
1829
13
  }
1830
204k
1831
204k
  // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
1832
204k
  // a non-vector type. We can instead bitcast the original vector followed by
1833
204k
  // an extract of the desired element:
1834
204k
  //
1835
204k
  //   %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
1836
204k
  //                         <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1837
204k
  //   %1 = bitcast <4 x i8> %sroa to i32
1838
204k
  // Becomes:
1839
204k
  //   %bc = bitcast <16 x i8> %in to <4 x i32>
1840
204k
  //   %ext = extractelement <4 x i32> %bc, i32 0
1841
204k
  //
1842
204k
  // If the shuffle is extracting a contiguous range of values from the input
1843
204k
  // vector then each use which is a bitcast of the extracted size can be
1844
204k
  // replaced. This will work if the vector types are compatible, and the begin
1845
204k
  // index is aligned to a value in the casted vector type. If the begin index
1846
204k
  // isn't aligned then we can shuffle the original vector (keeping the same
1847
204k
  // vector type) before extracting.
1848
204k
  //
1849
204k
  // This code will bail out if the target type is fundamentally incompatible
1850
204k
  // with vectors of the source type.
1851
204k
  //
1852
204k
  // Example of <16 x i8>, target type i32:
1853
204k
  // Index range [4,8):         v-----------v Will work.
1854
204k
  //                +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
1855
204k
  //     <16 x i8>: |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |
1856
204k
  //     <4 x i32>: |           |           |           |           |
1857
204k
  //                +-----------+-----------+-----------+-----------+
1858
204k
  // Index range [6,10):              ^-----------^ Needs an extra shuffle.
1859
204k
  // Target type i40:           ^--------------^ Won't work, bail.
1860
204k
  bool MadeChange = false;
1861
204k
  if (isShuffleExtractingFromLHS(SVI, Mask)) {
1862
93.8k
    Value *V = LHS;
1863
93.8k
    unsigned MaskElems = Mask.size();
1864
93.8k
    VectorType *SrcTy = cast<VectorType>(V->getType());
1865
93.8k
    unsigned VecBitWidth = SrcTy->getBitWidth();
1866
93.8k
    unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType());
1867
93.8k
    assert(SrcElemBitWidth && "vector elements must have a bitwidth");
1868
93.8k
    unsigned SrcNumElems = SrcTy->getNumElements();
1869
93.8k
    SmallVector<BitCastInst *, 8> BCs;
1870
93.8k
    DenseMap<Type *, Value *> NewBCs;
1871
93.8k
    for (User *U : SVI.users())
1872
94.1k
      if (BitCastInst *BC = dyn_cast<BitCastInst>(U))
1873
52
        if (!BC->use_empty())
1874
33
          // Only visit bitcasts that weren't previously handled.
1875
33
          BCs.push_back(BC);
1876
93.8k
    for (BitCastInst *BC : BCs) {
1877
33
      unsigned BegIdx = Mask.front();
1878
33
      Type *TgtTy = BC->getDestTy();
1879
33
      unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy);
1880
33
      if (!TgtElemBitWidth)
1881
0
        continue;
1882
33
      unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
1883
33
      bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
1884
33
      bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
1885
33
      if (!VecBitWidthsEqual)
1886
1
        continue;
1887
32
      if (!VectorType::isValidElementType(TgtTy))
1888
16
        continue;
1889
16
      VectorType *CastSrcTy = VectorType::get(TgtTy, TgtNumElems);
1890
16
      if (!BegIsAligned) {
1891
1
        // Shuffle the input so [0,NumElements) contains the output, and
1892
1
        // [NumElems,SrcNumElems) is undef.
1893
1
        SmallVector<Constant *, 16> ShuffleMask(SrcNumElems,
1894
1
                                                UndefValue::get(Int32Ty));
1895
5
        for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; 
++Idx, ++I4
)
1896
4
          ShuffleMask[I] = ConstantInt::get(Int32Ty, Idx);
1897
1
        V = Builder.CreateShuffleVector(V, UndefValue::get(V->getType()),
1898
1
                                        ConstantVector::get(ShuffleMask),
1899
1
                                        SVI.getName() + ".extract");
1900
1
        BegIdx = 0;
1901
1
      }
1902
16
      unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
1903
16
      assert(SrcElemsPerTgtElem);
1904
16
      BegIdx /= SrcElemsPerTgtElem;
1905
16
      bool BCAlreadyExists = NewBCs.find(CastSrcTy) != NewBCs.end();
1906
16
      auto *NewBC =
1907
16
          BCAlreadyExists
1908
16
              ? 
NewBCs[CastSrcTy]1
1909
16
              : 
Builder.CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc")15
;
1910
16
      if (!BCAlreadyExists)
1911
15
        NewBCs[CastSrcTy] = NewBC;
1912
16
      auto *Ext = Builder.CreateExtractElement(
1913
16
          NewBC, ConstantInt::get(Int32Ty, BegIdx), SVI.getName() + ".extract");
1914
16
      // The shufflevector isn't being replaced: the bitcast that used it
1915
16
      // is. InstCombine will visit the newly-created instructions.
1916
16
      replaceInstUsesWith(*BC, Ext);
1917
16
      MadeChange = true;
1918
16
    }
1919
93.8k
  }
1920
204k
1921
204k
  // If the LHS is a shufflevector itself, see if we can combine it with this
1922
204k
  // one without producing an unusual shuffle.
1923
204k
  // Cases that might be simplified:
1924
204k
  // 1.
1925
204k
  // x1=shuffle(v1,v2,mask1)
1926
204k
  //  x=shuffle(x1,undef,mask)
1927
204k
  //        ==>
1928
204k
  //  x=shuffle(v1,undef,newMask)
1929
204k
  // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
1930
204k
  // 2.
1931
204k
  // x1=shuffle(v1,undef,mask1)
1932
204k
  //  x=shuffle(x1,x2,mask)
1933
204k
  // where v1.size() == mask1.size()
1934
204k
  //        ==>
1935
204k
  //  x=shuffle(v1,x2,newMask)
1936
204k
  // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
1937
204k
  // 3.
1938
204k
  // x2=shuffle(v2,undef,mask2)
1939
204k
  //  x=shuffle(x1,x2,mask)
1940
204k
  // where v2.size() == mask2.size()
1941
204k
  //        ==>
1942
204k
  //  x=shuffle(x1,v2,newMask)
1943
204k
  // newMask[i] = (mask[i] < x1.size())
1944
204k
  //              ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
1945
204k
  // 4.
1946
204k
  // x1=shuffle(v1,undef,mask1)
1947
204k
  // x2=shuffle(v2,undef,mask2)
1948
204k
  //  x=shuffle(x1,x2,mask)
1949
204k
  // where v1.size() == v2.size()
1950
204k
  //        ==>
1951
204k
  //  x=shuffle(v1,v2,newMask)
1952
204k
  // newMask[i] = (mask[i] < x1.size())
1953
204k
  //              ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
1954
204k
  //
1955
204k
  // Here we are really conservative:
1956
204k
  // we are absolutely afraid of producing a shuffle mask not in the input
1957
204k
  // program, because the code gen may not be smart enough to turn a merged
1958
204k
  // shuffle into two specific shuffles: it may produce worse code.  As such,
1959
204k
  // we only merge two shuffles if the result is either a splat or one of the
1960
204k
  // input shuffle masks.  In this case, merging the shuffles just removes
1961
204k
  // one instruction, which we know is safe.  This is good for things like
1962
204k
  // turning: (splat(splat)) -> splat, or
1963
204k
  // merge(V[0..n], V[n+1..2n]) -> V[0..2n]
1964
204k
  ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS);
1965
204k
  ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
1966
204k
  if (LHSShuffle)
1967
6.63k
    if (!isa<UndefValue>(LHSShuffle->getOperand(1)) && 
!isa<UndefValue>(RHS)4.12k
)
1968
3.22k
      LHSShuffle = nullptr;
1969
204k
  if (RHSShuffle)
1970
6.70k
    if (!isa<UndefValue>(RHSShuffle->getOperand(1)))
1971
1.26k
      RHSShuffle = nullptr;
1972
204k
  if (!LHSShuffle && 
!RHSShuffle200k
)
1973
195k
    return MadeChange ? 
&SVI13
:
nullptr195k
;
1974
8.37k
1975
8.37k
  Value* LHSOp0 = nullptr;
1976
8.37k
  Value* LHSOp1 = nullptr;
1977
8.37k
  Value* RHSOp0 = nullptr;
1978
8.37k
  unsigned LHSOp0Width = 0;
1979
8.37k
  unsigned RHSOp0Width = 0;
1980
8.37k
  if (LHSShuffle) {
1981
3.40k
    LHSOp0 = LHSShuffle->getOperand(0);
1982
3.40k
    LHSOp1 = LHSShuffle->getOperand(1);
1983
3.40k
    LHSOp0Width = LHSOp0->getType()->getVectorNumElements();
1984
3.40k
  }
1985
8.37k
  if (RHSShuffle) {
1986
5.43k
    RHSOp0 = RHSShuffle->getOperand(0);
1987
5.43k
    RHSOp0Width = RHSOp0->getType()->getVectorNumElements();
1988
5.43k
  }
1989
8.37k
  Value* newLHS = LHS;
1990
8.37k
  Value* newRHS = RHS;
1991
8.37k
  if (LHSShuffle) {
1992
3.40k
    // case 1
1993
3.40k
    if (isa<UndefValue>(RHS)) {
1994
2.80k
      newLHS = LHSOp0;
1995
2.80k
      newRHS = LHSOp1;
1996
2.80k
    }
1997
597
    // case 2 or 4
1998
597
    else if (LHSOp0Width == LHSWidth) {
1999
149
      newLHS = LHSOp0;
2000
149
    }
2001
3.40k
  }
2002
8.37k
  // case 3 or 4
2003
8.37k
  if (RHSShuffle && 
RHSOp0Width == LHSWidth5.43k
) {
2004
61
    newRHS = RHSOp0;
2005
61
  }
2006
8.37k
  // case 4
2007
8.37k
  if (LHSOp0 == RHSOp0) {
2008
397
    newLHS = LHSOp0;
2009
397
    newRHS = nullptr;
2010
397
  }
2011
8.37k
2012
8.37k
  if (newLHS == LHS && 
newRHS == RHS5.02k
)
2013
5.01k
    return MadeChange ? 
&SVI0
: nullptr;
2014
3.36k
2015
3.36k
  SmallVector<int, 16> LHSMask;
2016
3.36k
  SmallVector<int, 16> RHSMask;
2017
3.36k
  if (newLHS != LHS)
2018
3.35k
    LHSMask = LHSShuffle->getShuffleMask();
2019
3.36k
  if (RHSShuffle && 
newRHS != RHS458
)
2020
458
    RHSMask = RHSShuffle->getShuffleMask();
2021
3.36k
2022
3.36k
  unsigned newLHSWidth = (newLHS != LHS) ? 
LHSOp0Width3.35k
:
LHSWidth11
;
2023
3.36k
  SmallVector<int, 16> newMask;
2024
3.36k
  bool isSplat = true;
2025
3.36k
  int SplatElt = -1;
2026
3.36k
  // Create a new mask for the new ShuffleVectorInst so that the new
2027
3.36k
  // ShuffleVectorInst is equivalent to the original one.
2028
22.5k
  for (unsigned i = 0; i < VWidth; 
++i19.2k
) {
2029
19.2k
    int eltMask;
2030
19.2k
    if (Mask[i] < 0) {
2031
1.76k
      // This element is an undef value.
2032
1.76k
      eltMask = -1;
2033
17.4k
    } else if (Mask[i] < (int)LHSWidth) {
2034
14.9k
      // This element is from left hand side vector operand.
2035
14.9k
      //
2036
14.9k
      // If LHS is going to be replaced (case 1, 2, or 4), calculate the
2037
14.9k
      // new mask value for the element.
2038
14.9k
      if (newLHS != LHS) {
2039
14.8k
        eltMask = LHSMask[Mask[i]];
2040
14.8k
        // If the value selected is an undef value, explicitly specify it
2041
14.8k
        // with a -1 mask value.
2042
14.8k
        if (eltMask >= (int)LHSOp0Width && 
isa<UndefValue>(LHSOp1)2.77k
)
2043
0
          eltMask = -1;
2044
14.8k
      } else
2045
44
        eltMask = Mask[i];
2046
14.9k
    } else {
2047
2.56k
      // This element is from right hand side vector operand
2048
2.56k
      //
2049
2.56k
      // If the value selected is an undef value, explicitly specify it
2050
2.56k
      // with a -1 mask value. (case 1)
2051
2.56k
      if (isa<UndefValue>(RHS))
2052
0
        eltMask = -1;
2053
2.56k
      // If RHS is going to be replaced (case 3 or 4), calculate the
2054
2.56k
      // new mask value for the element.
2055
2.56k
      else if (newRHS != RHS) {
2056
2.33k
        eltMask = RHSMask[Mask[i]-LHSWidth];
2057
2.33k
        // If the value selected is an undef value, explicitly specify it
2058
2.33k
        // with a -1 mask value.
2059
2.33k
        if (eltMask >= (int)RHSOp0Width) {
2060
0
          assert(isa<UndefValue>(RHSShuffle->getOperand(1))
2061
0
                 && "should have been check above");
2062
0
          eltMask = -1;
2063
0
        }
2064
2.33k
      } else
2065
226
        eltMask = Mask[i]-LHSWidth;
2066
2.56k
2067
2.56k
      // If LHS's width is changed, shift the mask value accordingly.
2068
2.56k
      // If newRHS == nullptr, i.e. LHSOp0 == RHSOp0, we want to remap any
2069
2.56k
      // references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
2070
2.56k
      // If newRHS == newLHS, we want to remap any references from newRHS to
2071
2.56k
      // newLHS so that we can properly identify splats that may occur due to
2072
2.56k
      // obfuscation across the two vectors.
2073
2.56k
      if (eltMask >= 0 && newRHS != nullptr && 
newLHS != newRHS702
)
2074
692
        eltMask += newLHSWidth;
2075
2.56k
    }
2076
19.2k
2077
19.2k
    // Check if this could still be a splat.
2078
19.2k
    if (eltMask >= 0) {
2079
17.4k
      if (SplatElt >= 0 && 
SplatElt != eltMask14.1k
)
2080
10.6k
        isSplat = false;
2081
17.4k
      SplatElt = eltMask;
2082
17.4k
    }
2083
19.2k
2084
19.2k
    newMask.push_back(eltMask);
2085
19.2k
  }
2086
3.36k
2087
3.36k
  // If the result mask is equal to one of the original shuffle masks,
2088
3.36k
  // or is a splat, do the replacement.
2089
3.36k
  if (isSplat || 
newMask == LHSMask1.89k
||
newMask == RHSMask1.89k
||
newMask == Mask1.89k
) {
2090
2.34k
    SmallVector<Constant*, 16> Elts;
2091
11.8k
    for (unsigned i = 0, e = newMask.size(); i != e; 
++i9.48k
) {
2092
9.48k
      if (newMask[i] < 0) {
2093
644
        Elts.push_back(UndefValue::get(Int32Ty));
2094
8.83k
      } else {
2095
8.83k
        Elts.push_back(ConstantInt::get(Int32Ty, newMask[i]));
2096
8.83k
      }
2097
9.48k
    }
2098
2.34k
    if (!newRHS)
2099
5
      newRHS = UndefValue::get(newLHS->getType());
2100
2.34k
    return new ShuffleVectorInst(newLHS, newRHS, ConstantVector::get(Elts));
2101
2.34k
  }
2102
1.02k
2103
1.02k
  // If the result mask is an identity, replace uses of this instruction with
2104
1.02k
  // corresponding argument.
2105
1.02k
  bool isLHSID, isRHSID;
2106
1.02k
  recognizeIdentityMask(newMask, isLHSID, isRHSID);
2107
1.02k
  if (isLHSID && 
VWidth == LHSOp0Width1
)
return replaceInstUsesWith(SVI, newLHS)0
;
2108
1.02k
  if (isRHSID && 
VWidth == RHSOp0Width0
)
return replaceInstUsesWith(SVI, newRHS)0
;
2109
1.02k
2110
1.02k
  return MadeChange ? 
&SVI0
: nullptr;
2111
1.02k
}