Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Transforms/Scalar/Scalarizer.cpp
Line| Count|Source
   1|      |//===--- Scalarizer.cpp - Scalarize vector operations ---------------------===//
   2|      |//
   3|      |//                     The LLVM Compiler Infrastructure
   4|      |//
   5|      |// This file is distributed under the University of Illinois Open Source
   6|      |// License. See LICENSE.TXT for details.
   7|      |//
   8|      |//===----------------------------------------------------------------------===//
   9|      |//
  10|      |// This pass converts vector operations into scalar operations, in order
  11|      |// to expose optimization opportunities on the individual scalar operations.
  12|      |// It is mainly intended for targets that do not have vector units, but it
  13|      |// may also be useful for revectorizing code to different vector widths.
  14|      |//
  15|      |//===----------------------------------------------------------------------===//
  16|      |
  17|      |#include "llvm/ADT/STLExtras.h"
  18|      |#include "llvm/Analysis/VectorUtils.h"
  19|      |#include "llvm/IR/IRBuilder.h"
  20|      |#include "llvm/IR/InstVisitor.h"
  21|      |#include "llvm/Pass.h"
  22|      |#include "llvm/Transforms/Scalar.h"
  23|      |#include "llvm/Transforms/Utils/BasicBlockUtils.h"
  24|      |
  25|      |using namespace llvm;
  26|      |
  27|      |#define DEBUG_TYPE "scalarizer"
  28|      |
  29|      |namespace {
  30|      |// Used to store the scattered form of a vector.
  31|      |typedef SmallVector<Value *, 8> ValueVector;
  32|      |
  33|      |// Used to map a vector Value to its scattered form.  We use std::map
  34|      |// because we want iterators to persist across insertion and because the
  35|      |// values are relatively large.
  36|      |typedef std::map<Value *, ValueVector> ScatterMap;
  37|      |
  38|      |// Lists Instructions that have been replaced with scalar implementations,
  39|      |// along with a pointer to their scattered forms.
  40|      |typedef SmallVector<std::pair<Instruction *, ValueVector *>, 16> GatherList;
  41|      |
  42|      |// Provides a very limited vector-like interface for lazily accessing one
  43|      |// component of a scattered vector or vector pointer.
  44|      |class Scatterer {
  45|      |public:
  46|    14|  Scatterer() {}
  47|      |
  48|      |  // Scatter V into Size components.  If new instructions are needed,
  49|      |  // insert them before BBI in BB.  If Cache is nonnull, use it to cache
  50|      |  // the results.
  51|      |  Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
  52|      |            ValueVector *cachePtr = nullptr);
  53|      |
  54|      |  // Return component I, creating a new Value for it if necessary.
  55|      |  Value *operator[](unsigned I);
  56|      |
  57|      |  // Return the number of components.
  58|    17|  unsigned size() const { return Size; }
  59|      |
  60|      |private:
  61|      |  BasicBlock *BB;
  62|      |  BasicBlock::iterator BBI;
  63|      |  Value *V;
  64|      |  ValueVector *CachePtr;
  65|      |  PointerType *PtrTy;
  66|      |  ValueVector Tmp;
  67|      |  unsigned Size;
  68|      |};
  69|      |
  70|      |// FCmpSplitter(FCI)(Builder, X, Y, Name) uses Builder to create an FCmp
  71|      |// called Name that compares X and Y in the same way as FCI.
  72|      |struct FCmpSplitter {
  73|     1|  FCmpSplitter(FCmpInst &fci) : FCI(fci) {}
  74|      |  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
  75|     4|                    const Twine &Name) const {
  76|     4|    return Builder.CreateFCmp(FCI.getPredicate(), Op0, Op1, Name);
  77|     4|  }
  78|      |  FCmpInst &FCI;
  79|      |};
  80|      |
  81|      |// ICmpSplitter(ICI)(Builder, X, Y, Name) uses Builder to create an ICmp
  82|      |// called Name that compares X and Y in the same way as ICI.
  83|      |struct ICmpSplitter {
  84|     6|  ICmpSplitter(ICmpInst &ici) : ICI(ici) {}
  85|      |  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
  86|     4|                    const Twine &Name) const {
  87|     4|    return Builder.CreateICmp(ICI.getPredicate(), Op0, Op1, Name);
  88|     4|  }
  89|      |  ICmpInst &ICI;
  90|      |};
  91|      |
  92|      |// BinarySplitter(BO)(Builder, X, Y, Name) uses Builder to create
  93|      |// a binary operator like BO called Name with operands X and Y.
  94|      |struct BinarySplitter {
  95|    18|  BinarySplitter(BinaryOperator &bo) : BO(bo) {}
  96|      |  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
  97|    72|                    const Twine &Name) const {
  98|    72|    return Builder.CreateBinOp(BO.getOpcode(), Op0, Op1, Name);
  99|    72|  }
 100|      |  BinaryOperator &BO;
 101|      |};
 102|      |
 103|      |// Information about a load or store that we're scalarizing.
 104|      |struct VectorLayout {
 105|    28|  VectorLayout() : VecTy(nullptr), ElemTy(nullptr), VecAlign(0), ElemSize(0) {}
 106|      |
 107|      |  // Return the alignment of element I.
 108|   100|  uint64_t getElemAlign(unsigned I) {
 109|   100|    return MinAlign(VecAlign, I * ElemSize);
 110|   100|  }
 111|      |
 112|      |  // The type of the vector.
 113|      |  VectorType *VecTy;
 114|      |
 115|      |  // The type of each element.
 116|      |  Type *ElemTy;
 117|      |
 118|      |  // The alignment of the vector.
 119|      |  uint64_t VecAlign;
 120|      |
 121|      |  // The size of each element.
 122|      |  uint64_t ElemSize;
 123|      |};
 124|      |
 125|      |class Scalarizer : public FunctionPass,
 126|      |                   public InstVisitor<Scalarizer, bool> {
 127|      |public:
 128|      |  static char ID;
 129|      |
 130|      |  Scalarizer() :
 131|     8|    FunctionPass(ID) {
 132|     8|    initializeScalarizerPass(*PassRegistry::getPassRegistry());
 133|     8|  }
 134|      |
 135|      |  bool doInitialization(Module &M) override;
 136|      |  bool runOnFunction(Function &F) override;
 137|      |
 138|      |  // InstVisitor methods.  They return true if the instruction was scalarized,
 139|      |  // false if nothing changed.
 140|    58|  bool visitInstruction(Instruction &) { return false; }
 141|      |  bool visitSelectInst(SelectInst &SI);
 142|      |  bool visitICmpInst(ICmpInst &);
 143|      |  bool visitFCmpInst(FCmpInst &);
 144|      |  bool visitBinaryOperator(BinaryOperator &);
 145|      |  bool visitGetElementPtrInst(GetElementPtrInst &);
 146|      |  bool visitCastInst(CastInst &);
 147|      |  bool visitBitCastInst(BitCastInst &);
 148|      |  bool visitShuffleVectorInst(ShuffleVectorInst &);
 149|      |  bool visitPHINode(PHINode &);
 150|      |  bool visitLoadInst(LoadInst &);
 151|      |  bool visitStoreInst(StoreInst &);
 152|      |  bool visitCallInst(CallInst &I);
 153|      |
 154| 24.6k|  static void registerOptions() {
 155| 24.6k|    // This is disabled by default because having separate loads and stores
 156| 24.6k|    // makes it more likely that the -combiner-alias-analysis limits will be
 157| 24.6k|    // reached.
 158| 24.6k|    OptionRegistry::registerOption<bool, Scalarizer,
 159| 24.6k|                                 &Scalarizer::ScalarizeLoadStore>(
 160| 24.6k|        "scalarize-load-store",
 161| 24.6k|        "Allow the scalarizer pass to scalarize loads and store", false);
 162| 24.6k|  }
 163|      |
 164|      |private:
 165|      |  Scatterer scatter(Instruction *, Value *);
 166|      |  void gather(Instruction *, const ValueVector &);
 167|      |  bool canTransferMetadata(unsigned Kind);
 168|      |  void transferMetadata(Instruction *, const ValueVector &);
 169|      |  bool getVectorLayout(Type *, unsigned, VectorLayout &, const DataLayout &);
 170|      |  bool finish();
 171|      |
 172|      |  template<typename T> bool splitBinary(Instruction &, const T &);
 173|      |
 174|      |  bool splitCall(CallInst &CI);
 175|      |
 176|      |  ScatterMap Scattered;
 177|      |  GatherList Gathered;
 178|      |  unsigned ParallelLoopAccessMDKind;
 179|      |  bool ScalarizeLoadStore;
 180|      |};
 181|      |
 182|      |char Scalarizer::ID = 0;
 183|      |} // end anonymous namespace
 184|      |
 185|      |INITIALIZE_PASS_WITH_OPTIONS(Scalarizer, "scalarizer",
 186|      |                             "Scalarize vector operations", false, false)
 187|      |
 188|      |Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
 189|      |                     ValueVector *cachePtr)
 190|   125|  : BB(bb), BBI(bbi), V(v), CachePtr(cachePtr) {
 191|   125|  Type *Ty = V->getType();
 192|   125|  PtrTy = dyn_cast<PointerType>(Ty);
 193|   125|  if (PtrTy)
 194|    25|    Ty = PtrTy->getElementType();
 195|   125|  Size = Ty->getVectorNumElements();
 196|   125|  if (!CachePtr)
 197|    14|    Tmp.resize(Size, nullptr);
 198|   111|  else if (CachePtr->empty())
 199|    65|    CachePtr->resize(Size, nullptr);
 200|   111|  else
 201|   111|    assert(Size == CachePtr->size() && "Inconsistent vector sizes");
 202|   125|}
 203|      |
 204|      |// Return component I, creating a new Value for it if necessary.
 205|   492|Value *Scatterer::operator[](unsigned I) {
 206|   492|  ValueVector &CV = (CachePtr ? *CachePtr : Tmp);
 207|   492|  // Try to reuse a previous value.
 208|   492|  if (CV[I])
 209|   180|    return CV[I];
 210|   312|  IRBuilder<> Builder(BB, BBI);
 211|   312|  if (PtrTy) {
 212|    92|    if (!CV[0]) {
 213|    23|      Type *Ty =
 214|    23|        PointerType::get(PtrTy->getElementType()->getVectorElementType(),
 215|    23|                         PtrTy->getAddressSpace());
 216|    23|      CV[0] = Builder.CreateBitCast(V, Ty, V->getName() + ".i0");
 217|    23|    }
 218|    92|    if (I != 0)
 219|    69|      CV[I] = Builder.CreateConstGEP1_32(nullptr, CV[0], I,
 220|    69|                                         V->getName() + ".i" + Twine(I));
 221|   312|  } else {
 222|   220|    // Search through a chain of InsertElementInsts looking for element I.
 223|   220|    // Record other elements in the cache.  The new V is still suitable
 224|   220|    // for all uncached indices.
 225|   229|    for (;;) {
 226|   229|      InsertElementInst *Insert = dyn_cast<InsertElementInst>(V);
 227|   229|      if (!Insert)
 228|   209|        break;
 229|    20|      ConstantInt *Idx = dyn_cast<ConstantInt>(Insert->getOperand(2));
 230|    20|      if (!Idx)
 231|     4|        break;
 232|    16|      unsigned J = Idx->getZExtValue();
 233|    16|      V = Insert->getOperand(0);
 234|    16|      if (I == J) {
 235|     7|        CV[J] = Insert->getOperand(1);
 236|     7|        return CV[J];
 237|     9|      } else if (!CV[J]) {
 238|     8|        // Only cache the first entry we find for each index we're not actively
 239|     8|        // searching for. This prevents us from going too far up the chain and
 240|     8|        // caching incorrect entries.
 241|     8|        CV[J] = Insert->getOperand(1);
 242|     8|      }
 243|   229|    }
 244|   213|    CV[I] = Builder.CreateExtractElement(V, Builder.getInt32(I),
 245|   213|                                         V->getName() + ".i" + Twine(I));
 246|   213|  }
 247|   305|  return CV[I];
 248|   492|}
 249|      |
 250|     8|bool Scalarizer::doInitialization(Module &M) {
 251|     8|  ParallelLoopAccessMDKind =
 252|     8|      M.getContext().getMDKindID("llvm.mem.parallel_loop_access");
 253|     8|  ScalarizeLoadStore =
 254|     8|      M.getContext().getOption<bool, Scalarizer, &Scalarizer::ScalarizeLoadStore>();
 255|     8|  return false;
 256|     8|}
 257|      |
 258|    32|bool Scalarizer::runOnFunction(Function &F) {
 259|    32|  if (skipFunction(F))
 260|     0|    return false;
 261|    32|  assert(Gathered.empty() && Scattered.empty());
 262|    47|  for (BasicBlock &BB : F) {
 263|   211|    for (BasicBlock::iterator II = BB.begin(), IE = BB.end(); II != IE;) {
 264|   164|      Instruction *I = &*II;
 265|   164|      bool Done = visit(I);
 266|   164|      ++II;
 267|   164|      if (Done && I->getType()->isVoidTy())
 268|    14|        I->eraseFromParent();
 269|   164|    }
 270|    47|  }
 271|    32|  return finish();
 272|    32|}
 273|      |
 274|      |// Return a scattered form of V that can be accessed by Point.  V must be a
 275|      |// vector or a pointer to a vector.
 276|   125|Scatterer Scalarizer::scatter(Instruction *Point, Value *V) {
 277|   125|  if (Argument *VArg = dyn_cast<Argument>(V)) {
 278|    36|    // Put the scattered form of arguments in the entry block,
 279|    36|    // so that it can be used everywhere.
 280|    36|    Function *F = VArg->getParent();
 281|    36|    BasicBlock *BB = &F->getEntryBlock();
 282|    36|    return Scatterer(BB, BB->begin(), V, &Scattered[V]);
 283|    36|  }
 284|    89|  if (Instruction *VOp = dyn_cast<Instruction>(V)) {
 285|    75|    // Put the scattered form of an instruction directly after the
 286|    75|    // instruction.
 287|    75|    BasicBlock *BB = VOp->getParent();
 288|    75|    return Scatterer(BB, std::next(BasicBlock::iterator(VOp)),
 289|    75|                     V, &Scattered[V]);
 290|    75|  }
 291|    14|  // In the fallback case, just put the scattered before Point and
 292|    14|  // keep the result local to Point.
 293|    14|  return Scatterer(Point->getParent(), Point->getIterator(), V);
 294|    14|}
 295|      |
 296|      |// Replace Op with the gathered form of the components in CV.  Defer the
 297|      |// deletion of Op and creation of the gathered form to the end of the pass,
 298|      |// so that we can avoid creating the gathered form if all uses of Op are
 299|      |// replaced with uses of CV.
 300|    55|void Scalarizer::gather(Instruction *Op, const ValueVector &CV) {
 301|    55|  // Since we're not deleting Op yet, stub out its operands, so that it
 302|    55|  // doesn't make anything live unnecessarily.
 303|   161|  for (unsigned I = 0, E = Op->getNumOperands(); I != E; ++I)
 304|   106|    Op->setOperand(I, UndefValue::get(Op->getOperand(I)->getType()));
 305|    55|
 306|    55|  transferMetadata(Op, CV);
 307|    55|
 308|    55|  // If we already have a scattered form of Op (created from ExtractElements
 309|    55|  // of Op itself), replace them with the new form.
 310|    55|  ValueVector &SV = Scattered[Op];
 311|    55|  if (!SV.empty()) {
 312|    18|    for (unsigned I = 0, E = SV.size(); I != E; ++I) {
 313|    14|      Value *V = SV[I];
 314|    14|      if (V == nullptr)
 315|     1|        continue;
 316|    13|
 317|    13|      Instruction *Old = cast<Instruction>(V);
 318|    13|      CV[I]->takeName(Old);
 319|    13|      Old->replaceAllUsesWith(CV[I]);
 320|    13|      Old->eraseFromParent();
 321|    13|    }
 322|     4|  }
 323|    55|  SV = CV;
 324|    55|  Gathered.push_back(GatherList::value_type(Op, &SV));
 325|    55|}
 326|      |
 327|      |// Return true if it is safe to transfer the given metadata tag from
 328|      |// vector to scalar instructions.
 329|    48|bool Scalarizer::canTransferMetadata(unsigned Tag) {
 330|    48|  return (Tag == LLVMContext::MD_tbaa
 331|    28|          || Tag == LLVMContext::MD_fpmath
 332|    24|          || Tag == LLVMContext::MD_tbaa_struct
 333|    16|          || Tag == LLVMContext::MD_invariant_load
 334|    16|          || Tag == LLVMContext::MD_alias_scope
 335|    16|          || Tag == LLVMContext::MD_noalias
 336|    16|          || Tag == ParallelLoopAccessMDKind);
 337|    48|}
 338|      |
 339|      |// Transfer metadata from Op to the instructions in CV if it is known
 340|      |// to be safe to do so.
 341|    69|void Scalarizer::transferMetadata(Instruction *Op, const ValueVector &CV) {
 342|    69|  SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
 343|    69|  Op->getAllMetadataOtherThanDebugLoc(MDs);
 344|   347|  for (unsigned I = 0, E = CV.size(); I != E; ++I) {
 345|   278|    if (Instruction *New = dyn_cast<Instruction>(CV[I])) {
 346|   277|      for (const auto &MD : MDs)
 347|    48|        if (canTransferMetadata(MD.first))
 348|    40|          New->setMetadata(MD.first, MD.second);
 349|   277|      if (Op->getDebugLoc() && !New->getDebugLoc())
 350|     0|        New->setDebugLoc(Op->getDebugLoc());
 351|   277|    }
 352|   278|  }
 353|    69|}
 354|      |
 355|      |// Try to fill in Layout from Ty, returning true on success.  Alignment is
 356|      |// the alignment of the vector, or 0 if the ABI default should be used.
 357|      |bool Scalarizer::getVectorLayout(Type *Ty, unsigned Alignment,
 358|    28|                                 VectorLayout &Layout, const DataLayout &DL) {
 359|    28|  // Make sure we're dealing with a vector.
 360|    28|  Layout.VecTy = dyn_cast<VectorType>(Ty);
 361|    28|  if (!Layout.VecTy)
 362|     0|    return false;
 363|    28|
 364|    28|  // Check that we're dealing with full-byte elements.
 365|    28|  Layout.ElemTy = Layout.VecTy->getElementType();
 366|    28|  if (DL.getTypeSizeInBits(Layout.ElemTy) !=
 367|    28|      DL.getTypeStoreSizeInBits(Layout.ElemTy))
 368|     3|    return false;
 369|    25|
 370|    25|  if (Alignment)
 371|     7|    Layout.VecAlign = Alignment;
 372|    25|  else
 373|    18|    Layout.VecAlign = DL.getABITypeAlignment(Layout.VecTy);
 374|    28|  Layout.ElemSize = DL.getTypeStoreSize(Layout.ElemTy);
 375|    28|  return true;
 376|    28|}
 377|      |
 378|      |// Scalarize two-operand instruction I, using Split(Builder, X, Y, Name)
 379|      |// to create an instruction like I with operands X and Y and name Name.
 380|      |template<typename Splitter>
 381|    25|bool Scalarizer::splitBinary(Instruction &I, const Splitter &Split) {
 382|    25|  VectorType *VT = dyn_cast<VectorType>(I.getType());
 383|    25|  if (!VT)
 384|    12|    return false;
 385|    13|
 386|    13|  unsigned NumElems = VT->getNumElements();
 387|    13|  IRBuilder<> Builder(&I);
 388|    13|  Scatterer Op0 = scatter(&I, I.getOperand(0));
 389|    13|  Scatterer Op1 = scatter(&I, I.getOperand(1));
 390|    13|  assert(Op0.size() == NumElems && "Mismatched binary operation");
 391|    13|  assert(Op1.size() == NumElems && "Mismatched binary operation");
 392|    13|  ValueVector Res;
 393|    13|  Res.resize(NumElems);
 394|    93|  for (unsigned Elem = 0; Elem < NumElems; ++Elem)
 395|    80|    Res[Elem] = Split(Builder, Op0[Elem], Op1[Elem],
 396|    80|                      I.getName() + ".i" + Twine(Elem));
 397|    25|  gather(&I, Res);
 398|    25|  return true;
 399|    25|}
Scalarizer.cpp:bool (anonymous namespace)::Scalarizer::splitBinary<(anonymous namespace)::BinarySplitter>(llvm::Instruction&, (anonymous namespace)::BinarySplitter const&)
Line| Count|Source
 381|    18|bool Scalarizer::splitBinary(Instruction &I, const Splitter &Split) {
 382|    18|  VectorType *VT = dyn_cast<VectorType>(I.getType());
 383|    18|  if (!VT)
 384|     7|    return false;
 385|    11|
 386|    11|  unsigned NumElems = VT->getNumElements();
 387|    11|  IRBuilder<> Builder(&I);
 388|    11|  Scatterer Op0 = scatter(&I, I.getOperand(0));
 389|    11|  Scatterer Op1 = scatter(&I, I.getOperand(1));
 390|    11|  assert(Op0.size() == NumElems && "Mismatched binary operation");
 391|    11|  assert(Op1.size() == NumElems && "Mismatched binary operation");
 392|    11|  ValueVector Res;
 393|    11|  Res.resize(NumElems);
 394|    83|  for (unsigned Elem = 0; Elem < NumElems; ++Elem)
 395|    72|    Res[Elem] = Split(Builder, Op0[Elem], Op1[Elem],
 396|    72|                      I.getName() + ".i" + Twine(Elem));
 397|    18|  gather(&I, Res);
 398|    18|  return true;
 399|    18|}
Scalarizer.cpp:bool (anonymous namespace)::Scalarizer::splitBinary<(anonymous namespace)::ICmpSplitter>(llvm::Instruction&, (anonymous namespace)::ICmpSplitter const&)
Line| Count|Source
 381|     6|bool Scalarizer::splitBinary(Instruction &I, const Splitter &Split) {
 382|     6|  VectorType *VT = dyn_cast<VectorType>(I.getType());
 383|     6|  if (!VT)
 384|     5|    return false;
 385|     1|
 386|     1|  unsigned NumElems = VT->getNumElements();
 387|     1|  IRBuilder<> Builder(&I);
 388|     1|  Scatterer Op0 = scatter(&I, I.getOperand(0));
 389|     1|  Scatterer Op1 = scatter(&I, I.getOperand(1));
 390|     1|  assert(Op0.size() == NumElems && "Mismatched binary operation");
 391|     1|  assert(Op1.size() == NumElems && "Mismatched binary operation");
 392|     1|  ValueVector Res;
 393|     1|  Res.resize(NumElems);
 394|     5|  for (unsigned Elem = 0; Elem < NumElems; ++Elem)
 395|     4|    Res[Elem] = Split(Builder, Op0[Elem], Op1[Elem],
 396|     4|                      I.getName() + ".i" + Twine(Elem));
 397|     6|  gather(&I, Res);
 398|     6|  return true;
 399|     6|}
Scalarizer.cpp:bool (anonymous namespace)::Scalarizer::splitBinary<(anonymous namespace)::FCmpSplitter>(llvm::Instruction&, (anonymous namespace)::FCmpSplitter const&)
Line| Count|Source
 381|     1|bool Scalarizer::splitBinary(Instruction &I, const Splitter &Split) {
 382|     1|  VectorType *VT = dyn_cast<VectorType>(I.getType());
 383|     1|  if (!VT)
 384|     0|    return false;
 385|     1|
 386|     1|  unsigned NumElems = VT->getNumElements();
 387|     1|  IRBuilder<> Builder(&I);
 388|     1|  Scatterer Op0 = scatter(&I, I.getOperand(0));
 389|     1|  Scatterer Op1 = scatter(&I, I.getOperand(1));
 390|     1|  assert(Op0.size() == NumElems && "Mismatched binary operation");
 391|     1|  assert(Op1.size() == NumElems && "Mismatched binary operation");
 392|     1|  ValueVector Res;
 393|     1|  Res.resize(NumElems);
 394|     5|  for (unsigned Elem = 0; Elem < NumElems; ++Elem)
 395|     4|    Res[Elem] = Split(Builder, Op0[Elem], Op1[Elem],
 396|     4|                      I.getName() + ".i" + Twine(Elem));
 397|     1|  gather(&I, Res);
 398|     1|  return true;
 399|     1|}
 400|      |
 401|     6|static bool isTriviallyScalariable(Intrinsic::ID ID) {
 402|     6|  return isTriviallyVectorizable(ID);
 403|     6|}
 404|      |
 405|      |// All of the current scalarizable intrinsics only have one mangled type.
 406|      |static Function *getScalarIntrinsicDeclaration(Module *M,
 407|      |                                               Intrinsic::ID ID,
 408|     6|                                               VectorType *Ty) {
 409|     6|  return Intrinsic::getDeclaration(M, ID, { Ty->getScalarType() });
 410|     6|}
 411|      |
 412|      |/// If a call to a vector typed intrinsic function, split into a scalar call per
 413|      |/// element if possible for the intrinsic.
 414|    12|bool Scalarizer::splitCall(CallInst &CI) {
 415|    12|  VectorType *VT = dyn_cast<VectorType>(CI.getType());
 416|    12|  if (!VT)
 417|     4|    return false;
 418|     8|
 419|     8|  Function *F = CI.getCalledFunction();
 420|     8|  if (!F)
 421|     0|    return false;
 422|     8|
 423|     8|  Intrinsic::ID ID = F->getIntrinsicID();
 424|     8|  if (ID == Intrinsic::not_intrinsic || !isTriviallyScalariable(ID))
 425|     2|    return false;
 426|     6|
 427|     6|  unsigned NumElems = VT->getNumElements();
 428|     6|  unsigned NumArgs = CI.getNumArgOperands();
 429|     6|
 430|     6|  ValueVector ScalarOperands(NumArgs);
 431|     6|  SmallVector<Scatterer, 8> Scattered(NumArgs);
 432|     6|
 433|     6|  Scattered.resize(NumArgs);
 434|     6|
 435|     6|  // Assumes that any vector type has the same number of elements as the return
 436|     6|  // vector type, which is true for all current intrinsics.
 437|    17|  for (unsigned I = 0; I != NumArgs; ++I) {
 438|    11|    Value *OpI = CI.getOperand(I);
 439|    11|    if (OpI->getType()->isVectorTy()) {
 440|     9|      Scattered[I] = scatter(&CI, OpI);
 441|     9|      assert(Scattered[I].size() == NumElems && "mismatched call operands");
 442|    11|    } else {
 443|     2|      ScalarOperands[I] = OpI;
 444|     2|    }
 445|    11|  }
 446|     6|
 447|     6|  ValueVector Res(NumElems);
 448|     6|  ValueVector ScalarCallOps(NumArgs);
 449|     6|
 450|     6|  Function *NewIntrin = getScalarIntrinsicDeclaration(F->getParent(), ID, VT);
 451|     6|  IRBuilder<> Builder(&CI);
 452|     6|
 453|     6|  // Perform actual scalarization, taking care to preserve any scalar operands.
 454|    18|  for (unsigned Elem = 0; Elem < NumElems; ++Elem) {
 455|    12|    ScalarCallOps.clear();
 456|    12|
 457|    34|    for (unsigned J = 0; J != NumArgs; ++J) {
 458|    22|      if (hasVectorInstrinsicScalarOpd(ID, J))
 459|     4|        ScalarCallOps.push_back(ScalarOperands[J]);
 460|    22|      else
 461|    18|        ScalarCallOps.push_back(Scattered[J][Elem]);
 462|    22|    }
 463|    12|
 464|    12|    Res[Elem] = Builder.CreateCall(NewIntrin, ScalarCallOps,
 465|    12|                                   CI.getName() + ".i" + Twine(Elem));
 466|    12|  }
 467|    12|
 468|    12|  gather(&CI, Res);
 469|    12|  return true;
 470|    12|}
 471|      |
 472|     2|bool Scalarizer::visitSelectInst(SelectInst &SI) {
 473|     2|  VectorType *VT = dyn_cast<VectorType>(SI.getType());
 474|     2|  if (!VT)
 475|     0|    return false;
 476|     2|
 477|     2|  unsigned NumElems = VT->getNumElements();
 478|     2|  IRBuilder<> Builder(&SI);
 479|     2|  Scatterer Op1 = scatter(&SI, SI.getOperand(1));
 480|     2|  Scatterer Op2 = scatter(&SI, SI.getOperand(2));
 481|     2|  assert(Op1.size() == NumElems && "Mismatched select");
 482|     2|  assert(Op2.size() == NumElems && "Mismatched select");
 483|     2|  ValueVector Res;
 484|     2|  Res.resize(NumElems);
 485|     2|
 486|     2|  if (SI.getOperand(0)->getType()->isVectorTy()) {
 487|     2|    Scatterer Op0 = scatter(&SI, SI.getOperand(0));
 488|     2|    assert(Op0.size() == NumElems && "Mismatched select");
 489|    10|    for (unsigned I = 0; I < NumElems; ++I)
 490|     8|      Res[I] = Builder.CreateSelect(Op0[I], Op1[I], Op2[I],
 491|     8|                                    SI.getName() + ".i" + Twine(I));
 492|     0|  } else {
 493|     0|    Value *Op0 = SI.getOperand(0);
 494|     0|    for (unsigned I = 0; I < NumElems; ++I)
 495|     0|      Res[I] = Builder.CreateSelect(Op0, Op1[I], Op2[I],
 496|     0|                                    SI.getName() + ".i" + Twine(I));
 497|     0|  }
 498|     2|  gather(&SI, Res);
 499|     2|  return true;
 500|     2|}
 501|      |
 502|     6|bool Scalarizer::visitICmpInst(ICmpInst &ICI) {
 503|     6|  return splitBinary(ICI, ICmpSplitter(ICI));
 504|     6|}
 505|      |
 506|     1|bool Scalarizer::visitFCmpInst(FCmpInst &FCI) {
 507|     1|  return splitBinary(FCI, FCmpSplitter(FCI));
 508|     1|}
 509|      |
 510|    18|bool Scalarizer::visitBinaryOperator(BinaryOperator &BO) {
 511|    18|  return splitBinary(BO, BinarySplitter(BO));
 512|    18|}
 513|      |
 514|    12|bool Scalarizer::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
 515|    12|  VectorType *VT = dyn_cast<VectorType>(GEPI.getType());
 516|    12|  if (!VT)
 517|     5|    return false;
 518|     7|
 519|     7|  IRBuilder<> Builder(&GEPI);
 520|     7|  unsigned NumElems = VT->getNumElements();
 521|     7|  unsigned NumIndices = GEPI.getNumIndices();
 522|     7|
 523|     7|  // The base pointer might be scalar even if it's a vector GEP. In those cases,
 524|     7|  // splat the pointer into a vector value, and scatter that vector.
 525|     7|  Value *Op0 = GEPI.getOperand(0);
 526|     7|  if (!Op0->getType()->isVectorTy())
 527|     2|    Op0 = Builder.CreateVectorSplat(NumElems, Op0);
 528|     7|  Scatterer Base = scatter(&GEPI, Op0);
 529|     7|
 530|     7|  SmallVector<Scatterer, 8> Ops;
 531|     7|  Ops.resize(NumIndices);
 532|    15|  for (unsigned I = 0; I < NumIndices; ++I) {
 533|     8|    Value *Op = GEPI.getOperand(I + 1);
 534|     8|
 535|     8|    // The indices might be scalars even if it's a vector GEP. In those cases,
 536|     8|    // splat the scalar into a vector value, and scatter that vector.
 537|     8|    if (!Op->getType()->isVectorTy())
 538|     3|      Op = Builder.CreateVectorSplat(NumElems, Op);
 539|     8|
 540|     8|    Ops[I] = scatter(&GEPI, Op);
 541|     8|  }
 542|     7|
 543|     7|  ValueVector Res;
 544|     7|  Res.resize(NumElems);
 545|    35|  for (unsigned I = 0; I < NumElems; ++I) {
 546|    28|    SmallVector<Value *, 8> Indices;
 547|    28|    Indices.resize(NumIndices);
 548|    60|    for (unsigned J = 0; J < NumIndices; ++J)
 549|    32|      Indices[J] = Ops[J][I];
 550|    28|    Res[I] = Builder.CreateGEP(GEPI.getSourceElementType(), Base[I], Indices,
 551|    28|                               GEPI.getName() + ".i" + Twine(I));
 552|    28|    if (GEPI.isInBounds())
 553|     8|      if (GetElementPtrInst *NewGEPI = dyn_cast<GetElementPtrInst>(Res[I]))
 554|     8|        NewGEPI->setIsInBounds();
 555|    28|  }
 556|    12|  gather(&GEPI, Res);
 557|    12|  return true;
 558|    12|}
 559|      |
 560|     2|bool Scalarizer::visitCastInst(CastInst &CI) {
 561|     2|  VectorType *VT = dyn_cast<VectorType>(CI.getDestTy());
 562|     2|  if (!VT)
 563|     0|    return false;
 564|     2|
 565|     2|  unsigned NumElems = VT->getNumElements();
 566|     2|  IRBuilder<> Builder(&CI);
 567|     2|  Scatterer Op0 = scatter(&CI, CI.getOperand(0));
 568|     2|  assert(Op0.size() == NumElems && "Mismatched cast");
 569|     2|  ValueVector Res;
 570|     2|  Res.resize(NumElems);
 571|    10|  for (unsigned I = 0; I < NumElems; ++I)
 572|     8|    Res[I] = Builder.CreateCast(CI.getOpcode(), Op0[I], VT->getElementType(),
 573|     8|                                CI.getName() + ".i" + Twine(I));
 574|     2|  gather(&CI, Res);
 575|     2|  return true;
 576|     2|}
 577|      |
 578|     5|bool Scalarizer::visitBitCastInst(BitCastInst &BCI) {
 579|     5|  VectorType *DstVT = dyn_cast<VectorType>(BCI.getDestTy());
 580|     5|  VectorType *SrcVT = dyn_cast<VectorType>(BCI.getSrcTy());
 581|     5|  if (!DstVT || !SrcVT)
 582|     1|    return false;
 583|     4|
 584|     4|  unsigned DstNumElems = DstVT->getNumElements();
 585|     4|  unsigned SrcNumElems = SrcVT->getNumElements();
 586|     4|  IRBuilder<> Builder(&BCI);
 587|     4|  Scatterer Op0 = scatter(&BCI, BCI.getOperand(0));
 588|     4|  ValueVector Res;
 589|     4|  Res.resize(DstNumElems);
 590|     4|
 591|     4|  if (DstNumElems == SrcNumElems) {
 592|     0|    for (unsigned I = 0; I < DstNumElems; ++I)
 593|     0|      Res[I] = Builder.CreateBitCast(Op0[I], DstVT->getElementType(),
 594|     0|                                     BCI.getName() + ".i" + Twine(I));
 595|     4|  } else if (DstNumElems > SrcNumElems) {
 596|     2|    // <M x t1> -> <N*M x t2>.  Convert each t1 to <N x t2> and copy the
 597|     2|    // individual elements to the destination.
 598|     2|    unsigned FanOut = DstNumElems / SrcNumElems;
 599|     2|    Type *MidTy = VectorType::get(DstVT->getElementType(), FanOut);
 600|     2|    unsigned ResI = 0;
 601|     6|    for (unsigned Op0I = 0; Op0I < SrcNumElems; ++Op0I) {
 602|     4|      Value *V = Op0[Op0I];
 603|     4|      Instruction *VI;
 604|     4|      // Look through any existing bitcasts before converting to <N x t2>.
 605|     4|      // In the best case, the resulting conversion might be a no-op.
 606|     8|      while ((VI = dyn_cast<Instruction>(V)) &&
 607|     8|             VI->getOpcode() == Instruction::BitCast)
 608|     4|        V = VI->getOperand(0);
 609|     4|      V = Builder.CreateBitCast(V, MidTy, V->getName() + ".cast");
 610|     4|      Scatterer Mid = scatter(&BCI, V);
 611|    12|      for (unsigned MidI = 0; MidI < FanOut; ++MidI)
 612|     8|        Res[ResI++] = Mid[MidI];
 613|     4|    }
 614|     4|  } else {
 615|     2|    // <N*M x t1> -> <M x t2>.  Convert each group of <N x t1> into a t2.
 616|     2|    unsigned FanIn = SrcNumElems / DstNumElems;
 617|     2|    Type *MidTy = VectorType::get(SrcVT->getElementType(), FanIn);
 618|     2|    unsigned Op0I = 0;
 619|     6|    for (unsigned ResI = 0; ResI < DstNumElems; ++ResI) {
 620|     4|      Value *V = UndefValue::get(MidTy);
 621|    12|      for (unsigned MidI = 0; MidI < FanIn; ++MidI)
 622|     8|        V = Builder.CreateInsertElement(V, Op0[Op0I++], Builder.getInt32(MidI),
 623|     8|                                        BCI.getName() + ".i" + Twine(ResI)
 624|     8|                                        + ".upto" + Twine(MidI));
 625|     4|      Res[ResI] = Builder.CreateBitCast(V, DstVT->getElementType(),
 626|     4|                                        BCI.getName() + ".i" + Twine(ResI));
 627|     4|    }
 628|     4|  }
 629|     5|  gather(&BCI, Res);
 630|     5|  return true;
 631|     5|}
 632|      |
 633|     5|bool Scalarizer::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
 634|     5|  VectorType *VT = dyn_cast<VectorType>(SVI.getType());
 635|     5|  if (!VT)
 636|     0|    return false;
 637|     5|
 638|     5|  unsigned NumElems = VT->getNumElements();
 639|     5|  Scatterer Op0 = scatter(&SVI, SVI.getOperand(0));
 640|     5|  Scatterer Op1 = scatter(&SVI, SVI.getOperand(1));
 641|     5|  ValueVector Res;
 642|     5|  Res.resize(NumElems);
 643|     5|
 644|    19|  for (unsigned I = 0; I < NumElems; ++I) {
 645|    14|    int Selector = SVI.getMaskValue(I);
 646|    14|    if (Selector < 0)
 647|     0|      Res[I] = UndefValue::get(VT->getElementType());
 648|    14|    else if (unsigned(Selector) < Op0.size())
 649|    11|      Res[I] = Op0[Selector];
 650|    14|    else
 651|     3|      Res[I] = Op1[Selector - Op0.size()];
 652|    14|  }
 653|     5|  gather(&SVI, Res);
 654|     5|  return true;
 655|     5|}
 656|      |
 657|    10|bool Scalarizer::visitPHINode(PHINode &PHI) {
 658|    10|  VectorType *VT = dyn_cast<VectorType>(PHI.getType());
 659|    10|  if (!VT)
 660|     5|    return false;
 661|     5|
 662|     5|  unsigned NumElems = VT->getNumElements();
 663|     5|  IRBuilder<> Builder(&PHI);
 664|     5|  ValueVector Res;
 665|     5|  Res.resize(NumElems);
 666|     5|
 667|     5|  unsigned NumOps = PHI.getNumOperands();
 668|    21|  for (unsigned I = 0; I < NumElems; ++I)
 669|    16|    Res[I] = Builder.CreatePHI(VT->getElementType(), NumOps,
 670|    16|                               PHI.getName() + ".i" + Twine(I));
 671|     5|
 672|    15|  for (unsigned I = 0; I < NumOps; ++I) {
 673|    10|    Scatterer Op = scatter(&PHI, PHI.getIncomingValue(I));
 674|    10|    BasicBlock *IncomingBlock = PHI.getIncomingBlock(I);
 675|    42|    for (unsigned J = 0; J < NumElems; ++J)
 676|    32|      cast<PHINode>(Res[J])->addIncoming(Op[J], IncomingBlock);
 677|    10|  }
 678|    10|  gather(&PHI, Res);
 679|    10|  return true;
 680|    10|}
 681|      |
 682|    18|bool Scalarizer::visitLoadInst(LoadInst &LI) {
 683|    18|  if (!ScalarizeLoadStore)
 684|     5|    return false;
 685|    13|  if (!LI.isSimple())
 686|     0|    return false;
 687|    13|
 688|    13|  VectorLayout Layout;
 689|    13|  if (!getVectorLayout(LI.getType(), LI.getAlignment(), Layout,
 690|    13|                       LI.getModule()->getDataLayout()))
 691|     2|    return false;
 692|    11|
 693|    11|  unsigned NumElems = Layout.VecTy->getNumElements();
 694|    11|  IRBuilder<> Builder(&LI);
 695|    11|  Scatterer Ptr = scatter(&LI, LI.getPointerOperand());
 696|    11|  ValueVector Res;
 697|    11|  Res.resize(NumElems);
 698|    11|
 699|    55|  for (unsigned I = 0; I < NumElems; ++I)
 700|    44|    Res[I] = Builder.CreateAlignedLoad(Ptr[I], Layout.getElemAlign(I),
 701|    44|                                       LI.getName() + ".i" + Twine(I));
 702|    18|  gather(&LI, Res);
 703|    18|  return true;
 704|    18|}
 705|      |
 706|    15|bool Scalarizer::visitStoreInst(StoreInst &SI) {
 707|    15|  if (!ScalarizeLoadStore)
 708|     0|    return false;
 709|    15|  if (!SI.isSimple())
 710|     0|    return false;
 711|    15|
 712|    15|  VectorLayout Layout;
 713|    15|  Value *FullValue = SI.getValueOperand();
 714|    15|  if (!getVectorLayout(FullValue->getType(), SI.getAlignment(), Layout,
 715|    15|                       SI.getModule()->getDataLayout()))
 716|     1|    return false;
 717|    14|
 718|    14|  unsigned NumElems = Layout.VecTy->getNumElements();
 719|    14|  IRBuilder<> Builder(&SI);
 720|    14|  Scatterer Ptr = scatter(&SI, SI.getPointerOperand());
 721|    14|  Scatterer Val = scatter(&SI, FullValue);
 722|    14|
 723|    14|  ValueVector Stores;
 724|    14|  Stores.resize(NumElems);
 725|    70|  for (unsigned I = 0; I < NumElems; ++I) {
 726|    56|    unsigned Align = Layout.getElemAlign(I);
 727|    56|    Stores[I] = Builder.CreateAlignedStore(Val[I], Ptr[I], Align);
 728|    56|  }
 729|    15|  transferMetadata(&SI, Stores);
 730|    15|  return true;
 731|    15|}
 732|      |
 733|    12|bool Scalarizer::visitCallInst(CallInst &CI) {
 734|    12|  return splitCall(CI);
 735|    12|}
 736|      |
 737|      |// Delete the instructions that we scalarized.  If a full vector result
 738|      |// is still needed, recreate it using InsertElements.
 739|    32|bool Scalarizer::finish() {
 740|    32|  // The presence of data in Gathered or Scattered indicates changes
 741|    32|  // made to the Function.
 742|    32|  if (Gathered.empty() && Scattered.empty())
 743|     1|    return false;
 744|    31|  for (const auto &GMI : Gathered) {
 745|    55|    Instruction *Op = GMI.first;
 746|    55|    ValueVector &CV = *GMI.second;
 747|    55|    if (!Op->use_empty()) {
 748|    13|      // The value is still needed, so recreate it using a series of
 749|    13|      // InsertElements.
 750|    13|      Type *Ty = Op->getType();
 751|    13|      Value *Res = UndefValue::get(Ty);
 752|    13|      BasicBlock *BB = Op->getParent();
 753|    13|      unsigned Count = Ty->getVectorNumElements();
 754|    13|      IRBuilder<> Builder(Op);
 755|    13|      if (isa<PHINode>(Op))
 756|     2|        Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
 757|    79|      for (unsigned I = 0; I < Count; ++I)
 758|    66|        Res = Builder.CreateInsertElement(Res, CV[I], Builder.getInt32(I),
 759|    66|                                          Op->getName() + ".upto" + Twine(I));
 760|    13|      Res->takeName(Op);
 761|    13|      Op->replaceAllUsesWith(Res);
 762|    13|    }
 763|    55|    Op->eraseFromParent();
 764|    55|  }
 765|    32|  Gathered.clear();
 766|    32|  Scattered.clear();
 767|    32|  return true;
 768|    32|}
 769|      |
 770|     0|FunctionPass *llvm::createScalarizerPass() {
 771|     0|  return new Scalarizer();
 772|     0|}
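
Editor's note (not generated by llvm-cov): the header comment above describes the pass as converting vector operations into scalar operations so that each element can be optimized on its own. As a rough illustration of that scatter/operate/gather idea, the hypothetical C++ helper below expands a vector floating-point add into per-element extract/add/insert instructions using the standard IRBuilder API; it is a minimal sketch under the assumption that the caller already has an IRBuilder positioned at the instruction being replaced, and it is not part of Scalarizer.cpp or the coverage data above.

// Hypothetical helper (illustration only): expand a vector fadd `A + B` into
// one scalar fadd per lane and rebuild the vector result with insertelement,
// mirroring the scatter()/gather() structure used by the Scalarizer pass.
#include "llvm/ADT/Twine.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"

using namespace llvm;

static Value *scalarizeFAddExample(IRBuilder<> &Builder, Value *A, Value *B) {
  auto *VT = cast<VectorType>(A->getType());
  unsigned NumElems = VT->getNumElements();
  // Start from undef and fill in one lane at a time, as finish() does when it
  // recreates a still-needed vector result.
  Value *Res = UndefValue::get(VT);
  for (unsigned I = 0; I < NumElems; ++I) {
    // "Scatter": pull lane I out of each operand.
    Value *AI = Builder.CreateExtractElement(A, Builder.getInt32(I),
                                             A->getName() + ".i" + Twine(I));
    Value *BI = Builder.CreateExtractElement(B, Builder.getInt32(I),
                                             B->getName() + ".i" + Twine(I));
    // Perform the operation on the scalar lane.
    Value *Sum = Builder.CreateFAdd(AI, BI);
    // "Gather": put the scalar result back into the vector being rebuilt.
    Res = Builder.CreateInsertElement(Res, Sum, Builder.getInt32(I));
  }
  return Res;
}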