Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Transforms/Scalar/Scalarizer.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- Scalarizer.cpp - Scalarize vector operations -----------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This pass converts vector operations into scalar operations, in order
10
// to expose optimization opportunities on the individual scalar operations.
11
// It is mainly intended for targets that do not have vector units, but it
12
// may also be useful for revectorizing code to different vector widths.
13
//
14
//===----------------------------------------------------------------------===//
15
16
#include "llvm/ADT/PostOrderIterator.h"
17
#include "llvm/ADT/SmallVector.h"
18
#include "llvm/ADT/Twine.h"
19
#include "llvm/Analysis/VectorUtils.h"
20
#include "llvm/IR/Argument.h"
21
#include "llvm/IR/BasicBlock.h"
22
#include "llvm/IR/Constants.h"
23
#include "llvm/IR/DataLayout.h"
24
#include "llvm/IR/DerivedTypes.h"
25
#include "llvm/IR/Function.h"
26
#include "llvm/IR/IRBuilder.h"
27
#include "llvm/IR/InstVisitor.h"
28
#include "llvm/IR/InstrTypes.h"
29
#include "llvm/IR/Instruction.h"
30
#include "llvm/IR/Instructions.h"
31
#include "llvm/IR/Intrinsics.h"
32
#include "llvm/IR/LLVMContext.h"
33
#include "llvm/IR/Module.h"
34
#include "llvm/IR/Type.h"
35
#include "llvm/IR/Value.h"
36
#include "llvm/Pass.h"
37
#include "llvm/Support/Casting.h"
38
#include "llvm/Support/MathExtras.h"
39
#include "llvm/Support/Options.h"
40
#include "llvm/Transforms/Scalar.h"
41
#include "llvm/Transforms/Scalar/Scalarizer.h"
42
#include <cassert>
43
#include <cstdint>
44
#include <iterator>
45
#include <map>
46
#include <utility>
47
48
using namespace llvm;
49
50
#define DEBUG_TYPE "scalarizer"
51
52
// This is disabled by default because having separate loads and stores
53
// makes it more likely that the -combiner-alias-analysis limits will be
54
// reached.
55
static cl::opt<bool>
56
    ScalarizeLoadStore("scalarize-load-store", cl::init(false), cl::Hidden,
57
                       cl::desc("Allow the scalarizer pass to scalarize loads and store"));
58
59
namespace {
60
61
// Used to store the scattered form of a vector.
62
using ValueVector = SmallVector<Value *, 8>;
63
64
// Used to map a vector Value to its scattered form.  We use std::map
65
// because we want iterators to persist across insertion and because the
66
// values are relatively large.
67
using ScatterMap = std::map<Value *, ValueVector>;
68
69
// Lists Instructions that have been replaced with scalar implementations,
70
// along with a pointer to their scattered forms.
71
using GatherList = SmallVector<std::pair<Instruction *, ValueVector *>, 16>;
72
73
// Provides a very limited vector-like interface for lazily accessing one
74
// component of a scattered vector or vector pointer.
75
class Scatterer {
76
public:
77
38
  Scatterer() = default;
78
79
  // Scatter V into Size components.  If new instructions are needed,
80
  // insert them before BBI in BB.  If Cache is nonnull, use it to cache
81
  // the results.
82
  Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
83
            ValueVector *cachePtr = nullptr);
84
85
  // Return component I, creating a new Value for it if necessary.
86
  Value *operator[](unsigned I);
87
88
  // Return the number of components.
89
44
  unsigned size() const { return Size; }
90
91
private:
92
  BasicBlock *BB;
93
  BasicBlock::iterator BBI;
94
  Value *V;
95
  ValueVector *CachePtr;
96
  PointerType *PtrTy;
97
  ValueVector Tmp;
98
  unsigned Size;
99
};
100
101
// FCmpSpliiter(FCI)(Builder, X, Y, Name) uses Builder to create an FCmp
102
// called Name that compares X and Y in the same way as FCI.
103
struct FCmpSplitter {
104
6
  FCmpSplitter(FCmpInst &fci) : FCI(fci) {}
105
106
  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
107
20
                    const Twine &Name) const {
108
20
    return Builder.CreateFCmp(FCI.getPredicate(), Op0, Op1, Name);
109
20
  }
110
111
  FCmpInst &FCI;
112
};
113
114
// ICmpSpliiter(ICI)(Builder, X, Y, Name) uses Builder to create an ICmp
115
// called Name that compares X and Y in the same way as ICI.
116
struct ICmpSplitter {
117
14
  ICmpSplitter(ICmpInst &ici) : ICI(ici) {}
118
119
  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
120
8
                    const Twine &Name) const {
121
8
    return Builder.CreateICmp(ICI.getPredicate(), Op0, Op1, Name);
122
8
  }
123
124
  ICmpInst &ICI;
125
};
126
127
// UnarySpliiter(UO)(Builder, X, Name) uses Builder to create
128
// a unary operator like UO called Name with operand X.
129
struct UnarySplitter {
130
4
  UnarySplitter(UnaryOperator &uo) : UO(uo) {}
131
132
12
  Value *operator()(IRBuilder<> &Builder, Value *Op, const Twine &Name) const {
133
12
    return Builder.CreateUnOp(UO.getOpcode(), Op, Name);
134
12
  }
135
136
  UnaryOperator &UO;
137
};
138
139
// BinarySpliiter(BO)(Builder, X, Y, Name) uses Builder to create
140
// a binary operator like BO called Name with operands X and Y.
141
struct BinarySplitter {
142
44
  BinarySplitter(BinaryOperator &bo) : BO(bo) {}
143
144
  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
145
156
                    const Twine &Name) const {
146
156
    return Builder.CreateBinOp(BO.getOpcode(), Op0, Op1, Name);
147
156
  }
148
149
  BinaryOperator &BO;
150
};
151
152
// Information about a load or store that we're scalarizing.
153
struct VectorLayout {
154
60
  VectorLayout() = default;
155
156
  // Return the alignment of element I.
157
216
  uint64_t getElemAlign(unsigned I) {
158
216
    return MinAlign(VecAlign, I * ElemSize);
159
216
  }
160
161
  // The type of the vector.
162
  VectorType *VecTy = nullptr;
163
164
  // The type of each element.
165
  Type *ElemTy = nullptr;
166
167
  // The alignment of the vector.
168
  uint64_t VecAlign = 0;
169
170
  // The size of each element.
171
  uint64_t ElemSize = 0;
172
};
173
174
class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> {
175
public:
176
  ScalarizerVisitor(unsigned ParallelLoopAccessMDKind)
177
90
    : ParallelLoopAccessMDKind(ParallelLoopAccessMDKind) {
178
90
  }
179
180
  bool visit(Function &F);
181
182
  // InstVisitor methods.  They return true if the instruction was scalarized,
183
  // false if nothing changed.
184
158
  bool visitInstruction(Instruction &I) { return false; }
185
  bool visitSelectInst(SelectInst &SI);
186
  bool visitICmpInst(ICmpInst &ICI);
187
  bool visitFCmpInst(FCmpInst &FCI);
188
  bool visitUnaryOperator(UnaryOperator &UO);
189
  bool visitBinaryOperator(BinaryOperator &BO);
190
  bool visitGetElementPtrInst(GetElementPtrInst &GEPI);
191
  bool visitCastInst(CastInst &CI);
192
  bool visitBitCastInst(BitCastInst &BCI);
193
  bool visitShuffleVectorInst(ShuffleVectorInst &SVI);
194
  bool visitPHINode(PHINode &PHI);
195
  bool visitLoadInst(LoadInst &LI);
196
  bool visitStoreInst(StoreInst &SI);
197
  bool visitCallInst(CallInst &ICI);
198
199
private:
200
  Scatterer scatter(Instruction *Point, Value *V);
201
  void gather(Instruction *Op, const ValueVector &CV);
202
  bool canTransferMetadata(unsigned Kind);
203
  void transferMetadataAndIRFlags(Instruction *Op, const ValueVector &CV);
204
  bool getVectorLayout(Type *Ty, unsigned Alignment, VectorLayout &Layout,
205
                       const DataLayout &DL);
206
  bool finish();
207
208
  template<typename T> bool splitUnary(Instruction &, const T &);
209
  template<typename T> bool splitBinary(Instruction &, const T &);
210
211
  bool splitCall(CallInst &CI);
212
213
  ScatterMap Scattered;
214
  GatherList Gathered;
215
216
  unsigned ParallelLoopAccessMDKind;
217
};
218
219
class ScalarizerLegacyPass : public FunctionPass {
220
public:
221
  static char ID;
222
223
10
  ScalarizerLegacyPass() : FunctionPass(ID) {
224
10
    initializeScalarizerLegacyPassPass(*PassRegistry::getPassRegistry());
225
10
  }
226
227
  bool runOnFunction(Function &F) override;
228
};
229
230
} // end anonymous namespace
231
232
char ScalarizerLegacyPass::ID = 0;
233
36.0k
INITIALIZE_PASS_BEGIN(ScalarizerLegacyPass, "scalarizer",
234
36.0k
                      "Scalarize vector operations", false, false)
235
36.0k
INITIALIZE_PASS_END(ScalarizerLegacyPass, "scalarizer",
236
                    "Scalarize vector operations", false, false)
237
238
Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
239
                     ValueVector *cachePtr)
240
324
  : BB(bb), BBI(bbi), V(v), CachePtr(cachePtr) {
241
324
  Type *Ty = V->getType();
242
324
  PtrTy = dyn_cast<PointerType>(Ty);
243
324
  if (PtrTy)
244
54
    Ty = PtrTy->getElementType();
245
324
  Size = Ty->getVectorNumElements();
246
324
  if (!CachePtr)
247
40
    Tmp.resize(Size, nullptr);
248
284
  else if (CachePtr->empty())
249
176
    CachePtr->resize(Size, nullptr);
250
284
  else
251
284
    assert(Size == CachePtr->size() && "Inconsistent vector sizes");
252
324
}
253
254
// Return component I, creating a new Value for it if necessary.
255
1.17k
Value *Scatterer::operator[](unsigned I) {
256
1.17k
  ValueVector &CV = (CachePtr ? 
*CachePtr1.06k
:
Tmp108
);
257
1.17k
  // Try to reuse a previous value.
258
1.17k
  if (CV[I])
259
410
    return CV[I];
260
760
  IRBuilder<> Builder(BB, BBI);
261
760
  if (PtrTy) {
262
192
    Type *ElTy = PtrTy->getElementType()->getVectorElementType();
263
192
    if (!CV[0]) {
264
48
      Type *NewPtrTy = PointerType::get(ElTy, PtrTy->getAddressSpace());
265
48
      CV[0] = Builder.CreateBitCast(V, NewPtrTy, V->getName() + ".i0");
266
48
    }
267
192
    if (I != 0)
268
144
      CV[I] = Builder.CreateConstGEP1_32(ElTy, CV[0], I,
269
144
                                         V->getName() + ".i" + Twine(I));
270
568
  } else {
271
568
    // Search through a chain of InsertElementInsts looking for element I.
272
568
    // Record other elements in the cache.  The new V is still suitable
273
568
    // for all uncached indices.
274
586
    while (true) {
275
586
      InsertElementInst *Insert = dyn_cast<InsertElementInst>(V);
276
586
      if (!Insert)
277
544
        break;
278
42
      ConstantInt *Idx = dyn_cast<ConstantInt>(Insert->getOperand(2));
279
42
      if (!Idx)
280
8
        break;
281
34
      unsigned J = Idx->getZExtValue();
282
34
      V = Insert->getOperand(0);
283
34
      if (I == J) {
284
16
        CV[J] = Insert->getOperand(1);
285
16
        return CV[J];
286
18
      } else if (!CV[J]) {
287
16
        // Only cache the first entry we find for each index we're not actively
288
16
        // searching for. This prevents us from going too far up the chain and
289
16
        // caching incorrect entries.
290
16
        CV[J] = Insert->getOperand(1);
291
16
      }
292
34
    }
293
568
    CV[I] = Builder.CreateExtractElement(V, Builder.getInt32(I),
294
552
                                         V->getName() + ".i" + Twine(I));
295
552
  }
296
760
  
return CV[I]744
;
297
760
}
298
299
45
bool ScalarizerLegacyPass::runOnFunction(Function &F) {
300
45
  if (skipFunction(F))
301
0
    return false;
302
45
303
45
  Module &M = *F.getParent();
304
45
  unsigned ParallelLoopAccessMDKind =
305
45
      M.getContext().getMDKindID("llvm.mem.parallel_loop_access");
306
45
  ScalarizerVisitor Impl(ParallelLoopAccessMDKind);
307
45
  return Impl.visit(F);
308
45
}
309
310
0
FunctionPass *llvm::createScalarizerPass() {
311
0
  return new ScalarizerLegacyPass();
312
0
}
313
314
90
bool ScalarizerVisitor::visit(Function &F) {
315
90
  assert(Gathered.empty() && Scattered.empty());
316
90
317
90
  // To ensure we replace gathered components correctly we need to do an ordered
318
90
  // traversal of the basic blocks in the function.
319
90
  ReversePostOrderTraversal<BasicBlock *> RPOT(&F.getEntryBlock());
320
134
  for (BasicBlock *BB : RPOT) {
321
556
    for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) {
322
422
      Instruction *I = &*II;
323
422
      bool Done = InstVisitor::visit(I);
324
422
      ++II;
325
422
      if (Done && 
I->getType()->isVoidTy()180
)
326
30
        I->eraseFromParent();
327
422
    }
328
134
  }
329
90
  return finish();
330
90
}
331
332
// Return a scattered form of V that can be accessed by Point.  V must be a
333
// vector or a pointer to a vector.
334
324
Scatterer ScalarizerVisitor::scatter(Instruction *Point, Value *V) {
335
324
  if (Argument *VArg = dyn_cast<Argument>(V)) {
336
110
    // Put the scattered form of arguments in the entry block,
337
110
    // so that it can be used everywhere.
338
110
    Function *F = VArg->getParent();
339
110
    BasicBlock *BB = &F->getEntryBlock();
340
110
    return Scatterer(BB, BB->begin(), V, &Scattered[V]);
341
110
  }
342
214
  if (Instruction *VOp = dyn_cast<Instruction>(V)) {
343
174
    // Put the scattered form of an instruction directly after the
344
174
    // instruction.
345
174
    BasicBlock *BB = VOp->getParent();
346
174
    return Scatterer(BB, std::next(BasicBlock::iterator(VOp)),
347
174
                     V, &Scattered[V]);
348
174
  }
349
40
  // In the fallback case, just put the scattered before Point and
350
40
  // keep the result local to Point.
351
40
  return Scatterer(Point->getParent(), Point->getIterator(), V);
352
40
}
353
354
// Replace Op with the gathered form of the components in CV.  Defer the
355
// deletion of Op and creation of the gathered form to the end of the pass,
356
// so that we can avoid creating the gathered form if all uses of Op are
357
// replaced with uses of CV.
358
150
void ScalarizerVisitor::gather(Instruction *Op, const ValueVector &CV) {
359
150
  // Since we're not deleting Op yet, stub out its operands, so that it
360
150
  // doesn't make anything live unnecessarily.
361
448
  for (unsigned I = 0, E = Op->getNumOperands(); I != E; 
++I298
)
362
298
    Op->setOperand(I, UndefValue::get(Op->getOperand(I)->getType()));
363
150
364
150
  transferMetadataAndIRFlags(Op, CV);
365
150
366
150
  // If we already have a scattered form of Op (created from ExtractElements
367
150
  // of Op itself), replace them with the new form.
368
150
  ValueVector &SV = Scattered[Op];
369
150
  if (!SV.empty()) {
370
46
    for (unsigned I = 0, E = SV.size(); I != E; 
++I36
) {
371
36
      Value *V = SV[I];
372
36
      if (V == nullptr)
373
0
        continue;
374
36
375
36
      Instruction *Old = cast<Instruction>(V);
376
36
      CV[I]->takeName(Old);
377
36
      Old->replaceAllUsesWith(CV[I]);
378
36
      Old->eraseFromParent();
379
36
    }
380
10
  }
381
150
  SV = CV;
382
150
  Gathered.push_back(GatherList::value_type(Op, &SV));
383
150
}
384
385
// Return true if it is safe to transfer the given metadata tag from
386
// vector to scalar instructions.
387
96
bool ScalarizerVisitor::canTransferMetadata(unsigned Tag) {
388
96
  return (Tag == LLVMContext::MD_tbaa
389
96
          || 
Tag == LLVMContext::MD_fpmath56
390
96
          || 
Tag == LLVMContext::MD_tbaa_struct48
391
96
          || 
Tag == LLVMContext::MD_invariant_load32
392
96
          || 
Tag == LLVMContext::MD_alias_scope32
393
96
          || 
Tag == LLVMContext::MD_noalias32
394
96
          || 
Tag == ParallelLoopAccessMDKind32
395
96
          || 
Tag == LLVMContext::MD_access_group32
);
396
96
}
397
398
// Transfer metadata from Op to the instructions in CV if it is known
399
// to be safe to do so.
400
void ScalarizerVisitor::transferMetadataAndIRFlags(Instruction *Op,
401
180
                                                   const ValueVector &CV) {
402
180
  SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
403
180
  Op->getAllMetadataOtherThanDebugLoc(MDs);
404
846
  for (unsigned I = 0, E = CV.size(); I != E; 
++I666
) {
405
666
    if (Instruction *New = dyn_cast<Instruction>(CV[I])) {
406
658
      for (const auto &MD : MDs)
407
96
        if (canTransferMetadata(MD.first))
408
80
          New->setMetadata(MD.first, MD.second);
409
658
      New->copyIRFlags(Op);
410
658
      if (Op->getDebugLoc() && 
!New->getDebugLoc()32
)
411
0
        New->setDebugLoc(Op->getDebugLoc());
412
658
    }
413
666
  }
414
180
}
415
416
// Try to fill in Layout from Ty, returning true on success.  Alignment is
417
// the alignment of the vector, or 0 if the ABI default should be used.
418
bool ScalarizerVisitor::getVectorLayout(Type *Ty, unsigned Alignment,
419
60
                                 VectorLayout &Layout, const DataLayout &DL) {
420
60
  // Make sure we're dealing with a vector.
421
60
  Layout.VecTy = dyn_cast<VectorType>(Ty);
422
60
  if (!Layout.VecTy)
423
0
    return false;
424
60
425
60
  // Check that we're dealing with full-byte elements.
426
60
  Layout.ElemTy = Layout.VecTy->getElementType();
427
60
  if (!DL.typeSizeEqualsStoreSize(Layout.ElemTy))
428
6
    return false;
429
54
430
54
  if (Alignment)
431
14
    Layout.VecAlign = Alignment;
432
40
  else
433
40
    Layout.VecAlign = DL.getABITypeAlignment(Layout.VecTy);
434
54
  Layout.ElemSize = DL.getTypeStoreSize(Layout.ElemTy);
435
54
  return true;
436
54
}
437
438
// Scalarize one-operand instruction I, using Split(Builder, X, Name)
439
// to create an instruction like I with operand X and name Name.
440
template<typename Splitter>
441
4
bool ScalarizerVisitor::splitUnary(Instruction &I, const Splitter &Split) {
442
4
  VectorType *VT = dyn_cast<VectorType>(I.getType());
443
4
  if (!VT)
444
0
    return false;
445
4
446
4
  unsigned NumElems = VT->getNumElements();
447
4
  IRBuilder<> Builder(&I);
448
4
  Scatterer Op = scatter(&I, I.getOperand(0));
449
4
  assert(Op.size() == NumElems && "Mismatched unary operation");
450
4
  ValueVector Res;
451
4
  Res.resize(NumElems);
452
16
  for (unsigned Elem = 0; Elem < NumElems; 
++Elem12
)
453
12
    Res[Elem] = Split(Builder, Op[Elem], I.getName() + ".i" + Twine(Elem));
454
4
  gather(&I, Res);
455
4
  return true;
456
4
}
457
458
// Scalarize two-operand instruction I, using Split(Builder, X, Y, Name)
459
// to create an instruction like I with operands X and Y and name Name.
460
template<typename Splitter>
461
64
bool ScalarizerVisitor::splitBinary(Instruction &I, const Splitter &Split) {
462
64
  VectorType *VT = dyn_cast<VectorType>(I.getType());
463
64
  if (!VT)
464
28
    return false;
465
36
466
36
  unsigned NumElems = VT->getNumElements();
467
36
  IRBuilder<> Builder(&I);
468
36
  Scatterer Op0 = scatter(&I, I.getOperand(0));
469
36
  Scatterer Op1 = scatter(&I, I.getOperand(1));
470
36
  assert(Op0.size() == NumElems && "Mismatched binary operation");
471
36
  assert(Op1.size() == NumElems && "Mismatched binary operation");
472
36
  ValueVector Res;
473
36
  Res.resize(NumElems);
474
220
  for (unsigned Elem = 0; Elem < NumElems; 
++Elem184
)
475
184
    Res[Elem] = Split(Builder, Op0[Elem], Op1[Elem],
476
184
                      I.getName() + ".i" + Twine(Elem));
477
36
  gather(&I, Res);
478
36
  return true;
479
36
}
Scalarizer.cpp:bool (anonymous namespace)::ScalarizerVisitor::splitBinary<(anonymous namespace)::BinarySplitter>(llvm::Instruction&, (anonymous namespace)::BinarySplitter const&)
Line
Count
Source
461
44
bool ScalarizerVisitor::splitBinary(Instruction &I, const Splitter &Split) {
462
44
  VectorType *VT = dyn_cast<VectorType>(I.getType());
463
44
  if (!VT)
464
16
    return false;
465
28
466
28
  unsigned NumElems = VT->getNumElements();
467
28
  IRBuilder<> Builder(&I);
468
28
  Scatterer Op0 = scatter(&I, I.getOperand(0));
469
28
  Scatterer Op1 = scatter(&I, I.getOperand(1));
470
28
  assert(Op0.size() == NumElems && "Mismatched binary operation");
471
28
  assert(Op1.size() == NumElems && "Mismatched binary operation");
472
28
  ValueVector Res;
473
28
  Res.resize(NumElems);
474
184
  for (unsigned Elem = 0; Elem < NumElems; 
++Elem156
)
475
156
    Res[Elem] = Split(Builder, Op0[Elem], Op1[Elem],
476
156
                      I.getName() + ".i" + Twine(Elem));
477
28
  gather(&I, Res);
478
28
  return true;
479
28
}
Scalarizer.cpp:bool (anonymous namespace)::ScalarizerVisitor::splitBinary<(anonymous namespace)::ICmpSplitter>(llvm::Instruction&, (anonymous namespace)::ICmpSplitter const&)
Line
Count
Source
461
14
bool ScalarizerVisitor::splitBinary(Instruction &I, const Splitter &Split) {
462
14
  VectorType *VT = dyn_cast<VectorType>(I.getType());
463
14
  if (!VT)
464
12
    return false;
465
2
466
2
  unsigned NumElems = VT->getNumElements();
467
2
  IRBuilder<> Builder(&I);
468
2
  Scatterer Op0 = scatter(&I, I.getOperand(0));
469
2
  Scatterer Op1 = scatter(&I, I.getOperand(1));
470
2
  assert(Op0.size() == NumElems && "Mismatched binary operation");
471
2
  assert(Op1.size() == NumElems && "Mismatched binary operation");
472
2
  ValueVector Res;
473
2
  Res.resize(NumElems);
474
10
  for (unsigned Elem = 0; Elem < NumElems; 
++Elem8
)
475
8
    Res[Elem] = Split(Builder, Op0[Elem], Op1[Elem],
476
8
                      I.getName() + ".i" + Twine(Elem));
477
2
  gather(&I, Res);
478
2
  return true;
479
2
}
Scalarizer.cpp:bool (anonymous namespace)::ScalarizerVisitor::splitBinary<(anonymous namespace)::FCmpSplitter>(llvm::Instruction&, (anonymous namespace)::FCmpSplitter const&)
Line
Count
Source
461
6
bool ScalarizerVisitor::splitBinary(Instruction &I, const Splitter &Split) {
462
6
  VectorType *VT = dyn_cast<VectorType>(I.getType());
463
6
  if (!VT)
464
0
    return false;
465
6
466
6
  unsigned NumElems = VT->getNumElements();
467
6
  IRBuilder<> Builder(&I);
468
6
  Scatterer Op0 = scatter(&I, I.getOperand(0));
469
6
  Scatterer Op1 = scatter(&I, I.getOperand(1));
470
6
  assert(Op0.size() == NumElems && "Mismatched binary operation");
471
6
  assert(Op1.size() == NumElems && "Mismatched binary operation");
472
6
  ValueVector Res;
473
6
  Res.resize(NumElems);
474
26
  for (unsigned Elem = 0; Elem < NumElems; 
++Elem20
)
475
20
    Res[Elem] = Split(Builder, Op0[Elem], Op1[Elem],
476
20
                      I.getName() + ".i" + Twine(Elem));
477
6
  gather(&I, Res);
478
6
  return true;
479
6
}
480
481
22
static bool isTriviallyScalariable(Intrinsic::ID ID) {
482
22
  return isTriviallyVectorizable(ID);
483
22
}
484
485
// All of the current scalarizable intrinsics only have one mangled type.
486
static Function *getScalarIntrinsicDeclaration(Module *M,
487
                                               Intrinsic::ID ID,
488
22
                                               VectorType *Ty) {
489
22
  return Intrinsic::getDeclaration(M, ID, { Ty->getScalarType() });
490
22
}
491
492
/// If a call to a vector typed intrinsic function, split into a scalar call per
493
/// element if possible for the intrinsic.
494
36
bool ScalarizerVisitor::splitCall(CallInst &CI) {
495
36
  VectorType *VT = dyn_cast<VectorType>(CI.getType());
496
36
  if (!VT)
497
8
    return false;
498
28
499
28
  Function *F = CI.getCalledFunction();
500
28
  if (!F)
501
0
    return false;
502
28
503
28
  Intrinsic::ID ID = F->getIntrinsicID();
504
28
  if (ID == Intrinsic::not_intrinsic || 
!isTriviallyScalariable(ID)22
)
505
6
    return false;
506
22
507
22
  unsigned NumElems = VT->getNumElements();
508
22
  unsigned NumArgs = CI.getNumArgOperands();
509
22
510
22
  ValueVector ScalarOperands(NumArgs);
511
22
  SmallVector<Scatterer, 8> Scattered(NumArgs);
512
22
513
22
  Scattered.resize(NumArgs);
514
22
515
22
  // Assumes that any vector type has the same number of elements as the return
516
22
  // vector type, which is true for all current intrinsics.
517
66
  for (unsigned I = 0; I != NumArgs; 
++I44
) {
518
44
    Value *OpI = CI.getOperand(I);
519
44
    if (OpI->getType()->isVectorTy()) {
520
38
      Scattered[I] = scatter(&CI, OpI);
521
38
      assert(Scattered[I].size() == NumElems && "mismatched call operands");
522
38
    } else {
523
6
      ScalarOperands[I] = OpI;
524
6
    }
525
44
  }
526
22
527
22
  ValueVector Res(NumElems);
528
22
  ValueVector ScalarCallOps(NumArgs);
529
22
530
22
  Function *NewIntrin = getScalarIntrinsicDeclaration(F->getParent(), ID, VT);
531
22
  IRBuilder<> Builder(&CI);
532
22
533
22
  // Perform actual scalarization, taking care to preserve any scalar operands.
534
66
  for (unsigned Elem = 0; Elem < NumElems; 
++Elem44
) {
535
44
    ScalarCallOps.clear();
536
44
537
132
    for (unsigned J = 0; J != NumArgs; 
++J88
) {
538
88
      if (hasVectorInstrinsicScalarOpd(ID, J))
539
12
        ScalarCallOps.push_back(ScalarOperands[J]);
540
76
      else
541
76
        ScalarCallOps.push_back(Scattered[J][Elem]);
542
88
    }
543
44
544
44
    Res[Elem] = Builder.CreateCall(NewIntrin, ScalarCallOps,
545
44
                                   CI.getName() + ".i" + Twine(Elem));
546
44
  }
547
22
548
22
  gather(&CI, Res);
549
22
  return true;
550
22
}
551
552
6
bool ScalarizerVisitor::visitSelectInst(SelectInst &SI) {
553
6
  VectorType *VT = dyn_cast<VectorType>(SI.getType());
554
6
  if (!VT)
555
0
    return false;
556
6
557
6
  unsigned NumElems = VT->getNumElements();
558
6
  IRBuilder<> Builder(&SI);
559
6
  Scatterer Op1 = scatter(&SI, SI.getOperand(1));
560
6
  Scatterer Op2 = scatter(&SI, SI.getOperand(2));
561
6
  assert(Op1.size() == NumElems && "Mismatched select");
562
6
  assert(Op2.size() == NumElems && "Mismatched select");
563
6
  ValueVector Res;
564
6
  Res.resize(NumElems);
565
6
566
6
  if (SI.getOperand(0)->getType()->isVectorTy()) {
567
6
    Scatterer Op0 = scatter(&SI, SI.getOperand(0));
568
6
    assert(Op0.size() == NumElems && "Mismatched select");
569
30
    for (unsigned I = 0; I < NumElems; 
++I24
)
570
24
      Res[I] = Builder.CreateSelect(Op0[I], Op1[I], Op2[I],
571
24
                                    SI.getName() + ".i" + Twine(I));
572
6
  } else {
573
0
    Value *Op0 = SI.getOperand(0);
574
0
    for (unsigned I = 0; I < NumElems; ++I)
575
0
      Res[I] = Builder.CreateSelect(Op0, Op1[I], Op2[I],
576
0
                                    SI.getName() + ".i" + Twine(I));
577
0
  }
578
6
  gather(&SI, Res);
579
6
  return true;
580
6
}
581
582
14
bool ScalarizerVisitor::visitICmpInst(ICmpInst &ICI) {
583
14
  return splitBinary(ICI, ICmpSplitter(ICI));
584
14
}
585
586
6
bool ScalarizerVisitor::visitFCmpInst(FCmpInst &FCI) {
587
6
  return splitBinary(FCI, FCmpSplitter(FCI));
588
6
}
589
590
4
bool ScalarizerVisitor::visitUnaryOperator(UnaryOperator &UO) {
591
4
  return splitUnary(UO, UnarySplitter(UO));
592
4
}
593
594
44
bool ScalarizerVisitor::visitBinaryOperator(BinaryOperator &BO) {
595
44
  return splitBinary(BO, BinarySplitter(BO));
596
44
}
597
598
26
bool ScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
599
26
  VectorType *VT = dyn_cast<VectorType>(GEPI.getType());
600
26
  if (!VT)
601
12
    return false;
602
14
603
14
  IRBuilder<> Builder(&GEPI);
604
14
  unsigned NumElems = VT->getNumElements();
605
14
  unsigned NumIndices = GEPI.getNumIndices();
606
14
607
14
  // The base pointer might be scalar even if it's a vector GEP. In those cases,
608
14
  // splat the pointer into a vector value, and scatter that vector.
609
14
  Value *Op0 = GEPI.getOperand(0);
610
14
  if (!Op0->getType()->isVectorTy())
611
4
    Op0 = Builder.CreateVectorSplat(NumElems, Op0);
612
14
  Scatterer Base = scatter(&GEPI, Op0);
613
14
614
14
  SmallVector<Scatterer, 8> Ops;
615
14
  Ops.resize(NumIndices);
616
30
  for (unsigned I = 0; I < NumIndices; 
++I16
) {
617
16
    Value *Op = GEPI.getOperand(I + 1);
618
16
619
16
    // The indices might be scalars even if it's a vector GEP. In those cases,
620
16
    // splat the scalar into a vector value, and scatter that vector.
621
16
    if (!Op->getType()->isVectorTy())
622
6
      Op = Builder.CreateVectorSplat(NumElems, Op);
623
16
624
16
    Ops[I] = scatter(&GEPI, Op);
625
16
  }
626
14
627
14
  ValueVector Res;
628
14
  Res.resize(NumElems);
629
70
  for (unsigned I = 0; I < NumElems; 
++I56
) {
630
56
    SmallVector<Value *, 8> Indices;
631
56
    Indices.resize(NumIndices);
632
120
    for (unsigned J = 0; J < NumIndices; 
++J64
)
633
64
      Indices[J] = Ops[J][I];
634
56
    Res[I] = Builder.CreateGEP(GEPI.getSourceElementType(), Base[I], Indices,
635
56
                               GEPI.getName() + ".i" + Twine(I));
636
56
    if (GEPI.isInBounds())
637
16
      if (GetElementPtrInst *NewGEPI = dyn_cast<GetElementPtrInst>(Res[I]))
638
16
        NewGEPI->setIsInBounds();
639
56
  }
640
14
  gather(&GEPI, Res);
641
14
  return true;
642
14
}
643
644
4
bool ScalarizerVisitor::visitCastInst(CastInst &CI) {
645
4
  VectorType *VT = dyn_cast<VectorType>(CI.getDestTy());
646
4
  if (!VT)
647
0
    return false;
648
4
649
4
  unsigned NumElems = VT->getNumElements();
650
4
  IRBuilder<> Builder(&CI);
651
4
  Scatterer Op0 = scatter(&CI, CI.getOperand(0));
652
4
  assert(Op0.size() == NumElems && "Mismatched cast");
653
4
  ValueVector Res;
654
4
  Res.resize(NumElems);
655
20
  for (unsigned I = 0; I < NumElems; 
++I16
)
656
16
    Res[I] = Builder.CreateCast(CI.getOpcode(), Op0[I], VT->getElementType(),
657
16
                                CI.getName() + ".i" + Twine(I));
658
4
  gather(&CI, Res);
659
4
  return true;
660
4
}
661
662
12
bool ScalarizerVisitor::visitBitCastInst(BitCastInst &BCI) {
663
12
  VectorType *DstVT = dyn_cast<VectorType>(BCI.getDestTy());
664
12
  VectorType *SrcVT = dyn_cast<VectorType>(BCI.getSrcTy());
665
12
  if (!DstVT || 
!SrcVT10
)
666
2
    return false;
667
10
668
10
  unsigned DstNumElems = DstVT->getNumElements();
669
10
  unsigned SrcNumElems = SrcVT->getNumElements();
670
10
  IRBuilder<> Builder(&BCI);
671
10
  Scatterer Op0 = scatter(&BCI, BCI.getOperand(0));
672
10
  ValueVector Res;
673
10
  Res.resize(DstNumElems);
674
10
675
10
  if (DstNumElems == SrcNumElems) {
676
10
    for (unsigned I = 0; I < DstNumElems; 
++I8
)
677
8
      Res[I] = Builder.CreateBitCast(Op0[I], DstVT->getElementType(),
678
8
                                     BCI.getName() + ".i" + Twine(I));
679
8
  } else if (DstNumElems > SrcNumElems) {
680
4
    // <M x t1> -> <N*M x t2>.  Convert each t1 to <N x t2> and copy the
681
4
    // individual elements to the destination.
682
4
    unsigned FanOut = DstNumElems / SrcNumElems;
683
4
    Type *MidTy = VectorType::get(DstVT->getElementType(), FanOut);
684
4
    unsigned ResI = 0;
685
12
    for (unsigned Op0I = 0; Op0I < SrcNumElems; 
++Op0I8
) {
686
8
      Value *V = Op0[Op0I];
687
8
      Instruction *VI;
688
8
      // Look through any existing bitcasts before converting to <N x t2>.
689
8
      // In the best case, the resulting conversion might be a no-op.
690
16
      while ((VI = dyn_cast<Instruction>(V)) &&
691
16
             VI->getOpcode() == Instruction::BitCast)
692
8
        V = VI->getOperand(0);
693
8
      V = Builder.CreateBitCast(V, MidTy, V->getName() + ".cast");
694
8
      Scatterer Mid = scatter(&BCI, V);
695
24
      for (unsigned MidI = 0; MidI < FanOut; 
++MidI16
)
696
16
        Res[ResI++] = Mid[MidI];
697
8
    }
698
4
  } else {
699
4
    // <N*M x t1> -> <M x t2>.  Convert each group of <N x t1> into a t2.
700
4
    unsigned FanIn = SrcNumElems / DstNumElems;
701
4
    Type *MidTy = VectorType::get(SrcVT->getElementType(), FanIn);
702
4
    unsigned Op0I = 0;
703
12
    for (unsigned ResI = 0; ResI < DstNumElems; 
++ResI8
) {
704
8
      Value *V = UndefValue::get(MidTy);
705
24
      for (unsigned MidI = 0; MidI < FanIn; 
++MidI16
)
706
16
        V = Builder.CreateInsertElement(V, Op0[Op0I++], Builder.getInt32(MidI),
707
16
                                        BCI.getName() + ".i" + Twine(ResI)
708
16
                                        + ".upto" + Twine(MidI));
709
8
      Res[ResI] = Builder.CreateBitCast(V, DstVT->getElementType(),
710
8
                                        BCI.getName() + ".i" + Twine(ResI));
711
8
    }
712
4
  }
713
10
  gather(&BCI, Res);
714
10
  return true;
715
10
}
716
717
14
bool ScalarizerVisitor::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
718
14
  VectorType *VT = dyn_cast<VectorType>(SVI.getType());
719
14
  if (!VT)
720
0
    return false;
721
14
722
14
  unsigned NumElems = VT->getNumElements();
723
14
  Scatterer Op0 = scatter(&SVI, SVI.getOperand(0));
724
14
  Scatterer Op1 = scatter(&SVI, SVI.getOperand(1));
725
14
  ValueVector Res;
726
14
  Res.resize(NumElems);
727
14
728
52
  for (unsigned I = 0; I < NumElems; 
++I38
) {
729
38
    int Selector = SVI.getMaskValue(I);
730
38
    if (Selector < 0)
731
0
      Res[I] = UndefValue::get(VT->getElementType());
732
38
    else if (unsigned(Selector) < Op0.size())
733
32
      Res[I] = Op0[Selector];
734
6
    else
735
6
      Res[I] = Op1[Selector - Op0.size()];
736
38
  }
737
14
  gather(&SVI, Res);
738
14
  return true;
739
14
}
740
741
28
bool ScalarizerVisitor::visitPHINode(PHINode &PHI) {
742
28
  VectorType *VT = dyn_cast<VectorType>(PHI.getType());
743
28
  if (!VT)
744
12
    return false;
745
16
746
16
  unsigned NumElems = VT->getNumElements();
747
16
  IRBuilder<> Builder(&PHI);
748
16
  ValueVector Res;
749
16
  Res.resize(NumElems);
750
16
751
16
  unsigned NumOps = PHI.getNumOperands();
752
60
  for (unsigned I = 0; I < NumElems; 
++I44
)
753
44
    Res[I] = Builder.CreatePHI(VT->getElementType(), NumOps,
754
44
                               PHI.getName() + ".i" + Twine(I));
755
16
756
44
  for (unsigned I = 0; I < NumOps; 
++I28
) {
757
28
    Scatterer Op = scatter(&PHI, PHI.getIncomingValue(I));
758
28
    BasicBlock *IncomingBlock = PHI.getIncomingBlock(I);
759
112
    for (unsigned J = 0; J < NumElems; 
++J84
)
760
84
      cast<PHINode>(Res[J])->addIncoming(Op[J], IncomingBlock);
761
28
  }
762
16
  gather(&PHI, Res);
763
16
  return true;
764
16
}
765
766
38
bool ScalarizerVisitor::visitLoadInst(LoadInst &LI) {
767
38
  if (!ScalarizeLoadStore)
768
10
    return false;
769
28
  if (!LI.isSimple())
770
0
    return false;
771
28
772
28
  VectorLayout Layout;
773
28
  if (!getVectorLayout(LI.getType(), LI.getAlignment(), Layout,
774
28
                       LI.getModule()->getDataLayout()))
775
4
    return false;
776
24
777
24
  unsigned NumElems = Layout.VecTy->getNumElements();
778
24
  IRBuilder<> Builder(&LI);
779
24
  Scatterer Ptr = scatter(&LI, LI.getPointerOperand());
780
24
  ValueVector Res;
781
24
  Res.resize(NumElems);
782
24
783
120
  for (unsigned I = 0; I < NumElems; 
++I96
)
784
96
    Res[I] = Builder.CreateAlignedLoad(Layout.VecTy->getElementType(), Ptr[I],
785
96
                                       Layout.getElemAlign(I),
786
96
                                       LI.getName() + ".i" + Twine(I));
787
24
  gather(&LI, Res);
788
24
  return true;
789
24
}
790
791
32
bool ScalarizerVisitor::visitStoreInst(StoreInst &SI) {
792
32
  if (!ScalarizeLoadStore)
793
0
    return false;
794
32
  if (!SI.isSimple())
795
0
    return false;
796
32
797
32
  VectorLayout Layout;
798
32
  Value *FullValue = SI.getValueOperand();
799
32
  if (!getVectorLayout(FullValue->getType(), SI.getAlignment(), Layout,
800
32
                       SI.getModule()->getDataLayout()))
801
2
    return false;
802
30
803
30
  unsigned NumElems = Layout.VecTy->getNumElements();
804
30
  IRBuilder<> Builder(&SI);
805
30
  Scatterer Ptr = scatter(&SI, SI.getPointerOperand());
806
30
  Scatterer Val = scatter(&SI, FullValue);
807
30
808
30
  ValueVector Stores;
809
30
  Stores.resize(NumElems);
810
150
  for (unsigned I = 0; I < NumElems; 
++I120
) {
811
120
    unsigned Align = Layout.getElemAlign(I);
812
120
    Stores[I] = Builder.CreateAlignedStore(Val[I], Ptr[I], Align);
813
120
  }
814
30
  transferMetadataAndIRFlags(&SI, Stores);
815
30
  return true;
816
30
}
817
818
36
bool ScalarizerVisitor::visitCallInst(CallInst &CI) {
819
36
  return splitCall(CI);
820
36
}
821
822
// Delete the instructions that we scalarized.  If a full vector result
823
// is still needed, recreate it using InsertElements.
824
90
bool ScalarizerVisitor::finish() {
825
90
  // The presence of data in Gathered or Scattered indicates changes
826
90
  // made to the Function.
827
90
  if (Gathered.empty() && 
Scattered.empty()6
)
828
2
    return false;
829
150
  
for (const auto &GMI : Gathered)88
{
830
150
    Instruction *Op = GMI.first;
831
150
    ValueVector &CV = *GMI.second;
832
150
    if (!Op->use_empty()) {
833
50
      // The value is still needed, so recreate it using a series of
834
50
      // InsertElements.
835
50
      Type *Ty = Op->getType();
836
50
      Value *Res = UndefValue::get(Ty);
837
50
      BasicBlock *BB = Op->getParent();
838
50
      unsigned Count = Ty->getVectorNumElements();
839
50
      IRBuilder<> Builder(Op);
840
50
      if (isa<PHINode>(Op))
841
4
        Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
842
238
      for (unsigned I = 0; I < Count; 
++I188
)
843
188
        Res = Builder.CreateInsertElement(Res, CV[I], Builder.getInt32(I),
844
188
                                          Op->getName() + ".upto" + Twine(I));
845
50
      Res->takeName(Op);
846
50
      Op->replaceAllUsesWith(Res);
847
50
    }
848
150
    Op->eraseFromParent();
849
150
  }
850
88
  Gathered.clear();
851
88
  Scattered.clear();
852
88
  return true;
853
88
}
854
855
45
PreservedAnalyses ScalarizerPass::run(Function &F, FunctionAnalysisManager &AM) {
856
45
  Module &M = *F.getParent();
857
45
  unsigned ParallelLoopAccessMDKind =
858
45
      M.getContext().getMDKindID("llvm.mem.parallel_loop_access");
859
45
  ScalarizerVisitor Impl(ParallelLoopAccessMDKind);
860
45
  bool Changed = Impl.visit(F);
861
45
  return Changed ? 
PreservedAnalyses::none()44
:
PreservedAnalyses::all()1
;
862
45
}