/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Transforms/Scalar/Scalarizer.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===--- Scalarizer.cpp - Scalarize vector operations ---------------------===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | // This pass converts vector operations into scalar operations, in order |
11 | | // to expose optimization opportunities on the individual scalar operations. |
12 | | // It is mainly intended for targets that do not have vector units, but it |
13 | | // may also be useful for revectorizing code to different vector widths. |
14 | | // |
15 | | //===----------------------------------------------------------------------===// |
16 | | |
17 | | #include "llvm/ADT/STLExtras.h" |
18 | | #include "llvm/Analysis/VectorUtils.h" |
19 | | #include "llvm/IR/IRBuilder.h" |
20 | | #include "llvm/IR/InstVisitor.h" |
21 | | #include "llvm/Pass.h" |
22 | | #include "llvm/Transforms/Scalar.h" |
23 | | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
24 | | |
25 | | using namespace llvm; |
26 | | |
27 | | #define DEBUG_TYPE "scalarizer" |
28 | | |
29 | | namespace { |
30 | | // Used to store the scattered form of a vector. |
31 | | typedef SmallVector<Value *, 8> ValueVector; |
32 | | |
33 | | // Used to map a vector Value to its scattered form. We use std::map |
34 | | // because we want iterators to persist across insertion and because the |
35 | | // values are relatively large. |
36 | | typedef std::map<Value *, ValueVector> ScatterMap; |
37 | | |
38 | | // Lists Instructions that have been replaced with scalar implementations, |
39 | | // along with a pointer to their scattered forms. |
40 | | typedef SmallVector<std::pair<Instruction *, ValueVector *>, 16> GatherList; |
41 | | |
42 | | // Provides a very limited vector-like interface for lazily accessing one |
43 | | // component of a scattered vector or vector pointer. |
44 | | class Scatterer { |
45 | | public: |
46 | 14 | Scatterer() {} |
47 | | |
48 | | // Scatter V into Size components. If new instructions are needed, |
49 | | // insert them before BBI in BB. If Cache is nonnull, use it to cache |
50 | | // the results. |
51 | | Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v, |
52 | | ValueVector *cachePtr = nullptr); |
53 | | |
54 | | // Return component I, creating a new Value for it if necessary. |
55 | | Value *operator[](unsigned I); |
56 | | |
57 | | // Return the number of components. |
58 | 17 | unsigned size() const { return Size; } |
59 | | |
60 | | private: |
61 | | BasicBlock *BB; |
62 | | BasicBlock::iterator BBI; |
63 | | Value *V; |
64 | | ValueVector *CachePtr; |
65 | | PointerType *PtrTy; |
66 | | ValueVector Tmp; |
67 | | unsigned Size; |
68 | | }; |
69 | | |
70 | | // FCmpSpliiter(FCI)(Builder, X, Y, Name) uses Builder to create an FCmp |
71 | | // called Name that compares X and Y in the same way as FCI. |
72 | | struct FCmpSplitter { |
73 | 1 | FCmpSplitter(FCmpInst &fci) : FCI(fci) {} |
74 | | Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1, |
75 | 4 | const Twine &Name) const { |
76 | 4 | return Builder.CreateFCmp(FCI.getPredicate(), Op0, Op1, Name); |
77 | 4 | } |
78 | | FCmpInst &FCI; |
79 | | }; |
80 | | |
81 | | // ICmpSpliiter(ICI)(Builder, X, Y, Name) uses Builder to create an ICmp |
82 | | // called Name that compares X and Y in the same way as ICI. |
83 | | struct ICmpSplitter { |
84 | 6 | ICmpSplitter(ICmpInst &ici) : ICI(ici) {} |
85 | | Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1, |
86 | 4 | const Twine &Name) const { |
87 | 4 | return Builder.CreateICmp(ICI.getPredicate(), Op0, Op1, Name); |
88 | 4 | } |
89 | | ICmpInst &ICI; |
90 | | }; |
91 | | |
92 | | // BinarySpliiter(BO)(Builder, X, Y, Name) uses Builder to create |
93 | | // a binary operator like BO called Name with operands X and Y. |
94 | | struct BinarySplitter { |
95 | 18 | BinarySplitter(BinaryOperator &bo) : BO(bo) {} |
96 | | Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1, |
97 | 72 | const Twine &Name) const { |
98 | 72 | return Builder.CreateBinOp(BO.getOpcode(), Op0, Op1, Name); |
99 | 72 | } |
100 | | BinaryOperator &BO; |
101 | | }; |
102 | | |
103 | | // Information about a load or store that we're scalarizing. |
104 | | struct VectorLayout { |
105 | 28 | VectorLayout() : VecTy(nullptr), ElemTy(nullptr), VecAlign(0), ElemSize(0) {} |
106 | | |
107 | | // Return the alignment of element I. |
108 | 100 | uint64_t getElemAlign(unsigned I) { |
109 | 100 | return MinAlign(VecAlign, I * ElemSize); |
110 | 100 | } |
111 | | |
112 | | // The type of the vector. |
113 | | VectorType *VecTy; |
114 | | |
115 | | // The type of each element. |
116 | | Type *ElemTy; |
117 | | |
118 | | // The alignment of the vector. |
119 | | uint64_t VecAlign; |
120 | | |
121 | | // The size of each element. |
122 | | uint64_t ElemSize; |
123 | | }; |
124 | | |
125 | | class Scalarizer : public FunctionPass, |
126 | | public InstVisitor<Scalarizer, bool> { |
127 | | public: |
128 | | static char ID; |
129 | | |
130 | | Scalarizer() : |
131 | 8 | FunctionPass(ID) { |
132 | 8 | initializeScalarizerPass(*PassRegistry::getPassRegistry()); |
133 | 8 | } |
134 | | |
135 | | bool doInitialization(Module &M) override; |
136 | | bool runOnFunction(Function &F) override; |
137 | | |
138 | | // InstVisitor methods. They return true if the instruction was scalarized, |
139 | | // false if nothing changed. |
140 | 58 | bool visitInstruction(Instruction &) { return false; } |
141 | | bool visitSelectInst(SelectInst &SI); |
142 | | bool visitICmpInst(ICmpInst &); |
143 | | bool visitFCmpInst(FCmpInst &); |
144 | | bool visitBinaryOperator(BinaryOperator &); |
145 | | bool visitGetElementPtrInst(GetElementPtrInst &); |
146 | | bool visitCastInst(CastInst &); |
147 | | bool visitBitCastInst(BitCastInst &); |
148 | | bool visitShuffleVectorInst(ShuffleVectorInst &); |
149 | | bool visitPHINode(PHINode &); |
150 | | bool visitLoadInst(LoadInst &); |
151 | | bool visitStoreInst(StoreInst &); |
152 | | bool visitCallInst(CallInst &I); |
153 | | |
154 | 24.6k | static void registerOptions() { |
155 | 24.6k | // This is disabled by default because having separate loads and stores |
156 | 24.6k | // makes it more likely that the -combiner-alias-analysis limits will be |
157 | 24.6k | // reached. |
158 | 24.6k | OptionRegistry::registerOption<bool, Scalarizer, |
159 | 24.6k | &Scalarizer::ScalarizeLoadStore>( |
160 | 24.6k | "scalarize-load-store", |
161 | 24.6k | "Allow the scalarizer pass to scalarize loads and store", false); |
162 | 24.6k | } |
163 | | |
164 | | private: |
165 | | Scatterer scatter(Instruction *, Value *); |
166 | | void gather(Instruction *, const ValueVector &); |
167 | | bool canTransferMetadata(unsigned Kind); |
168 | | void transferMetadata(Instruction *, const ValueVector &); |
169 | | bool getVectorLayout(Type *, unsigned, VectorLayout &, const DataLayout &); |
170 | | bool finish(); |
171 | | |
172 | | template<typename T> bool splitBinary(Instruction &, const T &); |
173 | | |
174 | | bool splitCall(CallInst &CI); |
175 | | |
176 | | ScatterMap Scattered; |
177 | | GatherList Gathered; |
178 | | unsigned ParallelLoopAccessMDKind; |
179 | | bool ScalarizeLoadStore; |
180 | | }; |
181 | | |
182 | | char Scalarizer::ID = 0; |
183 | | } // end anonymous namespace |
184 | | |
// Register the pass (and its registerOptions hook) with the PassRegistry.
INITIALIZE_PASS_WITH_OPTIONS(Scalarizer, "scalarizer",
                             "Scalarize vector operations", false, false)
187 | | |
188 | | Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v, |
189 | | ValueVector *cachePtr) |
190 | 125 | : BB(bb), BBI(bbi), V(v), CachePtr(cachePtr) { |
191 | 125 | Type *Ty = V->getType(); |
192 | 125 | PtrTy = dyn_cast<PointerType>(Ty); |
193 | 125 | if (PtrTy) |
194 | 25 | Ty = PtrTy->getElementType(); |
195 | 125 | Size = Ty->getVectorNumElements(); |
196 | 125 | if (!CachePtr) |
197 | 14 | Tmp.resize(Size, nullptr); |
198 | 111 | else if (111 CachePtr->empty()111 ) |
199 | 65 | CachePtr->resize(Size, nullptr); |
200 | 111 | else |
201 | 111 | assert(Size == CachePtr->size() && "Inconsistent vector sizes"); |
202 | 125 | } |
203 | | |
// Return component I, creating a new Value for it if necessary.
Value *Scatterer::operator[](unsigned I) {
  // Use the shared cache when one was supplied, otherwise the local one.
  ValueVector &CV = (CachePtr ? *CachePtr : Tmp);
  // Try to reuse a previous value.
  if (CV[I])
    return CV[I];
  IRBuilder<> Builder(BB, BBI);
  if (PtrTy) {
    // V is a pointer to a vector: component I is a pointer to element I.
    // CV[0] holds a pointer to element 0, created on first use by casting
    // the vector pointer to an element-pointer type; other components are
    // GEPs off that base.
    if (!CV[0]) {
      Type *Ty =
        PointerType::get(PtrTy->getElementType()->getVectorElementType(),
                         PtrTy->getAddressSpace());
      CV[0] = Builder.CreateBitCast(V, Ty, V->getName() + ".i0");
    }
    if (I != 0)
      CV[I] = Builder.CreateConstGEP1_32(nullptr, CV[0], I,
                                         V->getName() + ".i" + Twine(I));
  } else {
    // Search through a chain of InsertElementInsts looking for element I.
    // Record other elements in the cache.  The new V is still suitable
    // for all uncached indices.
    for (;;) {
      InsertElementInst *Insert = dyn_cast<InsertElementInst>(V);
      if (!Insert)
        break;
      ConstantInt *Idx = dyn_cast<ConstantInt>(Insert->getOperand(2));
      if (!Idx)
        // Non-constant index: cannot tell which element this insert writes.
        break;
      unsigned J = Idx->getZExtValue();
      V = Insert->getOperand(0);
      if (I == J) {
        CV[J] = Insert->getOperand(1);
        return CV[J];
      } else if (!CV[J]) {
        // Only cache the first entry we find for each index we're not actively
        // searching for.  This prevents us from going too far up the chain and
        // caching incorrect entries.
        CV[J] = Insert->getOperand(1);
      }
    }
    // Fall back to an explicit extractelement from whatever V ended up as.
    CV[I] = Builder.CreateExtractElement(V, Builder.getInt32(I),
                                         V->getName() + ".i" + Twine(I));
  }
  return CV[I];
}
249 | | |
250 | 8 | bool Scalarizer::doInitialization(Module &M) { |
251 | 8 | ParallelLoopAccessMDKind = |
252 | 8 | M.getContext().getMDKindID("llvm.mem.parallel_loop_access"); |
253 | 8 | ScalarizeLoadStore = |
254 | 8 | M.getContext().getOption<bool, Scalarizer, &Scalarizer::ScalarizeLoadStore>(); |
255 | 8 | return false; |
256 | 8 | } |
257 | | |
258 | 32 | bool Scalarizer::runOnFunction(Function &F) { |
259 | 32 | if (skipFunction(F)) |
260 | 0 | return false; |
261 | 32 | assert(Gathered.empty() && Scattered.empty()); |
262 | 47 | for (BasicBlock &BB : F) { |
263 | 211 | for (BasicBlock::iterator II = BB.begin(), IE = BB.end(); II != IE211 ;) { |
264 | 164 | Instruction *I = &*II; |
265 | 164 | bool Done = visit(I); |
266 | 164 | ++II; |
267 | 164 | if (Done && 164 I->getType()->isVoidTy()69 ) |
268 | 14 | I->eraseFromParent(); |
269 | 164 | } |
270 | 47 | } |
271 | 32 | return finish(); |
272 | 32 | } |
273 | | |
274 | | // Return a scattered form of V that can be accessed by Point. V must be a |
275 | | // vector or a pointer to a vector. |
276 | 125 | Scatterer Scalarizer::scatter(Instruction *Point, Value *V) { |
277 | 125 | if (Argument *VArg125 = dyn_cast<Argument>(V)) { |
278 | 36 | // Put the scattered form of arguments in the entry block, |
279 | 36 | // so that it can be used everywhere. |
280 | 36 | Function *F = VArg->getParent(); |
281 | 36 | BasicBlock *BB = &F->getEntryBlock(); |
282 | 36 | return Scatterer(BB, BB->begin(), V, &Scattered[V]); |
283 | 36 | } |
284 | 89 | if (Instruction *89 VOp89 = dyn_cast<Instruction>(V)) { |
285 | 75 | // Put the scattered form of an instruction directly after the |
286 | 75 | // instruction. |
287 | 75 | BasicBlock *BB = VOp->getParent(); |
288 | 75 | return Scatterer(BB, std::next(BasicBlock::iterator(VOp)), |
289 | 75 | V, &Scattered[V]); |
290 | 75 | } |
291 | 14 | // In the fallback case, just put the scattered before Point and |
292 | 14 | // keep the result local to Point. |
293 | 14 | return Scatterer(Point->getParent(), Point->getIterator(), V); |
294 | 14 | } |
295 | | |
296 | | // Replace Op with the gathered form of the components in CV. Defer the |
297 | | // deletion of Op and creation of the gathered form to the end of the pass, |
298 | | // so that we can avoid creating the gathered form if all uses of Op are |
299 | | // replaced with uses of CV. |
300 | 55 | void Scalarizer::gather(Instruction *Op, const ValueVector &CV) { |
301 | 55 | // Since we're not deleting Op yet, stub out its operands, so that it |
302 | 55 | // doesn't make anything live unnecessarily. |
303 | 161 | for (unsigned I = 0, E = Op->getNumOperands(); I != E161 ; ++I106 ) |
304 | 106 | Op->setOperand(I, UndefValue::get(Op->getOperand(I)->getType())); |
305 | 55 | |
306 | 55 | transferMetadata(Op, CV); |
307 | 55 | |
308 | 55 | // If we already have a scattered form of Op (created from ExtractElements |
309 | 55 | // of Op itself), replace them with the new form. |
310 | 55 | ValueVector &SV = Scattered[Op]; |
311 | 55 | if (!SV.empty()55 ) { |
312 | 18 | for (unsigned I = 0, E = SV.size(); I != E18 ; ++I14 ) { |
313 | 14 | Value *V = SV[I]; |
314 | 14 | if (V == nullptr) |
315 | 1 | continue; |
316 | 13 | |
317 | 13 | Instruction *Old = cast<Instruction>(V); |
318 | 13 | CV[I]->takeName(Old); |
319 | 13 | Old->replaceAllUsesWith(CV[I]); |
320 | 13 | Old->eraseFromParent(); |
321 | 13 | } |
322 | 4 | } |
323 | 55 | SV = CV; |
324 | 55 | Gathered.push_back(GatherList::value_type(Op, &SV)); |
325 | 55 | } |
326 | | |
327 | | // Return true if it is safe to transfer the given metadata tag from |
328 | | // vector to scalar instructions. |
329 | 48 | bool Scalarizer::canTransferMetadata(unsigned Tag) { |
330 | 48 | return (Tag == LLVMContext::MD_tbaa |
331 | 28 | || Tag == LLVMContext::MD_fpmath |
332 | 24 | || Tag == LLVMContext::MD_tbaa_struct |
333 | 16 | || Tag == LLVMContext::MD_invariant_load |
334 | 16 | || Tag == LLVMContext::MD_alias_scope |
335 | 16 | || Tag == LLVMContext::MD_noalias |
336 | 16 | || Tag == ParallelLoopAccessMDKind); |
337 | 48 | } |
338 | | |
339 | | // Transfer metadata from Op to the instructions in CV if it is known |
340 | | // to be safe to do so. |
341 | 69 | void Scalarizer::transferMetadata(Instruction *Op, const ValueVector &CV) { |
342 | 69 | SmallVector<std::pair<unsigned, MDNode *>, 4> MDs; |
343 | 69 | Op->getAllMetadataOtherThanDebugLoc(MDs); |
344 | 347 | for (unsigned I = 0, E = CV.size(); I != E347 ; ++I278 ) { |
345 | 278 | if (Instruction *New278 = dyn_cast<Instruction>(CV[I])) { |
346 | 277 | for (const auto &MD : MDs) |
347 | 48 | if (48 canTransferMetadata(MD.first)48 ) |
348 | 40 | New->setMetadata(MD.first, MD.second); |
349 | 277 | if (Op->getDebugLoc() && 277 !New->getDebugLoc()16 ) |
350 | 0 | New->setDebugLoc(Op->getDebugLoc()); |
351 | 277 | } |
352 | 278 | } |
353 | 69 | } |
354 | | |
355 | | // Try to fill in Layout from Ty, returning true on success. Alignment is |
356 | | // the alignment of the vector, or 0 if the ABI default should be used. |
357 | | bool Scalarizer::getVectorLayout(Type *Ty, unsigned Alignment, |
358 | 28 | VectorLayout &Layout, const DataLayout &DL) { |
359 | 28 | // Make sure we're dealing with a vector. |
360 | 28 | Layout.VecTy = dyn_cast<VectorType>(Ty); |
361 | 28 | if (!Layout.VecTy) |
362 | 0 | return false; |
363 | 28 | |
364 | 28 | // Check that we're dealing with full-byte elements. |
365 | 28 | Layout.ElemTy = Layout.VecTy->getElementType(); |
366 | 28 | if (DL.getTypeSizeInBits(Layout.ElemTy) != |
367 | 28 | DL.getTypeStoreSizeInBits(Layout.ElemTy)) |
368 | 3 | return false; |
369 | 25 | |
370 | 25 | if (25 Alignment25 ) |
371 | 7 | Layout.VecAlign = Alignment; |
372 | 25 | else |
373 | 18 | Layout.VecAlign = DL.getABITypeAlignment(Layout.VecTy); |
374 | 28 | Layout.ElemSize = DL.getTypeStoreSize(Layout.ElemTy); |
375 | 28 | return true; |
376 | 28 | } |
377 | | |
378 | | // Scalarize two-operand instruction I, using Split(Builder, X, Y, Name) |
379 | | // to create an instruction like I with operands X and Y and name Name. |
380 | | template<typename Splitter> |
381 | 25 | bool Scalarizer::splitBinary(Instruction &I, const Splitter &Split) { |
382 | 25 | VectorType *VT = dyn_cast<VectorType>(I.getType()); |
383 | 25 | if (!VT) |
384 | 12 | return false; |
385 | 13 | |
386 | 13 | unsigned NumElems = VT->getNumElements(); |
387 | 13 | IRBuilder<> Builder(&I); |
388 | 13 | Scatterer Op0 = scatter(&I, I.getOperand(0)); |
389 | 13 | Scatterer Op1 = scatter(&I, I.getOperand(1)); |
390 | 13 | assert(Op0.size() == NumElems && "Mismatched binary operation"); |
391 | 13 | assert(Op1.size() == NumElems && "Mismatched binary operation"); |
392 | 13 | ValueVector Res; |
393 | 13 | Res.resize(NumElems); |
394 | 93 | for (unsigned Elem = 0; Elem < NumElems93 ; ++Elem80 ) |
395 | 80 | Res[Elem] = Split(Builder, Op0[Elem], Op1[Elem], |
396 | 80 | I.getName() + ".i" + Twine(Elem)); |
397 | 25 | gather(&I, Res); |
398 | 25 | return true; |
399 | 25 | } Scalarizer.cpp:bool (anonymous namespace)::Scalarizer::splitBinary<(anonymous namespace)::BinarySplitter>(llvm::Instruction&, (anonymous namespace)::BinarySplitter const&) Line | Count | Source | 381 | 18 | bool Scalarizer::splitBinary(Instruction &I, const Splitter &Split) { | 382 | 18 | VectorType *VT = dyn_cast<VectorType>(I.getType()); | 383 | 18 | if (!VT) | 384 | 7 | return false; | 385 | 11 | | 386 | 11 | unsigned NumElems = VT->getNumElements(); | 387 | 11 | IRBuilder<> Builder(&I); | 388 | 11 | Scatterer Op0 = scatter(&I, I.getOperand(0)); | 389 | 11 | Scatterer Op1 = scatter(&I, I.getOperand(1)); | 390 | 11 | assert(Op0.size() == NumElems && "Mismatched binary operation"); | 391 | 11 | assert(Op1.size() == NumElems && "Mismatched binary operation"); | 392 | 11 | ValueVector Res; | 393 | 11 | Res.resize(NumElems); | 394 | 83 | for (unsigned Elem = 0; Elem < NumElems83 ; ++Elem72 ) | 395 | 72 | Res[Elem] = Split(Builder, Op0[Elem], Op1[Elem], | 396 | 72 | I.getName() + ".i" + Twine(Elem)); | 397 | 18 | gather(&I, Res); | 398 | 18 | return true; | 399 | 18 | } |
Scalarizer.cpp:bool (anonymous namespace)::Scalarizer::splitBinary<(anonymous namespace)::ICmpSplitter>(llvm::Instruction&, (anonymous namespace)::ICmpSplitter const&) Line | Count | Source | 381 | 6 | bool Scalarizer::splitBinary(Instruction &I, const Splitter &Split) { | 382 | 6 | VectorType *VT = dyn_cast<VectorType>(I.getType()); | 383 | 6 | if (!VT) | 384 | 5 | return false; | 385 | 1 | | 386 | 1 | unsigned NumElems = VT->getNumElements(); | 387 | 1 | IRBuilder<> Builder(&I); | 388 | 1 | Scatterer Op0 = scatter(&I, I.getOperand(0)); | 389 | 1 | Scatterer Op1 = scatter(&I, I.getOperand(1)); | 390 | 1 | assert(Op0.size() == NumElems && "Mismatched binary operation"); | 391 | 1 | assert(Op1.size() == NumElems && "Mismatched binary operation"); | 392 | 1 | ValueVector Res; | 393 | 1 | Res.resize(NumElems); | 394 | 5 | for (unsigned Elem = 0; Elem < NumElems5 ; ++Elem4 ) | 395 | 4 | Res[Elem] = Split(Builder, Op0[Elem], Op1[Elem], | 396 | 4 | I.getName() + ".i" + Twine(Elem)); | 397 | 6 | gather(&I, Res); | 398 | 6 | return true; | 399 | 6 | } |
Scalarizer.cpp:bool (anonymous namespace)::Scalarizer::splitBinary<(anonymous namespace)::FCmpSplitter>(llvm::Instruction&, (anonymous namespace)::FCmpSplitter const&) Line | Count | Source | 381 | 1 | bool Scalarizer::splitBinary(Instruction &I, const Splitter &Split) { | 382 | 1 | VectorType *VT = dyn_cast<VectorType>(I.getType()); | 383 | 1 | if (!VT) | 384 | 0 | return false; | 385 | 1 | | 386 | 1 | unsigned NumElems = VT->getNumElements(); | 387 | 1 | IRBuilder<> Builder(&I); | 388 | 1 | Scatterer Op0 = scatter(&I, I.getOperand(0)); | 389 | 1 | Scatterer Op1 = scatter(&I, I.getOperand(1)); | 390 | 1 | assert(Op0.size() == NumElems && "Mismatched binary operation"); | 391 | 1 | assert(Op1.size() == NumElems && "Mismatched binary operation"); | 392 | 1 | ValueVector Res; | 393 | 1 | Res.resize(NumElems); | 394 | 5 | for (unsigned Elem = 0; Elem < NumElems5 ; ++Elem4 ) | 395 | 4 | Res[Elem] = Split(Builder, Op0[Elem], Op1[Elem], | 396 | 4 | I.getName() + ".i" + Twine(Elem)); | 397 | 1 | gather(&I, Res); | 398 | 1 | return true; | 399 | 1 | } |
|
400 | | |
401 | 6 | static bool isTriviallyScalariable(Intrinsic::ID ID) { |
402 | 6 | return isTriviallyVectorizable(ID); |
403 | 6 | } |
404 | | |
405 | | // All of the current scalarizable intrinsics only have one mangled type. |
406 | | static Function *getScalarIntrinsicDeclaration(Module *M, |
407 | | Intrinsic::ID ID, |
408 | 6 | VectorType *Ty) { |
409 | 6 | return Intrinsic::getDeclaration(M, ID, { Ty->getScalarType() }); |
410 | 6 | } |
411 | | |
412 | | /// If a call to a vector typed intrinsic function, split into a scalar call per |
413 | | /// element if possible for the intrinsic. |
414 | 12 | bool Scalarizer::splitCall(CallInst &CI) { |
415 | 12 | VectorType *VT = dyn_cast<VectorType>(CI.getType()); |
416 | 12 | if (!VT) |
417 | 4 | return false; |
418 | 8 | |
419 | 8 | Function *F = CI.getCalledFunction(); |
420 | 8 | if (!F) |
421 | 0 | return false; |
422 | 8 | |
423 | 8 | Intrinsic::ID ID = F->getIntrinsicID(); |
424 | 8 | if (ID == Intrinsic::not_intrinsic || 8 !isTriviallyScalariable(ID)6 ) |
425 | 2 | return false; |
426 | 6 | |
427 | 6 | unsigned NumElems = VT->getNumElements(); |
428 | 6 | unsigned NumArgs = CI.getNumArgOperands(); |
429 | 6 | |
430 | 6 | ValueVector ScalarOperands(NumArgs); |
431 | 6 | SmallVector<Scatterer, 8> Scattered(NumArgs); |
432 | 6 | |
433 | 6 | Scattered.resize(NumArgs); |
434 | 6 | |
435 | 6 | // Assumes that any vector type has the same number of elements as the return |
436 | 6 | // vector type, which is true for all current intrinsics. |
437 | 17 | for (unsigned I = 0; I != NumArgs17 ; ++I11 ) { |
438 | 11 | Value *OpI = CI.getOperand(I); |
439 | 11 | if (OpI->getType()->isVectorTy()11 ) { |
440 | 9 | Scattered[I] = scatter(&CI, OpI); |
441 | 9 | assert(Scattered[I].size() == NumElems && "mismatched call operands"); |
442 | 11 | } else { |
443 | 2 | ScalarOperands[I] = OpI; |
444 | 2 | } |
445 | 11 | } |
446 | 6 | |
447 | 6 | ValueVector Res(NumElems); |
448 | 6 | ValueVector ScalarCallOps(NumArgs); |
449 | 6 | |
450 | 6 | Function *NewIntrin = getScalarIntrinsicDeclaration(F->getParent(), ID, VT); |
451 | 6 | IRBuilder<> Builder(&CI); |
452 | 6 | |
453 | 6 | // Perform actual scalarization, taking care to preserve any scalar operands. |
454 | 18 | for (unsigned Elem = 0; Elem < NumElems18 ; ++Elem12 ) { |
455 | 12 | ScalarCallOps.clear(); |
456 | 12 | |
457 | 34 | for (unsigned J = 0; J != NumArgs34 ; ++J22 ) { |
458 | 22 | if (hasVectorInstrinsicScalarOpd(ID, J)) |
459 | 4 | ScalarCallOps.push_back(ScalarOperands[J]); |
460 | 22 | else |
461 | 18 | ScalarCallOps.push_back(Scattered[J][Elem]); |
462 | 22 | } |
463 | 12 | |
464 | 12 | Res[Elem] = Builder.CreateCall(NewIntrin, ScalarCallOps, |
465 | 12 | CI.getName() + ".i" + Twine(Elem)); |
466 | 12 | } |
467 | 12 | |
468 | 12 | gather(&CI, Res); |
469 | 12 | return true; |
470 | 12 | } |
471 | | |
472 | 2 | bool Scalarizer::visitSelectInst(SelectInst &SI) { |
473 | 2 | VectorType *VT = dyn_cast<VectorType>(SI.getType()); |
474 | 2 | if (!VT) |
475 | 0 | return false; |
476 | 2 | |
477 | 2 | unsigned NumElems = VT->getNumElements(); |
478 | 2 | IRBuilder<> Builder(&SI); |
479 | 2 | Scatterer Op1 = scatter(&SI, SI.getOperand(1)); |
480 | 2 | Scatterer Op2 = scatter(&SI, SI.getOperand(2)); |
481 | 2 | assert(Op1.size() == NumElems && "Mismatched select"); |
482 | 2 | assert(Op2.size() == NumElems && "Mismatched select"); |
483 | 2 | ValueVector Res; |
484 | 2 | Res.resize(NumElems); |
485 | 2 | |
486 | 2 | if (SI.getOperand(0)->getType()->isVectorTy()2 ) { |
487 | 2 | Scatterer Op0 = scatter(&SI, SI.getOperand(0)); |
488 | 2 | assert(Op0.size() == NumElems && "Mismatched select"); |
489 | 10 | for (unsigned I = 0; I < NumElems10 ; ++I8 ) |
490 | 8 | Res[I] = Builder.CreateSelect(Op0[I], Op1[I], Op2[I], |
491 | 8 | SI.getName() + ".i" + Twine(I)); |
492 | 0 | } else { |
493 | 0 | Value *Op0 = SI.getOperand(0); |
494 | 0 | for (unsigned I = 0; I < NumElems0 ; ++I0 ) |
495 | 0 | Res[I] = Builder.CreateSelect(Op0, Op1[I], Op2[I], |
496 | 0 | SI.getName() + ".i" + Twine(I)); |
497 | 0 | } |
498 | 2 | gather(&SI, Res); |
499 | 2 | return true; |
500 | 2 | } |
501 | | |
502 | 6 | bool Scalarizer::visitICmpInst(ICmpInst &ICI) { |
503 | 6 | return splitBinary(ICI, ICmpSplitter(ICI)); |
504 | 6 | } |
505 | | |
506 | 1 | bool Scalarizer::visitFCmpInst(FCmpInst &FCI) { |
507 | 1 | return splitBinary(FCI, FCmpSplitter(FCI)); |
508 | 1 | } |
509 | | |
510 | 18 | bool Scalarizer::visitBinaryOperator(BinaryOperator &BO) { |
511 | 18 | return splitBinary(BO, BinarySplitter(BO)); |
512 | 18 | } |
513 | | |
514 | 12 | bool Scalarizer::visitGetElementPtrInst(GetElementPtrInst &GEPI) { |
515 | 12 | VectorType *VT = dyn_cast<VectorType>(GEPI.getType()); |
516 | 12 | if (!VT) |
517 | 5 | return false; |
518 | 7 | |
519 | 7 | IRBuilder<> Builder(&GEPI); |
520 | 7 | unsigned NumElems = VT->getNumElements(); |
521 | 7 | unsigned NumIndices = GEPI.getNumIndices(); |
522 | 7 | |
523 | 7 | // The base pointer might be scalar even if it's a vector GEP. In those cases, |
524 | 7 | // splat the pointer into a vector value, and scatter that vector. |
525 | 7 | Value *Op0 = GEPI.getOperand(0); |
526 | 7 | if (!Op0->getType()->isVectorTy()) |
527 | 2 | Op0 = Builder.CreateVectorSplat(NumElems, Op0); |
528 | 7 | Scatterer Base = scatter(&GEPI, Op0); |
529 | 7 | |
530 | 7 | SmallVector<Scatterer, 8> Ops; |
531 | 7 | Ops.resize(NumIndices); |
532 | 15 | for (unsigned I = 0; I < NumIndices15 ; ++I8 ) { |
533 | 8 | Value *Op = GEPI.getOperand(I + 1); |
534 | 8 | |
535 | 8 | // The indices might be scalars even if it's a vector GEP. In those cases, |
536 | 8 | // splat the scalar into a vector value, and scatter that vector. |
537 | 8 | if (!Op->getType()->isVectorTy()) |
538 | 3 | Op = Builder.CreateVectorSplat(NumElems, Op); |
539 | 8 | |
540 | 8 | Ops[I] = scatter(&GEPI, Op); |
541 | 8 | } |
542 | 7 | |
543 | 7 | ValueVector Res; |
544 | 7 | Res.resize(NumElems); |
545 | 35 | for (unsigned I = 0; I < NumElems35 ; ++I28 ) { |
546 | 28 | SmallVector<Value *, 8> Indices; |
547 | 28 | Indices.resize(NumIndices); |
548 | 60 | for (unsigned J = 0; J < NumIndices60 ; ++J32 ) |
549 | 32 | Indices[J] = Ops[J][I]; |
550 | 28 | Res[I] = Builder.CreateGEP(GEPI.getSourceElementType(), Base[I], Indices, |
551 | 28 | GEPI.getName() + ".i" + Twine(I)); |
552 | 28 | if (GEPI.isInBounds()) |
553 | 8 | if (GetElementPtrInst *8 NewGEPI8 = dyn_cast<GetElementPtrInst>(Res[I])) |
554 | 8 | NewGEPI->setIsInBounds(); |
555 | 28 | } |
556 | 12 | gather(&GEPI, Res); |
557 | 12 | return true; |
558 | 12 | } |
559 | | |
560 | 2 | bool Scalarizer::visitCastInst(CastInst &CI) { |
561 | 2 | VectorType *VT = dyn_cast<VectorType>(CI.getDestTy()); |
562 | 2 | if (!VT) |
563 | 0 | return false; |
564 | 2 | |
565 | 2 | unsigned NumElems = VT->getNumElements(); |
566 | 2 | IRBuilder<> Builder(&CI); |
567 | 2 | Scatterer Op0 = scatter(&CI, CI.getOperand(0)); |
568 | 2 | assert(Op0.size() == NumElems && "Mismatched cast"); |
569 | 2 | ValueVector Res; |
570 | 2 | Res.resize(NumElems); |
571 | 10 | for (unsigned I = 0; I < NumElems10 ; ++I8 ) |
572 | 8 | Res[I] = Builder.CreateCast(CI.getOpcode(), Op0[I], VT->getElementType(), |
573 | 8 | CI.getName() + ".i" + Twine(I)); |
574 | 2 | gather(&CI, Res); |
575 | 2 | return true; |
576 | 2 | } |
577 | | |
// Scalarize a vector-to-vector bitcast.  Three cases: same element count
// (elementwise bitcast), more destination elements (fan-out: each source
// element expands to several destination elements), or fewer (fan-in:
// several source elements combine into one destination element).
bool Scalarizer::visitBitCastInst(BitCastInst &BCI) {
  VectorType *DstVT = dyn_cast<VectorType>(BCI.getDestTy());
  VectorType *SrcVT = dyn_cast<VectorType>(BCI.getSrcTy());
  if (!DstVT || !SrcVT)
    return false;

  unsigned DstNumElems = DstVT->getNumElements();
  unsigned SrcNumElems = SrcVT->getNumElements();
  IRBuilder<> Builder(&BCI);
  Scatterer Op0 = scatter(&BCI, BCI.getOperand(0));
  ValueVector Res;
  Res.resize(DstNumElems);

  if (DstNumElems == SrcNumElems) {
    // Same lane count: a plain per-element bitcast.
    for (unsigned I = 0; I < DstNumElems; ++I)
      Res[I] = Builder.CreateBitCast(Op0[I], DstVT->getElementType(),
                                     BCI.getName() + ".i" + Twine(I));
  } else if (DstNumElems > SrcNumElems) {
    // <M x t1> -> <N*M x t2>.  Convert each t1 to <N x t2> and copy the
    // individual elements to the destination.
    unsigned FanOut = DstNumElems / SrcNumElems;
    Type *MidTy = VectorType::get(DstVT->getElementType(), FanOut);
    unsigned ResI = 0;
    for (unsigned Op0I = 0; Op0I < SrcNumElems; ++Op0I) {
      Value *V = Op0[Op0I];
      Instruction *VI;
      // Look through any existing bitcasts before converting to <N x t2>.
      // In the best case, the resulting conversion might be a no-op.
      while ((VI = dyn_cast<Instruction>(V)) &&
             VI->getOpcode() == Instruction::BitCast)
        V = VI->getOperand(0);
      V = Builder.CreateBitCast(V, MidTy, V->getName() + ".cast");
      // Scatter the <N x t2> mid-value and copy its lanes into the result.
      Scatterer Mid = scatter(&BCI, V);
      for (unsigned MidI = 0; MidI < FanOut; ++MidI)
        Res[ResI++] = Mid[MidI];
    }
  } else {
    // <N*M x t1> -> <M x t2>.  Convert each group of <N x t1> into a t2.
    unsigned FanIn = SrcNumElems / DstNumElems;
    Type *MidTy = VectorType::get(SrcVT->getElementType(), FanIn);
    unsigned Op0I = 0;
    for (unsigned ResI = 0; ResI < DstNumElems; ++ResI) {
      // Assemble FanIn source elements into a <N x t1> vector...
      Value *V = UndefValue::get(MidTy);
      for (unsigned MidI = 0; MidI < FanIn; ++MidI)
        V = Builder.CreateInsertElement(V, Op0[Op0I++], Builder.getInt32(MidI),
                                        BCI.getName() + ".i" + Twine(ResI)
                                        + ".upto" + Twine(MidI));
      // ...then bitcast that group to a single destination element.
      Res[ResI] = Builder.CreateBitCast(V, DstVT->getElementType(),
                                        BCI.getName() + ".i" + Twine(ResI));
    }
  }
  gather(&BCI, Res);
  return true;
}
632 | | |
633 | 5 | bool Scalarizer::visitShuffleVectorInst(ShuffleVectorInst &SVI) { |
634 | 5 | VectorType *VT = dyn_cast<VectorType>(SVI.getType()); |
635 | 5 | if (!VT) |
636 | 0 | return false; |
637 | 5 | |
638 | 5 | unsigned NumElems = VT->getNumElements(); |
639 | 5 | Scatterer Op0 = scatter(&SVI, SVI.getOperand(0)); |
640 | 5 | Scatterer Op1 = scatter(&SVI, SVI.getOperand(1)); |
641 | 5 | ValueVector Res; |
642 | 5 | Res.resize(NumElems); |
643 | 5 | |
644 | 19 | for (unsigned I = 0; I < NumElems19 ; ++I14 ) { |
645 | 14 | int Selector = SVI.getMaskValue(I); |
646 | 14 | if (Selector < 0) |
647 | 0 | Res[I] = UndefValue::get(VT->getElementType()); |
648 | 14 | else if (14 unsigned(Selector) < Op0.size()14 ) |
649 | 11 | Res[I] = Op0[Selector]; |
650 | 14 | else |
651 | 3 | Res[I] = Op1[Selector - Op0.size()]; |
652 | 14 | } |
653 | 5 | gather(&SVI, Res); |
654 | 5 | return true; |
655 | 5 | } |
656 | | |
657 | 10 | bool Scalarizer::visitPHINode(PHINode &PHI) { |
658 | 10 | VectorType *VT = dyn_cast<VectorType>(PHI.getType()); |
659 | 10 | if (!VT) |
660 | 5 | return false; |
661 | 5 | |
662 | 5 | unsigned NumElems = VT->getNumElements(); |
663 | 5 | IRBuilder<> Builder(&PHI); |
664 | 5 | ValueVector Res; |
665 | 5 | Res.resize(NumElems); |
666 | 5 | |
667 | 5 | unsigned NumOps = PHI.getNumOperands(); |
668 | 21 | for (unsigned I = 0; I < NumElems21 ; ++I16 ) |
669 | 16 | Res[I] = Builder.CreatePHI(VT->getElementType(), NumOps, |
670 | 16 | PHI.getName() + ".i" + Twine(I)); |
671 | 5 | |
672 | 15 | for (unsigned I = 0; I < NumOps15 ; ++I10 ) { |
673 | 10 | Scatterer Op = scatter(&PHI, PHI.getIncomingValue(I)); |
674 | 10 | BasicBlock *IncomingBlock = PHI.getIncomingBlock(I); |
675 | 42 | for (unsigned J = 0; J < NumElems42 ; ++J32 ) |
676 | 32 | cast<PHINode>(Res[J])->addIncoming(Op[J], IncomingBlock); |
677 | 10 | } |
678 | 10 | gather(&PHI, Res); |
679 | 10 | return true; |
680 | 10 | } |
681 | | |
682 | 18 | bool Scalarizer::visitLoadInst(LoadInst &LI) { |
683 | 18 | if (!ScalarizeLoadStore) |
684 | 5 | return false; |
685 | 13 | if (13 !LI.isSimple()13 ) |
686 | 0 | return false; |
687 | 13 | |
688 | 13 | VectorLayout Layout; |
689 | 13 | if (!getVectorLayout(LI.getType(), LI.getAlignment(), Layout, |
690 | 13 | LI.getModule()->getDataLayout())) |
691 | 2 | return false; |
692 | 11 | |
693 | 11 | unsigned NumElems = Layout.VecTy->getNumElements(); |
694 | 11 | IRBuilder<> Builder(&LI); |
695 | 11 | Scatterer Ptr = scatter(&LI, LI.getPointerOperand()); |
696 | 11 | ValueVector Res; |
697 | 11 | Res.resize(NumElems); |
698 | 11 | |
699 | 55 | for (unsigned I = 0; I < NumElems55 ; ++I44 ) |
700 | 44 | Res[I] = Builder.CreateAlignedLoad(Ptr[I], Layout.getElemAlign(I), |
701 | 44 | LI.getName() + ".i" + Twine(I)); |
702 | 18 | gather(&LI, Res); |
703 | 18 | return true; |
704 | 18 | } |
705 | | |
706 | 15 | bool Scalarizer::visitStoreInst(StoreInst &SI) { |
707 | 15 | if (!ScalarizeLoadStore) |
708 | 0 | return false; |
709 | 15 | if (15 !SI.isSimple()15 ) |
710 | 0 | return false; |
711 | 15 | |
712 | 15 | VectorLayout Layout; |
713 | 15 | Value *FullValue = SI.getValueOperand(); |
714 | 15 | if (!getVectorLayout(FullValue->getType(), SI.getAlignment(), Layout, |
715 | 15 | SI.getModule()->getDataLayout())) |
716 | 1 | return false; |
717 | 14 | |
718 | 14 | unsigned NumElems = Layout.VecTy->getNumElements(); |
719 | 14 | IRBuilder<> Builder(&SI); |
720 | 14 | Scatterer Ptr = scatter(&SI, SI.getPointerOperand()); |
721 | 14 | Scatterer Val = scatter(&SI, FullValue); |
722 | 14 | |
723 | 14 | ValueVector Stores; |
724 | 14 | Stores.resize(NumElems); |
725 | 70 | for (unsigned I = 0; I < NumElems70 ; ++I56 ) { |
726 | 56 | unsigned Align = Layout.getElemAlign(I); |
727 | 56 | Stores[I] = Builder.CreateAlignedStore(Val[I], Ptr[I], Align); |
728 | 56 | } |
729 | 15 | transferMetadata(&SI, Stores); |
730 | 15 | return true; |
731 | 15 | } |
732 | | |
733 | 12 | bool Scalarizer::visitCallInst(CallInst &CI) { |
734 | 12 | return splitCall(CI); |
735 | 12 | } |
736 | | |
737 | | // Delete the instructions that we scalarized. If a full vector result |
738 | | // is still needed, recreate it using InsertElements. |
739 | 32 | bool Scalarizer::finish() { |
740 | 32 | // The presence of data in Gathered or Scattered indicates changes |
741 | 32 | // made to the Function. |
742 | 32 | if (Gathered.empty() && 32 Scattered.empty()3 ) |
743 | 1 | return false; |
744 | 31 | for (const auto &GMI : Gathered) 31 { |
745 | 55 | Instruction *Op = GMI.first; |
746 | 55 | ValueVector &CV = *GMI.second; |
747 | 55 | if (!Op->use_empty()55 ) { |
748 | 13 | // The value is still needed, so recreate it using a series of |
749 | 13 | // InsertElements. |
750 | 13 | Type *Ty = Op->getType(); |
751 | 13 | Value *Res = UndefValue::get(Ty); |
752 | 13 | BasicBlock *BB = Op->getParent(); |
753 | 13 | unsigned Count = Ty->getVectorNumElements(); |
754 | 13 | IRBuilder<> Builder(Op); |
755 | 13 | if (isa<PHINode>(Op)) |
756 | 2 | Builder.SetInsertPoint(BB, BB->getFirstInsertionPt()); |
757 | 79 | for (unsigned I = 0; I < Count79 ; ++I66 ) |
758 | 66 | Res = Builder.CreateInsertElement(Res, CV[I], Builder.getInt32(I), |
759 | 66 | Op->getName() + ".upto" + Twine(I)); |
760 | 13 | Res->takeName(Op); |
761 | 13 | Op->replaceAllUsesWith(Res); |
762 | 13 | } |
763 | 55 | Op->eraseFromParent(); |
764 | 55 | } |
765 | 32 | Gathered.clear(); |
766 | 32 | Scattered.clear(); |
767 | 32 | return true; |
768 | 32 | } |
769 | | |
770 | 0 | FunctionPass *llvm::createScalarizerPass() { |
771 | 0 | return new Scalarizer(); |
772 | 0 | } |