/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Analysis/TargetTransformInfo.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- llvm/Analysis/TargetTransformInfo.cpp ------------------------------===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | |
10 | | #include "llvm/Analysis/TargetTransformInfo.h" |
11 | | #include "llvm/Analysis/TargetTransformInfoImpl.h" |
12 | | #include "llvm/IR/CallSite.h" |
13 | | #include "llvm/IR/DataLayout.h" |
14 | | #include "llvm/IR/Instruction.h" |
15 | | #include "llvm/IR/Instructions.h" |
16 | | #include "llvm/IR/IntrinsicInst.h" |
17 | | #include "llvm/IR/Module.h" |
18 | | #include "llvm/IR/Operator.h" |
19 | | #include "llvm/IR/PatternMatch.h" |
20 | | #include "llvm/Support/CommandLine.h" |
21 | | #include "llvm/Support/ErrorHandling.h" |
22 | | #include <utility> |
23 | | |
24 | | using namespace llvm; |
25 | | using namespace PatternMatch; |
26 | | |
27 | | #define DEBUG_TYPE "tti" |
28 | | |
// Experimental flag gating the new wide memcpy loop lowering in
// Transforms/Utils; queried below via useWideIRMemcpyLoopLowering().
static cl::opt<bool> UseWideMemcpyLoopLowering(
    "use-wide-memcpy-loop-lowering", cl::init(false),
    cl::desc("Enables the new wide memcpy loop lowering in Transforms/Utils."),
    cl::Hidden);

// Cost-model flag: when set, the cost model recognizes reduction patterns.
static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false),
                                     cl::Hidden,
                                     cl::desc("Recognize reduction patterns."));
37 | | |
namespace {
/// \brief No-op implementation of the TTI interface using the utility base
/// classes.
///
/// This is used when no target specific information is available.
struct NoTTIImpl : TargetTransformInfoImplCRTPBase<NoTTIImpl> {
  explicit NoTTIImpl(const DataLayout &DL)
      : TargetTransformInfoImplCRTPBase<NoTTIImpl>(DL) {}
};
} // end anonymous namespace
48 | | |
// Default construction wraps the no-op implementation, which only needs the
// DataLayout; used when no target-specific TTI is available.
TargetTransformInfo::TargetTransformInfo(const DataLayout &DL)
    : TTIImpl(new Model<NoTTIImpl>(NoTTIImpl(DL))) {}
51 | | |
52 | 17.0M | TargetTransformInfo::~TargetTransformInfo() {} |
53 | | |
// Move construction steals the underlying implementation object.
TargetTransformInfo::TargetTransformInfo(TargetTransformInfo &&Arg)
    : TTIImpl(std::move(Arg.TTIImpl)) {}
56 | | |
// Move assignment transfers ownership of the implementation object.
TargetTransformInfo &TargetTransformInfo::operator=(TargetTransformInfo &&RHS) {
  TTIImpl = std::move(RHS.TTIImpl);
  return *this;
}
61 | | |
62 | | int TargetTransformInfo::getOperationCost(unsigned Opcode, Type *Ty, |
63 | 0 | Type *OpTy) const { |
64 | 0 | int Cost = TTIImpl->getOperationCost(Opcode, Ty, OpTy); |
65 | 0 | assert(Cost >= 0 && "TTI should not produce negative costs!"); |
66 | 0 | return Cost; |
67 | 0 | } |
68 | | |
// Cost of a call given only the callee's type and argument count; the assert
// enforces the TTI-wide invariant that costs are non-negative.
int TargetTransformInfo::getCallCost(FunctionType *FTy, int NumArgs) const {
  int Cost = TTIImpl->getCallCost(FTy, NumArgs);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}
74 | | |
// Cost of a call to a known function with concrete argument values; costs
// must be non-negative.
int TargetTransformInfo::getCallCost(const Function *F,
                                     ArrayRef<const Value *> Arguments) const {
  int Cost = TTIImpl->getCallCost(F, Arguments);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}
81 | | |
// Forwarded to the target implementation.
unsigned TargetTransformInfo::getInliningThresholdMultiplier() const {
  return TTIImpl->getInliningThresholdMultiplier();
}
85 | | |
// Cost of a GEP over PointeeType with the given base pointer and index
// operands; forwarded to the target implementation (no sign check here).
int TargetTransformInfo::getGEPCost(Type *PointeeType, const Value *Ptr,
                                    ArrayRef<const Value *> Operands) const {
  return TTIImpl->getGEPCost(PointeeType, Ptr, Operands);
}
90 | | |
// Cost of the extension instruction I applied to source value Src; forwarded
// to the target implementation.
int TargetTransformInfo::getExtCost(const Instruction *I,
                                    const Value *Src) const {
  return TTIImpl->getExtCost(I, Src);
}
95 | | |
// Generic (size-oriented) cost of an intrinsic call; costs must be
// non-negative.
int TargetTransformInfo::getIntrinsicCost(
    Intrinsic::ID IID, Type *RetTy, ArrayRef<const Value *> Arguments) const {
  int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}
102 | | |
// Estimate how many case clusters the switch SI lowers to. JTSize is an
// out-parameter filled in by the implementation (presumably the jump-table
// size, per its name — behavior defined by the target implementation).
unsigned
TargetTransformInfo::getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                                      unsigned &JTSize) const {
  return TTIImpl->getEstimatedNumberOfCaseClusters(SI, JTSize);
}
108 | | |
109 | | int TargetTransformInfo::getUserCost(const User *U, |
110 | 43.9M | ArrayRef<const Value *> Operands) const { |
111 | 43.9M | int Cost = TTIImpl->getUserCost(U, Operands); |
112 | 43.9M | assert(Cost >= 0 && "TTI should not produce negative costs!"); |
113 | 43.9M | return Cost; |
114 | 43.9M | } |
115 | | |
// Forwarded to the target implementation.
bool TargetTransformInfo::hasBranchDivergence() const {
  return TTIImpl->hasBranchDivergence();
}
119 | | |
// Forwarded to the target implementation.
bool TargetTransformInfo::isSourceOfDivergence(const Value *V) const {
  return TTIImpl->isSourceOfDivergence(V);
}
123 | | |
// Forwarded to the target implementation. (Note: explicitly llvm::-qualified,
// unlike its siblings.)
bool llvm::TargetTransformInfo::isAlwaysUniform(const Value *V) const {
  return TTIImpl->isAlwaysUniform(V);
}
127 | | |
// Forwarded to the target implementation.
unsigned TargetTransformInfo::getFlatAddressSpace() const {
  return TTIImpl->getFlatAddressSpace();
}
131 | | |
// Forwarded to the target implementation.
bool TargetTransformInfo::isLoweredToCall(const Function *F) const {
  return TTIImpl->isLoweredToCall(F);
}
135 | | |
// Let the target adjust the unrolling preferences UP for loop L; UP is an
// in/out parameter.
void TargetTransformInfo::getUnrollingPreferences(
    Loop *L, ScalarEvolution &SE, UnrollingPreferences &UP) const {
  return TTIImpl->getUnrollingPreferences(L, SE, UP);
}
140 | | |
// Forwarded to the target implementation.
bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const {
  return TTIImpl->isLegalAddImmediate(Imm);
}
144 | | |
// Forwarded to the target implementation.
bool TargetTransformInfo::isLegalICmpImmediate(int64_t Imm) const {
  return TTIImpl->isLegalICmpImmediate(Imm);
}
148 | | |
// Whether the addressing mode BaseGV + BaseOffset + HasBaseReg + Scale in the
// given address space is legal for type Ty; I optionally names the concrete
// instruction being queried. Forwarded to the target implementation.
bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
                                                int64_t BaseOffset,
                                                bool HasBaseReg,
                                                int64_t Scale,
                                                unsigned AddrSpace,
                                                Instruction *I) const {
  return TTIImpl->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
                                        Scale, AddrSpace, I);
}
158 | | |
// Comparison predicate used by Loop Strength Reduction; forwarded to the
// target implementation.
bool TargetTransformInfo::isLSRCostLess(LSRCost &C1, LSRCost &C2) const {
  return TTIImpl->isLSRCostLess(C1, C2);
}
162 | | |
// Forwarded to the target implementation.
bool TargetTransformInfo::isLegalMaskedStore(Type *DataType) const {
  return TTIImpl->isLegalMaskedStore(DataType);
}
166 | | |
// Forwarded to the target implementation.
bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType) const {
  return TTIImpl->isLegalMaskedLoad(DataType);
}
170 | | |
// Forwarded to the target implementation.
bool TargetTransformInfo::isLegalMaskedGather(Type *DataType) const {
  return TTIImpl->isLegalMaskedGather(DataType);
}
174 | | |
// Forwarded to the target implementation.
bool TargetTransformInfo::isLegalMaskedScatter(Type *DataType) const {
  return TTIImpl->isLegalMaskedScatter(DataType);
}
178 | | |
// Forwarded to the target implementation.
bool TargetTransformInfo::hasDivRemOp(Type *DataType, bool IsSigned) const {
  return TTIImpl->hasDivRemOp(DataType, IsSigned);
}
182 | | |
// Forwarded to the target implementation.
bool TargetTransformInfo::prefersVectorizedAddressing() const {
  return TTIImpl->prefersVectorizedAddressing();
}
186 | | |
// Cost of using the given addressing-mode scaling; the assert enforces the
// TTI-wide invariant that costs are non-negative.
int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                              int64_t BaseOffset,
                                              bool HasBaseReg,
                                              int64_t Scale,
                                              unsigned AddrSpace) const {
  int Cost = TTIImpl->getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
                                           Scale, AddrSpace);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}
197 | | |
// Forwarded to the target implementation.
bool TargetTransformInfo::LSRWithInstrQueries() const {
  return TTIImpl->LSRWithInstrQueries();
}
201 | | |
// Forwarded to the target implementation.
bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const {
  return TTIImpl->isTruncateFree(Ty1, Ty2);
}
205 | | |
// Forwarded to the target implementation.
bool TargetTransformInfo::isProfitableToHoist(Instruction *I) const {
  return TTIImpl->isProfitableToHoist(I);
}
209 | | |
// Forwarded to the target implementation.
bool TargetTransformInfo::isTypeLegal(Type *Ty) const {
  return TTIImpl->isTypeLegal(Ty);
}
213 | | |
// Forwarded to the target implementation.
unsigned TargetTransformInfo::getJumpBufAlignment() const {
  return TTIImpl->getJumpBufAlignment();
}
217 | | |
// Forwarded to the target implementation.
unsigned TargetTransformInfo::getJumpBufSize() const {
  return TTIImpl->getJumpBufSize();
}
221 | | |
// Forwarded to the target implementation.
bool TargetTransformInfo::shouldBuildLookupTables() const {
  return TTIImpl->shouldBuildLookupTables();
}
// Forwarded to the target implementation.
bool TargetTransformInfo::shouldBuildLookupTablesForConstant(Constant *C) const {
  return TTIImpl->shouldBuildLookupTablesForConstant(C);
}
228 | | |
// Overhead of scalarizing a vector value of type Ty (inserts and/or extracts);
// forwarded to the target implementation.
unsigned TargetTransformInfo::
getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const {
  return TTIImpl->getScalarizationOverhead(Ty, Insert, Extract);
}
233 | | |
// Scalarization overhead for the given operands at vectorization factor VF;
// forwarded to the target implementation.
unsigned TargetTransformInfo::
getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                 unsigned VF) const {
  return TTIImpl->getOperandsScalarizationOverhead(Args, VF);
}
239 | | |
// Forwarded to the target implementation.
bool TargetTransformInfo::supportsEfficientVectorElementLoadStore() const {
  return TTIImpl->supportsEfficientVectorElementLoadStore();
}
243 | | |
// Forwarded to the target implementation.
bool TargetTransformInfo::enableAggressiveInterleaving(
    bool LoopHasReductions) const {
  return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);
}
247 | | |
// MaxLoadSize is an out-parameter set by the target implementation.
bool TargetTransformInfo::enableMemCmpExpansion(unsigned &MaxLoadSize) const {
  return TTIImpl->enableMemCmpExpansion(MaxLoadSize);
}
251 | | |
// Forwarded to the target implementation.
bool TargetTransformInfo::enableInterleavedAccessVectorization() const {
  return TTIImpl->enableInterleavedAccessVectorization();
}
255 | | |
// Forwarded to the target implementation.
bool TargetTransformInfo::isFPVectorizationPotentiallyUnsafe() const {
  return TTIImpl->isFPVectorizationPotentiallyUnsafe();
}
259 | | |
// Fast is an optional out-parameter set by the target implementation;
// forwarded unchanged.
bool TargetTransformInfo::allowsMisalignedMemoryAccesses(LLVMContext &Context,
                                                         unsigned BitWidth,
                                                         unsigned AddressSpace,
                                                         unsigned Alignment,
                                                         bool *Fast) const {
  return TTIImpl->allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
                                                 Alignment, Fast);
}
268 | | |
// Forwarded to the target implementation.
TargetTransformInfo::PopcntSupportKind
TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const {
  return TTIImpl->getPopcntSupport(IntTyWidthInBit);
}
273 | | |
// Forwarded to the target implementation.
bool TargetTransformInfo::haveFastSqrt(Type *Ty) const {
  return TTIImpl->haveFastSqrt(Ty);
}
277 | | |
278 | 569k | int TargetTransformInfo::getFPOpCost(Type *Ty) const { |
279 | 569k | int Cost = TTIImpl->getFPOpCost(Ty); |
280 | 569k | assert(Cost >= 0 && "TTI should not produce negative costs!"); |
281 | 569k | return Cost; |
282 | 569k | } |
283 | | |
// Code-size cost of materializing immediate Imm as operand Idx of Opcode;
// costs must be non-negative.
int TargetTransformInfo::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
                                               const APInt &Imm,
                                               Type *Ty) const {
  int Cost = TTIImpl->getIntImmCodeSizeCost(Opcode, Idx, Imm, Ty);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}
291 | | |
// Cost of materializing the integer immediate Imm of type Ty; costs must be
// non-negative.
int TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const {
  int Cost = TTIImpl->getIntImmCost(Imm, Ty);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}
297 | | |
// Cost of Imm when used as operand Idx of an instruction with the given
// opcode; costs must be non-negative.
int TargetTransformInfo::getIntImmCost(unsigned Opcode, unsigned Idx,
                                       const APInt &Imm, Type *Ty) const {
  int Cost = TTIImpl->getIntImmCost(Opcode, Idx, Imm, Ty);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}
304 | | |
// Cost of Imm when used as operand Idx of the given intrinsic; costs must be
// non-negative.
int TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
                                       const APInt &Imm, Type *Ty) const {
  int Cost = TTIImpl->getIntImmCost(IID, Idx, Imm, Ty);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}
311 | | |
// Forwarded to the target implementation.
unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const {
  return TTIImpl->getNumberOfRegisters(Vector);
}
315 | | |
// Forwarded to the target implementation.
unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const {
  return TTIImpl->getRegisterBitWidth(Vector);
}
319 | | |
// Forwarded to the target implementation.
unsigned TargetTransformInfo::getMinVectorRegisterBitWidth() const {
  return TTIImpl->getMinVectorRegisterBitWidth();
}
323 | | |
// AllowPromotionWithoutCommonHeader is an out-parameter set by the target
// implementation.
bool TargetTransformInfo::shouldConsiderAddressTypePromotion(
    const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
  return TTIImpl->shouldConsiderAddressTypePromotion(
      I, AllowPromotionWithoutCommonHeader);
}
329 | | |
// Forwarded to the target implementation.
unsigned TargetTransformInfo::getCacheLineSize() const {
  return TTIImpl->getCacheLineSize();
}
333 | | |
// Forwarded to the target implementation; None when the target does not
// report a size for this cache level.
llvm::Optional<unsigned> TargetTransformInfo::getCacheSize(CacheLevel Level)
  const {
  return TTIImpl->getCacheSize(Level);
}
338 | | |
// Forwarded to the target implementation; None when the target does not
// report an associativity for this cache level.
llvm::Optional<unsigned> TargetTransformInfo::getCacheAssociativity(
    CacheLevel Level) const {
  return TTIImpl->getCacheAssociativity(Level);
}
343 | | |
// Forwarded to the target implementation.
unsigned TargetTransformInfo::getPrefetchDistance() const {
  return TTIImpl->getPrefetchDistance();
}
347 | | |
// Forwarded to the target implementation.
unsigned TargetTransformInfo::getMinPrefetchStride() const {
  return TTIImpl->getMinPrefetchStride();
}
351 | | |
// Forwarded to the target implementation.
unsigned TargetTransformInfo::getMaxPrefetchIterationsAhead() const {
  return TTIImpl->getMaxPrefetchIterationsAhead();
}
355 | | |
// Forwarded to the target implementation.
unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const {
  return TTIImpl->getMaxInterleaveFactor(VF);
}
359 | | |
360 | | int TargetTransformInfo::getArithmeticInstrCost( |
361 | | unsigned Opcode, Type *Ty, OperandValueKind Opd1Info, |
362 | | OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo, |
363 | | OperandValueProperties Opd2PropInfo, |
364 | 741k | ArrayRef<const Value *> Args) const { |
365 | 741k | int Cost = TTIImpl->getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, |
366 | 741k | Opd1PropInfo, Opd2PropInfo, Args); |
367 | 741k | assert(Cost >= 0 && "TTI should not produce negative costs!"); |
368 | 741k | return Cost; |
369 | 741k | } |
370 | | |
// Cost of a shuffle of kind Kind on vector type Ty (Index/SubTp refine
// subvector shuffles); costs must be non-negative.
int TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Ty, int Index,
                                        Type *SubTp) const {
  int Cost = TTIImpl->getShuffleCost(Kind, Ty, Index, SubTp);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}
377 | | |
// Cost of a cast from Src to Dst. When a concrete instruction I is supplied,
// its opcode must match the queried one; costs must be non-negative.
int TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst,
                                          Type *Src, const Instruction *I) const {
  assert ((I == nullptr || I->getOpcode() == Opcode) &&
          "Opcode should reflect passed instruction.");
  int Cost = TTIImpl->getCastInstrCost(Opcode, Dst, Src, I);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}
386 | | |
// Cost of extracting element Index from VecTy and extending it to Dst with
// Opcode; costs must be non-negative.
int TargetTransformInfo::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                                  VectorType *VecTy,
                                                  unsigned Index) const {
  int Cost = TTIImpl->getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}
394 | | |
// Cost of a control-flow instruction with the given opcode; costs must be
// non-negative.
int TargetTransformInfo::getCFInstrCost(unsigned Opcode) const {
  int Cost = TTIImpl->getCFInstrCost(Opcode);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}
400 | | |
// Cost of a compare or select. When a concrete instruction I is supplied, its
// opcode must match the queried one; costs must be non-negative.
int TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                            Type *CondTy,
                                            const Instruction *I) const {
  assert ((I == nullptr || I->getOpcode() == Opcode) &&
          "Opcode should reflect passed instruction.");
  int Cost = TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}
409 | | |
// Cost of an insert/extract-style vector instruction at lane Index; costs
// must be non-negative.
int TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val,
                                            unsigned Index) const {
  int Cost = TTIImpl->getVectorInstrCost(Opcode, Val, Index);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}
416 | | |
417 | | int TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src, |
418 | | unsigned Alignment, |
419 | | unsigned AddressSpace, |
420 | 752k | const Instruction *I) const { |
421 | 752k | assert ((I == nullptr || I->getOpcode() == Opcode) && |
422 | 752k | "Opcode should reflect passed instruction."); |
423 | 752k | int Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I); |
424 | 752k | assert(Cost >= 0 && "TTI should not produce negative costs!"); |
425 | 752k | return Cost; |
426 | 752k | } |
427 | | |
// Cost of a masked load/store; costs must be non-negative.
int TargetTransformInfo::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                               unsigned Alignment,
                                               unsigned AddressSpace) const {
  int Cost =
      TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}
436 | | |
// Cost of a gather/scatter through pointer Ptr (VariableMask indicates a
// non-constant mask); costs must be non-negative.
int TargetTransformInfo::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                                Value *Ptr, bool VariableMask,
                                                unsigned Alignment) const {
  int Cost = TTIImpl->getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
                                             Alignment);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}
445 | | |
// Cost of an interleaved memory access of the given Factor accessing the
// member Indices; costs must be non-negative.
int TargetTransformInfo::getInterleavedMemoryOpCost(
    unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
    unsigned Alignment, unsigned AddressSpace) const {
  int Cost = TTIImpl->getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                                 Alignment, AddressSpace);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}
454 | | |
// Cost of an intrinsic call given its argument types; costs must be
// non-negative.
int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                               ArrayRef<Type *> Tys,
                                               FastMathFlags FMF,
                                               unsigned ScalarizationCostPassed) const {
  int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
                                            ScalarizationCostPassed);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}
463 | | |
// Cost of an intrinsic call given concrete argument values and a
// vectorization factor VF; costs must be non-negative.
int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                               ArrayRef<Value *> Args,
                                               FastMathFlags FMF,
                                               unsigned VF) const {
  int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}
470 | | |
// Cost of a direct call to F with the given argument types; costs must be
// non-negative.
int TargetTransformInfo::getCallInstrCost(Function *F, Type *RetTy,
                                          ArrayRef<Type *> Tys) const {
  int Cost = TTIImpl->getCallInstrCost(F, RetTy, Tys);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}
477 | | |
// Forwarded to the target implementation.
unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const {
  return TTIImpl->getNumberOfParts(Tp);
}
481 | | |
// Cost of computing an address of type Tp (SE/Ptr give optional SCEV context);
// costs must be non-negative.
int TargetTransformInfo::getAddressComputationCost(Type *Tp,
                                                   ScalarEvolution *SE,
                                                   const SCEV *Ptr) const {
  int Cost = TTIImpl->getAddressComputationCost(Tp, SE, Ptr);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}
489 | | |
// Cost of a vector arithmetic reduction (pairwise or split form); costs must
// be non-negative.
int TargetTransformInfo::getArithmeticReductionCost(unsigned Opcode, Type *Ty,
                                                    bool IsPairwiseForm) const {
  int Cost = TTIImpl->getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}
496 | | |
// Cost of a vector min/max reduction; costs must be non-negative.
int TargetTransformInfo::getMinMaxReductionCost(Type *Ty, Type *CondTy,
                                                bool IsPairwiseForm,
                                                bool IsUnsigned) const {
  int Cost =
      TTIImpl->getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}
505 | | |
// Forwarded to the target implementation.
unsigned
TargetTransformInfo::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
  return TTIImpl->getCostOfKeepingLiveOverCall(Tys);
}
510 | | |
// Info is an out-parameter filled in by the target implementation when it
// recognizes Inst as a target memory intrinsic.
bool TargetTransformInfo::getTgtMemIntrinsic(IntrinsicInst *Inst,
                                             MemIntrinsicInfo &Info) const {
  return TTIImpl->getTgtMemIntrinsic(Inst, Info);
}
515 | | |
// Forwarded to the target implementation.
unsigned TargetTransformInfo::getAtomicMemIntrinsicMaxElementSize() const {
  return TTIImpl->getAtomicMemIntrinsicMaxElementSize();
}
519 | | |
// Forwarded to the target implementation.
Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic(
    IntrinsicInst *Inst, Type *ExpectedType) const {
  return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
}
524 | | |
// Element type the target wants for a lowered memcpy loop with the given
// length and alignments; forwarded to the target implementation.
Type *TargetTransformInfo::getMemcpyLoopLoweringType(LLVMContext &Context,
                                                     Value *Length,
                                                     unsigned SrcAlign,
                                                     unsigned DestAlign) const {
  return TTIImpl->getMemcpyLoopLoweringType(Context, Length, SrcAlign,
                                            DestAlign);
}
532 | | |
// OpsOut is an out-parameter: the target appends the operand types used to
// copy the residual RemainingBytes of a lowered memcpy.
void TargetTransformInfo::getMemcpyLoopResidualLoweringType(
    SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
    unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const {
  TTIImpl->getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
                                             SrcAlign, DestAlign);
}
539 | | |
// Unlike the other queries, this one is driven by the
// -use-wide-memcpy-loop-lowering command-line flag, not the target.
bool TargetTransformInfo::useWideIRMemcpyLoopLowering() const {
  return UseWideMemcpyLoopLowering;
}
543 | | |
// Forwarded to the target implementation.
bool TargetTransformInfo::areInlineCompatible(const Function *Caller,
                                              const Function *Callee) const {
  return TTIImpl->areInlineCompatible(Caller, Callee);
}
548 | | |
// Forwarded to the target implementation.
unsigned TargetTransformInfo::getLoadStoreVecRegBitWidth(unsigned AS) const {
  return TTIImpl->getLoadStoreVecRegBitWidth(AS);
}
552 | | |
// Forwarded to the target implementation.
bool TargetTransformInfo::isLegalToVectorizeLoad(LoadInst *LI) const {
  return TTIImpl->isLegalToVectorizeLoad(LI);
}
556 | | |
// Forwarded to the target implementation.
bool TargetTransformInfo::isLegalToVectorizeStore(StoreInst *SI) const {
  return TTIImpl->isLegalToVectorizeStore(SI);
}
560 | | |
// Forwarded to the target implementation.
bool TargetTransformInfo::isLegalToVectorizeLoadChain(
    unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const {
  return TTIImpl->isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
                                              AddrSpace);
}
566 | | |
// Forwarded to the target implementation.
bool TargetTransformInfo::isLegalToVectorizeStoreChain(
    unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const {
  return TTIImpl->isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
                                               AddrSpace);
}
572 | | |
// Forwarded to the target implementation.
unsigned TargetTransformInfo::getLoadVectorFactor(unsigned VF,
                                                  unsigned LoadSize,
                                                  unsigned ChainSizeInBytes,
                                                  VectorType *VecTy) const {
  return TTIImpl->getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
}
579 | | |
// Forwarded to the target implementation.
unsigned TargetTransformInfo::getStoreVectorFactor(unsigned VF,
                                                   unsigned StoreSize,
                                                   unsigned ChainSizeInBytes,
                                                   VectorType *VecTy) const {
  return TTIImpl->getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
}
586 | | |
// Forwarded to the target implementation.
bool TargetTransformInfo::useReductionIntrinsic(unsigned Opcode,
                                                Type *Ty,
                                                ReductionFlags Flags) const {
  return TTIImpl->useReductionIntrinsic(Opcode, Ty, Flags);
}
591 | | |
// Forwarded to the target implementation.
bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const {
  return TTIImpl->shouldExpandReduction(II);
}
595 | | |
// Forwarded to the target implementation.
int TargetTransformInfo::getInstructionLatency(const Instruction *I) const {
  return TTIImpl->getInstructionLatency(I);
}
599 | | |
600 | 1.65k | static bool isReverseVectorMask(ArrayRef<int> Mask) { |
601 | 5.38k | for (unsigned i = 0, MaskSize = Mask.size(); i < MaskSize5.38k ; ++i3.73k ) |
602 | 5.18k | if (5.18k Mask[i] >= 0 && 5.18k Mask[i] != (int)(MaskSize - 1 - i)5.17k ) |
603 | 1.45k | return false; |
604 | 199 | return true; |
605 | 1.65k | } |
606 | | |
607 | 643 | static bool isSingleSourceVectorMask(ArrayRef<int> Mask) { |
608 | 643 | bool Vec0 = false; |
609 | 643 | bool Vec1 = false; |
610 | 9.07k | for (unsigned i = 0, NumVecElts = Mask.size(); i < NumVecElts9.07k ; ++i8.43k ) { |
611 | 8.43k | if (Mask[i] >= 08.43k ) { |
612 | 7.53k | if ((unsigned)Mask[i] >= NumVecElts) |
613 | 453 | Vec1 = true; |
614 | 7.53k | else |
615 | 7.08k | Vec0 = true; |
616 | 7.53k | } |
617 | 8.43k | } |
618 | 643 | return !(Vec0 && Vec1); |
619 | 643 | } |
620 | | |
621 | 1.25k | static bool isZeroEltBroadcastVectorMask(ArrayRef<int> Mask) { |
622 | 11.1k | for (unsigned i = 0; i < Mask.size()11.1k ; ++i9.88k ) |
623 | 10.5k | if (10.5k Mask[i] > 010.5k ) |
624 | 643 | return false; |
625 | 614 | return true; |
626 | 1.25k | } |
627 | | |
628 | 1.45k | static bool isAlternateVectorMask(ArrayRef<int> Mask) { |
629 | 1.45k | bool isAlternate = true; |
630 | 1.45k | unsigned MaskSize = Mask.size(); |
631 | 1.45k | |
632 | 1.45k | // Example: shufflevector A, B, <0,5,2,7> |
633 | 4.25k | for (unsigned i = 0; i < MaskSize && 4.25k isAlternate4.06k ; ++i2.80k ) { |
634 | 2.80k | if (Mask[i] < 0) |
635 | 164 | continue; |
636 | 2.64k | isAlternate = Mask[i] == (int)((i & 1) ? 2.64k MaskSize + i968 : i1.67k ); |
637 | 2.80k | } |
638 | 1.45k | |
639 | 1.45k | if (isAlternate) |
640 | 119 | return true; |
641 | 1.33k | |
642 | 1.33k | isAlternate = true; |
643 | 1.33k | // Example: shufflevector A, B, <4,1,6,3> |
644 | 3.18k | for (unsigned i = 0; i < MaskSize && 3.18k isAlternate3.10k ; ++i1.85k ) { |
645 | 1.85k | if (Mask[i] < 0) |
646 | 0 | continue; |
647 | 1.85k | isAlternate = Mask[i] == (int)((i & 1) ? 1.85k i297 : MaskSize + i1.55k ); |
648 | 1.85k | } |
649 | 1.45k | |
650 | 1.45k | return isAlternate; |
651 | 1.45k | } |
652 | | |
653 | 9.99k | static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) { |
654 | 9.99k | TargetTransformInfo::OperandValueKind OpInfo = |
655 | 9.99k | TargetTransformInfo::OK_AnyValue; |
656 | 9.99k | |
657 | 9.99k | // Check for a splat of a constant or for a non uniform vector of constants. |
658 | 9.99k | if (isa<ConstantVector>(V) || 9.99k isa<ConstantDataVector>(V)9.98k ) { |
659 | 1.51k | OpInfo = TargetTransformInfo::OK_NonUniformConstantValue; |
660 | 1.51k | if (cast<Constant>(V)->getSplatValue() != nullptr) |
661 | 927 | OpInfo = TargetTransformInfo::OK_UniformConstantValue; |
662 | 1.51k | } |
663 | 9.99k | |
664 | 9.99k | // Check for a splat of a uniform value. This is not loop aware, so return |
665 | 9.99k | // true only for the obviously uniform cases (argument, globalvalue) |
666 | 9.99k | const Value *Splat = getSplatValue(V); |
667 | 9.99k | if (Splat && 9.99k (isa<Argument>(Splat) || 1.36k isa<GlobalValue>(Splat)927 )) |
668 | 435 | OpInfo = TargetTransformInfo::OK_UniformValue; |
669 | 9.99k | |
670 | 9.99k | return OpInfo; |
671 | 9.99k | } |
672 | | |
// Returns true when SI is the expected left/right shuffle of one level of a
// pairwise reduction: the first (1 << Level) lanes select the even (left) or
// odd (right) elements, and all remaining lanes are undef.
static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft,
                                     unsigned Level) {
  // We don't need a shuffle if we just want to have element 0 in position 0 of
  // the vector.
  if (!SI && Level == 0 && IsLeft)
    return true;
  else if (!SI)
    return false;

  // Expected mask, initialized to all-undef (-1).
  SmallVector<int, 32> Mask(SI->getType()->getVectorNumElements(), -1);

  // Build a mask of 0, 2, ... (left) or 1, 3, ... (right) depending on whether
  // we look at the left or right side.
  for (unsigned i = 0, e = (1 << Level), val = !IsLeft; i != e; ++i, val += 2)
    Mask[i] = val;

  SmallVector<int, 16> ActualMask = SI->getShuffleMask();
  return Mask == ActualMask;
}
692 | | |
namespace {
/// Kind of the reduction data.
/// (Trailing ///< form so each comment documents its own enumerator; the
/// original trailing /// comments would attach to the *next* entity.)
enum ReductionKind {
  RK_None,           ///< Not a reduction.
  RK_Arithmetic,     ///< Binary reduction data.
  RK_MinMax,         ///< Min/max reduction data.
  RK_UnsignedMinMax, ///< Unsigned min/max reduction data.
};
/// Contains opcode + LHS/RHS parts of the reduction operations.
struct ReductionData {
  ReductionData() = delete;
  ReductionData(ReductionKind Kind, unsigned Opcode, Value *LHS, Value *RHS)
      : Opcode(Opcode), LHS(LHS), RHS(RHS), Kind(Kind) {
    assert(Kind != RK_None && "expected binary or min/max reduction only.");
  }
  unsigned Opcode = 0;     // Binary opcode, or compare predicate for min/max.
  Value *LHS = nullptr;    // Left operand of the reduction operation.
  Value *RHS = nullptr;    // Right operand of the reduction operation.
  ReductionKind Kind = RK_None;
  /// True when RD describes the same kind of reduction with the same opcode.
  bool hasSameData(ReductionData &RD) const {
    return Kind == RD.Kind && Opcode == RD.Opcode;
  }
};
} // namespace
717 | | |
718 | 440 | static Optional<ReductionData> getReductionData(Instruction *I) { |
719 | 440 | Value *L, *R; |
720 | 440 | if (m_BinOp(m_Value(L), m_Value(R)).match(I)) |
721 | 440 | return ReductionData(RK_Arithmetic, I->getOpcode(), L, R); |
722 | 0 | if (auto *0 SI0 = dyn_cast<SelectInst>(I)) { |
723 | 0 | if (m_SMin(m_Value(L), m_Value(R)).match(SI) || |
724 | 0 | m_SMax(m_Value(L), m_Value(R)).match(SI) || |
725 | 0 | m_OrdFMin(m_Value(L), m_Value(R)).match(SI) || |
726 | 0 | m_OrdFMax(m_Value(L), m_Value(R)).match(SI) || |
727 | 0 | m_UnordFMin(m_Value(L), m_Value(R)).match(SI) || |
728 | 0 | m_UnordFMax(m_Value(L), m_Value(R)).match(SI)0 ) { |
729 | 0 | auto *CI = cast<CmpInst>(SI->getCondition()); |
730 | 0 | return ReductionData(RK_MinMax, CI->getOpcode(), L, R); |
731 | 0 | } |
732 | 0 | if (0 m_UMin(m_Value(L), m_Value(R)).match(SI) || |
733 | 0 | m_UMax(m_Value(L), m_Value(R)).match(SI)0 ) { |
734 | 0 | auto *CI = cast<CmpInst>(SI->getCondition()); |
735 | 0 | return ReductionData(RK_UnsignedMinMax, CI->getOpcode(), L, R); |
736 | 0 | } |
737 | 0 | } |
738 | 0 | return llvm::None; |
739 | 0 | } |
740 | | |
741 | | static ReductionKind matchPairwiseReductionAtLevel(Instruction *I, |
742 | | unsigned Level, |
743 | 100 | unsigned NumLevels) { |
744 | 100 | // Match one level of pairwise operations. |
745 | 100 | // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, |
746 | 100 | // <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef> |
747 | 100 | // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, |
748 | 100 | // <4 x i32> <i32 1, i32 3, i32 undef, i32 undef> |
749 | 100 | // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 |
750 | 100 | if (!I) |
751 | 0 | return RK_None; |
752 | 100 | |
753 | 100 | assert(I->getType()->isVectorTy() && "Expecting a vector type"); |
754 | 100 | |
755 | 100 | Optional<ReductionData> RD = getReductionData(I); |
756 | 100 | if (!RD) |
757 | 0 | return RK_None; |
758 | 100 | |
759 | 100 | ShuffleVectorInst *LS = dyn_cast<ShuffleVectorInst>(RD->LHS); |
760 | 100 | if (!LS && 100 Level4 ) |
761 | 0 | return RK_None; |
762 | 100 | ShuffleVectorInst *RS = dyn_cast<ShuffleVectorInst>(RD->RHS); |
763 | 100 | if (!RS && 100 Level0 ) |
764 | 0 | return RK_None; |
765 | 100 | |
766 | 100 | // On level 0 we can omit one shufflevector instruction. |
767 | 100 | if (100 !Level && 100 !RS48 && !LS0 ) |
768 | 0 | return RK_None; |
769 | 100 | |
770 | 100 | // Shuffle inputs must match. |
771 | 100 | Value *NextLevelOpL = LS ? 100 LS->getOperand(0)96 : nullptr4 ; |
772 | 100 | Value *NextLevelOpR = RS ? RS->getOperand(0)100 : nullptr0 ; |
773 | 100 | Value *NextLevelOp = nullptr; |
774 | 100 | if (NextLevelOpR && 100 NextLevelOpL100 ) { |
775 | 96 | // If we have two shuffles their operands must match. |
776 | 96 | if (NextLevelOpL != NextLevelOpR) |
777 | 0 | return RK_None; |
778 | 96 | |
779 | 96 | NextLevelOp = NextLevelOpL; |
780 | 100 | } else if (4 Level == 0 && 4 (NextLevelOpR || 4 NextLevelOpL0 )) { |
781 | 4 | // On the first level we can omit the shufflevector <0, undef,...>. So the |
782 | 4 | // input to the other shufflevector <1, undef> must match with one of the |
783 | 4 | // inputs to the current binary operation. |
784 | 4 | // Example: |
785 | 4 | // %NextLevelOpL = shufflevector %R, <1, undef ...> |
786 | 4 | // %BinOp = fadd %NextLevelOpL, %R |
787 | 4 | if (NextLevelOpL && 4 NextLevelOpL != RD->RHS0 ) |
788 | 0 | return RK_None; |
789 | 4 | else if (4 NextLevelOpR && 4 NextLevelOpR != RD->LHS4 ) |
790 | 0 | return RK_None; |
791 | 4 | |
792 | 4 | NextLevelOp = NextLevelOpL ? 4 RD->RHS0 : RD->LHS4 ; |
793 | 4 | } else |
794 | 0 | return RK_None; |
795 | 100 | |
796 | 100 | // Check that the next levels binary operation exists and matches with the |
797 | 100 | // current one. |
798 | 100 | if (100 Level + 1 != NumLevels100 ) { |
799 | 52 | Optional<ReductionData> NextLevelRD = |
800 | 52 | getReductionData(cast<Instruction>(NextLevelOp)); |
801 | 52 | if (!NextLevelRD || 52 !RD->hasSameData(*NextLevelRD)52 ) |
802 | 0 | return RK_None; |
803 | 100 | } |
804 | 100 | |
805 | 100 | // Shuffle mask for pairwise operation must match. |
806 | 100 | if (100 matchPairwiseShuffleMask(LS, /*IsLeft=*/true, Level)100 ) { |
807 | 96 | if (!matchPairwiseShuffleMask(RS, /*IsLeft=*/false, Level)) |
808 | 0 | return RK_None; |
809 | 4 | } else if (4 matchPairwiseShuffleMask(RS, /*IsLeft=*/true, Level)4 ) { |
810 | 4 | if (!matchPairwiseShuffleMask(LS, /*IsLeft=*/false, Level)) |
811 | 0 | return RK_None; |
812 | 0 | } else { |
813 | 0 | return RK_None; |
814 | 0 | } |
815 | 100 | |
816 | 100 | if (100 ++Level == NumLevels100 ) |
817 | 48 | return RD->Kind; |
818 | 52 | |
819 | 52 | // Match next level. |
820 | 52 | return matchPairwiseReductionAtLevel(cast<Instruction>(NextLevelOp), Level, |
821 | 52 | NumLevels); |
822 | 52 | } |
823 | | |
824 | | static ReductionKind matchPairwiseReduction(const ExtractElementInst *ReduxRoot, |
825 | 116 | unsigned &Opcode, Type *&Ty) { |
826 | 116 | if (!EnableReduxCost) |
827 | 68 | return RK_None; |
828 | 48 | |
829 | 48 | // Need to extract the first element. |
830 | 48 | ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1)); |
831 | 48 | unsigned Idx = ~0u; |
832 | 48 | if (CI) |
833 | 48 | Idx = CI->getZExtValue(); |
834 | 48 | if (Idx != 0) |
835 | 0 | return RK_None; |
836 | 48 | |
837 | 48 | auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0)); |
838 | 48 | if (!RdxStart) |
839 | 0 | return RK_None; |
840 | 48 | Optional<ReductionData> RD = getReductionData(RdxStart); |
841 | 48 | if (!RD) |
842 | 0 | return RK_None; |
843 | 48 | |
844 | 48 | Type *VecTy = RdxStart->getType(); |
845 | 48 | unsigned NumVecElems = VecTy->getVectorNumElements(); |
846 | 48 | if (!isPowerOf2_32(NumVecElems)) |
847 | 0 | return RK_None; |
848 | 48 | |
849 | 48 | // We look for a sequence of shuffle,shuffle,add triples like the following |
850 | 48 | // that builds a pairwise reduction tree. |
851 | 48 | // |
852 | 48 | // (X0, X1, X2, X3) |
853 | 48 | // (X0 + X1, X2 + X3, undef, undef) |
854 | 48 | // ((X0 + X1) + (X2 + X3), undef, undef, undef) |
855 | 48 | // |
856 | 48 | // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, |
857 | 48 | // <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef> |
858 | 48 | // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, |
859 | 48 | // <4 x i32> <i32 1, i32 3, i32 undef, i32 undef> |
860 | 48 | // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 |
861 | 48 | // %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, |
862 | 48 | // <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef> |
863 | 48 | // %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, |
864 | 48 | // <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> |
865 | 48 | // %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 |
866 | 48 | // %r = extractelement <4 x float> %bin.rdx8, i32 0 |
867 | 48 | if (48 matchPairwiseReductionAtLevel(RdxStart, 0, Log2_32(NumVecElems)) == |
868 | 48 | RK_None) |
869 | 0 | return RK_None; |
870 | 48 | |
871 | 48 | Opcode = RD->Opcode; |
872 | 48 | Ty = VecTy; |
873 | 48 | |
874 | 48 | return RD->Kind; |
875 | 48 | } |
876 | | |
877 | | static std::pair<Value *, ShuffleVectorInst *> |
878 | 148 | getShuffleAndOtherOprd(Value *L, Value *R) { |
879 | 148 | ShuffleVectorInst *S = nullptr; |
880 | 148 | |
881 | 148 | if ((S = dyn_cast<ShuffleVectorInst>(L))) |
882 | 48 | return std::make_pair(R, S); |
883 | 100 | |
884 | 100 | S = dyn_cast<ShuffleVectorInst>(R); |
885 | 100 | return std::make_pair(L, S); |
886 | 100 | } |
887 | | |
888 | | static ReductionKind |
889 | | matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot, |
890 | 160 | unsigned &Opcode, Type *&Ty) { |
891 | 160 | if (!EnableReduxCost) |
892 | 68 | return RK_None; |
893 | 92 | |
894 | 92 | // Need to extract the first element. |
895 | 92 | ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1)); |
896 | 92 | unsigned Idx = ~0u; |
897 | 92 | if (CI) |
898 | 92 | Idx = CI->getZExtValue(); |
899 | 92 | if (Idx != 0) |
900 | 0 | return RK_None; |
901 | 92 | |
902 | 92 | auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0)); |
903 | 92 | if (!RdxStart) |
904 | 0 | return RK_None; |
905 | 92 | Optional<ReductionData> RD = getReductionData(RdxStart); |
906 | 92 | if (!RD) |
907 | 0 | return RK_None; |
908 | 92 | |
909 | 92 | Type *VecTy = ReduxRoot->getOperand(0)->getType(); |
910 | 92 | unsigned NumVecElems = VecTy->getVectorNumElements(); |
911 | 92 | if (!isPowerOf2_32(NumVecElems)) |
912 | 0 | return RK_None; |
913 | 92 | |
914 | 92 | // We look for a sequence of shuffles and adds like the following matching one |
915 | 92 | // fadd, shuffle vector pair at a time. |
916 | 92 | // |
917 | 92 | // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, |
918 | 92 | // <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> |
919 | 92 | // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf |
920 | 92 | // %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, |
921 | 92 | // <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> |
922 | 92 | // %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7 |
923 | 92 | // %r = extractelement <4 x float> %bin.rdx8, i32 0 |
924 | 92 | |
925 | 92 | unsigned MaskStart = 1; |
926 | 92 | Instruction *RdxOp = RdxStart; |
927 | 92 | SmallVector<int, 32> ShuffleMask(NumVecElems, 0); |
928 | 92 | unsigned NumVecElemsRemain = NumVecElems; |
929 | 192 | while (NumVecElemsRemain - 1192 ) { |
930 | 148 | // Check for the right reduction operation. |
931 | 148 | if (!RdxOp) |
932 | 0 | return RK_None; |
933 | 148 | Optional<ReductionData> RDLevel = getReductionData(RdxOp); |
934 | 148 | if (!RDLevel || 148 !RDLevel->hasSameData(*RD)148 ) |
935 | 0 | return RK_None; |
936 | 148 | |
937 | 148 | Value *NextRdxOp; |
938 | 148 | ShuffleVectorInst *Shuffle; |
939 | 148 | std::tie(NextRdxOp, Shuffle) = |
940 | 148 | getShuffleAndOtherOprd(RDLevel->LHS, RDLevel->RHS); |
941 | 148 | |
942 | 148 | // Check the current reduction operation and the shuffle use the same value. |
943 | 148 | if (Shuffle == nullptr) |
944 | 0 | return RK_None; |
945 | 148 | if (148 Shuffle->getOperand(0) != NextRdxOp148 ) |
946 | 48 | return RK_None; |
947 | 100 | |
948 | 100 | // Check that shuffle masks matches. |
949 | 284 | for (unsigned j = 0; 100 j != MaskStart284 ; ++j184 ) |
950 | 184 | ShuffleMask[j] = MaskStart + j; |
951 | 100 | // Fill the rest of the mask with -1 for undef. |
952 | 100 | std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1); |
953 | 100 | |
954 | 100 | SmallVector<int, 16> Mask = Shuffle->getShuffleMask(); |
955 | 100 | if (ShuffleMask != Mask) |
956 | 0 | return RK_None; |
957 | 100 | |
958 | 100 | RdxOp = dyn_cast<Instruction>(NextRdxOp); |
959 | 100 | NumVecElemsRemain /= 2; |
960 | 100 | MaskStart *= 2; |
961 | 100 | } |
962 | 92 | |
963 | 44 | Opcode = RD->Opcode; |
964 | 44 | Ty = VecTy; |
965 | 44 | return RD->Kind; |
966 | 160 | } |
967 | | |
968 | 18.2k | int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const { |
969 | 18.2k | switch (I->getOpcode()) { |
970 | 124 | case Instruction::GetElementPtr: |
971 | 124 | return getUserCost(I); |
972 | 18.2k | |
973 | 5.22k | case Instruction::Ret: |
974 | 5.22k | case Instruction::PHI: |
975 | 5.22k | case Instruction::Br: { |
976 | 5.22k | return getCFInstrCost(I->getOpcode()); |
977 | 5.22k | } |
978 | 4.99k | case Instruction::Add: |
979 | 4.99k | case Instruction::FAdd: |
980 | 4.99k | case Instruction::Sub: |
981 | 4.99k | case Instruction::FSub: |
982 | 4.99k | case Instruction::Mul: |
983 | 4.99k | case Instruction::FMul: |
984 | 4.99k | case Instruction::UDiv: |
985 | 4.99k | case Instruction::SDiv: |
986 | 4.99k | case Instruction::FDiv: |
987 | 4.99k | case Instruction::URem: |
988 | 4.99k | case Instruction::SRem: |
989 | 4.99k | case Instruction::FRem: |
990 | 4.99k | case Instruction::Shl: |
991 | 4.99k | case Instruction::LShr: |
992 | 4.99k | case Instruction::AShr: |
993 | 4.99k | case Instruction::And: |
994 | 4.99k | case Instruction::Or: |
995 | 4.99k | case Instruction::Xor: { |
996 | 4.99k | TargetTransformInfo::OperandValueKind Op1VK = |
997 | 4.99k | getOperandInfo(I->getOperand(0)); |
998 | 4.99k | TargetTransformInfo::OperandValueKind Op2VK = |
999 | 4.99k | getOperandInfo(I->getOperand(1)); |
1000 | 4.99k | SmallVector<const Value*, 2> Operands(I->operand_values()); |
1001 | 4.99k | return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, |
1002 | 4.99k | Op2VK, TargetTransformInfo::OP_None, |
1003 | 4.99k | TargetTransformInfo::OP_None, |
1004 | 4.99k | Operands); |
1005 | 4.99k | } |
1006 | 379 | case Instruction::Select: { |
1007 | 379 | const SelectInst *SI = cast<SelectInst>(I); |
1008 | 379 | Type *CondTy = SI->getCondition()->getType(); |
1009 | 379 | return getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy, I); |
1010 | 4.99k | } |
1011 | 836 | case Instruction::ICmp: |
1012 | 836 | case Instruction::FCmp: { |
1013 | 836 | Type *ValTy = I->getOperand(0)->getType(); |
1014 | 836 | return getCmpSelInstrCost(I->getOpcode(), ValTy, I->getType(), I); |
1015 | 836 | } |
1016 | 474 | case Instruction::Store: { |
1017 | 474 | const StoreInst *SI = cast<StoreInst>(I); |
1018 | 474 | Type *ValTy = SI->getValueOperand()->getType(); |
1019 | 474 | return getMemoryOpCost(I->getOpcode(), ValTy, |
1020 | 474 | SI->getAlignment(), |
1021 | 474 | SI->getPointerAddressSpace(), I); |
1022 | 836 | } |
1023 | 587 | case Instruction::Load: { |
1024 | 587 | const LoadInst *LI = cast<LoadInst>(I); |
1025 | 587 | return getMemoryOpCost(I->getOpcode(), I->getType(), |
1026 | 587 | LI->getAlignment(), |
1027 | 587 | LI->getPointerAddressSpace(), I); |
1028 | 836 | } |
1029 | 2.13k | case Instruction::ZExt: |
1030 | 2.13k | case Instruction::SExt: |
1031 | 2.13k | case Instruction::FPToUI: |
1032 | 2.13k | case Instruction::FPToSI: |
1033 | 2.13k | case Instruction::FPExt: |
1034 | 2.13k | case Instruction::PtrToInt: |
1035 | 2.13k | case Instruction::IntToPtr: |
1036 | 2.13k | case Instruction::SIToFP: |
1037 | 2.13k | case Instruction::UIToFP: |
1038 | 2.13k | case Instruction::Trunc: |
1039 | 2.13k | case Instruction::FPTrunc: |
1040 | 2.13k | case Instruction::BitCast: |
1041 | 2.13k | case Instruction::AddrSpaceCast: { |
1042 | 2.13k | Type *SrcTy = I->getOperand(0)->getType(); |
1043 | 2.13k | return getCastInstrCost(I->getOpcode(), I->getType(), SrcTy, I); |
1044 | 2.13k | } |
1045 | 160 | case Instruction::ExtractElement: { |
1046 | 160 | const ExtractElementInst * EEI = cast<ExtractElementInst>(I); |
1047 | 160 | ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1)); |
1048 | 160 | unsigned Idx = -1; |
1049 | 160 | if (CI) |
1050 | 153 | Idx = CI->getZExtValue(); |
1051 | 160 | |
1052 | 160 | // Try to match a reduction sequence (series of shufflevector and vector |
1053 | 160 | // adds followed by a extractelement). |
1054 | 160 | unsigned ReduxOpCode; |
1055 | 160 | Type *ReduxType; |
1056 | 160 | |
1057 | 160 | switch (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType)) { |
1058 | 44 | case RK_Arithmetic: |
1059 | 44 | return getArithmeticReductionCost(ReduxOpCode, ReduxType, |
1060 | 44 | /*IsPairwiseForm=*/false); |
1061 | 0 | case RK_MinMax: |
1062 | 0 | return getMinMaxReductionCost( |
1063 | 0 | ReduxType, CmpInst::makeCmpResultType(ReduxType), |
1064 | 0 | /*IsPairwiseForm=*/false, /*IsUnsigned=*/false); |
1065 | 0 | case RK_UnsignedMinMax: |
1066 | 0 | return getMinMaxReductionCost( |
1067 | 0 | ReduxType, CmpInst::makeCmpResultType(ReduxType), |
1068 | 0 | /*IsPairwiseForm=*/false, /*IsUnsigned=*/true); |
1069 | 116 | case RK_None: |
1070 | 116 | break; |
1071 | 116 | } |
1072 | 116 | |
1073 | 116 | switch (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType)) { |
1074 | 48 | case RK_Arithmetic: |
1075 | 48 | return getArithmeticReductionCost(ReduxOpCode, ReduxType, |
1076 | 48 | /*IsPairwiseForm=*/true); |
1077 | 0 | case RK_MinMax: |
1078 | 0 | return getMinMaxReductionCost( |
1079 | 0 | ReduxType, CmpInst::makeCmpResultType(ReduxType), |
1080 | 0 | /*IsPairwiseForm=*/true, /*IsUnsigned=*/false); |
1081 | 0 | case RK_UnsignedMinMax: |
1082 | 0 | return getMinMaxReductionCost( |
1083 | 0 | ReduxType, CmpInst::makeCmpResultType(ReduxType), |
1084 | 0 | /*IsPairwiseForm=*/true, /*IsUnsigned=*/true); |
1085 | 68 | case RK_None: |
1086 | 68 | break; |
1087 | 68 | } |
1088 | 68 | |
1089 | 68 | return getVectorInstrCost(I->getOpcode(), |
1090 | 68 | EEI->getOperand(0)->getType(), Idx); |
1091 | 68 | } |
1092 | 477 | case Instruction::InsertElement: { |
1093 | 477 | const InsertElementInst * IE = cast<InsertElementInst>(I); |
1094 | 477 | ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2)); |
1095 | 477 | unsigned Idx = -1; |
1096 | 477 | if (CI) |
1097 | 477 | Idx = CI->getZExtValue(); |
1098 | 477 | return getVectorInstrCost(I->getOpcode(), |
1099 | 477 | IE->getType(), Idx); |
1100 | 68 | } |
1101 | 1.65k | case Instruction::ShuffleVector: { |
1102 | 1.65k | const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I); |
1103 | 1.65k | Type *VecTypOp0 = Shuffle->getOperand(0)->getType(); |
1104 | 1.65k | unsigned NumVecElems = VecTypOp0->getVectorNumElements(); |
1105 | 1.65k | SmallVector<int, 16> Mask = Shuffle->getShuffleMask(); |
1106 | 1.65k | |
1107 | 1.65k | if (NumVecElems == Mask.size()1.65k ) { |
1108 | 1.65k | if (isReverseVectorMask(Mask)) |
1109 | 199 | return getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0, |
1110 | 199 | 0, nullptr); |
1111 | 1.45k | if (1.45k isAlternateVectorMask(Mask)1.45k ) |
1112 | 195 | return getShuffleCost(TargetTransformInfo::SK_Alternate, |
1113 | 195 | VecTypOp0, 0, nullptr); |
1114 | 1.25k | |
1115 | 1.25k | if (1.25k isZeroEltBroadcastVectorMask(Mask)1.25k ) |
1116 | 614 | return getShuffleCost(TargetTransformInfo::SK_Broadcast, |
1117 | 614 | VecTypOp0, 0, nullptr); |
1118 | 643 | |
1119 | 643 | if (643 isSingleSourceVectorMask(Mask)643 ) |
1120 | 427 | return getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, |
1121 | 427 | VecTypOp0, 0, nullptr); |
1122 | 216 | |
1123 | 216 | return getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, |
1124 | 216 | VecTypOp0, 0, nullptr); |
1125 | 216 | } |
1126 | 0 |
|
1127 | 0 | return -1; |
1128 | 0 | } |
1129 | 1.21k | case Instruction::Call: |
1130 | 1.21k | if (const IntrinsicInst *II1.21k = dyn_cast<IntrinsicInst>(I)) { |
1131 | 1.21k | SmallVector<Value *, 4> Args(II->arg_operands()); |
1132 | 1.21k | |
1133 | 1.21k | FastMathFlags FMF; |
1134 | 1.21k | if (auto *FPMO = dyn_cast<FPMathOperator>(II)) |
1135 | 237 | FMF = FPMO->getFastMathFlags(); |
1136 | 1.21k | |
1137 | 1.21k | return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), |
1138 | 1.21k | Args, FMF); |
1139 | 1.21k | } |
1140 | 0 | return -1; |
1141 | 1 | default: |
1142 | 1 | // We don't have any information on this instruction. |
1143 | 1 | return -1; |
1144 | 0 | } |
1145 | 0 | } |
1146 | | |
1147 | 16.9M | TargetTransformInfo::Concept::~Concept() {} |
1148 | | |
1149 | 5.52k | TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {} |
1150 | | |
1151 | | TargetIRAnalysis::TargetIRAnalysis( |
1152 | | std::function<Result(const Function &)> TTICallback) |
1153 | 102k | : TTICallback(std::move(TTICallback)) {} |
1154 | | |
1155 | | TargetIRAnalysis::Result TargetIRAnalysis::run(const Function &F, |
1156 | 16.9M | FunctionAnalysisManager &) { |
1157 | 16.9M | return TTICallback(F); |
1158 | 16.9M | } |
1159 | | |
1160 | | AnalysisKey TargetIRAnalysis::Key; |
1161 | | |
1162 | 9.78k | TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(const Function &F) { |
1163 | 9.78k | return Result(F.getParent()->getDataLayout()); |
1164 | 9.78k | } |
1165 | | |
1166 | | // Register the basic pass. |
1167 | | INITIALIZE_PASS(TargetTransformInfoWrapperPass, "tti", |
1168 | | "Target Transform Information", false, true) |
1169 | | char TargetTransformInfoWrapperPass::ID = 0; |
1170 | | |
1171 | 0 | void TargetTransformInfoWrapperPass::anchor() {} |
1172 | | |
1173 | | TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass() |
1174 | 131 | : ImmutablePass(ID) { |
1175 | 131 | initializeTargetTransformInfoWrapperPassPass( |
1176 | 131 | *PassRegistry::getPassRegistry()); |
1177 | 131 | } |
1178 | | |
1179 | | TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass( |
1180 | | TargetIRAnalysis TIRA) |
1181 | 107k | : ImmutablePass(ID), TIRA(std::move(TIRA)) { |
1182 | 107k | initializeTargetTransformInfoWrapperPassPass( |
1183 | 107k | *PassRegistry::getPassRegistry()); |
1184 | 107k | } |
1185 | | |
1186 | 16.9M | TargetTransformInfo &TargetTransformInfoWrapperPass::getTTI(const Function &F) { |
1187 | 16.9M | FunctionAnalysisManager DummyFAM; |
1188 | 16.9M | TTI = TIRA.run(F, DummyFAM); |
1189 | 16.9M | return *TTI; |
1190 | 16.9M | } |
1191 | | |
1192 | | ImmutablePass * |
1193 | 107k | llvm::createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA) { |
1194 | 107k | return new TargetTransformInfoWrapperPass(std::move(TIRA)); |
1195 | 107k | } |