/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
Line | Count | Source (jump to first uncovered line) |
1 | | //===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------*- C++ -*-===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | /// \file |
11 | | /// This file defines a TargetTransformInfo::Concept conforming object specific to the |
12 | | /// AMDGPU target machine. It uses the target's detailed information to |
13 | | /// provide more precise answers to certain TTI queries, while letting the |
14 | | /// target independent and default TTI implementations handle the rest. |
15 | | // |
16 | | //===----------------------------------------------------------------------===// |
17 | | |
18 | | #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H |
19 | | #define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H |
20 | | |
21 | | #include "AMDGPU.h" |
22 | | #include "AMDGPUSubtarget.h" |
23 | | #include "AMDGPUTargetMachine.h" |
24 | | #include "Utils/AMDGPUBaseInfo.h" |
25 | | #include "llvm/ADT/ArrayRef.h" |
26 | | #include "llvm/Analysis/TargetTransformInfo.h" |
27 | | #include "llvm/CodeGen/BasicTTIImpl.h" |
28 | | #include "llvm/IR/Function.h" |
29 | | #include "llvm/MC/SubtargetFeature.h" |
30 | | #include "llvm/Support/MathExtras.h" |
31 | | #include <cassert> |
32 | | |
33 | | namespace llvm { |
34 | | |
35 | | class AMDGPUTargetLowering; |
36 | | class Loop; |
37 | | class ScalarEvolution; |
38 | | class Type; |
39 | | class Value; |
40 | | |
41 | | class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> { |
42 | | using BaseT = BasicTTIImplBase<AMDGPUTTIImpl>; |
43 | | using TTI = TargetTransformInfo; |
44 | | |
45 | | friend BaseT; |
46 | | |
47 | | const AMDGPUSubtarget *ST; |
48 | | const AMDGPUTargetLowering *TLI; |
49 | | bool IsGraphicsShader; |
50 | | |
51 | | const FeatureBitset InlineFeatureIgnoreList = { |
52 | | // Codegen control options which don't matter. |
53 | | AMDGPU::FeatureEnableLoadStoreOpt, |
54 | | AMDGPU::FeatureEnableSIScheduler, |
55 | | AMDGPU::FeatureEnableUnsafeDSOffsetFolding, |
56 | | AMDGPU::FeatureFlatForGlobal, |
57 | | AMDGPU::FeaturePromoteAlloca, |
58 | | AMDGPU::FeatureUnalignedBufferAccess, |
59 | | AMDGPU::FeatureUnalignedScratchAccess, |
60 | | |
61 | | AMDGPU::FeatureAutoWaitcntBeforeBarrier, |
62 | | AMDGPU::FeatureDebuggerEmitPrologue, |
63 | | AMDGPU::FeatureDebuggerInsertNops, |
64 | | AMDGPU::FeatureDebuggerReserveRegs, |
65 | | |
66 | | // Property of the kernel/environment which can't actually differ. |
67 | | AMDGPU::FeatureSGPRInitBug, |
68 | | AMDGPU::FeatureXNACK, |
69 | | AMDGPU::FeatureTrapHandler, |
70 | | |
71 | | // Perf-tuning features |
72 | | AMDGPU::FeatureFastFMAF32, |
73 | | AMDGPU::HalfRate64Ops |
74 | | }; |
75 | | |
76 | 0 | const AMDGPUSubtarget *getST() const { return ST; } |
77 | 39.1k | const AMDGPUTargetLowering *getTLI() const { return TLI; } |
78 | | |
79 | 198 | static inline int getFullRateInstrCost() { |
80 | 198 | return TargetTransformInfo::TCC_Basic; |
81 | 198 | } |
82 | | |
83 | 24 | static inline int getHalfRateInstrCost() { |
84 | 24 | return 2 * TargetTransformInfo::TCC_Basic; |
85 | 24 | } |
86 | | |
87 | | // TODO: The size is usually 8 bytes, but it takes 4x as many cycles. Maybe |
88 | | // the cost should be 2 or 4. |
89 | 117 | static inline int getQuarterRateInstrCost() { |
90 | 117 | return 3 * TargetTransformInfo::TCC_Basic; |
91 | 117 | } |
92 | | |
93 | | // On some parts, normal fp64 operations are half rate, and others |
94 | | // quarter. This also applies to some integer operations. |
95 | 48 | inline int get64BitInstrCost() const { |
96 | 48 | return ST->hasHalfRate64Ops() ? |
97 | 48 | getHalfRateInstrCost()24 : getQuarterRateInstrCost()24 ; |
98 | 48 | } |
99 | | |
100 | | public: |
101 | | explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F) |
102 | | : BaseT(TM, F.getParent()->getDataLayout()), |
103 | | ST(TM->getSubtargetImpl(F)), |
104 | | TLI(ST->getTargetLowering()), |
105 | 290k | IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())) {} |
106 | | |
107 | 61.2k | bool hasBranchDivergence() { return true; } |
108 | | |
109 | | void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, |
110 | | TTI::UnrollingPreferences &UP); |
111 | | |
112 | 7 | TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) { |
113 | 7 | assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); |
114 | 7 | return TTI::PSK_FastHardware; |
115 | 7 | } |
116 | | |
117 | | unsigned getHardwareNumberOfRegisters(bool Vector) const; |
118 | | unsigned getNumberOfRegisters(bool Vector) const; |
119 | | unsigned getRegisterBitWidth(bool Vector) const; |
120 | | unsigned getMinVectorRegisterBitWidth() const; |
121 | | unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const; |
122 | | |
123 | | bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, |
124 | | unsigned Alignment, |
125 | | unsigned AddrSpace) const; |
126 | | bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, |
127 | | unsigned Alignment, |
128 | | unsigned AddrSpace) const; |
129 | | bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, |
130 | | unsigned Alignment, |
131 | | unsigned AddrSpace) const; |
132 | | |
133 | | unsigned getMaxInterleaveFactor(unsigned VF); |
134 | | |
135 | | int getArithmeticInstrCost( |
136 | | unsigned Opcode, Type *Ty, |
137 | | TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, |
138 | | TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, |
139 | | TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, |
140 | | TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None, |
141 | | ArrayRef<const Value *> Args = ArrayRef<const Value *>()); |
142 | | |
143 | | unsigned getCFInstrCost(unsigned Opcode); |
144 | | |
145 | | int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index); |
146 | | bool isSourceOfDivergence(const Value *V) const; |
147 | | bool isAlwaysUniform(const Value *V) const; |
148 | | |
149 | 17.5k | unsigned getFlatAddressSpace() const { |
150 | 17.5k | // Don't bother running InferAddressSpaces pass on graphics shaders which |
151 | 17.5k | // don't use flat addressing. |
152 | 17.5k | if (IsGraphicsShader) |
153 | 597 | return -1; |
154 | 16.9k | return ST->hasFlatAddressSpace() ? |
155 | 16.9k | ST->getAMDGPUAS().FLAT_ADDRESS9.40k : ST->getAMDGPUAS().UNKNOWN_ADDRESS_SPACE7.52k ; |
156 | 17.5k | } |
157 | | |
158 | 4 | unsigned getVectorSplitCost() { return 0; } |
159 | | |
160 | | unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, |
161 | | Type *SubTp); |
162 | | |
163 | | bool areInlineCompatible(const Function *Caller, |
164 | | const Function *Callee) const; |
165 | | |
166 | 24 | unsigned getInliningThresholdMultiplier() { return 9; } |
167 | | }; |
168 | | |
169 | | } // end namespace llvm |
170 | | |
171 | | #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H |