/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/clang-build/lib/Target/AMDGPU/AMDGPUGenSubtargetInfo.inc
Line | Count | Source (jump to first uncovered line) |
1 | | /*===- TableGen'erated file -------------------------------------*- C++ -*-===*\ |
2 | | |* *| |
3 | | |* Subtarget Enumeration Source Fragment *| |
4 | | |* *| |
5 | | |* Automatically generated file, do not edit! *| |
6 | | |* *| |
7 | | \*===----------------------------------------------------------------------===*/ |
8 | | |
9 | | |
10 | | #ifdef GET_SUBTARGETINFO_ENUM |
11 | | #undef GET_SUBTARGETINFO_ENUM |
12 | | |
13 | | namespace llvm { |
14 | | namespace AMDGPU { |
15 | | enum { |
16 | | Feature16BitInsts = 0, |
17 | | FeatureAddNoCarryInsts = 1, |
18 | | FeatureApertureRegs = 2, |
19 | | FeatureAtomicFaddInsts = 3, |
20 | | FeatureAutoWaitcntBeforeBarrier = 4, |
21 | | FeatureCIInsts = 5, |
22 | | FeatureCodeObjectV3 = 6, |
23 | | FeatureCuMode = 7, |
24 | | FeatureDLInsts = 8, |
25 | | FeatureDPP = 9, |
26 | | FeatureDPP8 = 10, |
27 | | FeatureDisable = 11, |
28 | | FeatureDoesNotSupportSRAMECC = 12, |
29 | | FeatureDoesNotSupportXNACK = 13, |
30 | | FeatureDot1Insts = 14, |
31 | | FeatureDot2Insts = 15, |
32 | | FeatureDot3Insts = 16, |
33 | | FeatureDot4Insts = 17, |
34 | | FeatureDot5Insts = 18, |
35 | | FeatureDot6Insts = 19, |
36 | | FeatureDumpCode = 20, |
37 | | FeatureDumpCodeLower = 21, |
38 | | FeatureEnableDS128 = 22, |
39 | | FeatureEnableLoadStoreOpt = 23, |
40 | | FeatureEnablePRTStrictNull = 24, |
41 | | FeatureEnableSIScheduler = 25, |
42 | | FeatureEnableUnsafeDSOffsetFolding = 26, |
43 | | FeatureFMA = 27, |
44 | | FeatureFP16Denormals = 28, |
45 | | FeatureFP32Denormals = 29, |
46 | | FeatureFP64 = 30, |
47 | | FeatureFP64Denormals = 31, |
48 | | FeatureFP64FP16Denormals = 32, |
49 | | FeatureFPExceptions = 33, |
50 | | FeatureFastFMAF32 = 34, |
51 | | FeatureFlatAddressSpace = 35, |
52 | | FeatureFlatForGlobal = 36, |
53 | | FeatureFlatGlobalInsts = 37, |
54 | | FeatureFlatInstOffsets = 38, |
55 | | FeatureFlatScratchInsts = 39, |
56 | | FeatureFlatSegmentOffsetBug = 40, |
57 | | FeatureFmaMixInsts = 41, |
58 | | FeatureGCN3Encoding = 42, |
59 | | FeatureGFX7GFX8GFX9Insts = 43, |
60 | | FeatureGFX8Insts = 44, |
61 | | FeatureGFX9 = 45, |
62 | | FeatureGFX9Insts = 46, |
63 | | FeatureGFX10 = 47, |
64 | | FeatureGFX10Insts = 48, |
65 | | FeatureInstFwdPrefetchBug = 49, |
66 | | FeatureIntClamp = 50, |
67 | | FeatureInv2PiInlineImm = 51, |
68 | | FeatureLDSBankCount16 = 52, |
69 | | FeatureLDSBankCount32 = 53, |
70 | | FeatureLdsBranchVmemWARHazard = 54, |
71 | | FeatureLdsMisalignedBug = 55, |
72 | | FeatureLocalMemorySize0 = 56, |
73 | | FeatureLocalMemorySize32768 = 57, |
74 | | FeatureLocalMemorySize65536 = 58, |
75 | | FeatureMAIInsts = 59, |
76 | | FeatureMIMG_R128 = 60, |
77 | | FeatureMadMixInsts = 61, |
78 | | FeatureMaxPrivateElementSize4 = 62, |
79 | | FeatureMaxPrivateElementSize8 = 63, |
80 | | FeatureMaxPrivateElementSize16 = 64, |
81 | | FeatureMovrel = 65, |
82 | | FeatureNSAEncoding = 66, |
83 | | FeatureNSAtoVMEMBug = 67, |
84 | | FeatureNoDataDepHazard = 68, |
85 | | FeatureNoSdstCMPX = 69, |
86 | | FeatureOffset3fBug = 70, |
87 | | FeaturePkFmacF16Inst = 71, |
88 | | FeaturePromoteAlloca = 72, |
89 | | FeatureR128A16 = 73, |
90 | | FeatureRegisterBanking = 74, |
91 | | FeatureSDWA = 75, |
92 | | FeatureSDWAMac = 76, |
93 | | FeatureSDWAOmod = 77, |
94 | | FeatureSDWAOutModsVOPC = 78, |
95 | | FeatureSDWAScalar = 79, |
96 | | FeatureSDWASdst = 80, |
97 | | FeatureSGPRInitBug = 81, |
98 | | FeatureSMEMtoVectorWriteHazard = 82, |
99 | | FeatureSMemRealTime = 83, |
100 | | FeatureSRAMECC = 84, |
101 | | FeatureScalarAtomics = 85, |
102 | | FeatureScalarFlatScratchInsts = 86, |
103 | | FeatureScalarStores = 87, |
104 | | FeatureSeaIslands = 88, |
105 | | FeatureSouthernIslands = 89, |
106 | | FeatureTrapHandler = 90, |
107 | | FeatureTrigReducedRange = 91, |
108 | | FeatureUnalignedBufferAccess = 92, |
109 | | FeatureUnalignedScratchAccess = 93, |
110 | | FeatureUnpackedD16VMem = 94, |
111 | | FeatureVGPRIndexMode = 95, |
112 | | FeatureVMEMtoScalarWriteHazard = 96, |
113 | | FeatureVOP3Literal = 97, |
114 | | FeatureVOP3P = 98, |
115 | | FeatureVcmpxExecWARHazard = 99, |
116 | | FeatureVcmpxPermlaneHazard = 100, |
117 | | FeatureVolcanicIslands = 101, |
118 | | FeatureVscnt = 102, |
119 | | FeatureWavefrontSize16 = 103, |
120 | | FeatureWavefrontSize32 = 104, |
121 | | FeatureWavefrontSize64 = 105, |
122 | | FeatureXNACK = 106, |
123 | | HalfRate64Ops = 107, |
124 | | NumSubtargetFeatures = 108 |
125 | | }; |
126 | | } // end namespace AMDGPU |
127 | | } // end namespace llvm |
128 | | |
129 | | #endif // GET_SUBTARGETINFO_ENUM |
130 | | |
131 | | |
132 | | #ifdef GET_SUBTARGETINFO_MC_DESC |
133 | | #undef GET_SUBTARGETINFO_MC_DESC |
134 | | |
135 | | namespace llvm { |
136 | | // Sorted (by key) array of values for CPU features. |
137 | | extern const llvm::SubtargetFeatureKV AMDGPUFeatureKV[] = { |
138 | | { "16-bit-insts", "Has i16/f16 instructions", AMDGPU::Feature16BitInsts, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
139 | | { "DumpCode", "Dump MachineInstrs in the CodeEmitter", AMDGPU::FeatureDumpCode, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
140 | | { "add-no-carry-insts", "Have VALU add/sub instructions without carry out", AMDGPU::FeatureAddNoCarryInsts, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
141 | | { "aperture-regs", "Has Memory Aperture Base and Size Registers", AMDGPU::FeatureApertureRegs, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
142 | | { "atomic-fadd-insts", "Has buffer_atomic_add_f32, buffer_atomic_pk_add_f16, global_atomic_add_f32, global_atomic_pk_add_f16 instructions", AMDGPU::FeatureAtomicFaddInsts, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
143 | | { "auto-waitcnt-before-barrier", "Hardware automatically inserts waitcnt before barrier", AMDGPU::FeatureAutoWaitcntBeforeBarrier, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
144 | | { "ci-insts", "Additional instructions for CI+", AMDGPU::FeatureCIInsts, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
145 | | { "code-object-v3", "Generate code object version 3", AMDGPU::FeatureCodeObjectV3, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
146 | | { "cumode", "Enable CU wavefront execution mode", AMDGPU::FeatureCuMode, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
147 | | { "dl-insts", "Has v_fmac_f32 and v_xnor_b32 instructions", AMDGPU::FeatureDLInsts, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
148 | | { "dot1-insts", "Has v_dot4_i32_i8 and v_dot8_i32_i4 instructions", AMDGPU::FeatureDot1Insts, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
149 | | { "dot2-insts", "Has v_dot2_f32_f16, v_dot2_i32_i16, v_dot2_u32_u16, v_dot4_u32_u8, v_dot8_u32_u4 instructions", AMDGPU::FeatureDot2Insts, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
150 | | { "dot3-insts", "Has v_dot8c_i32_i4 instruction", AMDGPU::FeatureDot3Insts, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
151 | | { "dot4-insts", "Has v_dot2c_i32_i16 instruction", AMDGPU::FeatureDot4Insts, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
152 | | { "dot5-insts", "Has v_dot2c_f32_f16 instruction", AMDGPU::FeatureDot5Insts, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
153 | | { "dot6-insts", "Has v_dot4c_i32_i8 instruction", AMDGPU::FeatureDot6Insts, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
154 | | { "dpp", "Support DPP (Data Parallel Primitives) extension", AMDGPU::FeatureDPP, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
155 | | { "dpp8", "Support DPP8 (Data Parallel Primitives) extension", AMDGPU::FeatureDPP8, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
156 | | { "dumpcode", "Dump MachineInstrs in the CodeEmitter", AMDGPU::FeatureDumpCodeLower, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
157 | | { "enable-ds128", "Use ds_{read|write}_b128", AMDGPU::FeatureEnableDS128, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
158 | | { "enable-prt-strict-null", "Enable zeroing of result registers for sparse texture fetches", AMDGPU::FeatureEnablePRTStrictNull, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
159 | | { "fast-fmaf", "Assuming f32 fma is at least as fast as mul + add", AMDGPU::FeatureFastFMAF32, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
160 | | { "flat-address-space", "Support flat address space", AMDGPU::FeatureFlatAddressSpace, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
161 | | { "flat-for-global", "Force to generate flat instruction for global", AMDGPU::FeatureFlatForGlobal, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
162 | | { "flat-global-insts", "Have global_* flat memory instructions", AMDGPU::FeatureFlatGlobalInsts, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
163 | | { "flat-inst-offsets", "Flat instructions have immediate offset addressing mode", AMDGPU::FeatureFlatInstOffsets, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
164 | | { "flat-scratch-insts", "Have scratch_* flat memory instructions", AMDGPU::FeatureFlatScratchInsts, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
165 | | { "flat-segment-offset-bug", "GFX10 bug, inst_offset ignored in flat segment", AMDGPU::FeatureFlatSegmentOffsetBug, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
166 | | { "fma-mix-insts", "Has v_fma_mix_f32, v_fma_mixlo_f16, v_fma_mixhi_f16 instructions", AMDGPU::FeatureFmaMixInsts, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
167 | | { "fmaf", "Enable single precision FMA (not as fast as mul+add, but fused)", AMDGPU::FeatureFMA, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
168 | | { "fp-exceptions", "Enable floating point exceptions", AMDGPU::FeatureFPExceptions, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
169 | | { "fp16-denormals", "Enable half precision denormal handling", AMDGPU::FeatureFP16Denormals, { { { 0x100000000ULL, 0x0ULL, 0x0ULL, } } } }, |
170 | | { "fp32-denormals", "Enable single precision denormal handling", AMDGPU::FeatureFP32Denormals, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
171 | | { "fp64", "Enable double precision operations", AMDGPU::FeatureFP64, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
172 | | { "fp64-denormals", "Enable double and half precision denormal handling", AMDGPU::FeatureFP64Denormals, { { { 0x140000000ULL, 0x0ULL, 0x0ULL, } } } }, |
173 | | { "fp64-fp16-denormals", "Enable double and half precision denormal handling", AMDGPU::FeatureFP64FP16Denormals, { { { 0x40000000ULL, 0x0ULL, 0x0ULL, } } } }, |
174 | | { "gcn3-encoding", "Encoding format for VI", AMDGPU::FeatureGCN3Encoding, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
175 | | { "gfx10", "GFX10 GPU generation", AMDGPU::FeatureGFX10, { { { 0x140d52ec40001627ULL, 0x460009acb2ULL, 0x0ULL, } } } }, |
176 | | { "gfx10-insts", "Additional instructions for GFX10+", AMDGPU::FeatureGFX10Insts, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
177 | | { "gfx7-gfx8-gfx9-insts", "Instructions shared in GFX7, GFX8, GFX9", AMDGPU::FeatureGFX7GFX8GFX9Insts, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
178 | | { "gfx8-insts", "Additional instructions for GFX8+", AMDGPU::FeatureGFX8Insts, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
179 | | { "gfx9", "GFX9 GPU generation", AMDGPU::FeatureGFX9, { { { 0x40c5cec40000227ULL, 0x20480e9aa00ULL, 0x0ULL, } } } }, |
180 | | { "gfx9-insts", "Additional instructions for GFX9+", AMDGPU::FeatureGFX9Insts, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
181 | | { "half-rate-64-ops", "Most fp64 instructions are half rate instead of quarter", AMDGPU::HalfRate64Ops, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
182 | | { "inst-fwd-prefetch-bug", "S_INST_PREFETCH instruction causes shader to hang", AMDGPU::FeatureInstFwdPrefetchBug, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
183 | | { "int-clamp-insts", "Support clamp for integer destination", AMDGPU::FeatureIntClamp, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
184 | | { "inv-2pi-inline-imm", "Has 1 / (2 * pi) as inline immediate", AMDGPU::FeatureInv2PiInlineImm, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
185 | | { "lds-branch-vmem-war-hazard", "Switching between LDS and VMEM-tex not waiting VM_VSRC=0", AMDGPU::FeatureLdsBranchVmemWARHazard, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
186 | | { "lds-misaligned-bug", "Some GFX10 bug with misaligned multi-dword LDS access in WGP mode", AMDGPU::FeatureLdsMisalignedBug, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
187 | | { "ldsbankcount16", "The number of LDS banks per compute unit.", AMDGPU::FeatureLDSBankCount16, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
188 | | { "ldsbankcount32", "The number of LDS banks per compute unit.", AMDGPU::FeatureLDSBankCount32, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
189 | | { "load-store-opt", "Enable SI load/store optimizer pass", AMDGPU::FeatureEnableLoadStoreOpt, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
190 | | { "localmemorysize0", "The size of local memory in bytes", AMDGPU::FeatureLocalMemorySize0, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
191 | | { "localmemorysize32768", "The size of local memory in bytes", AMDGPU::FeatureLocalMemorySize32768, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
192 | | { "localmemorysize65536", "The size of local memory in bytes", AMDGPU::FeatureLocalMemorySize65536, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
193 | | { "mad-mix-insts", "Has v_mad_mix_f32, v_mad_mixlo_f16, v_mad_mixhi_f16 instructions", AMDGPU::FeatureMadMixInsts, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
194 | | { "mai-insts", "Has mAI instructions", AMDGPU::FeatureMAIInsts, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
195 | | { "max-private-element-size-16", "Maximum private access size may be 16", AMDGPU::FeatureMaxPrivateElementSize16, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
196 | | { "max-private-element-size-4", "Maximum private access size may be 4", AMDGPU::FeatureMaxPrivateElementSize4, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
197 | | { "max-private-element-size-8", "Maximum private access size may be 8", AMDGPU::FeatureMaxPrivateElementSize8, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
198 | | { "mimg-r128", "Support 128-bit texture resources", AMDGPU::FeatureMIMG_R128, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
199 | | { "movrel", "Has v_movrel*_b32 instructions", AMDGPU::FeatureMovrel, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
200 | | { "no-data-dep-hazard", "Does not need SW waitstates", AMDGPU::FeatureNoDataDepHazard, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
201 | | { "no-sdst-cmpx", "V_CMPX does not write VCC/SGPR in addition to EXEC", AMDGPU::FeatureNoSdstCMPX, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
202 | | { "no-sram-ecc-support", "Hardware does not support SRAM ECC", AMDGPU::FeatureDoesNotSupportSRAMECC, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
203 | | { "no-xnack-support", "Hardware does not support XNACK", AMDGPU::FeatureDoesNotSupportXNACK, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
204 | | { "nsa-encoding", "Support NSA encoding for image instructions", AMDGPU::FeatureNSAEncoding, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
205 | | { "nsa-to-vmem-bug", "MIMG-NSA followed by VMEM fail if EXEC_LO or EXEC_HI equals zero", AMDGPU::FeatureNSAtoVMEMBug, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
206 | | { "offset-3f-bug", "Branch offset of 3f hardware bug", AMDGPU::FeatureOffset3fBug, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
207 | | { "pk-fmac-f16-inst", "Has v_pk_fmac_f16 instruction", AMDGPU::FeaturePkFmacF16Inst, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
208 | | { "promote-alloca", "Enable promote alloca pass", AMDGPU::FeaturePromoteAlloca, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
209 | | { "r128-a16", "Support 16 bit coordindates/gradients/lod/clamp/mip types on gfx9", AMDGPU::FeatureR128A16, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
210 | | { "register-banking", "Has register banking", AMDGPU::FeatureRegisterBanking, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
211 | | { "s-memrealtime", "Has s_memrealtime instruction", AMDGPU::FeatureSMemRealTime, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
212 | | { "scalar-atomics", "Has atomic scalar memory instructions", AMDGPU::FeatureScalarAtomics, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
213 | | { "scalar-flat-scratch-insts", "Have s_scratch_* flat memory instructions", AMDGPU::FeatureScalarFlatScratchInsts, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
214 | | { "scalar-stores", "Has store scalar memory instructions", AMDGPU::FeatureScalarStores, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
215 | | { "sdwa", "Support SDWA (Sub-DWORD Addressing) extension", AMDGPU::FeatureSDWA, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
216 | | { "sdwa-mav", "Support v_mac_f32/f16 with SDWA (Sub-DWORD Addressing) extension", AMDGPU::FeatureSDWAMac, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
217 | | { "sdwa-omod", "Support OMod with SDWA (Sub-DWORD Addressing) extension", AMDGPU::FeatureSDWAOmod, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
218 | | { "sdwa-out-mods-vopc", "Support clamp for VOPC with SDWA (Sub-DWORD Addressing) extension", AMDGPU::FeatureSDWAOutModsVOPC, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
219 | | { "sdwa-scalar", "Support scalar register with SDWA (Sub-DWORD Addressing) extension", AMDGPU::FeatureSDWAScalar, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
220 | | { "sdwa-sdst", "Support scalar dst for VOPC with SDWA (Sub-DWORD Addressing) extension", AMDGPU::FeatureSDWASdst, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
221 | | { "sea-islands", "SEA_ISLANDS GPU generation", AMDGPU::FeatureSeaIslands, { { { 0x1400080840001020ULL, 0x20008000002ULL, 0x0ULL, } } } }, |
222 | | { "sgpr-init-bug", "VI SGPR initialization bug requiring a fixed SGPR allocation size", AMDGPU::FeatureSGPRInitBug, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
223 | | { "si-scheduler", "Enable SI Machine Scheduler", AMDGPU::FeatureEnableSIScheduler, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
224 | | { "smem-to-vector-write-hazard", "s_load_dword followed by v_cmp page faults", AMDGPU::FeatureSMEMtoVectorWriteHazard, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
225 | | { "southern-islands", "SOUTHERN_ISLANDS GPU generation", AMDGPU::FeatureSouthernIslands, { { { 0x1220000040003000ULL, 0x20008000002ULL, 0x0ULL, } } } }, |
226 | | { "sram-ecc", "Enable SRAM ECC", AMDGPU::FeatureSRAMECC, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
227 | | { "trap-handler", "Trap handler support", AMDGPU::FeatureTrapHandler, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
228 | | { "trig-reduced-range", "Requires use of fract on arguments to trig instructions", AMDGPU::FeatureTrigReducedRange, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
229 | | { "unaligned-buffer-access", "Support unaligned global loads and stores", AMDGPU::FeatureUnalignedBufferAccess, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
230 | | { "unaligned-scratch-access", "Support unaligned scratch loads and stores", AMDGPU::FeatureUnalignedScratchAccess, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
231 | | { "unpacked-d16-vmem", "Has unpacked d16 vmem instructions", AMDGPU::FeatureUnpackedD16VMem, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
232 | | { "unsafe-ds-offset-folding", "Force using DS instruction immediate offsets on SI", AMDGPU::FeatureEnableUnsafeDSOffsetFolding, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
233 | | { "vcmpx-exec-war-hazard", "V_CMPX WAR hazard on EXEC (V_CMPX issue ONLY)", AMDGPU::FeatureVcmpxExecWARHazard, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
234 | | { "vcmpx-permlane-hazard", "TODO: describe me", AMDGPU::FeatureVcmpxPermlaneHazard, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
235 | | { "vgpr-index-mode", "Has VGPR mode register indexing", AMDGPU::FeatureVGPRIndexMode, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
236 | | { "vmem-to-scalar-write-hazard", "VMEM instruction followed by scalar writing to EXEC mask, M0 or SGPR leads to incorrect execution.", AMDGPU::FeatureVMEMtoScalarWriteHazard, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
237 | | { "volcanic-islands", "VOLCANIC_ISLANDS GPU generation", AMDGPU::FeatureVolcanicIslands, { { { 0x140c1c0840001221ULL, 0x20088885802ULL, 0x0ULL, } } } }, |
238 | | { "vop3-literal", "Can use one literal in VOP3", AMDGPU::FeatureVOP3Literal, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
239 | | { "vop3p", "Has VOP3P packed instructions", AMDGPU::FeatureVOP3P, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
240 | | { "vscnt", "Has separate store vscnt counter", AMDGPU::FeatureVscnt, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
241 | | { "wavefrontsize16", "The number of threads per wavefront", AMDGPU::FeatureWavefrontSize16, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
242 | | { "wavefrontsize32", "The number of threads per wavefront", AMDGPU::FeatureWavefrontSize32, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
243 | | { "wavefrontsize64", "The number of threads per wavefront", AMDGPU::FeatureWavefrontSize64, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
244 | | { "xnack", "Enable XNACK support", AMDGPU::FeatureXNACK, { { { 0x0ULL, 0x0ULL, 0x0ULL, } } } }, |
245 | | }; |
246 | | |
247 | | #ifdef DBGFIELD |
248 | | #error "<target>GenSubtargetInfo.inc requires a DBGFIELD macro" |
249 | | #endif |
250 | | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
251 | | #define DBGFIELD(x) x, |
252 | | #else |
253 | | #define DBGFIELD(x) |
254 | | #endif |
255 | | |
256 | | // =============================================================== |
257 | | // Data tables for the new per-operand machine model. |
258 | | |
259 | | // {ProcResourceIdx, Cycles} |
260 | | extern const llvm::MCWriteProcResEntry AMDGPUWriteProcResTable[] = { |
261 | | { 0, 0}, // Invalid |
262 | | { 4, 1}, // #1 |
263 | | { 5, 1}, // #2 |
264 | | { 6, 1}, // #3 |
265 | | { 3, 1}, // #4 |
266 | | { 2, 1}, // #5 |
267 | | { 1, 1}, // #6 |
268 | | { 5, 2}, // #7 |
269 | | { 4, 1}, // #8 |
270 | | { 6, 1}, // #9 |
271 | | { 4, 1}, // #10 |
272 | | { 7, 1}, // #11 |
273 | | { 3, 1}, // #12 |
274 | | { 4, 1}, // #13 |
275 | | { 2, 1}, // #14 |
276 | | { 4, 1}, // #15 |
277 | | { 4, 2}, // #16 |
278 | | { 5, 1}, // #17 |
279 | | { 6, 1}, // #18 |
280 | | { 4, 2}, // #19 |
281 | | { 6, 2} // #20 |
282 | | }; // AMDGPUWriteProcResTable |
283 | | |
284 | | // {Cycles, WriteResourceID} |
285 | | extern const llvm::MCWriteLatencyEntry AMDGPUWriteLatencyTable[] = { |
286 | | { 0, 0}, // Invalid |
287 | | { 1, 0}, // #1 WriteSALU_Write32Bit_WriteFloatFMA |
288 | | {80, 0}, // #2 WriteVMEM |
289 | | { 5, 0}, // #3 WriteLDS_WriteSMEM_WriteSALU_Write32Bit_WriteFloatFMA |
290 | | { 4, 0}, // #4 WriteExport_WriteQuarterRate32_WriteDoubleCvt |
291 | | { 8, 0}, // #5 WriteBranch_WriteDoubleAdd_Write8PassMAI |
292 | | { 1, 0}, // #6 Write32Bit_WriteFloatFMA |
293 | | { 1, 0}, // #7 WriteSALU |
294 | | { 2, 0}, // #8 Write64Bit_Write2PassMAI_WriteDoubleAdd |
295 | | {16, 0}, // #9 WriteFloatFMA_WriteDouble_Write16PassMAI_WriteExport |
296 | | {16, 0}, // #10 WriteFloatFMA_WriteDouble |
297 | | { 1, 0}, // #11 WriteSALU |
298 | | { 4, 0}, // #12 WriteQuarterRate32_WriteDouble |
299 | | { 1, 0}, // #13 WriteSALU |
300 | | { 2, 0}, // #14 Write64Bit |
301 | | { 2, 0}, // #15 Write64Bit |
302 | | {500, 0}, // #16 WriteBarrier |
303 | | {320, 0}, // #17 WriteVMEM |
304 | | {20, 0}, // #18 WriteLDS_WriteSMEM |
305 | | {32, 0}, // #19 WriteBranch |
306 | | { 5, 0}, // #20 Write32Bit_WriteFloatFMA |
307 | | { 5, 0}, // #21 WriteSALU |
308 | | {17, 0}, // #22 WriteDoubleAdd_WriteQuarterRate32_WriteDoubleCvt |
309 | | { 9, 0}, // #23 Write64Bit |
310 | | {17, 0}, // #24 WriteDouble_WriteQuarterRate32 |
311 | | { 5, 0}, // #25 WriteSALU |
312 | | { 9, 0}, // #26 Write64Bit |
313 | | { 9, 0}, // #27 Write64Bit |
314 | | {2000, 0} // #28 WriteBarrier |
315 | | }; // AMDGPUWriteLatencyTable |
316 | | |
317 | | // {UseIdx, WriteResourceID, Cycles} |
318 | | extern const llvm::MCReadAdvanceEntry AMDGPUReadAdvanceTable[] = { |
319 | | {0, 0, 0}, // Invalid |
320 | | {0, 0, -4}, // #1 |
321 | | {0, 0, -2} // #2 |
322 | | }; // AMDGPUReadAdvanceTable |
323 | | |
324 | | // {Name, NumMicroOps, BeginGroup, EndGroup, WriteProcResIdx,#, WriteLatencyIdx,#, ReadAdvanceIdx,#} |
325 | | static const llvm::MCSchedClassDesc SIQuarterSpeedModelSchedClasses[] = { |
326 | | {DBGFIELD("InvalidSchedClass") 16383, false, false, 0, 0, 0, 0, 0, 0}, |
327 | | {DBGFIELD("NullALU_WriteSALU") 1, false, false, 1, 1, 1, 1, 0, 0}, // #1 |
328 | | {DBGFIELD("NullALU_Write32Bit") 1, false, false, 2, 1, 1, 1, 0, 0}, // #2 |
329 | | {DBGFIELD("NullALU_WriteVMEM") 1, false, false, 3, 1, 2, 1, 0, 0}, // #3 |
330 | | {DBGFIELD("NullALU_WriteLDS") 1, false, false, 4, 1, 3, 1, 0, 0}, // #4 |
331 | | {DBGFIELD("NullALU_WriteExport") 1, false, false, 5, 1, 4, 1, 0, 0}, // #5 |
332 | | {DBGFIELD("NullALU_WriteBranch") 1, false, false, 6, 1, 5, 1, 0, 0}, // #6 |
333 | | {DBGFIELD("NullALU") 0, false, false, 0, 0, 0, 0, 0, 0}, // #7 |
334 | | {DBGFIELD("NullALU_WriteSMEM") 1, false, false, 4, 1, 3, 1, 0, 0}, // #8 |
335 | | {DBGFIELD("NullALU_Write32Bit_WriteSALU") 2, false, false, 1, 2, 6, 2, 0, 0}, // #9 |
336 | | {DBGFIELD("NullALU_WriteDoubleAdd") 1, false, false, 2, 1, 5, 1, 0, 0}, // #10 |
337 | | {DBGFIELD("NullALU_Write64Bit") 1, false, false, 2, 1, 8, 1, 0, 0}, // #11 |
338 | | {DBGFIELD("NullALU_WriteQuarterRate32") 1, false, false, 2, 1, 4, 1, 0, 0}, // #12 |
339 | | {DBGFIELD("NullALU_WriteDoubleCvt") 1, false, false, 2, 1, 4, 1, 0, 0}, // #13 |
340 | | {DBGFIELD("NullALU_WriteFloatFMA") 1, false, false, 2, 1, 9, 1, 0, 0}, // #14 |
341 | | {DBGFIELD("NullALU_WriteDouble") 1, false, false, 2, 1, 9, 1, 0, 0}, // #15 |
342 | | {DBGFIELD("NullALU_WriteFloatFMA_WriteSALU") 2, false, false, 1, 2, 10, 2, 0, 0}, // #16 |
343 | | {DBGFIELD("NullALU_WriteDouble_WriteSALU") 2, false, false, 1, 2, 10, 2, 0, 0}, // #17 |
344 | | {DBGFIELD("NullALU_WriteQuarterRate32_WriteSALU") 2, false, false, 1, 2, 12, 2, 0, 0}, // #18 |
345 | | {DBGFIELD("NullALU_Write64Bit_Write64Bit") 2, false, false, 7, 1, 14, 2, 0, 0}, // #19 |
346 | | {DBGFIELD("NullALU_WriteBarrier") 1, false, false, 6, 1, 16, 1, 0, 0}, // #20 |
347 | | {DBGFIELD("V_ACCVGPR_WRITE_B32") 16382, false, false, 0, 0, 0, 0, 0, 0}, // #21 |
348 | | {DBGFIELD("V_MFMA_F32_4X4X1F32_V_MFMA_F32_4X4X2BF16_V_MFMA_F32_4X4X4F16_V_MFMA_I32_4X4X4I8_V_MFMA_F32_4X4X1F32_vi_V_MFMA_F32_4X4X2BF16_vi_V_MFMA_F32_4X4X4F16_vi_V_MFMA_I32_4X4X4I8_vi") 1, false, false, 2, 1, 8, 1, 1, 1}, // #22 |
349 | | {DBGFIELD("V_MFMA_F32_16X16X16F16_V_MFMA_F32_16X16X1F32_V_MFMA_F32_16X16X2BF16_V_MFMA_F32_16X16X4F16_V_MFMA_F32_16X16X4F32_V_MFMA_F32_16X16X8BF16_V_MFMA_I32_16X16X16I8_V_MFMA_I32_16X16X4I8_V_MFMA_F32_16X16X16F16_vi_V_MFMA_F32_16X16X1F32_vi_V_MFMA_F32_16X16X2BF16_vi_V_MFMA_F32_16X16X4F16_vi_V_MFMA_F32_16X16X4F32_vi_V_MFMA_F32_16X16X8BF16_vi_V_MFMA_I32_16X16X16I8_vi_V_MFMA_I32_16X16X4I8_vi") 1, false, false, 2, 1, 5, 1, 1, 1}, // #23 |
350 | | {DBGFIELD("V_MFMA_F32_32X32X1F32_V_MFMA_F32_32X32X2BF16_V_MFMA_F32_32X32X2F32_V_MFMA_F32_32X32X4BF16_V_MFMA_F32_32X32X4F16_V_MFMA_F32_32X32X8F16_V_MFMA_I32_32X32X4I8_V_MFMA_I32_32X32X8I8_V_MFMA_F32_32X32X1F32_vi_V_MFMA_F32_32X32X2BF16_vi_V_MFMA_F32_32X32X2F32_vi_V_MFMA_F32_32X32X4BF16_vi_V_MFMA_F32_32X32X4F16_vi_V_MFMA_F32_32X32X8F16_vi_V_MFMA_I32_32X32X4I8_vi_V_MFMA_I32_32X32X8I8_vi") 1, false, false, 2, 1, 9, 1, 1, 1}, // #24 |
351 | | {DBGFIELD("COPY") 16382, false, false, 0, 0, 0, 0, 0, 0}, // #25 |
352 | | {DBGFIELD("Write64Bit_MIVGPRRead") 1, false, false, 2, 1, 8, 1, 2, 1}, // #26 |
353 | | {DBGFIELD("Write64Bit_ReadDefault") 1, false, false, 2, 1, 8, 1, 0, 0}, // #27 |
354 | | {DBGFIELD("Write32Bit") 1, false, false, 2, 1, 1, 1, 0, 0}, // #28 |
355 | | {DBGFIELD("Write64Bit") 1, false, false, 2, 1, 8, 1, 0, 0}, // #29 |
356 | | {DBGFIELD("WriteSALU") 1, false, false, 1, 1, 1, 1, 0, 0}, // #30 |
357 | | }; // SIQuarterSpeedModelSchedClasses |
358 | | |
359 | | // {Name, NumMicroOps, BeginGroup, EndGroup, WriteProcResIdx,#, WriteLatencyIdx,#, ReadAdvanceIdx,#} |
360 | | static const llvm::MCSchedClassDesc GFX10SpeedModelSchedClasses[] = { |
361 | | {DBGFIELD("InvalidSchedClass") 16383, false, false, 0, 0, 0, 0, 0, 0}, |
362 | | {DBGFIELD("NullALU_WriteSALU") 1, false, false, 1, 2, 3, 1, 0, 0}, // #1 |
363 | | {DBGFIELD("NullALU_Write32Bit") 1, false, false, 8, 2, 3, 1, 0, 0}, // #2 |
364 | | {DBGFIELD("NullALU_WriteVMEM") 1, false, false, 10, 2, 17, 1, 0, 0}, // #3 |
365 | | {DBGFIELD("NullALU_WriteLDS") 1, false, false, 12, 2, 18, 1, 0, 0}, // #4 |
366 | | {DBGFIELD("NullALU_WriteExport") 1, false, false, 14, 2, 9, 1, 0, 0}, // #5 |
367 | | {DBGFIELD("NullALU_WriteBranch") 1, false, false, 6, 1, 19, 1, 0, 0}, // #6 |
368 | | {DBGFIELD("NullALU") 0, false, false, 0, 0, 0, 0, 0, 0}, // #7 |
369 | | {DBGFIELD("NullALU_WriteSMEM") 1, false, false, 12, 2, 18, 1, 0, 0}, // #8 |
370 | | {DBGFIELD("NullALU_Write32Bit_WriteSALU") 2, false, false, 16, 3, 20, 2, 0, 0}, // #9 |
371 | | {DBGFIELD("NullALU_WriteDoubleAdd") 1, false, false, 8, 2, 22, 1, 0, 0}, // #10 |
372 | | {DBGFIELD("NullALU_Write64Bit") 1, false, false, 8, 2, 23, 1, 0, 0}, // #11 |
373 | | {DBGFIELD("NullALU_WriteQuarterRate32") 1, false, false, 8, 2, 22, 1, 0, 0}, // #12 |
374 | | {DBGFIELD("NullALU_WriteDoubleCvt") 1, false, false, 8, 2, 22, 1, 0, 0}, // #13 |
375 | | {DBGFIELD("NullALU_WriteFloatFMA") 1, false, false, 8, 2, 3, 1, 0, 0}, // #14 |
376 | | {DBGFIELD("NullALU_WriteDouble") 1, false, false, 8, 2, 22, 1, 0, 0}, // #15 |
377 | | {DBGFIELD("NullALU_WriteFloatFMA_WriteSALU") 2, false, false, 16, 3, 20, 2, 0, 0}, // #16 |
378 | | {DBGFIELD("NullALU_WriteDouble_WriteSALU") 2, false, false, 16, 3, 24, 2, 0, 0}, // #17 |
379 | | {DBGFIELD("NullALU_WriteQuarterRate32_WriteSALU") 2, false, false, 16, 3, 24, 2, 0, 0}, // #18 |
380 | | {DBGFIELD("NullALU_Write64Bit_Write64Bit") 2, false, false, 19, 2, 26, 2, 0, 0}, // #19 |
381 | | {DBGFIELD("NullALU_WriteBarrier") 1, false, false, 6, 1, 28, 1, 0, 0}, // #20 |
382 | | {DBGFIELD("V_ACCVGPR_WRITE_B32") 1, false, false, 8, 2, 3, 1, 0, 0}, // #21 |
383 | | {DBGFIELD("V_MFMA_F32_4X4X1F32_V_MFMA_F32_4X4X2BF16_V_MFMA_F32_4X4X4F16_V_MFMA_I32_4X4X4I8_V_MFMA_F32_4X4X1F32_vi_V_MFMA_F32_4X4X2BF16_vi_V_MFMA_F32_4X4X4F16_vi_V_MFMA_I32_4X4X4I8_vi") 1, false, false, 8, 2, 3, 1, 0, 0}, // #22 |
384 | | {DBGFIELD("V_MFMA_F32_16X16X16F16_V_MFMA_F32_16X16X1F32_V_MFMA_F32_16X16X2BF16_V_MFMA_F32_16X16X4F16_V_MFMA_F32_16X16X4F32_V_MFMA_F32_16X16X8BF16_V_MFMA_I32_16X16X16I8_V_MFMA_I32_16X16X4I8_V_MFMA_F32_16X16X16F16_vi_V_MFMA_F32_16X16X1F32_vi_V_MFMA_F32_16X16X2BF16_vi_V_MFMA_F32_16X16X4F16_vi_V_MFMA_F32_16X16X4F32_vi_V_MFMA_F32_16X16X8BF16_vi_V_MFMA_I32_16X16X16I8_vi_V_MFMA_I32_16X16X4I8_vi") 1, false, false, 8, 2, 3, 1, 0, 0}, // #23 |
385 | | {DBGFIELD("V_MFMA_F32_32X32X1F32_V_MFMA_F32_32X32X2BF16_V_MFMA_F32_32X32X2F32_V_MFMA_F32_32X32X4BF16_V_MFMA_F32_32X32X4F16_V_MFMA_F32_32X32X8F16_V_MFMA_I32_32X32X4I8_V_MFMA_I32_32X32X8I8_V_MFMA_F32_32X32X1F32_vi_V_MFMA_F32_32X32X2BF16_vi_V_MFMA_F32_32X32X2F32_vi_V_MFMA_F32_32X32X4BF16_vi_V_MFMA_F32_32X32X4F16_vi_V_MFMA_F32_32X32X8F16_vi_V_MFMA_I32_32X32X4I8_vi_V_MFMA_I32_32X32X8I8_vi") 1, false, false, 8, 2, 3, 1, 0, 0}, // #24 |
386 | | {DBGFIELD("COPY") 16382, false, false, 0, 0, 0, 0, 0, 0}, // #25 |
387 | | {DBGFIELD("Write64Bit_MIVGPRRead") 0, false, false, 0, 0, 0, 0, 0, 0}, // #26 |
388 | | {DBGFIELD("Write64Bit_ReadDefault") 0, false, false, 0, 0, 0, 0, 0, 0}, // #27 |
389 | | {DBGFIELD("Write32Bit") 1, false, false, 8, 2, 3, 1, 0, 0}, // #28 |
390 | | {DBGFIELD("Write64Bit") 1, false, false, 8, 2, 23, 1, 0, 0}, // #29 |
391 | | {DBGFIELD("WriteSALU") 1, false, false, 1, 2, 3, 1, 0, 0}, // #30 |
392 | | }; // GFX10SpeedModelSchedClasses |
393 | | |
// Scheduling-class table for the SIFullSpeedModel processor model; the
// SIFullSpeedModel MCSchedModel initializer further down points at this array.
// The trailing "#N" on each row is that row's index in the array, and the
// resolve*SchedClass functions in this file return such indices (26..30).
// Rows #21 and #25 carry NumMicroOps 16382 and are the same two indices those
// functions switch on -- presumably a "needs run-time resolution" marker;
// TODO confirm against the MCSchedClassDesc definition.
394 | | // {Name, NumMicroOps, BeginGroup, EndGroup, WriteProcResIdx,#, WriteLatencyIdx,#, ReadAdvanceIdx,#} |
395 | | static const llvm::MCSchedClassDesc SIFullSpeedModelSchedClasses[] = { |
396 | | {DBGFIELD("InvalidSchedClass") 16383, false, false, 0, 0, 0, 0, 0, 0}, |
397 | | {DBGFIELD("NullALU_WriteSALU") 1, false, false, 1, 1, 1, 1, 0, 0}, // #1 |
398 | | {DBGFIELD("NullALU_Write32Bit") 1, false, false, 2, 1, 1, 1, 0, 0}, // #2 |
399 | | {DBGFIELD("NullALU_WriteVMEM") 1, false, false, 3, 1, 2, 1, 0, 0}, // #3 |
400 | | {DBGFIELD("NullALU_WriteLDS") 1, false, false, 4, 1, 3, 1, 0, 0}, // #4 |
401 | | {DBGFIELD("NullALU_WriteExport") 1, false, false, 5, 1, 4, 1, 0, 0}, // #5 |
402 | | {DBGFIELD("NullALU_WriteBranch") 1, false, false, 6, 1, 5, 1, 0, 0}, // #6 |
403 | | {DBGFIELD("NullALU") 0, false, false, 0, 0, 0, 0, 0, 0}, // #7 |
404 | | {DBGFIELD("NullALU_WriteSMEM") 1, false, false, 4, 1, 3, 1, 0, 0}, // #8 |
405 | | {DBGFIELD("NullALU_Write32Bit_WriteSALU") 2, false, false, 1, 2, 6, 2, 0, 0}, // #9 |
406 | | {DBGFIELD("NullALU_WriteDoubleAdd") 1, false, false, 2, 1, 8, 1, 0, 0}, // #10 |
407 | | {DBGFIELD("NullALU_Write64Bit") 1, false, false, 2, 1, 8, 1, 0, 0}, // #11 |
408 | | {DBGFIELD("NullALU_WriteQuarterRate32") 1, false, false, 2, 1, 4, 1, 0, 0}, // #12 |
409 | | {DBGFIELD("NullALU_WriteDoubleCvt") 1, false, false, 2, 1, 4, 1, 0, 0}, // #13 |
410 | | {DBGFIELD("NullALU_WriteFloatFMA") 1, false, false, 2, 1, 1, 1, 0, 0}, // #14 |
411 | | {DBGFIELD("NullALU_WriteDouble") 1, false, false, 2, 1, 4, 1, 0, 0}, // #15 |
412 | | {DBGFIELD("NullALU_WriteFloatFMA_WriteSALU") 2, false, false, 1, 2, 6, 2, 0, 0}, // #16 |
413 | | {DBGFIELD("NullALU_WriteDouble_WriteSALU") 2, false, false, 1, 2, 12, 2, 0, 0}, // #17 |
414 | | {DBGFIELD("NullALU_WriteQuarterRate32_WriteSALU") 2, false, false, 1, 2, 12, 2, 0, 0}, // #18 |
415 | | {DBGFIELD("NullALU_Write64Bit_Write64Bit") 2, false, false, 7, 1, 14, 2, 0, 0}, // #19 |
416 | | {DBGFIELD("NullALU_WriteBarrier") 1, false, false, 6, 1, 16, 1, 0, 0}, // #20 |
417 | | {DBGFIELD("V_ACCVGPR_WRITE_B32") 16382, false, false, 0, 0, 0, 0, 0, 0}, // #21 |
418 | | {DBGFIELD("V_MFMA_F32_4X4X1F32_V_MFMA_F32_4X4X2BF16_V_MFMA_F32_4X4X4F16_V_MFMA_I32_4X4X4I8_V_MFMA_F32_4X4X1F32_vi_V_MFMA_F32_4X4X2BF16_vi_V_MFMA_F32_4X4X4F16_vi_V_MFMA_I32_4X4X4I8_vi") 1, false, false, 2, 1, 8, 1, 1, 1}, // #22 |
419 | | {DBGFIELD("V_MFMA_F32_16X16X16F16_V_MFMA_F32_16X16X1F32_V_MFMA_F32_16X16X2BF16_V_MFMA_F32_16X16X4F16_V_MFMA_F32_16X16X4F32_V_MFMA_F32_16X16X8BF16_V_MFMA_I32_16X16X16I8_V_MFMA_I32_16X16X4I8_V_MFMA_F32_16X16X16F16_vi_V_MFMA_F32_16X16X1F32_vi_V_MFMA_F32_16X16X2BF16_vi_V_MFMA_F32_16X16X4F16_vi_V_MFMA_F32_16X16X4F32_vi_V_MFMA_F32_16X16X8BF16_vi_V_MFMA_I32_16X16X16I8_vi_V_MFMA_I32_16X16X4I8_vi") 1, false, false, 2, 1, 5, 1, 1, 1}, // #23 |
420 | | {DBGFIELD("V_MFMA_F32_32X32X1F32_V_MFMA_F32_32X32X2BF16_V_MFMA_F32_32X32X2F32_V_MFMA_F32_32X32X4BF16_V_MFMA_F32_32X32X4F16_V_MFMA_F32_32X32X8F16_V_MFMA_I32_32X32X4I8_V_MFMA_I32_32X32X8I8_V_MFMA_F32_32X32X1F32_vi_V_MFMA_F32_32X32X2BF16_vi_V_MFMA_F32_32X32X2F32_vi_V_MFMA_F32_32X32X4BF16_vi_V_MFMA_F32_32X32X4F16_vi_V_MFMA_F32_32X32X8F16_vi_V_MFMA_I32_32X32X4I8_vi_V_MFMA_I32_32X32X8I8_vi") 1, false, false, 2, 1, 9, 1, 1, 1}, // #24 |
421 | | {DBGFIELD("COPY") 16382, false, false, 0, 0, 0, 0, 0, 0}, // #25 |
422 | | {DBGFIELD("Write64Bit_MIVGPRRead") 1, false, false, 2, 1, 8, 1, 2, 1}, // #26 |
423 | | {DBGFIELD("Write64Bit_ReadDefault") 1, false, false, 2, 1, 8, 1, 0, 0}, // #27 |
424 | | {DBGFIELD("Write32Bit") 1, false, false, 2, 1, 1, 1, 0, 0}, // #28 |
425 | | {DBGFIELD("Write64Bit") 1, false, false, 2, 1, 8, 1, 0, 0}, // #29 |
426 | | {DBGFIELD("WriteSALU") 1, false, false, 1, 1, 1, 1, 0, 0}, // #30 |
427 | | }; // SIFullSpeedModelSchedClasses |
428 | | |
429 | | #undef DBGFIELD |
430 | | |
// Fallback scheduling model: every tunable is taken from the MCSchedModel
// defaults and all table pointers are null, so it carries no per-instruction
// information. AMDGPUSubTypeKV assigns it to the "generic" and "generic-hsa"
// CPU entries. Its Processor ID is 0; the other models in this file use 1..3.
431 | | static const llvm::MCSchedModel NoSchedModel = { |
432 | | MCSchedModel::DefaultIssueWidth, |
433 | | MCSchedModel::DefaultMicroOpBufferSize, |
434 | | MCSchedModel::DefaultLoopMicroOpBufferSize, |
435 | | MCSchedModel::DefaultLoadLatency, |
436 | | MCSchedModel::DefaultHighLatency, |
437 | | MCSchedModel::DefaultMispredictPenalty, |
438 | | false, // PostRAScheduler |
439 | | false, // CompleteModel |
440 | | 0, // Processor ID |
441 | | nullptr, nullptr, 0, 0, // No instruction-level machine model. |
442 | | nullptr, // No Itinerary |
443 | | nullptr // No extra processor descriptor |
444 | | }; |
445 | | |
// Sub-unit index table for the SIQuarterSpeedModel processor resources. It
// holds only the index-0 placeholder, and none of the resource rows below
// refer to it (their SubUnitsIdxBegin column is 0 or nullptr).
446 | | static const unsigned SIQuarterSpeedModelProcResourceSubUnits[] = { |
447 | | 0, // Invalid |
448 | | }; |
449 | | |
// Processor resources for SIQuarterSpeedModel: the index-0 "InvalidUnit"
// placeholder followed by six single-unit resources (#1..#6). The trailing
// "#N" is the row index; BufferSize is the fourth column (1, 7, 15 or 31).
450 | | // {Name, NumUnits, SuperIdx, BufferSize, SubUnitsIdxBegin} |
451 | | static const llvm::MCProcResourceDesc SIQuarterSpeedModelProcResources[] = { |
452 | | {"InvalidUnit", 0, 0, 0, 0}, |
453 | | {"HWBranch", 1, 0, 1, nullptr}, // #1 |
454 | | {"HWExport", 1, 0, 7, nullptr}, // #2 |
455 | | {"HWLGKM", 1, 0, 31, nullptr}, // #3 |
456 | | {"HWSALU", 1, 0, 1, nullptr}, // #4 |
457 | | {"HWVALU", 1, 0, 1, nullptr}, // #5 |
458 | | {"HWVMEM", 1, 0, 15, nullptr}, // #6 |
459 | | }; |
460 | | |
// Scheduling model with Processor ID 1. The resolve*SchedClass functions in
// this file compare against that ID to pick a concrete scheduling class.
// The two bare integers after the table pointers are 7 and 31: 7 equals the
// number of rows in SIQuarterSpeedModelProcResources and 31 equals the number
// of rows in the sibling *SchedClasses tables visible in this file --
// presumably the table sizes; TODO confirm against the MCSchedModel field order.
461 | | static const llvm::MCSchedModel SIQuarterSpeedModel = { |
462 | | 1, // IssueWidth |
463 | | 1, // MicroOpBufferSize |
464 | | MCSchedModel::DefaultLoopMicroOpBufferSize, |
465 | | MCSchedModel::DefaultLoadLatency, |
466 | | MCSchedModel::DefaultHighLatency, |
467 | | 20, // MispredictPenalty |
468 | | true, // PostRAScheduler |
469 | | false, // CompleteModel |
470 | | 1, // Processor ID |
471 | | SIQuarterSpeedModelProcResources, |
472 | | SIQuarterSpeedModelSchedClasses, |
473 | | 7, |
474 | | 31, |
475 | | nullptr, // No Itinerary |
476 | | nullptr // No extra processor descriptor |
477 | | }; |
478 | | |
// Sub-unit index table for the GFX10SpeedModel processor resources. It holds
// only the index-0 placeholder, and none of the resource rows below refer to
// it (their SubUnitsIdxBegin column is 0 or nullptr).
479 | | static const unsigned GFX10SpeedModelProcResourceSubUnits[] = { |
480 | | 0, // Invalid |
481 | | }; |
482 | | |
// Processor resources for GFX10SpeedModel: the index-0 "InvalidUnit"
// placeholder followed by seven single-unit resources (#1..#7). Compared with
// the two SI*SpeedModel resource tables in this file there is one extra row,
// "HWRC" at #4, which shifts HWSALU/HWVALU/HWVMEM up by one index.
483 | | // {Name, NumUnits, SuperIdx, BufferSize, SubUnitsIdxBegin} |
484 | | static const llvm::MCProcResourceDesc GFX10SpeedModelProcResources[] = { |
485 | | {"InvalidUnit", 0, 0, 0, 0}, |
486 | | {"HWBranch", 1, 0, 1, nullptr}, // #1 |
487 | | {"HWExport", 1, 0, 7, nullptr}, // #2 |
488 | | {"HWLGKM", 1, 0, 31, nullptr}, // #3 |
489 | | {"HWRC", 1, 0, 1, nullptr}, // #4 |
490 | | {"HWSALU", 1, 0, 1, nullptr}, // #5 |
491 | | {"HWVALU", 1, 0, 1, nullptr}, // #6 |
492 | | {"HWVMEM", 1, 0, 15, nullptr}, // #7 |
493 | | }; |
494 | | |
// Scheduling model with Processor ID 2, used by the gfx1010/gfx1011/gfx1012
// rows of AMDGPUSubTypeKV. The bare integers 8 and 31 follow the table
// pointers: 8 equals the number of rows in GFX10SpeedModelProcResources and 31
// equals the number of rows in GFX10SpeedModelSchedClasses (last row is #30)
// -- presumably the table sizes; TODO confirm against the MCSchedModel field
// order.
495 | | static const llvm::MCSchedModel GFX10SpeedModel = { |
496 | | 1, // IssueWidth |
497 | | 1, // MicroOpBufferSize |
498 | | MCSchedModel::DefaultLoopMicroOpBufferSize, |
499 | | MCSchedModel::DefaultLoadLatency, |
500 | | MCSchedModel::DefaultHighLatency, |
501 | | 20, // MispredictPenalty |
502 | | true, // PostRAScheduler |
503 | | false, // CompleteModel |
504 | | 2, // Processor ID |
505 | | GFX10SpeedModelProcResources, |
506 | | GFX10SpeedModelSchedClasses, |
507 | | 8, |
508 | | 31, |
509 | | nullptr, // No Itinerary |
510 | | nullptr // No extra processor descriptor |
511 | | }; |
512 | | |
// Sub-unit index table for the SIFullSpeedModel processor resources. It holds
// only the index-0 placeholder, and none of the resource rows below refer to
// it (their SubUnitsIdxBegin column is 0 or nullptr).
513 | | static const unsigned SIFullSpeedModelProcResourceSubUnits[] = { |
514 | | 0, // Invalid |
515 | | }; |
516 | | |
// Processor resources for SIFullSpeedModel: the index-0 "InvalidUnit"
// placeholder followed by six single-unit resources (#1..#6). The rows are
// identical to those of SIQuarterSpeedModelProcResources.
517 | | // {Name, NumUnits, SuperIdx, BufferSize, SubUnitsIdxBegin} |
518 | | static const llvm::MCProcResourceDesc SIFullSpeedModelProcResources[] = { |
519 | | {"InvalidUnit", 0, 0, 0, 0}, |
520 | | {"HWBranch", 1, 0, 1, nullptr}, // #1 |
521 | | {"HWExport", 1, 0, 7, nullptr}, // #2 |
522 | | {"HWLGKM", 1, 0, 31, nullptr}, // #3 |
523 | | {"HWSALU", 1, 0, 1, nullptr}, // #4 |
524 | | {"HWVALU", 1, 0, 1, nullptr}, // #5 |
525 | | {"HWVMEM", 1, 0, 15, nullptr}, // #6 |
526 | | }; |
527 | | |
// Scheduling model with Processor ID 3, used by the gfx600, gfx701, hawaii and
// tahiti rows of AMDGPUSubTypeKV. The bare integers 7 and 31 follow the table
// pointers: 7 equals the number of rows in SIFullSpeedModelProcResources and
// 31 equals the number of rows in SIFullSpeedModelSchedClasses (last row is
// #30) -- presumably the table sizes; TODO confirm against the MCSchedModel
// field order.
528 | | static const llvm::MCSchedModel SIFullSpeedModel = { |
529 | | 1, // IssueWidth |
530 | | 1, // MicroOpBufferSize |
531 | | MCSchedModel::DefaultLoopMicroOpBufferSize, |
532 | | MCSchedModel::DefaultLoadLatency, |
533 | | MCSchedModel::DefaultHighLatency, |
534 | | 20, // MispredictPenalty |
535 | | true, // PostRAScheduler |
536 | | false, // CompleteModel |
537 | | 3, // Processor ID |
538 | | SIFullSpeedModelProcResources, |
539 | | SIFullSpeedModelSchedClasses, |
540 | | 7, |
541 | | 31, |
542 | | nullptr, // No Itinerary |
543 | | nullptr // No extra processor descriptor |
544 | | }; |
545 | | |
// CPU-name table. Each row holds a processor name, a three-word initializer
// (two of the words are used here; the third is always 0x0ULL), and the
// address of the scheduling model to use for that processor. There are 39
// rows, matching the makeArrayRef(AMDGPUSubTypeKV, 39) call in the
// GET_SUBTARGETINFO_CTOR section of this file, and they are kept in ascending
// name order. The three-word initializer is presumably the feature bit mask
// implied by the processor -- TODO confirm against SubtargetSubTypeKV.
546 | | // Sorted (by key) array of values for CPU subtype. |
547 | | extern const llvm::SubtargetSubTypeKV AMDGPUSubTypeKV[] = { |
548 | | { "bonaire", { { { 0x20000000002040ULL, 0x1000000ULL, 0x0ULL, } } }, &SIQuarterSpeedModel }, |
549 | | { "carrizo", { { { 0x20000400000040ULL, 0xc2040000000ULL, 0x0ULL, } } }, &SIQuarterSpeedModel }, |
550 | | { "fiji", { { { 0x20000000002040ULL, 0x2040000000ULL, 0x0ULL, } } }, &SIQuarterSpeedModel }, |
551 | | { "generic", { { { 0x0ULL, 0x20000000000ULL, 0x0ULL, } } }, &NoSchedModel }, |
552 | | { "generic-hsa", { { { 0x800000000ULL, 0x20000000000ULL, 0x0ULL, } } }, &NoSchedModel }, |
553 | | { "gfx1010", { { { 0xe2810000002140ULL, 0x11900e4004cULL, 0x0ULL, } } }, &GFX10SpeedModel }, |
554 | | { "gfx1011", { { { 0x628100000ce140ULL, 0x11900e4004cULL, 0x0ULL, } } }, &GFX10SpeedModel }, |
555 | | { "gfx1012", { { { 0xe28100000ce140ULL, 0x11900e4004cULL, 0x0ULL, } } }, &GFX10SpeedModel }, |
556 | | { "gfx600", { { { 0x20000400002040ULL, 0x80002000000ULL, 0x0ULL, } } }, &SIFullSpeedModel }, |
557 | | { "gfx601", { { { 0x20000000002040ULL, 0x2000000ULL, 0x0ULL, } } }, &SIQuarterSpeedModel }, |
558 | | { "gfx700", { { { 0x20000000002040ULL, 0x1000000ULL, 0x0ULL, } } }, &SIQuarterSpeedModel }, |
559 | | { "gfx701", { { { 0x20000400002040ULL, 0x80001000000ULL, 0x0ULL, } } }, &SIFullSpeedModel }, |
560 | | { "gfx702", { { { 0x10000400002040ULL, 0x1000000ULL, 0x0ULL, } } }, &SIQuarterSpeedModel }, |
561 | | { "gfx703", { { { 0x10000000002040ULL, 0x1000000ULL, 0x0ULL, } } }, &SIQuarterSpeedModel }, |
562 | | { "gfx704", { { { 0x20000000002040ULL, 0x1000000ULL, 0x0ULL, } } }, &SIQuarterSpeedModel }, |
563 | | { "gfx801", { { { 0x20000400000040ULL, 0xc2040000000ULL, 0x0ULL, } } }, &SIQuarterSpeedModel }, |
564 | | { "gfx802", { { { 0x20000000002040ULL, 0x2040020000ULL, 0x0ULL, } } }, &SIQuarterSpeedModel }, |
565 | | { "gfx803", { { { 0x20000000002040ULL, 0x2040000000ULL, 0x0ULL, } } }, &SIQuarterSpeedModel }, |
566 | | { "gfx810", { { { 0x10000000000040ULL, 0x42000000000ULL, 0x0ULL, } } }, &SIQuarterSpeedModel }, |
567 | | { "gfx900", { { { 0x2020200000003040ULL, 0x0ULL, 0x0ULL, } } }, &SIQuarterSpeedModel }, |
568 | | { "gfx902", { { { 0x2020200000001040ULL, 0x40000000000ULL, 0x0ULL, } } }, &SIQuarterSpeedModel }, |
569 | | { "gfx904", { { { 0x20220000003040ULL, 0x0ULL, 0x0ULL, } } }, &SIQuarterSpeedModel }, |
570 | | { "gfx906", { { { 0x2022000000e140ULL, 0x80000000000ULL, 0x0ULL, } } }, &SIQuarterSpeedModel }, |
571 | | { "gfx908", { { { 0x8202200000fc148ULL, 0x80000100080ULL, 0x0ULL, } } }, &SIQuarterSpeedModel }, |
572 | | { "gfx909", { { { 0x2020200000000040ULL, 0x40000000000ULL, 0x0ULL, } } }, &SIQuarterSpeedModel }, |
573 | | { "hainan", { { { 0x20000000002040ULL, 0x2000000ULL, 0x0ULL, } } }, &SIQuarterSpeedModel }, |
574 | | { "hawaii", { { { 0x20000400002040ULL, 0x80001000000ULL, 0x0ULL, } } }, &SIFullSpeedModel }, |
575 | | { "iceland", { { { 0x20000000002040ULL, 0x2040020000ULL, 0x0ULL, } } }, &SIQuarterSpeedModel }, |
576 | | { "kabini", { { { 0x10000000002040ULL, 0x1000000ULL, 0x0ULL, } } }, &SIQuarterSpeedModel }, |
577 | | { "kaveri", { { { 0x20000000002040ULL, 0x1000000ULL, 0x0ULL, } } }, &SIQuarterSpeedModel }, |
578 | | { "mullins", { { { 0x10000000002040ULL, 0x1000000ULL, 0x0ULL, } } }, &SIQuarterSpeedModel }, |
579 | | { "oland", { { { 0x20000000002040ULL, 0x2000000ULL, 0x0ULL, } } }, &SIQuarterSpeedModel }, |
580 | | { "pitcairn", { { { 0x20000000002040ULL, 0x2000000ULL, 0x0ULL, } } }, &SIQuarterSpeedModel }, |
581 | | { "polaris10", { { { 0x20000000002040ULL, 0x2040000000ULL, 0x0ULL, } } }, &SIQuarterSpeedModel }, |
582 | | { "polaris11", { { { 0x20000000002040ULL, 0x2040000000ULL, 0x0ULL, } } }, &SIQuarterSpeedModel }, |
583 | | { "stoney", { { { 0x10000000000040ULL, 0x42000000000ULL, 0x0ULL, } } }, &SIQuarterSpeedModel }, |
584 | | { "tahiti", { { { 0x20000400002040ULL, 0x80002000000ULL, 0x0ULL, } } }, &SIFullSpeedModel }, |
585 | | { "tonga", { { { 0x20000000002040ULL, 0x2040020000ULL, 0x0ULL, } } }, &SIQuarterSpeedModel }, |
586 | | { "verde", { { { 0x20000000002040ULL, 0x2000000ULL, 0x0ULL, } } }, &SIQuarterSpeedModel }, |
587 | | }; |
588 | | |
// Resolver that maps a scheduling-class index plus a processor ID to a
// concrete scheduling-class index, using only SchedClass and CPUID.
//
//   SchedClass  index to resolve; only 21 (V_ACCVGPR_WRITE_B32) and 25 (COPY)
//               are handled.
//   MI          accepted but never read in this body.
//   CPUID       processor ID as stored in the MCSchedModel tables of this file
//               (1 = SIQuarterSpeedModel, 2 = GFX10SpeedModel,
//               3 = SIFullSpeedModel).
//
// Returns 27 for class 21 on CPUID 1 or 3, 30 for class 25 on CPUID 1, 2 or 3,
// and 0 for every other combination. Note that class 21 has no CPUID == 2
// branch, so that pairing also yields 0.
589 | | namespace AMDGPU_MC { |
590 | | unsigned resolveVariantSchedClassImpl(unsigned SchedClass, |
591 | 0 | const MCInst *MI, unsigned CPUID) { |
592 | 0 | switch (SchedClass) { |
593 | 0 | case 21: // V_ACCVGPR_WRITE_B32 |
594 | 0 | if (CPUID == 1) { // SIQuarterSpeedModel |
595 | 0 | return 27; // Write64Bit_ReadDefault |
596 | 0 | } |
597 | 0 | if (CPUID == 3) { // SIFullSpeedModel |
598 | 0 | return 27; // Write64Bit_ReadDefault |
599 | 0 | } |
600 | 0 | break; |
601 | 0 | case 25: // COPY |
602 | 0 | if (CPUID == 1) { // SIQuarterSpeedModel |
603 | 0 | return 30; // WriteSALU |
604 | 0 | } |
605 | 0 | if (CPUID == 2) { // GFX10SpeedModel |
606 | 0 | return 30; // WriteSALU |
607 | 0 | } |
608 | 0 | if (CPUID == 3) { // SIFullSpeedModel |
609 | 0 | return 30; // WriteSALU |
610 | 0 | } |
611 | 0 | break; |
612 | 0 | }; |
613 | 0 | // Don't know how to resolve this scheduling class. |
614 | 0 | return 0; |
615 | 0 | } |
616 | | } // end of namespace AMDGPU_MC |
617 | | |
// MCSubtargetInfo subclass for AMDGPU. The constructor forwards all eleven
// arguments unchanged to the MCSubtargetInfo base and has an empty body. The
// only behaviour added is the resolveVariantSchedClass override, which
// delegates to AMDGPU_MC::resolveVariantSchedClassImpl with the same three
// arguments.
618 | | struct AMDGPUGenMCSubtargetInfo : public MCSubtargetInfo { |
619 | | AMDGPUGenMCSubtargetInfo(const Triple &TT, |
620 | | StringRef CPU, StringRef FS, ArrayRef<SubtargetFeatureKV> PF, |
621 | | ArrayRef<SubtargetSubTypeKV> PD, |
622 | | const MCWriteProcResEntry *WPR, |
623 | | const MCWriteLatencyEntry *WL, |
624 | | const MCReadAdvanceEntry *RA, const InstrStage *IS, |
625 | | const unsigned *OC, const unsigned *FP) : |
626 | | MCSubtargetInfo(TT, CPU, FS, PF, PD, |
627 | 4.23k | WPR, WL, RA, IS, OC, FP) { } |
628 | | |
629 | | unsigned resolveVariantSchedClass(unsigned SchedClass, |
630 | 0 | const MCInst *MI, unsigned CPUID) const override { |
631 | 0 | return AMDGPU_MC::resolveVariantSchedClassImpl(SchedClass, MI, CPUID); |
632 | 0 | } |
633 | | }; |
634 | | |
// Factory: allocates an AMDGPUGenMCSubtargetInfo with `new` for the given
// triple, CPU name and feature string, wiring in the AMDGPU feature, sub-type,
// write-proc-res, write-latency and read-advance tables. The three trailing
// nullptr arguments fill the constructor's IS, OC and FP parameters.
// The returned pointer is not freed here; ownership presumably passes to the
// caller -- TODO confirm at the call site.
635 | 4.23k | static inline MCSubtargetInfo *createAMDGPUMCSubtargetInfoImpl(const Triple &TT, StringRef CPU, StringRef FS) { |
636 | 4.23k | return new AMDGPUGenMCSubtargetInfo(TT, CPU, FS, AMDGPUFeatureKV, AMDGPUSubTypeKV, |
637 | 4.23k | AMDGPUWriteProcResTable, AMDGPUWriteLatencyTable, AMDGPUReadAdvanceTable, |
638 | 4.23k | nullptr, nullptr, nullptr); |
639 | 4.23k | } |
640 | | |
641 | | } // end namespace llvm |
642 | | |
643 | | #endif // GET_SUBTARGETINFO_MC_DESC |
644 | | |
645 | | |
646 | | #ifdef GET_SUBTARGETINFO_TARGET_DESC |
647 | | #undef GET_SUBTARGETINFO_TARGET_DESC |
648 | | |
649 | | #include "llvm/Support/Debug.h" |
650 | | #include "llvm/Support/raw_ostream.h" |
651 | | |
// Overview of the function below:
//  * Logs FS and CPU through LLVM_DEBUG, calls InitMCProcessorInfo(CPU, FS),
//    then reads the resulting feature bitset once via getFeatureBits().
//  * Each following statement tests one feature bit. Most set a boolean member
//    to true; none of them ever writes false, so members keep whatever value
//    they had before the call when the bit is clear.
//  * The Gen, LDSBankCount, LocalMemorySize, MaxPrivateElementSize and
//    WavefrontSize statements only assign when the current value is smaller
//    than the new one, so they can raise the member but never lower it.
//  * Numbers fused onto statements in this listing (for example "true2.01k ;")
//    appear to be execution counts from the coverage report rather than C++
//    source text.
652 | | // ParseSubtargetFeatures - Parses features string setting specified |
653 | | // subtarget options. |
654 | 3.64k | void llvm::AMDGPUSubtarget::ParseSubtargetFeatures(StringRef CPU, StringRef FS) { |
655 | 3.64k | LLVM_DEBUG(dbgs() << "\nFeatures:" << FS); |
656 | 3.64k | LLVM_DEBUG(dbgs() << "\nCPU:" << CPU << "\n\n"); |
657 | 3.64k | InitMCProcessorInfo(CPU, FS); |
658 | 3.64k | const FeatureBitset& Bits = getFeatureBits(); |
659 | 3.64k | if (Bits[AMDGPU::Feature16BitInsts]) Has16BitInsts = true2.01k ; |
660 | 3.64k | if (Bits[AMDGPU::FeatureAddNoCarryInsts]) AddNoCarryInsts = true775 ; |
661 | 3.64k | if (Bits[AMDGPU::FeatureApertureRegs]) HasApertureRegs = true775 ; |
662 | 3.64k | if (Bits[AMDGPU::FeatureAtomicFaddInsts]) HasAtomicFaddInsts = true21 ; |
663 | 3.64k | if (Bits[AMDGPU::FeatureAutoWaitcntBeforeBarrier]) AutoWaitcntBeforeBarrier = true2 ; |
664 | 3.64k | if (Bits[AMDGPU::FeatureCIInsts]) CIInsts = true2.33k ; |
665 | 3.64k | if (Bits[AMDGPU::FeatureCodeObjectV3]) CodeObjectV3 = true2.65k ; |
666 | 3.64k | if (Bits[AMDGPU::FeatureCuMode]) EnableCuMode = true7 ; |
667 | 3.64k | if (Bits[AMDGPU::FeatureDLInsts]) HasDLInsts = true276 ; |
668 | 3.64k | if (Bits[AMDGPU::FeatureDPP]) HasDPP = true2.01k ; |
669 | 3.64k | if (Bits[AMDGPU::FeatureDPP8]) HasDPP8 = true217 ; |
670 | 3.64k | if (Bits[AMDGPU::FeatureDisable]) FeatureDisable = true0 ; |
671 | 3.64k | if (Bits[AMDGPU::FeatureDoesNotSupportSRAMECC]) DoesNotSupportSRAMECC = true2.69k ; |
672 | 3.64k | if (Bits[AMDGPU::FeatureDoesNotSupportXNACK]) DoesNotSupportXNACK = true2.64k ; |
673 | 3.64k | if (Bits[AMDGPU::FeatureDot1Insts]) HasDot1Insts = true96 ; |
674 | 3.64k | if (Bits[AMDGPU::FeatureDot2Insts]) HasDot2Insts = true96 ; |
675 | 3.64k | if (Bits[AMDGPU::FeatureDot3Insts]) HasDot3Insts = true21 ; |
676 | 3.64k | if (Bits[AMDGPU::FeatureDot4Insts]) HasDot4Insts = true21 ; |
677 | 3.64k | if (Bits[AMDGPU::FeatureDot5Insts]) HasDot5Insts = true58 ; |
678 | 3.64k | if (Bits[AMDGPU::FeatureDot6Insts]) HasDot6Insts = true58 ; |
// FeatureDumpCode and FeatureDumpCodeLower both set the same DumpCode member.
679 | 3.64k | if (Bits[AMDGPU::FeatureDumpCode]) DumpCode = true0 ; |
680 | 3.64k | if (Bits[AMDGPU::FeatureDumpCodeLower]) DumpCode = true2 ; |
681 | 3.64k | if (Bits[AMDGPU::FeatureEnableDS128]) EnableDS128 = true14 ; |
682 | 3.64k | if (Bits[AMDGPU::FeatureEnableLoadStoreOpt]) EnableLoadStoreOpt = true3.63k ; |
683 | 3.64k | if (Bits[AMDGPU::FeatureEnablePRTStrictNull]) EnablePRTStrictNull = true3.64k ; |
684 | 3.64k | if (Bits[AMDGPU::FeatureEnableSIScheduler]) EnableSIScheduler = true2 ; |
685 | 3.64k | if (Bits[AMDGPU::FeatureEnableUnsafeDSOffsetFolding]) EnableUnsafeDSOffsetFolding = true1 ; |
686 | 3.64k | if (Bits[AMDGPU::FeatureFMA]) FMA = true0 ; |
// FeatureFP16Denormals, FeatureFP64Denormals and FeatureFP64FP16Denormals all
// set the single FP64FP16Denormals member.
687 | 3.64k | if (Bits[AMDGPU::FeatureFP16Denormals]) FP64FP16Denormals = true1 ; |
688 | 3.64k | if (Bits[AMDGPU::FeatureFP32Denormals]) FP32Denormals = true79 ; |
689 | 3.64k | if (Bits[AMDGPU::FeatureFP64]) FP64 = true; |
690 | 3.64k | if (Bits[AMDGPU::FeatureFP64Denormals]) FP64FP16Denormals = true2 ; |
691 | 3.64k | if (Bits[AMDGPU::FeatureFP64FP16Denormals]) FP64FP16Denormals = true3.59k ; |
692 | 3.64k | if (Bits[AMDGPU::FeatureFPExceptions]) FPExceptions = true4 ; |
693 | 3.64k | if (Bits[AMDGPU::FeatureFastFMAF32]) FastFMAF32 = true1.22k ; |
694 | 3.64k | if (Bits[AMDGPU::FeatureFlatAddressSpace]) FlatAddressSpace = true2.56k ; |
695 | 3.64k | if (Bits[AMDGPU::FeatureFlatForGlobal]) FlatForGlobal = true640 ; |
696 | 3.64k | if (Bits[AMDGPU::FeatureFlatGlobalInsts]) FlatGlobalInsts = true775 ; |
697 | 3.64k | if (Bits[AMDGPU::FeatureFlatInstOffsets]) FlatInstOffsets = true775 ; |
698 | 3.64k | if (Bits[AMDGPU::FeatureFlatScratchInsts]) FlatScratchInsts = true775 ; |
699 | 3.64k | if (Bits[AMDGPU::FeatureFlatSegmentOffsetBug]) HasFlatSegmentOffsetBug = true217 ; |
700 | 3.64k | if (Bits[AMDGPU::FeatureFmaMixInsts]) HasFmaMixInsts = true282 ; |
701 | 3.64k | if (Bits[AMDGPU::FeatureGCN3Encoding]) GCN3Encoding = true1.79k ; |
702 | 3.64k | if (Bits[AMDGPU::FeatureGFX7GFX8GFX9Insts]) GFX7GFX8GFX9Insts = true2.11k ; |
703 | 3.64k | if (Bits[AMDGPU::FeatureGFX8Insts]) GFX8Insts = true2.01k ; |
704 | 3.64k | if (Bits[AMDGPU::FeatureGFX9] && Gen < GCNSubtarget::GFX9557 ) Gen = GCNSubtarget::GFX9557 ; |
705 | 3.64k | if (Bits[AMDGPU::FeatureGFX9Insts]) GFX9Insts = true775 ; |
706 | 3.64k | if (Bits[AMDGPU::FeatureGFX10] && Gen < GCNSubtarget::GFX10217 ) Gen = GCNSubtarget::GFX10217 ; |
707 | 3.64k | if (Bits[AMDGPU::FeatureGFX10Insts]) GFX10Insts = true217 ; |
708 | 3.64k | if (Bits[AMDGPU::FeatureInstFwdPrefetchBug]) HasInstFwdPrefetchBug = true217 ; |
709 | 3.64k | if (Bits[AMDGPU::FeatureIntClamp]) HasIntClamp = true2.01k ; |
710 | 3.64k | if (Bits[AMDGPU::FeatureInv2PiInlineImm]) HasInv2PiInlineImm = true2.01k ; |
711 | 3.64k | if (Bits[AMDGPU::FeatureLDSBankCount16] && LDSBankCount < 1634 ) LDSBankCount = 1634 ; |
712 | 3.64k | if (Bits[AMDGPU::FeatureLDSBankCount32] && LDSBankCount < 322.71k ) LDSBankCount = 322.71k ; |
713 | 3.64k | if (Bits[AMDGPU::FeatureLdsBranchVmemWARHazard]) HasLdsBranchVmemWARHazard = true217 ; |
714 | 3.64k | if (Bits[AMDGPU::FeatureLdsMisalignedBug]) LDSMisalignedBug = true199 ; |
// NOTE(review): the comparison on the next row is "LocalMemorySize < 0". If
// LocalMemorySize is an unsigned member this can never be true and the
// assignment is dead -- confirm the member's type. The defect, if any, belongs
// in the generator input, not in this generated file.
715 | 3.64k | if (Bits[AMDGPU::FeatureLocalMemorySize0] && LocalMemorySize < 00 ) LocalMemorySize = 00 ; |
716 | 3.64k | if (Bits[AMDGPU::FeatureLocalMemorySize32768] && LocalMemorySize < 32768424 ) LocalMemorySize = 32768424 ; |
717 | 3.64k | if (Bits[AMDGPU::FeatureLocalMemorySize65536] && LocalMemorySize < 655362.33k ) LocalMemorySize = 655362.33k ; |
718 | 3.64k | if (Bits[AMDGPU::FeatureMAIInsts]) HasMAIInsts = true21 ; |
719 | 3.64k | if (Bits[AMDGPU::FeatureMIMG_R128]) MIMG_R128 = true2.19k ; |
720 | 3.64k | if (Bits[AMDGPU::FeatureMadMixInsts]) HasMadMixInsts = true493 ; |
721 | 3.64k | if (Bits[AMDGPU::FeatureMaxPrivateElementSize4] && MaxPrivateElementSize < 46 ) MaxPrivateElementSize = 46 ; |
722 | 3.64k | if (Bits[AMDGPU::FeatureMaxPrivateElementSize8] && MaxPrivateElementSize < 83 ) MaxPrivateElementSize = 83 ; |
723 | 3.64k | if (Bits[AMDGPU::FeatureMaxPrivateElementSize16] && MaxPrivateElementSize < 1614 ) MaxPrivateElementSize = 1614 ; |
724 | 3.64k | if (Bits[AMDGPU::FeatureMovrel]) HasMovrel = true2.19k ; |
725 | 3.64k | if (Bits[AMDGPU::FeatureNSAEncoding]) HasNSAEncoding = true216 ; |
726 | 3.64k | if (Bits[AMDGPU::FeatureNSAtoVMEMBug]) HasNSAtoVMEMBug = true217 ; |
727 | 3.64k | if (Bits[AMDGPU::FeatureNoDataDepHazard]) HasNoDataDepHazard = true217 ; |
728 | 3.64k | if (Bits[AMDGPU::FeatureNoSdstCMPX]) HasNoSdstCMPX = true217 ; |
729 | 3.64k | if (Bits[AMDGPU::FeatureOffset3fBug]) HasOffset3fBug = true217 ; |
730 | 3.64k | if (Bits[AMDGPU::FeaturePkFmacF16Inst]) HasPkFmacF16Inst = true238 ; |
731 | 3.64k | if (Bits[AMDGPU::FeaturePromoteAlloca]) EnablePromoteAlloca = true3.59k ; |
732 | 3.64k | if (Bits[AMDGPU::FeatureR128A16]) HasR128A16 = true558 ; |
733 | 3.64k | if (Bits[AMDGPU::FeatureRegisterBanking]) HasRegisterBanking = true217 ; |
734 | 3.64k | if (Bits[AMDGPU::FeatureSDWA]) HasSDWA = true2.01k ; |
735 | 3.64k | if (Bits[AMDGPU::FeatureSDWAMac]) HasSDWAMac = true1.24k ; |
736 | 3.64k | if (Bits[AMDGPU::FeatureSDWAOmod]) HasSDWAOmod = true775 ; |
737 | 3.64k | if (Bits[AMDGPU::FeatureSDWAOutModsVOPC]) HasSDWAOutModsVOPC = true1.24k ; |
738 | 3.64k | if (Bits[AMDGPU::FeatureSDWAScalar]) HasSDWAScalar = true775 ; |
739 | 3.64k | if (Bits[AMDGPU::FeatureSDWASdst]) HasSDWASdst = true775 ; |
740 | 3.64k | if (Bits[AMDGPU::FeatureSGPRInitBug]) SGPRInitBug = true505 ; |
741 | 3.64k | if (Bits[AMDGPU::FeatureSMEMtoVectorWriteHazard]) HasSMEMtoVectorWriteHazard = true217 ; |
742 | 3.64k | if (Bits[AMDGPU::FeatureSMemRealTime]) HasSMemRealTime = true2.01k ; |
743 | 3.64k | if (Bits[AMDGPU::FeatureSRAMECC]) EnableSRAMECC = true3.63k ; |
744 | 3.64k | if (Bits[AMDGPU::FeatureScalarAtomics]) HasScalarAtomics = true775 ; |
745 | 3.64k | if (Bits[AMDGPU::FeatureScalarFlatScratchInsts]) ScalarFlatScratchInsts = true775 ; |
746 | 3.64k | if (Bits[AMDGPU::FeatureScalarStores]) HasScalarStores = true2.01k ; |
747 | 3.64k | if (Bits[AMDGPU::FeatureSeaIslands] && Gen < GCNSubtarget::SEA_ISLANDS315 ) Gen = GCNSubtarget::SEA_ISLANDS186 ; |
748 | 3.64k | if (Bits[AMDGPU::FeatureSouthernIslands] && Gen < GCNSubtarget::SOUTHERN_ISLANDS424 ) Gen = GCNSubtarget::SOUTHERN_ISLANDS0 ; |
749 | 3.64k | if (Bits[AMDGPU::FeatureTrapHandler]) TrapHandler = true680 ; |
750 | 3.64k | if (Bits[AMDGPU::FeatureTrigReducedRange]) HasTrigReducedRange = true1.97k ; |
751 | 3.64k | if (Bits[AMDGPU::FeatureUnalignedBufferAccess]) UnalignedBufferAccess = true679 ; |
752 | 3.64k | if (Bits[AMDGPU::FeatureUnalignedScratchAccess]) UnalignedScratchAccess = true5 ; |
753 | 3.64k | if (Bits[AMDGPU::FeatureUnpackedD16VMem]) HasUnpackedD16VMem = true1.21k ; |
754 | 3.64k | if (Bits[AMDGPU::FeatureVGPRIndexMode]) HasVGPRIndexMode = true1.79k ; |
755 | 3.64k | if (Bits[AMDGPU::FeatureVMEMtoScalarWriteHazard]) HasVMEMtoScalarWriteHazard = true217 ; |
756 | 3.64k | if (Bits[AMDGPU::FeatureVOP3Literal]) HasVOP3Literal = true217 ; |
757 | 3.64k | if (Bits[AMDGPU::FeatureVOP3P]) HasVOP3PInsts = true775 ; |
758 | 3.64k | if (Bits[AMDGPU::FeatureVcmpxExecWARHazard]) HasVcmpxExecWARHazard = true217 ; |
759 | 3.64k | if (Bits[AMDGPU::FeatureVcmpxPermlaneHazard]) HasVcmpxPermlaneHazard = true217 ; |
760 | 3.64k | if (Bits[AMDGPU::FeatureVolcanicIslands] && Gen < GCNSubtarget::VOLCANIC_ISLANDS1.23k ) Gen = GCNSubtarget::VOLCANIC_ISLANDS1.23k ; |
761 | 3.64k | if (Bits[AMDGPU::FeatureVscnt]) HasVscnt = true217 ; |
762 | 3.64k | if (Bits[AMDGPU::FeatureWavefrontSize16] && WavefrontSize < 160 ) WavefrontSize = 160 ; |
763 | 3.64k | if (Bits[AMDGPU::FeatureWavefrontSize32] && WavefrontSize < 32200 ) WavefrontSize = 32200 ; |
764 | 3.64k | if (Bits[AMDGPU::FeatureWavefrontSize64] && WavefrontSize < 643.42k ) WavefrontSize = 643.42k ; |
765 | 3.64k | if (Bits[AMDGPU::FeatureXNACK]) EnableXNACK = true3.63k ; |
766 | 3.64k | if (Bits[AMDGPU::HalfRate64Ops]) HalfRate64Ops = true512 ; |
767 | 3.64k | } |
768 | | #endif // GET_SUBTARGETINFO_TARGET_DESC |
769 | | |
770 | | |
771 | | #ifdef GET_SUBTARGETINFO_HEADER |
772 | | #undef GET_SUBTARGETINFO_HEADER |
773 | | |
// Declarations exposed when GET_SUBTARGETINFO_HEADER is defined:
//  * a forward declaration of DFAPacketizer;
//  * the prototype of AMDGPU_MC::resolveVariantSchedClassImpl;
//  * AMDGPUGenSubtargetInfo, a TargetSubtargetInfo subclass with an explicit
//    (Triple, CPU, FS) constructor, two overrides (resolveSchedClass and
//    resolveVariantSchedClass) and a non-override createDFAPacketizer member.
// Only declarations appear here. The constructor and the two overrides are
// defined in the GET_SUBTARGETINFO_CTOR section of this file; no definition of
// createDFAPacketizer is visible in this part of the file.
774 | | namespace llvm { |
775 | | class DFAPacketizer; |
776 | | namespace AMDGPU_MC { |
777 | | unsigned resolveVariantSchedClassImpl(unsigned SchedClass, const MCInst *MI, unsigned CPUID); |
778 | | } |
779 | | |
780 | | struct AMDGPUGenSubtargetInfo : public TargetSubtargetInfo { |
781 | | explicit AMDGPUGenSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS); |
782 | | public: |
783 | | unsigned resolveSchedClass(unsigned SchedClass, const MachineInstr *DefMI, const TargetSchedModel *SchedModel) const override; |
784 | | unsigned resolveVariantSchedClass(unsigned SchedClass, const MCInst *MI, unsigned CPUID) const override; |
785 | | DFAPacketizer *createDFAPacketizer(const InstrItineraryData *IID) const; |
786 | | }; |
787 | | } // end namespace llvm |
788 | | |
789 | | #endif // GET_SUBTARGETINFO_HEADER |
790 | | |
791 | | |
792 | | #ifdef GET_SUBTARGETINFO_CTOR |
793 | | #undef GET_SUBTARGETINFO_CTOR |
794 | | |
795 | | #include "llvm/CodeGen/TargetSchedule.h" |
796 | | |
797 | | namespace llvm { |
// Extern declarations of the five AMDGPU tables, followed by the
// AMDGPUGenSubtargetInfo constructor that hands them to the
// TargetSubtargetInfo base. The feature and sub-type tables are wrapped with
// explicit element counts (107 and 39); the 39 matches the number of rows in
// the AMDGPUSubTypeKV initializer in this file. The three trailing nullptr
// arguments are passed through to the base unchanged. The body is empty.
798 | | extern const llvm::SubtargetFeatureKV AMDGPUFeatureKV[]; |
799 | | extern const llvm::SubtargetSubTypeKV AMDGPUSubTypeKV[]; |
800 | | extern const llvm::MCWriteProcResEntry AMDGPUWriteProcResTable[]; |
801 | | extern const llvm::MCWriteLatencyEntry AMDGPUWriteLatencyTable[]; |
802 | | extern const llvm::MCReadAdvanceEntry AMDGPUReadAdvanceTable[]; |
803 | | AMDGPUGenSubtargetInfo::AMDGPUGenSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) |
804 | | : TargetSubtargetInfo(TT, CPU, FS, makeArrayRef(AMDGPUFeatureKV, 107), makeArrayRef(AMDGPUSubTypeKV, 39), |
805 | | AMDGPUWriteProcResTable, AMDGPUWriteLatencyTable, AMDGPUReadAdvanceTable, |
806 | 3.64k | nullptr, nullptr, nullptr) {} |
807 | | |
// Resolves scheduling class 21 (V_ACCVGPR_WRITE_B32) or 25 (COPY) to a
// concrete class index by inspecting the instruction.
//
//   SchedClass  index to resolve; only 21 and 25 are handled.
//   MI          instruction being classified; dereferenced without a null
//               check.
//   SchedModel  supplies the processor ID (1 = SIQuarterSpeedModel,
//               2 = GFX10SpeedModel, 3 = SIFullSpeedModel) and the instruction
//               info object, which is static_cast to SIInstrInfo.
//
// Class 21, processor 1 or 3: returns 26 when TII->hasVGPRUses(*MI) is true,
// otherwise 27. There is no processor 2 branch for class 21.
// Class 25, processor 1, 2 or 3: returns 28 for a VGPR copy whose
// getOpSize(*MI, 0) is <= 32, 29 for a VGPR copy whose size is > 32, and 30
// for anything that is not a VGPR copy. The unit of getOpSize is not visible
// in this file.
// Any other class/processor combination breaks out of the switch and ends in
// report_fatal_error("Expected a variant SchedClass"); no return statement
// follows that call.
// The "(void)TII;" statement only references TII; presumably it is there to
// avoid an unused-variable warning when the generated switch has no cases.
808 | | unsigned AMDGPUGenSubtargetInfo |
809 | 852k | ::resolveSchedClass(unsigned SchedClass, const MachineInstr *MI, const TargetSchedModel *SchedModel) const { |
810 | 852k | |
811 | 852k | const SIInstrInfo *TII = |
812 | 852k | static_cast<const SIInstrInfo*>(SchedModel->getInstrInfo()); |
813 | 852k | (void)TII; |
814 | 852k | |
815 | 852k | switch (SchedClass) { |
816 | 852k | case 21: // V_ACCVGPR_WRITE_B32 |
817 | 7.90k | if (SchedModel->getProcessorID() == 1) { // SIQuarterSpeedModel |
818 | 7.90k | if (TII->hasVGPRUses(*MI)) |
819 | 6.99k | return 26; // Write64Bit_MIVGPRRead |
820 | 909 | return 27; // Write64Bit_ReadDefault |
821 | 909 | } |
822 | 0 | if (SchedModel->getProcessorID() == 3) { // SIFullSpeedModel |
823 | 0 | if (TII->hasVGPRUses(*MI)) |
824 | 0 | return 26; // Write64Bit_MIVGPRRead |
825 | 0 | return 27; // Write64Bit_ReadDefault |
826 | 0 | } |
827 | 0 | break; |
828 | 844k | case 25: // COPY |
829 | 844k | if (SchedModel->getProcessorID() == 1) { // SIQuarterSpeedModel |
830 | 681k | if (TII->isVGPRCopy(*MI) && TII->getOpSize(*MI, 0) <= 32419k ) |
831 | 419k | return 28; // Write32Bit |
832 | 261k | if (TII->isVGPRCopy(*MI) && TII->getOpSize(*MI, 0) > 32386 ) |
833 | 386 | return 29; // Write64Bit |
834 | 261k | return 30; // WriteSALU |
835 | 261k | } |
836 | 163k | if (SchedModel->getProcessorID() == 2) { // GFX10SpeedModel |
837 | 69.1k | if (TII->isVGPRCopy(*MI) && TII->getOpSize(*MI, 0) <= 3238.7k ) |
838 | 38.7k | return 28; // Write32Bit |
839 | 30.4k | if (TII->isVGPRCopy(*MI) && TII->getOpSize(*MI, 0) > 320 ) |
840 | 0 | return 29; // Write64Bit |
841 | 30.4k | return 30; // WriteSALU |
842 | 30.4k | } |
843 | 94.1k | if (SchedModel->getProcessorID() == 3) { // SIFullSpeedModel |
844 | 94.1k | if (TII->isVGPRCopy(*MI) && TII->getOpSize(*MI, 0) <= 3255.5k ) |
845 | 55.4k | return 28; // Write32Bit |
846 | 38.6k | if (TII->isVGPRCopy(*MI) && TII->getOpSize(*MI, 0) > 3247 ) |
847 | 47 | return 29; // Write64Bit |
848 | 38.5k | return 30; // WriteSALU |
849 | 38.5k | } |
850 | 0 | break; |
851 | 0 | }; |
852 | 0 | report_fatal_error("Expected a variant SchedClass"); |
853 | 0 | } // AMDGPUGenSubtargetInfo::resolveSchedClass |
854 | | |
// Thin forwarder: passes SchedClass, MI and CPUID unchanged to
// AMDGPU_MC::resolveVariantSchedClassImpl and returns its result.
855 | | unsigned AMDGPUGenSubtargetInfo |
856 | 0 | ::resolveVariantSchedClass(unsigned SchedClass, const MCInst *MI, unsigned CPUID) const { |
857 | 0 | return AMDGPU_MC::resolveVariantSchedClassImpl(SchedClass, MI, CPUID); |
858 | 0 | } // AMDGPUGenSubtargetInfo::resolveVariantSchedClass |
859 | | |
860 | | } // end namespace llvm |
861 | | |
862 | | #endif // GET_SUBTARGETINFO_CTOR |
863 | | |
864 | | |
865 | | #ifdef GET_STIPREDICATE_DECLS_FOR_MC_ANALYSIS |
866 | | #undef GET_STIPREDICATE_DECLS_FOR_MC_ANALYSIS |
867 | | |
868 | | #endif // GET_STIPREDICATE_DECLS_FOR_MC_ANALYSIS |
869 | | |
870 | | |
871 | | #ifdef GET_STIPREDICATE_DEFS_FOR_MC_ANALYSIS |
872 | | #undef GET_STIPREDICATE_DEFS_FOR_MC_ANALYSIS |
873 | | |
874 | | #endif // GET_STIPREDICATE_DEFS_FOR_MC_ANALYSIS |
875 | | |