//===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// SI implementation of the TargetRegisterInfo class.
//
//===----------------------------------------------------------------------===//

#include "SIRegisterInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "MCTargetDesc/AMDGPUInstPrinter.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"

using namespace llvm;

static bool hasPressureSet(const int *PSets, unsigned PSetID) {
  for (unsigned i = 0; PSets[i] != -1; ++i) {
    if (PSets[i] == (int)PSetID)
      return true;
  }
  return false;
}

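// Set PSetID in PressureSets if any register unit of Reg belongs to that
// pressure set; the constructor below uses this with SGPR0/VGPR0/AGPR0 to
// discover which pressure sets track each register kind.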
void SIRegisterInfo::classifyPressureSet(unsigned PSetID, unsigned Reg,
                                         BitVector &PressureSets) const {
  for (MCRegUnitIterator U(Reg, this); U.isValid(); ++U) {
    const int *PSets = getRegUnitPressureSets(*U);
    if (hasPressureSet(PSets, PSetID)) {
      PressureSets.set(PSetID);
      break;
    }
  }
}

static cl::opt<bool> EnableSpillSGPRToSMEM(
  "amdgpu-spill-sgpr-to-smem",
  cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"),
  cl::init(false));

static cl::opt<bool> EnableSpillSGPRToVGPR(
  "amdgpu-spill-sgpr-to-vgpr",
  cl::desc("Enable spilling SGPRs to VGPRs"),
  cl::ReallyHidden,
  cl::init(true));

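// Pressure set IDs are resolved once per subtarget in the constructor below:
// after classifying every pressure set against SGPR0/VGPR0/AGPR0, the set
// covering the largest number of register units is chosen as the canonical
// SGPR/VGPR/AGPR set ID.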
SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) :
  AMDGPURegisterInfo(),
  SGPRPressureSets(getNumRegPressureSets()),
  VGPRPressureSets(getNumRegPressureSets()),
  AGPRPressureSets(getNumRegPressureSets()),
  SpillSGPRToVGPR(false),
  SpillSGPRToSMEM(false),
  isWave32(ST.isWave32()) {
  if (EnableSpillSGPRToSMEM && ST.hasScalarStores())
    SpillSGPRToSMEM = true;
  else if (EnableSpillSGPRToVGPR)
    SpillSGPRToVGPR = true;

  unsigned NumRegPressureSets = getNumRegPressureSets();

  SGPRSetID = NumRegPressureSets;
  VGPRSetID = NumRegPressureSets;
  AGPRSetID = NumRegPressureSets;

  for (unsigned i = 0; i < NumRegPressureSets; ++i) {
    classifyPressureSet(i, AMDGPU::SGPR0, SGPRPressureSets);
    classifyPressureSet(i, AMDGPU::VGPR0, VGPRPressureSets);
    classifyPressureSet(i, AMDGPU::AGPR0, AGPRPressureSets);
  }

  // Determine the number of reg units for each pressure set.
  std::vector<unsigned> PressureSetRegUnits(NumRegPressureSets, 0);
  for (unsigned i = 0, e = getNumRegUnits(); i != e; ++i) {
    const int *PSets = getRegUnitPressureSets(i);
    for (unsigned j = 0; PSets[j] != -1; ++j) {
      ++PressureSetRegUnits[PSets[j]];
    }
  }

  unsigned VGPRMax = 0, SGPRMax = 0, AGPRMax = 0;
  for (unsigned i = 0; i < NumRegPressureSets; ++i) {
    if (isVGPRPressureSet(i) && PressureSetRegUnits[i] > VGPRMax) {
      VGPRSetID = i;
      VGPRMax = PressureSetRegUnits[i];
      continue;
    }
    if (isSGPRPressureSet(i) && PressureSetRegUnits[i] > SGPRMax) {
      SGPRSetID = i;
      SGPRMax = PressureSetRegUnits[i];
    }
    if (isAGPRPressureSet(i) && PressureSetRegUnits[i] > AGPRMax) {
      AGPRSetID = i;
      AGPRMax = PressureSetRegUnits[i];
      continue;
    }
  }

  assert(SGPRSetID < NumRegPressureSets &&
         VGPRSetID < NumRegPressureSets &&
         AGPRSetID < NumRegPressureSets);
}

unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
  const MachineFunction &MF) const {

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4;
  unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
  return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
}

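// For example, with a maximum of 102 SGPRs the buffer resource reserved above
// spans SGPR96-SGPR99 (alignDown(102, 4) - 4 == 96); 102 & 3 != 0, so the
// wave byte offset below can take the leftover SGPR101 (RegCount - 1). With
// a multiple of 4, say 104 SGPRs, the resource spans SGPR100-SGPR103 and the
// offset falls back to SGPR99 (RegCount - 5), directly below it.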
static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount) {
  unsigned Reg;

  // Try to place it in a hole after PrivateSegmentBufferReg.
  if (RegCount & 3) {
    // We cannot put the segment buffer in (Idx - 4) ... (Idx - 1) due to
    // alignment constraints, so we have a hole where we can put the wave
    // offset.
    Reg = RegCount - 1;
  } else {
    // We can put the segment buffer in (Idx - 4) ... (Idx - 1) and put the
    // wave offset before it.
    Reg = RegCount - 5;
  }

  return Reg;
}

unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
  const MachineFunction &MF) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  unsigned Reg = findPrivateSegmentWaveByteOffsetRegIndex(ST.getMaxNumSGPRs(MF));
  return AMDGPU::SGPR_32RegClass.getRegister(Reg);
}

BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  BitVector Reserved(getNumRegs());

  // EXEC_LO and EXEC_HI could be allocated and used as regular registers, but
  // this seems likely to result in bugs, so I'm marking them as reserved.
  reserveRegisterTuples(Reserved, AMDGPU::EXEC);
  reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);

  // M0 has to be reserved so that llvm accepts it as a live-in into a block.
  reserveRegisterTuples(Reserved, AMDGPU::M0);

  // Reserve src_vccz, src_execz, src_scc.
  reserveRegisterTuples(Reserved, AMDGPU::SRC_VCCZ);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_EXECZ);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_SCC);

  // Reserve the memory aperture registers.
  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);

  // Reserve src_pops_exiting_wave_id - support is not implemented in Codegen.
  reserveRegisterTuples(Reserved, AMDGPU::SRC_POPS_EXITING_WAVE_ID);

  // Reserve xnack_mask registers - support is not implemented in Codegen.
  reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);

  // Reserve lds_direct register - support is not implemented in Codegen.
  reserveRegisterTuples(Reserved, AMDGPU::LDS_DIRECT);

  // Reserve Trap Handler registers - support is not implemented in Codegen.
  reserveRegisterTuples(Reserved, AMDGPU::TBA);
  reserveRegisterTuples(Reserved, AMDGPU::TMA);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);

  // Reserve null register - it shall never be allocated.
  reserveRegisterTuples(Reserved, AMDGPU::SGPR_NULL);

  // Disallow vcc_hi allocation in wave32. It may be allocated but most likely
  // will result in bugs.
  if (isWave32) {
    Reserved.set(AMDGPU::VCC);
    Reserved.set(AMDGPU::VCC_HI);
  }

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
  unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
  for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
    unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
    reserveRegisterTuples(Reserved, Reg);
  }

  unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
  unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
  for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
    unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
    reserveRegisterTuples(Reserved, Reg);
    Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
    reserveRegisterTuples(Reserved, Reg);
  }

  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
  if (ScratchWaveOffsetReg != AMDGPU::NoRegister) {
    // Reserve 1 SGPR for scratch wave offset in case we need to spill.
    reserveRegisterTuples(Reserved, ScratchWaveOffsetReg);
  }

  unsigned ScratchRSrcReg = MFI->getScratchRSrcReg();
  if (ScratchRSrcReg != AMDGPU::NoRegister) {
    // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we
    // need to spill.
    // TODO: May need to reserve a VGPR if doing LDS spilling.
    reserveRegisterTuples(Reserved, ScratchRSrcReg);
    assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg));
  }

  // We have to assume the SP is needed in case there are calls in the
  // function, which is detected after the function is lowered. If we aren't
  // really going to need SP, don't bother reserving it.
  unsigned StackPtrReg = MFI->getStackPtrOffsetReg();

  if (StackPtrReg != AMDGPU::NoRegister) {
    reserveRegisterTuples(Reserved, StackPtrReg);
    assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
  }

  unsigned FrameReg = MFI->getFrameOffsetReg();
  if (FrameReg != AMDGPU::NoRegister) {
    reserveRegisterTuples(Reserved, FrameReg);
    assert(!isSubRegister(ScratchRSrcReg, FrameReg));
  }

  for (unsigned Reg : MFI->WWMReservedRegs) {
    reserveRegisterTuples(Reserved, Reg);
  }

  // FIXME: Stop using reserved registers for this.
  for (MCPhysReg Reg : MFI->getAGPRSpillVGPRs())
    reserveRegisterTuples(Reserved, Reg);

  for (MCPhysReg Reg : MFI->getVGPRSpillAGPRs())
    reserveRegisterTuples(Reserved, Reg);

  return Reserved;
}

bool SIRegisterInfo::canRealignStack(const MachineFunction &MF) const {
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  // On entry, the base address is 0, so it can't possibly need any more
  // alignment.

  // FIXME: Should be able to specify the entry frame alignment per calling
  // convention instead.
  if (Info->isEntryFunction())
    return false;

  return TargetRegisterInfo::canRealignStack(MF);
}

bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
  const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>();
  if (Info->isEntryFunction()) {
    const MachineFrameInfo &MFI = Fn.getFrameInfo();
    return MFI.hasStackObjects() || MFI.hasCalls();
  }

  // May need scavenger for dealing with callee saved registers.
  return true;
}

bool SIRegisterInfo::requiresFrameIndexScavenging(
  const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  if (MFI.hasStackObjects())
    return true;

  // May need to deal with callee saved registers.
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  return !Info->isEntryFunction();
}

bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
  const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  if (!MFI.hasStackObjects())
    return false;

  // The scavenger is used for large frames which may require finding a free
  // register for large offsets.
  if (!isUInt<12>(MFI.getStackSize()))
    return true;

  // If using scalar stores for spills, m0 is needed for the scalar store
  // offset (pre-GFX9). m0 is unallocatable, so we can't create a virtual
  // register for it during frame index elimination, so the scavenger is
  // directly needed.
  return MF.getSubtarget<GCNSubtarget>().hasScalarStores() &&
         MF.getInfo<SIMachineFunctionInfo>()->hasSpilledSGPRs();
}

bool SIRegisterInfo::requiresVirtualBaseRegisters(
  const MachineFunction &) const {
  // There are no special dedicated stack or frame pointers.
  return true;
}

bool SIRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
  // This helps catch bugs as verifier errors.
  return true;
}

int64_t SIRegisterInfo::getMUBUFInstrOffset(const MachineInstr *MI) const {
  assert(SIInstrInfo::isMUBUF(*MI));

  int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                          AMDGPU::OpName::offset);
  return MI->getOperand(OffIdx).getImm();
}

int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
                                                 int Idx) const {
  if (!SIInstrInfo::isMUBUF(*MI))
    return 0;

  assert(Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                           AMDGPU::OpName::vaddr) &&
         "Should never see frame index on non-address operand");

  return getMUBUFInstrOffset(MI);
}

bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
  if (!MI->mayLoadOrStore())
    return false;

  int64_t FullOffset = Offset + getMUBUFInstrOffset(MI);

  return !isUInt<12>(FullOffset);
}

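// Materialize BaseReg = FrameIdx + Offset in a VGPR. A zero offset is a plain
// V_MOV of the frame index; otherwise the offset goes through an SGPR, the
// frame index through a VGPR, and the two are combined with a carry-less
// VALU add.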
void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
                                                  unsigned BaseReg,
                                                  int FrameIdx,
                                                  int64_t Offset) const {
  MachineBasicBlock::iterator Ins = MBB->begin();
  DebugLoc DL; // Defaults to "unknown"

  if (Ins != MBB->end())
    DL = Ins->getDebugLoc();

  MachineFunction *MF = MBB->getParent();
  const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = Subtarget.getInstrInfo();

  if (Offset == 0) {
    BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), BaseReg)
      .addFrameIndex(FrameIdx);
    return;
  }

  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

  unsigned FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
    .addImm(Offset);
  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), FIReg)
    .addFrameIndex(FrameIdx);

  TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
    .addReg(OffsetReg, RegState::Kill)
    .addReg(FIReg)
    .addImm(0); // clamp bit
}

void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
                                       int64_t Offset) const {

  MachineBasicBlock *MBB = MI.getParent();
  MachineFunction *MF = MBB->getParent();
  const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = Subtarget.getInstrInfo();

#ifndef NDEBUG
  // FIXME: Is it possible to be storing a frame index to itself?
  bool SeenFI = false;
  for (const MachineOperand &MO: MI.operands()) {
    if (MO.isFI()) {
      if (SeenFI)
        llvm_unreachable("should not see multiple frame indices");

      SeenFI = true;
    }
  }
#endif

  MachineOperand *FIOp = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
  assert(FIOp && FIOp->isFI() && "frame index must be address operand");
  assert(TII->isMUBUF(MI));
  assert(TII->getNamedOperand(MI, AMDGPU::OpName::soffset)->getReg() ==
         MF->getInfo<SIMachineFunctionInfo>()->getFrameOffsetReg() &&
         "should only be seeing frame offset relative FrameIndex");

  MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
  int64_t NewOffset = OffsetOp->getImm() + Offset;
  assert(isUInt<12>(NewOffset) && "offset should be legal");

  FIOp->ChangeToRegister(BaseReg, false);
  OffsetOp->setImm(NewOffset);
}

bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
                                        unsigned BaseReg,
                                        int64_t Offset) const {
  if (!SIInstrInfo::isMUBUF(*MI))
    return false;

  int64_t NewOffset = Offset + getMUBUFInstrOffset(MI);

  return isUInt<12>(NewOffset);
}

const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
  const MachineFunction &MF, unsigned Kind) const {
  // This is inaccurate. It depends on the instruction and address space. The
  // only place where we should hit this is for dealing with frame indexes /
  // private accesses, so this is correct in that case.
  return &AMDGPU::VGPR_32RegClass;
}

static unsigned getNumSubRegsForSpillOp(unsigned Op) {

  switch (Op) {
  case AMDGPU::SI_SPILL_S1024_SAVE:
  case AMDGPU::SI_SPILL_S1024_RESTORE:
  case AMDGPU::SI_SPILL_V1024_SAVE:
  case AMDGPU::SI_SPILL_V1024_RESTORE:
  case AMDGPU::SI_SPILL_A1024_SAVE:
  case AMDGPU::SI_SPILL_A1024_RESTORE:
    return 32;
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S512_RESTORE:
  case AMDGPU::SI_SPILL_V512_SAVE:
  case AMDGPU::SI_SPILL_V512_RESTORE:
  case AMDGPU::SI_SPILL_A512_SAVE:
  case AMDGPU::SI_SPILL_A512_RESTORE:
    return 16;
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
  case AMDGPU::SI_SPILL_V256_SAVE:
  case AMDGPU::SI_SPILL_V256_RESTORE:
    return 8;
  case AMDGPU::SI_SPILL_S160_SAVE:
  case AMDGPU::SI_SPILL_S160_RESTORE:
  case AMDGPU::SI_SPILL_V160_SAVE:
  case AMDGPU::SI_SPILL_V160_RESTORE:
    return 5;
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
  case AMDGPU::SI_SPILL_V128_SAVE:
  case AMDGPU::SI_SPILL_V128_RESTORE:
  case AMDGPU::SI_SPILL_A128_SAVE:
  case AMDGPU::SI_SPILL_A128_RESTORE:
    return 4;
  case AMDGPU::SI_SPILL_S96_SAVE:
  case AMDGPU::SI_SPILL_S96_RESTORE:
  case AMDGPU::SI_SPILL_V96_SAVE:
  case AMDGPU::SI_SPILL_V96_RESTORE:
    return 3;
  case AMDGPU::SI_SPILL_S64_SAVE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
  case AMDGPU::SI_SPILL_V64_SAVE:
  case AMDGPU::SI_SPILL_V64_RESTORE:
  case AMDGPU::SI_SPILL_A64_SAVE:
  case AMDGPU::SI_SPILL_A64_RESTORE:
    return 2;
  case AMDGPU::SI_SPILL_S32_SAVE:
  case AMDGPU::SI_SPILL_S32_RESTORE:
  case AMDGPU::SI_SPILL_V32_SAVE:
  case AMDGPU::SI_SPILL_V32_RESTORE:
  case AMDGPU::SI_SPILL_A32_SAVE:
  case AMDGPU::SI_SPILL_A32_RESTORE:
    return 1;
  default: llvm_unreachable("Invalid spill opcode");
  }
}

static int getOffsetMUBUFStore(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
    return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
  case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
    return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
  case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
    return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
  case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
    return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
  case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
    return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
  case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
    return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
  case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
    return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
  default:
    return -1;
  }
}

static int getOffsetMUBUFLoad(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
    return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
  case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
    return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
  case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
    return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
  case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
    return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
  case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
    return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
  case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
    return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
  case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
    return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
  case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
    return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
  case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
    return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
  case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
    return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
  case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
    return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
  case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
    return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
  case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
    return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
  default:
    return -1;
  }
}

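// If an AGPR (or, for AGPR spills, a VGPR) has been assigned to this frame
// index and lane, turn the memory access into a single accumulator copy:
//   store, slot is an AGPR  -> V_ACCVGPR_WRITE_B32 (VGPR value -> AGPR slot)
//   reload, slot is an AGPR -> V_ACCVGPR_READ_B32  (AGPR slot -> VGPR value)
//   store, slot is a VGPR   -> V_ACCVGPR_READ_B32  (AGPR value -> VGPR slot)
//   reload, slot is a VGPR  -> V_ACCVGPR_WRITE_B32 (VGPR slot -> AGPR value)
// which is exactly the IsStore ^ isVGPR(Reg) selection below.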
static MachineInstrBuilder spillVGPRtoAGPR(MachineBasicBlock::iterator MI,
                                           int Index,
                                           unsigned Lane,
                                           unsigned ValueReg,
                                           bool IsKill) {
  MachineBasicBlock *MBB = MI->getParent();
  MachineFunction *MF = MI->getParent()->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();

  MCPhysReg Reg = MFI->getVGPRToAGPRSpill(Index, Lane);

  if (Reg == AMDGPU::NoRegister)
    return MachineInstrBuilder();

  bool IsStore = MI->mayStore();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  auto *TRI = static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());

  unsigned Dst = IsStore ? Reg : ValueReg;
  unsigned Src = IsStore ? ValueReg : Reg;
  unsigned Opc = (IsStore ^ TRI->isVGPR(MRI, Reg)) ? AMDGPU::V_ACCVGPR_WRITE_B32
                                                   : AMDGPU::V_ACCVGPR_READ_B32;

  return BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(Opc), Dst)
           .addReg(Src, getKillRegState(IsKill));
}

// This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
// need to handle the case where an SGPR may need to be spilled while spilling.
static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
                                      MachineFrameInfo &MFI,
                                      MachineBasicBlock::iterator MI,
                                      int Index,
                                      int64_t Offset) {
  MachineBasicBlock *MBB = MI->getParent();
  const DebugLoc &DL = MI->getDebugLoc();
  bool IsStore = MI->mayStore();

  unsigned Opc = MI->getOpcode();
  int LoadStoreOp = IsStore ?
    getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
  if (LoadStoreOp == -1)
    return false;

  const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
  if (spillVGPRtoAGPR(MI, Index, 0, Reg->getReg(), false).getInstr())
    return true;

  MachineInstrBuilder NewMI =
    BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
      .add(*Reg)
      .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
      .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
      .addImm(Offset)
      .addImm(0) // glc
      .addImm(0) // slc
      .addImm(0) // tfe
      .addImm(0) // dlc
      .cloneMemRefs(*MI);

  const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
                                                       AMDGPU::OpName::vdata_in);
  if (VDataIn)
    NewMI.add(*VDataIn);
  return true;
}

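// Expand a VGPR/AGPR spill pseudo into per-dword MUBUF accesses. Each
// subregister is stored/loaded at Offset + 4 * i, and the immediate must fit
// in the 12-bit MUBUF offset field (0..4095). Offsets that don't fit are
// scaled by the wavefront size and added into a scavenged SGPR up front; if
// scavenging fails, the scratch offset register itself is adjusted and then
// restored after the access.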
void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
                                         unsigned LoadStoreOp,
                                         int Index,
                                         unsigned ValueReg,
                                         bool IsKill,
                                         unsigned ScratchRsrcReg,
                                         unsigned ScratchOffsetReg,
                                         int64_t InstOffset,
                                         MachineMemOperand *MMO,
                                         RegScavenger *RS) const {
  MachineBasicBlock *MBB = MI->getParent();
  MachineFunction *MF = MI->getParent()->getParent();
  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const MachineFrameInfo &MFI = MF->getFrameInfo();

  const MCInstrDesc &Desc = TII->get(LoadStoreOp);
  const DebugLoc &DL = MI->getDebugLoc();
  bool IsStore = Desc.mayStore();

  bool Scavenged = false;
  unsigned SOffset = ScratchOffsetReg;

  const unsigned EltSize = 4;
  const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
  unsigned NumSubRegs = AMDGPU::getRegBitWidth(RC->getID()) / (EltSize * CHAR_BIT);
  unsigned Size = NumSubRegs * EltSize;
  int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
  int64_t ScratchOffsetRegDelta = 0;

  unsigned Align = MFI.getObjectAlignment(Index);
  const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();

  Register TmpReg =
    hasAGPRs(RC) ? TII->getNamedOperand(*MI, AMDGPU::OpName::tmp)->getReg()
                 : Register();

  assert((Offset % EltSize) == 0 && "unexpected VGPR spill offset");

  if (!isUInt<12>(Offset + Size - EltSize)) {
    SOffset = AMDGPU::NoRegister;

    // We currently only support spilling VGPRs to EltSize boundaries, meaning
    // we can simplify the adjustment of Offset here to just scale with
    // WavefrontSize.
    Offset *= ST.getWavefrontSize();

    // We don't have access to the register scavenger if this function is
    // called during PEI::scavengeFrameVirtualRegs().
    if (RS)
      SOffset = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0, false);

    if (SOffset == AMDGPU::NoRegister) {
      // There are no free SGPRs, and we are in the process of spilling VGPRs
      // too. Since we need a VGPR in order to spill SGPRs (this is true on
      // SI/CI, and on VI it is true until we implement spilling using scalar
      // stores), we have no way to free up an SGPR. Our solution here is to
      // add the offset directly to the ScratchOffset register, and then
      // subtract the offset after the spill to return ScratchOffset to its
      // original value.
      SOffset = ScratchOffsetReg;
      ScratchOffsetRegDelta = Offset;
    } else {
      Scavenged = true;
    }

    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
      .addReg(ScratchOffsetReg)
      .addImm(Offset);

    Offset = 0;
  }

  for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += EltSize) {
    unsigned SubReg = NumSubRegs == 1 ?
      ValueReg : getSubReg(ValueReg, getSubRegFromChannel(i));

    unsigned SOffsetRegState = 0;
    unsigned SrcDstRegState = getDefRegState(!IsStore);
    if (i + 1 == e) {
      SOffsetRegState |= getKillRegState(Scavenged);
      // The last implicit use carries the "Kill" flag.
      SrcDstRegState |= getKillRegState(IsKill);
    }

    auto MIB = spillVGPRtoAGPR(MI, Index, i, SubReg, IsKill);

    if (!MIB.getInstr()) {
      unsigned FinalReg = SubReg;
      if (TmpReg != AMDGPU::NoRegister) {
        if (IsStore)
          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_READ_B32), TmpReg)
            .addReg(SubReg, getKillRegState(IsKill));
        SubReg = TmpReg;
      }

      MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i);
      MachineMemOperand *NewMMO
        = MF->getMachineMemOperand(PInfo, MMO->getFlags(),
                                   EltSize, MinAlign(Align, EltSize * i));

      MIB = BuildMI(*MBB, MI, DL, Desc)
        .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill))
        .addReg(ScratchRsrcReg)
        .addReg(SOffset, SOffsetRegState)
        .addImm(Offset)
        .addImm(0) // glc
        .addImm(0) // slc
        .addImm(0) // tfe
        .addImm(0) // dlc
        .addMemOperand(NewMMO);

      if (!IsStore && TmpReg != AMDGPU::NoRegister)
        MIB = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32),
                      FinalReg)
          .addReg(TmpReg, RegState::Kill);
    }

    if (NumSubRegs > 1)
      MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
  }

  if (ScratchOffsetRegDelta != 0) {
    // Subtract the offset we added to the ScratchOffset register.
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffsetReg)
      .addReg(ScratchOffsetReg)
      .addImm(ScratchOffsetRegDelta);
  }
}

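// Choose the widest scalar buffer opcode that evenly divides an SGPR spill:
// 16-byte DWORDX4, then 8-byte DWORDX2, else single DWORD; the chosen element
// size in bytes is returned alongside the opcode.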
static std::pair<unsigned, unsigned> getSpillEltSize(unsigned SuperRegSize,
                                                     bool Store) {
  if (SuperRegSize % 16 == 0) {
    return { 16, Store ? AMDGPU::S_BUFFER_STORE_DWORDX4_SGPR :
                         AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR };
  }

  if (SuperRegSize % 8 == 0) {
    return { 8, Store ? AMDGPU::S_BUFFER_STORE_DWORDX2_SGPR :
                        AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR };
  }

  return { 4, Store ? AMDGPU::S_BUFFER_STORE_DWORD_SGPR :
                      AMDGPU::S_BUFFER_LOAD_DWORD_SGPR };
}

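// Lower an SI_SPILL_S*_SAVE pseudo. With -amdgpu-spill-sgpr-to-smem the
// pieces go to scratch memory through scalar buffer stores (borrowing m0 as
// the offset register); otherwise each 32-bit piece is written into a lane of
// a reserved VGPR with V_WRITELANE_B32, falling back to a temporary VGPR and
// a V32 spill pseudo when no lanes were assigned to this frame index.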
bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
                               int Index,
                               RegScavenger *RS,
                               bool OnlyToVGPR) const {
  MachineBasicBlock *MBB = MI->getParent();
  MachineFunction *MF = MBB->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  DenseSet<unsigned> SGPRSpillVGPRDefinedSet;

  ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
    = MFI->getSGPRToVGPRSpills(Index);
  bool SpillToVGPR = !VGPRSpills.empty();
  if (OnlyToVGPR && !SpillToVGPR)
    return false;

  MachineRegisterInfo &MRI = MF->getRegInfo();
  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();

  unsigned SuperReg = MI->getOperand(0).getReg();
  bool IsKill = MI->getOperand(0).isKill();
  const DebugLoc &DL = MI->getDebugLoc();

  MachineFrameInfo &FrameInfo = MF->getFrameInfo();

  bool SpillToSMEM = spillSGPRToSMEM();
  if (SpillToSMEM && OnlyToVGPR)
    return false;

  Register FrameReg = getFrameRegister(*MF);

  assert(SpillToVGPR || (SuperReg != MFI->getStackPtrOffsetReg() &&
                         SuperReg != MFI->getFrameOffsetReg() &&
                         SuperReg != MFI->getScratchWaveOffsetReg()));

  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");

  unsigned OffsetReg = AMDGPU::M0;
  unsigned M0CopyReg = AMDGPU::NoRegister;

  if (SpillToSMEM) {
    if (RS->isRegUsed(AMDGPU::M0)) {
      M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
        .addReg(AMDGPU::M0);
    }
  }

  unsigned ScalarStoreOp;
  unsigned EltSize = 4;
  const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
  if (SpillToSMEM && isSGPRClass(RC)) {
    // XXX - if private_element_size is larger than 4 it might be useful to be
    // able to spill wider vmem spills.
    std::tie(EltSize, ScalarStoreOp) =
      getSpillEltSize(getRegSizeInBits(*RC) / 8, true);
  }

  ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
  unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();

  // SubReg carries the "Kill" flag when SubReg == SuperReg.
  unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
    unsigned SubReg = NumSubRegs == 1 ?
      SuperReg : getSubReg(SuperReg, SplitParts[i]);

    if (SpillToSMEM) {
      int64_t FrOffset = FrameInfo.getObjectOffset(Index);

      // The allocated memory size is really the wavefront size * the frame
      // index size. The widest register class is 64 bytes, so a 4-byte
      // scratch allocation is enough to spill this in a single stack object.
      //
      // FIXME: Frame size/offsets are computed earlier than this, so the
      // extra space is still unnecessarily allocated.

      unsigned Align = FrameInfo.getObjectAlignment(Index);
      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
      MachineMemOperand *MMO
        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                   EltSize, MinAlign(Align, EltSize * i));

      // SMEM instructions only support a single offset, so increment the wave
      // offset.

      int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
      if (Offset != 0) {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
          .addReg(FrameReg)
          .addImm(Offset);
      } else {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
          .addReg(FrameReg);
      }

      BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp))
        .addReg(SubReg, getKillRegState(IsKill)) // sdata
        .addReg(MFI->getScratchRSrcReg())        // sbase
        .addReg(OffsetReg, RegState::Kill)       // soff
        .addImm(0)                               // glc
        .addImm(0)                               // dlc
        .addMemOperand(MMO);

      continue;
    }

    if (SpillToVGPR) {
      SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];

      // During SGPR spilling to VGPR, determine if the VGPR is defined. The
      // only circumstance in which we say it is undefined is when it is the
      // first spill to this VGPR in the first basic block.
      bool VGPRDefined = true;
      if (MBB == &MF->front())
        VGPRDefined = !SGPRSpillVGPRDefinedSet.insert(Spill.VGPR).second;

      // Mark the "old value of vgpr" input undef only if this is the first
      // sgpr spill to this specific vgpr in the first basic block.
      BuildMI(*MBB, MI, DL,
              TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
              Spill.VGPR)
        .addReg(SubReg, getKillRegState(IsKill))
        .addImm(Spill.Lane)
        .addReg(Spill.VGPR, VGPRDefined ? 0 : RegState::Undef);

      // FIXME: Since this spills to another register instead of an actual
      // frame index, we should delete the frame index when all references to
      // it are fixed.
    } else {
      // XXX - Can a spill to VGPR fail for some subregisters but not others?
      if (OnlyToVGPR)
        return false;

      // Spill SGPR to a frame index.
      // TODO: Should VI try to spill to VGPR and then spill to SMEM?
      unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

      MachineInstrBuilder Mov
        = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
          .addReg(SubReg, SubKillState);

      // There could be undef components of a spilled super register.
      // TODO: Can we detect this and skip the spill?
      if (NumSubRegs > 1) {
        // The last implicit use of the SuperReg carries the "Kill" flag.
        unsigned SuperKillState = 0;
        if (i + 1 == e)
          SuperKillState |= getKillRegState(IsKill);
        Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
      }

      unsigned Align = FrameInfo.getObjectAlignment(Index);
      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
      MachineMemOperand *MMO
        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                   EltSize, MinAlign(Align, EltSize * i));
      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
        .addReg(TmpReg, RegState::Kill)      // src
        .addFrameIndex(Index)                // vaddr
        .addReg(MFI->getScratchRSrcReg())    // srsrc
        .addReg(MFI->getStackPtrOffsetReg()) // soffset
        .addImm(i * 4)                       // offset
        .addMemOperand(MMO);
    }
  }

  if (M0CopyReg != AMDGPU::NoRegister) {
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
      .addReg(M0CopyReg, RegState::Kill);
  }

  MI->eraseFromParent();
  MFI->addToSpilledSGPRs(NumSubRegs);
  return true;
}

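// Counterpart of spillSGPR: reload the pieces with scalar buffer loads, with
// V_READLANE_B32 from the assigned VGPR lanes, or from the stack slot through
// a temporary VGPR and V_READFIRSTLANE_B32.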
bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
                                 int Index,
                                 RegScavenger *RS,
                                 bool OnlyToVGPR) const {
  MachineFunction *MF = MI->getParent()->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock *MBB = MI->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();

  ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
    = MFI->getSGPRToVGPRSpills(Index);
  bool SpillToVGPR = !VGPRSpills.empty();
  if (OnlyToVGPR && !SpillToVGPR)
    return false;

  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const DebugLoc &DL = MI->getDebugLoc();

  unsigned SuperReg = MI->getOperand(0).getReg();
  bool SpillToSMEM = spillSGPRToSMEM();
  if (SpillToSMEM && OnlyToVGPR)
    return false;

  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");

  unsigned OffsetReg = AMDGPU::M0;
  unsigned M0CopyReg = AMDGPU::NoRegister;

  if (SpillToSMEM) {
    if (RS->isRegUsed(AMDGPU::M0)) {
      M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
        .addReg(AMDGPU::M0);
    }
  }

  unsigned EltSize = 4;
  unsigned ScalarLoadOp;

  Register FrameReg = getFrameRegister(*MF);

  const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
  if (SpillToSMEM && isSGPRClass(RC)) {
    // XXX - if private_element_size is larger than 4 it might be useful to be
    // able to spill wider vmem spills.
    std::tie(EltSize, ScalarLoadOp) =
      getSpillEltSize(getRegSizeInBits(*RC) / 8, false);
  }

  ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
  unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();

  // SubReg carries the "Kill" flag when SubReg == SuperReg.
  int64_t FrOffset = FrameInfo.getObjectOffset(Index);

  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
    unsigned SubReg = NumSubRegs == 1 ?
      SuperReg : getSubReg(SuperReg, SplitParts[i]);

    if (SpillToSMEM) {
      // FIXME: Size may be > 4 but extra bytes wasted.
      unsigned Align = FrameInfo.getObjectAlignment(Index);
      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
      MachineMemOperand *MMO
        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
                                   EltSize, MinAlign(Align, EltSize * i));

      // Add i * 4 offset
      int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
      if (Offset != 0) {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
          .addReg(FrameReg)
          .addImm(Offset);
      } else {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
          .addReg(FrameReg);
      }

      auto MIB =
        BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg)
          .addReg(MFI->getScratchRSrcReg())  // sbase
          .addReg(OffsetReg, RegState::Kill) // soff
          .addImm(0)                         // glc
          .addImm(0)                         // dlc
          .addMemOperand(MMO);

      if (NumSubRegs > 1 && i == 0)
        MIB.addReg(SuperReg, RegState::ImplicitDefine);

      continue;
    }

    if (SpillToVGPR) {
      SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
      auto MIB =
        BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
                SubReg)
          .addReg(Spill.VGPR)
          .addImm(Spill.Lane);

      if (NumSubRegs > 1 && i == 0)
        MIB.addReg(SuperReg, RegState::ImplicitDefine);
    } else {
      if (OnlyToVGPR)
        return false;

      // Restore SGPR from a stack slot.
      // FIXME: We should use S_LOAD_DWORD here for VI.
      unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
      unsigned Align = FrameInfo.getObjectAlignment(Index);

      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);

      MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo,
        MachineMemOperand::MOLoad, EltSize,
        MinAlign(Align, EltSize * i));

      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
        .addFrameIndex(Index)                // vaddr
        .addReg(MFI->getScratchRSrcReg())    // srsrc
        .addReg(MFI->getStackPtrOffsetReg()) // soffset
        .addImm(i * 4)                       // offset
        .addMemOperand(MMO);

      auto MIB =
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
          .addReg(TmpReg, RegState::Kill);

      if (NumSubRegs > 1)
        MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
    }
  }

  if (M0CopyReg != AMDGPU::NoRegister) {
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
      .addReg(M0CopyReg, RegState::Kill);
  }

  MI->eraseFromParent();
  return true;
}

/// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
/// a VGPR and the stack slot can be safely eliminated when all other users are
/// handled.
bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
  MachineBasicBlock::iterator MI,
  int FI,
  RegScavenger *RS) const {
  switch (MI->getOpcode()) {
  case AMDGPU::SI_SPILL_S1024_SAVE:
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S160_SAVE:
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S96_SAVE:
  case AMDGPU::SI_SPILL_S64_SAVE:
  case AMDGPU::SI_SPILL_S32_SAVE:
    return spillSGPR(MI, FI, RS, true);
  case AMDGPU::SI_SPILL_S1024_RESTORE:
  case AMDGPU::SI_SPILL_S512_RESTORE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
  case AMDGPU::SI_SPILL_S160_RESTORE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
  case AMDGPU::SI_SPILL_S96_RESTORE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
  case AMDGPU::SI_SPILL_S32_RESTORE:
    return restoreSGPR(MI, FI, RS, true);
  default:
    llvm_unreachable("not an SGPR spill instruction");
  }
}

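// In the default (non-spill-pseudo) case below, a frame index in a non-entry
// function is rewritten as
//   ResultReg = ((FrameReg - ScratchWaveOffset) >> log2(WavefrontSize)) + Offset
// i.e. the wave-relative frame register is converted back to an absolute
// stack address before the per-object offset is added.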
1140 | | void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, |
1141 | | int SPAdj, unsigned FIOperandNum, |
1142 | 9.53k | RegScavenger *RS) const { |
1143 | 9.53k | MachineFunction *MF = MI->getParent()->getParent(); |
1144 | 9.53k | MachineRegisterInfo &MRI = MF->getRegInfo(); |
1145 | 9.53k | MachineBasicBlock *MBB = MI->getParent(); |
1146 | 9.53k | SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); |
1147 | 9.53k | MachineFrameInfo &FrameInfo = MF->getFrameInfo(); |
1148 | 9.53k | const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>(); |
1149 | 9.53k | const SIInstrInfo *TII = ST.getInstrInfo(); |
1150 | 9.53k | DebugLoc DL = MI->getDebugLoc(); |
1151 | 9.53k | |
1152 | 9.53k | assert(SPAdj == 0 && "unhandled SP adjustment in call sequence?"); |
1153 | 9.53k | |
1154 | 9.53k | MachineOperand &FIOp = MI->getOperand(FIOperandNum); |
1155 | 9.53k | int Index = MI->getOperand(FIOperandNum).getIndex(); |
1156 | 9.53k | |
1157 | 9.53k | Register FrameReg = getFrameRegister(*MF); |
1158 | 9.53k | |
1159 | 9.53k | switch (MI->getOpcode()) { |
1160 | 9.53k | // SGPR register spill |
1161 | 9.53k | case AMDGPU::SI_SPILL_S1024_SAVE: |
1162 | 50 | case AMDGPU::SI_SPILL_S512_SAVE: |
1163 | 50 | case AMDGPU::SI_SPILL_S256_SAVE: |
1164 | 50 | case AMDGPU::SI_SPILL_S160_SAVE: |
1165 | 50 | case AMDGPU::SI_SPILL_S128_SAVE: |
1166 | 50 | case AMDGPU::SI_SPILL_S96_SAVE: |
1167 | 50 | case AMDGPU::SI_SPILL_S64_SAVE: |
1168 | 50 | case AMDGPU::SI_SPILL_S32_SAVE: { |
1169 | 50 | spillSGPR(MI, Index, RS); |
1170 | 50 | break; |
1171 | 50 | } |
1172 | 50 | |
1173 | 50 | // SGPR register restore |
1174 | 51 | case AMDGPU::SI_SPILL_S1024_RESTORE: |
1175 | 51 | case AMDGPU::SI_SPILL_S512_RESTORE: |
1176 | 51 | case AMDGPU::SI_SPILL_S256_RESTORE: |
1177 | 51 | case AMDGPU::SI_SPILL_S160_RESTORE: |
1178 | 51 | case AMDGPU::SI_SPILL_S128_RESTORE: |
1179 | 51 | case AMDGPU::SI_SPILL_S96_RESTORE: |
1180 | 51 | case AMDGPU::SI_SPILL_S64_RESTORE: |
1181 | 51 | case AMDGPU::SI_SPILL_S32_RESTORE: { |
1182 | 51 | restoreSGPR(MI, Index, RS); |
1183 | 51 | break; |
1184 | 51 | } |
1185 | 51 | |
1186 | 51 | // VGPR register spill |
1187 | 1.40k | case AMDGPU::SI_SPILL_V1024_SAVE: |
1188 | 1.40k | case AMDGPU::SI_SPILL_V512_SAVE: |
1189 | 1.40k | case AMDGPU::SI_SPILL_V256_SAVE: |
1190 | 1.40k | case AMDGPU::SI_SPILL_V160_SAVE: |
1191 | 1.40k | case AMDGPU::SI_SPILL_V128_SAVE: |
1192 | 1.40k | case AMDGPU::SI_SPILL_V96_SAVE: |
1193 | 1.40k | case AMDGPU::SI_SPILL_V64_SAVE: |
1194 | 1.40k | case AMDGPU::SI_SPILL_V32_SAVE: |
1195 | 1.40k | case AMDGPU::SI_SPILL_A1024_SAVE: |
1196 | 1.40k | case AMDGPU::SI_SPILL_A512_SAVE: |
1197 | 1.40k | case AMDGPU::SI_SPILL_A128_SAVE: |
1198 | 1.40k | case AMDGPU::SI_SPILL_A64_SAVE: |
1199 | 1.40k | case AMDGPU::SI_SPILL_A32_SAVE: { |
1200 | 1.40k | const MachineOperand *VData = TII->getNamedOperand(*MI, |
1201 | 1.40k | AMDGPU::OpName::vdata); |
1202 | 1.40k | assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() == |
1203 | 1.40k | MFI->getStackPtrOffsetReg()); |
1204 | 1.40k | |
1205 | 1.40k | buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET, |
1206 | 1.40k | Index, |
1207 | 1.40k | VData->getReg(), VData->isKill(), |
1208 | 1.40k | TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(), |
1209 | 1.40k | FrameReg, |
1210 | 1.40k | TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(), |
1211 | 1.40k | *MI->memoperands_begin(), |
1212 | 1.40k | RS); |
1213 | 1.40k | MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode())); |
1214 | 1.40k | MI->eraseFromParent(); |
1215 | 1.40k | break; |
1216 | 1.40k | } |
1217 | 1.40k | case AMDGPU::SI_SPILL_V32_RESTORE: |
1218 | 1.36k | case AMDGPU::SI_SPILL_V64_RESTORE: |
1219 | 1.36k | case AMDGPU::SI_SPILL_V96_RESTORE: |
1220 | 1.36k | case AMDGPU::SI_SPILL_V128_RESTORE: |
1221 | 1.36k | case AMDGPU::SI_SPILL_V160_RESTORE: |
1222 | 1.36k | case AMDGPU::SI_SPILL_V256_RESTORE: |
1223 | 1.36k | case AMDGPU::SI_SPILL_V512_RESTORE: |
1224 | 1.36k | case AMDGPU::SI_SPILL_V1024_RESTORE: |
1225 | 1.36k | case AMDGPU::SI_SPILL_A32_RESTORE: |
1226 | 1.36k | case AMDGPU::SI_SPILL_A64_RESTORE: |
1227 | 1.36k | case AMDGPU::SI_SPILL_A128_RESTORE: |
1228 | 1.36k | case AMDGPU::SI_SPILL_A512_RESTORE: |
1229 | 1.36k | case AMDGPU::SI_SPILL_A1024_RESTORE: { |
1230 | 1.36k | const MachineOperand *VData = TII->getNamedOperand(*MI, |
1231 | 1.36k | AMDGPU::OpName::vdata); |
1232 | 1.36k | assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() == |
1233 | 1.36k | MFI->getStackPtrOffsetReg()); |
1234 | 1.36k | |
1235 | 1.36k | buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET, |
1236 | 1.36k | Index, |
1237 | 1.36k | VData->getReg(), VData->isKill(), |
1238 | 1.36k | TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(), |
1239 | 1.36k | FrameReg, |
1240 | 1.36k | TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(), |
1241 | 1.36k | *MI->memoperands_begin(), |
1242 | 1.36k | RS); |
1243 | 1.36k | MI->eraseFromParent(); |
1244 | 1.36k | break; |
1245 | 1.36k | } |
1246 | 1.36k | |
1247 | 6.66k | default: { |
1248 | 6.66k | const DebugLoc &DL = MI->getDebugLoc(); |
1249 | 6.66k | bool IsMUBUF = TII->isMUBUF(*MI); |
1250 | 6.66k | |
1251 | 6.66k | if (!IsMUBUF && !MFI->isEntryFunction()404 ) { |
1252 | 52 | // Convert to an absolute stack address by finding the offset from the |
1253 | 52 | // scratch wave base and scaling by the wave size. |
1254 | 52 | // |
1255 | 52 | // In an entry function/kernel the offset relative to the frame
1256 | 52 | // register is already absolute, so no conversion is needed.
1257 | 52 | |
1258 | 52 | unsigned DiffReg |
1259 | 52 | = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); |
1260 | 52 | |
1261 | 52 | bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32; |
1262 | 52 | Register ResultReg = IsCopy ? |
1263 | 48 | MI->getOperand(0).getReg() : |
1264 | 52 | MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass)4 ; |
1265 | 52 | |
1266 | 52 | BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), DiffReg) |
1267 | 52 | .addReg(FrameReg) |
1268 | 52 | .addReg(MFI->getScratchWaveOffsetReg()); |
1269 | 52 | |
1270 | 52 | int64_t Offset = FrameInfo.getObjectOffset(Index); |
1271 | 52 | if (Offset == 0) { |
1272 | 36 | // XXX - This never happens because of the emergency scavenging slot at offset 0?
1273 | 36 | BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ResultReg) |
1274 | 36 | .addImm(Log2_32(ST.getWavefrontSize())) |
1275 | 36 | .addReg(DiffReg); |
1276 | 36 | } else { |
1277 | 16 | unsigned ScaledReg |
1278 | 16 | = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); |
1279 | 16 | |
1280 | 16 | BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ScaledReg) |
1281 | 16 | .addImm(Log2_32(ST.getWavefrontSize())) |
1282 | 16 | .addReg(DiffReg, RegState::Kill); |
1283 | 16 | |
1284 | 16 | // TODO: Fold if the use instruction is another add of a constant.
1285 | 16 | if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) { |
1286 | 12 | TII->getAddNoCarry(*MBB, MI, DL, ResultReg) |
1287 | 12 | .addImm(Offset) |
1288 | 12 | .addReg(ScaledReg, RegState::Kill) |
1289 | 12 | .addImm(0); // clamp bit |
1290 | 12 | } else { |
1291 | 4 | unsigned ConstOffsetReg |
1292 | 4 | = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); |
1293 | 4 | |
1294 | 4 | BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg) |
1295 | 4 | .addImm(Offset); |
1296 | 4 | TII->getAddNoCarry(*MBB, MI, DL, ResultReg) |
1297 | 4 | .addReg(ConstOffsetReg, RegState::Kill) |
1298 | 4 | .addReg(ScaledReg, RegState::Kill) |
1299 | 4 | .addImm(0); // clamp bit |
1300 | 4 | } |
1301 | 16 | } |
1302 | 52 | |
1303 | 52 | // Don't introduce an extra copy if we're just materializing in a mov. |
1304 | 52 | if (IsCopy) |
1305 | 48 | MI->eraseFromParent(); |
1306 | 4 | else |
1307 | 4 | FIOp.ChangeToRegister(ResultReg, false, false, true); |
1308 | 52 | return; |
1309 | 52 | } |
1310 | 6.60k | |
1311 | 6.60k | if (IsMUBUF) { |
1312 | 6.25k | // Disable offen so we don't need a 0 vgpr base. |
1313 | 6.25k | assert(static_cast<int>(FIOperandNum) == |
1314 | 6.25k | AMDGPU::getNamedOperandIdx(MI->getOpcode(), |
1315 | 6.25k | AMDGPU::OpName::vaddr)); |
1316 | 6.25k | |
1317 | 6.25k | assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() == |
1318 | 6.25k | MFI->getStackPtrOffsetReg()); |
1319 | 6.25k | |
1320 | 6.25k | TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->setReg(FrameReg); |
1321 | 6.25k | |
1322 | 6.25k | int64_t Offset = FrameInfo.getObjectOffset(Index); |
1323 | 6.25k | int64_t OldImm |
1324 | 6.25k | = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(); |
1325 | 6.25k | int64_t NewOffset = OldImm + Offset; |
1326 | 6.25k | |
1327 | 6.25k | if (isUInt<12>(NewOffset) && |
1328 | 6.25k | buildMUBUFOffsetLoadStore(TII, FrameInfo, MI, Index, NewOffset)6.23k ) { |
1329 | 6.23k | MI->eraseFromParent(); |
1330 | 6.23k | return; |
1331 | 6.23k | } |
1332 | 370 | } |
1333 | 370 | |
1334 | 370 | // If the offset is simply too big, don't convert it to a
1335 | 370 | // scratch-wave-offset-relative index; use the object offset directly.
1336 | 370 | |
1337 | 370 | int64_t Offset = FrameInfo.getObjectOffset(Index); |
1338 | 370 | FIOp.ChangeToImmediate(Offset); |
1339 | 370 | if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) { |
1340 | 18 | unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); |
1341 | 18 | BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg) |
1342 | 18 | .addImm(Offset); |
1343 | 18 | FIOp.ChangeToRegister(TmpReg, false, false, true); |
1344 | 18 | } |
1345 | 370 | } |
1346 | 9.53k | } |
1347 | 9.53k | } |
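
The default case above rewrites a frame index into a per-lane scratch address: the difference between the frame register and the scratch wave offset spans the whole wave, so shifting it right by log2 of the wavefront size gives the lane-relative base, and the frame object's offset is then added (inline when it fits as an inlinable literal, via a scratch SGPR otherwise). A minimal arithmetic sketch of that computation follows; the function name and values are illustrative, not code from this file.

#include <cassert>
#include <cstdint>

// Recomputes, in plain C++, the address that the S_SUB_U32 / V_LSHRREV_B32 /
// add expansion above materializes for one lane.
uint32_t laneScratchAddress(uint32_t FrameRegVal, uint32_t WaveOffsetVal,
                            unsigned WavefrontSize, int64_t ObjectOffset) {
  assert((WavefrontSize == 32 || WavefrontSize == 64) && "illustrative only");
  unsigned Log2Wave = WavefrontSize == 64 ? 6 : 5;      // Log2_32(WavefrontSize)
  uint32_t Diff = FrameRegVal - WaveOffsetVal;          // S_SUB_U32
  uint32_t Scaled = Diff >> Log2Wave;                   // V_LSHRREV_B32
  return Scaled + static_cast<uint32_t>(ObjectOffset);  // add (shift only at 0)
}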
1348 | | |
1349 | 19.8M | StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const { |
1350 | 19.8M | return AMDGPUInstPrinter::getRegisterName(Reg); |
1351 | 19.8M | } |
1352 | | |
1353 | | // FIXME: This is very slow. It might be worth creating a map from physreg to |
1354 | | // register class. |
1355 | 8.50M | const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const { |
1356 | 8.50M | assert(!TargetRegisterInfo::isVirtualRegister(Reg)); |
1357 | 8.50M | |
1358 | 8.50M | static const TargetRegisterClass *const BaseClasses[] = { |
1359 | 8.50M | &AMDGPU::VGPR_32RegClass, |
1360 | 8.50M | &AMDGPU::SReg_32RegClass, |
1361 | 8.50M | &AMDGPU::AGPR_32RegClass, |
1362 | 8.50M | &AMDGPU::VReg_64RegClass, |
1363 | 8.50M | &AMDGPU::SReg_64RegClass, |
1364 | 8.50M | &AMDGPU::AReg_64RegClass, |
1365 | 8.50M | &AMDGPU::VReg_96RegClass, |
1366 | 8.50M | &AMDGPU::SReg_96RegClass, |
1367 | 8.50M | &AMDGPU::VReg_128RegClass, |
1368 | 8.50M | &AMDGPU::SReg_128RegClass, |
1369 | 8.50M | &AMDGPU::AReg_128RegClass, |
1370 | 8.50M | &AMDGPU::VReg_160RegClass, |
1371 | 8.50M | &AMDGPU::SReg_160RegClass, |
1372 | 8.50M | &AMDGPU::VReg_256RegClass, |
1373 | 8.50M | &AMDGPU::SReg_256RegClass, |
1374 | 8.50M | &AMDGPU::VReg_512RegClass, |
1375 | 8.50M | &AMDGPU::SReg_512RegClass, |
1376 | 8.50M | &AMDGPU::AReg_512RegClass, |
1377 | 8.50M | &AMDGPU::SReg_1024RegClass, |
1378 | 8.50M | &AMDGPU::VReg_1024RegClass, |
1379 | 8.50M | &AMDGPU::AReg_1024RegClass, |
1380 | 8.50M | &AMDGPU::SCC_CLASSRegClass, |
1381 | 8.50M | &AMDGPU::Pseudo_SReg_32RegClass, |
1382 | 8.50M | &AMDGPU::Pseudo_SReg_128RegClass, |
1383 | 8.50M | }; |
1384 | 8.50M | |
1385 | 33.0M | for (const TargetRegisterClass *BaseClass : BaseClasses) { |
1386 | 33.0M | if (BaseClass->contains(Reg)) { |
1387 | 8.50M | return BaseClass; |
1388 | 8.50M | } |
1389 | 33.0M | } |
1390 | 8.50M | return nullptr0 ; |
1391 | 8.50M | } |
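
One possible shape for the map the FIXME above suggests, as a hedged sketch: the cache object, its ownership, and the helper name are hypothetical additions, not part of this file.

#include "SIRegisterInfo.h"
#include "llvm/ADT/DenseMap.h"

using namespace llvm;

// Hypothetical memoizing wrapper: answer repeat queries from a cache and fall
// back to the linear scan in getPhysRegClass() only on a miss. The caller is
// assumed to own the cache, since SIRegisterInfo has no such member.
static const TargetRegisterClass *
getPhysRegClassCached(const SIRegisterInfo &TRI, unsigned Reg,
                      DenseMap<unsigned, const TargetRegisterClass *> &Cache) {
  auto It = Cache.find(Reg);
  if (It != Cache.end())
    return It->second;
  const TargetRegisterClass *RC = TRI.getPhysRegClass(Reg); // slow path
  Cache.try_emplace(Reg, RC);
  return RC;
}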
1392 | | |
1393 | | // TODO: It might be helpful to have some target specific flags in |
1394 | | // TargetRegisterClass to mark which classes are VGPRs to make this trivial. |
1395 | 15.7M | bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const { |
1396 | 15.7M | unsigned Size = getRegSizeInBits(*RC); |
1397 | 15.7M | if (Size < 32) |
1398 | 10.1k | return false; |
1399 | 15.7M | switch (Size) { |
1400 | 15.7M | case 32: |
1401 | 8.38M | return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr; |
1402 | 15.7M | case 64: |
1403 | 5.00M | return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr; |
1404 | 15.7M | case 96: |
1405 | 11.2k | return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr; |
1406 | 15.7M | case 128: |
1407 | 1.91M | return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr; |
1408 | 15.7M | case 160: |
1409 | 4.93k | return getCommonSubClass(&AMDGPU::VReg_160RegClass, RC) != nullptr; |
1410 | 15.7M | case 256: |
1411 | 279k | return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr; |
1412 | 15.7M | case 512: |
1413 | 86.7k | return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr; |
1414 | 15.7M | case 1024: |
1415 | 46.0k | return getCommonSubClass(&AMDGPU::VReg_1024RegClass, RC) != nullptr; |
1416 | 15.7M | default: |
1417 | 0 | llvm_unreachable("Invalid register class size"); |
1418 | 15.7M | } |
1419 | 15.7M | } |
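
One way the TODO above could look, sketched under the assumption that TableGen attached a target-specific flag to each register class; neither the flag enumerator nor such class-level flags exist in this tree.

// Hypothetical per-class flag test replacing the size switch above.
enum : unsigned { RCFlagHasVGPR = 1u << 0 }; // assumed flag bit

bool hasVGPRsViaFlags(unsigned ClassTSFlags) {
  return (ClassTSFlags & RCFlagHasVGPR) != 0;
}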
1420 | | |
1421 | 7.64M | bool SIRegisterInfo::hasAGPRs(const TargetRegisterClass *RC) const { |
1422 | 7.64M | unsigned Size = getRegSizeInBits(*RC); |
1423 | 7.64M | if (Size < 32) |
1424 | 1 | return false; |
1425 | 7.64M | switch (Size) { |
1426 | 7.64M | case 32: |
1427 | 3.50M | return getCommonSubClass(&AMDGPU::AGPR_32RegClass, RC) != nullptr; |
1428 | 7.64M | case 64: |
1429 | 2.72M | return getCommonSubClass(&AMDGPU::AReg_64RegClass, RC) != nullptr; |
1430 | 7.64M | case 96: |
1431 | 3.42k | return false; |
1432 | 7.64M | case 128: |
1433 | 1.10M | return getCommonSubClass(&AMDGPU::AReg_128RegClass, RC) != nullptr; |
1434 | 7.64M | case 160: |
1435 | 238k | case 256: |
1436 | 238k | return false; |
1437 | 238k | case 512: |
1438 | 49.8k | return getCommonSubClass(&AMDGPU::AReg_512RegClass, RC) != nullptr; |
1439 | 238k | case 1024: |
1440 | 17.4k | return getCommonSubClass(&AMDGPU::AReg_1024RegClass, RC) != nullptr; |
1441 | 238k | default: |
1442 | 0 | llvm_unreachable("Invalid register class size"); |
1443 | 7.64M | } |
1444 | 7.64M | } |
1445 | | |
1446 | | const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass( |
1447 | 169k | const TargetRegisterClass *SRC) const { |
1448 | 169k | switch (getRegSizeInBits(*SRC)) { |
1449 | 169k | case 32: |
1450 | 145k | return &AMDGPU::VGPR_32RegClass; |
1451 | 169k | case 64: |
1452 | 17.6k | return &AMDGPU::VReg_64RegClass; |
1453 | 169k | case 96: |
1454 | 86 | return &AMDGPU::VReg_96RegClass; |
1455 | 169k | case 128: |
1456 | 5.94k | return &AMDGPU::VReg_128RegClass; |
1457 | 169k | case 160: |
1458 | 0 | return &AMDGPU::VReg_160RegClass; |
1459 | 169k | case 256: |
1460 | 94 | return &AMDGPU::VReg_256RegClass; |
1461 | 169k | case 512: |
1462 | 60 | return &AMDGPU::VReg_512RegClass; |
1463 | 169k | case 1024: |
1464 | 11 | return &AMDGPU::VReg_1024RegClass; |
1465 | 169k | default: |
1466 | 0 | llvm_unreachable("Invalid register class size"); |
1467 | 169k | } |
1468 | 169k | } |
1469 | | |
1470 | | const TargetRegisterClass *SIRegisterInfo::getEquivalentAGPRClass( |
1471 | 950 | const TargetRegisterClass *SRC) const { |
1472 | 950 | switch (getRegSizeInBits(*SRC)) { |
1473 | 950 | case 32: |
1474 | 948 | return &AMDGPU::AGPR_32RegClass; |
1475 | 950 | case 64: |
1476 | 2 | return &AMDGPU::AReg_64RegClass; |
1477 | 950 | case 128: |
1478 | 0 | return &AMDGPU::AReg_128RegClass; |
1479 | 950 | case 512: |
1480 | 0 | return &AMDGPU::AReg_512RegClass; |
1481 | 950 | case 1024: |
1482 | 0 | return &AMDGPU::AReg_1024RegClass; |
1483 | 950 | default: |
1484 | 0 | llvm_unreachable("Invalid register class size"); |
1485 | 950 | } |
1486 | 950 | } |
1487 | | |
1488 | | const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass( |
1489 | 2.51k | const TargetRegisterClass *VRC) const { |
1490 | 2.51k | switch (getRegSizeInBits(*VRC)) { |
1491 | 2.51k | case 32: |
1492 | 2.24k | return &AMDGPU::SGPR_32RegClass; |
1493 | 2.51k | case 64: |
1494 | 248 | return &AMDGPU::SReg_64RegClass; |
1495 | 2.51k | case 96: |
1496 | 0 | return &AMDGPU::SReg_96RegClass; |
1497 | 2.51k | case 128: |
1498 | 15 | return &AMDGPU::SReg_128RegClass; |
1499 | 2.51k | case 160: |
1500 | 0 | return &AMDGPU::SReg_160RegClass; |
1501 | 2.51k | case 256: |
1502 | 2 | return &AMDGPU::SReg_256RegClass; |
1503 | 2.51k | case 512: |
1504 | 0 | return &AMDGPU::SReg_512RegClass; |
1505 | 2.51k | case 1024: |
1506 | 0 | return &AMDGPU::SReg_1024RegClass; |
1507 | 2.51k | default: |
1508 | 0 | llvm_unreachable("Invalid register class size"); |
1509 | 2.51k | } |
1510 | 2.51k | } |
1511 | | |
1512 | | const TargetRegisterClass *SIRegisterInfo::getSubRegClass( |
1513 | 499k | const TargetRegisterClass *RC, unsigned SubIdx) const { |
1514 | 499k | if (SubIdx == AMDGPU::NoSubRegister) |
1515 | 420k | return RC; |
1516 | 78.6k | |
1517 | 78.6k | // We can assume that each lane corresponds to one 32-bit register. |
1518 | 78.6k | unsigned Count = getSubRegIndexLaneMask(SubIdx).getNumLanes(); |
1519 | 78.6k | if (isSGPRClass(RC)) { |
1520 | 37.7k | switch (Count) { |
1521 | 37.7k | case 1: |
1522 | 37.6k | return &AMDGPU::SGPR_32RegClass; |
1523 | 37.7k | case 2: |
1524 | 60 | return &AMDGPU::SReg_64RegClass; |
1525 | 37.7k | case 3: |
1526 | 0 | return &AMDGPU::SReg_96RegClass; |
1527 | 37.7k | case 4: |
1528 | 0 | return &AMDGPU::SReg_128RegClass; |
1529 | 37.7k | case 5: |
1530 | 0 | return &AMDGPU::SReg_160RegClass; |
1531 | 37.7k | case 8: |
1532 | 0 | return &AMDGPU::SReg_256RegClass; |
1533 | 37.7k | case 16: |
1534 | 0 | return &AMDGPU::SReg_512RegClass; |
1535 | 37.7k | case 32: /* fall-through */ |
1536 | 0 | default: |
1537 | 0 | llvm_unreachable("Invalid sub-register class size"); |
1538 | 40.9k | } |
1539 | 40.9k | } else if (hasAGPRs(RC)) { |
1540 | 4 | switch (Count) { |
1541 | 4 | case 1: |
1542 | 4 | return &AMDGPU::AGPR_32RegClass; |
1543 | 4 | case 2: |
1544 | 0 | return &AMDGPU::AReg_64RegClass; |
1545 | 4 | case 4: |
1546 | 0 | return &AMDGPU::AReg_128RegClass; |
1547 | 4 | case 16: |
1548 | 0 | return &AMDGPU::AReg_512RegClass; |
1549 | 4 | case 32: /* fall-through */ |
1550 | 0 | default: |
1551 | 0 | llvm_unreachable("Invalid sub-register class size"); |
1552 | 40.9k | } |
1553 | 40.9k | } else { |
1554 | 40.9k | switch (Count) { |
1555 | 40.9k | case 1: |
1556 | 40.4k | return &AMDGPU::VGPR_32RegClass; |
1557 | 40.9k | case 2: |
1558 | 546 | return &AMDGPU::VReg_64RegClass; |
1559 | 40.9k | case 3: |
1560 | 0 | return &AMDGPU::VReg_96RegClass; |
1561 | 40.9k | case 4: |
1562 | 0 | return &AMDGPU::VReg_128RegClass; |
1563 | 40.9k | case 5: |
1564 | 0 | return &AMDGPU::VReg_160RegClass; |
1565 | 40.9k | case 8: |
1566 | 0 | return &AMDGPU::VReg_256RegClass; |
1567 | 40.9k | case 16: |
1568 | 0 | return &AMDGPU::VReg_512RegClass; |
1569 | 40.9k | case 32: /* fall-through */ |
1570 | 0 | default: |
1571 | 0 | llvm_unreachable("Invalid sub-register class size"); |
1572 | 40.9k | } |
1573 | 40.9k | } |
1574 | 78.6k | } |
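
Since each lane of the mask is one 32-bit register, a two-lane sub-register index of a VGPR tuple maps to the 64-bit VGPR class. A usage sketch, assuming a SIRegisterInfo reference TRI in this file's context:

// sub0_sub1 covers two lanes, so the sub-register class of a 128-bit VGPR
// tuple at that index is the 64-bit VGPR class.
const TargetRegisterClass *Sub =
    TRI.getSubRegClass(&AMDGPU::VReg_128RegClass, AMDGPU::sub0_sub1);
assert(Sub == &AMDGPU::VReg_64RegClass);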
1575 | | |
1576 | | bool SIRegisterInfo::shouldRewriteCopySrc( |
1577 | | const TargetRegisterClass *DefRC, |
1578 | | unsigned DefSubReg, |
1579 | | const TargetRegisterClass *SrcRC, |
1580 | 640k | unsigned SrcSubReg) const { |
1581 | 640k | // We want to prefer the smallest register class possible, so we don't want to |
1582 | 640k | // stop and rewrite on anything that looks like a subregister |
1583 | 640k | // extract. Operations mostly don't care about the super register class, so we |
1584 | 640k | // only want to stop on the most basic of copies between the same register |
1585 | 640k | // class. |
1586 | 640k | // |
1587 | 640k | // e.g. if we have something like |
1588 | 640k | // %0 = ... |
1589 | 640k | // %1 = ... |
1590 | 640k | // %2 = REG_SEQUENCE %0, sub0, %1, sub1
1591 | 640k | // %3 = COPY %2.sub0
1592 | 640k | // |
1593 | 640k | // We want to look through the COPY to find: |
1594 | 640k | // => %3 = COPY %0 |
1595 | 640k | |
1596 | 640k | // Plain copy. |
1597 | 640k | return getCommonSubClass(DefRC, SrcRC) != nullptr; |
1598 | 640k | } |
1599 | | |
1600 | | /// Returns a register that is not used at any point in the function. |
1601 | | /// If all registers are used, then this function will return |
1602 | | /// AMDGPU::NoRegister.
1603 | | unsigned |
1604 | | SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI, |
1605 | | const TargetRegisterClass *RC, |
1606 | 206 | const MachineFunction &MF) const { |
1607 | 206 | |
1608 | 206 | for (unsigned Reg : *RC) |
1609 | 5.83k | if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg)5.37k ) |
1610 | 202 | return Reg; |
1611 | 206 | return AMDGPU::NoRegister4 ; |
1612 | 206 | } |
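
A usage sketch for the helper above, assuming MF, MRI, and TRI are in scope; callers must handle the all-registers-used case themselves:

// Probe for an SGPR the function never touches.
unsigned Scratch =
    TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_32RegClass, MF);
if (Scratch == AMDGPU::NoRegister) {
  // Nothing is free: a real caller would spill or bail out here.
}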
1613 | | |
1614 | | ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC, |
1615 | 9.61k | unsigned EltSize) const { |
1616 | 9.61k | if (EltSize == 4) { |
1617 | 9.31k | static const int16_t Sub0_31[] = { |
1618 | 9.31k | AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, |
1619 | 9.31k | AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, |
1620 | 9.31k | AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11, |
1621 | 9.31k | AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, |
1622 | 9.31k | AMDGPU::sub16, AMDGPU::sub17, AMDGPU::sub18, AMDGPU::sub19, |
1623 | 9.31k | AMDGPU::sub20, AMDGPU::sub21, AMDGPU::sub22, AMDGPU::sub23, |
1624 | 9.31k | AMDGPU::sub24, AMDGPU::sub25, AMDGPU::sub26, AMDGPU::sub27, |
1625 | 9.31k | AMDGPU::sub28, AMDGPU::sub29, AMDGPU::sub30, AMDGPU::sub31, |
1626 | 9.31k | }; |
1627 | 9.31k | |
1628 | 9.31k | static const int16_t Sub0_15[] = { |
1629 | 9.31k | AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, |
1630 | 9.31k | AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, |
1631 | 9.31k | AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11, |
1632 | 9.31k | AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, |
1633 | 9.31k | }; |
1634 | 9.31k | |
1635 | 9.31k | static const int16_t Sub0_7[] = { |
1636 | 9.31k | AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, |
1637 | 9.31k | AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, |
1638 | 9.31k | }; |
1639 | 9.31k | |
1640 | 9.31k | static const int16_t Sub0_4[] = { |
1641 | 9.31k | AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, AMDGPU::sub4, |
1642 | 9.31k | }; |
1643 | 9.31k | |
1644 | 9.31k | static const int16_t Sub0_3[] = { |
1645 | 9.31k | AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, |
1646 | 9.31k | }; |
1647 | 9.31k | |
1648 | 9.31k | static const int16_t Sub0_2[] = { |
1649 | 9.31k | AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, |
1650 | 9.31k | }; |
1651 | 9.31k | |
1652 | 9.31k | static const int16_t Sub0_1[] = { |
1653 | 9.31k | AMDGPU::sub0, AMDGPU::sub1, |
1654 | 9.31k | }; |
1655 | 9.31k | |
1656 | 9.31k | switch (AMDGPU::getRegBitWidth(*RC->MC)) { |
1657 | 9.31k | case 32: |
1658 | 1.01k | return {}; |
1659 | 9.31k | case 64: |
1660 | 7.84k | return makeArrayRef(Sub0_1); |
1661 | 9.31k | case 96: |
1662 | 12 | return makeArrayRef(Sub0_2); |
1663 | 9.31k | case 128: |
1664 | 314 | return makeArrayRef(Sub0_3); |
1665 | 9.31k | case 160: |
1666 | 10 | return makeArrayRef(Sub0_4); |
1667 | 9.31k | case 256: |
1668 | 64 | return makeArrayRef(Sub0_7); |
1669 | 9.31k | case 512: |
1670 | 56 | return makeArrayRef(Sub0_15); |
1671 | 9.31k | case 1024: |
1672 | 1 | return makeArrayRef(Sub0_31); |
1673 | 9.31k | default: |
1674 | 0 | llvm_unreachable("unhandled register size"); |
1675 | 298 | } |
1676 | 298 | } |
1677 | 298 | |
1678 | 298 | if (EltSize == 8) { |
1679 | 282 | static const int16_t Sub0_31_64[] = { |
1680 | 282 | AMDGPU::sub0_sub1, AMDGPU::sub2_sub3, |
1681 | 282 | AMDGPU::sub4_sub5, AMDGPU::sub6_sub7, |
1682 | 282 | AMDGPU::sub8_sub9, AMDGPU::sub10_sub11, |
1683 | 282 | AMDGPU::sub12_sub13, AMDGPU::sub14_sub15, |
1684 | 282 | AMDGPU::sub16_sub17, AMDGPU::sub18_sub19, |
1685 | 282 | AMDGPU::sub20_sub21, AMDGPU::sub22_sub23, |
1686 | 282 | AMDGPU::sub24_sub25, AMDGPU::sub26_sub27, |
1687 | 282 | AMDGPU::sub28_sub29, AMDGPU::sub30_sub31 |
1688 | 282 | }; |
1689 | 282 | |
1690 | 282 | static const int16_t Sub0_15_64[] = { |
1691 | 282 | AMDGPU::sub0_sub1, AMDGPU::sub2_sub3, |
1692 | 282 | AMDGPU::sub4_sub5, AMDGPU::sub6_sub7, |
1693 | 282 | AMDGPU::sub8_sub9, AMDGPU::sub10_sub11, |
1694 | 282 | AMDGPU::sub12_sub13, AMDGPU::sub14_sub15 |
1695 | 282 | }; |
1696 | 282 | |
1697 | 282 | static const int16_t Sub0_7_64[] = { |
1698 | 282 | AMDGPU::sub0_sub1, AMDGPU::sub2_sub3, |
1699 | 282 | AMDGPU::sub4_sub5, AMDGPU::sub6_sub7 |
1700 | 282 | }; |
1701 | 282 | |
1702 | 282 | |
1703 | 282 | static const int16_t Sub0_3_64[] = { |
1704 | 282 | AMDGPU::sub0_sub1, AMDGPU::sub2_sub3 |
1705 | 282 | }; |
1706 | 282 | |
1707 | 282 | switch (AMDGPU::getRegBitWidth(*RC->MC)) { |
1708 | 282 | case 64: |
1709 | 16 | return {}; |
1710 | 282 | case 128: |
1711 | 257 | return makeArrayRef(Sub0_3_64); |
1712 | 282 | case 256: |
1713 | 4 | return makeArrayRef(Sub0_7_64); |
1714 | 282 | case 512: |
1715 | 5 | return makeArrayRef(Sub0_15_64); |
1716 | 282 | case 1024: |
1717 | 0 | return makeArrayRef(Sub0_31_64); |
1718 | 282 | default: |
1719 | 0 | llvm_unreachable("unhandled register size"); |
1720 | 16 | } |
1721 | 16 | } |
1722 | 16 | |
1723 | 16 | if (EltSize == 16) { |
1724 | 12 | |
1725 | 12 | static const int16_t Sub0_31_128[] = { |
1726 | 12 | AMDGPU::sub0_sub1_sub2_sub3, |
1727 | 12 | AMDGPU::sub4_sub5_sub6_sub7, |
1728 | 12 | AMDGPU::sub8_sub9_sub10_sub11, |
1729 | 12 | AMDGPU::sub12_sub13_sub14_sub15, |
1730 | 12 | AMDGPU::sub16_sub17_sub18_sub19, |
1731 | 12 | AMDGPU::sub20_sub21_sub22_sub23, |
1732 | 12 | AMDGPU::sub24_sub25_sub26_sub27, |
1733 | 12 | AMDGPU::sub28_sub29_sub30_sub31 |
1734 | 12 | }; |
1735 | 12 | |
1736 | 12 | static const int16_t Sub0_15_128[] = { |
1737 | 12 | AMDGPU::sub0_sub1_sub2_sub3, |
1738 | 12 | AMDGPU::sub4_sub5_sub6_sub7, |
1739 | 12 | AMDGPU::sub8_sub9_sub10_sub11, |
1740 | 12 | AMDGPU::sub12_sub13_sub14_sub15 |
1741 | 12 | }; |
1742 | 12 | |
1743 | 12 | static const int16_t Sub0_7_128[] = { |
1744 | 12 | AMDGPU::sub0_sub1_sub2_sub3, |
1745 | 12 | AMDGPU::sub4_sub5_sub6_sub7 |
1746 | 12 | }; |
1747 | 12 | |
1748 | 12 | switch (AMDGPU::getRegBitWidth(*RC->MC)) { |
1749 | 12 | case 128: |
1750 | 4 | return {}; |
1751 | 12 | case 256: |
1752 | 7 | return makeArrayRef(Sub0_7_128); |
1753 | 12 | case 512: |
1754 | 1 | return makeArrayRef(Sub0_15_128); |
1755 | 12 | case 1024: |
1756 | 0 | return makeArrayRef(Sub0_31_128); |
1757 | 12 | default: |
1758 | 0 | llvm_unreachable("unhandled register size"); |
1759 | 4 | } |
1760 | 4 | } |
1761 | 4 | |
1762 | 4 | assert(EltSize == 32 && "unhandled elt size"); |
1763 | 4 | |
1764 | 4 | static const int16_t Sub0_31_256[] = { |
1765 | 4 | AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7, |
1766 | 4 | AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15, |
1767 | 4 | AMDGPU::sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23, |
1768 | 4 | AMDGPU::sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31 |
1769 | 4 | }; |
1770 | 4 | |
1771 | 4 | static const int16_t Sub0_15_256[] = { |
1772 | 4 | AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7, |
1773 | 4 | AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15 |
1774 | 4 | }; |
1775 | 4 | |
1776 | 4 | switch (AMDGPU::getRegBitWidth(*RC->MC)) { |
1777 | 4 | case 256: |
1778 | 0 | return {}; |
1779 | 4 | case 512: |
1780 | 4 | return makeArrayRef(Sub0_15_256); |
1781 | 4 | case 1024: |
1782 | 0 | return makeArrayRef(Sub0_31_256); |
1783 | 4 | default: |
1784 | 0 | llvm_unreachable("unhandled register size"); |
1785 | 4 | } |
1786 | 4 | } |
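
A usage sketch for the table lookup above: splitting a 128-bit class into dword lanes, e.g. when a spill is lowered one 32-bit element at a time. TRI and a physical register Reg are assumed in scope; the loop body is a placeholder.

// EltSize is in bytes; 4 selects the 32-bit sub-register table, so a 128-bit
// class yields {sub0, sub1, sub2, sub3}. An empty result means the class is
// already exactly one element wide.
ArrayRef<int16_t> Parts =
    TRI.getRegSplitParts(&AMDGPU::VReg_128RegClass, /*EltSize=*/4);
for (int16_t SubIdx : Parts) {
  unsigned SubReg = TRI.getSubReg(Reg, SubIdx); // one 32-bit lane of Reg
  (void)SubReg;                                 // per-lane work goes here
}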
1787 | | |
1788 | | const TargetRegisterClass* |
1789 | | SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI, |
1790 | 7.32M | unsigned Reg) const { |
1791 | 7.32M | if (TargetRegisterInfo::isVirtualRegister(Reg)) |
1792 | 480k | return MRI.getRegClass(Reg); |
1793 | 6.84M | |
1794 | 6.84M | return getPhysRegClass(Reg); |
1795 | 6.84M | } |
1796 | | |
1797 | | bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI, |
1798 | 5.37M | unsigned Reg) const { |
1799 | 5.37M | const TargetRegisterClass * RC = getRegClassForReg(MRI, Reg); |
1800 | 5.37M | assert(RC && "Register class for the reg not found"); |
1801 | 5.37M | return hasVGPRs(RC); |
1802 | 5.37M | } |
1803 | | |
1804 | | bool SIRegisterInfo::isAGPR(const MachineRegisterInfo &MRI, |
1805 | 1.67M | unsigned Reg) const { |
1806 | 1.67M | const TargetRegisterClass * RC = getRegClassForReg(MRI, Reg); |
1807 | 1.67M | assert(RC && "Register class for the reg not found"); |
1808 | 1.67M | return hasAGPRs(RC); |
1809 | 1.67M | } |
1810 | | |
1811 | | bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI, |
1812 | | const TargetRegisterClass *SrcRC, |
1813 | | unsigned SubReg, |
1814 | | const TargetRegisterClass *DstRC, |
1815 | | unsigned DstSubReg, |
1816 | | const TargetRegisterClass *NewRC, |
1817 | 225k | LiveIntervals &LIS) const { |
1818 | 225k | unsigned SrcSize = getRegSizeInBits(*SrcRC); |
1819 | 225k | unsigned DstSize = getRegSizeInBits(*DstRC); |
1820 | 225k | unsigned NewSize = getRegSizeInBits(*NewRC); |
1821 | 225k | |
1822 | 225k | // Do not increase the size of registers beyond a dword; we would need to
1823 | 225k | // allocate adjacent registers and constrain regalloc more than needed.
1824 | 225k | |
1825 | 225k | // Always allow dword coalescing. |
1826 | 225k | if (SrcSize <= 32 || DstSize <= 32111k ) |
1827 | 167k | return true; |
1828 | 57.5k | |
1829 | 57.5k | return NewSize <= DstSize || NewSize <= SrcSize11.5k ; |
1830 | 57.5k | } |
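
Worked examples of the coalescing policy above (sizes in bits, values illustrative):

// SrcSize 32, DstSize 128, NewSize 128 -> true  (dword coalescing is free)
// SrcSize 64, DstSize 128, NewSize 128 -> true  (NewSize <= DstSize)
// SrcSize 64, DstSize 64,  NewSize 128 -> false (would widen both tuples)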
1831 | | |
1832 | | unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, |
1833 | 214k | MachineFunction &MF) const { |
1834 | 214k | |
1835 | 214k | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
1836 | 214k | const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); |
1837 | 214k | |
1838 | 214k | unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(), |
1839 | 214k | MF.getFunction()); |
1840 | 214k | switch (RC->getID()) { |
1841 | 214k | default: |
1842 | 0 | return AMDGPURegisterInfo::getRegPressureLimit(RC, MF); |
1843 | 214k | case AMDGPU::VGPR_32RegClassID: |
1844 | 133k | return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF)); |
1845 | 214k | case AMDGPU::SGPR_32RegClassID: |
1846 | 80.3k | return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF)); |
1847 | 214k | } |
1848 | 214k | } |
1849 | | |
1850 | | unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF, |
1851 | 13.5M | unsigned Idx) const { |
1852 | 13.5M | if (Idx == getVGPRPressureSet() || Idx == getAGPRPressureSet()13.4M ) |
1853 | 133k | return getRegPressureLimit(&AMDGPU::VGPR_32RegClass, |
1854 | 133k | const_cast<MachineFunction &>(MF)); |
1855 | 13.4M | |
1856 | 13.4M | if (Idx == getSGPRPressureSet()) |
1857 | 80.3k | return getRegPressureLimit(&AMDGPU::SGPR_32RegClass, |
1858 | 80.3k | const_cast<MachineFunction &>(MF)); |
1859 | 13.3M | |
1860 | 13.3M | return AMDGPURegisterInfo::getRegPressureSetLimit(MF, Idx); |
1861 | 13.3M | } |
1862 | | |
1863 | 6.27M | const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const { |
1864 | 6.27M | static const int Empty[] = { -1 }; |
1865 | 6.27M | |
1866 | 6.27M | if (hasRegUnit(AMDGPU::M0, RegUnit)) |
1867 | 3.64k | return Empty; |
1868 | 6.26M | return AMDGPURegisterInfo::getRegUnitPressureSets(RegUnit); |
1869 | 6.26M | } |
1870 | | |
1871 | 2.94k | unsigned SIRegisterInfo::getReturnAddressReg(const MachineFunction &MF) const { |
1872 | 2.94k | // Not a callee saved register. |
1873 | 2.94k | return AMDGPU::SGPR30_SGPR31; |
1874 | 2.94k | } |
1875 | | |
1876 | | const TargetRegisterClass * |
1877 | | SIRegisterInfo::getRegClassForSizeOnBank(unsigned Size, |
1878 | | const RegisterBank &RB, |
1879 | 2.38k | const MachineRegisterInfo &MRI) const { |
1880 | 2.38k | switch (Size) { |
1881 | 2.38k | case 1: { |
1882 | 155 | switch (RB.getID()) { |
1883 | 155 | case AMDGPU::VGPRRegBankID: |
1884 | 43 | return &AMDGPU::VGPR_32RegClass; |
1885 | 155 | case AMDGPU::VCCRegBankID: |
1886 | 14 | return isWave32 ? |
1887 | 8 | &AMDGPU::SReg_32_XM0_XEXECRegClass6 : &AMDGPU::SReg_64_XEXECRegClass; |
1888 | 155 | case AMDGPU::SGPRRegBankID: |
1889 | 51 | return &AMDGPU::SReg_32_XM0RegClass; |
1890 | 155 | case AMDGPU::SCCRegBankID: |
1891 | 47 | // This needs to return an allocatable class, so don't bother returning |
1892 | 47 | // the dummy SCC class. |
1893 | 47 | return &AMDGPU::SReg_32_XM0RegClass; |
1894 | 155 | default: |
1895 | 0 | llvm_unreachable("unknown register bank"); |
1896 | 0 | } |
1897 | 0 | } |
1898 | 1.11k | case 32: |
1899 | 1.11k | return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass735 : |
1900 | 1.11k | &AMDGPU::SReg_32_XM0RegClass380 ; |
1901 | 760 | case 64: |
1902 | 760 | return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_64RegClass415 : |
1903 | 760 | &AMDGPU::SReg_64_XEXECRegClass345 ; |
1904 | 39 | case 96: |
1905 | 39 | return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_96RegClass31 : |
1906 | 39 | &AMDGPU::SReg_96RegClass8 ; |
1907 | 96 | case 128: |
1908 | 96 | return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_128RegClass63 : |
1909 | 96 | &AMDGPU::SReg_128RegClass33 ; |
1910 | 8 | case 160: |
1911 | 8 | return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_160RegClass4 : |
1912 | 8 | &AMDGPU::SReg_160RegClass4 ; |
1913 | 36 | case 256: |
1914 | 36 | return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_256RegClass2 : |
1915 | 36 | &AMDGPU::SReg_256RegClass34 ; |
1916 | 46 | case 512: |
1917 | 46 | return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_512RegClass7 : |
1918 | 46 | &AMDGPU::SReg_512RegClass39 ; |
1919 | 129 | default: |
1920 | 129 | if (Size < 32) |
1921 | 123 | return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass88 : |
1922 | 123 | &AMDGPU::SReg_32_XM0RegClass35 ; |
1923 | 6 | return nullptr; |
1924 | 2.38k | } |
1925 | 2.38k | } |
1926 | | |
1927 | | const TargetRegisterClass * |
1928 | | SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO, |
1929 | 4.82k | const MachineRegisterInfo &MRI) const { |
1930 | 4.82k | if (const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg())) |
1931 | 2.05k | return getRegClassForTypeOnBank(MRI.getType(MO.getReg()), *RB, MRI); |
1932 | 2.76k | return nullptr; |
1933 | 2.76k | } |
1934 | | |
1935 | 1.18k | unsigned SIRegisterInfo::getVCC() const { |
1936 | 1.18k | return isWave32 ? AMDGPU::VCC_LO156 : AMDGPU::VCC1.02k ; |
1937 | 1.18k | } |
1938 | | |
1939 | | const TargetRegisterClass * |
1940 | 59.8M | SIRegisterInfo::getRegClass(unsigned RCID) const { |
1941 | 59.8M | switch ((int)RCID) { |
1942 | 59.8M | case AMDGPU::SReg_1RegClassID: |
1943 | 1.22M | return getBoolRC(); |
1944 | 59.8M | case AMDGPU::SReg_1_XEXECRegClassID: |
1945 | 706k | return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass35.4k |
1946 | 706k | : &AMDGPU::SReg_64_XEXECRegClass670k ; |
1947 | 59.8M | case -1: |
1948 | 13.8M | return nullptr; |
1949 | 59.8M | default: |
1950 | 44.1M | return AMDGPURegisterInfo::getRegClass(RCID); |
1951 | 59.8M | } |
1952 | 59.8M | } |
1953 | | |
1954 | | // Find the reaching register definition.
1955 | | MachineInstr *SIRegisterInfo::findReachingDef(unsigned Reg, unsigned SubReg, |
1956 | | MachineInstr &Use, |
1957 | | MachineRegisterInfo &MRI, |
1958 | 412 | LiveIntervals *LIS) const { |
1959 | 412 | auto &MDT = LIS->getAnalysis<MachineDominatorTree>(); |
1960 | 412 | SlotIndex UseIdx = LIS->getInstructionIndex(Use); |
1961 | 412 | SlotIndex DefIdx; |
1962 | 412 | |
1963 | 412 | if (TargetRegisterInfo::isVirtualRegister(Reg)) { |
1964 | 59 | if (!LIS->hasInterval(Reg)) |
1965 | 0 | return nullptr; |
1966 | 59 | LiveInterval &LI = LIS->getInterval(Reg); |
1967 | 59 | LaneBitmask SubLanes = SubReg ? getSubRegIndexLaneMask(SubReg)11 |
1968 | 59 | : MRI.getMaxLaneMaskForVReg(Reg)48 ; |
1969 | 59 | VNInfo *V = nullptr; |
1970 | 59 | if (LI.hasSubRanges()) { |
1971 | 17 | for (auto &S : LI.subranges()) { |
1972 | 17 | if ((S.LaneMask & SubLanes) == SubLanes) { |
1973 | 11 | V = S.getVNInfoAt(UseIdx); |
1974 | 11 | break; |
1975 | 11 | } |
1976 | 17 | } |
1977 | 47 | } else { |
1978 | 47 | V = LI.getVNInfoAt(UseIdx); |
1979 | 47 | } |
1980 | 59 | if (!V) |
1981 | 1 | return nullptr; |
1982 | 58 | DefIdx = V->def; |
1983 | 353 | } else { |
1984 | 353 | // Find last def. |
1985 | 1.01k | for (MCRegUnitIterator Units(Reg, this); Units.isValid(); ++Units658 ) { |
1986 | 661 | LiveRange &LR = LIS->getRegUnit(*Units); |
1987 | 661 | if (VNInfo *V = LR.getVNInfoAt(UseIdx)) { |
1988 | 658 | if (!DefIdx.isValid() || |
1989 | 658 | MDT.dominates(LIS->getInstructionFromIndex(DefIdx), |
1990 | 308 | LIS->getInstructionFromIndex(V->def))) |
1991 | 657 | DefIdx = V->def; |
1992 | 658 | } else { |
1993 | 3 | return nullptr; |
1994 | 3 | } |
1995 | 661 | } |
1996 | 353 | } |
1997 | 412 | |
1998 | 412 | MachineInstr *Def = LIS->getInstructionFromIndex(DefIdx); |
1999 | 408 | |
2000 | 408 | if (!Def || !MDT.dominates(Def, &Use)406 ) |
2001 | 2 | return nullptr; |
2002 | 406 | |
2003 | 406 | assert(Def->modifiesRegister(Reg, this)); |
2004 | 406 | |
2005 | 406 | return Def; |
2006 | 406 | } |