/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | |
10 | | #include "SIMachineFunctionInfo.h" |
11 | | #include "AMDGPUArgumentUsageInfo.h" |
12 | | #include "AMDGPUSubtarget.h" |
13 | | #include "SIRegisterInfo.h" |
14 | | #include "Utils/AMDGPUBaseInfo.h" |
15 | | #include "llvm/ADT/Optional.h" |
16 | | #include "llvm/CodeGen/MachineBasicBlock.h" |
17 | | #include "llvm/CodeGen/MachineFrameInfo.h" |
18 | | #include "llvm/CodeGen/MachineFunction.h" |
19 | | #include "llvm/CodeGen/MachineRegisterInfo.h" |
20 | | #include "llvm/IR/CallingConv.h" |
21 | | #include "llvm/IR/Function.h" |
22 | | #include <cassert> |
23 | | #include <vector> |
24 | | |
25 | | #define MAX_LANES 64 |
26 | | |
27 | | using namespace llvm; |
28 | | |
/// Construct the SI/GCN per-function state for \p MF.
///
/// Decides, from the calling convention, function attributes, and subtarget
/// features, which implicit kernel/function arguments (dispatch ptr, queue
/// ptr, workgroup/workitem IDs, scratch setup, ...) this function requires.
/// The flags set here are later consumed during argument lowering and
/// prologue emission.
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    BufferPSV(*(MF.getSubtarget().getInstrInfo())),
    ImagePSV(*(MF.getSubtarget().getInstrInfo())),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    GridWorkgroupCountX(false),
    GridWorkgroupCountY(false),
    GridWorkgroupCountZ(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false),
    ImplicitBufferPtr(false),
    ImplicitArgPtr(false) {
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const Function *F = MF.getFunction();
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(*F);
  WavesPerEU = ST.getWavesPerEU(*F);

  if (!isEntryFunction()) {
    // Non-entry functions have no special inputs for now, other registers
    // required for scratch access.
    // NOTE(review): these are fixed ABI register assignments for callable
    // functions — callers and callees must agree on them.
    ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
    ScratchWaveOffsetReg = AMDGPU::SGPR4;
    FrameOffsetReg = AMDGPU::SGPR5;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    ArgInfo.PrivateSegmentBuffer =
      ArgDescriptor::createRegister(ScratchRSrcReg);
    ArgInfo.PrivateSegmentWaveByteOffset =
      ArgDescriptor::createRegister(ScratchWaveOffsetReg);

    if (F->hasFnAttribute("amdgpu-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    // For entry functions the implicit-arg pointer is derived from the
    // kernarg segment pointer instead of being a separate input.
    if (F->hasFnAttribute("amdgpu-implicitarg-ptr"))
      KernargSegmentPtr = true;
  }

  CallingConv::ID CC = F->getCallingConv();
  if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
    // Kernels only need the kernarg segment pointer if they actually have
    // arguments; X dimension IDs are always enabled for kernels.
    if (!F->arg_empty())
      KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);
  }

  if (ST.debuggerEmitPrologue()) {
    // Enable everything.
    // The debugger prologue dumps all IDs, so force them live regardless of
    // what the function itself uses.
    WorkGroupIDX = true;
    WorkGroupIDY = true;
    WorkGroupIDZ = true;
    WorkItemIDX = true;
    WorkItemIDY = true;
    WorkItemIDZ = true;
  } else {
    // Otherwise only request the IDs the attributes (set by an earlier
    // analysis pass) say are needed.
    if (F->hasFnAttribute("amdgpu-work-group-id-x"))
      WorkGroupIDX = true;

    if (F->hasFnAttribute("amdgpu-work-group-id-y"))
      WorkGroupIDY = true;

    if (F->hasFnAttribute("amdgpu-work-group-id-z"))
      WorkGroupIDZ = true;

    if (F->hasFnAttribute("amdgpu-work-item-id-x"))
      WorkItemIDX = true;

    if (F->hasFnAttribute("amdgpu-work-item-id-y"))
      WorkItemIDY = true;

    if (F->hasFnAttribute("amdgpu-work-item-id-z"))
      WorkItemIDZ = true;
  }

  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  bool MaySpill = ST.isVGPRSpillingEnabled(*F);
  bool HasStackObjects = FrameInfo.hasStackObjects();

  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    if (HasStackObjects || MaySpill) {
      PrivateSegmentWaveByteOffset = true;

      // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
      if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
          (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
        ArgInfo.PrivateSegmentWaveByteOffset
          = ArgDescriptor::createRegister(AMDGPU::SGPR5);
    }
  }

  bool IsCOV2 = ST.isAmdCodeObjectV2(MF);
  if (IsCOV2) {
    // Code object v2 (HSA) inputs: resource descriptor for scratch plus any
    // of the AQL packet pointers requested via attributes.
    if (HasStackObjects || MaySpill)
      PrivateSegmentBuffer = true;

    if (F->hasFnAttribute("amdgpu-dispatch-ptr"))
      DispatchPtr = true;

    if (F->hasFnAttribute("amdgpu-queue-ptr"))
      QueuePtr = true;

    if (F->hasFnAttribute("amdgpu-dispatch-id"))
      DispatchID = true;
  } else if (ST.isMesaGfxShader(MF)) {
    // Mesa graphics shaders get a pointer to the scratch buffer descriptor
    // instead of the descriptor itself.
    if (HasStackObjects || MaySpill)
      ImplicitBufferPtr = true;
  }

  if (F->hasFnAttribute("amdgpu-kernarg-segment-ptr"))
    KernargSegmentPtr = true;

  if (ST.hasFlatAddressSpace() && isEntryFunction() && IsCOV2) {
    // TODO: This could be refined a lot. The attribute is a poor way of
    // detecting calls that may require it before argument lowering.
    if (HasStackObjects || F->hasFnAttribute("amdgpu-flat-scratch"))
      FlatScratchInit = true;
  }
}
164 | | |
165 | | unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer( |
166 | 1.74k | const SIRegisterInfo &TRI) { |
167 | 1.74k | ArgInfo.PrivateSegmentBuffer = |
168 | 1.74k | ArgDescriptor::createRegister(TRI.getMatchingSuperReg( |
169 | 1.74k | getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass)); |
170 | 1.74k | NumUserSGPRs += 4; |
171 | 1.74k | return ArgInfo.PrivateSegmentBuffer.getRegister(); |
172 | 1.74k | } |
173 | | |
174 | 25 | unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) { |
175 | 25 | ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg( |
176 | 25 | getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass)); |
177 | 25 | NumUserSGPRs += 2; |
178 | 25 | return ArgInfo.DispatchPtr.getRegister(); |
179 | 25 | } |
180 | | |
181 | 57 | unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) { |
182 | 57 | ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg( |
183 | 57 | getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass)); |
184 | 57 | NumUserSGPRs += 2; |
185 | 57 | return ArgInfo.QueuePtr.getRegister(); |
186 | 57 | } |
187 | | |
188 | 12.8k | unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) { |
189 | 12.8k | ArgInfo.KernargSegmentPtr |
190 | 12.8k | = ArgDescriptor::createRegister(TRI.getMatchingSuperReg( |
191 | 12.8k | getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass)); |
192 | 12.8k | NumUserSGPRs += 2; |
193 | 12.8k | return ArgInfo.KernargSegmentPtr.getRegister(); |
194 | 12.8k | } |
195 | | |
196 | 5 | unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) { |
197 | 5 | ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg( |
198 | 5 | getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass)); |
199 | 5 | NumUserSGPRs += 2; |
200 | 5 | return ArgInfo.DispatchID.getRegister(); |
201 | 5 | } |
202 | | |
203 | 332 | unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) { |
204 | 332 | ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg( |
205 | 332 | getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass)); |
206 | 332 | NumUserSGPRs += 2; |
207 | 332 | return ArgInfo.FlatScratchInit.getRegister(); |
208 | 332 | } |
209 | | |
210 | 2 | unsigned SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) { |
211 | 2 | ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg( |
212 | 2 | getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass)); |
213 | 2 | NumUserSGPRs += 2; |
214 | 2 | return ArgInfo.ImplicitBufferPtr.getRegister(); |
215 | 2 | } |
216 | | |
217 | 78 | static bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) { |
218 | 91 | for (unsigned I = 0; CSRegs[I]91 ; ++I13 ) { |
219 | 91 | if (CSRegs[I] == Reg) |
220 | 78 | return true; |
221 | 91 | } |
222 | 78 | |
223 | 0 | return false; |
224 | 78 | } |
225 | | |
226 | | /// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI. |
/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
///
/// Each 4-byte piece of the SGPR spill object gets one lane of a VGPR; a
/// wide spill may span two VGPRs. Returns true on success, false if no
/// VGPR could be found (in which case no partial allocation is kept).
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();

  unsigned Size = FrameInfo.getObjectSize(FI);
  assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

  // One VGPR lane (4 bytes) per 32-bit SGPR to be spilled.
  int NumLanes = Size / 4;

  const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);

  // Make sure to handle the case where a wide SGPR spill may span between two
  // VGPRs.
  // NumVGPRSpillLanes is cumulative across calls; VGPRIndex wrapping to 0
  // means the current VGPR is full and a fresh one must be found.
  for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    unsigned LaneVGPR;
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

    if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // We have no VGPRs left for spilling SGPRs. Reset because we will not
        // partially spill the SGPR to VGPRs.
        SGPRToVGPRSpills.erase(FI);
        // Roll back the lanes consumed by the iterations completed so far.
        NumVGPRSpillLanes -= I;
        return false;
      }

      Optional<int> CSRSpillFI;
      if (FrameInfo.hasCalls() && CSRegs && isCalleeSavedReg(CSRegs, LaneVGPR)) {
        // TODO: Should this be a CreateSpillStackObject? This is technically a
        // weird CSR spill.
        CSRSpillFI = FrameInfo.CreateStackObject(4, 4, false);
      }

      SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, CSRSpillFI));

      // Add this register as live-in to all blocks to avoid machine verifer
      // complaining about use of an undefined physical register.
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      // Continue filling lanes of the most recently allocated VGPR.
      LaneVGPR = SpillVGPRs.back().VGPR;
    }

    SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}
287 | | |
288 | 125 | void SIMachineFunctionInfo::removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI) { |
289 | 125 | for (auto &R : SGPRToVGPRSpills) |
290 | 535 | MFI.RemoveStackObject(R.first); |
291 | 125 | } |