Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
Function entry counts: SIMachineFunctionInfo() 15.3k, addPrivateSegmentBuffer 1.74k, addDispatchPtr 25, addQueuePtr 57, addKernargSegmentPtr 12.8k, addDispatchID 5, addFlatScratchInit 332, addImplicitBufferPtr 2, isCalleeSavedReg 78, allocateSGPRSpillToVGPR 1.08k, removeSGPRToVGPRFrameIndices 125. The only line never executed was the `return false;` fallthrough in isCalleeSavedReg.
//===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIRegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
#include <cassert>
#include <vector>

#define MAX_LANES 64

using namespace llvm;

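// Decide which implicit ABI inputs (segment pointers, work-group and
// work-item IDs, scratch setup) this function will need, based on its
// calling convention, its attributes, and the subtarget.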
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    BufferPSV(*(MF.getSubtarget().getInstrInfo())),
    ImagePSV(*(MF.getSubtarget().getInstrInfo())),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    GridWorkgroupCountX(false),
    GridWorkgroupCountY(false),
    GridWorkgroupCountZ(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false),
    ImplicitBufferPtr(false),
    ImplicitArgPtr(false) {
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const Function *F = MF.getFunction();
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(*F);
  WavesPerEU = ST.getWavesPerEU(*F);

  if (!isEntryFunction()) {
    // Non-entry functions have no special inputs for now, other than the
    // registers required for scratch access.
    ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
    ScratchWaveOffsetReg = AMDGPU::SGPR4;
    FrameOffsetReg = AMDGPU::SGPR5;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    ArgInfo.PrivateSegmentBuffer =
      ArgDescriptor::createRegister(ScratchRSrcReg);
    ArgInfo.PrivateSegmentWaveByteOffset =
      ArgDescriptor::createRegister(ScratchWaveOffsetReg);

    if (F->hasFnAttribute("amdgpu-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    if (F->hasFnAttribute("amdgpu-implicitarg-ptr"))
      KernargSegmentPtr = true;
  }

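  // Kernel entry points always receive the X work-group and work-item IDs,
  // plus a kernarg segment pointer whenever they take arguments; pixel
  // shaders start from the initial PS input mask instead.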
  CallingConv::ID CC = F->getCallingConv();
  if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
    if (!F->arg_empty())
      KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);
  }

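  // The debugger prologue reads all six IDs, so enable everything; otherwise
  // each remaining ID is only enabled when explicitly requested via the
  // corresponding "amdgpu-work-group-id-*" or "amdgpu-work-item-id-*"
  // attribute.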
  if (ST.debuggerEmitPrologue()) {
    // Enable everything.
    WorkGroupIDX = true;
    WorkGroupIDY = true;
    WorkGroupIDZ = true;
    WorkItemIDX = true;
    WorkItemIDY = true;
    WorkItemIDZ = true;
  } else {
    if (F->hasFnAttribute("amdgpu-work-group-id-x"))
      WorkGroupIDX = true;

    if (F->hasFnAttribute("amdgpu-work-group-id-y"))
      WorkGroupIDY = true;

    if (F->hasFnAttribute("amdgpu-work-group-id-z"))
      WorkGroupIDZ = true;

    if (F->hasFnAttribute("amdgpu-work-item-id-x"))
      WorkItemIDX = true;

    if (F->hasFnAttribute("amdgpu-work-item-id-y"))
      WorkItemIDY = true;

    if (F->hasFnAttribute("amdgpu-work-item-id-z"))
      WorkItemIDZ = true;
  }

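  // Entry functions need a private segment wave byte offset for any stack
  // access; spilling may introduce stack objects later, so MaySpill is
  // treated conservatively as if objects already existed.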
  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  bool MaySpill = ST.isVGPRSpillingEnabled(*F);
  bool HasStackObjects = FrameInfo.hasStackObjects();

  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    if (HasStackObjects || MaySpill) {
      PrivateSegmentWaveByteOffset = true;

      // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
      if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
          (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
        ArgInfo.PrivateSegmentWaveByteOffset =
            ArgDescriptor::createRegister(AMDGPU::SGPR5);
    }
  }

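  // The dispatch pointer, queue pointer, and dispatch ID are only available
  // under the AMD code object v2 ABI; Mesa GFX shaders use an implicit
  // buffer pointer for scratch access instead.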
  bool IsCOV2 = ST.isAmdCodeObjectV2(MF);
  if (IsCOV2) {
    if (HasStackObjects || MaySpill)
      PrivateSegmentBuffer = true;

    if (F->hasFnAttribute("amdgpu-dispatch-ptr"))
      DispatchPtr = true;

    if (F->hasFnAttribute("amdgpu-queue-ptr"))
      QueuePtr = true;

    if (F->hasFnAttribute("amdgpu-dispatch-id"))
      DispatchID = true;
  } else if (ST.isMesaGfxShader(MF)) {
    if (HasStackObjects || MaySpill)
      ImplicitBufferPtr = true;
  }

  if (F->hasFnAttribute("amdgpu-kernarg-segment-ptr"))
155
36
    KernargSegmentPtr = true;
156
15.3k
157
15.3k
  if (
ST.hasFlatAddressSpace() && 15.3k
isEntryFunction()9.79k
&&
IsCOV28.89k
) {
158
1.72k
    // TODO: This could be refined a lot. The attribute is a poor way of
159
1.72k
    // detecting calls that may require it before argument lowering.
160
1.72k
    if (
HasStackObjects || 1.72k
F->hasFnAttribute("amdgpu-flat-scratch")1.61k
)
161
332
      FlatScratchInit = true;
162
1.72k
  }
163
15.3k
}
164
165
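// Each of the following helpers reserves user SGPRs for one preloaded ABI
// input and records where it lives. The private segment buffer is a 128-bit
// resource descriptor and takes four SGPRs; the remaining inputs are 64 bits
// wide and take two.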
unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
    const SIRegisterInfo &TRI) {
  ArgInfo.PrivateSegmentBuffer =
    ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass));
  NumUserSGPRs += 4;
  return ArgInfo.PrivateSegmentBuffer.getRegister();
}

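// Note: getMatchingSuperReg() returns the wider register tuple whose sub0
// subregister is the given SGPR, so consecutive add* calls pack their inputs
// back to back, presumably starting at the first SGPR past those already
// reserved (getNextUserSGPR() is defined in the header).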
unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchPtr.getRegister();
}

unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.QueuePtr.getRegister();
}

unsigned SIMachineFunctionInfo::addKernargSegmentPtr(
    const SIRegisterInfo &TRI) {
  ArgInfo.KernargSegmentPtr =
    ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.KernargSegmentPtr.getRegister();
}

unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchID.getRegister();
}

unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  ArgInfo.FlatScratchInit =
    ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.FlatScratchInit.getRegister();
}

unsigned SIMachineFunctionInfo::addImplicitBufferPtr(
    const SIRegisterInfo &TRI) {
  ArgInfo.ImplicitBufferPtr =
    ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.ImplicitBufferPtr.getRegister();
}

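// Return true if Reg appears in the null-terminated list of callee-saved
// registers CSRegs.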
static bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) {
  for (unsigned I = 0; CSRegs[I]; ++I) {
    if (CSRegs[I] == Reg)
      return true;
  }

  return false;
}

/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();

  unsigned Size = FrameInfo.getObjectSize(FI);
  assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

  int NumLanes = Size / 4;

  const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);

  // Make sure to handle the case where a wide SGPR spill may span between two
  // VGPRs.
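  // Lanes are handed out contiguously across all spilled frame indices:
  // NumVGPRSpillLanes counts every lane allocated so far, so a fresh VGPR is
  // needed exactly when the running index wraps modulo the wavefront size.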
  for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    unsigned LaneVGPR;
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

    if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // We have no VGPRs left for spilling SGPRs. Reset because we will not
        // partially spill the SGPR to VGPRs.
        SGPRToVGPRSpills.erase(FI);
        NumVGPRSpillLanes -= I;
        return false;
      }

      Optional<int> CSRSpillFI;
      if (FrameInfo.hasCalls() && CSRegs &&
          isCalleeSavedReg(CSRegs, LaneVGPR)) {
        // TODO: Should this be a CreateSpillStackObject? This is technically a
        // weird CSR spill.
        CSRSpillFI = FrameInfo.CreateStackObject(4, 4, false);
      }

      SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, CSRSpillFI));

      // Add this register as live-in to all blocks to avoid machine verifier
      // complaining about use of an undefined physical register.
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      LaneVGPR = SpillVGPRs.back().VGPR;
    }

    SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}

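// Once SGPR spills have been rewritten to use VGPR lanes, the original
// frame indices are dead; drop their stack slots from the frame.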
void SIMachineFunctionInfo::removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI) {
  for (auto &R : SGPRToVGPRSpills)
    MFI.RemoveStackObject(R.first);
}