Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
Line | Count | Source
   1|      |//===-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp - Call lowering -----===//
   2|      |//
   3|      |// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4|      |// See https://llvm.org/LICENSE.txt for license information.
   5|      |// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6|      |//
   7|      |//===----------------------------------------------------------------------===//
   8|      |///
   9|      |/// \file
  10|      |/// This file implements the lowering of LLVM calls to machine code calls for
  11|      |/// GlobalISel.
  12|      |///
  13|      |//===----------------------------------------------------------------------===//
  14|      |
  15|      |#include "AMDGPUCallLowering.h"
  16|      |#include "AMDGPU.h"
  17|      |#include "AMDGPUISelLowering.h"
  18|      |#include "AMDGPUSubtarget.h"
  19|      |#include "SIISelLowering.h"
  20|      |#include "SIMachineFunctionInfo.h"
  21|      |#include "SIRegisterInfo.h"
  22|      |#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
  23|      |#include "llvm/CodeGen/Analysis.h"
  24|      |#include "llvm/CodeGen/CallingConvLower.h"
  25|      |#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
  26|      |#include "llvm/CodeGen/MachineInstrBuilder.h"
  27|      |#include "llvm/Support/LowLevelTypeImpl.h"
  28|      |
  29|      |using namespace llvm;
  30|      |
  31|      |namespace {
  32|      |
  33|      |struct OutgoingArgHandler : public CallLowering::ValueHandler {
  34|      |  OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
  35|      |                     MachineInstrBuilder MIB, CCAssignFn *AssignFn)
  36|     3|      : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}
  37|      |
  38|      |  MachineInstrBuilder MIB;
  39|      |
  40|      |  Register getStackAddress(uint64_t Size, int64_t Offset,
  41|     0|                           MachinePointerInfo &MPO) override {
  42|     0|    llvm_unreachable("not implemented");
  43|     0|  }
  44|      |
  45|      |  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
  46|     0|                            MachinePointerInfo &MPO, CCValAssign &VA) override {
  47|     0|    llvm_unreachable("not implemented");
  48|     0|  }
  49|      |
  50|      |  void assignValueToReg(Register ValVReg, Register PhysReg,
  51|     4|                        CCValAssign &VA) override {
  52|     4|    MIB.addUse(PhysReg);
  53|     4|    MIRBuilder.buildCopy(PhysReg, ValVReg);
  54|     4|  }
  55|      |
  56|      |  bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT,
  57|      |                 CCValAssign::LocInfo LocInfo,
  58|      |                 const CallLowering::ArgInfo &Info,
  59|     4|                 CCState &State) override {
  60|     4|    return AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State);
  61|     4|  }
  62|      |};
  63|      |
  64|      |struct IncomingArgHandler : public CallLowering::ValueHandler {
  65|      |  uint64_t StackUsed = 0;
  66|      |
  67|      |  IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
  68|      |                     CCAssignFn *AssignFn)
  69|    80|    : ValueHandler(MIRBuilder, MRI, AssignFn) {}
  70|      |
  71|      |  Register getStackAddress(uint64_t Size, int64_t Offset,
  72|   101|                           MachinePointerInfo &MPO) override {
  73|   101|    auto &MFI = MIRBuilder.getMF().getFrameInfo();
  74|   101|    int FI = MFI.CreateFixedObject(Size, Offset, true);
  75|   101|    MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
  76|   101|    Register AddrReg = MRI.createGenericVirtualRegister(
  77|   101|      LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32));
  78|   101|    MIRBuilder.buildFrameIndex(AddrReg, FI);
  79|   101|    StackUsed = std::max(StackUsed, Size + Offset);
  80|   101|    return AddrReg;
  81|   101|  }
  82|      |
  83|      |  void assignValueToReg(Register ValVReg, Register PhysReg,
  84|   704|                        CCValAssign &VA) override {
  85|   704|    markPhysRegUsed(PhysReg);
  86|   704|
  87|   704|    if (VA.getLocVT().getSizeInBits() < 32) {
  88|     2|      // 16-bit types are reported as legal for 32-bit registers. We need to do
  89|     2|      // a 32-bit copy, and truncate to avoid the verifier complaining about it.
  90|     2|      auto Copy = MIRBuilder.buildCopy(LLT::scalar(32), PhysReg);
  91|     2|      MIRBuilder.buildTrunc(ValVReg, Copy);
  92|     2|      return;
  93|     2|    }
  94|   702|
  95|   702|    switch (VA.getLocInfo()) {
  96|   702|    case CCValAssign::LocInfo::SExt:
  97|     8|    case CCValAssign::LocInfo::ZExt:
  98|     8|    case CCValAssign::LocInfo::AExt: {
  99|     8|      auto Copy = MIRBuilder.buildCopy(LLT{VA.getLocVT()}, PhysReg);
 100|     8|      MIRBuilder.buildTrunc(ValVReg, Copy);
 101|     8|      break;
 102|     8|    }
 103|   694|    default:
 104|   694|      MIRBuilder.buildCopy(ValVReg, PhysReg);
 105|   694|      break;
 106|   702|    }
 107|   702|  }
 108|      |
 109|      |  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
 110|   101|                            MachinePointerInfo &MPO, CCValAssign &VA) override {
 111|   101|    // FIXME: Get alignment
 112|   101|    auto MMO = MIRBuilder.getMF().getMachineMemOperand(
 113|   101|      MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, Size, 1);
 114|   101|    MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
 115|   101|  }
 116|      |
 117|      |  /// How the physical register gets marked varies between formal
 118|      |  /// parameters (it's a basic-block live-in), and a call instruction
 119|      |  /// (it's an implicit-def of the BL).
 120|      |  virtual void markPhysRegUsed(unsigned PhysReg) = 0;
 121|      |
 122|      |  // FIXME: What is the point of this being a callback?
 123|   707|  bool isArgumentHandler() const override { return true; }
 124|      |};
 125|      |
 126|      |struct FormalArgHandler : public IncomingArgHandler {
 127|      |  FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
 128|      |                   CCAssignFn *AssignFn)
 129|    80|    : IncomingArgHandler(MIRBuilder, MRI, AssignFn) {}
 130|      |
 131|   704|  void markPhysRegUsed(unsigned PhysReg) override {
 132|   704|    MIRBuilder.getMBB().addLiveIn(PhysReg);
 133|   704|  }
 134|      |};
 135|      |
 136|      |}
 137|      |
 138|      |AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
 139| 3.64k|  : CallLowering(&TLI) {
 140| 3.64k|}
 141|      |
 142|      |void AMDGPUCallLowering::splitToValueTypes(
 143|      |    const ArgInfo &OrigArg, SmallVectorImpl<ArgInfo> &SplitArgs,
 144|      |    const DataLayout &DL, MachineRegisterInfo &MRI, CallingConv::ID CallConv,
 145|   111|    SplitArgTy PerformArgSplit) const {
 146|   111|  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
 147|   111|  LLVMContext &Ctx = OrigArg.Ty->getContext();
 148|   111|
 149|   111|  if (OrigArg.Ty->isVoidTy())
 150|     0|    return;
 151|   111|
 152|   111|  SmallVector<EVT, 4> SplitVTs;
 153|   111|  ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs);
 154|   111|
 155|   111|  assert(OrigArg.Regs.size() == SplitVTs.size());
 156|   111|
 157|   111|  int SplitIdx = 0;
 158|   112|  for (EVT VT : SplitVTs) {
 159|   112|    unsigned NumParts = TLI.getNumRegistersForCallingConv(Ctx, CallConv, VT);
 160|   112|    Type *Ty = VT.getTypeForEVT(Ctx);
 161|   112|
 162|   112|
 163|   112|
 164|   112|    if (NumParts == 1) {
 165|    46|      // No splitting to do, but we want to replace the original type (e.g. [1 x
 166|    46|      // double] -> double).
 167|    46|      SplitArgs.emplace_back(OrigArg.Regs[SplitIdx], Ty,
 168|    46|                             OrigArg.Flags, OrigArg.IsFixed);
 169|    46|
 170|    46|      ++SplitIdx;
 171|    46|      continue;
 172|    46|    }
 173|    66|
 174|    66|    LLT LLTy = getLLTForType(*Ty, DL);
 175|    66|
 176|    66|    SmallVector<Register, 8> SplitRegs;
 177|    66|
 178|    66|    EVT PartVT = TLI.getRegisterTypeForCallingConv(Ctx, CallConv, VT);
 179|    66|    Type *PartTy = PartVT.getTypeForEVT(Ctx);
 180|    66|    LLT PartLLT = getLLTForType(*PartTy, DL);
 181|    66|
 182|    66|    // FIXME: Should we be reporting all of the part registers for a single
 183|    66|    // argument, and let handleAssignments take care of the repacking?
 184|   825|    for (unsigned i = 0; i < NumParts; ++i) {
 185|   759|      Register PartReg = MRI.createGenericVirtualRegister(PartLLT);
 186|   759|      SplitRegs.push_back(PartReg);
 187|   759|      SplitArgs.emplace_back(ArrayRef<Register>(PartReg), PartTy, OrigArg.Flags);
 188|   759|    }
 189|    66|
 190|    66|    PerformArgSplit(SplitRegs, LLTy, PartLLT, SplitIdx);
 191|    66|
 192|    66|    ++SplitIdx;
 193|    66|  }
 194|   111|}
 195|      |
 196|      |bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
 197|      |                                     const Value *Val,
 198|   459|                                     ArrayRef<Register> VRegs) const {
 199|   459|
 200|   459|  MachineFunction &MF = MIRBuilder.getMF();
 201|   459|  MachineRegisterInfo &MRI = MF.getRegInfo();
 202|   459|  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
 203|   459|  MFI->setIfReturnsVoid(!Val);
 204|   459|
 205|   459|  if (!Val) {
 206|   456|    MIRBuilder.buildInstr(AMDGPU::S_ENDPGM).addImm(0);
 207|   456|    return true;
 208|   456|  }
 209|     3|
 210|     3|  Register VReg = VRegs[0];
 211|     3|
 212|     3|  const Function &F = MF.getFunction();
 213|     3|  auto &DL = F.getParent()->getDataLayout();
 214|     3|  if (!AMDGPU::isShader(F.getCallingConv()))
 215|     0|    return false;
 216|     3|
 217|     3|
 218|     3|  const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
 219|     3|  SmallVector<EVT, 4> SplitVTs;
 220|     3|  SmallVector<uint64_t, 4> Offsets;
 221|     3|  ArgInfo OrigArg{VReg, Val->getType()};
 222|     3|  setArgFlags(OrigArg, AttributeList::ReturnIndex, DL, F);
 223|     3|  ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0);
 224|     3|
 225|     3|  SmallVector<ArgInfo, 8> SplitArgs;
 226|     3|  CCAssignFn *AssignFn = CCAssignFnForReturn(F.getCallingConv(), false);
 227|     7|  for (unsigned i = 0, e = Offsets.size(); i != e; ++i) {
 228|     4|    Type *SplitTy = SplitVTs[i].getTypeForEVT(F.getContext());
 229|     4|    SplitArgs.push_back({VRegs[i], SplitTy, OrigArg.Flags, OrigArg.IsFixed});
 230|     4|  }
 231|     3|  auto RetInstr = MIRBuilder.buildInstrNoInsert(AMDGPU::SI_RETURN_TO_EPILOG);
 232|     3|  OutgoingArgHandler Handler(MIRBuilder, MRI, RetInstr, AssignFn);
 233|     3|  if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
 234|     0|    return false;
 235|     3|  MIRBuilder.insertInstr(RetInstr);
 236|     3|
 237|     3|  return true;
 238|     3|}
 239|      |
 240|      |Register AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
 241|      |                                               Type *ParamTy,
 242|   182|                                               uint64_t Offset) const {
 243|   182|
 244|   182|  MachineFunction &MF = MIRBuilder.getMF();
 245|   182|  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
 246|   182|  MachineRegisterInfo &MRI = MF.getRegInfo();
 247|   182|  const Function &F = MF.getFunction();
 248|   182|  const DataLayout &DL = F.getParent()->getDataLayout();
 249|   182|  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
 250|   182|  LLT PtrType = getLLTForType(*PtrTy, DL);
 251|   182|  Register DstReg = MRI.createGenericVirtualRegister(PtrType);
 252|   182|  Register KernArgSegmentPtr =
 253|   182|    MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
 254|   182|  Register KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);
 255|   182|
 256|   182|  Register OffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
 257|   182|  MIRBuilder.buildConstant(OffsetReg, Offset);
 258|   182|
 259|   182|  MIRBuilder.buildGEP(DstReg, KernArgSegmentVReg, OffsetReg);
 260|   182|
 261|   182|  return DstReg;
 262|   182|}
 263|      |
 264|      |void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
 265|      |                                        Type *ParamTy, uint64_t Offset,
 266|      |                                        unsigned Align,
 267|   182|                                        Register DstReg) const {
 268|   182|  MachineFunction &MF = MIRBuilder.getMF();
 269|   182|  const Function &F = MF.getFunction();
 270|   182|  const DataLayout &DL = F.getParent()->getDataLayout();
 271|   182|  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
 272|   182|  MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
 273|   182|  unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
 274|   182|  Register PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset);
 275|   182|
 276|   182|  MachineMemOperand *MMO =
 277|   182|      MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad |
 278|   182|                                       MachineMemOperand::MODereferenceable |
 279|   182|                                       MachineMemOperand::MOInvariant,
 280|   182|                                       TypeSize, Align);
 281|   182|
 282|   182|  MIRBuilder.buildLoad(DstReg, PtrReg, *MMO);
 283|   182|}
 284|      |
 285|      |// Allocate special inputs passed in user SGPRs.
 286|      |static void allocateHSAUserSGPRs(CCState &CCInfo,
 287|      |                                 MachineIRBuilder &MIRBuilder,
 288|      |                                 MachineFunction &MF,
 289|      |                                 const SIRegisterInfo &TRI,
 290|   379|                                 SIMachineFunctionInfo &Info) {
 291|   379|  // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
 292|   379|  if (Info.hasPrivateSegmentBuffer()) {
 293|   260|    unsigned PrivateSegmentBufferReg = Info.addPrivateSegmentBuffer(TRI);
 294|   260|    MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass);
 295|   260|    CCInfo.AllocateReg(PrivateSegmentBufferReg);
 296|   260|  }
 297|   379|
 298|   379|  if (Info.hasDispatchPtr()) {
 299|     1|    unsigned DispatchPtrReg = Info.addDispatchPtr(TRI);
 300|     1|    MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);
 301|     1|    CCInfo.AllocateReg(DispatchPtrReg);
 302|     1|  }
 303|   379|
 304|   379|  if (Info.hasQueuePtr()) {
 305|     1|    unsigned QueuePtrReg = Info.addQueuePtr(TRI);
 306|     1|    MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
 307|     1|    CCInfo.AllocateReg(QueuePtrReg);
 308|     1|  }
 309|   379|
 310|   379|  if (Info.hasKernargSegmentPtr()) {
 311|   130|    MachineRegisterInfo &MRI = MF.getRegInfo();
 312|   130|    Register InputPtrReg = Info.addKernargSegmentPtr(TRI);
 313|   130|    const LLT P4 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
 314|   130|    Register VReg = MRI.createGenericVirtualRegister(P4);
 315|   130|    MRI.addLiveIn(InputPtrReg, VReg);
 316|   130|    MIRBuilder.getMBB().addLiveIn(InputPtrReg);
 317|   130|    MIRBuilder.buildCopy(VReg, InputPtrReg);
 318|   130|    CCInfo.AllocateReg(InputPtrReg);
 319|   130|  }
 320|   379|
 321|   379|  if (Info.hasDispatchID()) {
 322|     1|    unsigned DispatchIDReg = Info.addDispatchID(TRI);
 323|     1|    MF.addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass);
 324|     1|    CCInfo.AllocateReg(DispatchIDReg);
 325|     1|  }
 326|   379|
 327|   379|  if (Info.hasFlatScratchInit()) {
 328|     0|    unsigned FlatScratchInitReg = Info.addFlatScratchInit(TRI);
 329|     0|    MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
 330|     0|    CCInfo.AllocateReg(FlatScratchInitReg);
 331|     0|  }
 332|   379|
 333|   379|  // TODO: Add GridWorkGroupCount user SGPRs when used. For now with HSA we read
 334|   379|  // these from the dispatch pointer.
 335|   379|}
 336|      |
 337|      |bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
 338|      |    MachineIRBuilder &MIRBuilder, const Function &F,
 339|   379|    ArrayRef<ArrayRef<Register>> VRegs) const {
 340|   379|  MachineFunction &MF = MIRBuilder.getMF();
 341|   379|  const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
 342|   379|  MachineRegisterInfo &MRI = MF.getRegInfo();
 343|   379|  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
 344|   379|  const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
 345|   379|  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
 346|   379|
 347|   379|  const DataLayout &DL = F.getParent()->getDataLayout();
 348|   379|
 349|   379|  SmallVector<CCValAssign, 16> ArgLocs;
 350|   379|  CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
 351|   379|
 352|   379|  allocateHSAUserSGPRs(CCInfo, MIRBuilder, MF, *TRI, *Info);
 353|   379|
 354|   379|  unsigned i = 0;
 355|   379|  const unsigned KernArgBaseAlign = 16;
 356|   379|  const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
 357|   379|  uint64_t ExplicitArgOffset = 0;
 358|   379|
 359|   379|  // TODO: Align down to dword alignment and extract bits for extending loads.
 360|   379|  for (auto &Arg : F.args()) {
 361|   183|    Type *ArgTy = Arg.getType();
 362|   183|    unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
 363|   183|    if (AllocSize == 0)
 364|     1|      continue;
 365|   182|
 366|   182|    unsigned ABIAlign = DL.getABITypeAlignment(ArgTy);
 367|   182|
 368|   182|    uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
 369|   182|    ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;
 370|   182|
 371|   182|    ArrayRef<Register> OrigArgRegs = VRegs[i];
 372|   182|    Register ArgReg =
 373|   182|      OrigArgRegs.size() == 1
 374|   182|      ? OrigArgRegs[0]
 375|   182|      : MRI.createGenericVirtualRegister(getLLTForType(*ArgTy, DL));
 376|   182|    unsigned Align = MinAlign(KernArgBaseAlign, ArgOffset);
 377|   182|    ArgOffset = alignTo(ArgOffset, DL.getABITypeAlignment(ArgTy));
 378|   182|    lowerParameter(MIRBuilder, ArgTy, ArgOffset, Align, ArgReg);
 379|   182|    if (OrigArgRegs.size() > 1)
 380|     7|      unpackRegs(OrigArgRegs, ArgReg, ArgTy, MIRBuilder);
 381|   182|    ++i;
 382|   182|  }
 383|   379|
 384|   379|  TLI.allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info);
 385|   379|  TLI.allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), false);
 386|   379|  return true;
 387|   379|}
 388|      |
 389|      |static void packSplitRegsToOrigType(MachineIRBuilder &MIRBuilder,
 390|      |                                    ArrayRef<Register> OrigRegs,
 391|      |                                    ArrayRef<Register> Regs,
 392|      |                                    LLT LLTy,
 393|    66|                                    LLT PartLLT) {
 394|    66|  if (!LLTy.isVector() && !PartLLT.isVector()) {
 395|     6|    MIRBuilder.buildMerge(OrigRegs[0], Regs);
 396|     6|    return;
 397|     6|  }
 398|    60|
 399|    60|  if (LLTy.isVector() && PartLLT.isVector()) {
 400|     0|    assert(LLTy.getElementType() == PartLLT.getElementType());
 401|     0|
 402|     0|    int DstElts = LLTy.getNumElements();
 403|     0|    int PartElts = PartLLT.getNumElements();
 404|     0|    if (DstElts % PartElts == 0)
 405|     0|      MIRBuilder.buildConcatVectors(OrigRegs[0], Regs);
 406|     0|    else {
 407|     0|      // Deal with v3s16 split into v2s16
 408|     0|      assert(PartElts == 2 && DstElts % 2 != 0);
 409|     0|      int RoundedElts = PartElts * ((DstElts + PartElts - 1) / PartElts);
 410|     0|
 411|     0|      LLT RoundedDestTy = LLT::vector(RoundedElts, PartLLT.getElementType());
 412|     0|      auto RoundedConcat = MIRBuilder.buildConcatVectors(RoundedDestTy, Regs);
 413|     0|      MIRBuilder.buildExtract(OrigRegs[0], RoundedConcat, 0);
 414|     0|    }
 415|     0|
 416|     0|    return;
 417|     0|  }
 418|    60|
 419|    60|  assert(LLTy.isVector() && !PartLLT.isVector());
 420|    60|
 421|    60|  LLT DstEltTy = LLTy.getElementType();
 422|    60|  if (DstEltTy == PartLLT) {
 423|    32|    // Vector was trivially scalarized.
 424|    32|    MIRBuilder.buildBuildVector(OrigRegs[0], Regs);
 425|    32|  } else if (DstEltTy.getSizeInBits() > PartLLT.getSizeInBits()) {
 426|    13|    // Deal with vector with 64-bit elements decomposed to 32-bit
 427|    13|    // registers. Need to create intermediate 64-bit elements.
 428|    13|    SmallVector<Register, 8> EltMerges;
 429|    13|    int PartsPerElt = DstEltTy.getSizeInBits() / PartLLT.getSizeInBits();
 430|    13|
 431|    13|    assert(DstEltTy.getSizeInBits() % PartLLT.getSizeInBits() == 0);
 432|    13|
 433|    88|    for (int I = 0, NumElts = LLTy.getNumElements(); I != NumElts; ++I) {
 434|    75|      auto Merge = MIRBuilder.buildMerge(DstEltTy,
 435|    75|                                         Regs.take_front(PartsPerElt));
 436|    75|      EltMerges.push_back(Merge.getReg(0));
 437|    75|      Regs = Regs.drop_front(PartsPerElt);
 438|    75|    }
 439|    13|
 440|    13|    MIRBuilder.buildBuildVector(OrigRegs[0], EltMerges);
 441|    15|  } else {
 442|    15|    // Vector was split, and elements promoted to a wider type.
 443|    15|    LLT BVType = LLT::vector(LLTy.getNumElements(), PartLLT);
 444|    15|    auto BV = MIRBuilder.buildBuildVector(BVType, Regs);
 445|    15|    MIRBuilder.buildTrunc(OrigRegs[0], BV);
 446|    15|  }
 447|    60|}
 448|      |
 449|      |bool AMDGPUCallLowering::lowerFormalArguments(
 450|      |    MachineIRBuilder &MIRBuilder, const Function &F,
 451|   459|    ArrayRef<ArrayRef<Register>> VRegs) const {
 452|   459|  CallingConv::ID CC = F.getCallingConv();
 453|   459|
 454|   459|  // The infrastructure for normal calling convention lowering is essentially
 455|   459|  // useless for kernels. We want to avoid any kind of legalization or argument
 456|   459|  // splitting.
 457|   459|  if (CC == CallingConv::AMDGPU_KERNEL)
 458|   379|    return lowerFormalArgumentsKernel(MIRBuilder, F, VRegs);
 459|    80|
 460|    80|  // AMDGPU_GS and AMDGP_HS are not supported yet.
 461|    80|  if (CC == CallingConv::AMDGPU_GS || CC == CallingConv::AMDGPU_HS)
 462|     0|    return false;
 463|    80|
 464|    80|  const bool IsShader = AMDGPU::isShader(CC);
 465|    80|  const bool IsEntryFunc = AMDGPU::isEntryFunctionCC(CC);
 466|    80|
 467|    80|  MachineFunction &MF = MIRBuilder.getMF();
 468|    80|  MachineBasicBlock &MBB = MIRBuilder.getMBB();
 469|    80|  MachineRegisterInfo &MRI = MF.getRegInfo();
 470|    80|  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
 471|    80|  const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
 472|    80|  const SIRegisterInfo *TRI = Subtarget.getRegisterInfo();
 473|    80|  const DataLayout &DL = F.getParent()->getDataLayout();
 474|    80|
 475|    80|
 476|    80|  SmallVector<CCValAssign, 16> ArgLocs;
 477|    80|  CCState CCInfo(CC, F.isVarArg(), MF, ArgLocs, F.getContext());
 478|    80|
 479|    80|  if (Info->hasImplicitBufferPtr()) {
 480|    11|    Register ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
 481|    11|    MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
 482|    11|    CCInfo.AllocateReg(ImplicitBufferPtrReg);
 483|    11|  }
 484|    80|
 485|    80|
 486|    80|  SmallVector<ArgInfo, 32> SplitArgs;
 487|    80|  unsigned Idx = 0;
 488|    80|  unsigned PSInputNum = 0;
 489|    80|
 490|   113|  for (auto &Arg : F.args()) {
 491|   113|    if (DL.getTypeStoreSize(Arg.getType()) == 0)
 492|     0|      continue;
 493|   113|
 494|   113|    const bool InReg = Arg.hasAttribute(Attribute::InReg);
 495|   113|
 496|   113|    // SGPR arguments to functions not implemented.
 497|   113|    if (!IsShader && InReg)
 498|     0|      return false;
 499|   113|
 500|   113|    // TODO: Handle sret.
 501|   113|    if (Arg.hasAttribute(Attribute::StructRet) ||
 502|   113|        Arg.hasAttribute(Attribute::SwiftSelf) ||
 503|   113|        Arg.hasAttribute(Attribute::SwiftError) ||
 504|   113|        Arg.hasAttribute(Attribute::Nest))
 505|     0|      return false;
 506|   113|
 507|   113|    if (CC == CallingConv::AMDGPU_PS && !InReg && PSInputNum <= 15) {
 508|     6|      const bool ArgUsed = !Arg.use_empty();
 509|     6|      bool SkipArg = !ArgUsed && !Info->isPSInputAllocated(PSInputNum);
 510|     6|
 511|     6|      if (!SkipArg) {
 512|     4|        Info->markPSInputAllocated(PSInputNum);
 513|     4|        if (ArgUsed)
 514|     4|          Info->markPSInputEnabled(PSInputNum);
 515|     4|      }
 516|     6|
 517|     6|      ++PSInputNum;
 518|     6|
 519|     6|      if (SkipArg) {
 520|     5|        for (int I = 0, E = VRegs[Idx].size(); I != E; ++I)
 521|     3|          MIRBuilder.buildUndef(VRegs[Idx][I]);
 522|     2|
 523|     2|        ++Idx;
 524|     2|        continue;
 525|     2|      }
 526|   111|    }
 527|   111|
 528|   111|    ArgInfo OrigArg(VRegs[Idx], Arg.getType());
 529|   111|    setArgFlags(OrigArg, Idx + AttributeList::FirstArgIndex, DL, F);
 530|   111|
 531|   111|    splitToValueTypes(
 532|   111|      OrigArg, SplitArgs, DL, MRI, CC,
 533|   111|      // FIXME: We should probably be passing multiple registers to
 534|   111|      // handleAssignments to do this
 535|   111|      [&](ArrayRef<Register> Regs, LLT LLTy, LLT PartLLT, int VTSplitIdx) {
 536|    66|        packSplitRegsToOrigType(MIRBuilder, VRegs[Idx][VTSplitIdx], Regs,
 537|    66|                                LLTy, PartLLT);
 538|    66|      });
 539|   111|
 540|   111|    ++Idx;
 541|   111|  }
 542|    80|
 543|    80|  // At least one interpolation mode must be enabled or else the GPU will
 544|    80|  // hang.
 545|    80|  //
 546|    80|  // Check PSInputAddr instead of PSInputEnable. The idea is that if the user
 547|    80|  // set PSInputAddr, the user wants to enable some bits after the compilation
 548|    80|  // based on run-time states. Since we can't know what the final PSInputEna
 549|    80|  // will look like, so we shouldn't do anything here and the user should take
 550|    80|  // responsibility for the correct programming.
 551|    80|  //
 552|    80|  // Otherwise, the following restrictions apply:
 553|    80|  // - At least one of PERSP_* (0xF) or LINEAR_* (0x70) must be enabled.
 554|    80|  // - If POS_W_FLOAT (11) is enabled, at least one of PERSP_* must be
 555|    80|  //   enabled too.
 556|    80|  if (CC == CallingConv::AMDGPU_PS) {
 557|     4|    if ((Info->getPSInputAddr() & 0x7F) == 0 ||
 558|     4|        ((Info->getPSInputAddr() & 0xF) == 0 &&
 559|     3|         Info->isPSInputAllocated(11))) {
 560|     1|      CCInfo.AllocateReg(AMDGPU::VGPR0);
 561|     1|      CCInfo.AllocateReg(AMDGPU::VGPR1);
 562|     1|      Info->markPSInputAllocated(0);
 563|     1|      Info->markPSInputEnabled(0);
 564|     1|    }
 565|     4|
 566|     4|    if (Subtarget.isAmdPalOS()) {
 567|     0|      // For isAmdPalOS, the user does not enable some bits after compilation
 568|     0|      // based on run-time states; the register values being generated here are
 569|     0|      // the final ones set in hardware. Therefore we need to apply the
 570|     0|      // workaround to PSInputAddr and PSInputEnable together.  (The case where
 571|     0|      // a bit is set in PSInputAddr but not PSInputEnable is where the frontend
 572|     0|      // set up an input arg for a particular interpolation mode, but nothing
 573|     0|      // uses that input arg. Really we should have an earlier pass that removes
 574|     0|      // such an arg.)
 575|     0|      unsigned PsInputBits = Info->getPSInputAddr() & Info->getPSInputEnable();
 576|     0|      if ((PsInputBits & 0x7F) == 0 ||
 577|     0|          ((PsInputBits & 0xF) == 0 &&
 578|     0|           (PsInputBits >> 11 & 1)))
 579|     0|        Info->markPSInputEnabled(
 580|     0|          countTrailingZeros(Info->getPSInputAddr(), ZB_Undefined));
 581|     0|    }
 582|     4|  }
 583|    80|
 584|    80|  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
 585|    80|  CCAssignFn *AssignFn = TLI.CCAssignFnForCall(CC, F.isVarArg());
 586|    80|
 587|    80|  if (!MBB.empty())
 588|    54|    MIRBuilder.setInstr(*MBB.begin());
 589|    80|
 590|    80|  FormalArgHandler Handler(MIRBuilder, MRI, AssignFn);
 591|    80|  if (!handleAssignments(CCInfo, ArgLocs, MIRBuilder, SplitArgs, Handler))
 592|     0|    return false;
 593|    80|
 594|    80|  if (!IsEntryFunc) {
 595|    68|    // Special inputs come after user arguments.
 596|    68|    TLI.allocateSpecialInputVGPRs(CCInfo, MF, *TRI, *Info);
 597|    68|  }
 598|    80|
 599|    80|  // Start adding system SGPRs.
 600|    80|  if (IsEntryFunc) {
 601|    12|    TLI.allocateSystemSGPRs(CCInfo, MF, *Info, CC, IsShader);
 602|    68|  } else {
 603|    68|    CCInfo.AllocateReg(Info->getScratchRSrcReg());
 604|    68|    CCInfo.AllocateReg(Info->getScratchWaveOffsetReg());
 605|    68|    CCInfo.AllocateReg(Info->getFrameOffsetReg());
 606|    68|    TLI.allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info);
 607|    68|  }
 608|    80|
 609|    80|  // Move back to the end of the basic block.
 610|    80|  MIRBuilder.setMBB(MBB);
 611|    80|
 612|    80|  return true;
 613|    80|}