Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
Line
Count
Source (jump to first uncovered line)
1
//==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
/// \file
10
//
11
//===----------------------------------------------------------------------===//
12
13
#ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
14
#define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
15
16
#include "AMDGPUArgumentUsageInfo.h"
17
#include "AMDGPUMachineFunction.h"
18
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19
#include "SIInstrInfo.h"
20
#include "SIRegisterInfo.h"
21
#include "llvm/ADT/ArrayRef.h"
22
#include "llvm/ADT/DenseMap.h"
23
#include "llvm/ADT/Optional.h"
24
#include "llvm/ADT/STLExtras.h"
25
#include "llvm/ADT/SmallVector.h"
26
#include "llvm/ADT/SparseBitVector.h"
27
#include "llvm/CodeGen/MIRYamlMapping.h"
28
#include "llvm/CodeGen/PseudoSourceValue.h"
29
#include "llvm/CodeGen/TargetInstrInfo.h"
30
#include "llvm/MC/MCRegisterInfo.h"
31
#include "llvm/Support/ErrorHandling.h"
32
#include <array>
33
#include <cassert>
34
#include <utility>
35
#include <vector>
36
37
namespace llvm {
38
39
class MachineFrameInfo;
40
class MachineFunction;
41
class TargetRegisterClass;
42
43
class AMDGPUPseudoSourceValue : public PseudoSourceValue {
44
public:
45
  enum AMDGPUPSVKind : unsigned {
46
    PSVBuffer = PseudoSourceValue::TargetCustom,
47
    PSVImage,
48
    GWSResource
49
  };
50
51
protected:
52
  AMDGPUPseudoSourceValue(unsigned Kind, const TargetInstrInfo &TII)
53
2.86k
      : PseudoSourceValue(Kind, TII) {}
54
55
public:
56
17.8k
  bool isConstant(const MachineFrameInfo *) const override {
57
17.8k
    // This should probably be true for most images, but we will start by being
58
17.8k
    // conservative.
59
17.8k
    return false;
60
17.8k
  }
61
62
4.35k
  bool isAliased(const MachineFrameInfo *) const override {
63
4.35k
    return true;
64
4.35k
  }
65
66
195
  bool mayAlias(const MachineFrameInfo *) const override {
67
195
    return true;
68
195
  }
69
};
70
71
class AMDGPUBufferPseudoSourceValue final : public AMDGPUPseudoSourceValue {
72
public:
73
  explicit AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII)
74
1.48k
      : AMDGPUPseudoSourceValue(PSVBuffer, TII) {}
75
76
0
  static bool classof(const PseudoSourceValue *V) {
77
0
    return V->kind() == PSVBuffer;
78
0
  }
79
};
80
81
class AMDGPUImagePseudoSourceValue final : public AMDGPUPseudoSourceValue {
82
public:
83
  // TODO: Is the img rsrc useful?
84
  explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII)
85
1.21k
      : AMDGPUPseudoSourceValue(PSVImage, TII) {}
86
87
0
  static bool classof(const PseudoSourceValue *V) {
88
0
    return V->kind() == PSVImage;
89
0
  }
90
};
91
92
class AMDGPUGWSResourcePseudoSourceValue final : public AMDGPUPseudoSourceValue {
93
public:
94
  explicit AMDGPUGWSResourcePseudoSourceValue(const TargetInstrInfo &TII)
95
166
      : AMDGPUPseudoSourceValue(GWSResource, TII) {}
96
97
0
  static bool classof(const PseudoSourceValue *V) {
98
0
    return V->kind() == GWSResource;
99
0
  }
100
101
  // These are inaccessible memory from IR.
102
0
  bool isAliased(const MachineFrameInfo *) const override {
103
0
    return false;
104
0
  }
105
106
  // These are inaccessible memory from IR.
107
0
  bool mayAlias(const MachineFrameInfo *) const override {
108
0
    return false;
109
0
  }
110
111
32
  void printCustom(raw_ostream &OS) const override {
112
32
    OS << "GWSResource";
113
32
  }
114
};
115
116
namespace yaml {
117
118
struct SIArgument {
119
  bool IsRegister;
120
  union {
121
    StringValue RegisterName;
122
    unsigned StackOffset;
123
  };
124
  Optional<unsigned> Mask;
125
126
  // Default constructor, which creates a stack argument.
127
223
  SIArgument() : IsRegister(false), StackOffset(0) {}
128
23.4k
  SIArgument(const SIArgument &Other) {
129
23.4k
    IsRegister = Other.IsRegister;
130
23.4k
    if (IsRegister) {
131
23.2k
      ::new ((void *)std::addressof(RegisterName))
132
23.2k
          StringValue(Other.RegisterName);
133
23.2k
    } else
134
225
      StackOffset = Other.StackOffset;
135
23.4k
    Mask = Other.Mask;
136
23.4k
  }
137
43
  SIArgument &operator=(const SIArgument &Other) {
138
43
    IsRegister = Other.IsRegister;
139
43
    if (IsRegister) {
140
43
      ::new ((void *)std::addressof(RegisterName))
141
43
          StringValue(Other.RegisterName);
142
43
    } else
143
0
      StackOffset = Other.StackOffset;
144
43
    Mask = Other.Mask;
145
43
    return *this;
146
43
  }
147
35.3k
  ~SIArgument() {
148
35.3k
    if (IsRegister)
149
34.9k
      RegisterName.~StringValue();
150
35.3k
  }
151
152
  // Helper to create a register or stack argument.
153
11.6k
  static inline SIArgument createArgument(bool IsReg) {
154
11.6k
    if (IsReg)
155
11.6k
      return SIArgument(IsReg);
156
2
    return SIArgument();
157
2
  }
158
159
private:
160
  // Construct a register argument.
161
11.6k
  SIArgument(bool) : IsRegister(true), RegisterName() {}
162
};
163
164
template <> struct MappingTraits<SIArgument> {
165
11.6k
  static void mapping(IO &YamlIO, SIArgument &A) {
166
11.6k
    if (YamlIO.outputting()) {
167
11.6k
      if (A.IsRegister)
168
11.6k
        YamlIO.mapRequired("reg", A.RegisterName);
169
2
      else
170
2
        YamlIO.mapRequired("offset", A.StackOffset);
171
11.6k
    } else {
172
45
      auto Keys = YamlIO.keys();
173
45
      if (is_contained(Keys, "reg")) {
174
43
        A = SIArgument::createArgument(true);
175
43
        YamlIO.mapRequired("reg", A.RegisterName);
176
43
      } else 
if (2
is_contained(Keys, "offset")2
)
177
2
        YamlIO.mapRequired("offset", A.StackOffset);
178
0
      else
179
0
        YamlIO.setError("missing required key 'reg' or 'offset'");
180
45
    }
181
11.6k
    YamlIO.mapOptional("mask", A.Mask);
182
11.6k
  }
183
  static const bool flow = true;
184
};
185
186
struct SIArgumentInfo {
187
  Optional<SIArgument> PrivateSegmentBuffer;
188
  Optional<SIArgument> DispatchPtr;
189
  Optional<SIArgument> QueuePtr;
190
  Optional<SIArgument> KernargSegmentPtr;
191
  Optional<SIArgument> DispatchID;
192
  Optional<SIArgument> FlatScratchInit;
193
  Optional<SIArgument> PrivateSegmentSize;
194
195
  Optional<SIArgument> WorkGroupIDX;
196
  Optional<SIArgument> WorkGroupIDY;
197
  Optional<SIArgument> WorkGroupIDZ;
198
  Optional<SIArgument> WorkGroupInfo;
199
  Optional<SIArgument> PrivateSegmentWaveByteOffset;
200
201
  Optional<SIArgument> ImplicitArgPtr;
202
  Optional<SIArgument> ImplicitBufferPtr;
203
204
  Optional<SIArgument> WorkItemIDX;
205
  Optional<SIArgument> WorkItemIDY;
206
  Optional<SIArgument> WorkItemIDZ;
207
};
208
209
template <> struct MappingTraits<SIArgumentInfo> {
210
5.62k
  static void mapping(IO &YamlIO, SIArgumentInfo &AI) {
211
5.62k
    YamlIO.mapOptional("privateSegmentBuffer", AI.PrivateSegmentBuffer);
212
5.62k
    YamlIO.mapOptional("dispatchPtr", AI.DispatchPtr);
213
5.62k
    YamlIO.mapOptional("queuePtr", AI.QueuePtr);
214
5.62k
    YamlIO.mapOptional("kernargSegmentPtr", AI.KernargSegmentPtr);
215
5.62k
    YamlIO.mapOptional("dispatchID", AI.DispatchID);
216
5.62k
    YamlIO.mapOptional("flatScratchInit", AI.FlatScratchInit);
217
5.62k
    YamlIO.mapOptional("privateSegmentSize", AI.PrivateSegmentSize);
218
5.62k
219
5.62k
    YamlIO.mapOptional("workGroupIDX", AI.WorkGroupIDX);
220
5.62k
    YamlIO.mapOptional("workGroupIDY", AI.WorkGroupIDY);
221
5.62k
    YamlIO.mapOptional("workGroupIDZ", AI.WorkGroupIDZ);
222
5.62k
    YamlIO.mapOptional("workGroupInfo", AI.WorkGroupInfo);
223
5.62k
    YamlIO.mapOptional("privateSegmentWaveByteOffset",
224
5.62k
                       AI.PrivateSegmentWaveByteOffset);
225
5.62k
226
5.62k
    YamlIO.mapOptional("implicitArgPtr", AI.ImplicitArgPtr);
227
5.62k
    YamlIO.mapOptional("implicitBufferPtr", AI.ImplicitBufferPtr);
228
5.62k
229
5.62k
    YamlIO.mapOptional("workItemIDX", AI.WorkItemIDX);
230
5.62k
    YamlIO.mapOptional("workItemIDY", AI.WorkItemIDY);
231
5.62k
    YamlIO.mapOptional("workItemIDZ", AI.WorkItemIDZ);
232
5.62k
  }
233
};
234
235
// Default to default mode for default calling convention.
236
struct SIMode {
237
  bool IEEE = true;
238
  bool DX10Clamp = true;
239
240
11.6k
  SIMode() = default;
241
242
243
5.80k
  SIMode(const AMDGPU::SIModeRegisterDefaults &Mode) {
244
5.80k
    IEEE = Mode.IEEE;
245
5.80k
    DX10Clamp = Mode.DX10Clamp;
246
5.80k
  }
247
248
5.80k
  bool operator ==(const SIMode Other) const {
249
5.80k
    return IEEE == Other.IEEE && 
DX10Clamp == Other.DX10Clamp5.77k
;
250
5.80k
  }
251
};
252
253
template <> struct MappingTraits<SIMode> {
254
5.83k
  static void mapping(IO &YamlIO, SIMode &Mode) {
255
5.83k
    YamlIO.mapOptional("ieee", Mode.IEEE, true);
256
5.83k
    YamlIO.mapOptional("dx10-clamp", Mode.DX10Clamp, true);
257
5.83k
  }
258
};
259
260
struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
261
  uint64_t ExplicitKernArgSize = 0;
262
  unsigned MaxKernArgAlign = 0;
263
  unsigned LDSSize = 0;
264
  bool IsEntryFunction = false;
265
  bool NoSignedZerosFPMath = false;
266
  bool MemoryBound = false;
267
  bool WaveLimiter = false;
268
269
  StringValue ScratchRSrcReg = "$private_rsrc_reg";
270
  StringValue ScratchWaveOffsetReg = "$scratch_wave_offset_reg";
271
  StringValue FrameOffsetReg = "$fp_reg";
272
  StringValue StackPtrOffsetReg = "$sp_reg";
273
274
  Optional<SIArgumentInfo> ArgInfo;
275
  SIMode Mode;
276
277
5.61k
  SIMachineFunctionInfo() = default;
278
  SIMachineFunctionInfo(const llvm::SIMachineFunctionInfo &,
279
                        const TargetRegisterInfo &TRI);
280
281
  void mappingImpl(yaml::IO &YamlIO) override;
282
11.4k
  ~SIMachineFunctionInfo() = default;
283
};
284
285
template <> struct MappingTraits<SIMachineFunctionInfo> {
286
6.00k
  static void mapping(IO &YamlIO, SIMachineFunctionInfo &MFI) {
287
6.00k
    YamlIO.mapOptional("explicitKernArgSize", MFI.ExplicitKernArgSize,
288
6.00k
                       UINT64_C(0));
289
6.00k
    YamlIO.mapOptional("maxKernArgAlign", MFI.MaxKernArgAlign, 0u);
290
6.00k
    YamlIO.mapOptional("ldsSize", MFI.LDSSize, 0u);
291
6.00k
    YamlIO.mapOptional("isEntryFunction", MFI.IsEntryFunction, false);
292
6.00k
    YamlIO.mapOptional("noSignedZerosFPMath", MFI.NoSignedZerosFPMath, false);
293
6.00k
    YamlIO.mapOptional("memoryBound", MFI.MemoryBound, false);
294
6.00k
    YamlIO.mapOptional("waveLimiter", MFI.WaveLimiter, false);
295
6.00k
    YamlIO.mapOptional("scratchRSrcReg", MFI.ScratchRSrcReg,
296
6.00k
                       StringValue("$private_rsrc_reg"));
297
6.00k
    YamlIO.mapOptional("scratchWaveOffsetReg", MFI.ScratchWaveOffsetReg,
298
6.00k
                       StringValue("$scratch_wave_offset_reg"));
299
6.00k
    YamlIO.mapOptional("frameOffsetReg", MFI.FrameOffsetReg,
300
6.00k
                       StringValue("$fp_reg"));
301
6.00k
    YamlIO.mapOptional("stackPtrOffsetReg", MFI.StackPtrOffsetReg,
302
6.00k
                       StringValue("$sp_reg"));
303
6.00k
    YamlIO.mapOptional("argumentInfo", MFI.ArgInfo);
304
6.00k
    YamlIO.mapOptional("mode", MFI.Mode, SIMode());
305
6.00k
  }
306
};
307
308
} // end namespace yaml
309
310
/// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
311
/// tells the hardware which interpolation parameters to load.
312
class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
313
  friend class GCNTargetMachine;
314
315
  unsigned TIDReg = AMDGPU::NoRegister;
316
317
  // Registers that may be reserved for spilling purposes. These may be the same
318
  // as the input registers.
319
  unsigned ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;
320
  unsigned ScratchWaveOffsetReg = AMDGPU::SCRATCH_WAVE_OFFSET_REG;
321
322
  // This is the current function's incremented size from the kernel's scratch
323
  // wave offset register. For an entry function, this is exactly the same as
324
  // the ScratchWaveOffsetReg.
325
  unsigned FrameOffsetReg = AMDGPU::FP_REG;
326
327
  // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg.
328
  unsigned StackPtrOffsetReg = AMDGPU::SP_REG;
329
330
  AMDGPUFunctionArgInfo ArgInfo;
331
332
  // State of MODE register, assumed FP mode.
333
  AMDGPU::SIModeRegisterDefaults Mode;
334
335
  // Graphics info.
336
  unsigned PSInputAddr = 0;
337
  unsigned PSInputEnable = 0;
338
339
  /// Number of bytes of arguments this function has on the stack. If the callee
340
  /// is expected to restore the argument stack this should be a multiple of 16,
341
  /// all usable during a tail call.
342
  ///
343
  /// The alternative would forbid tail call optimisation in some cases: if we
344
  /// want to transfer control from a function with 8-bytes of stack-argument
345
  /// space to a function with 16-bytes then misalignment of this value would
346
  /// make a stack adjustment necessary, which could not be undone by the
347
  /// callee.
348
  unsigned BytesInStackArgArea = 0;
349
350
  bool ReturnsVoid = true;
351
352
  // A pair of default/requested minimum/maximum flat work group sizes.
353
  // Minimum - first, maximum - second.
354
  std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0};
355
356
  // A pair of default/requested minimum/maximum number of waves per execution
357
  // unit. Minimum - first, maximum - second.
358
  std::pair<unsigned, unsigned> WavesPerEU = {0, 0};
359
360
  DenseMap<const Value *,
361
           std::unique_ptr<const AMDGPUBufferPseudoSourceValue>> BufferPSVs;
362
  DenseMap<const Value *,
363
           std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs;
364
  std::unique_ptr<const AMDGPUGWSResourcePseudoSourceValue> GWSResourcePSV;
365
366
private:
367
  unsigned LDSWaveSpillSize = 0;
368
  unsigned NumUserSGPRs = 0;
369
  unsigned NumSystemSGPRs = 0;
370
371
  bool HasSpilledSGPRs = false;
372
  bool HasSpilledVGPRs = false;
373
  bool HasNonSpillStackObjects = false;
374
  bool IsStackRealigned = false;
375
376
  unsigned NumSpilledSGPRs = 0;
377
  unsigned NumSpilledVGPRs = 0;
378
379
  // Feature bits required for inputs passed in user SGPRs.
380
  bool PrivateSegmentBuffer : 1;
381
  bool DispatchPtr : 1;
382
  bool QueuePtr : 1;
383
  bool KernargSegmentPtr : 1;
384
  bool DispatchID : 1;
385
  bool FlatScratchInit : 1;
386
387
  // Feature bits required for inputs passed in system SGPRs.
388
  bool WorkGroupIDX : 1; // Always initialized.
389
  bool WorkGroupIDY : 1;
390
  bool WorkGroupIDZ : 1;
391
  bool WorkGroupInfo : 1;
392
  bool PrivateSegmentWaveByteOffset : 1;
393
394
  bool WorkItemIDX : 1; // Always initialized.
395
  bool WorkItemIDY : 1;
396
  bool WorkItemIDZ : 1;
397
398
  // Private memory buffer
399
  // Compute directly in sgpr[0:1]
400
  // Other shaders indirect 64-bits at sgpr[0:1]
401
  bool ImplicitBufferPtr : 1;
402
403
  // Pointer to where the ABI inserts special kernel arguments separate from the
404
  // user arguments. This is an offset from the KernargSegmentPtr.
405
  bool ImplicitArgPtr : 1;
406
407
  // The hard-wired high half of the address of the global information table
408
  // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since
409
  // current hardware only allows a 16 bit value.
410
  unsigned GITPtrHigh;
411
412
  unsigned HighBitsOf32BitAddress;
413
  unsigned GDSSize;
414
415
  // Current recorded maximum possible occupancy.
416
  unsigned Occupancy;
417
418
  MCPhysReg getNextUserSGPR() const;
419
420
  MCPhysReg getNextSystemSGPR() const;
421
422
public:
423
  struct SpilledReg {
424
    unsigned VGPR = 0;
425
    int Lane = -1;
426
427
    SpilledReg() = default;
428
1.63k
    SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) {}
429
430
0
    bool hasLane() { return Lane != -1;}
431
0
    bool hasReg() { return VGPR != 0;}
432
  };
433
434
  struct SGPRSpillVGPRCSR {
435
    // VGPR used for SGPR spills
436
    unsigned VGPR;
437
438
    // If the VGPR is a CSR, the stack slot used to save/restore it in the
439
    // prolog/epilog.
440
    Optional<int> FI;
441
442
202
    SGPRSpillVGPRCSR(unsigned V, Optional<int> F) : VGPR(V), FI(F) {}
443
  };
444
445
  struct VGPRSpillToAGPR {
446
    SmallVector<MCPhysReg, 32> Lanes;
447
    bool FullyAllocated = false;
448
  };
449
450
  SparseBitVector<> WWMReservedRegs;
451
452
329
  void ReserveWWMRegister(unsigned reg) { WWMReservedRegs.set(reg); }
453
454
private:
455
  // SGPR->VGPR spilling support.
456
  using SpillRegMask = std::pair<unsigned, unsigned>;
457
458
  // Track VGPR + wave index for each subregister of the SGPR spilled to
459
  // frameindex key.
460
  DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
461
  unsigned NumVGPRSpillLanes = 0;
462
  SmallVector<SGPRSpillVGPRCSR, 2> SpillVGPRs;
463
464
  DenseMap<int, VGPRSpillToAGPR> VGPRToAGPRSpills;
465
466
  // AGPRs used for VGPR spills.
467
  SmallVector<MCPhysReg, 32> SpillAGPR;
468
469
  // VGPRs used for AGPR spills.
470
  SmallVector<MCPhysReg, 32> SpillVGPR;
471
472
public: // FIXME
473
  /// If this is set, an SGPR used for save/restore of the register used for the
474
  /// frame pointer.
475
  unsigned SGPRForFPSaveRestoreCopy = 0;
476
  Optional<int> FramePointerSaveIndex;
477
478
public:
479
  SIMachineFunctionInfo(const MachineFunction &MF);
480
481
  bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI);
482
483
1.98k
  ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
484
1.98k
    auto I = SGPRToVGPRSpills.find(FrameIndex);
485
1.98k
    return (I == SGPRToVGPRSpills.end()) ?
486
1.88k
      
ArrayRef<SpilledReg>()101
: makeArrayRef(I->second);
487
1.98k
  }
488
489
7.25k
  ArrayRef<SGPRSpillVGPRCSR> getSGPRSpillVGPRs() const {
490
7.25k
    return SpillVGPRs;
491
7.25k
  }
492
493
93.8k
  ArrayRef<MCPhysReg> getAGPRSpillVGPRs() const {
494
93.8k
    return SpillAGPR;
495
93.8k
  }
496
497
93.8k
  ArrayRef<MCPhysReg> getVGPRSpillAGPRs() const {
498
93.8k
    return SpillVGPR;
499
93.8k
  }
500
501
14.1k
  MCPhysReg getVGPRToAGPRSpill(int FrameIndex, unsigned Lane) const {
502
14.1k
    auto I = VGPRToAGPRSpills.find(FrameIndex);
503
14.1k
    return (I == VGPRToAGPRSpills.end()) ? 
(MCPhysReg)AMDGPU::NoRegister13.3k
504
14.1k
                                         : 
I->second.Lanes[Lane]798
;
505
14.1k
  }
506
507
81.6k
  AMDGPU::SIModeRegisterDefaults getMode() const {
508
81.6k
    return Mode;
509
81.6k
  }
510
511
  bool haveFreeLanesForSGPRSpill(const MachineFunction &MF,
512
                                 unsigned NumLane) const;
513
  bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
514
  bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR);
515
  void removeDeadFrameIndices(MachineFrameInfo &MFI);
516
517
0
  bool hasCalculatedTID() const { return TIDReg != 0; };
518
0
  unsigned getTIDReg() const { return TIDReg; };
519
0
  void setTIDReg(unsigned Reg) { TIDReg = Reg; }
520
521
43
  unsigned getBytesInStackArgArea() const {
522
43
    return BytesInStackArgArea;
523
43
  }
524
525
25.1k
  void setBytesInStackArgArea(unsigned Bytes) {
526
25.1k
    BytesInStackArgArea = Bytes;
527
25.1k
  }
528
529
  // Add user SGPRs.
530
  unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
531
  unsigned addDispatchPtr(const SIRegisterInfo &TRI);
532
  unsigned addQueuePtr(const SIRegisterInfo &TRI);
533
  unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
534
  unsigned addDispatchID(const SIRegisterInfo &TRI);
535
  unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
536
  unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI);
537
538
  // Add system SGPRs.
539
20.3k
  unsigned addWorkGroupIDX() {
540
20.3k
    ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR());
541
20.3k
    NumSystemSGPRs += 1;
542
20.3k
    return ArgInfo.WorkGroupIDX.getRegister();
543
20.3k
  }
544
545
30
  unsigned addWorkGroupIDY() {
546
30
    ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR());
547
30
    NumSystemSGPRs += 1;
548
30
    return ArgInfo.WorkGroupIDY.getRegister();
549
30
  }
550
551
28
  unsigned addWorkGroupIDZ() {
552
28
    ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR());
553
28
    NumSystemSGPRs += 1;
554
28
    return ArgInfo.WorkGroupIDZ.getRegister();
555
28
  }
556
557
0
  unsigned addWorkGroupInfo() {
558
0
    ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR());
559
0
    NumSystemSGPRs += 1;
560
0
    return ArgInfo.WorkGroupInfo.getRegister();
561
0
  }
562
563
  // Add special VGPR inputs
564
20.3k
  void setWorkItemIDX(ArgDescriptor Arg) {
565
20.3k
    ArgInfo.WorkItemIDX = Arg;
566
20.3k
  }
567
568
182
  void setWorkItemIDY(ArgDescriptor Arg) {
569
182
    ArgInfo.WorkItemIDY = Arg;
570
182
  }
571
572
92
  void setWorkItemIDZ(ArgDescriptor Arg) {
573
92
    ArgInfo.WorkItemIDZ = Arg;
574
92
  }
575
576
20.3k
  unsigned addPrivateSegmentWaveByteOffset() {
577
20.3k
    ArgInfo.PrivateSegmentWaveByteOffset
578
20.3k
      = ArgDescriptor::createRegister(getNextSystemSGPR());
579
20.3k
    NumSystemSGPRs += 1;
580
20.3k
    return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
581
20.3k
  }
582
583
2.91k
  void setPrivateSegmentWaveByteOffset(unsigned Reg) {
584
2.91k
    ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg);
585
2.91k
  }
586
587
27.7k
  bool hasPrivateSegmentBuffer() const {
588
27.7k
    return PrivateSegmentBuffer;
589
27.7k
  }
590
591
31.2k
  bool hasDispatchPtr() const {
592
31.2k
    return DispatchPtr;
593
31.2k
  }
594
595
30.0k
  bool hasQueuePtr() const {
596
30.0k
    return QueuePtr;
597
30.0k
  }
598
599
30.0k
  bool hasKernargSegmentPtr() const {
600
30.0k
    return KernargSegmentPtr;
601
30.0k
  }
602
603
30.0k
  bool hasDispatchID() const {
604
30.0k
    return DispatchID;
605
30.0k
  }
606
607
302k
  bool hasFlatScratchInit() const {
608
302k
    return FlatScratchInit;
609
302k
  }
610
611
48.8k
  bool hasWorkGroupIDX() const {
612
48.8k
    return WorkGroupIDX;
613
48.8k
  }
614
615
48.8k
  bool hasWorkGroupIDY() const {
616
48.8k
    return WorkGroupIDY;
617
48.8k
  }
618
619
48.8k
  bool hasWorkGroupIDZ() const {
620
48.8k
    return WorkGroupIDZ;
621
48.8k
  }
622
623
46.4k
  bool hasWorkGroupInfo() const {
624
46.4k
    return WorkGroupInfo;
625
46.4k
  }
626
627
23.3k
  bool hasPrivateSegmentWaveByteOffset() const {
628
23.3k
    return PrivateSegmentWaveByteOffset;
629
23.3k
  }
630
631
25.6k
  bool hasWorkItemIDX() const {
632
25.6k
    return WorkItemIDX;
633
25.6k
  }
634
635
48.7k
  bool hasWorkItemIDY() const {
636
48.7k
    return WorkItemIDY;
637
48.7k
  }
638
639
48.7k
  bool hasWorkItemIDZ() const {
640
48.7k
    return WorkItemIDZ;
641
48.7k
  }
642
643
2.32k
  bool hasImplicitArgPtr() const {
644
2.32k
    return ImplicitArgPtr;
645
2.32k
  }
646
647
23.5k
  bool hasImplicitBufferPtr() const {
648
23.5k
    return ImplicitBufferPtr;
649
23.5k
  }
650
651
31.6k
  AMDGPUFunctionArgInfo &getArgInfo() {
652
31.6k
    return ArgInfo;
653
31.6k
  }
654
655
6.50k
  const AMDGPUFunctionArgInfo &getArgInfo() const {
656
6.50k
    return ArgInfo;
657
6.50k
  }
658
659
  std::pair<const ArgDescriptor *, const TargetRegisterClass *>
660
66.3k
  getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
661
66.3k
    return ArgInfo.getPreloadedValue(Value);
662
66.3k
  }
663
664
30.5k
  Register getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
665
30.5k
    auto Arg = ArgInfo.getPreloadedValue(Value).first;
666
30.5k
    return Arg ? 
Arg->getRegister()30.5k
:
Register()6
;
667
30.5k
  }
668
669
11
  unsigned getGITPtrHigh() const {
670
11
    return GITPtrHigh;
671
11
  }
672
673
122
  unsigned get32BitAddressHighBits() const {
674
122
    return HighBitsOf32BitAddress;
675
122
  }
676
677
48
  unsigned getGDSSize() const {
678
48
    return GDSSize;
679
48
  }
680
681
46.3k
  unsigned getNumUserSGPRs() const {
682
46.3k
    return NumUserSGPRs;
683
46.3k
  }
684
685
1.63k
  unsigned getNumPreloadedSGPRs() const {
686
1.63k
    return NumUserSGPRs + NumSystemSGPRs;
687
1.63k
  }
688
689
2.98k
  unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const {
690
2.98k
    return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
691
2.98k
  }
692
693
  /// Returns the physical register reserved for use as the resource
694
  /// descriptor for scratch accesses.
695
241k
  unsigned getScratchRSrcReg() const {
696
241k
    return ScratchRSrcReg;
697
241k
  }
698
699
46.9k
  void setScratchRSrcReg(unsigned Reg) {
700
46.9k
    assert(Reg != 0 && "Should never be unset");
701
46.9k
    ScratchRSrcReg = Reg;
702
46.9k
  }
703
704
230k
  unsigned getScratchWaveOffsetReg() const {
705
230k
    return ScratchWaveOffsetReg;
706
230k
  }
707
708
209k
  unsigned getFrameOffsetReg() const {
709
209k
    return FrameOffsetReg;
710
209k
  }
711
712
46.6k
  void setFrameOffsetReg(unsigned Reg) {
713
46.6k
    assert(Reg != 0 && "Should never be unset");
714
46.6k
    FrameOffsetReg = Reg;
715
46.6k
  }
716
717
46.6k
  void setStackPtrOffsetReg(unsigned Reg) {
718
46.6k
    assert(Reg != 0 && "Should never be unset");
719
46.6k
    StackPtrOffsetReg = Reg;
720
46.6k
  }
721
722
  // Note the unset value for this is AMDGPU::SP_REG rather than
723
  // NoRegister. This is mostly a workaround for MIR tests where state that
724
  // can't be directly computed from the function is not preserved in serialized
725
  // MIR.
726
254k
  unsigned getStackPtrOffsetReg() const {
727
254k
    return StackPtrOffsetReg;
728
254k
  }
729
730
46.6k
  void setScratchWaveOffsetReg(unsigned Reg) {
731
46.6k
    assert(Reg != 0 && "Should never be unset");
732
46.6k
    ScratchWaveOffsetReg = Reg;
733
46.6k
  }
734
735
33
  unsigned getQueuePtrUserSGPR() const {
736
33
    return ArgInfo.QueuePtr.getRegister();
737
33
  }
738
739
13
  unsigned getImplicitBufferPtrUserSGPR() const {
740
13
    return ArgInfo.ImplicitBufferPtr.getRegister();
741
13
  }
742
743
1.45k
  bool hasSpilledSGPRs() const {
744
1.45k
    return HasSpilledSGPRs;
745
1.45k
  }
746
747
1.78k
  void setHasSpilledSGPRs(bool Spill = true) {
748
1.78k
    HasSpilledSGPRs = Spill;
749
1.78k
  }
750
751
14
  bool hasSpilledVGPRs() const {
752
14
    return HasSpilledVGPRs;
753
14
  }
754
755
1.34k
  void setHasSpilledVGPRs(bool Spill = true) {
756
1.34k
    HasSpilledVGPRs = Spill;
757
1.34k
  }
758
759
0
  bool hasNonSpillStackObjects() const {
760
0
    return HasNonSpillStackObjects;
761
0
  }
762
763
960
  void setHasNonSpillStackObjects(bool StackObject = true) {
764
960
    HasNonSpillStackObjects = StackObject;
765
960
  }
766
767
27.6k
  bool isStackRealigned() const {
768
27.6k
    return IsStackRealigned;
769
27.6k
  }
770
771
8
  void setIsStackRealigned(bool Realigned = true) {
772
8
    IsStackRealigned = Realigned;
773
8
  }
774
775
24.4k
  unsigned getNumSpilledSGPRs() const {
776
24.4k
    return NumSpilledSGPRs;
777
24.4k
  }
778
779
24.4k
  unsigned getNumSpilledVGPRs() const {
780
24.4k
    return NumSpilledVGPRs;
781
24.4k
  }
782
783
878
  void addToSpilledSGPRs(unsigned num) {
784
878
    NumSpilledSGPRs += num;
785
878
  }
786
787
1.40k
  void addToSpilledVGPRs(unsigned num) {
788
1.40k
    NumSpilledVGPRs += num;
789
1.40k
  }
790
791
7.13k
  unsigned getPSInputAddr() const {
792
7.13k
    return PSInputAddr;
793
7.13k
  }
794
795
2.56k
  unsigned getPSInputEnable() const {
796
2.56k
    return PSInputEnable;
797
2.56k
  }
798
799
1.85k
  bool isPSInputAllocated(unsigned Index) const {
800
1.85k
    return PSInputAddr & (1 << Index);
801
1.85k
  }
802
803
5.54k
  void markPSInputAllocated(unsigned Index) {
804
5.54k
    PSInputAddr |= 1 << Index;
805
5.54k
  }
806
807
5.43k
  void markPSInputEnabled(unsigned Index) {
808
5.43k
    PSInputEnable |= 1 << Index;
809
5.43k
  }
810
811
5.17k
  bool returnsVoid() const {
812
5.17k
    return ReturnsVoid;
813
5.17k
  }
814
815
5.63k
  void setIfReturnsVoid(bool Value) {
816
5.63k
    ReturnsVoid = Value;
817
5.63k
  }
818
819
  /// \returns A pair of default/requested minimum/maximum flat work group sizes
820
  /// for this function.
821
0
  std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
822
0
    return FlatWorkGroupSizes;
823
0
  }
824
825
  /// \returns Default/requested minimum flat work group size for this function.
826
0
  unsigned getMinFlatWorkGroupSize() const {
827
0
    return FlatWorkGroupSizes.first;
828
0
  }
829
830
  /// \returns Default/requested maximum flat work group size for this function.
831
27.1k
  unsigned getMaxFlatWorkGroupSize() const {
832
27.1k
    return FlatWorkGroupSizes.second;
833
27.1k
  }
834
835
  /// \returns A pair of default/requested minimum/maximum number of waves per
836
  /// execution unit.
837
500k
  std::pair<unsigned, unsigned> getWavesPerEU() const {
838
500k
    return WavesPerEU;
839
500k
  }
840
841
  /// \returns Default/requested minimum number of waves per execution unit.
842
0
  unsigned getMinWavesPerEU() const {
843
0
    return WavesPerEU.first;
844
0
  }
845
846
  /// \returns Default/requested maximum number of waves per execution unit.
847
160k
  unsigned getMaxWavesPerEU() const {
848
160k
    return WavesPerEU.second;
849
160k
  }
850
851
  /// \returns SGPR used for \p Dim's work group ID.
852
0
  unsigned getWorkGroupIDSGPR(unsigned Dim) const {
853
0
    switch (Dim) {
854
0
    case 0:
855
0
      assert(hasWorkGroupIDX());
856
0
      return ArgInfo.WorkGroupIDX.getRegister();
857
0
    case 1:
858
0
      assert(hasWorkGroupIDY());
859
0
      return ArgInfo.WorkGroupIDY.getRegister();
860
0
    case 2:
861
0
      assert(hasWorkGroupIDZ());
862
0
      return ArgInfo.WorkGroupIDZ.getRegister();
863
0
    }
864
0
    llvm_unreachable("unexpected dimension");
865
0
  }
866
867
23.1k
  unsigned getLDSWaveSpillSize() const {
868
23.1k
    return LDSWaveSpillSize;
869
23.1k
  }
870
871
  const AMDGPUBufferPseudoSourceValue *getBufferPSV(const SIInstrInfo &TII,
872
1.48k
                                                    const Value *BufferRsrc) {
873
1.48k
    assert(BufferRsrc);
874
1.48k
    auto PSV = BufferPSVs.try_emplace(
875
1.48k
      BufferRsrc,
876
1.48k
      llvm::make_unique<AMDGPUBufferPseudoSourceValue>(TII));
877
1.48k
    return PSV.first->second.get();
878
1.48k
  }
879
880
  const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII,
881
1.21k
                                                  const Value *ImgRsrc) {
882
1.21k
    assert(ImgRsrc);
883
1.21k
    auto PSV = ImagePSVs.try_emplace(
884
1.21k
      ImgRsrc,
885
1.21k
      llvm::make_unique<AMDGPUImagePseudoSourceValue>(TII));
886
1.21k
    return PSV.first->second.get();
887
1.21k
  }
888
889
180
  const AMDGPUGWSResourcePseudoSourceValue *getGWSPSV(const SIInstrInfo &TII) {
890
180
    if (!GWSResourcePSV) {
891
166
      GWSResourcePSV =
892
166
          llvm::make_unique<AMDGPUGWSResourcePseudoSourceValue>(TII);
893
166
    }
894
180
895
180
    return GWSResourcePSV.get();
896
180
  }
897
898
30.1k
  unsigned getOccupancy() const {
899
30.1k
    return Occupancy;
900
30.1k
  }
901
902
8.86k
  unsigned getMinAllowedOccupancy() const {
903
8.86k
    if (!isMemoryBound() && 
!needsWaveLimiter()8.19k
)
904
1.41k
      return Occupancy;
905
7.44k
    return (Occupancy < 4) ? 
Occupancy323
:
47.12k
;
906
7.44k
  }
907
908
  void limitOccupancy(const MachineFunction &MF);
909
910
168k
  void limitOccupancy(unsigned Limit) {
911
168k
    if (Occupancy > Limit)
912
342
      Occupancy = Limit;
913
168k
  }
914
915
2
  void increaseOccupancy(const MachineFunction &MF, unsigned Limit) {
916
2
    if (Occupancy < Limit)
917
0
      Occupancy = Limit;
918
2
    limitOccupancy(MF);
919
2
  }
920
};
921
922
} // end namespace llvm
923
924
#endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H