Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
9
#include "AMDGPUBaseInfo.h"
10
#include "AMDGPUTargetTransformInfo.h"
11
#include "AMDGPU.h"
12
#include "SIDefines.h"
13
#include "AMDGPUAsmUtils.h"
14
#include "llvm/ADT/StringRef.h"
15
#include "llvm/ADT/Triple.h"
16
#include "llvm/BinaryFormat/ELF.h"
17
#include "llvm/CodeGen/MachineMemOperand.h"
18
#include "llvm/IR/Attributes.h"
19
#include "llvm/IR/Constants.h"
20
#include "llvm/IR/Function.h"
21
#include "llvm/IR/GlobalValue.h"
22
#include "llvm/IR/Instruction.h"
23
#include "llvm/IR/LLVMContext.h"
24
#include "llvm/IR/Module.h"
25
#include "llvm/MC/MCContext.h"
26
#include "llvm/MC/MCInstrDesc.h"
27
#include "llvm/MC/MCInstrInfo.h"
28
#include "llvm/MC/MCRegisterInfo.h"
29
#include "llvm/MC/MCSectionELF.h"
30
#include "llvm/MC/MCSubtargetInfo.h"
31
#include "llvm/MC/SubtargetFeature.h"
32
#include "llvm/Support/Casting.h"
33
#include "llvm/Support/ErrorHandling.h"
34
#include "llvm/Support/MathExtras.h"
35
#include <algorithm>
36
#include <cassert>
37
#include <cstdint>
38
#include <cstring>
39
#include <utility>
40
41
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
42
43
#define GET_INSTRINFO_NAMED_OPS
44
#define GET_INSTRMAP_INFO
45
#include "AMDGPUGenInstrInfo.inc"
46
#undef GET_INSTRMAP_INFO
47
#undef GET_INSTRINFO_NAMED_OPS
48
49
namespace {
50
51
/// \returns Bit mask for given bit \p Shift and bit \p Width.
52
660k
unsigned getBitMask(unsigned Shift, unsigned Width) {
53
660k
  return ((1 << Width) - 1) << Shift;
54
660k
}
55
56
/// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
57
///
58
/// \returns Packed \p Dst.
59
159k
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
60
159k
  Dst &= ~(1 << Shift) & ~getBitMask(Shift, Width);
61
159k
  Dst |= (Src << Shift) & getBitMask(Shift, Width);
62
159k
  return Dst;
63
159k
}
64
65
/// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
66
///
67
/// \returns Unpacked bits.
68
182k
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
69
182k
  return (Src & getBitMask(Shift, Width)) >> Shift;
70
182k
}
71
72
/// \returns Vmcnt bit shift (lower bits).
73
153k
unsigned getVmcntBitShiftLo() { return 0; }
74
75
/// \returns Vmcnt bit width (lower bits).
76
277k
unsigned getVmcntBitWidthLo() { return 4; }
77
78
/// \returns Expcnt bit shift.
79
153k
unsigned getExpcntBitShift() { return 4; }
80
81
/// \returns Expcnt bit width.
82
233k
unsigned getExpcntBitWidth() { return 3; }
83
84
/// \returns Lgkmcnt bit shift.
85
153k
unsigned getLgkmcntBitShift() { return 8; }
86
87
/// \returns Lgkmcnt bit width.
88
228k
unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
89
228k
  return (VersionMajor >= 10) ? 
617.5k
:
4211k
;
90
228k
}
91
92
/// \returns Vmcnt bit shift (higher bits).
93
40.5k
unsigned getVmcntBitShiftHi() { return 14; }
94
95
/// \returns Vmcnt bit width (higher bits).
96
60.7k
unsigned getVmcntBitWidthHi() { return 2; }
97
98
} // end namespace anonymous
99
100
namespace llvm {
101
102
namespace AMDGPU {
103
104
#define GET_MIMGBaseOpcodesTable_IMPL
105
#define GET_MIMGDimInfoTable_IMPL
106
#define GET_MIMGInfoTable_IMPL
107
#define GET_MIMGLZMappingTable_IMPL
108
#define GET_MIMGMIPMappingTable_IMPL
109
#include "AMDGPUGenSearchableTables.inc"
110
111
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
112
2.19k
                  unsigned VDataDwords, unsigned VAddrDwords) {
113
2.19k
  const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,
114
2.19k
                                             VDataDwords, VAddrDwords);
115
2.19k
  return Info ? 
Info->Opcode1.52k
:
-1674
;
116
2.19k
}
117
118
0
const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) {
119
0
  const MIMGInfo *Info = getMIMGInfo(Opc);
120
0
  return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr;
121
0
}
122
123
135
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
124
135
  const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
125
135
  const MIMGInfo *NewInfo =
126
135
      getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
127
135
                          NewChannels, OrigInfo->VAddrDwords);
128
135
  return NewInfo ? NewInfo->Opcode : 
-10
;
129
135
}
130
131
struct MUBUFInfo {
132
  uint16_t Opcode;
133
  uint16_t BaseOpcode;
134
  uint8_t dwords;
135
  bool has_vaddr;
136
  bool has_srsrc;
137
  bool has_soffset;
138
};
139
140
#define GET_MUBUFInfoTable_DECL
141
#define GET_MUBUFInfoTable_IMPL
142
#include "AMDGPUGenSearchableTables.inc"
143
144
75.0k
int getMUBUFBaseOpcode(unsigned Opc) {
145
75.0k
  const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
146
75.0k
  return Info ? Info->BaseOpcode : 
-10
;
147
75.0k
}
148
149
218
int getMUBUFOpcode(unsigned BaseOpc, unsigned Dwords) {
150
218
  const MUBUFInfo *Info = getMUBUFInfoFromBaseOpcodeAndDwords(BaseOpc, Dwords);
151
218
  return Info ? Info->Opcode : 
-10
;
152
218
}
153
154
43.2k
int getMUBUFDwords(unsigned Opc) {
155
43.2k
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
156
43.2k
  return Info ? Info->dwords : 
00
;
157
43.2k
}
158
159
20.0k
bool getMUBUFHasVAddr(unsigned Opc) {
160
20.0k
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
161
20.0k
  return Info ? Info->has_vaddr : 
false0
;
162
20.0k
}
163
164
20.0k
bool getMUBUFHasSrsrc(unsigned Opc) {
165
20.0k
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
166
20.0k
  return Info ? Info->has_srsrc : 
false0
;
167
20.0k
}
168
169
20.0k
bool getMUBUFHasSoffset(unsigned Opc) {
170
20.0k
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
171
20.0k
  return Info ? Info->has_soffset : 
false0
;
172
20.0k
}
173
174
// Wrapper for Tablegen'd function.  enum Subtarget is not defined in any
175
// header files, so we need to wrap it in a function that takes unsigned
176
// instead.
177
1.61M
int getMCOpcode(uint16_t Opcode, unsigned Gen) {
178
1.61M
  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
179
1.61M
}
180
181
namespace IsaInfo {
182
183
2.42k
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
184
2.42k
  auto TargetTriple = STI->getTargetTriple();
185
2.42k
  auto Version = getIsaVersion(STI->getCPU());
186
2.42k
187
2.42k
  Stream << TargetTriple.getArchName() << '-'
188
2.42k
         << TargetTriple.getVendorName() << '-'
189
2.42k
         << TargetTriple.getOSName() << '-'
190
2.42k
         << TargetTriple.getEnvironmentName() << '-'
191
2.42k
         << "gfx"
192
2.42k
         << Version.Major
193
2.42k
         << Version.Minor
194
2.42k
         << Version.Stepping;
195
2.42k
196
2.42k
  if (hasXNACK(*STI))
197
87
    Stream << "+xnack";
198
2.42k
  if (hasSRAMECC(*STI))
199
29
    Stream << "+sram-ecc";
200
2.42k
201
2.42k
  Stream.flush();
202
2.42k
}
203
204
924k
bool hasCodeObjectV3(const MCSubtargetInfo *STI) {
205
924k
  return STI->getTargetTriple().getOS() == Triple::AMDHSA &&
206
924k
             
STI->getFeatureBits().test(FeatureCodeObjectV3)11.7k
;
207
924k
}
208
209
795k
unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
210
795k
  if (STI->getFeatureBits().test(FeatureWavefrontSize16))
211
0
    return 16;
212
795k
  if (STI->getFeatureBits().test(FeatureWavefrontSize32))
213
61.7k
    return 32;
214
733k
215
733k
  return 64;
216
733k
}
217
218
10
unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
219
10
  if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
220
0
    return 32768;
221
10
  if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
222
10
    return 65536;
223
0
224
0
  return 0;
225
0
}
226
227
111k
unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
228
111k
  return 4;
229
111k
}
230
231
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
232
346k
                               unsigned FlatWorkGroupSize) {
233
346k
  assert(FlatWorkGroupSize != 0);
234
346k
  if (STI->getTargetTriple().getArch() != Triple::amdgcn)
235
4.02k
    return 8;
236
342k
  unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
237
342k
  if (N == 1)
238
40.1k
    return 40;
239
301k
  N = 40 / N;
240
301k
  return std::min(N, 16u);
241
301k
}
242
243
0
unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI) {
244
0
  return getMaxWavesPerEU(STI) * getEUsPerCU(STI);
245
0
}
246
247
unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
248
55.6k
                          unsigned FlatWorkGroupSize) {
249
55.6k
  return getWavesPerWorkGroup(STI, FlatWorkGroupSize);
250
55.6k
}
251
252
55.6k
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
253
55.6k
  return 1;
254
55.6k
}
255
256
55.0k
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) {
257
55.0k
  // FIXME: Need to take scratch memory into account.
258
55.0k
  if (!isGFX10(*STI))
259
53.0k
    return 10;
260
2.07k
  return 20;
261
2.07k
}
262
263
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
264
55.6k
                          unsigned FlatWorkGroupSize) {
265
55.6k
  return alignTo(getMaxWavesPerCU(STI, FlatWorkGroupSize),
266
55.6k
                 getEUsPerCU(STI)) / getEUsPerCU(STI);
267
55.6k
}
268
269
438k
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
270
438k
  return 1;
271
438k
}
272
273
438k
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
274
438k
  return 2048;
275
438k
}
276
277
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
278
397k
                              unsigned FlatWorkGroupSize) {
279
397k
  return alignTo(FlatWorkGroupSize, getWavefrontSize(STI)) /
280
397k
                 getWavefrontSize(STI);
281
397k
}
282
283
562k
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
284
562k
  IsaVersion Version = getIsaVersion(STI->getCPU());
285
562k
  if (Version.Major >= 10)
286
0
    return getAddressableNumSGPRs(STI);
287
562k
  if (Version.Major >= 8)
288
319k
    return 16;
289
243k
  return 8;
290
243k
}
291
292
46.3k
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) {
293
46.3k
  return 8;
294
46.3k
}
295
296
562k
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
297
562k
  IsaVersion Version = getIsaVersion(STI->getCPU());
298
562k
  if (Version.Major >= 8)
299
319k
    return 800;
300
243k
  return 512;
301
243k
}
302
303
669k
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
304
669k
  if (STI->getFeatureBits().test(FeatureSGPRInitBug))
305
127k
    return FIXED_NUM_SGPRS_FOR_INIT_BUG;
306
541k
307
541k
  IsaVersion Version = getIsaVersion(STI->getCPU());
308
541k
  if (Version.Major >= 10)
309
62.0k
    return 106;
310
479k
  if (Version.Major >= 8)
311
217k
    return 102;
312
261k
  return 104;
313
261k
}
314
315
23.2k
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
316
23.2k
  assert(WavesPerEU != 0);
317
23.2k
318
23.2k
  IsaVersion Version = getIsaVersion(STI->getCPU());
319
23.2k
  if (Version.Major >= 10)
320
2.06k
    return 0;
321
21.2k
322
21.2k
  if (WavesPerEU >= getMaxWavesPerEU(STI))
323
21.0k
    return 0;
324
132
325
132
  unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
326
132
  if (STI->getFeatureBits().test(FeatureTrapHandler))
327
77
    MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
328
132
  MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
329
132
  return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
330
132
}
331
332
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
333
620k
                        bool Addressable) {
334
620k
  assert(WavesPerEU != 0);
335
620k
336
620k
  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
337
620k
  IsaVersion Version = getIsaVersion(STI->getCPU());
338
620k
  if (Version.Major >= 10)
339
57.9k
    return Addressable ? 
AddressableNumSGPRs33.4k
:
10824.4k
;
340
562k
  if (Version.Major >= 8 && 
!Addressable319k
)
341
138k
    AddressableNumSGPRs = 112;
342
562k
  unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
343
562k
  if (STI->getFeatureBits().test(FeatureTrapHandler))
344
81.4k
    MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
345
562k
  MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
346
562k
  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
347
562k
}
348
349
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
350
29.1k
                          bool FlatScrUsed, bool XNACKUsed) {
351
29.1k
  unsigned ExtraSGPRs = 0;
352
29.1k
  if (VCCUsed)
353
8.66k
    ExtraSGPRs = 2;
354
29.1k
355
29.1k
  IsaVersion Version = getIsaVersion(STI->getCPU());
356
29.1k
  if (Version.Major >= 10)
357
2.97k
    return ExtraSGPRs;
358
26.1k
359
26.1k
  if (Version.Major < 8) {
360
10.4k
    if (FlatScrUsed)
361
422
      ExtraSGPRs = 4;
362
15.7k
  } else {
363
15.7k
    if (XNACKUsed)
364
563
      ExtraSGPRs = 4;
365
15.7k
366
15.7k
    if (FlatScrUsed)
367
961
      ExtraSGPRs = 6;
368
15.7k
  }
369
26.1k
370
26.1k
  return ExtraSGPRs;
371
26.1k
}
372
373
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
374
29.0k
                          bool FlatScrUsed) {
375
29.0k
  return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
376
29.0k
                          STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
377
29.0k
}
378
379
23.1k
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
380
23.1k
  NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI));
381
23.1k
  // SGPRBlocks is actual number of SGPR blocks minus 1.
382
23.1k
  return NumSGPRs / getSGPREncodingGranule(STI) - 1;
383
23.1k
}
384
385
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
386
429k
                             Optional<bool> EnableWavefrontSize32) {
387
429k
  bool IsWave32 = EnableWavefrontSize32 ?
388
4
      *EnableWavefrontSize32 :
389
429k
      
STI->getFeatureBits().test(FeatureWavefrontSize32)429k
;
390
429k
  return IsWave32 ? 
840.5k
:
4388k
;
391
429k
}
392
393
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
394
46.3k
                                Optional<bool> EnableWavefrontSize32) {
395
46.3k
  return getVGPRAllocGranule(STI, EnableWavefrontSize32);
396
46.3k
}
397
398
375k
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
399
375k
  if (!isGFX10(*STI))
400
335k
    return 256;
401
40.3k
  return STI->getFeatureBits().test(FeatureWavefrontSize32) ? 
102436.7k
:
5123.60k
;
402
40.3k
}
403
404
398k
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
405
398k
  return 256;
406
398k
}
407
408
23.5k
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
409
23.5k
  assert(WavesPerEU != 0);
410
23.5k
411
23.5k
  if (WavesPerEU >= getMaxWavesPerEU(STI))
412
21.3k
    return 0;
413
2.20k
  unsigned MinNumVGPRs =
414
2.20k
      alignDown(getTotalNumVGPRs(STI) / (WavesPerEU + 1),
415
2.20k
                getVGPRAllocGranule(STI)) + 1;
416
2.20k
  return std::min(MinNumVGPRs, getAddressableNumVGPRs(STI));
417
2.20k
}
418
419
370k
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
420
370k
  assert(WavesPerEU != 0);
421
370k
422
370k
  unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
423
370k
                                   getVGPRAllocGranule(STI));
424
370k
  unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI);
425
370k
  return std::min(MaxNumVGPRs, AddressableNumVGPRs);
426
370k
}
427
428
unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
429
23.1k
                          Optional<bool> EnableWavefrontSize32) {
430
23.1k
  NumVGPRs = alignTo(std::max(1u, NumVGPRs),
431
23.1k
                     getVGPREncodingGranule(STI, EnableWavefrontSize32));
432
23.1k
  // VGPRBlocks is actual number of VGPR blocks minus 1.
433
23.1k
  return NumVGPRs / getVGPREncodingGranule(STI, EnableWavefrontSize32) - 1;
434
23.1k
}
435
436
} // end namespace IsaInfo
437
438
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
439
1.21k
                               const MCSubtargetInfo *STI) {
440
1.21k
  IsaVersion Version = getIsaVersion(STI->getCPU());
441
1.21k
442
1.21k
  memset(&Header, 0, sizeof(Header));
443
1.21k
444
1.21k
  Header.amd_kernel_code_version_major = 1;
445
1.21k
  Header.amd_kernel_code_version_minor = 2;
446
1.21k
  Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
447
1.21k
  Header.amd_machine_version_major = Version.Major;
448
1.21k
  Header.amd_machine_version_minor = Version.Minor;
449
1.21k
  Header.amd_machine_version_stepping = Version.Stepping;
450
1.21k
  Header.kernel_code_entry_byte_offset = sizeof(Header);
451
1.21k
  Header.wavefront_size = 6;
452
1.21k
453
1.21k
  // If the code object does not support indirect functions, then the value must
454
1.21k
  // be 0xffffffff.
455
1.21k
  Header.call_convention = -1;
456
1.21k
457
1.21k
  // These alignment values are specified in powers of two, so alignment =
458
1.21k
  // 2^n.  The minimum alignment is 2^4 = 16.
459
1.21k
  Header.kernarg_segment_alignment = 4;
460
1.21k
  Header.group_segment_alignment = 4;
461
1.21k
  Header.private_segment_alignment = 4;
462
1.21k
463
1.21k
  if (Version.Major >= 10) {
464
58
    if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
465
41
      Header.wavefront_size = 5;
466
41
      Header.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
467
41
    }
468
58
    Header.compute_pgm_resource_registers |=
469
58
      S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
470
58
      S_00B848_MEM_ORDERED(1);
471
58
  }
472
1.21k
}
473
474
amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
475
43
    const MCSubtargetInfo *STI) {
476
43
  IsaVersion Version = getIsaVersion(STI->getCPU());
477
43
478
43
  amdhsa::kernel_descriptor_t KD;
479
43
  memset(&KD, 0, sizeof(KD));
480
43
481
43
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
482
43
                  amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
483
43
                  amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE);
484
43
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
485
43
                  amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, 1);
486
43
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
487
43
                  amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 1);
488
43
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2,
489
43
                  amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
490
43
  if (Version.Major >= 10) {
491
21
    AMDHSA_BITS_SET(KD.kernel_code_properties,
492
21
                    amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
493
21
                    STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1 : 0);
494
21
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
495
21
                    amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE,
496
21
                    STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1);
497
21
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
498
21
                    amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED, 1);
499
21
  }
500
43
  return KD;
501
43
}
502
503
0
bool isGroupSegment(const GlobalValue *GV) {
504
0
  return GV->getType()->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
505
0
}
506
507
0
bool isGlobalSegment(const GlobalValue *GV) {
508
0
  return GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
509
0
}
510
511
39
bool isReadOnlySegment(const GlobalValue *GV) {
512
39
  return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
513
39
         
GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT2
;
514
39
}
515
516
140
bool shouldEmitConstantsToTextSection(const Triple &TT) {
517
140
  return TT.getOS() != Triple::AMDHSA;
518
140
}
519
520
34.0k
int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
521
34.0k
  Attribute A = F.getFnAttribute(Name);
522
34.0k
  int Result = Default;
523
34.0k
524
34.0k
  if (A.isStringAttribute()) {
525
2.24k
    StringRef Str = A.getValueAsString();
526
2.24k
    if (Str.getAsInteger(0, Result)) {
527
27
      LLVMContext &Ctx = F.getContext();
528
27
      Ctx.emitError("can't parse integer attribute " + Name);
529
27
    }
530
2.24k
  }
531
34.0k
532
34.0k
  return Result;
533
34.0k
}
534
535
std::pair<int, int> getIntegerPairAttribute(const Function &F,
536
                                            StringRef Name,
537
                                            std::pair<int, int> Default,
538
494k
                                            bool OnlyFirstRequired) {
539
494k
  Attribute A = F.getFnAttribute(Name);
540
494k
  if (!A.isStringAttribute())
541
492k
    return Default;
542
1.68k
543
1.68k
  LLVMContext &Ctx = F.getContext();
544
1.68k
  std::pair<int, int> Ints = Default;
545
1.68k
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
546
1.68k
  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
547
28
    Ctx.emitError("can't parse first integer attribute " + Name);
548
28
    return Default;
549
28
  }
550
1.65k
  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
551
48
    if (!OnlyFirstRequired || 
!Strs.second.trim().empty()6
) {
552
42
      Ctx.emitError("can't parse second integer attribute " + Name);
553
42
      return Default;
554
42
    }
555
1.61k
  }
556
1.61k
557
1.61k
  return Ints;
558
1.61k
}
559
560
75.0k
unsigned getVmcntBitMask(const IsaVersion &Version) {
561
75.0k
  unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
562
75.0k
  if (Version.Major < 9)
563
54.8k
    return VmcntLo;
564
20.2k
565
20.2k
  unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo();
566
20.2k
  return VmcntLo | VmcntHi;
567
20.2k
}
568
569
79.5k
unsigned getExpcntBitMask(const IsaVersion &Version) {
570
79.5k
  return (1 << getExpcntBitWidth()) - 1;
571
79.5k
}
572
573
75.3k
unsigned getLgkmcntBitMask(const IsaVersion &Version) {
574
75.3k
  return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
575
75.3k
}
576
577
49.0k
unsigned getWaitcntBitMask(const IsaVersion &Version) {
578
49.0k
  unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
579
49.0k
  unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
580
49.0k
  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(),
581
49.0k
                                getLgkmcntBitWidth(Version.Major));
582
49.0k
  unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
583
49.0k
  if (Version.Major < 9)
584
36.6k
    return Waitcnt;
585
12.4k
586
12.4k
  unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi());
587
12.4k
  return Waitcnt | VmcntHi;
588
12.4k
}
589
590
55.6k
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
591
55.6k
  unsigned VmcntLo =
592
55.6k
      unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
593
55.6k
  if (Version.Major < 9)
594
39.9k
    return VmcntLo;
595
15.6k
596
15.6k
  unsigned VmcntHi =
597
15.6k
      unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
598
15.6k
  VmcntHi <<= getVmcntBitWidthLo();
599
15.6k
  return VmcntLo | VmcntHi;
600
15.6k
}
601
602
55.6k
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
603
55.6k
  return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
604
55.6k
}
605
606
55.6k
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
607
55.6k
  return unpackBits(Waitcnt, getLgkmcntBitShift(),
608
55.6k
                    getLgkmcntBitWidth(Version.Major));
609
55.6k
}
610
611
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
612
49.3k
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
613
49.3k
  Vmcnt = decodeVmcnt(Version, Waitcnt);
614
49.3k
  Expcnt = decodeExpcnt(Version, Waitcnt);
615
49.3k
  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
616
49.3k
}
617
618
6.21k
Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) {
619
6.21k
  Waitcnt Decoded;
620
6.21k
  Decoded.VmCnt = decodeVmcnt(Version, Encoded);
621
6.21k
  Decoded.ExpCnt = decodeExpcnt(Version, Encoded);
622
6.21k
  Decoded.LgkmCnt = decodeLgkmcnt(Version, Encoded);
623
6.21k
  return Decoded;
624
6.21k
}
625
626
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
627
48.9k
                     unsigned Vmcnt) {
628
48.9k
  Waitcnt =
629
48.9k
      packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
630
48.9k
  if (Version.Major < 9)
631
36.5k
    return Waitcnt;
632
12.4k
633
12.4k
  Vmcnt >>= getVmcntBitWidthLo();
634
12.4k
  return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
635
12.4k
}
636
637
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
638
48.9k
                      unsigned Expcnt) {
639
48.9k
  return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
640
48.9k
}
641
642
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
643
48.9k
                       unsigned Lgkmcnt) {
644
48.9k
  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(),
645
48.9k
                                    getLgkmcntBitWidth(Version.Major));
646
48.9k
}
647
648
unsigned encodeWaitcnt(const IsaVersion &Version,
649
48.8k
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
650
48.8k
  unsigned Waitcnt = getWaitcntBitMask(Version);
651
48.8k
  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
652
48.8k
  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
653
48.8k
  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
654
48.8k
  return Waitcnt;
655
48.8k
}
656
657
44.0k
unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
658
44.0k
  return encodeWaitcnt(Version, Decoded.VmCnt, Decoded.ExpCnt, Decoded.LgkmCnt);
659
44.0k
}
660
661
//===----------------------------------------------------------------------===//
662
// hwreg
663
//===----------------------------------------------------------------------===//
664
665
namespace Hwreg {
666
667
102
int64_t getHwregId(const StringRef Name) {
668
1.60k
  for (int Id = ID_SYMBOLIC_FIRST_; Id < ID_SYMBOLIC_LAST_; 
++Id1.50k
) {
669
1.59k
    if (IdSymbolic[Id] && 
Name == IdSymbolic[Id]1.05k
)
670
86
      return Id;
671
1.59k
  }
672
102
  
return ID_UNKNOWN_16
;
673
102
}
674
675
1.01k
static unsigned getLastSymbolicHwreg(const MCSubtargetInfo &STI) {
676
1.01k
  if (isSI(STI) || 
isCI(STI)747
||
isVI(STI)617
)
677
645
    return ID_SYMBOLIC_FIRST_GFX9_;
678
374
  else if (isGFX9(STI))
679
131
    return ID_SYMBOLIC_FIRST_GFX10_;
680
243
  else
681
243
    return ID_SYMBOLIC_LAST_;
682
1.01k
}
683
684
1.01k
bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI) {
685
1.01k
  return ID_SYMBOLIC_FIRST_ <= Id && Id < getLastSymbolicHwreg(STI) &&
686
1.01k
         
IdSymbolic[Id]674
;
687
1.01k
}
688
689
305
bool isValidHwreg(int64_t Id) {
690
305
  return 0 <= Id && isUInt<ID_WIDTH_>(Id);
691
305
}
692
693
299
bool isValidHwregOffset(int64_t Offset) {
694
299
  return 0 <= Offset && isUInt<OFFSET_WIDTH_>(Offset);
695
299
}
696
697
287
bool isValidHwregWidth(int64_t Width) {
698
287
  return 0 <= (Width - 1) && isUInt<WIDTH_M1_WIDTH_>(Width - 1);
699
287
}
700
701
380
uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width) {
702
380
  return (Id << ID_SHIFT_) |
703
380
         (Offset << OFFSET_SHIFT_) |
704
380
         ((Width - 1) << WIDTH_M1_SHIFT_);
705
380
}
706
707
933
StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI) {
708
933
  return isValidHwreg(Id, STI) ? 
IdSymbolic[Id]611
:
""322
;
709
933
}
710
711
933
void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width) {
712
933
  Id = (Val & ID_MASK_) >> ID_SHIFT_;
713
933
  Offset = (Val & OFFSET_MASK_) >> OFFSET_SHIFT_;
714
933
  Width = ((Val & WIDTH_M1_MASK_) >> WIDTH_M1_SHIFT_) + 1;
715
933
}
716
717
} // namespace Hwreg
718
719
//===----------------------------------------------------------------------===//
720
// SendMsg
721
//===----------------------------------------------------------------------===//
722
723
namespace SendMsg {
724
725
148
int64_t getMsgId(const StringRef Name) {
726
951
  for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; 
++i803
) {
727
928
    if (IdSymbolic[i] && 
Name == IdSymbolic[i]490
)
728
125
      return i;
729
928
  }
730
148
  
return ID_UNKNOWN_23
;
731
148
}
732
733
498
static bool isValidMsgId(int64_t MsgId) {
734
498
  return (ID_GAPS_FIRST_ <= MsgId && 
MsgId < ID_GAPS_LAST_486
) &&
IdSymbolic[MsgId]486
;
735
498
}
736
737
447
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI, bool Strict) {
738
447
  if (Strict) {
739
366
    if (MsgId == ID_GS_ALLOC_REQ || 
MsgId == ID_GET_DOORBELL345
)
740
27
      return isGFX9(STI) || 
isGFX10(STI)20
;
741
339
    else
742
339
      return isValidMsgId(MsgId);
743
81
  } else {
744
81
    return 0 <= MsgId && 
isUInt<ID_WIDTH_>(MsgId)77
;
745
81
  }
746
447
}
747
748
159
StringRef getMsgName(int64_t MsgId) {
749
159
  return isValidMsgId(MsgId)? IdSymbolic[MsgId] : 
""0
;
750
159
}
751
752
68
int64_t getMsgOpId(int64_t MsgId, const StringRef Name) {
753
68
  const char* const *S = (MsgId == ID_SYSMSG) ? 
OpSysSymbolic7
:
OpGsSymbolic61
;
754
68
  const int F = (MsgId == ID_SYSMSG) ? 
OP_SYS_FIRST_7
:
OP_GS_FIRST_61
;
755
68
  const int L = (MsgId == ID_SYSMSG) ? 
OP_SYS_LAST_7
:
OP_GS_LAST_61
;
756
184
  for (int i = F; i < L; 
++i116
) {
757
167
    if (Name == S[i]) {
758
51
      return i;
759
51
    }
760
167
  }
761
68
  
return OP_UNKNOWN_17
;
762
68
}
763
764
376
bool isValidMsgOp(int64_t MsgId, int64_t OpId, bool Strict) {
765
376
766
376
  if (!Strict)
767
73
    return 0 <= OpId && 
isUInt<OP_WIDTH_>(OpId)69
;
768
303
769
303
  switch(MsgId)
770
303
  {
771
303
  case ID_GS:
772
147
    return (OP_GS_FIRST_ <= OpId && 
OpId < OP_GS_LAST_143
) &&
OpId != OP_GS_NOP135
;
773
303
  case ID_GS_DONE:
774
37
    return OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_;
775
303
  case ID_SYSMSG:
776
34
    return OP_SYS_FIRST_ <= OpId && 
OpId < OP_SYS_LAST_30
;
777
303
  default:
778
85
    return OpId == OP_NONE_;
779
303
  }
780
303
}
781
782
118
StringRef getMsgOpName(int64_t MsgId, int64_t OpId) {
783
118
  assert(msgRequiresOp(MsgId));
784
118
  return (MsgId == ID_SYSMSG)? 
OpSysSymbolic[OpId]10
:
OpGsSymbolic[OpId]108
;
785
118
}
786
787
266
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, bool Strict) {
788
266
789
266
  if (!Strict)
790
65
    return 0 <= StreamId && 
isUInt<STREAM_ID_WIDTH_>(StreamId)61
;
791
201
792
201
  switch(MsgId)
793
201
  {
794
201
  case ID_GS:
795
110
    return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_;
796
201
  case ID_GS_DONE:
797
29
    return (OpId == OP_GS_NOP)?
798
29
           (StreamId == STREAM_ID_NONE_) :
799
29
           
(0
STREAM_ID_FIRST_ <= StreamId0
&&
StreamId < STREAM_ID_LAST_0
);
800
201
  default:
801
62
    return StreamId == STREAM_ID_NONE_;
802
201
  }
803
201
}
804
805
258
bool msgRequiresOp(int64_t MsgId) {
806
258
  return MsgId == ID_GS || 
MsgId == ID_GS_DONE124
||
MsgId == ID_SYSMSG87
;
807
258
}
808
809
172
bool msgSupportsStream(int64_t MsgId, int64_t OpId) {
810
172
  return (MsgId == ID_GS || 
MsgId == ID_GS_DONE62
) &&
OpId != OP_GS_NOP147
;
811
172
}
812
813
void decodeMsg(unsigned Val,
814
               uint16_t &MsgId,
815
               uint16_t &OpId,
816
261
               uint16_t &StreamId) {
817
261
  MsgId = Val & ID_MASK_;
818
261
  OpId = (Val & OP_MASK_) >> OP_SHIFT_;
819
261
  StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_;
820
261
}
821
822
uint64_t encodeMsg(uint64_t MsgId,
823
                   uint64_t OpId,
824
197
                   uint64_t StreamId) {
825
197
  return (MsgId << ID_SHIFT_) |
826
197
         (OpId << OP_SHIFT_) |
827
197
         (StreamId << STREAM_ID_SHIFT_);
828
197
}
829
830
} // namespace SendMsg
831
832
//===----------------------------------------------------------------------===//
833
//
834
//===----------------------------------------------------------------------===//
835
836
/// \returns the integer value of the "InitialPSInputAddr" attribute of
/// \p F, or 0 when the attribute is absent.
unsigned getInitialPSInputAddr(const Function &F) {
  return getIntegerAttribute(F, "InitialPSInputAddr", 0);
}
839
840
/// \returns true if \p cc is one of the AMDGPU graphics shader calling
/// conventions (VS/LS/HS/ES/GS/PS/CS). Kernels are not shaders.
bool isShader(CallingConv::ID cc) {
  switch(cc) {
    case CallingConv::AMDGPU_VS:
    case CallingConv::AMDGPU_LS:
    case CallingConv::AMDGPU_HS:
    case CallingConv::AMDGPU_ES:
    case CallingConv::AMDGPU_GS:
    case CallingConv::AMDGPU_PS:
    case CallingConv::AMDGPU_CS:
      return true;
    default:
      return false;
  }
}
854
855
79.5k
bool isCompute(CallingConv::ID cc) {
856
79.5k
  return !isShader(cc) || 
cc == CallingConv::AMDGPU_CS7.00k
;
857
79.5k
}
858
859
222k
bool isEntryFunctionCC(CallingConv::ID CC) {
860
222k
  switch (CC) {
861
222k
  case CallingConv::AMDGPU_KERNEL:
862
197k
  case CallingConv::SPIR_KERNEL:
863
197k
  case CallingConv::AMDGPU_VS:
864
197k
  case CallingConv::AMDGPU_GS:
865
197k
  case CallingConv::AMDGPU_PS:
866
197k
  case CallingConv::AMDGPU_CS:
867
197k
  case CallingConv::AMDGPU_ES:
868
197k
  case CallingConv::AMDGPU_HS:
869
197k
  case CallingConv::AMDGPU_LS:
870
197k
    return true;
871
197k
  default:
872
24.5k
    return false;
873
222k
  }
874
222k
}
875
876
/// \returns true if the subtarget has the XNACK feature enabled.
bool hasXNACK(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureXNACK];
}

/// \returns true if the subtarget has the SRAM-ECC feature enabled.
bool hasSRAMECC(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSRAMECC];
}

/// \returns true if the subtarget supports MIMG R128.
bool hasMIMG_R128(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128];
}

/// \returns true if the subtarget uses packed D16 VMEM (i.e. does NOT have
/// the unpacked-D16 feature).
bool hasPackedD16(const MCSubtargetInfo &STI) {
  return !STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem];
}

/// \returns true if the subtarget is Southern Islands (GFX6).
bool isSI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
}

/// \returns true if the subtarget is Sea Islands (GFX7).
bool isCI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
}

/// \returns true if the subtarget is Volcanic Islands (GFX8).
bool isVI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}

/// \returns true if the subtarget is GFX9.
bool isGFX9(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
}

/// \returns true if the subtarget is GFX10.
bool isGFX10(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX10];
}

/// \returns true if the subtarget uses the GCN3 instruction encoding.
bool isGCN3Encoding(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
}
915
916
/// \returns true if \p Reg is a scalar register or SCC. Register tuples are
/// classified by their first sub-register; single registers (no sub-reg 1)
/// are checked directly against the SReg_32 class.
bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
  const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
  const unsigned FirstSubReg = TRI->getSubReg(Reg, 1);
  return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
    Reg == AMDGPU::SCC;
}
922
923
2.28k
bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
924
157k
  for (MCRegAliasIterator R(Reg0, TRI, true); R.isValid(); 
++R154k
) {
925
154k
    if (*R == Reg1) 
return true34
;
926
154k
  }
927
2.28k
  
return false2.25k
;
928
2.28k
}
929
930
// MAP_REG2REG expands to a switch over \c Reg remapping the registers listed
// below. The direction of the mapping is determined by whichever
// CASE_CI_VI / CASE_VI_GFX9_GFX10 definitions are in effect at the point of
// expansion: pseudo->MC for getMCReg, MC->pseudo for mc2PseudoReg.
// Registers not listed map to themselves.
#define MAP_REG2REG \
  using namespace AMDGPU; \
  switch(Reg) { \
  default: return Reg; \
  CASE_CI_VI(FLAT_SCR) \
  CASE_CI_VI(FLAT_SCR_LO) \
  CASE_CI_VI(FLAT_SCR_HI) \
  CASE_VI_GFX9_GFX10(TTMP0) \
  CASE_VI_GFX9_GFX10(TTMP1) \
  CASE_VI_GFX9_GFX10(TTMP2) \
  CASE_VI_GFX9_GFX10(TTMP3) \
  CASE_VI_GFX9_GFX10(TTMP4) \
  CASE_VI_GFX9_GFX10(TTMP5) \
  CASE_VI_GFX9_GFX10(TTMP6) \
  CASE_VI_GFX9_GFX10(TTMP7) \
  CASE_VI_GFX9_GFX10(TTMP8) \
  CASE_VI_GFX9_GFX10(TTMP9) \
  CASE_VI_GFX9_GFX10(TTMP10) \
  CASE_VI_GFX9_GFX10(TTMP11) \
  CASE_VI_GFX9_GFX10(TTMP12) \
  CASE_VI_GFX9_GFX10(TTMP13) \
  CASE_VI_GFX9_GFX10(TTMP14) \
  CASE_VI_GFX9_GFX10(TTMP15) \
  CASE_VI_GFX9_GFX10(TTMP0_TTMP1) \
  CASE_VI_GFX9_GFX10(TTMP2_TTMP3) \
  CASE_VI_GFX9_GFX10(TTMP4_TTMP5) \
  CASE_VI_GFX9_GFX10(TTMP6_TTMP7) \
  CASE_VI_GFX9_GFX10(TTMP8_TTMP9) \
  CASE_VI_GFX9_GFX10(TTMP10_TTMP11) \
  CASE_VI_GFX9_GFX10(TTMP12_TTMP13) \
  CASE_VI_GFX9_GFX10(TTMP14_TTMP15) \
  CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3) \
  CASE_VI_GFX9_GFX10(TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9_GFX10(TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9_GFX10(TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9_GFX10(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9_GFX10(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  }

// Pseudo->MC direction: FLAT_SCR* exist only on CI/VI+ (assert rejects SI).
#define CASE_CI_VI(node) \
  assert(!isSI(STI)); \
  case node: return isCI(STI) ? node##_ci : node##_vi;

// Pseudo->MC direction for TTMPs: GFX9 and GFX10 share one encoding.
#define CASE_VI_GFX9_GFX10(node) \
  case node: return (isGFX9(STI) || isGFX10(STI)) ? node##_gfx9_gfx10 : node##_vi;

/// \returns the subtarget-specific MC register for pseudo register \p Reg.
/// r600 has no such remapping; unlisted registers are returned unchanged.
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
  if (STI.getTargetTriple().getArch() == Triple::r600)
    return Reg;
  MAP_REG2REG
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9_GFX10

// Redefine the CASE_* helpers for the inverse (MC->pseudo) direction.
#define CASE_CI_VI(node)   case node##_ci: case node##_vi:   return node;
#define CASE_VI_GFX9_GFX10(node) case node##_vi: case node##_gfx9_gfx10: return node;

/// \returns the generic pseudo register for subtarget-specific MC register
/// \p Reg (inverse of getMCReg); unlisted registers are returned unchanged.
unsigned mc2PseudoReg(unsigned Reg) {
  MAP_REG2REG
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9_GFX10
#undef MAP_REG2REG
997
998
/// \returns true if operand \p OpNo of \p Desc is a source operand, i.e.
/// its operand type lies in the [OPERAND_SRC_FIRST, OPERAND_SRC_LAST] range.
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
         OpType <= AMDGPU::OPERAND_SRC_LAST;
}
1004
1005
/// \returns true if operand \p OpNo of \p Desc is a floating-point (or
/// packed 16-bit) source operand type; all other operand types yield false.
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  switch (OpType) {
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
    return true;
  default:
    return false;
  }
}
1028
1029
/// \returns true if operand \p OpNo of \p Desc only accepts inline
/// constants (operand type within the OPERAND_REG_INLINE_C range).
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
         OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
}
1035
1036
// Avoid using MCRegisterClass::getSize, since that function will go away
// (move from MC* level to Target* level). Return size in bits.
/// \returns the width in bits of register class \p RCID. Aborts on an
/// unknown register class.
unsigned getRegBitWidth(unsigned RCID) {
  switch (RCID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::VRegOrLds_32RegClassID:
  case AMDGPU::AGPR_32RegClassID:
  case AMDGPU::VS_32RegClassID:
  case AMDGPU::AV_32RegClassID:
  case AMDGPU::SReg_32RegClassID:
  case AMDGPU::SReg_32_XM0RegClassID:
  case AMDGPU::SRegOrLds_32RegClassID:
    return 32;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::VS_64RegClassID:
  case AMDGPU::AV_64RegClassID:
  case AMDGPU::SReg_64RegClassID:
  case AMDGPU::VReg_64RegClassID:
  case AMDGPU::AReg_64RegClassID:
  case AMDGPU::SReg_64_XEXECRegClassID:
    return 64;
  case AMDGPU::SGPR_96RegClassID:
  case AMDGPU::SReg_96RegClassID:
  case AMDGPU::VReg_96RegClassID:
    return 96;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::SReg_128RegClassID:
  case AMDGPU::VReg_128RegClassID:
  case AMDGPU::AReg_128RegClassID:
    return 128;
  case AMDGPU::SGPR_160RegClassID:
  case AMDGPU::SReg_160RegClassID:
  case AMDGPU::VReg_160RegClassID:
    return 160;
  case AMDGPU::SReg_256RegClassID:
  case AMDGPU::VReg_256RegClassID:
    return 256;
  case AMDGPU::SReg_512RegClassID:
  case AMDGPU::VReg_512RegClassID:
  case AMDGPU::AReg_512RegClassID:
    return 512;
  case AMDGPU::SReg_1024RegClassID:
  case AMDGPU::VReg_1024RegClassID:
  case AMDGPU::AReg_1024RegClassID:
    return 1024;
  default:
    llvm_unreachable("Unexpected register class");
  }
}
1086
1087
/// \returns the width in bits of register class \p RC (by class ID).
unsigned getRegBitWidth(const MCRegisterClass &RC) {
  return getRegBitWidth(RC.getID());
}
1090
1091
/// \returns the size in bytes of the register operand \p OpNo of \p Desc,
/// derived from its register class width.
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
  return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
}
1097
1098
103k
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
1099
103k
  if (Literal >= -16 && 
Literal <= 6489.6k
)
1100
51.7k
    return true;
1101
51.3k
1102
51.3k
  uint64_t Val = static_cast<uint64_t>(Literal);
1103
51.3k
  return (Val == DoubleToBits(0.0)) ||
1104
51.3k
         (Val == DoubleToBits(1.0)) ||
1105
51.3k
         
(Val == DoubleToBits(-1.0))42.0k
||
1106
51.3k
         
(Val == DoubleToBits(0.5))41.1k
||
1107
51.3k
         
(Val == DoubleToBits(-0.5))29.1k
||
1108
51.3k
         
(Val == DoubleToBits(2.0))28.9k
||
1109
51.3k
         
(Val == DoubleToBits(-2.0))26.7k
||
1110
51.3k
         
(Val == DoubleToBits(4.0))26.5k
||
1111
51.3k
         
(Val == DoubleToBits(-4.0))24.9k
||
1112
51.3k
         
(13.0k
Val == 0x3fc45f306dc9c88213.0k
&&
HasInv2Pi474
);
1113
51.3k
}
1114
1115
5.08M
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
1116
5.08M
  if (Literal >= -16 && 
Literal <= 644.96M
)
1117
4.24M
    return true;
1118
837k
1119
837k
  // The actual type of the operand does not seem to matter as long
1120
837k
  // as the bits match one of the inline immediate values.  For example:
1121
837k
  //
1122
837k
  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
1123
837k
  // so it is a legal inline immediate.
1124
837k
  //
1125
837k
  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
1126
837k
  // floating-point, so it is a legal inline immediate.
1127
837k
1128
837k
  uint32_t Val = static_cast<uint32_t>(Literal);
1129
837k
  return (Val == FloatToBits(0.0f)) ||
1130
837k
         (Val == FloatToBits(1.0f)) ||
1131
837k
         
(Val == FloatToBits(-1.0f))752k
||
1132
837k
         
(Val == FloatToBits(0.5f))742k
||
1133
837k
         
(Val == FloatToBits(-0.5f))714k
||
1134
837k
         
(Val == FloatToBits(2.0f))713k
||
1135
837k
         
(Val == FloatToBits(-2.0f))685k
||
1136
837k
         
(Val == FloatToBits(4.0f))680k
||
1137
837k
         
(Val == FloatToBits(-4.0f))659k
||
1138
837k
         
(633k
Val == 0x3e22f983633k
&&
HasInv2Pi4.51k
);
1139
837k
}
1140
1141
/// \returns true if the 16-bit value \p Literal can be encoded as an inline
/// constant. Requires \p HasInv2Pi (f16 inline constants arrived together
/// with the inv-2pi constant): a small integer in [-16, 64] or one of the
/// hardware half-precision constants.
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
  if (!HasInv2Pi)
    return false;

  if (Literal >= -16 && Literal <= 64)
    return true;

  switch (static_cast<uint16_t>(Literal)) {
  case 0x3C00: // 1.0
  case 0xBC00: // -1.0
  case 0x3800: // 0.5
  case 0xB800: // -0.5
  case 0x4000: // 2.0
  case 0xC000: // -2.0
  case 0x4400: // 4.0
  case 0xC400: // -4.0
  case 0x3118: // 1/2pi
    return true;
  default:
    return false;
  }
}
1159
1160
12.7k
bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
1161
12.7k
  assert(HasInv2Pi);
1162
12.7k
1163
12.7k
  if (isInt<16>(Literal) || 
isUInt<16>(Literal)3.35k
) {
1164
11.5k
    int16_t Trunc = static_cast<int16_t>(Literal);
1165
11.5k
    return AMDGPU::isInlinableLiteral16(Trunc, HasInv2Pi);
1166
11.5k
  }
1167
1.15k
  if (!(Literal & 0xffff))
1168
63
    return AMDGPU::isInlinableLiteral16(Literal >> 16, HasInv2Pi);
1169
1.09k
1170
1.09k
  int16_t Lo16 = static_cast<int16_t>(Literal);
1171
1.09k
  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
1172
1.09k
  return Lo16 == Hi16 && 
isInlinableLiteral16(Lo16, HasInv2Pi)543
;
1173
1.09k
}
1174
1175
/// \returns true if argument \p A is passed in an SGPR for its function's
/// calling convention (kernels: always; shaders: only inreg/byval args).
bool isArgPassedInSGPR(const Argument *A) {
  const Function *F = A->getParent();

  // Arguments to compute shaders are never a source of divergence.
  CallingConv::ID CC = F->getCallingConv();
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
    // For non-compute shaders, SGPR inputs are marked with either inreg or byval.
    // Everything else is in VGPRs.
    return F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) ||
           F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::ByVal);
  default:
    // TODO: Should calls support inreg for SGPR inputs?
    return false;
  }
}
1200
1201
81.3k
static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
1202
81.3k
  return isGCN3Encoding(ST) || 
isGFX10(ST)38.4k
;
1203
81.3k
}
1204
1205
54.4k
int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
1206
54.4k
  if (hasSMEMByteOffset(ST))
1207
32.1k
    return ByteOffset;
1208
22.3k
  return ByteOffset >> 2;
1209
22.3k
}
1210
1211
26.8k
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
1212
26.8k
  int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
1213
26.8k
  return (hasSMEMByteOffset(ST)) ?
1214
15.8k
    isUInt<20>(EncodedOffset) : 
isUInt<8>(EncodedOffset)11.0k
;
1215
26.8k
}
1216
1217
// Given Imm, split it into the values to put into the SOffset and ImmOffset
// fields in an MUBUF instruction. Return false if it is not possible (due to a
// hardware bug needing a workaround).
//
// The required alignment ensures that individual address components remain
// aligned if they are aligned to begin with. It also ensures that additional
// offsets within the given alignment can be added to the resulting ImmOffset.
//
// On success, ImmOffset receives the in-range immediate and SOffset the
// overflow to be materialized in a register (0 if none was needed).
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
                      const GCNSubtarget *Subtarget, uint32_t Align) {
  // Largest immediate that still leaves room for intra-alignment additions.
  const uint32_t MaxImm = alignDown(4095, Align);
  uint32_t Overflow = 0;

  if (Imm > MaxImm) {
    if (Imm <= MaxImm + 64) {
      // Use an SOffset inline constant for 4..64
      Overflow = Imm - MaxImm;
      Imm = MaxImm;
    } else {
      // Try to keep the same value in SOffset for adjacent loads, so that
      // the corresponding register contents can be re-used.
      //
      // Load values with all low-bits (except for alignment bits) set into
      // SOffset, so that a larger range of values can be covered using
      // s_movk_i32.
      //
      // Atomic operations fail to work correctly when individual address
      // components are unaligned, even if their sum is aligned.
      uint32_t High = (Imm + Align) & ~4095;
      uint32_t Low = (Imm + Align) & 4095;
      Imm = Low;
      Overflow = High - Align;
    }
  }

  // There is a hardware bug in SI and CI which prevents address clamping in
  // MUBUF instructions from working correctly with SOffsets. The immediate
  // offset is unaffected.
  if (Overflow > 0 &&
      Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  ImmOffset = Imm;
  SOffset = Overflow;
  return true;
}
1262
1263
// Initialize mode-register defaults from the function's calling convention,
// then let the "amdgpu-ieee" and "amdgpu-dx10-clamp" attributes (when
// present) override the IEEE and DX10Clamp fields respectively.
SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F) {
  *this = getDefaultForCallingConv(F.getCallingConv());

  StringRef IEEEAttr = F.getFnAttribute("amdgpu-ieee").getValueAsString();
  if (!IEEEAttr.empty())
    IEEE = IEEEAttr == "true";

  StringRef DX10ClampAttr
    = F.getFnAttribute("amdgpu-dx10-clamp").getValueAsString();
  if (!DX10ClampAttr.empty())
    DX10Clamp = DX10ClampAttr == "true";
}
1275
1276
namespace {
1277
1278
struct SourceOfDivergence {
1279
  unsigned Intr;
1280
};
1281
const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);
1282
1283
#define GET_SourcesOfDivergence_IMPL
1284
#include "AMDGPUGenSearchableTables.inc"
1285
1286
} // end anonymous namespace
1287
1288
/// \returns true if intrinsic \p IntrID appears in the TableGen-generated
/// SourcesOfDivergence table (lookup returns a non-null entry).
bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
  return lookupSourceOfDivergence(IntrID);
}
1291
1292
} // namespace AMDGPU
1293
} // namespace llvm