Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- AMDGPUTargetStreamer.cpp - AMDGPU Target Streamer Methods ---------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file provides AMDGPU specific target streamer methods.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "AMDGPUTargetStreamer.h"
14
#include "AMDGPU.h"
15
#include "SIDefines.h"
16
#include "Utils/AMDGPUBaseInfo.h"
17
#include "Utils/AMDKernelCodeTUtils.h"
18
#include "llvm/ADT/Twine.h"
19
#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
20
#include "llvm/BinaryFormat/ELF.h"
21
#include "llvm/IR/Constants.h"
22
#include "llvm/IR/Function.h"
23
#include "llvm/IR/Metadata.h"
24
#include "llvm/IR/Module.h"
25
#include "llvm/MC/MCContext.h"
26
#include "llvm/MC/MCELFStreamer.h"
27
#include "llvm/MC/MCObjectFileInfo.h"
28
#include "llvm/MC/MCSectionELF.h"
29
#include "llvm/Support/FormattedStream.h"
30
#include "llvm/Support/TargetParser.h"
31
32
namespace llvm {
33
#include "AMDGPUPTNote.h"
34
}
35
36
using namespace llvm;
37
using namespace llvm::AMDGPU;
38
using namespace llvm::AMDGPU::HSAMD;
39
40
//===----------------------------------------------------------------------===//
41
// AMDGPUTargetStreamer
42
//===----------------------------------------------------------------------===//
43
44
24
/// Parses \p HSAMetadataString into an HSAMD::Metadata object and forwards it
/// to EmitHSAMetadata().
///
/// \returns false when HSAMD::fromString() reports a failure (a truthy
/// result); otherwise whatever EmitHSAMetadata() returns.
bool AMDGPUTargetStreamer::EmitHSAMetadataV2(StringRef HSAMetadataString) {
  HSAMD::Metadata Parsed;
  if (!HSAMD::fromString(HSAMetadataString, Parsed))
    return EmitHSAMetadata(Parsed);
  return false;
}
51
52
11
/// Reads \p HSAMetadataString as YAML into a msgpack::Document and forwards it
/// to EmitHSAMetadata() with strict verification turned off.
///
/// \returns false when the YAML cannot be read; otherwise whatever
/// EmitHSAMetadata() returns.
bool AMDGPUTargetStreamer::EmitHSAMetadataV3(StringRef HSAMetadataString) {
  msgpack::Document Doc;
  if (Doc.fromYAML(HSAMetadataString))
    return EmitHSAMetadata(Doc, /*Strict=*/false);
  return false;
}
58
59
0
/// Translates an ELF::EF_AMDGPU_MACH_* value into a GPU architecture name.
///
/// \param ElfMach machine value; normally one of the ELF::EF_AMDGPU_MACH_*
///        constants handled below. Any other value is treated like
///        ELF::EF_AMDGPU_MACH_NONE.
/// \returns the result of getArchNameAMDGCN() when it is non-empty, otherwise
///          the result of getArchNameR600().
StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
  // ElfMach is a plain unsigned, so it can hold a value that has no case
  // below. Start from GK_NONE so AK is never read while uninitialized.
  AMDGPU::GPUKind AK = GK_NONE;

  switch (ElfMach) {
  case ELF::EF_AMDGPU_MACH_R600_R600:      AK = GK_R600;    break;
  case ELF::EF_AMDGPU_MACH_R600_R630:      AK = GK_R630;    break;
  case ELF::EF_AMDGPU_MACH_R600_RS880:     AK = GK_RS880;   break;
  case ELF::EF_AMDGPU_MACH_R600_RV670:     AK = GK_RV670;   break;
  case ELF::EF_AMDGPU_MACH_R600_RV710:     AK = GK_RV710;   break;
  case ELF::EF_AMDGPU_MACH_R600_RV730:     AK = GK_RV730;   break;
  case ELF::EF_AMDGPU_MACH_R600_RV770:     AK = GK_RV770;   break;
  case ELF::EF_AMDGPU_MACH_R600_CEDAR:     AK = GK_CEDAR;   break;
  case ELF::EF_AMDGPU_MACH_R600_CYPRESS:   AK = GK_CYPRESS; break;
  case ELF::EF_AMDGPU_MACH_R600_JUNIPER:   AK = GK_JUNIPER; break;
  case ELF::EF_AMDGPU_MACH_R600_REDWOOD:   AK = GK_REDWOOD; break;
  case ELF::EF_AMDGPU_MACH_R600_SUMO:      AK = GK_SUMO;    break;
  case ELF::EF_AMDGPU_MACH_R600_BARTS:     AK = GK_BARTS;   break;
  case ELF::EF_AMDGPU_MACH_R600_CAICOS:    AK = GK_CAICOS;  break;
  case ELF::EF_AMDGPU_MACH_R600_CAYMAN:    AK = GK_CAYMAN;  break;
  case ELF::EF_AMDGPU_MACH_R600_TURKS:     AK = GK_TURKS;   break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600:  AK = GK_GFX600;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601:  AK = GK_GFX601;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700:  AK = GK_GFX700;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701:  AK = GK_GFX701;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702:  AK = GK_GFX702;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703:  AK = GK_GFX703;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704:  AK = GK_GFX704;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801:  AK = GK_GFX801;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802:  AK = GK_GFX802;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803:  AK = GK_GFX803;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810:  AK = GK_GFX810;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900:  AK = GK_GFX900;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902:  AK = GK_GFX902;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904:  AK = GK_GFX904;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906:  AK = GK_GFX906;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX908:  AK = GK_GFX908;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909:  AK = GK_GFX909;  break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011: AK = GK_GFX1011; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: AK = GK_GFX1012; break;
  case ELF::EF_AMDGPU_MACH_NONE:           AK = GK_NONE;    break;
  default:                                 break; // Unknown: keep GK_NONE.
  }

  // Prefer the AMDGCN name; only consult the R600 names when AMDGCN has none.
  StringRef GPUName = getArchNameAMDGCN(AK);
  if (!GPUName.empty())
    return GPUName;
  return getArchNameR600(AK);
}
107
108
199
/// Maps a GPU name to the matching ELF::EF_AMDGPU_MACH_* value.
///
/// \param GPU name handed to parseArchAMDGCN() and, when that yields GK_NONE,
///        to parseArchR600().
/// \returns the ELF machine value for the parsed GPU kind
///          (ELF::EF_AMDGPU_MACH_NONE for GK_NONE).
unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
  // Try the AMDGCN names first and fall back to the R600 names.
  const AMDGPU::GPUKind AMDGCNKind = parseArchAMDGCN(GPU);
  const AMDGPU::GPUKind Kind =
      AMDGCNKind != GK_NONE ? AMDGCNKind : parseArchR600(GPU);

  switch (Kind) {
  case GK_R600:    return ELF::EF_AMDGPU_MACH_R600_R600;
  case GK_R630:    return ELF::EF_AMDGPU_MACH_R600_R630;
  case GK_RS880:   return ELF::EF_AMDGPU_MACH_R600_RS880;
  case GK_RV670:   return ELF::EF_AMDGPU_MACH_R600_RV670;
  case GK_RV710:   return ELF::EF_AMDGPU_MACH_R600_RV710;
  case GK_RV730:   return ELF::EF_AMDGPU_MACH_R600_RV730;
  case GK_RV770:   return ELF::EF_AMDGPU_MACH_R600_RV770;
  case GK_CEDAR:   return ELF::EF_AMDGPU_MACH_R600_CEDAR;
  case GK_CYPRESS: return ELF::EF_AMDGPU_MACH_R600_CYPRESS;
  case GK_JUNIPER: return ELF::EF_AMDGPU_MACH_R600_JUNIPER;
  case GK_REDWOOD: return ELF::EF_AMDGPU_MACH_R600_REDWOOD;
  case GK_SUMO:    return ELF::EF_AMDGPU_MACH_R600_SUMO;
  case GK_BARTS:   return ELF::EF_AMDGPU_MACH_R600_BARTS;
  case GK_CAICOS:  return ELF::EF_AMDGPU_MACH_R600_CAICOS;
  case GK_CAYMAN:  return ELF::EF_AMDGPU_MACH_R600_CAYMAN;
  case GK_TURKS:   return ELF::EF_AMDGPU_MACH_R600_TURKS;
  case GK_GFX600:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX600;
  case GK_GFX601:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX601;
  case GK_GFX700:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX700;
  case GK_GFX701:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX701;
  case GK_GFX702:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX702;
  case GK_GFX703:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX703;
  case GK_GFX704:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX704;
  case GK_GFX801:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX801;
  case GK_GFX802:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX802;
  case GK_GFX803:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX803;
  case GK_GFX810:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX810;
  case GK_GFX900:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX900;
  case GK_GFX902:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX902;
  case GK_GFX904:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX904;
  case GK_GFX906:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX906;
  case GK_GFX908:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX908;
  case GK_GFX909:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX909;
  case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010;
  case GK_GFX1011: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011;
  case GK_GFX1012: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012;
  case GK_NONE:    return ELF::EF_AMDGPU_MACH_NONE;
  }

  llvm_unreachable("unknown GPU");
}
155
156
//===----------------------------------------------------------------------===//
157
// AMDGPUTargetAsmStreamer
158
//===----------------------------------------------------------------------===//
159
160
/// Builds the textual-assembly target streamer: \p S is passed to the base
/// class and \p OS is kept as the stream all directives are written to.
AMDGPUTargetAsmStreamer::AMDGPUTargetAsmStreamer(MCStreamer &S,
                                                 formatted_raw_ostream &OS)
    : AMDGPUTargetStreamer(S), OS(OS) {}
163
164
// A hook for emitting stuff at the end.
165
// We use it for emitting the accumulated PAL metadata as directives.
166
2.38k
void AMDGPUTargetAsmStreamer::finish() {
167
2.38k
  std::string S;
168
2.38k
  getPALMetadata()->toString(S);
169
2.38k
  OS << S;
170
2.38k
}
171
172
255
/// Writes a `.amdgcn_target` directive with \p Target as its quoted operand.
void AMDGPUTargetAsmStreamer::EmitDirectiveAMDGCNTarget(StringRef Target) {
  OS << "\t.amdgcn_target \"";
  OS << Target;
  OS << "\"\n";
}
175
176
void AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectVersion(
177
145
    uint32_t Major, uint32_t Minor) {
178
145
  OS << "\t.hsa_code_object_version " <<
179
145
        Twine(Major) << "," << Twine(Minor) << '\n';
180
145
}
181
182
void
183
AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
184
                                                       uint32_t Minor,
185
                                                       uint32_t Stepping,
186
                                                       StringRef VendorName,
187
233
                                                       StringRef ArchName) {
188
233
  OS << "\t.hsa_code_object_isa " <<
189
233
        Twine(Major) << "," << Twine(Minor) << "," << Twine(Stepping) <<
190
233
        ",\"" << VendorName << "\",\"" << ArchName << "\"\n";
191
233
192
233
}
193
194
void
195
930
AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
196
930
  OS << "\t.amd_kernel_code_t\n";
197
930
  dumpAmdKernelCode(&Header, OS, "\t\t");
198
930
  OS << "\t.end_amd_kernel_code_t\n";
199
930
}
200
201
void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
202
948
                                                   unsigned Type) {
203
948
  switch (Type) {
204
948
    
default: 0
llvm_unreachable0
("Invalid AMDGPU symbol type");
205
948
    case ELF::STT_AMDGPU_HSA_KERNEL:
206
948
      OS << "\t.amdgpu_hsa_kernel " << SymbolName << '\n' ;
207
948
      break;
208
948
  }
209
948
}
210
211
/// Writes `.amdgpu_lds <name>, <Size>, <Align>` for \p Symbol.
void AMDGPUTargetAsmStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size,
                                            unsigned Align) {
  OS << "\t.amdgpu_lds " << Symbol->getName();
  OS << ", " << Size;
  OS << ", " << Align;
  OS << '\n';
}
216
217
2.01k
/// Writes a `.amd_amdgpu_isa` directive with \p IsaVersionString as its quoted
/// operand.
///
/// \returns true unconditionally.
bool AMDGPUTargetAsmStreamer::EmitISAVersion(StringRef IsaVersionString) {
  OS << "\t.amd_amdgpu_isa \"";
  OS << IsaVersionString;
  OS << "\"\n";
  return true;
}
221
222
bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
223
151
    const AMDGPU::HSAMD::Metadata &HSAMetadata) {
224
151
  std::string HSAMetadataString;
225
151
  if (HSAMD::toString(HSAMetadata, HSAMetadataString))
226
0
    return false;
227
151
228
151
  OS << '\t' << AssemblerDirectiveBegin << '\n';
229
151
  OS << HSAMetadataString << '\n';
230
151
  OS << '\t' << AssemblerDirectiveEnd << '\n';
231
151
  return true;
232
151
}
233
234
bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
235
263
    msgpack::Document &HSAMetadataDoc, bool Strict) {
236
263
  V3::MetadataVerifier Verifier(Strict);
237
263
  if (!Verifier.verify(HSAMetadataDoc.getRoot()))
238
0
    return false;
239
263
240
263
  std::string HSAMetadataString;
241
263
  raw_string_ostream StrOS(HSAMetadataString);
242
263
  HSAMetadataDoc.toYAML(StrOS);
243
263
244
263
  OS << '\t' << V3::AssemblerDirectiveBegin << '\n';
245
263
  OS << StrOS.str() << '\n';
246
263
  OS << '\t' << V3::AssemblerDirectiveEnd << '\n';
247
263
  return true;
248
263
}
249
250
33
bool AMDGPUTargetAsmStreamer::EmitCodeEnd() {
251
33
  const uint32_t Encoded_s_code_end = 0xbf9f0000;
252
33
  OS << "\t.p2alignl 6, " << Encoded_s_code_end << '\n';
253
33
  OS << "\t.fill 32, 4, " << Encoded_s_code_end << '\n';
254
33
  return true;
255
33
}
256
257
void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
258
    const MCSubtargetInfo &STI, StringRef KernelName,
259
    const amdhsa::kernel_descriptor_t &KD, uint64_t NextVGPR, uint64_t NextSGPR,
260
3.00k
    bool ReserveVCC, bool ReserveFlatScr, bool ReserveXNACK) {
261
3.00k
  IsaVersion IVersion = getIsaVersion(STI.getCPU());
262
3.00k
263
3.00k
  OS << "\t.amdhsa_kernel " << KernelName << '\n';
264
3.00k
265
3.00k
#define PRINT_FIELD(STREAM, DIRECTIVE, KERNEL_DESC, MEMBER_NAME, FIELD_NAME)   \
266
3.00k
  STREAM << "\t\t" << DIRECTIVE << " "                                         \
267
82.8k
         << AMDHSA_BITS_GET(KERNEL_DESC.MEMBER_NAME, FIELD_NAME) << '\n';
268
3.00k
269
3.00k
  OS << "\t\t.amdhsa_group_segment_fixed_size " << KD.group_segment_fixed_size
270
3.00k
     << '\n';
271
3.00k
  OS << "\t\t.amdhsa_private_segment_fixed_size "
272
3.00k
     << KD.private_segment_fixed_size << '\n';
273
3.00k
274
3.00k
  PRINT_FIELD(OS, ".amdhsa_user_sgpr_private_segment_buffer", KD,
275
3.00k
              kernel_code_properties,
276
3.00k
              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
277
3.00k
  PRINT_FIELD(OS, ".amdhsa_user_sgpr_dispatch_ptr", KD,
278
3.00k
              kernel_code_properties,
279
3.00k
              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
280
3.00k
  PRINT_FIELD(OS, ".amdhsa_user_sgpr_queue_ptr", KD,
281
3.00k
              kernel_code_properties,
282
3.00k
              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
283
3.00k
  PRINT_FIELD(OS, ".amdhsa_user_sgpr_kernarg_segment_ptr", KD,
284
3.00k
              kernel_code_properties,
285
3.00k
              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
286
3.00k
  PRINT_FIELD(OS, ".amdhsa_user_sgpr_dispatch_id", KD,
287
3.00k
              kernel_code_properties,
288
3.00k
              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
289
3.00k
  PRINT_FIELD(OS, ".amdhsa_user_sgpr_flat_scratch_init", KD,
290
3.00k
              kernel_code_properties,
291
3.00k
              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
292
3.00k
  PRINT_FIELD(OS, ".amdhsa_user_sgpr_private_segment_size", KD,
293
3.00k
              kernel_code_properties,
294
3.00k
              amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
295
3.00k
  if (IVersion.Major >= 10)
296
3.00k
    
PRINT_FIELD862
(OS, ".amdhsa_wavefront_size32", KD,
297
3.00k
                kernel_code_properties,
298
3.00k
                amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
299
3.00k
  PRINT_FIELD(
300
3.00k
      OS, ".amdhsa_system_sgpr_private_segment_wavefront_offset", KD,
301
3.00k
      compute_pgm_rsrc2,
302
3.00k
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET);
303
3.00k
  PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_x", KD,
304
3.00k
              compute_pgm_rsrc2,
305
3.00k
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
306
3.00k
  PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_y", KD,
307
3.00k
              compute_pgm_rsrc2,
308
3.00k
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
309
3.00k
  PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_z", KD,
310
3.00k
              compute_pgm_rsrc2,
311
3.00k
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
312
3.00k
  PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_info", KD,
313
3.00k
              compute_pgm_rsrc2,
314
3.00k
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
315
3.00k
  PRINT_FIELD(OS, ".amdhsa_system_vgpr_workitem_id", KD,
316
3.00k
              compute_pgm_rsrc2,
317
3.00k
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
318
3.00k
319
3.00k
  // These directives are required.
320
3.00k
  OS << "\t\t.amdhsa_next_free_vgpr " << NextVGPR << '\n';
321
3.00k
  OS << "\t\t.amdhsa_next_free_sgpr " << NextSGPR << '\n';
322
3.00k
323
3.00k
  if (!ReserveVCC)
324
1.97k
    OS << "\t\t.amdhsa_reserve_vcc " << ReserveVCC << '\n';
325
3.00k
  if (IVersion.Major >= 7 && 
!ReserveFlatScr2.99k
)
326
2.76k
    OS << "\t\t.amdhsa_reserve_flat_scratch " << ReserveFlatScr << '\n';
327
3.00k
  if (IVersion.Major >= 8 && 
ReserveXNACK != hasXNACK(STI)2.35k
)
328
4
    OS << "\t\t.amdhsa_reserve_xnack_mask " << ReserveXNACK << '\n';
329
3.00k
330
3.00k
  PRINT_FIELD(OS, ".amdhsa_float_round_mode_32", KD,
331
3.00k
              compute_pgm_rsrc1,
332
3.00k
              amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
333
3.00k
  PRINT_FIELD(OS, ".amdhsa_float_round_mode_16_64", KD,
334
3.00k
              compute_pgm_rsrc1,
335
3.00k
              amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
336
3.00k
  PRINT_FIELD(OS, ".amdhsa_float_denorm_mode_32", KD,
337
3.00k
              compute_pgm_rsrc1,
338
3.00k
              amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
339
3.00k
  PRINT_FIELD(OS, ".amdhsa_float_denorm_mode_16_64", KD,
340
3.00k
              compute_pgm_rsrc1,
341
3.00k
              amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
342
3.00k
  PRINT_FIELD(OS, ".amdhsa_dx10_clamp", KD,
343
3.00k
              compute_pgm_rsrc1,
344
3.00k
              amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP);
345
3.00k
  PRINT_FIELD(OS, ".amdhsa_ieee_mode", KD,
346
3.00k
              compute_pgm_rsrc1,
347
3.00k
              amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE);
348
3.00k
  if (IVersion.Major >= 9)
349
3.00k
    
PRINT_FIELD1.32k
(OS, ".amdhsa_fp16_overflow", KD,
350
3.00k
                compute_pgm_rsrc1,
351
3.00k
                amdhsa::COMPUTE_PGM_RSRC1_FP16_OVFL);
352
3.00k
  if (IVersion.Major >= 10) {
353
862
    PRINT_FIELD(OS, ".amdhsa_workgroup_processor_mode", KD,
354
862
                compute_pgm_rsrc1,
355
862
                amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE);
356
862
    PRINT_FIELD(OS, ".amdhsa_memory_ordered", KD,
357
862
                compute_pgm_rsrc1,
358
862
                amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED);
359
862
    PRINT_FIELD(OS, ".amdhsa_forward_progress", KD,
360
862
                compute_pgm_rsrc1,
361
862
                amdhsa::COMPUTE_PGM_RSRC1_FWD_PROGRESS);
362
862
  }
363
3.00k
  PRINT_FIELD(
364
3.00k
      OS, ".amdhsa_exception_fp_ieee_invalid_op", KD,
365
3.00k
      compute_pgm_rsrc2,
366
3.00k
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
367
3.00k
  PRINT_FIELD(OS, ".amdhsa_exception_fp_denorm_src", KD,
368
3.00k
              compute_pgm_rsrc2,
369
3.00k
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
370
3.00k
  PRINT_FIELD(
371
3.00k
      OS, ".amdhsa_exception_fp_ieee_div_zero", KD,
372
3.00k
      compute_pgm_rsrc2,
373
3.00k
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
374
3.00k
  PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_overflow", KD,
375
3.00k
              compute_pgm_rsrc2,
376
3.00k
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
377
3.00k
  PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_underflow", KD,
378
3.00k
              compute_pgm_rsrc2,
379
3.00k
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
380
3.00k
  PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_inexact", KD,
381
3.00k
              compute_pgm_rsrc2,
382
3.00k
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
383
3.00k
  PRINT_FIELD(OS, ".amdhsa_exception_int_div_zero", KD,
384
3.00k
              compute_pgm_rsrc2,
385
3.00k
              amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
386
3.00k
#undef PRINT_FIELD
387
3.00k
388
3.00k
  OS << "\t.end_amdhsa_kernel\n";
389
3.00k
}
390
391
//===----------------------------------------------------------------------===//
392
// AMDGPUTargetELFStreamer
393
//===----------------------------------------------------------------------===//
394
395
/// Builds the ELF target streamer and rewrites the assembler's ELF header
/// e_flags: the machine field is set from the subtarget's CPU name, and the
/// XNACK and SRAM-ECC bits are set to match what \p STI reports.
AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(MCStreamer &S,
                                                 const MCSubtargetInfo &STI)
    : AMDGPUTargetStreamer(S), Streamer(S) {
  MCAssembler &Assembler = getStreamer().getAssembler();
  unsigned Flags = Assembler.getELFHeaderEFlags();

  // Clears the bits in Mask, then sets them again only when Enabled.
  auto ApplyFlag = [&Flags](unsigned Mask, bool Enabled) {
    Flags &= ~Mask;
    if (Enabled)
      Flags |= Mask;
  };

  // Replace the machine field with the value for this CPU.
  Flags &= ~ELF::EF_AMDGPU_MACH;
  Flags |= getElfMach(STI.getCPU());

  ApplyFlag(ELF::EF_AMDGPU_XNACK, AMDGPU::hasXNACK(STI));
  ApplyFlag(ELF::EF_AMDGPU_SRAM_ECC, AMDGPU::hasSRAMECC(STI));

  Assembler.setELFHeaderEFlags(Flags);
}
414
415
1.28k
/// Returns the wrapped streamer viewed as an MCELFStreamer. The cast is
/// unchecked (static_cast).
MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() {
  MCELFStreamer &ELFStreamer = static_cast<MCELFStreamer &>(Streamer);
  return ELFStreamer;
}
418
419
// A hook for emitting stuff at the end.
420
// We use it for emitting the accumulated PAL metadata as a .note record.
421
193
void AMDGPUTargetELFStreamer::finish() {
422
193
  std::string Blob;
423
193
  const char *Vendor = getPALMetadata()->getVendor();
424
193
  unsigned Type = getPALMetadata()->getType();
425
193
  getPALMetadata()->toBlob(Type, Blob);
426
193
  if (Blob.empty())
427
187
    return;
428
6
  EmitNote(Vendor, MCConstantExpr::create(Blob.size(), getContext()), Type,
429
6
           [&](MCELFStreamer &OS) { OS.EmitBytes(Blob); });
430
6
}
431
432
void AMDGPUTargetELFStreamer::EmitNote(
433
    StringRef Name, const MCExpr *DescSZ, unsigned NoteType,
434
260
    function_ref<void(MCELFStreamer &)> EmitDesc) {
435
260
  auto &S = getStreamer();
436
260
  auto &Context = S.getContext();
437
260
438
260
  auto NameSZ = Name.size() + 1;
439
260
440
260
  S.PushSection();
441
260
  S.SwitchSection(Context.getELFSection(
442
260
    ElfNote::SectionName, ELF::SHT_NOTE, ELF::SHF_ALLOC));
443
260
  S.EmitIntValue(NameSZ, 4);                                  // namesz
444
260
  S.EmitValue(DescSZ, 4);                                     // descz
445
260
  S.EmitIntValue(NoteType, 4);                                // type
446
260
  S.EmitBytes(Name);                                          // name
447
260
  S.EmitValueToAlignment(4, 0, 1, 0);                         // padding 0
448
260
  EmitDesc(S);                                                // desc
449
260
  S.EmitValueToAlignment(4, 0, 1, 0);                         // padding 0
450
260
  S.PopSection();
451
260
}
452
453
40
/// Intentionally empty: this streamer emits nothing for the target directive.
void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget(StringRef Target) {
}
454
455
void AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(
456
38
    uint32_t Major, uint32_t Minor) {
457
38
458
38
  EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(8, getContext()),
459
38
           ElfNote::NT_AMDGPU_HSA_CODE_OBJECT_VERSION, [&](MCELFStreamer &OS) {
460
38
             OS.EmitIntValue(Major, 4);
461
38
             OS.EmitIntValue(Minor, 4);
462
38
           });
463
38
}
464
465
void
466
AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
467
                                                       uint32_t Minor,
468
                                                       uint32_t Stepping,
469
                                                       StringRef VendorName,
470
45
                                                       StringRef ArchName) {
471
45
  uint16_t VendorNameSize = VendorName.size() + 1;
472
45
  uint16_t ArchNameSize = ArchName.size() + 1;
473
45
474
45
  unsigned DescSZ = sizeof(VendorNameSize) + sizeof(ArchNameSize) +
475
45
    sizeof(Major) + sizeof(Minor) + sizeof(Stepping) +
476
45
    VendorNameSize + ArchNameSize;
477
45
478
45
  EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(DescSZ, getContext()),
479
45
           ElfNote::NT_AMDGPU_HSA_ISA, [&](MCELFStreamer &OS) {
480
45
             OS.EmitIntValue(VendorNameSize, 2);
481
45
             OS.EmitIntValue(ArchNameSize, 2);
482
45
             OS.EmitIntValue(Major, 4);
483
45
             OS.EmitIntValue(Minor, 4);
484
45
             OS.EmitIntValue(Stepping, 4);
485
45
             OS.EmitBytes(VendorName);
486
45
             OS.EmitIntValue(0, 1); // NULL terminate VendorName
487
45
             OS.EmitBytes(ArchName);
488
45
             OS.EmitIntValue(0, 1); // NULL terminte ArchName
489
45
           });
490
45
}
491
492
void
493
272
AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
494
272
495
272
  MCStreamer &OS = getStreamer();
496
272
  OS.PushSection();
497
272
  OS.EmitBytes(StringRef((const char*)&Header, sizeof(Header)));
498
272
  OS.PopSection();
499
272
}
500
501
void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
502
277
                                                   unsigned Type) {
503
277
  MCSymbolELF *Symbol = cast<MCSymbolELF>(
504
277
      getStreamer().getContext().getOrCreateSymbol(SymbolName));
505
277
  Symbol->setType(Type);
506
277
}
507
508
/// Marks \p Symbol as an object symbol of \p Size bytes placed in the
/// ELF::SHN_AMDGPU_LDS section index.
///
/// \p Align must be a power of two (asserted). When no binding has been set
/// yet the symbol becomes global and external. If declareCommon() reports a
/// conflict, a fatal error is raised.
void AMDGPUTargetELFStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size,
                                            unsigned Align) {
  assert(isPowerOf2_32(Align));

  auto *LDSSym = cast<MCSymbolELF>(Symbol);
  LDSSym->setType(ELF::STT_OBJECT);

  // Only supply a binding when none was chosen earlier.
  const bool HasBinding = LDSSym->isBindingSet();
  if (!HasBinding) {
    LDSSym->setBinding(ELF::STB_GLOBAL);
    LDSSym->setExternal(true);
  }

  const bool Conflict = LDSSym->declareCommon(Size, Align, true);
  if (Conflict)
    report_fatal_error("Symbol: " + Symbol->getName() +
                       " redeclared as different type");

  LDSSym->setIndex(ELF::SHN_AMDGPU_LDS);
  LDSSym->setSize(MCConstantExpr::create(Size, getContext()));
}
528
529
102
/// Writes \p IsaVersionString as the descriptor of an ELF::NT_AMD_AMDGPU_ISA
/// note under ElfNote::NoteNameV2.
///
/// \returns true unconditionally.
bool AMDGPUTargetELFStreamer::EmitISAVersion(StringRef IsaVersionString) {
  MCContext &Ctx = getContext();

  // Temporary labels bracket the descriptor so its size can be expressed as
  // (end label - begin label).
  MCSymbol *BeginLabel = Ctx.createTempSymbol();
  MCSymbol *EndLabel = Ctx.createTempSymbol();
  const MCExpr *DescSize =
      MCBinaryExpr::createSub(MCSymbolRefExpr::create(EndLabel, Ctx),
                              MCSymbolRefExpr::create(BeginLabel, Ctx), Ctx);

  auto WriteDesc = [&](MCELFStreamer &OS) {
    OS.EmitLabel(BeginLabel);
    OS.EmitBytes(IsaVersionString);
    OS.EmitLabel(EndLabel);
  };

  EmitNote(ElfNote::NoteNameV2, DescSize, ELF::NT_AMD_AMDGPU_ISA, WriteDesc);
  return true;
}
547
548
bool AMDGPUTargetELFStreamer::EmitHSAMetadata(msgpack::Document &HSAMetadataDoc,
549
37
                                              bool Strict) {
550
37
  V3::MetadataVerifier Verifier(Strict);
551
37
  if (!Verifier.verify(HSAMetadataDoc.getRoot()))
552
0
    return false;
553
37
554
37
  std::string HSAMetadataString;
555
37
  HSAMetadataDoc.writeToBlob(HSAMetadataString);
556
37
557
37
  // Create two labels to mark the beginning and end of the desc field
558
37
  // and a MCExpr to calculate the size of the desc field.
559
37
  auto &Context = getContext();
560
37
  auto *DescBegin = Context.createTempSymbol();
561
37
  auto *DescEnd = Context.createTempSymbol();
562
37
  auto *DescSZ = MCBinaryExpr::createSub(
563
37
      MCSymbolRefExpr::create(DescEnd, Context),
564
37
      MCSymbolRefExpr::create(DescBegin, Context), Context);
565
37
566
37
  EmitNote(ElfNote::NoteNameV3, DescSZ, ELF::NT_AMDGPU_METADATA,
567
37
           [&](MCELFStreamer &OS) {
568
37
             OS.EmitLabel(DescBegin);
569
37
             OS.EmitBytes(HSAMetadataString);
570
37
             OS.EmitLabel(DescEnd);
571
37
           });
572
37
  return true;
573
37
}
574
575
bool AMDGPUTargetELFStreamer::EmitHSAMetadata(
576
32
    const AMDGPU::HSAMD::Metadata &HSAMetadata) {
577
32
  std::string HSAMetadataString;
578
32
  if (HSAMD::toString(HSAMetadata, HSAMetadataString))
579
0
    return false;
580
32
581
32
  // Create two labels to mark the beginning and end of the desc field
582
32
  // and a MCExpr to calculate the size of the desc field.
583
32
  auto &Context = getContext();
584
32
  auto *DescBegin = Context.createTempSymbol();
585
32
  auto *DescEnd = Context.createTempSymbol();
586
32
  auto *DescSZ = MCBinaryExpr::createSub(
587
32
    MCSymbolRefExpr::create(DescEnd, Context),
588
32
    MCSymbolRefExpr::create(DescBegin, Context), Context);
589
32
590
32
  EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_AMDGPU_HSA_METADATA,
591
32
           [&](MCELFStreamer &OS) {
592
32
             OS.EmitLabel(DescBegin);
593
32
             OS.EmitBytes(HSAMetadataString);
594
32
             OS.EmitLabel(DescEnd);
595
32
           });
596
32
  return true;
597
32
}
598
599
2
bool AMDGPUTargetELFStreamer::EmitCodeEnd() {
600
2
  const uint32_t Encoded_s_code_end = 0xbf9f0000;
601
2
602
2
  MCStreamer &OS = getStreamer();
603
2
  OS.PushSection();
604
2
  OS.EmitValueToAlignment(64, Encoded_s_code_end, 4);
605
66
  for (unsigned I = 0; I < 32; 
++I64
)
606
64
    OS.EmitIntValue(Encoded_s_code_end, 4);
607
2
  OS.PopSection();
608
2
  return true;
609
2
}
610
611
void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
612
    const MCSubtargetInfo &STI, StringRef KernelName,
613
    const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
614
    uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr,
615
278
    bool ReserveXNACK) {
616
278
  auto &Streamer = getStreamer();
617
278
  auto &Context = Streamer.getContext();
618
278
619
278
  MCSymbolELF *KernelCodeSymbol = cast<MCSymbolELF>(
620
278
      Context.getOrCreateSymbol(Twine(KernelName)));
621
278
  MCSymbolELF *KernelDescriptorSymbol = cast<MCSymbolELF>(
622
278
      Context.getOrCreateSymbol(Twine(KernelName) + Twine(".kd")));
623
278
624
278
  // Copy kernel descriptor symbol's binding, other and visibility from the
625
278
  // kernel code symbol.
626
278
  KernelDescriptorSymbol->setBinding(KernelCodeSymbol->getBinding());
627
278
  KernelDescriptorSymbol->setOther(KernelCodeSymbol->getOther());
628
278
  KernelDescriptorSymbol->setVisibility(KernelCodeSymbol->getVisibility());
629
278
  // Kernel descriptor symbol's type and size are fixed.
630
278
  KernelDescriptorSymbol->setType(ELF::STT_OBJECT);
631
278
  KernelDescriptorSymbol->setSize(
632
278
      MCConstantExpr::create(sizeof(KernelDescriptor), Context));
633
278
634
278
  // The visibility of the kernel code symbol must be protected or less to allow
635
278
  // static relocations from the kernel descriptor to be used.
636
278
  if (KernelCodeSymbol->getVisibility() == ELF::STV_DEFAULT)
637
278
    KernelCodeSymbol->setVisibility(ELF::STV_PROTECTED);
638
278
639
278
  Streamer.EmitLabel(KernelDescriptorSymbol);
640
278
  Streamer.EmitBytes(StringRef(
641
278
      (const char*)&(KernelDescriptor),
642
278
      offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset)));
643
278
  // FIXME: Remove the use of VK_AMDGPU_REL64 in the expression below. The
644
278
  // expression being created is:
645
278
  //   (start of kernel code) - (start of kernel descriptor)
646
278
  // It implies R_AMDGPU_REL64, but ends up being R_AMDGPU_ABS64.
647
278
  Streamer.EmitValue(MCBinaryExpr::createSub(
648
278
      MCSymbolRefExpr::create(
649
278
          KernelCodeSymbol, MCSymbolRefExpr::VK_AMDGPU_REL64, Context),
650
278
      MCSymbolRefExpr::create(
651
278
          KernelDescriptorSymbol, MCSymbolRefExpr::VK_None, Context),
652
278
      Context),
653
278
      sizeof(KernelDescriptor.kernel_code_entry_byte_offset));
654
278
  Streamer.EmitBytes(StringRef(
655
278
      (const char*)&(KernelDescriptor) +
656
278
          offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset) +
657
278
          sizeof(KernelDescriptor.kernel_code_entry_byte_offset),
658
278
      sizeof(KernelDescriptor) -
659
278
          offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset) -
660
278
          sizeof(KernelDescriptor.kernel_code_entry_byte_offset)));
661
278
}