Coverage Report

Created: 2023-09-30 09:22

/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/Basic/Targets/AMDGPU.cpp
Line
Count
Source (jump to first uncovered line)
1
//===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file implements AMDGPU TargetInfo objects.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "AMDGPU.h"
14
#include "clang/Basic/Builtins.h"
15
#include "clang/Basic/CodeGenOptions.h"
16
#include "clang/Basic/Diagnostic.h"
17
#include "clang/Basic/LangOptions.h"
18
#include "clang/Basic/MacroBuilder.h"
19
#include "clang/Basic/TargetBuiltins.h"
20
using namespace clang;
21
using namespace clang::targets;
22
23
namespace clang {
24
namespace targets {
25
26
// If you edit the description strings, make sure you update
27
// getPointerWidthV().
28
29
// Data layout description handed to resetDataLayout() by the AMDGPUTargetInfo
// constructor when the triple is not AMDGCN (i.e. the R600 case).
static const char *const DataLayoutStringR600 =
30
    "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
31
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1";
32
33
// Data layout description handed to resetDataLayout() by the AMDGPUTargetInfo
// constructor when the triple is AMDGCN. Unlike the R600 string it lists
// per-address-space pointer entries (p1..p8) and a trailing "-ni:7:8".
static const char *const DataLayoutStringAMDGCN =
34
    "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
35
    "-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
36
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"
37
    "-ni:7:8";
38
39
// Address space map installed by setAddressSpaceMap() when DefaultIsPrivate is
// false. Each entry is annotated with the language address space it stands
// for; in this map the Default entry is Generic.
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
40
    Generic,  // Default
41
    Global,   // opencl_global
42
    Local,    // opencl_local
43
    Constant, // opencl_constant
44
    Private,  // opencl_private
45
    Generic,  // opencl_generic
46
    Global,   // opencl_global_device
47
    Global,   // opencl_global_host
48
    Global,   // cuda_device
49
    Constant, // cuda_constant
50
    Local,    // cuda_shared
51
    Global,   // sycl_global
52
    Global,   // sycl_global_device
53
    Global,   // sycl_global_host
54
    Local,    // sycl_local
55
    Private,  // sycl_private
56
    Generic,  // ptr32_sptr
57
    Generic,  // ptr32_uptr
58
    Generic,  // ptr64
59
    Generic,  // hlsl_groupshared
60
};
61
62
// Address space map installed by setAddressSpaceMap() when DefaultIsPrivate is
// true. It differs from AMDGPUDefIsGenMap in the Default entry (Private here)
// and in the SYCL entries, which are placeholders in this map.
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
63
    Private,  // Default
64
    Global,   // opencl_global
65
    Local,    // opencl_local
66
    Constant, // opencl_constant
67
    Private,  // opencl_private
68
    Generic,  // opencl_generic
69
    Global,   // opencl_global_device
70
    Global,   // opencl_global_host
71
    Global,   // cuda_device
72
    Constant, // cuda_constant
73
    Local,    // cuda_shared
74
    // SYCL address space values for this map are dummy
75
    Generic, // sycl_global
76
    Generic, // sycl_global_device
77
    Generic, // sycl_global_host
78
    Generic, // sycl_local
79
    Generic, // sycl_private
80
    Generic, // ptr32_sptr
81
    Generic, // ptr32_uptr
82
    Generic, // ptr64
83
    Generic, // hlsl_groupshared
84
85
};
86
} // namespace targets
87
} // namespace clang
88
89
// Table of builtin descriptors produced by expanding BuiltinsAMDGPU.def with
// the two macros below; getTargetBuiltins() hands it out as an ArrayRef.
// BUILTIN rows put nullptr in the slot where TARGET_BUILTIN rows put FEATURE;
// both kinds use HeaderDesc::NO_HEADER and ALL_LANGUAGES.
static constexpr Builtin::Info BuiltinInfo[] = {
90
#define BUILTIN(ID, TYPE, ATTRS)                                               \
91
  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
92
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
93
  {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
94
#include "clang/Basic/BuiltinsAMDGPU.def"
95
};
96
97
// Register name table returned by getGCCRegNames(). Entries appear in this
// order: v0..v255, s0..s127, the named registers (exec, vcc, scc, m0,
// flat_scratch and their _lo/_hi halves), then a0..a255.
const char *const AMDGPUTargetInfo::GCCRegNames[] = {
98
  "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
99
  "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
100
  "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
101
  "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
102
  "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
103
  "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
104
  "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
105
  "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
106
  "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
107
  "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
108
  "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
109
  "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
110
  "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
111
  "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
112
  "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
113
  "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
114
  "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
115
  "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
116
  "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
117
  "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
118
  "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
119
  "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
120
  "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
121
  "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
122
  "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
123
  "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
124
  "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
125
  "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
126
  "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
127
  "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
128
  "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
129
  "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
130
  "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
131
  "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
132
  "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
133
  "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
134
  "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
135
  "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
136
  "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
137
  "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
138
  "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
139
  "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
140
  "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
141
  "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
142
  "flat_scratch_lo", "flat_scratch_hi",
143
  "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
144
  "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
145
  "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
146
  "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
147
  "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
148
  "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
149
  "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
150
  "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
151
  "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
152
  "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
153
  "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
154
  "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
155
  "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
156
  "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
157
  "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
158
  "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
159
  "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
160
  "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
161
  "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
162
  "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
163
  "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
164
  "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
165
  "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
166
  "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
167
  "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
168
  "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
169
  "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
170
  "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
171
  "a252", "a253", "a254", "a255"
172
};
173
174
190
// Returns the static GCCRegNames table wrapped in an ArrayRef.
ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
175
190
  return llvm::ArrayRef(GCCRegNames);
176
190
}
177
178
// Fills Features for the given CPU in three steps: fillAMDGPUFeatureMap() for
// the CPU/triple, then the base TargetInfo::initFeatureMap() with FeatureVec,
// then insertWaveSizeFeature().
//
// Returns false if the base-class step fails, or if insertWaveSizeFeature()
// fails (in which case err_invalid_feature_combination is reported through
// Diags with the message it produced). Returns true otherwise.
bool AMDGPUTargetInfo::initFeatureMap(
179
    llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
180
963
    const std::vector<std::string> &FeatureVec) const {
181
182
963
  using namespace llvm::AMDGPU;
183
963
  fillAMDGPUFeatureMap(CPU, getTriple(), Features);
184
963
  if (!TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec))
185
0
    return false;
186
187
  // TODO: Should move this logic into TargetParser
188
963
  // ErrorMsg is an out-parameter: it is only read when the call below fails.
  std::string ErrorMsg;
189
963
  if (!insertWaveSizeFeature(CPU, getTriple(), Features, ErrorMsg)) {
190
2
    Diags.Report(diag::err_invalid_feature_combination) << ErrorMsg;
191
2
    return false;
192
2
  }
193
194
961
  return true;
195
963
}
196
197
// Fills Values with the valid architecture names for this target, delegating
// to the AMDGCN list when the triple is AMDGCN and to the R600 list otherwise.
void AMDGPUTargetInfo::fillValidCPUList(
198
2
    SmallVectorImpl<StringRef> &Values) const {
199
2
  if (isAMDGCN(getTriple()))
200
1
    llvm::AMDGPU::fillValidArchListAMDGCN(Values);
201
1
  else
202
1
    llvm::AMDGPU::fillValidArchListR600(Values);
203
2
}
204
205
2.21k
// Points AddrSpaceMap at AMDGPUDefIsPrivMap when DefaultIsPrivate is true and
// at AMDGPUDefIsGenMap otherwise.
void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
206
2.21k
  AddrSpaceMap = DefaultIsPrivate ? 
&AMDGPUDefIsPrivMap935
:
&AMDGPUDefIsGenMap1.28k
;
207
2.21k
}
208
209
// Constructs the target description for Triple.
//
// GPUKind is parsed from Opts.CPU and GPUFeatures is looked up from GPUKind,
// using the AMDGCN helpers for AMDGCN triples and the R600 helpers otherwise.
// The body then selects the data layout and address space map and sets the
// type-width, wavefront-size, atomic-width and feature fields shown below.
AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
210
                                   const TargetOptions &Opts)
211
758
    : TargetInfo(Triple),
212
758
      GPUKind(isAMDGCN(Triple) ?
213
679
              llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
214
758
              
llvm::AMDGPU::parseArchR600(Opts.CPU)79
),
215
758
      GPUFeatures(isAMDGCN(Triple) ?
216
679
                  llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
217
758
                  
llvm::AMDGPU::getArchAttrR600(GPUKind)79
) {
218
758
  resetDataLayout(isAMDGCN(getTriple()) ? 
DataLayoutStringAMDGCN679
219
758
                                        : 
DataLayoutStringR60079
);
220
221
758
  // The default address space is treated as private when the OS is Mesa3D or
  // the triple is not AMDGCN.
  setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
222
758
                     
!isAMDGCN(Triple)751
);
223
758
  UseAddrSpaceMapMangling = true;
224
225
758
  if (isAMDGCN(Triple)) {
226
    // __bf16 is always available as a load/store only type on AMDGCN.
227
679
    BFloat16Width = BFloat16Align = 16;
228
679
    BFloat16Format = &llvm::APFloat::BFloat();
229
679
  }
230
231
758
  HasLegalHalfType = true;
232
758
  HasFloat16 = true;
233
758
  // Wavefront size is 32 when the FEATURE_WAVE32 bit is set, 64 otherwise.
  WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 
32104
:
64654
;
234
758
  AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics;
235
236
  // Set pointer width and alignment for the generic address space.
237
758
  PointerWidth = PointerAlign = getPointerWidthV(LangAS::Default);
238
758
  // With 64-bit pointers, long and the size/ptrdiff/intptr types are 64-bit.
  if (getMaxPointerWidth() == 64) {
239
679
    LongWidth = LongAlign = 64;
240
679
    SizeType = UnsignedLong;
241
679
    PtrDiffType = SignedLong;
242
679
    IntPtrType = SignedLong;
243
679
  }
244
245
758
  MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
246
758
  // CUMode is true exactly when the FEATURE_WGP bit is not set.
  CUMode = !(GPUFeatures & llvm::AMDGPU::FEATURE_WGP);
247
758
  for (auto F : {"image-insts", "gws"})
248
1.51k
    ReadOnlyFeatures.insert(F);
249
758
  HalfArgsAndReturns = true;
250
758
}
251
252
1.46k
// Runs the base TargetInfo::adjust() and then re-selects the address space
// map: the default address space is private when compiling OpenCL or when the
// triple is not AMDGCN.
void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) {
253
1.46k
  TargetInfo::adjust(Diags, Opts);
254
  // ToDo: There are still a few places using default address space as private
255
  // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
256
  // can be removed from the following line.
257
1.46k
  setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
258
1.46k
                     
!isAMDGCN(getTriple())629
);
259
1.46k
}
260
261
747
// Returns an ArrayRef over BuiltinInfo whose length is
// clang::AMDGPU::LastTSBuiltin - Builtin::FirstTSBuiltin.
ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
262
747
  return llvm::ArrayRef(BuiltinInfo,
263
747
                        clang::AMDGPU::LastTSBuiltin - Builtin::FirstTSBuiltin);
264
747
}
265
266
// Adds this target's predefined macros to Builder:
//   - __AMD__ and __AMDGPU__ always, plus __AMDGCN__ or __R600__ by triple;
//   - when GPUKind is not GK_NONE, __<canonical arch name>__, and for AMDGCN
//     also the gfx family macro, __amdgcn_processor__, __amdgcn_target_id__
//     and one __amdgcn_feature_<name>__ per target-ID feature that is present
//     in OffloadArchFeatures;
//   - __AMDGCN_UNSAFE_FP_ATOMICS__ when AllowAMDGPUUnsafeFPAtomics is set;
//   - the FMA/LDEXP/FP64 capability macros guarded by the has*() queries;
//   - the wavefront-size and CU-mode macros.
// Opts is not referenced by this body.
void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
267
747
                                        MacroBuilder &Builder) const {
268
747
  Builder.defineMacro("__AMD__");
269
747
  Builder.defineMacro("__AMDGPU__");
270
271
747
  if (isAMDGCN(getTriple()))
272
670
    Builder.defineMacro("__AMDGCN__");
273
77
  else
274
77
    Builder.defineMacro("__R600__");
275
276
747
  if (GPUKind != llvm::AMDGPU::GK_NONE) {
277
387
    StringRef CanonName = isAMDGCN(getTriple()) ?
278
327
      getArchNameAMDGCN(GPUKind) : 
getArchNameR600(GPUKind)60
;
279
387
    Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
280
    // Emit macros for gfx family e.g. gfx906 -> __GFX9__, gfx1030 -> __GFX10__
281
387
    if (isAMDGCN(getTriple())) {
282
327
      assert(CanonName.startswith("gfx") && "Invalid amdgcn canonical name");
283
327
      // The family name is the canonical name minus its last two characters,
      // upper-cased.
      Builder.defineMacro(Twine("__") + Twine(CanonName.drop_back(2).upper()) +
284
327
                          Twine("__"));
285
327
    }
286
387
    if (isAMDGCN(getTriple())) {
287
327
      // Both macros below expand to double-quoted string literals.
      Builder.defineMacro("__amdgcn_processor__",
288
327
                          Twine("\"") + Twine(CanonName) + Twine("\""));
289
327
      Builder.defineMacro("__amdgcn_target_id__",
290
327
                          Twine("\"") + Twine(*getTargetID()) + Twine("\""));
291
327
      for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) {
292
287
        auto Loc = OffloadArchFeatures.find(F);
293
287
        if (Loc != OffloadArchFeatures.end()) {
294
8
          // '-' in the feature name is replaced by '_' for the macro name;
          // the macro value is "1" or "0" according to the stored flag.
          std::string NewF = F.str();
295
8
          std::replace(NewF.begin(), NewF.end(), '-', '_');
296
8
          Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) +
297
8
                                  Twine("__"),
298
8
                              Loc->second ? 
"1"4
:
"0"4
);
299
8
        }
300
287
      }
301
327
    }
302
387
  }
303
304
747
  if (AllowAMDGPUUnsafeFPAtomics)
305
6
    Builder.defineMacro("__AMDGCN_UNSAFE_FP_ATOMICS__");
306
307
  // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
308
  // removed in the near future.
309
747
  if (hasFMAF())
310
689
    Builder.defineMacro("__HAS_FMAF__");
311
747
  if (hasFastFMAF())
312
607
    Builder.defineMacro("FP_FAST_FMAF");
313
747
  if (hasLDEXPF())
314
670
    Builder.defineMacro("__HAS_LDEXPF__");
315
747
  if (hasFP64())
316
670
    Builder.defineMacro("__HAS_FP64__");
317
747
  if (hasFastFMA())
318
670
    Builder.defineMacro("FP_FAST_FMA");
319
320
747
  Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE__", Twine(WavefrontSize));
321
  // ToDo: deprecate this macro for naming consistency.
322
747
  Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize));
323
747
  Builder.defineMacro("__AMDGCN_CUMODE__", Twine(CUMode));
324
747
}
325
326
79
// Copies properties from the auxiliary target Aux via copyAuxTarget() while
// keeping this target's own long double format/width/alignment and float128
// format. Asserts beforehand that the half, float and double formats of the
// two targets already agree. If Aux has a float128 type, this target also
// claims one, represented with DoubleFormat.
void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
327
79
  assert(HalfFormat == Aux->HalfFormat);
328
79
  assert(FloatFormat == Aux->FloatFormat);
329
79
  assert(DoubleFormat == Aux->DoubleFormat);
330
331
  // On x86_64 long double is 80-bit extended precision format, which is
332
  // not supported by AMDGPU. 128-bit floating point format is also not
333
  // supported by AMDGPU. Therefore keep its own format for these two types.
334
79
  auto SaveLongDoubleFormat = LongDoubleFormat;
335
79
  auto SaveFloat128Format = Float128Format;
336
79
  auto SaveLongDoubleWidth = LongDoubleWidth;
337
79
  auto SaveLongDoubleAlign = LongDoubleAlign;
338
79
  copyAuxTarget(Aux);
339
79
  // Restore the values saved above, which copyAuxTarget() may have replaced.
  LongDoubleFormat = SaveLongDoubleFormat;
340
79
  Float128Format = SaveFloat128Format;
341
79
  LongDoubleWidth = SaveLongDoubleWidth;
342
79
  LongDoubleAlign = SaveLongDoubleAlign;
343
  // For certain builtin types supported on the host target, claim they are
344
  // supported so that the host code compiles during the device-side
345
  // compilation.
346
  // FIXME: As the side effect, we also accept `__float128` uses in the device
347
  // code. To reject these builtin types supported in the host target but not in
348
  // the device target, one approach would support `device_builtin` attribute
349
  // so that we could tell the device builtin types from the host ones. This
350
  // also solves the different representations of the same builtin type, such
351
  // as `size_t` in the MSVC environment.
352
79
  if (Aux->hasFloat128Type()) {
353
13
    HasFloat128 = true;
354
13
    Float128Format = DoubleFormat;
355
13
  }
356
79
}