/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/Basic/Targets/AMDGPU.h
Line | Count | Source (jump to first uncovered line) |
1 | | //===--- AMDGPU.h - Declare AMDGPU target feature support -------*- C++ -*-===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This file declares AMDGPU TargetInfo objects. |
10 | | // |
11 | | //===----------------------------------------------------------------------===// |
12 | | |
13 | | #ifndef LLVM_CLANG_LIB_BASIC_TARGETS_AMDGPU_H |
14 | | #define LLVM_CLANG_LIB_BASIC_TARGETS_AMDGPU_H |
15 | | |
16 | | #include "clang/Basic/TargetID.h" |
17 | | #include "clang/Basic/TargetInfo.h" |
18 | | #include "clang/Basic/TargetOptions.h" |
19 | | #include "llvm/ADT/StringSet.h" |
20 | | #include "llvm/ADT/Triple.h" |
21 | | #include "llvm/Support/Compiler.h" |
22 | | #include "llvm/Support/TargetParser.h" |
23 | | |
24 | | namespace clang { |
25 | | namespace targets { |
26 | | |
27 | | class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo { |
28 | | |
29 | | static const Builtin::Info BuiltinInfo[]; |
30 | | static const char *const GCCRegNames[]; |
31 | | |
32 | | enum AddrSpace { |
33 | | Generic = 0, |
34 | | Global = 1, |
35 | | Local = 3, |
36 | | Constant = 4, |
37 | | Private = 5 |
38 | | }; |
39 | | static const LangASMap AMDGPUDefIsGenMap; |
40 | | static const LangASMap AMDGPUDefIsPrivMap; |
41 | | |
42 | | llvm::AMDGPU::GPUKind GPUKind; |
43 | | unsigned GPUFeatures; |
44 | | unsigned WavefrontSize; |
45 | | |
46 | | /// Target ID is device name followed by optional feature name postfixed |
47 | | /// by plus or minus sign delimited by colon, e.g. gfx908:xnack+:sramecc-. |
48 | | /// If the target ID contains feature+, map it to true. |
49 | | /// If the target ID contains feature-, map it to false. |
50 | | /// If the target ID does not contain a feature (default), do not map it. |
51 | | llvm::StringMap<bool> OffloadArchFeatures; |
52 | | std::string TargetID; |
53 | | |
54 | 1.57k | bool hasFP64() const { |
55 | 1.57k | return getTriple().getArch() == llvm::Triple::amdgcn || |
56 | 1.57k | !!(GPUFeatures & llvm::AMDGPU::FEATURE_FP64)227 ; |
57 | 1.57k | } |
58 | | |
59 | | /// Has fast fma f32 |
60 | 522 | bool hasFastFMAF() const { |
61 | 522 | return !!(GPUFeatures & llvm::AMDGPU::FEATURE_FAST_FMA_F32); |
62 | 522 | } |
63 | | |
64 | | /// Has fast fma f64 |
65 | 522 | bool hasFastFMA() const { |
66 | 522 | return getTriple().getArch() == llvm::Triple::amdgcn; |
67 | 522 | } |
68 | | |
69 | 522 | bool hasFMAF() const { |
70 | 522 | return getTriple().getArch() == llvm::Triple::amdgcn || |
71 | 522 | !!(GPUFeatures & llvm::AMDGPU::FEATURE_FMA)75 ; |
72 | 522 | } |
73 | | |
74 | 0 | bool hasFullRateDenormalsF32() const { |
75 | 0 | return !!(GPUFeatures & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32); |
76 | 0 | } |
77 | | |
78 | 522 | bool hasLDEXPF() const { |
79 | 522 | return getTriple().getArch() == llvm::Triple::amdgcn || |
80 | 522 | !!(GPUFeatures & llvm::AMDGPU::FEATURE_LDEXP)75 ; |
81 | 522 | } |
82 | | |
83 | 5.15k | static bool isAMDGCN(const llvm::Triple &TT) { |
84 | 5.15k | return TT.getArch() == llvm::Triple::amdgcn; |
85 | 5.15k | } |
86 | | |
87 | 2.22k | static bool isR600(const llvm::Triple &TT) { |
88 | 2.22k | return TT.getArch() == llvm::Triple::r600; |
89 | 2.22k | } |
90 | | |
91 | | public: |
92 | | AMDGPUTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts); |
93 | | |
94 | | void setAddressSpaceMap(bool DefaultIsPrivate); |
95 | | |
96 | | void adjust(DiagnosticsEngine &Diags, LangOptions &Opts) override; |
97 | | |
98 | 2.22k | uint64_t getPointerWidthV(unsigned AddrSpace) const override { |
99 | 2.22k | if (isR600(getTriple())) |
100 | 163 | return 32; |
101 | | |
102 | 2.06k | if (AddrSpace == Private || AddrSpace == Local1.73k ) |
103 | 808 | return 32; |
104 | | |
105 | 1.25k | return 64; |
106 | 2.06k | } |
107 | | |
108 | 752 | uint64_t getPointerAlignV(unsigned AddrSpace) const override { |
109 | 752 | return getPointerWidthV(AddrSpace); |
110 | 752 | } |
111 | | |
112 | 3.77k | uint64_t getMaxPointerWidth() const override { |
113 | 3.77k | return getTriple().getArch() == llvm::Triple::amdgcn ? 643.50k : 32263 ; |
114 | 3.77k | } |
115 | | |
116 | 12 | const char *getClobbers() const override { return ""; } |
117 | | |
118 | | ArrayRef<const char *> getGCCRegNames() const override; |
119 | | |
120 | 76 | ArrayRef<TargetInfo::GCCRegAlias> getGCCRegAliases() const override { |
121 | 76 | return None; |
122 | 76 | } |
123 | | |
124 | | /// Accepted register names: (n and m are unsigned integers, n < m) |
125 | | /// v |
126 | | /// s |
127 | | /// a |
128 | | /// {vn}, {v[n]} |
129 | | /// {sn}, {s[n]} |
130 | | /// {an}, {a[n]} |
131 | | /// {S} , where S is a special register name |
132 | | /// {v[n:m]} |
133 | | /// {s[n:m]} |
134 | | /// {a[n:m]} |
135 | | bool validateAsmConstraint(const char *&Name, |
136 | 130 | TargetInfo::ConstraintInfo &Info) const override { |
137 | 130 | static const ::llvm::StringSet<> SpecialRegs({ |
138 | 130 | "exec", "vcc", "flat_scratch", "m0", "scc", "tba", "tma", |
139 | 130 | "flat_scratch_lo", "flat_scratch_hi", "vcc_lo", "vcc_hi", "exec_lo", |
140 | 130 | "exec_hi", "tma_lo", "tma_hi", "tba_lo", "tba_hi", |
141 | 130 | }); |
142 | | |
143 | 130 | switch (*Name) { |
144 | 5 | case 'I': |
145 | 5 | Info.setRequiresImmediate(-16, 64); |
146 | 5 | return true; |
147 | 5 | case 'J': |
148 | 5 | Info.setRequiresImmediate(-32768, 32767); |
149 | 5 | return true; |
150 | 1 | case 'A': |
151 | 2 | case 'B': |
152 | 3 | case 'C': |
153 | 3 | Info.setRequiresImmediate(); |
154 | 3 | return true; |
155 | 117 | default: |
156 | 117 | break; |
157 | 130 | } |
158 | | |
159 | 117 | StringRef S(Name); |
160 | | |
161 | 117 | if (S == "DA" || S == "DB"114 ) { |
162 | 6 | Name++; |
163 | 6 | Info.setRequiresImmediate(); |
164 | 6 | return true; |
165 | 6 | } |
166 | | |
167 | 111 | bool HasLeftParen = false; |
168 | 111 | if (S.front() == '{') { |
169 | 57 | HasLeftParen = true; |
170 | 57 | S = S.drop_front(); |
171 | 57 | } |
172 | 111 | if (S.empty()) |
173 | 1 | return false; |
174 | 110 | if (S.front() != 'v' && S.front() != 's'47 && S.front() != 'a'26 ) { |
175 | 5 | if (!HasLeftParen) |
176 | 0 | return false; |
177 | 5 | auto E = S.find('}'); |
178 | 5 | if (!SpecialRegs.count(S.substr(0, E))) |
179 | 2 | return false; |
180 | 3 | S = S.drop_front(E + 1); |
181 | 3 | if (!S.empty()) |
182 | 2 | return false; |
183 | | // Found {S} where S is a special register. |
184 | 1 | Info.setAllowsRegister(); |
185 | 1 | Name = S.data() - 1; |
186 | 1 | return true; |
187 | 3 | } |
188 | 105 | S = S.drop_front(); |
189 | 105 | if (!HasLeftParen) { |
190 | 54 | if (!S.empty()) |
191 | 4 | return false; |
192 | | // Found s, v or a. |
193 | 50 | Info.setAllowsRegister(); |
194 | 50 | Name = S.data() - 1; |
195 | 50 | return true; |
196 | 54 | } |
197 | 51 | bool HasLeftBracket = false; |
198 | 51 | if (!S.empty() && S.front() == '['50 ) { |
199 | 27 | HasLeftBracket = true; |
200 | 27 | S = S.drop_front(); |
201 | 27 | } |
202 | 51 | unsigned long long N; |
203 | 51 | if (S.empty() || consumeUnsignedInteger(S, 10, N)50 ) |
204 | 7 | return false; |
205 | 44 | if (!S.empty() && S.front() == ':'43 ) { |
206 | 20 | if (!HasLeftBracket) |
207 | 0 | return false; |
208 | 20 | S = S.drop_front(); |
209 | 20 | unsigned long long M; |
210 | 20 | if (consumeUnsignedInteger(S, 10, M) || N >= M18 ) |
211 | 3 | return false; |
212 | 20 | } |
213 | 41 | if (HasLeftBracket) { |
214 | 22 | if (S.empty() || S.front() != ']') |
215 | 2 | return false; |
216 | 20 | S = S.drop_front(); |
217 | 20 | } |
218 | 39 | if (S.empty() || S.front() != '}'36 ) |
219 | 5 | return false; |
220 | 34 | S = S.drop_front(); |
221 | 34 | if (!S.empty()) |
222 | 2 | return false; |
223 | | // Found {vn}, {sn}, {an}, {v[n]}, {s[n]}, {a[n]}, {v[n:m]}, {s[n:m]} |
224 | | // or {a[n:m]}. |
225 | 32 | Info.setAllowsRegister(); |
226 | 32 | Name = S.data() - 1; |
227 | 32 | return true; |
228 | 34 | } |
229 | | |
230 | | // \p Constraint will be left pointing at the last character of |
231 | | // the constraint. In practice, it won't be changed unless the |
232 | | // constraint is longer than one character. |
233 | 16 | std::string convertConstraint(const char *&Constraint) const override { |
234 | | |
235 | 16 | StringRef S(Constraint); |
236 | 16 | if (S == "DA" || S == "DB"15 ) { |
237 | 2 | return std::string("^") + std::string(Constraint++, 2); |
238 | 2 | } |
239 | | |
240 | 14 | const char *Begin = Constraint; |
241 | 14 | TargetInfo::ConstraintInfo Info("", ""); |
242 | 14 | if (validateAsmConstraint(Constraint, Info)) |
243 | 14 | return std::string(Begin).substr(0, Constraint - Begin + 1); |
244 | | |
245 | 0 | Constraint = Begin; |
246 | 0 | return std::string(1, *Constraint); |
247 | 14 | } |
248 | | |
249 | | bool |
250 | | initFeatureMap(llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, |
251 | | StringRef CPU, |
252 | | const std::vector<std::string> &FeatureVec) const override; |
253 | | |
254 | | ArrayRef<Builtin::Info> getTargetBuiltins() const override; |
255 | | |
256 | 2.28k | bool useFP16ConversionIntrinsics() const override { return false; } |
257 | | |
258 | | void getTargetDefines(const LangOptions &Opts, |
259 | | MacroBuilder &Builder) const override; |
260 | | |
261 | 390 | BuiltinVaListKind getBuiltinVaListKind() const override { |
262 | 390 | return TargetInfo::CharPtrBuiltinVaList; |
263 | 390 | } |
264 | | |
265 | 17 | bool isValidCPUName(StringRef Name) const override { |
266 | 17 | if (getTriple().getArch() == llvm::Triple::amdgcn) |
267 | 17 | return llvm::AMDGPU::parseArchAMDGCN(Name) != llvm::AMDGPU::GK_NONE; |
268 | 0 | return llvm::AMDGPU::parseArchR600(Name) != llvm::AMDGPU::GK_NONE; |
269 | 17 | } |
270 | | |
271 | | void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const override; |
272 | | |
273 | 284 | bool setCPU(const std::string &Name) override { |
274 | 284 | if (getTriple().getArch() == llvm::Triple::amdgcn) { |
275 | 223 | GPUKind = llvm::AMDGPU::parseArchAMDGCN(Name); |
276 | 223 | GPUFeatures = llvm::AMDGPU::getArchAttrAMDGCN(GPUKind); |
277 | 223 | } else { |
278 | 61 | GPUKind = llvm::AMDGPU::parseArchR600(Name); |
279 | 61 | GPUFeatures = llvm::AMDGPU::getArchAttrR600(GPUKind); |
280 | 61 | } |
281 | | |
282 | 284 | return GPUKind != llvm::AMDGPU::GK_NONE; |
283 | 284 | } |
284 | | |
285 | 526 | void setSupportedOpenCLOpts() override { |
286 | 526 | auto &Opts = getSupportedOpenCLOpts(); |
287 | 526 | Opts["cl_clang_storage_class_specifiers"] = true; |
288 | 526 | Opts["__cl_clang_variadic_functions"] = true; |
289 | 526 | Opts["__cl_clang_function_pointers"] = true; |
290 | 526 | Opts["__cl_clang_non_portable_kernel_param_types"] = true; |
291 | 526 | Opts["__cl_clang_bitfields"] = true; |
292 | | |
293 | 526 | bool IsAMDGCN = isAMDGCN(getTriple()); |
294 | | |
295 | 526 | Opts["cl_khr_fp64"] = hasFP64(); |
296 | 526 | Opts["__opencl_c_fp64"] = hasFP64(); |
297 | | |
298 | 526 | if (IsAMDGCN || GPUKind >= llvm::AMDGPU::GK_CEDAR76 ) { |
299 | 485 | Opts["cl_khr_byte_addressable_store"] = true; |
300 | 485 | Opts["cl_khr_global_int32_base_atomics"] = true; |
301 | 485 | Opts["cl_khr_global_int32_extended_atomics"] = true; |
302 | 485 | Opts["cl_khr_local_int32_base_atomics"] = true; |
303 | 485 | Opts["cl_khr_local_int32_extended_atomics"] = true; |
304 | 485 | } |
305 | | |
306 | 526 | if (IsAMDGCN) { |
307 | 450 | Opts["cl_khr_fp16"] = true; |
308 | 450 | Opts["cl_khr_int64_base_atomics"] = true; |
309 | 450 | Opts["cl_khr_int64_extended_atomics"] = true; |
310 | 450 | Opts["cl_khr_mipmap_image"] = true; |
311 | 450 | Opts["cl_khr_mipmap_image_writes"] = true; |
312 | 450 | Opts["cl_khr_subgroups"] = true; |
313 | 450 | Opts["cl_amd_media_ops"] = true; |
314 | 450 | Opts["cl_amd_media_ops2"] = true; |
315 | | |
316 | 450 | Opts["__opencl_c_images"] = true; |
317 | 450 | Opts["__opencl_c_3d_image_writes"] = true; |
318 | 450 | Opts["cl_khr_3d_image_writes"] = true; |
319 | 450 | } |
320 | 526 | } |
321 | | |
322 | 39 | LangAS getOpenCLTypeAddrSpace(OpenCLTypeKind TK) const override { |
323 | 39 | switch (TK) { |
324 | 18 | case OCLTK_Image: |
325 | 18 | return LangAS::opencl_constant; |
326 | | |
327 | 2 | case OCLTK_ClkEvent: |
328 | 10 | case OCLTK_Queue: |
329 | 12 | case OCLTK_ReserveID: |
330 | 12 | return LangAS::opencl_global; |
331 | | |
332 | 9 | default: |
333 | 9 | return TargetInfo::getOpenCLTypeAddrSpace(TK); |
334 | 39 | } |
335 | 39 | } |
336 | | |
337 | 205 | LangAS getOpenCLBuiltinAddressSpace(unsigned AS) const override { |
338 | 205 | switch (AS) { |
339 | 88 | case 0: |
340 | 88 | return LangAS::opencl_generic; |
341 | 32 | case 1: |
342 | 32 | return LangAS::opencl_global; |
343 | 71 | case 3: |
344 | 71 | return LangAS::opencl_local; |
345 | 11 | case 4: |
346 | 11 | return LangAS::opencl_constant; |
347 | 0 | case 5: |
348 | 0 | return LangAS::opencl_private; |
349 | 3 | default: |
350 | 3 | return getLangASFromTargetAS(AS); |
351 | 205 | } |
352 | 205 | } |
353 | | |
354 | 30 | LangAS getCUDABuiltinAddressSpace(unsigned AS) const override { |
355 | 30 | switch (AS) { |
356 | 6 | case 0: |
357 | 6 | return LangAS::Default; |
358 | 0 | case 1: |
359 | 0 | return LangAS::cuda_device; |
360 | 18 | case 3: |
361 | 18 | return LangAS::cuda_shared; |
362 | 6 | case 4: |
363 | 6 | return LangAS::cuda_constant; |
364 | 0 | default: |
365 | 0 | return getLangASFromTargetAS(AS); |
366 | 30 | } |
367 | 30 | } |
368 | | |
369 | 98 | llvm::Optional<LangAS> getConstantAddressSpace() const override { |
370 | 98 | return getLangASFromTargetAS(Constant); |
371 | 98 | } |
372 | | |
373 | 181 | const llvm::omp::GV &getGridValue() const override { |
374 | 181 | switch (WavefrontSize) { |
375 | 0 | case 32: |
376 | 0 | return llvm::omp::getAMDGPUGridValues<32>(); |
377 | 181 | case 64: |
378 | 181 | return llvm::omp::getAMDGPUGridValues<64>(); |
379 | 0 | default: |
380 | 0 | llvm_unreachable("getGridValue not implemented for this wavesize"); |
381 | 181 | } |
382 | 181 | } |
383 | | |
384 | | /// \returns Target specific vtbl ptr address space. |
385 | 0 | unsigned getVtblPtrAddressSpace() const override { |
386 | 0 | return static_cast<unsigned>(Constant); |
387 | 0 | } |
388 | | |
389 | | /// \returns If a target requires an address within a target specific address |
390 | | /// space \p AddressSpace to be converted in order to be used, then return the |
391 | | /// corresponding target specific DWARF address space. |
392 | | /// |
393 | | /// \returns Otherwise return None and no conversion will be emitted in the |
394 | | /// DWARF. |
395 | | Optional<unsigned> |
396 | 215 | getDWARFAddressSpace(unsigned AddressSpace) const override { |
397 | 215 | const unsigned DWARF_Private = 1; |
398 | 215 | const unsigned DWARF_Local = 2; |
399 | 215 | if (AddressSpace == Private) { |
400 | 81 | return DWARF_Private; |
401 | 134 | } else if (AddressSpace == Local) { |
402 | 27 | return DWARF_Local; |
403 | 107 | } else { |
404 | 107 | return None; |
405 | 107 | } |
406 | 215 | } |
407 | | |
408 | 405 | CallingConvCheckResult checkCallingConvention(CallingConv CC) const override { |
409 | 405 | switch (CC) { |
410 | 396 | default: |
411 | 396 | return CCCR_Warning; |
412 | 0 | case CC_C: |
413 | 0 | case CC_OpenCLKernel: |
414 | 9 | case CC_AMDGPUKernelCall: |
415 | 9 | return CCCR_OK; |
416 | 405 | } |
417 | 405 | } |
418 | | |
419 | | // In amdgcn target the null pointer in global, constant, and generic |
420 | | // address space has value 0 but in private and local address space has |
421 | | // value ~0. |
422 | 3.40k | uint64_t getNullPointerValue(LangAS AS) const override { |
423 | | // FIXME: Also should handle region. |
424 | 3.40k | return (AS == LangAS::opencl_local || AS == LangAS::opencl_private2.90k ) |
425 | 3.40k | ? ~01.06k : 02.33k ; |
426 | 3.40k | } |
427 | | |
428 | | void setAuxTarget(const TargetInfo *Aux) override; |
429 | | |
430 | 10 | bool hasBitIntType() const override { return true; } |
431 | | |
432 | | // Record offload arch features since they are needed for defining the |
433 | | // pre-defined macros. |
434 | | bool handleTargetFeatures(std::vector<std::string> &Features, |
435 | 526 | DiagnosticsEngine &Diags) override { |
436 | 526 | auto TargetIDFeatures = |
437 | 526 | getAllPossibleTargetIDFeatures(getTriple(), getArchNameAMDGCN(GPUKind)); |
438 | 2.31k | for (const auto &F : Features) { |
439 | 2.31k | assert(F.front() == '+' || F.front() == '-'); |
440 | 2.31k | if (F == "+wavefrontsize64") |
441 | 8 | WavefrontSize = 64; |
442 | 2.31k | bool IsOn = F.front() == '+'; |
443 | 2.31k | StringRef Name = StringRef(F).drop_front(); |
444 | 2.31k | if (!llvm::is_contained(TargetIDFeatures, Name)) |
445 | 2.30k | continue; |
446 | 8 | assert(OffloadArchFeatures.find(Name) == OffloadArchFeatures.end()); |
447 | 0 | OffloadArchFeatures[Name] = IsOn; |
448 | 8 | } |
449 | 526 | return true; |
450 | 526 | } |
451 | | |
452 | 222 | Optional<std::string> getTargetID() const override { |
453 | 222 | if (!isAMDGCN(getTriple())) |
454 | 0 | return llvm::None; |
455 | | // When -target-cpu is not set, we assume generic code that is valid for |
456 | | // all GPUs and use an empty string as the target ID to represent that. |
457 | 222 | if (GPUKind == llvm::AMDGPU::GK_NONE) |
458 | 0 | return std::string(""); |
459 | 222 | return getCanonicalTargetID(getArchNameAMDGCN(GPUKind), |
460 | 222 | OffloadArchFeatures); |
461 | 222 | } |
462 | | }; |
463 | | |
464 | | } // namespace targets |
465 | | } // namespace clang |
466 | | |
467 | | #endif // LLVM_CLANG_LIB_BASIC_TARGETS_AMDGPU_H |