/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/Sema/SemaCUDA.cpp
Line | Count | Source
1 | | //===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs -------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | /// \file |
9 | | /// This file implements semantic analysis for CUDA constructs. |
10 | | /// |
11 | | //===----------------------------------------------------------------------===// |
12 | | |
13 | | #include "clang/AST/ASTContext.h" |
14 | | #include "clang/AST/Decl.h" |
15 | | #include "clang/AST/ExprCXX.h" |
16 | | #include "clang/Basic/Cuda.h" |
17 | | #include "clang/Basic/TargetInfo.h" |
18 | | #include "clang/Lex/Preprocessor.h" |
19 | | #include "clang/Sema/Lookup.h" |
20 | | #include "clang/Sema/ScopeInfo.h" |
21 | | #include "clang/Sema/Sema.h" |
22 | | #include "clang/Sema/SemaDiagnostic.h" |
23 | | #include "clang/Sema/SemaInternal.h" |
24 | | #include "clang/Sema/Template.h" |
25 | | #include "llvm/ADT/SmallVector.h" |
26 | | #include <optional> |
27 | | using namespace clang; |
28 | | |
29 | 216 | template <typename AttrT> static bool hasExplicitAttr(const VarDecl *D) { |
30 | 216 | if (!D) |
31 | 0 | return false; |
32 | 216 | if (auto *A = D->getAttr<AttrT>()) |
33 | 216 | return !A->isImplicit(); |
34 | 0 | return false; |
35 | 216 | } |
36 | | |
37 | 9 | void Sema::PushForceCUDAHostDevice() { |
38 | 9 | assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); |
39 | 9 | ForceCUDAHostDeviceDepth++; |
40 | 9 | } |
41 | | |
42 | 7 | bool Sema::PopForceCUDAHostDevice() { |
43 | 7 | assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); |
44 | 7 | if (ForceCUDAHostDeviceDepth == 0) |
45 | 0 | return false; |
46 | 7 | ForceCUDAHostDeviceDepth--; |
47 | 7 | return true; |
48 | 7 | } |
49 | | |
50 | | ExprResult Sema::ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc, |
51 | | MultiExprArg ExecConfig, |
52 | 181 | SourceLocation GGGLoc) { |
53 | 181 | FunctionDecl *ConfigDecl = Context.getcudaConfigureCallDecl(); |
54 | 181 | if (!ConfigDecl) |
55 | 0 | return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use) |
56 | 0 | << getCudaConfigureFuncName()); |
57 | 181 | QualType ConfigQTy = ConfigDecl->getType(); |
58 | | |
59 | 181 | DeclRefExpr *ConfigDR = new (Context) |
60 | 181 | DeclRefExpr(Context, ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc); |
61 | 181 | MarkFunctionReferenced(LLLLoc, ConfigDecl); |
62 | | |
63 | 181 | return BuildCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr, |
64 | 181 | /*IsExecConfig=*/true); |
65 | 181 | } |
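For reference, a minimal CUDA sketch (hypothetical kernel and caller names) of the construct this handles: the <<<...>>> execution configuration of a kernel launch is what gets rewritten into a call to the configuration function named by getCudaConfigureFuncName().

__global__ void kernel(int *p) { *p = 42; }

void launch(int *p) {
  // Sema turns the <<<grid, block>>> configuration below into a call to
  // cudaConfigureCall / __cudaPushCallConfiguration (or the HIP equivalent).
  kernel<<<1, 128>>>(p);
}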
66 | | |
67 | | Sema::CUDAFunctionTarget |
68 | 36 | Sema::IdentifyCUDATarget(const ParsedAttributesView &Attrs) { |
69 | 36 | bool HasHostAttr = false; |
70 | 36 | bool HasDeviceAttr = false; |
71 | 36 | bool HasGlobalAttr = false; |
72 | 36 | bool HasInvalidTargetAttr = false; |
73 | 37 | for (const ParsedAttr &AL : Attrs) { |
74 | 37 | switch (AL.getKind()) { |
75 | 15 | case ParsedAttr::AT_CUDAGlobal: |
76 | 15 | HasGlobalAttr = true; |
77 | 15 | break; |
78 | 10 | case ParsedAttr::AT_CUDAHost: |
79 | 10 | HasHostAttr = true; |
80 | 10 | break; |
81 | 12 | case ParsedAttr::AT_CUDADevice: |
82 | 12 | HasDeviceAttr = true; |
83 | 12 | break; |
84 | 0 | case ParsedAttr::AT_CUDAInvalidTarget: |
85 | 0 | HasInvalidTargetAttr = true; |
86 | 0 | break; |
87 | 0 | default: |
88 | 0 | break; |
89 | 37 | } |
90 | 37 | } |
91 | | |
92 | 36 | if (HasInvalidTargetAttr) |
93 | 0 | return CFT_InvalidTarget; |
94 | | |
95 | 36 | if (HasGlobalAttr) |
96 | 15 | return CFT_Global; |
97 | | |
98 | 21 | if (HasHostAttr && HasDeviceAttr)
99 | 6 | return CFT_HostDevice; |
100 | | |
101 | 15 | if (HasDeviceAttr) |
102 | 6 | return CFT_Device; |
103 | | |
104 | 9 | return CFT_Host; |
105 | 15 | } |
106 | | |
107 | | template <typename A> |
108 | 304k | static bool hasAttr(const FunctionDecl *D, bool IgnoreImplicitAttr) { |
109 | 352k | return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) {
110 | 352k | return isa<A>(Attribute) &&
111 | 352k | !(IgnoreImplicitAttr && Attribute->isImplicit());
112 | 352k | });
113 | 304k | }
114 | | |
115 | | /// IdentifyCUDATarget - Determine the CUDA compilation target for this function |
116 | | Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D, |
117 | 157k | bool IgnoreImplicitHDAttr) { |
118 | | // Code that lives outside a function is run on the host. |
119 | 157k | if (D == nullptr) |
120 | 1.35k | return CFT_Host; |
121 | | |
122 | 155k | if (D->hasAttr<CUDAInvalidTargetAttr>()) |
123 | 39 | return CFT_InvalidTarget; |
124 | | |
125 | 155k | if (D->hasAttr<CUDAGlobalAttr>()) |
126 | 3.62k | return CFT_Global; |
127 | | |
128 | 152k | if (hasAttr<CUDADeviceAttr>(D, IgnoreImplicitHDAttr)) { |
129 | 116k | if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) |
130 | 20.9k | return CFT_HostDevice; |
131 | 95.3k | return CFT_Device; |
132 | 116k | } else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) {
133 | 5.09k | return CFT_Host; |
134 | 30.8k | } else if ((D->isImplicit() || !D->isUserProvided()) &&
135 | 30.8k | !IgnoreImplicitHDAttr) {
136 | | // Some implicit declarations (like intrinsic functions) are not marked. |
137 | | // Set the most lenient target on them for maximal flexibility. |
138 | 1.48k | return CFT_HostDevice; |
139 | 1.48k | } |
140 | | |
141 | 29.3k | return CFT_Host; |
142 | 152k | } |
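As a quick illustration of the mapping above (hypothetical declarations):

__global__ void k();            // CFT_Global
__device__ void d();            // CFT_Device
__host__ void h();              // CFT_Host
__host__ __device__ void hd();  // CFT_HostDevice
void plain();                   // no target attribute: treated as CFT_Host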
143 | | |
144 | | /// IdentifyCUDATarget - Determine the CUDA compilation target for this variable.
145 | 1.12k | Sema::CUDAVariableTarget Sema::IdentifyCUDATarget(const VarDecl *Var) { |
146 | 1.12k | if (Var->hasAttr<HIPManagedAttr>()) |
147 | 68 | return CVT_Unified; |
148 | | // Only constexpr and const variables with implicit constant attribute
149 | | // are emitted on both sides. Such variables are promoted to device side
150 | | // only if they have static constant initializers on device side.
151 | 1.05k | if ((Var->isConstexpr() || Var->getType().isConstQualified()) &&
152 | 1.05k | Var->hasAttr<CUDAConstantAttr>() &&
153 | 1.05k | !hasExplicitAttr<CUDAConstantAttr>(Var))
154 | 204 | return CVT_Both;
155 | 850 | if (Var->hasAttr<CUDADeviceAttr>() || Var->hasAttr<CUDAConstantAttr>() ||
156 | 850 | Var->hasAttr<CUDASharedAttr>() ||
157 | 850 | Var->getType()->isCUDADeviceBuiltinSurfaceType() ||
158 | 850 | Var->getType()->isCUDADeviceBuiltinTextureType())
159 | 554 | return CVT_Device; |
160 | | // Function-scope static variable without explicit device or constant |
161 | | // attribute are emitted |
162 | | // - on both sides in host device functions |
163 | | // - on device side in device or global functions |
164 | 296 | if (auto *FD = dyn_cast<FunctionDecl>(Var->getDeclContext())) { |
165 | 64 | switch (IdentifyCUDATarget(FD)) { |
166 | 22 | case CFT_HostDevice: |
167 | 22 | return CVT_Both; |
168 | 23 | case CFT_Device: |
169 | 36 | case CFT_Global: |
170 | 36 | return CVT_Device; |
171 | 6 | default: |
172 | 6 | return CVT_Host; |
173 | 64 | } |
174 | 64 | } |
175 | 232 | return CVT_Host; |
176 | 296 | } |
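A short sketch of the variable classification above, with hypothetical names:

__device__ int DevVar;       // CVT_Device
__constant__ int ConstVar;   // CVT_Device
__managed__ int ManagedVar;  // CVT_Unified (HIP __managed__)
int HostVar;                 // CVT_Host
constexpr int CE = 1;        // CVT_Both if it received an implicit __constant__
                             // attribute (see MaybeAddCUDAConstantAttr below)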
177 | | |
178 | | // * CUDA Call preference table |
179 | | // |
180 | | // F - from, |
181 | | // T - to |
182 | | // Ph - preference in host mode |
183 | | // Pd - preference in device mode |
184 | | // H - handled in (x) |
185 | | // Preferences: N:native, SS:same side, HD:host-device, WS:wrong side, --:never. |
186 | | // |
187 | | // | F | T | Ph | Pd | H | |
188 | | // |----+----+-----+-----+-----+ |
189 | | // | d | d | N | N | (c) | |
190 | | // | d | g | -- | -- | (a) | |
191 | | // | d | h | -- | -- | (e) | |
192 | | // | d | hd | HD | HD | (b) | |
193 | | // | g | d | N | N | (c) | |
194 | | // | g | g | -- | -- | (a) | |
195 | | // | g | h | -- | -- | (e) | |
196 | | // | g | hd | HD | HD | (b) | |
197 | | // | h | d | -- | -- | (e) | |
198 | | // | h | g | N | N | (c) | |
199 | | // | h | h | N | N | (c) | |
200 | | // | h | hd | HD | HD | (b) | |
201 | | // | hd | d | WS | SS | (d) | |
202 | | // | hd | g | SS | -- |(d/a)| |
203 | | // | hd | h | SS | WS | (d) | |
204 | | // | hd | hd | HD | HD | (b) | |
205 | | |
206 | | Sema::CUDAFunctionPreference |
207 | | Sema::IdentifyCUDAPreference(const FunctionDecl *Caller, |
208 | 45.8k | const FunctionDecl *Callee) { |
209 | 45.8k | assert(Callee && "Callee must be valid."); |
210 | 45.8k | CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller); |
211 | 45.8k | CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee); |
212 | | |
213 | | // If one of the targets is invalid, the check always fails, no matter what |
214 | | // the other target is. |
215 | 45.8k | if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget) |
216 | 7 | return CFP_Never; |
217 | | |
218 | | // (a) Can't call global from some contexts until we support CUDA's |
219 | | // dynamic parallelism. |
220 | 45.8k | if (CalleeTarget == CFT_Global && |
221 | 45.8k | (CallerTarget == CFT_Global || CallerTarget == CFT_Device))
222 | 7 | return CFP_Never; |
223 | | |
224 | | // (b) Calling HostDevice is OK for everyone. |
225 | 45.8k | if (CalleeTarget == CFT_HostDevice) |
226 | 12.4k | return CFP_HostDevice; |
227 | | |
228 | | // (c) Best case scenarios |
229 | 33.3k | if (CalleeTarget == CallerTarget || |
230 | 33.3k | (CallerTarget == CFT_Host && CalleeTarget == CFT_Global) ||
231 | 33.3k | (CallerTarget == CFT_Global && CalleeTarget == CFT_Device))
232 | 30.5k | return CFP_Native; |
233 | | |
234 | | // (d) HostDevice behavior depends on compilation mode. |
235 | 2.81k | if (CallerTarget == CFT_HostDevice) { |
236 | | // It's OK to call a compilation-mode matching function from an HD one. |
237 | 1.36k | if ((getLangOpts().CUDAIsDevice && CalleeTarget == CFT_Device) ||
238 | 1.36k | (!getLangOpts().CUDAIsDevice &&
239 | 1.08k | (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global)))
240 | 725 | return CFP_SameSide; |
241 | | |
242 | | // Calls from HD to non-mode-matching functions (i.e., to host functions |
243 | | // when compiling in device mode or to device functions when compiling in |
244 | | // host mode) are allowed at the sema level, but eventually rejected if |
245 | | // they're ever codegened. TODO: Reject said calls earlier. |
246 | 641 | return CFP_WrongSide; |
247 | 1.36k | } |
248 | | |
249 | | // (e) Calling across device/host boundary is not something you should do. |
250 | 1.45k | if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) ||
251 | 1.45k | (CallerTarget == CFT_Device && CalleeTarget == CFT_Host) ||
252 | 1.45k | (CallerTarget == CFT_Global && CalleeTarget == CFT_Host))
253 | 1.45k | return CFP_Never; |
254 | | |
255 | 0 | llvm_unreachable("All cases should've been handled by now."); |
256 | 0 | } |
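The preference table translates into call sites like the following sketch (hypothetical functions); the comments give the preference in host versus device compilation mode:

__device__ void dev_fn();
void host_fn();

__host__ __device__ void hd_fn() {
  dev_fn();     // WrongSide in host mode, SameSide in device mode
  host_fn();    // SameSide in host mode, WrongSide in device mode
}

__global__ void kernel() {
  dev_fn();     // Native
  // host_fn(); // CFP_Never: device/global code may not call host functions
}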
257 | | |
258 | 0 | template <typename AttrT> static bool hasImplicitAttr(const FunctionDecl *D) { |
259 | 0 | if (!D) |
260 | 0 | return false; |
261 | 0 | if (auto *A = D->getAttr<AttrT>()) |
262 | 0 | return A->isImplicit(); |
263 | 0 | return D->isImplicit(); |
264 | 0 | }
Unexecuted instantiation: SemaCUDA.cpp:bool hasImplicitAttr<clang::CUDADeviceAttr>(clang::FunctionDecl const*)
Unexecuted instantiation: SemaCUDA.cpp:bool hasImplicitAttr<clang::CUDAHostAttr>(clang::FunctionDecl const*)
265 | | |
266 | 0 | bool Sema::isCUDAImplicitHostDeviceFunction(const FunctionDecl *D) { |
267 | 0 | bool IsImplicitDevAttr = hasImplicitAttr<CUDADeviceAttr>(D); |
268 | 0 | bool IsImplicitHostAttr = hasImplicitAttr<CUDAHostAttr>(D); |
269 | 0 | return IsImplicitDevAttr && IsImplicitHostAttr; |
270 | 0 | } |
271 | | |
272 | | void Sema::EraseUnwantedCUDAMatches( |
273 | | const FunctionDecl *Caller, |
274 | 4 | SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) { |
275 | 4 | if (Matches.size() <= 1) |
276 | 0 | return; |
277 | | |
278 | 4 | using Pair = std::pair<DeclAccessPair, FunctionDecl*>; |
279 | | |
280 | | // Gets the CUDA function preference for a call from Caller to Match. |
281 | 20 | auto GetCFP = [&](const Pair &Match) { |
282 | 20 | return IdentifyCUDAPreference(Caller, Match.second); |
283 | 20 | }; |
284 | | |
285 | | // Find the best call preference among the functions in Matches. |
286 | 4 | CUDAFunctionPreference BestCFP = GetCFP(*std::max_element( |
287 | 4 | Matches.begin(), Matches.end(), |
288 | 4 | [&](const Pair &M1, const Pair &M2) { return GetCFP(M1) < GetCFP(M2); })); |
289 | | |
290 | | // Erase all functions with lower priority. |
291 | 4 | llvm::erase_if(Matches, |
292 | 8 | [&](const Pair &Match) { return GetCFP(Match) < BestCFP; }); |
293 | 4 | } |
294 | | |
295 | | /// When an implicitly-declared special member has to invoke more than one |
296 | | /// base/field special member, conflicts may occur in the targets of these |
297 | | /// members. For example, if one base's member __host__ and another's is |
298 | | /// __device__, it's a conflict. |
299 | | /// This function figures out if the given targets \param Target1 and |
300 | | /// \param Target2 conflict, and if they do not it fills in |
301 | | /// \param ResolvedTarget with a target that resolves for both calls. |
302 | | /// \return true if there's a conflict, false otherwise. |
303 | | static bool |
304 | | resolveCalleeCUDATargetConflict(Sema::CUDAFunctionTarget Target1, |
305 | | Sema::CUDAFunctionTarget Target2, |
306 | 83 | Sema::CUDAFunctionTarget *ResolvedTarget) { |
307 | | // Only free functions and static member functions may be global. |
308 | 83 | assert(Target1 != Sema::CFT_Global); |
309 | 83 | assert(Target2 != Sema::CFT_Global); |
310 | | |
311 | 83 | if (Target1 == Sema::CFT_HostDevice) { |
312 | 53 | *ResolvedTarget = Target2; |
313 | 53 | } else if (Target2 == Sema::CFT_HostDevice) {
314 | 1 | *ResolvedTarget = Target1; |
315 | 29 | } else if (Target1 != Target2) { |
316 | 25 | return true; |
317 | 25 | } else { |
318 | 4 | *ResolvedTarget = Target1; |
319 | 4 | } |
320 | | |
321 | 58 | return false; |
322 | 83 | } |
323 | | |
324 | | bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl, |
325 | | CXXSpecialMember CSM, |
326 | | CXXMethodDecl *MemberDecl, |
327 | | bool ConstRHS, |
328 | 3.39k | bool Diagnose) { |
329 | | // If the defaulted special member is defined lexically outside of its |
330 | | // owning class, or the special member already has explicit device or host |
331 | | // attributes, do not infer. |
332 | 3.39k | bool InClass = MemberDecl->getLexicalParent() == MemberDecl->getParent(); |
333 | 3.39k | bool HasH = MemberDecl->hasAttr<CUDAHostAttr>(); |
334 | 3.39k | bool HasD = MemberDecl->hasAttr<CUDADeviceAttr>(); |
335 | 3.39k | bool HasExplicitAttr = |
336 | 3.39k | (HasD && !MemberDecl->getAttr<CUDADeviceAttr>()->isImplicit()) ||
337 | 3.39k | (HasH && !MemberDecl->getAttr<CUDAHostAttr>()->isImplicit());
338 | 3.39k | if (!InClass || HasExplicitAttr)
339 | 14 | return false; |
340 | | |
341 | 3.37k | std::optional<CUDAFunctionTarget> InferredTarget; |
342 | | |
343 | | // We're going to invoke special member lookup; mark that these special |
344 | | // members are called from this one, and not from its caller. |
345 | 3.37k | ContextRAII MethodContext(*this, MemberDecl); |
346 | | |
347 | | // Look for special members in base classes that should be invoked from here. |
348 | | // Infer the target of this member based on the ones it should call.
349 | | // Skip direct and indirect virtual bases for abstract classes. |
350 | 3.37k | llvm::SmallVector<const CXXBaseSpecifier *, 16> Bases; |
351 | 3.37k | for (const auto &B : ClassDecl->bases()) { |
352 | 391 | if (!B.isVirtual()) { |
353 | 391 | Bases.push_back(&B); |
354 | 391 | } |
355 | 391 | } |
356 | | |
357 | 3.37k | if (!ClassDecl->isAbstract()) { |
358 | 3.28k | llvm::append_range(Bases, llvm::make_pointer_range(ClassDecl->vbases())); |
359 | 3.28k | } |
360 | | |
361 | 3.37k | for (const auto *B : Bases) { |
362 | 391 | const RecordType *BaseType = B->getType()->getAs<RecordType>(); |
363 | 391 | if (!BaseType) { |
364 | 0 | continue; |
365 | 0 | } |
366 | | |
367 | 391 | CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl()); |
368 | 391 | Sema::SpecialMemberOverloadResult SMOR = |
369 | 391 | LookupSpecialMember(BaseClassDecl, CSM, |
370 | 391 | /* ConstArg */ ConstRHS, |
371 | 391 | /* VolatileArg */ false, |
372 | 391 | /* RValueThis */ false, |
373 | 391 | /* ConstThis */ false, |
374 | 391 | /* VolatileThis */ false); |
375 | | |
376 | 391 | if (!SMOR.getMethod()) |
377 | 5 | continue; |
378 | | |
379 | 386 | CUDAFunctionTarget BaseMethodTarget = IdentifyCUDATarget(SMOR.getMethod()); |
380 | 386 | if (!InferredTarget) { |
381 | 356 | InferredTarget = BaseMethodTarget; |
382 | 356 | } else { |
383 | 30 | bool ResolutionError = resolveCalleeCUDATargetConflict( |
384 | 30 | *InferredTarget, BaseMethodTarget, &*InferredTarget); |
385 | 30 | if (ResolutionError) { |
386 | 13 | if (Diagnose) { |
387 | 3 | Diag(ClassDecl->getLocation(), |
388 | 3 | diag::note_implicit_member_target_infer_collision) |
389 | 3 | << (unsigned)CSM << *InferredTarget << BaseMethodTarget; |
390 | 3 | } |
391 | 13 | MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context)); |
392 | 13 | return true; |
393 | 13 | } |
394 | 30 | } |
395 | 386 | } |
396 | | |
397 | | // Same as for bases, but now for special members of fields. |
398 | 3.36k | for (const auto *F : ClassDecl->fields()) { |
399 | 3.24k | if (F->isInvalidDecl()) { |
400 | 0 | continue; |
401 | 0 | } |
402 | | |
403 | 3.24k | const RecordType *FieldType = |
404 | 3.24k | Context.getBaseElementType(F->getType())->getAs<RecordType>(); |
405 | 3.24k | if (!FieldType) { |
406 | 2.90k | continue; |
407 | 2.90k | } |
408 | | |
409 | 338 | CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl()); |
410 | 338 | Sema::SpecialMemberOverloadResult SMOR = |
411 | 338 | LookupSpecialMember(FieldRecDecl, CSM, |
412 | 338 | /* ConstArg */ ConstRHS && !F->isMutable(),
413 | 338 | /* VolatileArg */ false, |
414 | 338 | /* RValueThis */ false, |
415 | 338 | /* ConstThis */ false, |
416 | 338 | /* VolatileThis */ false); |
417 | | |
418 | 338 | if (!SMOR.getMethod()) |
419 | 0 | continue; |
420 | | |
421 | 338 | CUDAFunctionTarget FieldMethodTarget = |
422 | 338 | IdentifyCUDATarget(SMOR.getMethod()); |
423 | 338 | if (!InferredTarget) { |
424 | 285 | InferredTarget = FieldMethodTarget; |
425 | 285 | } else { |
426 | 53 | bool ResolutionError = resolveCalleeCUDATargetConflict( |
427 | 53 | *InferredTarget, FieldMethodTarget, &*InferredTarget); |
428 | 53 | if (ResolutionError) { |
429 | 12 | if (Diagnose) { |
430 | 4 | Diag(ClassDecl->getLocation(), |
431 | 4 | diag::note_implicit_member_target_infer_collision) |
432 | 4 | << (unsigned)CSM << *InferredTarget << FieldMethodTarget; |
433 | 4 | } |
434 | 12 | MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context)); |
435 | 12 | return true; |
436 | 12 | } |
437 | 53 | } |
438 | 338 | } |
439 | | |
440 | | |
441 | | // If no target was inferred, mark this member as __host__ __device__; |
442 | | // it's the least restrictive option that can be invoked from any target. |
443 | 3.35k | bool NeedsH = true, NeedsD = true; |
444 | 3.35k | if (InferredTarget) { |
445 | 616 | if (*InferredTarget == CFT_Device) |
446 | 58 | NeedsH = false; |
447 | 558 | else if (*InferredTarget == CFT_Host) |
448 | 67 | NeedsD = false; |
449 | 616 | } |
450 | | |
451 | | // We are either setting attributes for the first time, or the inferred ones
452 | | // must match the previously set ones.
453 | 3.35k | if (NeedsD && !HasD)
454 | 1.64k | MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
455 | 3.35k | if (NeedsH && !HasH)
456 | 1.65k | MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context)); |
457 | | |
458 | 3.35k | return false; |
459 | 3.36k | } |
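A minimal sketch of the inference, with hypothetical types: the implicit special member picks up the targets of the base/field special members it must call, and conflicting targets mark it with CUDAInvalidTargetAttr.

struct DevOnly  { __device__ DevOnly(); };
struct HostOnly { __host__ HostOnly(); };

struct Inferred : DevOnly {};            // implicit ctor inferred __device__
struct Conflict : DevOnly, HostOnly {};  // implicit ctor marked invalid target;
                                         // diagnosed if it is actually required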
460 | | |
461 | 33 | bool Sema::isEmptyCudaConstructor(SourceLocation Loc, CXXConstructorDecl *CD) { |
462 | 33 | if (!CD->isDefined() && CD->isTemplateInstantiation())
463 | 0 | InstantiateFunctionDefinition(Loc, CD->getFirstDecl()); |
464 | | |
465 | | // (E.2.3.1, CUDA 7.5) A constructor for a class type is considered |
466 | | // empty at a point in the translation unit, if it is either a |
467 | | // trivial constructor |
468 | 33 | if (CD->isTrivial()) |
469 | 9 | return true; |
470 | | |
471 | | // ... or it satisfies all of the following conditions: |
472 | | // The constructor function has been defined. |
473 | | // The constructor function has no parameters, |
474 | | // and the function body is an empty compound statement. |
475 | 24 | if (!(CD->hasTrivialBody() && CD->getNumParams() == 0))
476 | 15 | return false; |
477 | | |
478 | | // Its class has no virtual functions and no virtual base classes. |
479 | 9 | if (CD->getParent()->isDynamicClass()) |
480 | 0 | return false; |
481 | | |
482 | | // Union ctor does not call ctors of its data members. |
483 | 9 | if (CD->getParent()->isUnion()) |
484 | 4 | return true; |
485 | | |
486 | | // The only form of initializer allowed is an empty constructor. |
487 | | // This will recursively check all base classes and member initializers |
488 | 5 | if (!llvm::all_of(CD->inits(), [&](const CXXCtorInitializer *CI) { |
489 | 4 | if (const CXXConstructExpr *CE = |
490 | 4 | dyn_cast<CXXConstructExpr>(CI->getInit())) |
491 | 4 | return isEmptyCudaConstructor(Loc, CE->getConstructor()); |
492 | 0 | return false; |
493 | 4 | })) |
494 | 4 | return false; |
495 | | |
496 | 1 | return true; |
497 | 5 | } |
498 | | |
499 | 26 | bool Sema::isEmptyCudaDestructor(SourceLocation Loc, CXXDestructorDecl *DD) { |
500 | | // No destructor -> no problem. |
501 | 26 | if (!DD) |
502 | 17 | return true; |
503 | | |
504 | 9 | if (!DD->isDefined() && DD->isTemplateInstantiation())
505 | 0 | InstantiateFunctionDefinition(Loc, DD->getFirstDecl()); |
506 | | |
507 | | // (E.2.3.1, CUDA 7.5) A destructor for a class type is considered |
508 | | // empty at a point in the translation unit, if it is either a |
509 | | // trivial destructor
510 | 9 | if (DD->isTrivial()) |
511 | 4 | return true; |
512 | | |
513 | | // ... or it satisfies all of the following conditions: |
514 | | // The destructor function has been defined. |
515 | | // and the function body is an empty compound statement. |
516 | 5 | if (!DD->hasTrivialBody()) |
517 | 3 | return false; |
518 | | |
519 | 2 | const CXXRecordDecl *ClassDecl = DD->getParent(); |
520 | | |
521 | | // Its class has no virtual functions and no virtual base classes. |
522 | 2 | if (ClassDecl->isDynamicClass()) |
523 | 0 | return false; |
524 | | |
525 | | // Union does not have base class and union dtor does not call dtors of its |
526 | | // data members. |
527 | 2 | if (DD->getParent()->isUnion()) |
528 | 2 | return true; |
529 | | |
530 | | // Only empty destructors are allowed. This will recursively check |
531 | | // destructors for all base classes... |
532 | 0 | if (!llvm::all_of(ClassDecl->bases(), [&](const CXXBaseSpecifier &BS) { |
533 | 0 | if (CXXRecordDecl *RD = BS.getType()->getAsCXXRecordDecl()) |
534 | 0 | return isEmptyCudaDestructor(Loc, RD->getDestructor()); |
535 | 0 | return true; |
536 | 0 | })) |
537 | 0 | return false; |
538 | | |
539 | | // ... and member fields. |
540 | 0 | if (!llvm::all_of(ClassDecl->fields(), [&](const FieldDecl *Field) { |
541 | 0 | if (CXXRecordDecl *RD = Field->getType() |
542 | 0 | ->getBaseElementTypeUnsafe() |
543 | 0 | ->getAsCXXRecordDecl()) |
544 | 0 | return isEmptyCudaDestructor(Loc, RD->getDestructor()); |
545 | 0 | return true; |
546 | 0 | })) |
547 | 0 | return false; |
548 | | |
549 | 0 | return true; |
550 | 0 | } |
551 | | |
552 | | namespace { |
553 | | enum CUDAInitializerCheckKind { |
554 | | CICK_DeviceOrConstant, // Check initializer for device/constant variable |
555 | | CICK_Shared, // Check initializer for shared variable |
556 | | }; |
557 | | |
558 | 1.75k | bool IsDependentVar(VarDecl *VD) { |
559 | 1.75k | if (VD->getType()->isDependentType()) |
560 | 31 | return true; |
561 | 1.72k | if (const auto *Init = VD->getInit()) |
562 | 1.57k | return Init->isValueDependent(); |
563 | 142 | return false; |
564 | 1.72k | } |
565 | | |
566 | | // Check whether a variable has an allowed initializer for a CUDA device side |
567 | | // variable with global storage. \p VD may be a host variable to be checked for |
568 | | // potential promotion to device side variable. |
569 | | // |
570 | | // CUDA/HIP allows only empty constructors as initializers for global |
571 | | // variables (see E.2.3.1, CUDA 7.5). The same restriction also applies to all |
572 | | // __shared__ variables whether they are local or not (they all are implicitly |
573 | | // static in CUDA). One exception is that CUDA allows constant initializers |
574 | | // for __constant__ and __device__ variables. |
575 | | bool HasAllowedCUDADeviceStaticInitializer(Sema &S, VarDecl *VD, |
576 | 644 | CUDAInitializerCheckKind CheckKind) { |
577 | 644 | assert(!VD->isInvalidDecl() && VD->hasGlobalStorage()); |
578 | 644 | assert(!IsDependentVar(VD) && "do not check dependent var"); |
579 | 644 | const Expr *Init = VD->getInit(); |
580 | 644 | auto IsEmptyInit = [&](const Expr *Init) { |
581 | 643 | if (!Init) |
582 | 0 | return true; |
583 | 643 | if (const auto *CE = dyn_cast<CXXConstructExpr>(Init)) { |
584 | 29 | return S.isEmptyCudaConstructor(VD->getLocation(), CE->getConstructor()); |
585 | 29 | } |
586 | 614 | return false; |
587 | 643 | }; |
588 | 644 | auto IsConstantInit = [&](const Expr *Init) { |
589 | 628 | assert(Init); |
590 | 628 | ASTContext::CUDAConstantEvalContextRAII EvalCtx(S.Context, |
591 | 628 | /*NoWronSidedVars=*/true); |
592 | 628 | return Init->isConstantInitializer(S.Context, |
593 | 628 | VD->getType()->isReferenceType()); |
594 | 628 | }; |
595 | 644 | auto HasEmptyDtor = [&](VarDecl *VD) { |
596 | 611 | if (const auto *RD = VD->getType()->getAsCXXRecordDecl()) |
597 | 26 | return S.isEmptyCudaDestructor(VD->getLocation(), RD->getDestructor()); |
598 | 585 | return true; |
599 | 611 | }; |
600 | 644 | if (CheckKind == CICK_Shared) |
601 | 5 | return IsEmptyInit(Init) && HasEmptyDtor(VD);
602 | 639 | return S.LangOpts.GPUAllowDeviceInit ||
603 | 639 | ((IsEmptyInit(Init) || IsConstantInit(Init)) && HasEmptyDtor(VD));
604 | 644 | } |
605 | | } // namespace |
606 | | |
607 | 2.88k | void Sema::checkAllowedCUDAInitializer(VarDecl *VD) { |
608 | | // Do not check dependent variables since the ctor/dtor/initializer are not |
609 | | // determined. Do it after instantiation. |
610 | 2.88k | if (VD->isInvalidDecl() || !VD->hasInit() || !VD->hasGlobalStorage() ||
611 | 2.88k | IsDependentVar(VD))
612 | 2.24k | return; |
613 | 640 | const Expr *Init = VD->getInit(); |
614 | 640 | bool IsSharedVar = VD->hasAttr<CUDASharedAttr>(); |
615 | 640 | bool IsDeviceOrConstantVar = |
616 | 640 | !IsSharedVar &&
617 | 640 | (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>());
618 | 640 | if (IsDeviceOrConstantVar || IsSharedVar) {
619 | 444 | if (HasAllowedCUDADeviceStaticInitializer(
620 | 444 | *this, VD, IsSharedVar ? CICK_Shared : CICK_DeviceOrConstant))
621 | 426 | return;
622 | 18 | Diag(VD->getLocation(),
623 | 18 | IsSharedVar ? diag::err_shared_var_init : diag::err_dynamic_var_init)
624 | 18 | << Init->getSourceRange(); |
625 | 18 | VD->setInvalidDecl(); |
626 | 196 | } else { |
627 | | // This is a host-side global variable. Check that the initializer is |
628 | | // callable from the host side. |
629 | 196 | const FunctionDecl *InitFn = nullptr; |
630 | 196 | if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init)) { |
631 | 37 | InitFn = CE->getConstructor(); |
632 | 159 | } else if (const CallExpr *CE = dyn_cast<CallExpr>(Init)) { |
633 | 3 | InitFn = CE->getDirectCallee(); |
634 | 3 | } |
635 | 196 | if (InitFn) { |
636 | 40 | CUDAFunctionTarget InitFnTarget = IdentifyCUDATarget(InitFn); |
637 | 40 | if (InitFnTarget != CFT_Host && InitFnTarget != CFT_HostDevice) {
638 | 3 | Diag(VD->getLocation(), diag::err_ref_bad_target_global_initializer) |
639 | 3 | << InitFnTarget << InitFn; |
640 | 3 | Diag(InitFn->getLocation(), diag::note_previous_decl) << InitFn; |
641 | 3 | VD->setInvalidDecl(); |
642 | 3 | } |
643 | 40 | } |
644 | 196 | } |
645 | 640 | } |
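A hedged sketch of what this check accepts and rejects, using hypothetical types:

struct Empty   { __host__ __device__ Empty() {} };
struct Dynamic { __host__ __device__ Dynamic() { x = 1; } int x; };

__device__ int A = 42;      // constant initializer: allowed
__device__ Empty B;         // empty constructor: allowed
__device__ Dynamic C;       // dynamic initialization: err_dynamic_var_init

__global__ void k() {
  __shared__ int S = 1;     // initializer on __shared__: err_shared_var_init
}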
646 | | |
647 | | // With -fcuda-host-device-constexpr, an unattributed constexpr function is |
648 | | // treated as implicitly __host__ __device__, unless: |
649 | | // * it is a variadic function (device-side variadic functions are not |
650 | | // allowed), or |
651 | | // * a __device__ function with this signature was already declared, in which |
652 | | // case we output an error, unless the __device__ decl is in a
653 | | // system header, in which case we leave the constexpr function unattributed. |
654 | | // |
655 | | // In addition, all function decls are treated as __host__ __device__ when |
656 | | // ForceCUDAHostDeviceDepth > 0 (corresponding to code within a |
657 | | // #pragma clang force_cuda_host_device_begin/end |
658 | | // pair). |
659 | | void Sema::maybeAddCUDAHostDeviceAttrs(FunctionDecl *NewD, |
660 | 14.9k | const LookupResult &Previous) { |
661 | 14.9k | assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); |
662 | | |
663 | 14.9k | if (ForceCUDAHostDeviceDepth > 0) { |
664 | 127 | if (!NewD->hasAttr<CUDAHostAttr>()) |
665 | 125 | NewD->addAttr(CUDAHostAttr::CreateImplicit(Context)); |
666 | 127 | if (!NewD->hasAttr<CUDADeviceAttr>()) |
667 | 125 | NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context)); |
668 | 127 | return; |
669 | 127 | } |
670 | | |
671 | 14.8k | if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() ||
672 | 14.8k | NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() ||
673 | 14.8k | NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>())
674 | 14.7k | return; |
675 | | |
676 | | // Is D a __device__ function with the same signature as NewD, ignoring CUDA |
677 | | // attributes? |
678 | 33 | auto IsMatchingDeviceFn = [&](NamedDecl *D) { |
679 | 12 | if (UsingShadowDecl *Using = dyn_cast<UsingShadowDecl>(D)) |
680 | 4 | D = Using->getTargetDecl(); |
681 | 12 | FunctionDecl *OldD = D->getAsFunction(); |
682 | 12 | return OldD && OldD->hasAttr<CUDADeviceAttr>() && |
683 | 12 | !OldD->hasAttr<CUDAHostAttr>() && |
684 | 12 | !IsOverload(NewD, OldD, /* UseMemberUsingDeclRules = */ false, |
685 | 12 | /* ConsiderCudaAttrs = */ false); |
686 | 12 | }; |
687 | 33 | auto It = llvm::find_if(Previous, IsMatchingDeviceFn); |
688 | 33 | if (It != Previous.end()) { |
689 | | // We found a __device__ function with the same name and signature as NewD |
690 | | // (ignoring CUDA attrs). This is an error unless that function is defined |
691 | | // in a system header, in which case we simply return without making NewD |
692 | | // host+device. |
693 | 12 | NamedDecl *Match = *It; |
694 | 12 | if (!getSourceManager().isInSystemHeader(Match->getLocation())) { |
695 | 4 | Diag(NewD->getLocation(), |
696 | 4 | diag::err_cuda_unattributed_constexpr_cannot_overload_device) |
697 | 4 | << NewD; |
698 | 4 | Diag(Match->getLocation(), |
699 | 4 | diag::note_cuda_conflicting_device_function_declared_here); |
700 | 4 | } |
701 | 12 | return; |
702 | 12 | } |
703 | | |
704 | 21 | NewD->addAttr(CUDAHostAttr::CreateImplicit(Context)); |
705 | 21 | NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context)); |
706 | 21 | } |
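A small sketch of the rule described above, with hypothetical functions:

// With -fcuda-host-device-constexpr, this unattributed constexpr function is
// treated as if it were declared __host__ __device__:
constexpr int square(int x) { return x * x; }

// ...unless a __device__ function with the same signature already exists:
__device__ int cube(int x);
constexpr int cube(int x) { return x * x * x; }  // error, unless the __device__
                                                 // declaration is in a system header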
707 | | |
708 | | // TODO: `__constant__` memory may be a limited resource for certain targets. |
709 | | // A safeguard may be needed at the end of compilation pipeline if |
710 | | // `__constant__` memory usage goes beyond limit. |
711 | 3.07M | void Sema::MaybeAddCUDAConstantAttr(VarDecl *VD) { |
712 | | // Do not promote dependent variables since the ctor/dtor/initializer are
713 | | // not determined. Do it after instantiation.
714 | 3.07M | if (getLangOpts().CUDAIsDevice && !VD->hasAttr<CUDAConstantAttr>() &&
715 | 3.07M | !VD->hasAttr<CUDASharedAttr>() &&
716 | 3.07M | (VD->isFileVarDecl() || VD->isStaticDataMember()) &&
717 | 3.07M | !IsDependentVar(VD) &&
718 | 3.07M | ((VD->isConstexpr() || VD->getType().isConstQualified()) &&
719 | 395 | HasAllowedCUDADeviceStaticInitializer(*this, VD, |
720 | 200 | CICK_DeviceOrConstant))) { |
721 | 183 | VD->addAttr(CUDAConstantAttr::CreateImplicit(getASTContext())); |
722 | 183 | } |
723 | 3.07M | } |
724 | | |
725 | | Sema::SemaDiagnosticBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc, |
726 | 108 | unsigned DiagID) { |
727 | 108 | assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); |
728 | 108 | FunctionDecl *CurFunContext = getCurFunctionDecl(/*AllowLambda=*/true); |
729 | 108 | SemaDiagnosticBuilder::Kind DiagKind = [&] { |
730 | 108 | if (!CurFunContext) |
731 | 2 | return SemaDiagnosticBuilder::K_Nop; |
732 | 106 | switch (CurrentCUDATarget()) { |
733 | 10 | case CFT_Global: |
734 | 62 | case CFT_Device: |
735 | 62 | return SemaDiagnosticBuilder::K_Immediate; |
736 | 34 | case CFT_HostDevice: |
737 | | // An HD function counts as host code if we're compiling for host, and |
738 | | // device code if we're compiling for device. Defer any errors in device |
739 | | // mode until the function is known-emitted. |
740 | 34 | if (!getLangOpts().CUDAIsDevice) |
741 | 6 | return SemaDiagnosticBuilder::K_Nop; |
742 | 28 | if (IsLastErrorImmediate && Diags.getDiagnosticIDs()->isBuiltinNote(DiagID)) |
743 | 7 | return SemaDiagnosticBuilder::K_Immediate; |
744 | 21 | return (getEmissionStatus(CurFunContext) == |
745 | 21 | FunctionEmissionStatus::Emitted) |
746 | 21 | ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
747 | 21 | : SemaDiagnosticBuilder::K_Deferred;
748 | 10 | default: |
749 | 10 | return SemaDiagnosticBuilder::K_Nop; |
750 | 106 | } |
751 | 106 | }(); |
752 | 108 | return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, CurFunContext, *this); |
753 | 108 | } |
754 | | |
755 | | Sema::SemaDiagnosticBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc, |
756 | 42 | unsigned DiagID) { |
757 | 42 | assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); |
758 | 42 | FunctionDecl *CurFunContext = getCurFunctionDecl(/*AllowLambda=*/true); |
759 | 42 | SemaDiagnosticBuilder::Kind DiagKind = [&] { |
760 | 42 | if (!CurFunContext) |
761 | 0 | return SemaDiagnosticBuilder::K_Nop; |
762 | 42 | switch (CurrentCUDATarget()) { |
763 | 6 | case CFT_Host: |
764 | 6 | return SemaDiagnosticBuilder::K_Immediate; |
765 | 1 | case CFT_HostDevice: |
766 | | // An HD function counts as host code if we're compiling for host, and |
767 | | // device code if we're compiling for device. Defer any errors in device |
768 | | // mode until the function is known-emitted. |
769 | 1 | if (getLangOpts().CUDAIsDevice) |
770 | 0 | return SemaDiagnosticBuilder::K_Nop; |
771 | 1 | if (IsLastErrorImmediate && Diags.getDiagnosticIDs()->isBuiltinNote(DiagID)) |
772 | 0 | return SemaDiagnosticBuilder::K_Immediate; |
773 | 1 | return (getEmissionStatus(CurFunContext) == |
774 | 1 | FunctionEmissionStatus::Emitted) |
775 | 1 | ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
776 | 1 | : SemaDiagnosticBuilder::K_Deferred; |
777 | 35 | default: |
778 | 35 | return SemaDiagnosticBuilder::K_Nop; |
779 | 42 | } |
780 | 42 | }(); |
781 | 42 | return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, CurFunContext, *this); |
782 | 42 | } |
783 | | |
784 | 23.2k | bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) { |
785 | 23.2k | assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); |
786 | 23.2k | assert(Callee && "Callee may not be null."); |
787 | | |
788 | 23.2k | auto &ExprEvalCtx = ExprEvalContexts.back(); |
789 | 23.2k | if (ExprEvalCtx.isUnevaluated() || ExprEvalCtx.isConstantEvaluated())
790 | 203 | return true; |
791 | | |
792 | | // FIXME: Is bailing out early correct here? Should we instead assume that |
793 | | // the caller is a global initializer? |
794 | 23.0k | FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda=*/true); |
795 | 23.0k | if (!Caller) |
796 | 311 | return true; |
797 | | |
798 | | // If the caller is known-emitted, mark the callee as known-emitted. |
799 | | // Otherwise, mark the call in our call graph so we can traverse it later. |
800 | 22.7k | bool CallerKnownEmitted = |
801 | 22.7k | getEmissionStatus(Caller) == FunctionEmissionStatus::Emitted; |
802 | 22.7k | SemaDiagnosticBuilder::Kind DiagKind = [this, Caller, Callee, |
803 | 22.7k | CallerKnownEmitted] { |
804 | 22.7k | switch (IdentifyCUDAPreference(Caller, Callee)) { |
805 | 32 | case CFP_Never: |
806 | 174 | case CFP_WrongSide: |
807 | 174 | assert(Caller && "Never/wrongSide calls require a non-null caller"); |
808 | | // If we know the caller will be emitted, we know this wrong-side call |
809 | | // will be emitted, so it's an immediate error. Otherwise, defer the |
810 | | // error until we know the caller is emitted. |
811 | 174 | return CallerKnownEmitted |
812 | 174 | ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
813 | 174 | : SemaDiagnosticBuilder::K_Deferred;
814 | 22.5k | default: |
815 | 22.5k | return SemaDiagnosticBuilder::K_Nop; |
816 | 22.7k | } |
817 | 22.7k | }(); |
818 | | |
819 | 22.7k | if (DiagKind == SemaDiagnosticBuilder::K_Nop) { |
820 | | // For -fgpu-rdc, keep track of external kernels used by host functions. |
821 | 22.5k | if (LangOpts.CUDAIsDevice && LangOpts.GPURelocatableDeviceCode &&
822 | 22.5k | Callee->hasAttr<CUDAGlobalAttr>() && !Callee->isDefined())
823 | 16 | getASTContext().CUDAExternalDeviceDeclODRUsedByHost.insert(Callee); |
824 | 22.5k | return true; |
825 | 22.5k | } |
826 | | |
827 | | // Avoid emitting this error twice for the same location. Using a hashtable |
828 | | // like this is unfortunate, but because we must continue parsing as normal |
829 | | // after encountering a deferred error, it's otherwise very tricky for us to |
830 | | // ensure that we only emit this deferred error once. |
831 | 174 | if (!LocsWithCUDACallDiags.insert({Caller, Loc}).second) |
832 | 74 | return true; |
833 | | |
834 | 100 | SemaDiagnosticBuilder(DiagKind, Loc, diag::err_ref_bad_target, Caller, *this) |
835 | 100 | << IdentifyCUDATarget(Callee) << /*function*/ 0 << Callee |
836 | 100 | << IdentifyCUDATarget(Caller); |
837 | 100 | if (!Callee->getBuiltinID()) |
838 | 92 | SemaDiagnosticBuilder(DiagKind, Callee->getLocation(), |
839 | 92 | diag::note_previous_decl, Caller, *this) |
840 | 92 | << Callee; |
841 | 100 | return DiagKind != SemaDiagnosticBuilder::K_Immediate && |
842 | 100 | DiagKind != SemaDiagnosticBuilder::K_ImmediateWithCallStack; |
843 | 174 | } |
844 | | |
845 | | // Check the wrong-sided reference capture of lambda for CUDA/HIP. |
846 | | // A lambda function may capture a stack variable by reference when it is |
847 | | // defined and uses the capture by reference when the lambda is called. When |
848 | | // the capture and use happen on different sides, the capture is invalid and |
849 | | // should be diagnosed. |
850 | | void Sema::CUDACheckLambdaCapture(CXXMethodDecl *Callee, |
851 | 98 | const sema::Capture &Capture) { |
852 | | // In host compilation we only need to check lambda functions emitted on host |
853 | | // side. In such lambda functions, a reference capture is invalid only |
854 | | // if the lambda structure is populated by a device function or kernel then |
855 | | // is passed to and called by a host function. However that is impossible, |
856 | | // since a device function or kernel can only call a device function, also a |
857 | | // kernel cannot pass a lambda back to a host function since we cannot |
858 | | // define a kernel argument type which can hold the lambda before the lambda |
859 | | // itself is defined. |
860 | 98 | if (!LangOpts.CUDAIsDevice) |
861 | 32 | return; |
862 | | |
863 | | // File-scope lambda can only do init captures for global variables, which |
864 | | // results in passing by value for these global variables. |
865 | 66 | FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda=*/true); |
866 | 66 | if (!Caller) |
867 | 0 | return; |
868 | | |
869 | | // In device compilation, we only need to check lambda functions which are |
870 | | // emitted on device side. For such lambdas, a reference capture is invalid |
871 | | // only if the lambda structure is populated by a host function then passed |
872 | | // to and called in a device function or kernel. |
873 | 66 | bool CalleeIsDevice = Callee->hasAttr<CUDADeviceAttr>(); |
874 | 66 | bool CallerIsHost = |
875 | 66 | !Caller->hasAttr<CUDAGlobalAttr>() && !Caller->hasAttr<CUDADeviceAttr>(); |
876 | 66 | bool ShouldCheck = CalleeIsDevice && CallerIsHost; |
877 | 66 | if (!ShouldCheck || !Capture.isReferenceCapture())
878 | 26 | return; |
879 | 40 | auto DiagKind = SemaDiagnosticBuilder::K_Deferred; |
880 | 40 | if (Capture.isVariableCapture()) { |
881 | 19 | SemaDiagnosticBuilder(DiagKind, Capture.getLocation(), |
882 | 19 | diag::err_capture_bad_target, Callee, *this) |
883 | 19 | << Capture.getVariable(); |
884 | 21 | } else if (Capture.isThisCapture()) { |
885 | | // Capture of this pointer is allowed since this pointer may be pointing to |
886 | | // managed memory which is accessible on both device and host sides. It only |
887 | | // results in invalid memory access if this pointer points to memory not |
888 | | // accessible on device side. |
889 | 21 | SemaDiagnosticBuilder(DiagKind, Capture.getLocation(), |
890 | 21 | diag::warn_maybe_capture_bad_target_this_ptr, Callee, |
891 | 21 | *this); |
892 | 21 | } |
893 | 40 | } |
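A sketch of the capture check in device compilation (hypothetical code; clang's extended-lambda syntax with __device__ on the lambda is assumed):

void host_fn() {
  int x = 0;
  auto bad = [&x] __device__ () { return x; };  // reference capture of a host stack
                                                // variable: err_capture_bad_target,
                                                // deferred until the lambda is emitted
  auto ok  = [=] __device__ () { return x; };   // by-value capture is fine
}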
894 | | |
895 | 241 | void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) { |
896 | 241 | assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); |
897 | 241 | if (Method->hasAttr<CUDAHostAttr>() || Method->hasAttr<CUDADeviceAttr>())
898 | 104 | return; |
899 | 137 | Method->addAttr(CUDADeviceAttr::CreateImplicit(Context)); |
900 | 137 | Method->addAttr(CUDAHostAttr::CreateImplicit(Context)); |
901 | 137 | } |
902 | | |
903 | | void Sema::checkCUDATargetOverload(FunctionDecl *NewFD, |
904 | 17.0k | const LookupResult &Previous) { |
905 | 17.0k | assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); |
906 | 17.0k | CUDAFunctionTarget NewTarget = IdentifyCUDATarget(NewFD); |
907 | 17.0k | for (NamedDecl *OldND : Previous) { |
908 | 12.9k | FunctionDecl *OldFD = OldND->getAsFunction(); |
909 | 12.9k | if (!OldFD) |
910 | 437 | continue; |
911 | | |
912 | 12.4k | CUDAFunctionTarget OldTarget = IdentifyCUDATarget(OldFD); |
913 | | // Don't allow HD and global functions to overload other functions with the |
914 | | // same signature. We allow overloading based on CUDA attributes so that |
915 | | // functions can have different implementations on the host and device, but |
916 | | // HD/global functions "exist" in some sense on both the host and device, so |
917 | | // should have the same implementation on both sides. |
918 | 12.4k | if (NewTarget != OldTarget && |
919 | 12.4k | ((NewTarget == CFT_HostDevice) || (OldTarget == CFT_HostDevice) ||
920 | 3.44k | (NewTarget == CFT_Global) || (OldTarget == CFT_Global)) &&
921 | 12.4k | !IsOverload(NewFD, OldFD, /* UseMemberUsingDeclRules = */ false, |
922 | 224 | /* ConsiderCudaAttrs = */ false)) { |
923 | 4 | Diag(NewFD->getLocation(), diag::err_cuda_ovl_target) |
924 | 4 | << NewTarget << NewFD->getDeclName() << OldTarget << OldFD; |
925 | 4 | Diag(OldFD->getLocation(), diag::note_previous_declaration); |
926 | 4 | NewFD->setInvalidDecl(); |
927 | 4 | break; |
928 | 4 | } |
929 | 12.4k | } |
930 | 17.0k | } |
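A sketch of the overload rule (hypothetical functions): host/device overloading is allowed, but an HD or global function may not overload a same-signature function with a different target.

int f();                      // host
__device__ int f();           // OK: overloading on host/device target

__device__ int g();
__host__ __device__ int g();  // err_cuda_ovl_target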
931 | | |
932 | | template <typename AttrTy> |
933 | | static void copyAttrIfPresent(Sema &S, FunctionDecl *FD, |
934 | 33 | const FunctionDecl &TemplateFD) { |
935 | 33 | if (AttrTy *Attribute = TemplateFD.getAttr<AttrTy>()) { |
936 | 18 | AttrTy *Clone = Attribute->clone(S.Context); |
937 | 18 | Clone->setInherited(true); |
938 | 18 | FD->addAttr(Clone); |
939 | 18 | } |
940 | 33 | }
941 | | |
942 | | void Sema::inheritCUDATargetAttrs(FunctionDecl *FD, |
943 | 11 | const FunctionTemplateDecl &TD) { |
944 | 11 | const FunctionDecl &TemplateFD = *TD.getTemplatedDecl(); |
945 | 11 | copyAttrIfPresent<CUDAGlobalAttr>(*this, FD, TemplateFD); |
946 | 11 | copyAttrIfPresent<CUDAHostAttr>(*this, FD, TemplateFD); |
947 | 11 | copyAttrIfPresent<CUDADeviceAttr>(*this, FD, TemplateFD); |
948 | 11 | } |
949 | | |
950 | 13.6k | std::string Sema::getCudaConfigureFuncName() const { |
951 | 13.6k | if (getLangOpts().HIP) |
952 | 11.9k | return getLangOpts().HIPUseNewLaunchAPI ? "__hipPushCallConfiguration"
953 | 11.9k | : "hipConfigureCall";
954 | | |
955 | | // New CUDA kernel launch sequence. |
956 | 1.74k | if (CudaFeatureEnabled(Context.getTargetInfo().getSDKVersion(), |
957 | 1.74k | CudaFeature::CUDA_USES_NEW_LAUNCH)) |
958 | 135 | return "__cudaPushCallConfiguration"; |
959 | | |
960 | | // Legacy CUDA kernel configuration call |
961 | 1.60k | return "cudaConfigureCall"; |
962 | 1.74k | } |