/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/Sema/SemaCUDA.cpp
Line | Count | Source
1 | | //===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs -------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | /// \file |
9 | | /// This file implements semantic analysis for CUDA constructs. |
10 | | /// |
11 | | //===----------------------------------------------------------------------===// |
12 | | |
13 | | #include "clang/AST/ASTContext.h" |
14 | | #include "clang/AST/Decl.h" |
15 | | #include "clang/AST/ExprCXX.h" |
16 | | #include "clang/Basic/Cuda.h" |
17 | | #include "clang/Basic/TargetInfo.h" |
18 | | #include "clang/Lex/Preprocessor.h" |
19 | | #include "clang/Sema/Lookup.h" |
20 | | #include "clang/Sema/ScopeInfo.h" |
21 | | #include "clang/Sema/Sema.h" |
22 | | #include "clang/Sema/SemaDiagnostic.h" |
23 | | #include "clang/Sema/SemaInternal.h" |
24 | | #include "clang/Sema/Template.h" |
25 | | #include "llvm/ADT/Optional.h" |
26 | | #include "llvm/ADT/SmallVector.h" |
27 | | using namespace clang; |
28 | | |
29 | 60 | template <typename AttrT> static bool hasExplicitAttr(const VarDecl *D) { |
30 | 60 | if (!D) |
31 | 0 | return false; |
32 | 60 | if (auto *A = D->getAttr<AttrT>()) |
33 | 60 | return !A->isImplicit(); |
34 | 0 | return false; |
35 | 60 | } |
36 | | |
37 | 7 | void Sema::PushForceCUDAHostDevice() { |
38 | 7 | assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); |
39 | 0 | ForceCUDAHostDeviceDepth++; |
40 | 7 | } |
41 | | |
42 | 5 | bool Sema::PopForceCUDAHostDevice() { |
43 | 5 | assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); |
44 | 5 | if (ForceCUDAHostDeviceDepth == 0) |
45 | 0 | return false; |
46 | 5 | ForceCUDAHostDeviceDepth--; |
47 | 5 | return true; |
48 | 5 | } |
49 | | |
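These two functions track the nesting depth of the force-host-device pragma region; while the depth is nonzero, new function declarations are treated as __host__ __device__ (see maybeAddCUDAHostDeviceAttrs below). A minimal sketch of the construct they correspond to, assuming the `#pragma clang force_cuda_host_device begin`/`end` spelling (function name made up):

    #pragma clang force_cuda_host_device begin
    // Declared while ForceCUDAHostDeviceDepth > 0: implicitly __host__ __device__.
    inline int twice(int x) { return 2 * x; }
    #pragma clang force_cuda_host_device end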
50 | | ExprResult Sema::ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc, |
51 | | MultiExprArg ExecConfig, |
52 | 179 | SourceLocation GGGLoc) { |
53 | 179 | FunctionDecl *ConfigDecl = Context.getcudaConfigureCallDecl(); |
54 | 179 | if (!ConfigDecl) |
55 | 0 | return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use) |
56 | 0 | << getCudaConfigureFuncName()); |
57 | 179 | QualType ConfigQTy = ConfigDecl->getType(); |
58 | | |
59 | 179 | DeclRefExpr *ConfigDR = new (Context) |
60 | 179 | DeclRefExpr(Context, ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc); |
61 | 179 | MarkFunctionReferenced(LLLLoc, ConfigDecl); |
62 | | |
63 | 179 | return BuildCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr, |
64 | 179 | /*IsExecConfig=*/true); |
65 | 179 | } |
66 | | |
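ActOnCUDAExecConfigExpr turns the <<<...>>> execution configuration into a call to the declaration registered as cudaConfigureCallDecl, i.e. the function named by getCudaConfigureFuncName() at the end of this file. A minimal sketch of the construct it handles (kernel and variable names made up):

    __global__ void scale(float *v, float s) { v[threadIdx.x] *= s; }

    void launch(float *v, float s) {
      // The <<<1, 128>>> part is the ExecConfig handled above; it lowers to a
      // configuration call such as cudaConfigureCall(1, 128) before the kernel call.
      scale<<<1, 128>>>(v, s);
    }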
67 | | Sema::CUDAFunctionTarget |
68 | 34 | Sema::IdentifyCUDATarget(const ParsedAttributesView &Attrs) { |
69 | 34 | bool HasHostAttr = false; |
70 | 34 | bool HasDeviceAttr = false; |
71 | 34 | bool HasGlobalAttr = false; |
72 | 34 | bool HasInvalidTargetAttr = false; |
73 | 35 | for (const ParsedAttr &AL : Attrs) { |
74 | 35 | switch (AL.getKind()) { |
75 | 13 | case ParsedAttr::AT_CUDAGlobal: |
76 | 13 | HasGlobalAttr = true; |
77 | 13 | break; |
78 | 10 | case ParsedAttr::AT_CUDAHost: |
79 | 10 | HasHostAttr = true; |
80 | 10 | break; |
81 | 12 | case ParsedAttr::AT_CUDADevice: |
82 | 12 | HasDeviceAttr = true; |
83 | 12 | break; |
84 | 0 | case ParsedAttr::AT_CUDAInvalidTarget: |
85 | 0 | HasInvalidTargetAttr = true; |
86 | 0 | break; |
87 | 0 | default: |
88 | 0 | break; |
89 | 35 | } |
90 | 35 | } |
91 | | |
92 | 34 | if (HasInvalidTargetAttr) |
93 | 0 | return CFT_InvalidTarget; |
94 | | |
95 | 34 | if (HasGlobalAttr) |
96 | 13 | return CFT_Global; |
97 | | |
98 | 21 | if (HasHostAttr && HasDeviceAttr)
99 | 6 | return CFT_HostDevice; |
100 | | |
101 | 15 | if (HasDeviceAttr) |
102 | 6 | return CFT_Device; |
103 | | |
104 | 9 | return CFT_Host; |
105 | 15 | } |
106 | | |
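For reference, a sketch of how declarations map to CUDAFunctionTarget values (illustrative declarations, names made up):

    __global__ void kernel();          // CFT_Global
    __device__ int device_only();      // CFT_Device
    __host__ int host_only();          // CFT_Host
    __host__ __device__ int both();    // CFT_HostDevice
    int unattributed();                // CFT_Host (no target attributes)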
107 | | template <typename A> |
108 | 111k | static bool hasAttr(const FunctionDecl *D, bool IgnoreImplicitAttr) { |
109 | 111k | return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) {
110 | 107k | return isa<A>(Attribute) &&
111 | 107k | !(IgnoreImplicitAttr && Attribute->isImplicit());
112 | 107k | });
113 | 111k | }
114 | | |
115 | | /// IdentifyCUDATarget - Determine the CUDA compilation target for this function |
116 | | Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D, |
117 | 58.7k | bool IgnoreImplicitHDAttr) { |
118 | | // Code that lives outside a function is run on the host. |
119 | 58.7k | if (D == nullptr) |
120 | 277 | return CFT_Host; |
121 | | |
122 | 58.4k | if (D->hasAttr<CUDAInvalidTargetAttr>()) |
123 | 39 | return CFT_InvalidTarget; |
124 | | |
125 | 58.4k | if (D->hasAttr<CUDAGlobalAttr>()) |
126 | 2.74k | return CFT_Global; |
127 | | |
128 | 55.6k | if (hasAttr<CUDADeviceAttr>(D, IgnoreImplicitHDAttr)) { |
129 | 34.1k | if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) |
130 | 17.8k | return CFT_HostDevice; |
131 | 16.2k | return CFT_Device; |
132 | 34.1k | } else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) {
133 | 4.93k | return CFT_Host; |
134 | 16.6k | } else if ((D->isImplicit() || !D->isUserProvided()) &&
135 | 16.6k | !IgnoreImplicitHDAttr) {
136 | | // Some implicit declarations (like intrinsic functions) are not marked. |
137 | | // Set the most lenient target on them for maximal flexibility. |
138 | 588 | return CFT_HostDevice; |
139 | 588 | } |
140 | | |
141 | 16.0k | return CFT_Host; |
142 | 55.6k | } |
143 | | |
144 | | /// IdentifyTarget - Determine the CUDA compilation target for this variable. |
145 | 624 | Sema::CUDAVariableTarget Sema::IdentifyCUDATarget(const VarDecl *Var) { |
146 | 624 | if (Var->hasAttr<HIPManagedAttr>()) |
147 | 68 | return CVT_Unified; |
148 | | // Only constexpr and const variables with an implicit constant attribute
149 | | // are emitted on both sides. Such variables are promoted to the device side
150 | | // only if they have static constant initializers on the device side.
151 | 556 | if ((Var->isConstexpr() || Var->getType().isConstQualified()) &&
152 | 556 | Var->hasAttr<CUDAConstantAttr>() &&
153 | 556 | !hasExplicitAttr<CUDAConstantAttr>(Var))
154 | 60 | return CVT_Both;
155 | 496 | if (Var->hasAttr<CUDADeviceAttr>() || Var->hasAttr<CUDAConstantAttr>() ||
156 | 496 | Var->hasAttr<CUDASharedAttr>() ||
157 | 496 | Var->getType()->isCUDADeviceBuiltinSurfaceType() ||
158 | 496 | Var->getType()->isCUDADeviceBuiltinTextureType())
159 | 289 | return CVT_Device; |
160 | | // Function-scope static variables without an explicit device or constant
161 | | // attribute are emitted
162 | | // - on both sides in host device functions |
163 | | // - on device side in device or global functions |
164 | 207 | if (auto *FD = dyn_cast<FunctionDecl>(Var->getDeclContext())) { |
165 | 28 | switch (IdentifyCUDATarget(FD)) { |
166 | 12 | case CFT_HostDevice: |
167 | 12 | return CVT_Both; |
168 | 10 | case CFT_Device: |
169 | 10 | case CFT_Global: |
170 | 10 | return CVT_Device; |
171 | 6 | default: |
172 | 6 | return CVT_Host; |
173 | 28 | } |
174 | 28 | } |
175 | 179 | return CVT_Host; |
176 | 207 | } |
177 | | |
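Illustrative variable declarations and the CUDAVariableTarget each would map to (a sketch with made-up names; the constexpr case assumes the implicit __constant__ attribute added by MaybeAddCUDAConstantAttr in device compilation, and __managed__ assumes the HIPManagedAttr checked above):

    __managed__ int m;            // CVT_Unified
    __device__ int d;             // CVT_Device
    __constant__ int c;           // CVT_Device (explicit __constant__)
    constexpr float pi = 3.14f;   // implicit __constant__ -> CVT_Both
    int h;                        // CVT_Host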
178 | | // * CUDA Call preference table |
179 | | // |
180 | | // F - from, |
181 | | // T - to |
182 | | // Ph - preference in host mode |
183 | | // Pd - preference in device mode |
184 | | // H - handled in (x) |
185 | | // Preferences: N:native, SS:same side, HD:host-device, WS:wrong side, --:never. |
186 | | // |
187 | | // | F | T | Ph | Pd | H | |
188 | | // |----+----+-----+-----+-----+ |
189 | | // | d | d | N | N | (c) | |
190 | | // | d | g | -- | -- | (a) | |
191 | | // | d | h | -- | -- | (e) | |
192 | | // | d | hd | HD | HD | (b) | |
193 | | // | g | d | N | N | (c) | |
194 | | // | g | g | -- | -- | (a) | |
195 | | // | g | h | -- | -- | (e) | |
196 | | // | g | hd | HD | HD | (b) | |
197 | | // | h | d | -- | -- | (e) | |
198 | | // | h | g | N | N | (c) | |
199 | | // | h | h | N | N | (c) | |
200 | | // | h | hd | HD | HD | (b) | |
201 | | // | hd | d | WS | SS | (d) | |
202 | | // | hd | g | SS | -- |(d/a)| |
203 | | // | hd | h | SS | WS | (d) | |
204 | | // | hd | hd | HD | HD | (b) | |
205 | | |
206 | | Sema::CUDAFunctionPreference |
207 | | Sema::IdentifyCUDAPreference(const FunctionDecl *Caller, |
208 | 17.8k | const FunctionDecl *Callee) { |
209 | 17.8k | assert(Callee && "Callee must be valid."); |
210 | 0 | CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller); |
211 | 17.8k | CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee); |
212 | | |
213 | | // If one of the targets is invalid, the check always fails, no matter what |
214 | | // the other target is. |
215 | 17.8k | if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget) |
216 | 7 | return CFP_Never; |
217 | | |
218 | | // (a) Can't call global from some contexts until we support CUDA's |
219 | | // dynamic parallelism. |
220 | 17.8k | if (CalleeTarget == CFT_Global && |
221 | 17.8k | (CallerTarget == CFT_Global || CallerTarget == CFT_Device))
222 | 7 | return CFP_Never; |
223 | | |
224 | | // (b) Calling HostDevice is OK for everyone. |
225 | 17.8k | if (CalleeTarget == CFT_HostDevice) |
226 | 10.5k | return CFP_HostDevice; |
227 | | |
228 | | // (c) Best case scenarios |
229 | 7.30k | if (CalleeTarget == CallerTarget || |
230 | 7.30k | (CallerTarget == CFT_Host && CalleeTarget == CFT_Global) ||
231 | 7.30k | (CallerTarget == CFT_Global && CalleeTarget == CFT_Device))
232 | 5.83k | return CFP_Native; |
233 | | |
234 | | // (d) HostDevice behavior depends on compilation mode. |
235 | 1.47k | if (CallerTarget == CFT_HostDevice) { |
236 | | // It's OK to call a compilation-mode matching function from an HD one. |
237 | 1.26k | if ((getLangOpts().CUDAIsDevice && CalleeTarget == CFT_Device) ||
238 | 1.26k | (!getLangOpts().CUDAIsDevice &&
239 | 994 | (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global)))
240 | 669 | return CFP_SameSide; |
241 | | |
242 | | // Calls from HD to non-mode-matching functions (i.e., to host functions |
243 | | // when compiling in device mode or to device functions when compiling in |
244 | | // host mode) are allowed at the sema level, but eventually rejected if |
245 | | // they're ever codegened. TODO: Reject said calls earlier. |
246 | 597 | return CFP_WrongSide; |
247 | 1.26k | } |
248 | | |
249 | | // (e) Calling across device/host boundary is not something you should do. |
250 | 212 | if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) ||
251 | 212 | (CallerTarget == CFT_Device && CalleeTarget == CFT_Host) ||
252 | 212 | (CallerTarget == CFT_Global && CalleeTarget == CFT_Host))
253 | 212 | return CFP_Never; |
254 | | |
255 | 0 | llvm_unreachable("All cases should've been handled by now."); |
256 | 0 | } |
257 | | |
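A sketch of the (d) rows in the table above, showing how an HD caller's preference flips with the compilation side (function names made up):

    void host_fn();                // CFT_Host
    __device__ void device_fn();   // CFT_Device

    __host__ __device__ void hd() {
      host_fn();    // CFP_SameSide when compiling for host, CFP_WrongSide for device
      device_fn();  // CFP_WrongSide when compiling for host, CFP_SameSide for device
    }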
258 | 0 | template <typename AttrT> static bool hasImplicitAttr(const FunctionDecl *D) { |
259 | 0 | if (!D) |
260 | 0 | return false; |
261 | 0 | if (auto *A = D->getAttr<AttrT>()) |
262 | 0 | return A->isImplicit(); |
263 | 0 | return D->isImplicit(); |
264 | 0 | }
265 | | |
266 | 0 | bool Sema::isCUDAImplicitHostDeviceFunction(const FunctionDecl *D) { |
267 | 0 | bool IsImplicitDevAttr = hasImplicitAttr<CUDADeviceAttr>(D); |
268 | 0 | bool IsImplicitHostAttr = hasImplicitAttr<CUDAHostAttr>(D); |
269 | 0 | return IsImplicitDevAttr && IsImplicitHostAttr; |
270 | 0 | } |
271 | | |
272 | | void Sema::EraseUnwantedCUDAMatches( |
273 | | const FunctionDecl *Caller, |
274 | 4 | SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) { |
275 | 4 | if (Matches.size() <= 1) |
276 | 0 | return; |
277 | | |
278 | 4 | using Pair = std::pair<DeclAccessPair, FunctionDecl*>; |
279 | | |
280 | | // Gets the CUDA function preference for a call from Caller to Match. |
281 | 20 | auto GetCFP = [&](const Pair &Match) { |
282 | 20 | return IdentifyCUDAPreference(Caller, Match.second); |
283 | 20 | }; |
284 | | |
285 | | // Find the best call preference among the functions in Matches. |
286 | 4 | CUDAFunctionPreference BestCFP = GetCFP(*std::max_element( |
287 | 4 | Matches.begin(), Matches.end(), |
288 | 4 | [&](const Pair &M1, const Pair &M2) { return GetCFP(M1) < GetCFP(M2); })); |
289 | | |
290 | | // Erase all functions with lower priority. |
291 | 4 | llvm::erase_if(Matches, |
292 | 8 | [&](const Pair &Match) { return GetCFP(Match) < BestCFP; }); |
293 | 4 | } |
294 | | |
295 | | /// When an implicitly-declared special member has to invoke more than one |
296 | | /// base/field special member, conflicts may occur in the targets of these |
297 | | /// members. For example, if one base's member is __host__ and another's is
298 | | /// __device__, it's a conflict. |
299 | | /// This function figures out if the given targets \param Target1 and |
300 | | /// \param Target2 conflict, and if they do not it fills in |
301 | | /// \param ResolvedTarget with a target that resolves for both calls. |
302 | | /// \return true if there's a conflict, false otherwise. |
303 | | static bool |
304 | | resolveCalleeCUDATargetConflict(Sema::CUDAFunctionTarget Target1, |
305 | | Sema::CUDAFunctionTarget Target2, |
306 | 67 | Sema::CUDAFunctionTarget *ResolvedTarget) { |
307 | | // Only free functions and static member functions may be global. |
308 | 67 | assert(Target1 != Sema::CFT_Global); |
309 | 0 | assert(Target2 != Sema::CFT_Global); |
310 | | |
311 | 67 | if (Target1 == Sema::CFT_HostDevice) { |
312 | 37 | *ResolvedTarget = Target2; |
313 | 37 | } else if (30 Target2 == Sema::CFT_HostDevice30 ) { |
314 | 1 | *ResolvedTarget = Target1; |
315 | 29 | } else if (Target1 != Target2) { |
316 | 25 | return true; |
317 | 25 | } else { |
318 | 4 | *ResolvedTarget = Target1; |
319 | 4 | } |
320 | | |
321 | 42 | return false; |
322 | 67 | } |
323 | | |
324 | | bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl, |
325 | | CXXSpecialMember CSM, |
326 | | CXXMethodDecl *MemberDecl, |
327 | | bool ConstRHS, |
328 | 2.63k | bool Diagnose) { |
329 | | // If the defaulted special member is defined lexically outside of its |
330 | | // owning class, or the special member already has explicit device or host |
331 | | // attributes, do not infer. |
332 | 2.63k | bool InClass = MemberDecl->getLexicalParent() == MemberDecl->getParent(); |
333 | 2.63k | bool HasH = MemberDecl->hasAttr<CUDAHostAttr>(); |
334 | 2.63k | bool HasD = MemberDecl->hasAttr<CUDADeviceAttr>(); |
335 | 2.63k | bool HasExplicitAttr = |
336 | 2.63k | (HasD && !MemberDecl->getAttr<CUDADeviceAttr>()->isImplicit()) ||
337 | 2.63k | (HasH && !MemberDecl->getAttr<CUDAHostAttr>()->isImplicit());
338 | 2.63k | if (!InClass || HasExplicitAttr)
339 | 14 | return false; |
340 | | |
341 | 2.62k | llvm::Optional<CUDAFunctionTarget> InferredTarget; |
342 | | |
343 | | // We're going to invoke special member lookup; mark that these special |
344 | | // members are called from this one, and not from its caller. |
345 | 2.62k | ContextRAII MethodContext(*this, MemberDecl); |
346 | | |
347 | | // Look for special members in base classes that should be invoked from here. |
348 | | // Infer the target of this member based on the ones it should call.
349 | | // Skip direct and indirect virtual bases for abstract classes. |
350 | 2.62k | llvm::SmallVector<const CXXBaseSpecifier *, 16> Bases; |
351 | 2.62k | for (const auto &B : ClassDecl->bases()) { |
352 | 279 | if (!B.isVirtual()) { |
353 | 279 | Bases.push_back(&B); |
354 | 279 | } |
355 | 279 | } |
356 | | |
357 | 2.62k | if (!ClassDecl->isAbstract()) { |
358 | 2.62k | llvm::append_range(Bases, llvm::make_pointer_range(ClassDecl->vbases())); |
359 | 2.62k | } |
360 | | |
361 | 2.62k | for (const auto *B : Bases) { |
362 | 279 | const RecordType *BaseType = B->getType()->getAs<RecordType>(); |
363 | 279 | if (!BaseType) { |
364 | 0 | continue; |
365 | 0 | } |
366 | | |
367 | 279 | CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl()); |
368 | 279 | Sema::SpecialMemberOverloadResult SMOR = |
369 | 279 | LookupSpecialMember(BaseClassDecl, CSM, |
370 | 279 | /* ConstArg */ ConstRHS, |
371 | 279 | /* VolatileArg */ false, |
372 | 279 | /* RValueThis */ false, |
373 | 279 | /* ConstThis */ false, |
374 | 279 | /* VolatileThis */ false); |
375 | | |
376 | 279 | if (!SMOR.getMethod()) |
377 | 5 | continue; |
378 | | |
379 | 274 | CUDAFunctionTarget BaseMethodTarget = IdentifyCUDATarget(SMOR.getMethod()); |
380 | 274 | if (!InferredTarget.hasValue()) { |
381 | 244 | InferredTarget = BaseMethodTarget; |
382 | 244 | } else { |
383 | 30 | bool ResolutionError = resolveCalleeCUDATargetConflict( |
384 | 30 | InferredTarget.getValue(), BaseMethodTarget, |
385 | 30 | InferredTarget.getPointer()); |
386 | 30 | if (ResolutionError) { |
387 | 13 | if (Diagnose) { |
388 | 3 | Diag(ClassDecl->getLocation(), |
389 | 3 | diag::note_implicit_member_target_infer_collision) |
390 | 3 | << (unsigned)CSM << InferredTarget.getValue() << BaseMethodTarget; |
391 | 3 | } |
392 | 13 | MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context)); |
393 | 13 | return true; |
394 | 13 | } |
395 | 30 | } |
396 | 274 | } |
397 | | |
398 | | // Same as for bases, but now for special members of fields. |
399 | 2.61k | for (const auto *F : ClassDecl->fields()) { |
400 | 2.00k | if (F->isInvalidDecl()) { |
401 | 0 | continue; |
402 | 0 | } |
403 | | |
404 | 2.00k | const RecordType *FieldType = |
405 | 2.00k | Context.getBaseElementType(F->getType())->getAs<RecordType>(); |
406 | 2.00k | if (!FieldType) { |
407 | 1.83k | continue; |
408 | 1.83k | } |
409 | | |
410 | 166 | CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl()); |
411 | 166 | Sema::SpecialMemberOverloadResult SMOR = |
412 | 166 | LookupSpecialMember(FieldRecDecl, CSM, |
413 | 166 | /* ConstArg */ ConstRHS && !F->isMutable(),
414 | 166 | /* VolatileArg */ false, |
415 | 166 | /* RValueThis */ false, |
416 | 166 | /* ConstThis */ false, |
417 | 166 | /* VolatileThis */ false); |
418 | | |
419 | 166 | if (!SMOR.getMethod()) |
420 | 0 | continue; |
421 | | |
422 | 166 | CUDAFunctionTarget FieldMethodTarget = |
423 | 166 | IdentifyCUDATarget(SMOR.getMethod()); |
424 | 166 | if (!InferredTarget.hasValue()) { |
425 | 129 | InferredTarget = FieldMethodTarget; |
426 | 129 | } else { |
427 | 37 | bool ResolutionError = resolveCalleeCUDATargetConflict( |
428 | 37 | InferredTarget.getValue(), FieldMethodTarget, |
429 | 37 | InferredTarget.getPointer()); |
430 | 37 | if (ResolutionError) { |
431 | 12 | if (Diagnose) { |
432 | 4 | Diag(ClassDecl->getLocation(), |
433 | 4 | diag::note_implicit_member_target_infer_collision) |
434 | 4 | << (unsigned)CSM << InferredTarget.getValue() |
435 | 4 | << FieldMethodTarget; |
436 | 4 | } |
437 | 12 | MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context)); |
438 | 12 | return true; |
439 | 12 | } |
440 | 37 | } |
441 | 166 | } |
442 | | |
443 | | |
444 | | // If no target was inferred, mark this member as __host__ __device__; |
445 | | // it's the least restrictive option that can be invoked from any target. |
446 | 2.60k | bool NeedsH = true, NeedsD = true; |
447 | 2.60k | if (InferredTarget.hasValue()) { |
448 | 348 | if (InferredTarget.getValue() == CFT_Device) |
449 | 58 | NeedsH = false; |
450 | 290 | else if (InferredTarget.getValue() == CFT_Host) |
451 | 67 | NeedsD = false; |
452 | 348 | } |
453 | | |
454 | | // We are either setting the attributes for the first time, or the inferred
455 | | // ones must match the previously set ones.
456 | 2.60k | if (NeedsD && !HasD)
457 | 1.26k | MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
458 | 2.60k | if (NeedsH && !HasH)
459 | 1.27k | MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context)); |
460 | | |
461 | 2.60k | return false; |
462 | 2.61k | } |
463 | | |
464 | 23 | bool Sema::isEmptyCudaConstructor(SourceLocation Loc, CXXConstructorDecl *CD) { |
465 | 23 | if (!CD->isDefined() && CD->isTemplateInstantiation())
466 | 0 | InstantiateFunctionDefinition(Loc, CD->getFirstDecl()); |
467 | | |
468 | | // (E.2.3.1, CUDA 7.5) A constructor for a class type is considered |
469 | | // empty at a point in the translation unit, if it is either a |
470 | | // trivial constructor |
471 | 23 | if (CD->isTrivial()) |
472 | 9 | return true; |
473 | | |
474 | | // ... or it satisfies all of the following conditions: |
475 | | // The constructor function has been defined. |
476 | | // The constructor function has no parameters, |
477 | | // and the function body is an empty compound statement. |
478 | 14 | if (!(CD->hasTrivialBody() && CD->getNumParams() == 0))
479 | 9 | return false; |
480 | | |
481 | | // Its class has no virtual functions and no virtual base classes. |
482 | 5 | if (CD->getParent()->isDynamicClass()) |
483 | 0 | return false; |
484 | | |
485 | | // Union ctor does not call ctors of its data members. |
486 | 5 | if (CD->getParent()->isUnion()) |
487 | 4 | return true; |
488 | | |
489 | | // The only form of initializer allowed is an empty constructor. |
490 | | // This will recursively check all base classes and member initializers |
491 | 1 | if (!llvm::all_of(CD->inits(), [&](const CXXCtorInitializer *CI) { |
492 | 0 | if (const CXXConstructExpr *CE = |
493 | 0 | dyn_cast<CXXConstructExpr>(CI->getInit())) |
494 | 0 | return isEmptyCudaConstructor(Loc, CE->getConstructor()); |
495 | 0 | return false; |
496 | 0 | })) |
497 | 0 | return false; |
498 | | |
499 | 1 | return true; |
500 | 1 | } |
501 | | |
502 | 22 | bool Sema::isEmptyCudaDestructor(SourceLocation Loc, CXXDestructorDecl *DD) { |
503 | | // No destructor -> no problem. |
504 | 22 | if (!DD) |
505 | 13 | return true; |
506 | | |
507 | 9 | if (!DD->isDefined() && DD->isTemplateInstantiation())
508 | 0 | InstantiateFunctionDefinition(Loc, DD->getFirstDecl()); |
509 | | |
510 | | // (E.2.3.1, CUDA 7.5) A destructor for a class type is considered |
511 | | // empty at a point in the translation unit, if it is either a |
512 | | // trivial destructor
513 | 9 | if (DD->isTrivial()) |
514 | 4 | return true; |
515 | | |
516 | | // ... or it satisfies all of the following conditions: |
517 | | // The destructor function has been defined. |
518 | | // and the function body is an empty compound statement. |
519 | 5 | if (!DD->hasTrivialBody()) |
520 | 3 | return false; |
521 | | |
522 | 2 | const CXXRecordDecl *ClassDecl = DD->getParent(); |
523 | | |
524 | | // Its class has no virtual functions and no virtual base classes. |
525 | 2 | if (ClassDecl->isDynamicClass()) |
526 | 0 | return false; |
527 | | |
528 | | // Union does not have base class and union dtor does not call dtors of its |
529 | | // data members. |
530 | 2 | if (DD->getParent()->isUnion()) |
531 | 2 | return true; |
532 | | |
533 | | // Only empty destructors are allowed. This will recursively check |
534 | | // destructors for all base classes... |
535 | 0 | if (!llvm::all_of(ClassDecl->bases(), [&](const CXXBaseSpecifier &BS) { |
536 | 0 | if (CXXRecordDecl *RD = BS.getType()->getAsCXXRecordDecl()) |
537 | 0 | return isEmptyCudaDestructor(Loc, RD->getDestructor()); |
538 | 0 | return true; |
539 | 0 | })) |
540 | 0 | return false; |
541 | | |
542 | | // ... and member fields. |
543 | 0 | if (!llvm::all_of(ClassDecl->fields(), [&](const FieldDecl *Field) { |
544 | 0 | if (CXXRecordDecl *RD = Field->getType() |
545 | 0 | ->getBaseElementTypeUnsafe() |
546 | 0 | ->getAsCXXRecordDecl()) |
547 | 0 | return isEmptyCudaDestructor(Loc, RD->getDestructor()); |
548 | 0 | return true; |
549 | 0 | })) |
550 | 0 | return false; |
551 | | |
552 | 0 | return true; |
553 | 0 | } |
554 | | |
555 | | namespace { |
556 | | enum CUDAInitializerCheckKind { |
557 | | CICK_DeviceOrConstant, // Check initializer for device/constant variable |
558 | | CICK_Shared, // Check initializer for shared variable |
559 | | }; |
560 | | |
561 | 768 | bool IsDependentVar(VarDecl *VD) { |
562 | 768 | if (VD->getType()->isDependentType()) |
563 | 22 | return true; |
564 | 746 | if (const auto *Init = VD->getInit()) |
565 | 669 | return Init->isValueDependent(); |
566 | 77 | return false; |
567 | 746 | } |
568 | | |
569 | | // Check whether a variable has an allowed initializer for a CUDA device side |
570 | | // variable with global storage. \p VD may be a host variable to be checked for |
571 | | // potential promotion to device side variable. |
572 | | // |
573 | | // CUDA/HIP allows only empty constructors as initializers for global |
574 | | // variables (see E.2.3.1, CUDA 7.5). The same restriction also applies to all |
575 | | // __shared__ variables whether they are local or not (they all are implicitly |
576 | | // static in CUDA). One exception is that CUDA allows constant initializers |
577 | | // for __constant__ and __device__ variables. |
578 | | bool HasAllowedCUDADeviceStaticInitializer(Sema &S, VarDecl *VD, |
579 | 259 | CUDAInitializerCheckKind CheckKind) { |
580 | 259 | assert(!VD->isInvalidDecl() && VD->hasGlobalStorage()); |
581 | 0 | assert(!IsDependentVar(VD) && "do not check dependent var"); |
582 | 0 | const Expr *Init = VD->getInit(); |
583 | 259 | auto IsEmptyInit = [&](const Expr *Init) { |
584 | 259 | if (!Init) |
585 | 0 | return true; |
586 | 259 | if (const auto *CE = dyn_cast<CXXConstructExpr>(Init)) { |
587 | 23 | return S.isEmptyCudaConstructor(VD->getLocation(), CE->getConstructor()); |
588 | 23 | } |
589 | 236 | return false; |
590 | 259 | }; |
591 | 259 | auto IsConstantInit = [&](const Expr *Init) { |
592 | 244 | assert(Init); |
593 | 0 | ASTContext::CUDAConstantEvalContextRAII EvalCtx(S.Context, |
594 | 244 | /*NoWrongSidedVars=*/true);
595 | 244 | return Init->isConstantInitializer(S.Context, |
596 | 244 | VD->getType()->isReferenceType()); |
597 | 244 | }; |
598 | 259 | auto HasEmptyDtor = [&](VarDecl *VD) { |
599 | 231 | if (const auto *RD = VD->getType()->getAsCXXRecordDecl()) |
600 | 22 | return S.isEmptyCudaDestructor(VD->getLocation(), RD->getDestructor()); |
601 | 209 | return true; |
602 | 231 | }; |
603 | 259 | if (CheckKind == CICK_Shared) |
604 | 5 | return IsEmptyInit(Init) && HasEmptyDtor(VD);
605 | 254 | return S.LangOpts.GPUAllowDeviceInit ||
606 | 254 | ((IsEmptyInit(Init) || IsConstantInit(Init)) && HasEmptyDtor(VD));
607 | 259 | } |
608 | | } // namespace |
609 | | |
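Illustrative globals and how HasAllowedCUDADeviceStaticInitializer would classify them under the E.2.3.1 rule (a sketch; type and variable names made up, -fgpu-allow-device-init not assumed):

    struct Empty { __host__ __device__ Empty() {} };
    struct Dynamic { __host__ __device__ Dynamic() { x = 1; } int x; };

    __device__ Empty e;        // empty constructor: allowed
    __constant__ int c = 42;   // constant initializer: allowed
    __device__ Dynamic d;      // dynamic initialization: diagnosed (err_dynamic_var_init)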
610 | 1.47k | void Sema::checkAllowedCUDAInitializer(VarDecl *VD) { |
611 | | // Do not check dependent variables since the ctor/dtor/initializer are not |
612 | | // determined. Do it after instantiation. |
613 | 1.47k | if (VD->isInvalidDecl() || !VD->hasInit() || !VD->hasGlobalStorage() ||
614 | 1.47k | IsDependentVar(VD))
615 | 1.18k | return; |
616 | 287 | const Expr *Init = VD->getInit(); |
617 | 287 | bool IsSharedVar = VD->hasAttr<CUDASharedAttr>(); |
618 | 287 | bool IsDeviceOrConstantVar = |
619 | 287 | !IsSharedVar && |
620 | 287 | (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>());
621 | 287 | if (IsDeviceOrConstantVar || IsSharedVar) {
622 | 185 | if (HasAllowedCUDADeviceStaticInitializer(
623 | 185 | *this, VD, IsSharedVar ? CICK_Shared : CICK_DeviceOrConstant))
624 | 169 | return;
625 | 16 | Diag(VD->getLocation(),
626 | 16 | IsSharedVar ? diag::err_shared_var_init : diag::err_dynamic_var_init)
627 | 16 | << Init->getSourceRange(); |
628 | 16 | VD->setInvalidDecl(); |
629 | 102 | } else { |
630 | | // This is a host-side global variable. Check that the initializer is |
631 | | // callable from the host side. |
632 | 102 | const FunctionDecl *InitFn = nullptr; |
633 | 102 | if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init)) { |
634 | 29 | InitFn = CE->getConstructor(); |
635 | 73 | } else if (const CallExpr *CE = dyn_cast<CallExpr>(Init)) { |
636 | 3 | InitFn = CE->getDirectCallee(); |
637 | 3 | } |
638 | 102 | if (InitFn) { |
639 | 32 | CUDAFunctionTarget InitFnTarget = IdentifyCUDATarget(InitFn); |
640 | 32 | if (InitFnTarget != CFT_Host && InitFnTarget != CFT_HostDevice) {
641 | 3 | Diag(VD->getLocation(), diag::err_ref_bad_target_global_initializer) |
642 | 3 | << InitFnTarget << InitFn; |
643 | 3 | Diag(InitFn->getLocation(), diag::note_previous_decl) << InitFn; |
644 | 3 | VD->setInvalidDecl(); |
645 | 3 | } |
646 | 32 | } |
647 | 102 | } |
648 | 287 | } |
649 | | |
650 | | // With -fcuda-host-device-constexpr, an unattributed constexpr function is |
651 | | // treated as implicitly __host__ __device__, unless: |
652 | | // * it is a variadic function (device-side variadic functions are not |
653 | | // allowed), or |
654 | | // * a __device__ function with this signature was already declared, in which |
655 | | // case we output an error, unless the __device__ decl is in a
656 | | // system header, in which case we leave the constexpr function unattributed. |
657 | | // |
658 | | // In addition, all function decls are treated as __host__ __device__ when |
659 | | // ForceCUDAHostDeviceDepth > 0 (corresponding to code within a |
660 | | // #pragma clang force_cuda_host_device_begin/end |
661 | | // pair). |
662 | | void Sema::maybeAddCUDAHostDeviceAttrs(FunctionDecl *NewD, |
663 | 3.33k | const LookupResult &Previous) { |
664 | 3.33k | assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); |
665 | | |
666 | 3.33k | if (ForceCUDAHostDeviceDepth > 0) { |
667 | 7 | if (!NewD->hasAttr<CUDAHostAttr>()) |
668 | 5 | NewD->addAttr(CUDAHostAttr::CreateImplicit(Context)); |
669 | 7 | if (!NewD->hasAttr<CUDADeviceAttr>()) |
670 | 5 | NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context)); |
671 | 7 | return; |
672 | 7 | } |
673 | | |
674 | 3.32k | if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr()3.30k || |
675 | 3.32k | NewD->isVariadic()33 || NewD->hasAttr<CUDAHostAttr>()29 || |
676 | 3.32k | NewD->hasAttr<CUDADeviceAttr>()25 || NewD->hasAttr<CUDAGlobalAttr>()21 ) |
677 | 3.30k | return; |
678 | | |
679 | | // Is D a __device__ function with the same signature as NewD, ignoring CUDA |
680 | | // attributes? |
681 | 21 | auto IsMatchingDeviceFn = [&](NamedDecl *D) { |
682 | 12 | if (UsingShadowDecl *Using = dyn_cast<UsingShadowDecl>(D)) |
683 | 4 | D = Using->getTargetDecl(); |
684 | 12 | FunctionDecl *OldD = D->getAsFunction(); |
685 | 12 | return OldD && OldD->hasAttr<CUDADeviceAttr>() && |
686 | 12 | !OldD->hasAttr<CUDAHostAttr>() && |
687 | 12 | !IsOverload(NewD, OldD, /* UseMemberUsingDeclRules = */ false, |
688 | 12 | /* ConsiderCudaAttrs = */ false); |
689 | 12 | }; |
690 | 21 | auto It = llvm::find_if(Previous, IsMatchingDeviceFn); |
691 | 21 | if (It != Previous.end()) { |
692 | | // We found a __device__ function with the same name and signature as NewD |
693 | | // (ignoring CUDA attrs). This is an error unless that function is defined |
694 | | // in a system header, in which case we simply return without making NewD |
695 | | // host+device. |
696 | 12 | NamedDecl *Match = *It; |
697 | 12 | if (!getSourceManager().isInSystemHeader(Match->getLocation())) { |
698 | 4 | Diag(NewD->getLocation(), |
699 | 4 | diag::err_cuda_unattributed_constexpr_cannot_overload_device) |
700 | 4 | << NewD; |
701 | 4 | Diag(Match->getLocation(), |
702 | 4 | diag::note_cuda_conflicting_device_function_declared_here); |
703 | 4 | } |
704 | 12 | return; |
705 | 12 | } |
706 | | |
707 | 9 | NewD->addAttr(CUDAHostAttr::CreateImplicit(Context)); |
708 | 9 | NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context)); |
709 | 9 | } |
710 | | |
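A sketch of the -fcuda-host-device-constexpr behavior described above (function names made up): an unattributed, non-variadic constexpr function picks up implicit __host__ __device__ attributes, unless a same-signature __device__ declaration already exists outside a system header, in which case the diagnostic mentioned above is emitted instead.

    constexpr int sq(int x) { return x * x; }      // implicitly __host__ __device__

    __global__ void k(int *out) { *out = sq(3); }  // callable from device code
    int on_host() { return sq(4); }                // and from host code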
711 | | // TODO: `__constant__` memory may be a limited resource for certain targets. |
712 | | // A safeguard may be needed at the end of compilation pipeline if |
713 | | // `__constant__` memory usage goes beyond limit. |
714 | 2.26M | void Sema::MaybeAddCUDAConstantAttr(VarDecl *VD) { |
715 | | // Do not promote dependent variables since the ctor/dtor/initializer are
716 | | // not determined. Do it after instantiation. |
717 | 2.26M | if (getLangOpts().CUDAIsDevice && !VD->hasAttr<CUDAConstantAttr>() &&
718 | 2.26M | !VD->hasAttr<CUDAConstantAttr>() && !VD->hasAttr<CUDASharedAttr>() &&
719 | 2.26M | (VD->isFileVarDecl() || VD->isStaticDataMember()) &&
720 | 2.26M | !IsDependentVar(VD) &&
721 | 2.26M | ((VD->isConstexpr() || VD->getType().isConstQualified()) &&
722 | 189 | HasAllowedCUDADeviceStaticInitializer(*this, VD, |
723 | 74 | CICK_DeviceOrConstant))) { |
724 | 59 | VD->addAttr(CUDAConstantAttr::CreateImplicit(getASTContext())); |
725 | 59 | } |
726 | 2.26M | } |
727 | | |
728 | | Sema::SemaDiagnosticBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc, |
729 | 106 | unsigned DiagID) { |
730 | 106 | assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); |
731 | 0 | FunctionDecl *CurFunContext = getCurFunctionDecl(/*AllowLambda=*/true); |
732 | 106 | SemaDiagnosticBuilder::Kind DiagKind = [&] { |
733 | 106 | if (!CurFunContext) |
734 | 2 | return SemaDiagnosticBuilder::K_Nop; |
735 | 104 | switch (CurrentCUDATarget()) { |
736 | 8 | case CFT_Global: |
737 | 60 | case CFT_Device: |
738 | 60 | return SemaDiagnosticBuilder::K_Immediate; |
739 | 34 | case CFT_HostDevice: |
740 | | // An HD function counts as host code if we're compiling for host, and |
741 | | // device code if we're compiling for device. Defer any errors in device |
742 | | // mode until the function is known-emitted. |
743 | 34 | if (!getLangOpts().CUDAIsDevice) |
744 | 6 | return SemaDiagnosticBuilder::K_Nop; |
745 | 28 | if (IsLastErrorImmediate && Diags.getDiagnosticIDs()->isBuiltinNote(DiagID)) |
746 | 7 | return SemaDiagnosticBuilder::K_Immediate; |
747 | 21 | return (getEmissionStatus(CurFunContext) == |
748 | 21 | FunctionEmissionStatus::Emitted) |
749 | 21 | ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
750 | 21 | : SemaDiagnosticBuilder::K_Deferred;
751 | 10 | default: |
752 | 10 | return SemaDiagnosticBuilder::K_Nop; |
753 | 104 | } |
754 | 104 | }(); |
755 | 106 | return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, CurFunContext, *this); |
756 | 106 | } |
757 | | |
758 | | Sema::SemaDiagnosticBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc, |
759 | 35 | unsigned DiagID) { |
760 | 35 | assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); |
761 | 0 | FunctionDecl *CurFunContext = getCurFunctionDecl(/*AllowLambda=*/true); |
762 | 35 | SemaDiagnosticBuilder::Kind DiagKind = [&] { |
763 | 35 | if (!CurFunContext) |
764 | 0 | return SemaDiagnosticBuilder::K_Nop; |
765 | 35 | switch (CurrentCUDATarget()) { |
766 | 6 | case CFT_Host: |
767 | 6 | return SemaDiagnosticBuilder::K_Immediate; |
768 | 1 | case CFT_HostDevice: |
769 | | // An HD function counts as host code if we're compiling for host, and |
770 | | // device code if we're compiling for device. Defer any errors in device |
771 | | // mode until the function is known-emitted. |
772 | 1 | if (getLangOpts().CUDAIsDevice) |
773 | 0 | return SemaDiagnosticBuilder::K_Nop; |
774 | 1 | if (IsLastErrorImmediate && Diags.getDiagnosticIDs()->isBuiltinNote(DiagID)) |
775 | 0 | return SemaDiagnosticBuilder::K_Immediate; |
776 | 1 | return (getEmissionStatus(CurFunContext) == |
777 | 1 | FunctionEmissionStatus::Emitted) |
778 | 1 | ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
779 | 1 | : SemaDiagnosticBuilder::K_Deferred; |
780 | 28 | default: |
781 | 28 | return SemaDiagnosticBuilder::K_Nop; |
782 | 35 | } |
783 | 35 | }(); |
784 | 35 | return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, CurFunContext, *this); |
785 | 35 | } |
786 | | |
787 | 10.7k | bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) { |
788 | 10.7k | assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); |
789 | 0 | assert(Callee && "Callee may not be null."); |
790 | | |
791 | 0 | auto &ExprEvalCtx = ExprEvalContexts.back(); |
792 | 10.7k | if (ExprEvalCtx.isUnevaluated() || ExprEvalCtx.isConstantEvaluated()) |
793 | 27 | return true; |
794 | | |
795 | | // FIXME: Is bailing out early correct here? Should we instead assume that |
796 | | // the caller is a global initializer? |
797 | 10.7k | FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda=*/true); |
798 | 10.7k | if (!Caller) |
799 | 236 | return true; |
800 | | |
801 | | // If the caller is known-emitted, mark the callee as known-emitted. |
802 | | // Otherwise, mark the call in our call graph so we can traverse it later. |
803 | 10.5k | bool CallerKnownEmitted = |
804 | 10.5k | getEmissionStatus(Caller) == FunctionEmissionStatus::Emitted; |
805 | 10.5k | SemaDiagnosticBuilder::Kind DiagKind = [this, Caller, Callee, |
806 | 10.5k | CallerKnownEmitted] { |
807 | 10.5k | switch (IdentifyCUDAPreference(Caller, Callee)) { |
808 | 20 | case CFP_Never: |
809 | 146 | case CFP_WrongSide: |
810 | 146 | assert(Caller && "Never/wrongSide calls require a non-null caller"); |
811 | | // If we know the caller will be emitted, we know this wrong-side call |
812 | | // will be emitted, so it's an immediate error. Otherwise, defer the |
813 | | // error until we know the caller is emitted. |
814 | 146 | return CallerKnownEmitted |
815 | 146 | ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
816 | 146 | : SemaDiagnosticBuilder::K_Deferred;
817 | 10.3k | default: |
818 | 10.3k | return SemaDiagnosticBuilder::K_Nop; |
819 | 10.5k | } |
820 | 10.5k | }(); |
821 | | |
822 | 10.5k | if (DiagKind == SemaDiagnosticBuilder::K_Nop) { |
823 | | // For -fgpu-rdc, keep track of external kernels used by host functions. |
824 | 10.3k | if (LangOpts.CUDAIsDevice && LangOpts.GPURelocatableDeviceCode &&
825 | 10.3k | Callee->hasAttr<CUDAGlobalAttr>() && !Callee->isDefined())
826 | 16 | getASTContext().CUDAExternalDeviceDeclODRUsedByHost.insert(Callee); |
827 | 10.3k | return true; |
828 | 10.3k | } |
829 | | |
830 | | // Avoid emitting this error twice for the same location. Using a hashtable |
831 | | // like this is unfortunate, but because we must continue parsing as normal |
832 | | // after encountering a deferred error, it's otherwise very tricky for us to |
833 | | // ensure that we only emit this deferred error once. |
834 | 146 | if (!LocsWithCUDACallDiags.insert({Caller, Loc}).second) |
835 | 60 | return true; |
836 | | |
837 | 86 | SemaDiagnosticBuilder(DiagKind, Loc, diag::err_ref_bad_target, Caller, *this) |
838 | 86 | << IdentifyCUDATarget(Callee) << /*function*/ 0 << Callee |
839 | 86 | << IdentifyCUDATarget(Caller); |
840 | 86 | if (!Callee->getBuiltinID()) |
841 | 86 | SemaDiagnosticBuilder(DiagKind, Callee->getLocation(), |
842 | 86 | diag::note_previous_decl, Caller, *this) |
843 | 86 | << Callee; |
844 | 86 | return DiagKind != SemaDiagnosticBuilder::K_Immediate && |
845 | 86 | DiagKind != SemaDiagnosticBuilder::K_ImmediateWithCallStack; |
846 | 146 | } |
847 | | |
848 | | // Check the wrong-sided reference capture of lambda for CUDA/HIP. |
849 | | // A lambda function may capture a stack variable by reference when it is |
850 | | // defined and uses the capture by reference when the lambda is called. When |
851 | | // the capture and use happen on different sides, the capture is invalid and |
852 | | // should be diagnosed. |
853 | | void Sema::CUDACheckLambdaCapture(CXXMethodDecl *Callee, |
854 | 98 | const sema::Capture &Capture) { |
855 | | // In host compilation we only need to check lambda functions emitted on host |
856 | | // side. In such lambda functions, a reference capture is invalid only |
857 | | // if the lambda structure is populated by a device function or kernel and
858 | | // is then passed to and called by a host function. However, that is impossible,
859 | | // since a device function or kernel can only call a device function, also a |
860 | | // kernel cannot pass a lambda back to a host function since we cannot |
861 | | // define a kernel argument type which can hold the lambda before the lambda |
862 | | // itself is defined. |
863 | 98 | if (!LangOpts.CUDAIsDevice) |
864 | 32 | return; |
865 | | |
866 | | // File-scope lambda can only do init captures for global variables, which |
867 | | // results in passing by value for these global variables. |
868 | 66 | FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda=*/true); |
869 | 66 | if (!Caller) |
870 | 0 | return; |
871 | | |
872 | | // In device compilation, we only need to check lambda functions which are |
873 | | // emitted on device side. For such lambdas, a reference capture is invalid |
874 | | // only if the lambda structure is populated by a host function then passed |
875 | | // to and called in a device function or kernel. |
876 | 66 | bool CalleeIsDevice = Callee->hasAttr<CUDADeviceAttr>(); |
877 | 66 | bool CallerIsHost = |
878 | 66 | !Caller->hasAttr<CUDAGlobalAttr>() && !Caller->hasAttr<CUDADeviceAttr>(); |
879 | 66 | bool ShouldCheck = CalleeIsDevice && CallerIsHost; |
880 | 66 | if (!ShouldCheck || !Capture.isReferenceCapture())
881 | 26 | return; |
882 | 40 | auto DiagKind = SemaDiagnosticBuilder::K_Deferred; |
883 | 40 | if (Capture.isVariableCapture()) { |
884 | 19 | SemaDiagnosticBuilder(DiagKind, Capture.getLocation(), |
885 | 19 | diag::err_capture_bad_target, Callee, *this) |
886 | 19 | << Capture.getVariable(); |
887 | 21 | } else if (Capture.isThisCapture()) { |
888 | | // Capture of this pointer is allowed since this pointer may be pointing to |
889 | | // managed memory which is accessible on both device and host sides. It only |
890 | | // results in invalid memory access if this pointer points to memory not |
891 | | // accessible on device side. |
892 | 21 | SemaDiagnosticBuilder(DiagKind, Capture.getLocation(), |
893 | 21 | diag::warn_maybe_capture_bad_target_this_ptr, Callee, |
894 | 21 | *this); |
895 | 21 | } |
896 | 40 | } |
897 | | |
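A sketch of the capture check (assuming extended device lambdas; names made up): a host-side stack variable captured by reference is a host address, so using that capture in device code is diagnosed, while a by-value capture is fine.

    template <typename F> __global__ void run(F f) { f(); }

    void host_fn() {
      int local = 0;
      run<<<1, 1>>>([&local] __device__ () { local = 1; });   // bad: &local is a host address
      run<<<1, 1>>>([=] __device__ () { (void)local; });      // OK: captured by value
    }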
898 | 221 | void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) { |
899 | 221 | assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); |
900 | 221 | if (Method->hasAttr<CUDAHostAttr>() || Method->hasAttr<CUDADeviceAttr>())
901 | 90 | return; |
902 | 131 | Method->addAttr(CUDADeviceAttr::CreateImplicit(Context)); |
903 | 131 | Method->addAttr(CUDAHostAttr::CreateImplicit(Context)); |
904 | 131 | } |
905 | | |
906 | | void Sema::checkCUDATargetOverload(FunctionDecl *NewFD, |
907 | 4.88k | const LookupResult &Previous) { |
908 | 4.88k | assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); |
909 | 0 | CUDAFunctionTarget NewTarget = IdentifyCUDATarget(NewFD); |
910 | 4.88k | for (NamedDecl *OldND : Previous) { |
911 | 2.33k | FunctionDecl *OldFD = OldND->getAsFunction(); |
912 | 2.33k | if (!OldFD) |
913 | 91 | continue; |
914 | | |
915 | 2.24k | CUDAFunctionTarget OldTarget = IdentifyCUDATarget(OldFD); |
916 | | // Don't allow HD and global functions to overload other functions with the |
917 | | // same signature. We allow overloading based on CUDA attributes so that |
918 | | // functions can have different implementations on the host and device, but |
919 | | // HD/global functions "exist" in some sense on both the host and device, so |
920 | | // should have the same implementation on both sides. |
921 | 2.24k | if (NewTarget != OldTarget && |
922 | 2.24k | ((NewTarget == CFT_HostDevice) || (OldTarget == CFT_HostDevice) ||
923 | 1.02k | (NewTarget == CFT_Global) || (OldTarget == CFT_Global)) &&
924 | 2.24k | !IsOverload(NewFD, OldFD, /* UseMemberUsingDeclRules = */ false, |
925 | 184 | /* ConsiderCudaAttrs = */ false)) { |
926 | 4 | Diag(NewFD->getLocation(), diag::err_cuda_ovl_target) |
927 | 4 | << NewTarget << NewFD->getDeclName() << OldTarget << OldFD; |
928 | 4 | Diag(OldFD->getLocation(), diag::note_previous_declaration); |
929 | 4 | NewFD->setInvalidDecl(); |
930 | 4 | break; |
931 | 4 | } |
932 | 2.24k | } |
933 | 4.88k | } |
934 | | |
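An illustration of the rule enforced above (function names made up): host and device overloads of the same signature may coexist, but an HD or __global__ function may not share a signature with a differently targeted overload.

    __host__ int pick() { return 0; }
    __device__ int pick() { return 1; }              // OK: overloads only by target
    // __host__ __device__ int pick() { return 2; }  // would trigger err_cuda_ovl_target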
935 | | template <typename AttrTy> |
936 | | static void copyAttrIfPresent(Sema &S, FunctionDecl *FD, |
937 | 33 | const FunctionDecl &TemplateFD) { |
938 | 33 | if (AttrTy *Attribute = TemplateFD.getAttr<AttrTy>()) { |
939 | 18 | AttrTy *Clone = Attribute->clone(S.Context); |
940 | 18 | Clone->setInherited(true); |
941 | 18 | FD->addAttr(Clone); |
942 | 18 | } |
943 | 33 | }
|
944 | | |
945 | | void Sema::inheritCUDATargetAttrs(FunctionDecl *FD, |
946 | 11 | const FunctionTemplateDecl &TD) { |
947 | 11 | const FunctionDecl &TemplateFD = *TD.getTemplatedDecl(); |
948 | 11 | copyAttrIfPresent<CUDAGlobalAttr>(*this, FD, TemplateFD); |
949 | 11 | copyAttrIfPresent<CUDAHostAttr>(*this, FD, TemplateFD); |
950 | 11 | copyAttrIfPresent<CUDADeviceAttr>(*this, FD, TemplateFD); |
951 | 11 | } |
952 | | |
953 | 2.28k | std::string Sema::getCudaConfigureFuncName() const { |
954 | 2.28k | if (getLangOpts().HIP) |
955 | 685 | return getLangOpts().HIPUseNewLaunchAPI ? "__hipPushCallConfiguration"
956 | 685 | : "hipConfigureCall";
957 | | |
958 | | // New CUDA kernel launch sequence. |
959 | 1.59k | if (CudaFeatureEnabled(Context.getTargetInfo().getSDKVersion(), |
960 | 1.59k | CudaFeature::CUDA_USES_NEW_LAUNCH)) |
961 | 73 | return "__cudaPushCallConfiguration"; |
962 | | |
963 | | // Legacy CUDA kernel configuration call |
964 | 1.52k | return "cudaConfigureCall"; |
965 | 1.59k | } |