/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/Sema/SemaCUDA.cpp
Line | Count | Source
1 | | //===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs -------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | /// \file |
9 | | /// This file implements semantic analysis for CUDA constructs. |
10 | | /// |
11 | | //===----------------------------------------------------------------------===// |
12 | | |
13 | | #include "clang/AST/ASTContext.h" |
14 | | #include "clang/AST/Decl.h" |
15 | | #include "clang/AST/ExprCXX.h" |
16 | | #include "clang/Basic/Cuda.h" |
17 | | #include "clang/Basic/TargetInfo.h" |
18 | | #include "clang/Lex/Preprocessor.h" |
19 | | #include "clang/Sema/Lookup.h" |
20 | | #include "clang/Sema/ScopeInfo.h" |
21 | | #include "clang/Sema/Sema.h" |
22 | | #include "clang/Sema/SemaDiagnostic.h" |
23 | | #include "clang/Sema/SemaInternal.h" |
24 | | #include "clang/Sema/Template.h" |
25 | | #include "llvm/ADT/Optional.h" |
26 | | #include "llvm/ADT/SmallVector.h" |
27 | | using namespace clang; |
28 | | |
29 | 146 | template <typename AttrT> static bool hasExplicitAttr(const VarDecl *D) { |
30 | 146 | if (!D) |
31 | 0 | return false; |
32 | 146 | if (auto *A = D->getAttr<AttrT>()) |
33 | 146 | return !A->isImplicit(); |
34 | 0 | return false; |
35 | 146 | } |
36 | | |
37 | 7 | void Sema::PushForceCUDAHostDevice() { |
38 | 7 | assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); |
39 | 0 | ForceCUDAHostDeviceDepth++; |
40 | 7 | } |
41 | | |
42 | 5 | bool Sema::PopForceCUDAHostDevice() { |
43 | 5 | assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); |
44 | 5 | if (ForceCUDAHostDeviceDepth == 0) |
45 | 0 | return false; |
46 | 5 | ForceCUDAHostDeviceDepth--; |
47 | 5 | return true; |
48 | 5 | } |
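The depth counter above is driven by the `#pragma clang force_cuda_host_device` pragma. A minimal illustrative sketch (the function name is made up):

#pragma clang force_cuda_host_device begin
// Declared while ForceCUDAHostDeviceDepth > 0, so maybeAddCUDAHostDeviceAttrs
// gives this function implicit __host__ __device__ attributes.
inline int clamp01(int x) { return x < 0 ? 0 : (x > 1 ? 1 : x); }
#pragma clang force_cuda_host_device end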
49 | | |
50 | | ExprResult Sema::ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc, |
51 | | MultiExprArg ExecConfig, |
52 | 179 | SourceLocation GGGLoc) { |
53 | 179 | FunctionDecl *ConfigDecl = Context.getcudaConfigureCallDecl(); |
54 | 179 | if (!ConfigDecl) |
55 | 0 | return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use) |
56 | 0 | << getCudaConfigureFuncName()); |
57 | 179 | QualType ConfigQTy = ConfigDecl->getType(); |
58 | | |
59 | 179 | DeclRefExpr *ConfigDR = new (Context) |
60 | 179 | DeclRefExpr(Context, ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc); |
61 | 179 | MarkFunctionReferenced(LLLLoc, ConfigDecl); |
62 | | |
63 | 179 | return BuildCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr, |
64 | 179 | /*IsExecConfig=*/true); |
65 | 179 | } |
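For orientation, a rough sketch of the source construct this routine handles; `scale` and `launch` are hypothetical names. The triple-chevron execution configuration is what becomes the call to the function named by getCudaConfigureFuncName():

__global__ void scale(float *p, float s) { p[threadIdx.x] *= s; }

void launch(float *p) {
  // The <<<grid, block>>> arguments become the ExecConfig call built above.
  scale<<<1, 128>>>(p, 2.0f);
}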
66 | | |
67 | | Sema::CUDAFunctionTarget |
68 | 34 | Sema::IdentifyCUDATarget(const ParsedAttributesView &Attrs) { |
69 | 34 | bool HasHostAttr = false; |
70 | 34 | bool HasDeviceAttr = false; |
71 | 34 | bool HasGlobalAttr = false; |
72 | 34 | bool HasInvalidTargetAttr = false; |
73 | 35 | for (const ParsedAttr &AL : Attrs) { |
74 | 35 | switch (AL.getKind()) { |
75 | 13 | case ParsedAttr::AT_CUDAGlobal: |
76 | 13 | HasGlobalAttr = true; |
77 | 13 | break; |
78 | 10 | case ParsedAttr::AT_CUDAHost: |
79 | 10 | HasHostAttr = true; |
80 | 10 | break; |
81 | 12 | case ParsedAttr::AT_CUDADevice: |
82 | 12 | HasDeviceAttr = true; |
83 | 12 | break; |
84 | 0 | case ParsedAttr::AT_CUDAInvalidTarget: |
85 | 0 | HasInvalidTargetAttr = true; |
86 | 0 | break; |
87 | 0 | default: |
88 | 0 | break; |
89 | 35 | } |
90 | 35 | } |
91 | | |
92 | 34 | if (HasInvalidTargetAttr) |
93 | 0 | return CFT_InvalidTarget; |
94 | | |
95 | 34 | if (HasGlobalAttr) |
96 | 13 | return CFT_Global; |
97 | | |
98 | 21 | if (HasHostAttr && HasDeviceAttr)
99 | 6 | return CFT_HostDevice; |
100 | | |
101 | 15 | if (HasDeviceAttr) |
102 | 6 | return CFT_Device; |
103 | | |
104 | 9 | return CFT_Host; |
105 | 15 | } |
106 | | |
107 | | template <typename A> |
108 | 180k | static bool hasAttr(const FunctionDecl *D, bool IgnoreImplicitAttr) { |
109 | 193k | return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) {
110 | 193k | return isa<A>(Attribute) &&
111 | 193k | !(IgnoreImplicitAttr && Attribute->isImplicit());
112 | 193k | });
113 | 180k | }
114 | | |
115 | | /// IdentifyCUDATarget - Determine the CUDA compilation target for this function |
116 | | Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D, |
117 | 94.1k | bool IgnoreImplicitHDAttr) { |
118 | | // Code that lives outside a function is run on the host. |
119 | 94.1k | if (D == nullptr) |
120 | 797 | return CFT_Host; |
121 | | |
122 | 93.4k | if (D->hasAttr<CUDAInvalidTargetAttr>()) |
123 | 39 | return CFT_InvalidTarget; |
124 | | |
125 | 93.3k | if (D->hasAttr<CUDAGlobalAttr>()) |
126 | 3.12k | return CFT_Global; |
127 | | |
132 | 62.7k | } else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) {
133 | 4.94k | return CFT_Host;
134 | 22.5k | } else if ((D->isImplicit() || !D->isUserProvided()) &&
135 | 22.5k | !IgnoreImplicitHDAttr) {
132 | 62.7k | } else if (27.4k hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)27.4k ) { |
133 | 4.94k | return CFT_Host; |
134 | 22.5k | } else if ((D->isImplicit() || !D->isUserProvided()21.7k ) && |
135 | 22.5k | !IgnoreImplicitHDAttr863 ) { |
136 | | // Some implicit declarations (like intrinsic functions) are not marked. |
137 | | // Set the most lenient target on them for maximal flexibility. |
138 | 863 | return CFT_HostDevice; |
139 | 863 | } |
140 | | |
141 | 21.6k | return CFT_Host; |
142 | 90.2k | } |
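Roughly, the attribute-to-target mapping computed above looks like this on user code (illustrative declarations only):

__global__ void kernel();              // CFT_Global
__device__ int dev_fn();               // CFT_Device
__host__ int host_fn();                // CFT_Host
__host__ __device__ int hd_fn();       // CFT_HostDevice
int unattributed();                    // CFT_Host (no CUDA attributes)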
143 | | |
144 | | /// IdentifyTarget - Determine the CUDA compilation target for this variable. |
145 | 1.03k | Sema::CUDAVariableTarget Sema::IdentifyCUDATarget(const VarDecl *Var) { |
146 | 1.03k | if (Var->hasAttr<HIPManagedAttr>()) |
147 | 68 | return CVT_Unified; |
148 | | // Only constexpr and const variables with implicit constant attribute
149 | | // are emitted on both sides. Such variables are promoted to device side
150 | | // only if they have static constant initializers on device side.
151 | 967 | if ((Var->isConstexpr() || Var->getType().isConstQualified()) &&
152 | 967 | Var->hasAttr<CUDAConstantAttr>() &&
153 | 967 | !hasExplicitAttr<CUDAConstantAttr>(Var))
154 | 134 | return CVT_Both;
155 | 833 | if (Var->hasAttr<CUDADeviceAttr>() || Var->hasAttr<CUDAConstantAttr>() ||
156 | 833 | Var->hasAttr<CUDASharedAttr>() ||
157 | 833 | Var->getType()->isCUDADeviceBuiltinSurfaceType() ||
158 | 833 | Var->getType()->isCUDADeviceBuiltinTextureType())
159 | 550 | return CVT_Device; |
160 | | // Function-scope static variable without explicit device or constant |
161 | | // attribute are emitted |
162 | | // - on both sides in host device functions |
163 | | // - on device side in device or global functions |
164 | 283 | if (auto *FD = dyn_cast<FunctionDecl>(Var->getDeclContext())) { |
165 | 64 | switch (IdentifyCUDATarget(FD)) { |
166 | 22 | case CFT_HostDevice: |
167 | 22 | return CVT_Both; |
168 | 23 | case CFT_Device: |
169 | 36 | case CFT_Global: |
170 | 36 | return CVT_Device; |
171 | 6 | default: |
172 | 6 | return CVT_Host; |
173 | 64 | } |
174 | 64 | } |
175 | 219 | return CVT_Host; |
176 | 283 | } |
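An illustrative mapping for variables, assuming implicit __constant__ promotion (see MaybeAddCUDAConstantAttr below) has already applied where noted:

__managed__ int m;        // CVT_Unified
__device__  int d;        // CVT_Device
__constant__ int c;       // CVT_Device (explicit attribute)
constexpr   int k = 1;    // CVT_Both once it carries an implicit __constant__

__host__ __device__ void hd() {
  static int s;           // function-scope static in an HD function: CVT_Both
}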
177 | | |
178 | | // * CUDA Call preference table |
179 | | // |
180 | | // F - from, |
181 | | // T - to |
182 | | // Ph - preference in host mode |
183 | | // Pd - preference in device mode |
184 | | // H - handled in (x) |
185 | | // Preferences: N:native, SS:same side, HD:host-device, WS:wrong side, --:never. |
186 | | // |
187 | | // | F | T | Ph | Pd | H | |
188 | | // |----+----+-----+-----+-----+ |
189 | | // | d | d | N | N | (c) | |
190 | | // | d | g | -- | -- | (a) | |
191 | | // | d | h | -- | -- | (e) | |
192 | | // | d | hd | HD | HD | (b) | |
193 | | // | g | d | N | N | (c) | |
194 | | // | g | g | -- | -- | (a) | |
195 | | // | g | h | -- | -- | (e) | |
196 | | // | g | hd | HD | HD | (b) | |
197 | | // | h | d | -- | -- | (e) | |
198 | | // | h | g | N | N | (c) | |
199 | | // | h | h | N | N | (c) | |
200 | | // | h | hd | HD | HD | (b) | |
201 | | // | hd | d | WS | SS | (d) | |
202 | | // | hd | g | SS | -- |(d/a)| |
203 | | // | hd | h | SS | WS | (d) | |
204 | | // | hd | hd | HD | HD | (b) | |
205 | | |
206 | | Sema::CUDAFunctionPreference |
207 | | Sema::IdentifyCUDAPreference(const FunctionDecl *Caller, |
208 | 27.1k | const FunctionDecl *Callee) { |
209 | 27.1k | assert(Callee && "Callee must be valid."); |
210 | 0 | CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller); |
211 | 27.1k | CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee); |
212 | | |
213 | | // If one of the targets is invalid, the check always fails, no matter what |
214 | | // the other target is. |
215 | 27.1k | if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget) |
216 | 7 | return CFP_Never; |
217 | | |
218 | | // (a) Can't call global from some contexts until we support CUDA's |
219 | | // dynamic parallelism. |
220 | 27.1k | if (CalleeTarget == CFT_Global && |
221 | 27.1k | (CallerTarget == CFT_Global || CallerTarget == CFT_Device))
222 | 7 | return CFP_Never; |
223 | | |
224 | | // (b) Calling HostDevice is OK for everyone. |
225 | 27.1k | if (CalleeTarget == CFT_HostDevice) |
226 | 11.5k | return CFP_HostDevice; |
227 | | |
228 | | // (c) Best case scenarios |
229 | 15.6k | if (CalleeTarget == CallerTarget || |
230 | 15.6k | (CallerTarget == CFT_Host && CalleeTarget == CFT_Global) ||
231 | 15.6k | (CallerTarget == CFT_Global && CalleeTarget == CFT_Device))
232 | 13.9k | return CFP_Native; |
233 | | |
234 | | // (d) HostDevice behavior depends on compilation mode. |
235 | 1.64k | if (CallerTarget == CFT_HostDevice) { |
236 | | // It's OK to call a compilation-mode matching function from an HD one. |
237 | 1.36k | if ((getLangOpts().CUDAIsDevice && CalleeTarget == CFT_Device) ||
238 | 1.36k | (!getLangOpts().CUDAIsDevice &&
239 | 1.08k | (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global)))
240 | 725 | return CFP_SameSide; |
241 | | |
242 | | // Calls from HD to non-mode-matching functions (i.e., to host functions |
243 | | // when compiling in device mode or to device functions when compiling in |
244 | | // host mode) are allowed at the sema level, but eventually rejected if |
245 | | // they're ever codegened. TODO: Reject said calls earlier. |
246 | 641 | return CFP_WrongSide; |
247 | 1.36k | } |
248 | | |
249 | | // (e) Calling across device/host boundary is not something you should do. |
250 | 275 | if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) ||
251 | 275 | (CallerTarget == CFT_Device && CalleeTarget == CFT_Host) ||
252 | 275 | (CallerTarget == CFT_Global && CalleeTarget == CFT_Host))
253 | 275 | return CFP_Never; |
254 | | |
255 | 0 | llvm_unreachable("All cases should've been handled by now."); |
256 | 0 | } |
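A small example of the (d) rows of the preference table; names are hypothetical:

__host__   int on_host();
__device__ int on_device();

__host__ __device__ int hd() {
  // Device compilation: on_device() is CFP_SameSide, while on_host() would be
  // CFP_WrongSide -- accepted by Sema but rejected if this HD function is
  // actually emitted for the device. Host compilation flips the two.
  return on_device();
}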
257 | | |
258 | 0 | template <typename AttrT> static bool hasImplicitAttr(const FunctionDecl *D) { |
259 | 0 | if (!D) |
260 | 0 | return false; |
261 | 0 | if (auto *A = D->getAttr<AttrT>()) |
262 | 0 | return A->isImplicit(); |
263 | 0 | return D->isImplicit(); |
264 | 0 | }
265 | | |
266 | 0 | bool Sema::isCUDAImplicitHostDeviceFunction(const FunctionDecl *D) { |
267 | 0 | bool IsImplicitDevAttr = hasImplicitAttr<CUDADeviceAttr>(D); |
268 | 0 | bool IsImplicitHostAttr = hasImplicitAttr<CUDAHostAttr>(D); |
269 | 0 | return IsImplicitDevAttr && IsImplicitHostAttr; |
270 | 0 | } |
271 | | |
272 | | void Sema::EraseUnwantedCUDAMatches( |
273 | | const FunctionDecl *Caller, |
274 | 4 | SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) { |
275 | 4 | if (Matches.size() <= 1) |
276 | 0 | return; |
277 | | |
278 | 4 | using Pair = std::pair<DeclAccessPair, FunctionDecl*>; |
279 | | |
280 | | // Gets the CUDA function preference for a call from Caller to Match. |
281 | 20 | auto GetCFP = [&](const Pair &Match) { |
282 | 20 | return IdentifyCUDAPreference(Caller, Match.second); |
283 | 20 | }; |
284 | | |
285 | | // Find the best call preference among the functions in Matches. |
286 | 4 | CUDAFunctionPreference BestCFP = GetCFP(*std::max_element( |
287 | 4 | Matches.begin(), Matches.end(), |
288 | 4 | [&](const Pair &M1, const Pair &M2) { return GetCFP(M1) < GetCFP(M2); })); |
289 | | |
290 | | // Erase all functions with lower priority. |
291 | 4 | llvm::erase_if(Matches, |
292 | 8 | [&](const Pair &Match) { return GetCFP(Match) < BestCFP; }); |
293 | 4 | } |
294 | | |
295 | | /// When an implicitly-declared special member has to invoke more than one |
296 | | /// base/field special member, conflicts may occur in the targets of these |
297 | | /// members. For example, if one base's member is __host__ and another's is
298 | | /// __device__, it's a conflict. |
299 | | /// This function figures out if the given targets \param Target1 and |
300 | | /// \param Target2 conflict, and if they do not it fills in |
301 | | /// \param ResolvedTarget with a target that resolves for both calls. |
302 | | /// \return true if there's a conflict, false otherwise. |
303 | | static bool |
304 | | resolveCalleeCUDATargetConflict(Sema::CUDAFunctionTarget Target1, |
305 | | Sema::CUDAFunctionTarget Target2, |
306 | 83 | Sema::CUDAFunctionTarget *ResolvedTarget) { |
307 | | // Only free functions and static member functions may be global. |
308 | 83 | assert(Target1 != Sema::CFT_Global); |
309 | 0 | assert(Target2 != Sema::CFT_Global); |
310 | | |
311 | 83 | if (Target1 == Sema::CFT_HostDevice) { |
312 | 53 | *ResolvedTarget = Target2; |
313 | 53 | } else if (Target2 == Sema::CFT_HostDevice) {
314 | 1 | *ResolvedTarget = Target1; |
315 | 29 | } else if (Target1 != Target2) { |
316 | 25 | return true; |
317 | 25 | } else { |
318 | 4 | *ResolvedTarget = Target1; |
319 | 4 | } |
320 | | |
321 | 58 | return false; |
322 | 83 | } |
323 | | |
324 | | bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl, |
325 | | CXXSpecialMember CSM, |
326 | | CXXMethodDecl *MemberDecl, |
327 | | bool ConstRHS, |
328 | 3.04k | bool Diagnose) { |
329 | | // If the defaulted special member is defined lexically outside of its |
330 | | // owning class, or the special member already has explicit device or host |
331 | | // attributes, do not infer. |
332 | 3.04k | bool InClass = MemberDecl->getLexicalParent() == MemberDecl->getParent(); |
333 | 3.04k | bool HasH = MemberDecl->hasAttr<CUDAHostAttr>(); |
334 | 3.04k | bool HasD = MemberDecl->hasAttr<CUDADeviceAttr>(); |
335 | 3.04k | bool HasExplicitAttr = |
336 | 3.04k | (HasD && !MemberDecl->getAttr<CUDADeviceAttr>()->isImplicit()) ||
337 | 3.04k | (HasH && !MemberDecl->getAttr<CUDAHostAttr>()->isImplicit());
338 | 3.04k | if (!InClass || HasExplicitAttr)
339 | 14 | return false; |
340 | | |
341 | 3.03k | llvm::Optional<CUDAFunctionTarget> InferredTarget; |
342 | | |
343 | | // We're going to invoke special member lookup; mark that these special |
344 | | // members are called from this one, and not from its caller. |
345 | 3.03k | ContextRAII MethodContext(*this, MemberDecl); |
346 | | |
347 | | // Look for special members in base classes that should be invoked from here. |
348 | | // Infer the target of this member based on the ones it should call.
349 | | // Skip direct and indirect virtual bases for abstract classes. |
350 | 3.03k | llvm::SmallVector<const CXXBaseSpecifier *, 16> Bases; |
351 | 3.03k | for (const auto &B : ClassDecl->bases()) { |
352 | 355 | if (!B.isVirtual()) { |
353 | 355 | Bases.push_back(&B); |
354 | 355 | } |
355 | 355 | } |
356 | | |
357 | 3.03k | if (!ClassDecl->isAbstract()) { |
358 | 2.97k | llvm::append_range(Bases, llvm::make_pointer_range(ClassDecl->vbases())); |
359 | 2.97k | } |
360 | | |
361 | 3.03k | for (const auto *B : Bases) { |
362 | 355 | const RecordType *BaseType = B->getType()->getAs<RecordType>(); |
363 | 355 | if (!BaseType) { |
364 | 0 | continue; |
365 | 0 | } |
366 | | |
367 | 355 | CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl()); |
368 | 355 | Sema::SpecialMemberOverloadResult SMOR = |
369 | 355 | LookupSpecialMember(BaseClassDecl, CSM, |
370 | 355 | /* ConstArg */ ConstRHS, |
371 | 355 | /* VolatileArg */ false, |
372 | 355 | /* RValueThis */ false, |
373 | 355 | /* ConstThis */ false, |
374 | 355 | /* VolatileThis */ false); |
375 | | |
376 | 355 | if (!SMOR.getMethod()) |
377 | 5 | continue; |
378 | | |
379 | 350 | CUDAFunctionTarget BaseMethodTarget = IdentifyCUDATarget(SMOR.getMethod()); |
380 | 350 | if (!InferredTarget) { |
381 | 320 | InferredTarget = BaseMethodTarget; |
382 | 320 | } else { |
383 | 30 | bool ResolutionError = resolveCalleeCUDATargetConflict( |
384 | 30 | InferredTarget.value(), BaseMethodTarget, |
385 | 30 | InferredTarget.getPointer()); |
386 | 30 | if (ResolutionError) { |
387 | 13 | if (Diagnose) { |
388 | 3 | Diag(ClassDecl->getLocation(), |
389 | 3 | diag::note_implicit_member_target_infer_collision) |
390 | 3 | << (unsigned)CSM << InferredTarget.value() << BaseMethodTarget; |
391 | 3 | } |
392 | 13 | MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context)); |
393 | 13 | return true; |
394 | 13 | } |
395 | 30 | } |
396 | 350 | } |
397 | | |
398 | | // Same as for bases, but now for special members of fields. |
399 | 3.02k | for (const auto *F : ClassDecl->fields()) { |
400 | 2.49k | if (F->isInvalidDecl()) { |
401 | 0 | continue; |
402 | 0 | } |
403 | | |
404 | 2.49k | const RecordType *FieldType = |
405 | 2.49k | Context.getBaseElementType(F->getType())->getAs<RecordType>(); |
406 | 2.49k | if (!FieldType) { |
407 | 2.25k | continue; |
408 | 2.25k | } |
409 | | |
410 | 242 | CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl()); |
411 | 242 | Sema::SpecialMemberOverloadResult SMOR = |
412 | 242 | LookupSpecialMember(FieldRecDecl, CSM, |
413 | 242 | /* ConstArg */ ConstRHS && !F->isMutable(),
414 | 242 | /* VolatileArg */ false, |
415 | 242 | /* RValueThis */ false, |
416 | 242 | /* ConstThis */ false, |
417 | 242 | /* VolatileThis */ false); |
418 | | |
419 | 242 | if (!SMOR.getMethod()) |
420 | 0 | continue; |
421 | | |
422 | 242 | CUDAFunctionTarget FieldMethodTarget = |
423 | 242 | IdentifyCUDATarget(SMOR.getMethod()); |
424 | 242 | if (!InferredTarget) { |
425 | 189 | InferredTarget = FieldMethodTarget; |
426 | 189 | } else { |
427 | 53 | bool ResolutionError = resolveCalleeCUDATargetConflict( |
428 | 53 | InferredTarget.value(), FieldMethodTarget, |
429 | 53 | InferredTarget.getPointer()); |
430 | 53 | if (ResolutionError) { |
431 | 12 | if (Diagnose) { |
432 | 4 | Diag(ClassDecl->getLocation(), |
433 | 4 | diag::note_implicit_member_target_infer_collision) |
434 | 4 | << (unsigned)CSM << InferredTarget.value() << FieldMethodTarget; |
435 | 4 | } |
436 | 12 | MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context)); |
437 | 12 | return true; |
438 | 12 | } |
439 | 53 | } |
440 | 242 | } |
441 | | |
442 | | |
443 | | // If no target was inferred, mark this member as __host__ __device__; |
444 | | // it's the least restrictive option that can be invoked from any target. |
445 | 3.00k | bool NeedsH = true, NeedsD = true; |
446 | 3.00k | if (InferredTarget) { |
447 | 484 | if (InferredTarget.value() == CFT_Device) |
448 | 58 | NeedsH = false; |
449 | 426 | else if (InferredTarget.value() == CFT_Host) |
450 | 67 | NeedsD = false; |
451 | 484 | } |
452 | | |
453 | | // We are either setting the attributes for the first time, or the inferred
454 | | // ones must match the previously set ones.
455 | 3.00k | if (NeedsD && !HasD)
456 | 1.47k | MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
457 | 3.00k | if (NeedsH && !HasH)
458 | 1.47k | MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context)); |
459 | | |
460 | 3.00k | return false; |
461 | 3.02k | } |
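A sketch of the conflict case handled above (hypothetical types):

struct HostOnly   { __host__   HostOnly(); };
struct DeviceOnly { __device__ DeviceOnly(); };

// The implicit default constructor of Both would need to call one __host__
// and one __device__ base constructor; the targets conflict, so the member
// gets CUDAInvalidTargetAttr and its use is diagnosed.
struct Both : HostOnly, DeviceOnly {};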
462 | | |
463 | 33 | bool Sema::isEmptyCudaConstructor(SourceLocation Loc, CXXConstructorDecl *CD) { |
464 | 33 | if (!CD->isDefined() && CD->isTemplateInstantiation())
465 | 0 | InstantiateFunctionDefinition(Loc, CD->getFirstDecl()); |
466 | | |
467 | | // (E.2.3.1, CUDA 7.5) A constructor for a class type is considered |
468 | | // empty at a point in the translation unit, if it is either a |
469 | | // trivial constructor |
470 | 33 | if (CD->isTrivial()) |
471 | 9 | return true; |
472 | | |
473 | | // ... or it satisfies all of the following conditions: |
474 | | // The constructor function has been defined. |
475 | | // The constructor function has no parameters, |
476 | | // and the function body is an empty compound statement. |
477 | 24 | if (!(CD->hasTrivialBody() && CD->getNumParams() == 0))
478 | 15 | return false; |
479 | | |
480 | | // Its class has no virtual functions and no virtual base classes. |
481 | 9 | if (CD->getParent()->isDynamicClass()) |
482 | 0 | return false; |
483 | | |
484 | | // Union ctor does not call ctors of its data members. |
485 | 9 | if (CD->getParent()->isUnion()) |
486 | 4 | return true; |
487 | | |
488 | | // The only form of initializer allowed is an empty constructor. |
489 | | // This will recursively check all base classes and member initializers |
490 | 5 | if (!llvm::all_of(CD->inits(), [&](const CXXCtorInitializer *CI) { |
491 | 4 | if (const CXXConstructExpr *CE = |
492 | 4 | dyn_cast<CXXConstructExpr>(CI->getInit())) |
493 | 4 | return isEmptyCudaConstructor(Loc, CE->getConstructor()); |
494 | 0 | return false; |
495 | 4 | })) |
496 | 4 | return false; |
497 | | |
498 | 1 | return true; |
499 | 5 | } |
500 | | |
501 | 26 | bool Sema::isEmptyCudaDestructor(SourceLocation Loc, CXXDestructorDecl *DD) { |
502 | | // No destructor -> no problem. |
503 | 26 | if (!DD) |
504 | 17 | return true; |
505 | | |
506 | 9 | if (!DD->isDefined() && DD->isTemplateInstantiation())
507 | 0 | InstantiateFunctionDefinition(Loc, DD->getFirstDecl()); |
508 | | |
509 | | // (E.2.3.1, CUDA 7.5) A destructor for a class type is considered |
510 | | // empty at a point in the translation unit, if it is either a |
511 | | // trivial destructor
512 | 9 | if (DD->isTrivial()) |
513 | 4 | return true; |
514 | | |
515 | | // ... or it satisfies all of the following conditions: |
516 | | // The destructor function has been defined. |
517 | | // and the function body is an empty compound statement. |
518 | 5 | if (!DD->hasTrivialBody()) |
519 | 3 | return false; |
520 | | |
521 | 2 | const CXXRecordDecl *ClassDecl = DD->getParent(); |
522 | | |
523 | | // Its class has no virtual functions and no virtual base classes. |
524 | 2 | if (ClassDecl->isDynamicClass()) |
525 | 0 | return false; |
526 | | |
527 | | // Union does not have base class and union dtor does not call dtors of its |
528 | | // data members. |
529 | 2 | if (DD->getParent()->isUnion()) |
530 | 2 | return true; |
531 | | |
532 | | // Only empty destructors are allowed. This will recursively check |
533 | | // destructors for all base classes... |
534 | 0 | if (!llvm::all_of(ClassDecl->bases(), [&](const CXXBaseSpecifier &BS) { |
535 | 0 | if (CXXRecordDecl *RD = BS.getType()->getAsCXXRecordDecl()) |
536 | 0 | return isEmptyCudaDestructor(Loc, RD->getDestructor()); |
537 | 0 | return true; |
538 | 0 | })) |
539 | 0 | return false; |
540 | | |
541 | | // ... and member fields. |
542 | 0 | if (!llvm::all_of(ClassDecl->fields(), [&](const FieldDecl *Field) { |
543 | 0 | if (CXXRecordDecl *RD = Field->getType() |
544 | 0 | ->getBaseElementTypeUnsafe() |
545 | 0 | ->getAsCXXRecordDecl()) |
546 | 0 | return isEmptyCudaDestructor(Loc, RD->getDestructor()); |
547 | 0 | return true; |
548 | 0 | })) |
549 | 0 | return false; |
550 | | |
551 | 0 | return true; |
552 | 0 | } |
553 | | |
554 | | namespace { |
555 | | enum CUDAInitializerCheckKind { |
556 | | CICK_DeviceOrConstant, // Check initializer for device/constant variable |
557 | | CICK_Shared, // Check initializer for shared variable |
558 | | }; |
559 | | |
560 | 1.37k | bool IsDependentVar(VarDecl *VD) { |
561 | 1.37k | if (VD->getType()->isDependentType()) |
562 | 28 | return true; |
563 | 1.34k | if (const auto *Init = VD->getInit()) |
564 | 1.20k | return Init->isValueDependent(); |
565 | 140 | return false; |
566 | 1.34k | } |
567 | | |
568 | | // Check whether a variable has an allowed initializer for a CUDA device side |
569 | | // variable with global storage. \p VD may be a host variable to be checked for |
570 | | // potential promotion to device side variable. |
571 | | // |
572 | | // CUDA/HIP allows only empty constructors as initializers for global |
573 | | // variables (see E.2.3.1, CUDA 7.5). The same restriction also applies to all |
574 | | // __shared__ variables whether they are local or not (they all are implicitly |
575 | | // static in CUDA). One exception is that CUDA allows constant initializers |
576 | | // for __constant__ and __device__ variables. |
577 | | bool HasAllowedCUDADeviceStaticInitializer(Sema &S, VarDecl *VD, |
578 | 477 | CUDAInitializerCheckKind CheckKind) { |
579 | 477 | assert(!VD->isInvalidDecl() && VD->hasGlobalStorage()); |
580 | 0 | assert(!IsDependentVar(VD) && "do not check dependent var"); |
581 | 0 | const Expr *Init = VD->getInit(); |
582 | 477 | auto IsEmptyInit = [&](const Expr *Init) { |
583 | 476 | if (!Init) |
584 | 0 | return true; |
585 | 476 | if (const auto *CE = dyn_cast<CXXConstructExpr>(Init)) { |
586 | 29 | return S.isEmptyCudaConstructor(VD->getLocation(), CE->getConstructor()); |
587 | 29 | } |
588 | 447 | return false; |
589 | 476 | }; |
590 | 477 | auto IsConstantInit = [&](const Expr *Init) { |
591 | 461 | assert(Init); |
592 | 0 | ASTContext::CUDAConstantEvalContextRAII EvalCtx(S.Context, |
593 | 461 | /*NoWronSidedVars=*/true); |
594 | 461 | return Init->isConstantInitializer(S.Context, |
595 | 461 | VD->getType()->isReferenceType()); |
596 | 461 | }; |
597 | 477 | auto HasEmptyDtor = [&](VarDecl *VD) { |
598 | 444 | if (const auto *RD = VD->getType()->getAsCXXRecordDecl()) |
599 | 26 | return S.isEmptyCudaDestructor(VD->getLocation(), RD->getDestructor()); |
600 | 418 | return true; |
601 | 444 | }; |
602 | 477 | if (CheckKind == CICK_Shared) |
603 | 5 | return IsEmptyInit(Init) && HasEmptyDtor(VD);
604 | 472 | return S.LangOpts.GPUAllowDeviceInit ||
605 | 472 | ((IsEmptyInit(Init) || IsConstantInit(Init)) && HasEmptyDtor(VD));
606 | 477 | } |
607 | | } // namespace |
608 | | |
609 | 2.16k | void Sema::checkAllowedCUDAInitializer(VarDecl *VD) { |
610 | | // Do not check dependent variables since the ctor/dtor/initializer are not |
611 | | // determined. Do it after instantiation. |
612 | 2.16k | if (VD->isInvalidDecl() || !VD->hasInit() || !VD->hasGlobalStorage() ||
613 | 2.16k | IsDependentVar(VD))
614 | 1.65k | return; |
615 | 503 | const Expr *Init = VD->getInit(); |
616 | 503 | bool IsSharedVar = VD->hasAttr<CUDASharedAttr>(); |
617 | 503 | bool IsDeviceOrConstantVar = |
618 | 503 | !IsSharedVar && |
619 | 503 | (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>());
620 | 503 | if (IsDeviceOrConstantVar || IsSharedVar) {
621 | 334 | if (HasAllowedCUDADeviceStaticInitializer( |
622 | 334 | *this, VD, IsSharedVar ? CICK_Shared : CICK_DeviceOrConstant))
623 | 316 | return; |
624 | 18 | Diag(VD->getLocation(), |
625 | 18 | IsSharedVar ? diag::err_shared_var_init : diag::err_dynamic_var_init)
626 | 18 | << Init->getSourceRange(); |
627 | 18 | VD->setInvalidDecl(); |
628 | 169 | } else { |
629 | | // This is a host-side global variable. Check that the initializer is |
630 | | // callable from the host side. |
631 | 169 | const FunctionDecl *InitFn = nullptr; |
632 | 169 | if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init)) { |
633 | 37 | InitFn = CE->getConstructor(); |
634 | 132 | } else if (const CallExpr *CE = dyn_cast<CallExpr>(Init)) { |
635 | 3 | InitFn = CE->getDirectCallee(); |
636 | 3 | } |
637 | 169 | if (InitFn) { |
638 | 40 | CUDAFunctionTarget InitFnTarget = IdentifyCUDATarget(InitFn); |
639 | 40 | if (InitFnTarget != CFT_Host && InitFnTarget != CFT_HostDevice) {
640 | 3 | Diag(VD->getLocation(), diag::err_ref_bad_target_global_initializer) |
641 | 3 | << InitFnTarget << InitFn; |
642 | 3 | Diag(InitFn->getLocation(), diag::note_previous_decl) << InitFn; |
643 | 3 | VD->setInvalidDecl(); |
644 | 3 | } |
645 | 40 | } |
646 | 169 | } |
647 | 503 | } |
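An illustrative sketch of what these checks accept and reject for device-side globals, following E.2.3.1 (names hypothetical):

struct Empty    { __host__ __device__ Empty() {} };
struct NonEmpty { __host__ __device__ NonEmpty() { v = 1; } int v; };

__device__ Empty    ok1;       // empty constructor: allowed
__device__ int      ok2 = 42;  // constant initializer: allowed
__device__ NonEmpty bad;       // rejected: dynamic initialization of a
                               // __device__ variable (err_dynamic_var_init)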
648 | | |
649 | | // With -fcuda-host-device-constexpr, an unattributed constexpr function is |
650 | | // treated as implicitly __host__ __device__, unless: |
651 | | // * it is a variadic function (device-side variadic functions are not |
652 | | // allowed), or |
653 | | // * a __device__ function with this signature was already declared, in which |
654 | | // case we output an error, unless the __device__ decl is in a
655 | | // system header, in which case we leave the constexpr function unattributed. |
656 | | // |
657 | | // In addition, all function decls are treated as __host__ __device__ when |
658 | | // ForceCUDAHostDeviceDepth > 0 (corresponding to code within a |
659 | | // #pragma clang force_cuda_host_device_begin/end |
660 | | // pair). |
661 | | void Sema::maybeAddCUDAHostDeviceAttrs(FunctionDecl *NewD, |
662 | 7.73k | const LookupResult &Previous) { |
663 | 7.73k | assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); |
664 | | |
665 | 7.73k | if (ForceCUDAHostDeviceDepth > 0) { |
666 | 7 | if (!NewD->hasAttr<CUDAHostAttr>()) |
667 | 5 | NewD->addAttr(CUDAHostAttr::CreateImplicit(Context)); |
668 | 7 | if (!NewD->hasAttr<CUDADeviceAttr>()) |
669 | 5 | NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context)); |
670 | 7 | return; |
671 | 7 | } |
672 | | |
673 | 7.73k | if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr()7.70k || |
674 | 7.73k | NewD->isVariadic()41 || NewD->hasAttr<CUDAHostAttr>()37 || |
675 | 7.73k | NewD->hasAttr<CUDADeviceAttr>()33 || NewD->hasAttr<CUDAGlobalAttr>()29 ) |
676 | 7.70k | return; |
677 | | |
678 | | // Is D a __device__ function with the same signature as NewD, ignoring CUDA |
679 | | // attributes? |
680 | 29 | auto IsMatchingDeviceFn = [&](NamedDecl *D) { |
681 | 12 | if (UsingShadowDecl *Using = dyn_cast<UsingShadowDecl>(D)) |
682 | 4 | D = Using->getTargetDecl(); |
683 | 12 | FunctionDecl *OldD = D->getAsFunction(); |
684 | 12 | return OldD && OldD->hasAttr<CUDADeviceAttr>() && |
685 | 12 | !OldD->hasAttr<CUDAHostAttr>() && |
686 | 12 | !IsOverload(NewD, OldD, /* UseMemberUsingDeclRules = */ false, |
687 | 12 | /* ConsiderCudaAttrs = */ false); |
688 | 12 | }; |
689 | 29 | auto It = llvm::find_if(Previous, IsMatchingDeviceFn); |
690 | 29 | if (It != Previous.end()) { |
691 | | // We found a __device__ function with the same name and signature as NewD |
692 | | // (ignoring CUDA attrs). This is an error unless that function is defined |
693 | | // in a system header, in which case we simply return without making NewD |
694 | | // host+device. |
695 | 12 | NamedDecl *Match = *It; |
696 | 12 | if (!getSourceManager().isInSystemHeader(Match->getLocation())) { |
697 | 4 | Diag(NewD->getLocation(), |
698 | 4 | diag::err_cuda_unattributed_constexpr_cannot_overload_device) |
699 | 4 | << NewD; |
700 | 4 | Diag(Match->getLocation(), |
701 | 4 | diag::note_cuda_conflicting_device_function_declared_here); |
702 | 4 | } |
703 | 12 | return; |
704 | 12 | } |
705 | | |
706 | 17 | NewD->addAttr(CUDAHostAttr::CreateImplicit(Context)); |
707 | 17 | NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context)); |
708 | 17 | } |
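A sketch of the two outcomes described above, assuming -fcuda-host-device-constexpr (the default) and hypothetical function names:

// Unattributed constexpr: treated as implicitly __host__ __device__.
constexpr int twice(int x) { return 2 * x; }

__device__ int slot(int i);
// Re-declaring 'slot' as an unattributed constexpr with the same signature
// would be diagnosed (err_cuda_unattributed_constexpr_cannot_overload_device)
// unless the __device__ declaration came from a system header:
// constexpr int slot(int i) { return i; }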
709 | | |
710 | | // TODO: `__constant__` memory may be a limited resource for certain targets. |
711 | | // A safeguard may be needed at the end of compilation pipeline if |
712 | | // `__constant__` memory usage goes beyond limit. |
713 | 2.66M | void Sema::MaybeAddCUDAConstantAttr(VarDecl *VD) { |
714 | | // Do not promote dependent variables since the ctor/dtor/initializer are
715 | | // not determined. Do it after instantiation. |
716 | 2.66M | if (getLangOpts().CUDAIsDevice && !VD->hasAttr<CUDAConstantAttr>() &&
717 | 2.66M | !VD->hasAttr<CUDAConstantAttr>() && !VD->hasAttr<CUDASharedAttr>() &&
718 | 2.66M | (VD->isFileVarDecl() || VD->isStaticDataMember()) &&
719 | 2.66M | !IsDependentVar(VD) &&
720 | 2.66M | ((VD->isConstexpr() || VD->getType().isConstQualified()) &&
721 | 336 | HasAllowedCUDADeviceStaticInitializer(*this, VD, |
722 | 143 | CICK_DeviceOrConstant))) { |
723 | 126 | VD->addAttr(CUDAConstantAttr::CreateImplicit(getASTContext())); |
724 | 126 | } |
725 | 2.66M | } |
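For instance, under device compilation a file-scope constexpr with a constant initializer is promoted this way (hypothetical names):

constexpr float kPi = 3.14159265f;   // gets an implicit __constant__ attribute

__device__ float circumference(float r) { return 2.0f * kPi * r; }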
726 | | |
727 | | Sema::SemaDiagnosticBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc, |
728 | 108 | unsigned DiagID) { |
729 | 108 | assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); |
730 | 0 | FunctionDecl *CurFunContext = getCurFunctionDecl(/*AllowLambda=*/true); |
731 | 108 | SemaDiagnosticBuilder::Kind DiagKind = [&] { |
732 | 108 | if (!CurFunContext) |
733 | 2 | return SemaDiagnosticBuilder::K_Nop; |
734 | 106 | switch (CurrentCUDATarget()) { |
735 | 10 | case CFT_Global: |
736 | 62 | case CFT_Device: |
737 | 62 | return SemaDiagnosticBuilder::K_Immediate; |
738 | 34 | case CFT_HostDevice: |
739 | | // An HD function counts as host code if we're compiling for host, and |
740 | | // device code if we're compiling for device. Defer any errors in device |
741 | | // mode until the function is known-emitted. |
742 | 34 | if (!getLangOpts().CUDAIsDevice) |
743 | 6 | return SemaDiagnosticBuilder::K_Nop; |
744 | 28 | if (IsLastErrorImmediate && Diags.getDiagnosticIDs()->isBuiltinNote(DiagID)) |
745 | 7 | return SemaDiagnosticBuilder::K_Immediate; |
746 | 21 | return (getEmissionStatus(CurFunContext) == |
747 | 21 | FunctionEmissionStatus::Emitted) |
748 | 21 | ? SemaDiagnosticBuilder::K_ImmediateWithCallStack5 |
749 | 21 | : SemaDiagnosticBuilder::K_Deferred16 ; |
750 | 10 | default: |
751 | 10 | return SemaDiagnosticBuilder::K_Nop; |
752 | 106 | } |
753 | 106 | }(); |
754 | 108 | return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, CurFunContext, *this); |
755 | 108 | } |
756 | | |
757 | | Sema::SemaDiagnosticBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc, |
758 | 41 | unsigned DiagID) { |
759 | 41 | assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); |
760 | 0 | FunctionDecl *CurFunContext = getCurFunctionDecl(/*AllowLambda=*/true); |
761 | 41 | SemaDiagnosticBuilder::Kind DiagKind = [&] { |
762 | 41 | if (!CurFunContext) |
763 | 0 | return SemaDiagnosticBuilder::K_Nop; |
764 | 41 | switch (CurrentCUDATarget()) { |
765 | 6 | case CFT_Host: |
766 | 6 | return SemaDiagnosticBuilder::K_Immediate; |
767 | 1 | case CFT_HostDevice: |
768 | | // An HD function counts as host code if we're compiling for host, and |
769 | | // device code if we're compiling for device. Defer any errors in device |
770 | | // mode until the function is known-emitted. |
771 | 1 | if (getLangOpts().CUDAIsDevice) |
772 | 0 | return SemaDiagnosticBuilder::K_Nop; |
773 | 1 | if (IsLastErrorImmediate && Diags.getDiagnosticIDs()->isBuiltinNote(DiagID)) |
774 | 0 | return SemaDiagnosticBuilder::K_Immediate; |
775 | 1 | return (getEmissionStatus(CurFunContext) == |
776 | 1 | FunctionEmissionStatus::Emitted) |
777 | 1 | ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
778 | 1 | : SemaDiagnosticBuilder::K_Deferred; |
779 | 34 | default: |
780 | 34 | return SemaDiagnosticBuilder::K_Nop; |
781 | 41 | } |
782 | 41 | }(); |
783 | 41 | return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, CurFunContext, *this); |
784 | 41 | } |
785 | | |
786 | 15.1k | bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) { |
787 | 15.1k | assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); |
788 | 0 | assert(Callee && "Callee may not be null."); |
789 | | |
790 | 0 | auto &ExprEvalCtx = ExprEvalContexts.back(); |
791 | 15.1k | if (ExprEvalCtx.isUnevaluated() || ExprEvalCtx.isConstantEvaluated())
792 | 107 | return true; |
793 | | |
794 | | // FIXME: Is bailing out early correct here? Should we instead assume that |
795 | | // the caller is a global initializer? |
796 | 15.0k | FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda=*/true); |
797 | 15.0k | if (!Caller) |
798 | 287 | return true; |
799 | | |
800 | | // If the caller is known-emitted, mark the callee as known-emitted. |
801 | | // Otherwise, mark the call in our call graph so we can traverse it later. |
802 | 14.7k | bool CallerKnownEmitted = |
803 | 14.7k | getEmissionStatus(Caller) == FunctionEmissionStatus::Emitted; |
804 | 14.7k | SemaDiagnosticBuilder::Kind DiagKind = [this, Caller, Callee, |
805 | 14.7k | CallerKnownEmitted] { |
806 | 14.7k | switch (IdentifyCUDAPreference(Caller, Callee)) { |
807 | 32 | case CFP_Never: |
808 | 174 | case CFP_WrongSide: |
809 | 174 | assert(Caller && "Never/wrongSide calls require a non-null caller"); |
810 | | // If we know the caller will be emitted, we know this wrong-side call |
811 | | // will be emitted, so it's an immediate error. Otherwise, defer the |
812 | | // error until we know the caller is emitted. |
813 | 174 | return CallerKnownEmitted |
814 | 174 | ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
815 | 174 | : SemaDiagnosticBuilder::K_Deferred;
816 | 14.6k | default: |
817 | 14.6k | return SemaDiagnosticBuilder::K_Nop; |
818 | 14.7k | } |
819 | 14.7k | }(); |
820 | | |
821 | 14.7k | if (DiagKind == SemaDiagnosticBuilder::K_Nop) { |
822 | | // For -fgpu-rdc, keep track of external kernels used by host functions. |
823 | 14.6k | if (LangOpts.CUDAIsDevice && LangOpts.GPURelocatableDeviceCode &&
824 | 14.6k | Callee->hasAttr<CUDAGlobalAttr>() && !Callee->isDefined())
825 | 16 | getASTContext().CUDAExternalDeviceDeclODRUsedByHost.insert(Callee); |
826 | 14.6k | return true; |
827 | 14.6k | } |
828 | | |
829 | | // Avoid emitting this error twice for the same location. Using a hashtable |
830 | | // like this is unfortunate, but because we must continue parsing as normal |
831 | | // after encountering a deferred error, it's otherwise very tricky for us to |
832 | | // ensure that we only emit this deferred error once. |
833 | 174 | if (!LocsWithCUDACallDiags.insert({Caller, Loc}).second) |
834 | 74 | return true; |
835 | | |
836 | 100 | SemaDiagnosticBuilder(DiagKind, Loc, diag::err_ref_bad_target, Caller, *this) |
837 | 100 | << IdentifyCUDATarget(Callee) << /*function*/ 0 << Callee |
838 | 100 | << IdentifyCUDATarget(Caller); |
839 | 100 | if (!Callee->getBuiltinID()) |
840 | 92 | SemaDiagnosticBuilder(DiagKind, Callee->getLocation(), |
841 | 92 | diag::note_previous_decl, Caller, *this) |
842 | 92 | << Callee; |
843 | 100 | return DiagKind != SemaDiagnosticBuilder::K_Immediate && |
844 | 100 | DiagKind != SemaDiagnosticBuilder::K_ImmediateWithCallStack; |
845 | 174 | } |
846 | | |
847 | | // Check the wrong-sided reference capture of lambda for CUDA/HIP. |
848 | | // A lambda function may capture a stack variable by reference when it is |
849 | | // defined and uses the capture by reference when the lambda is called. When |
850 | | // the capture and use happen on different sides, the capture is invalid and |
851 | | // should be diagnosed. |
852 | | void Sema::CUDACheckLambdaCapture(CXXMethodDecl *Callee, |
853 | 98 | const sema::Capture &Capture) { |
854 | | // In host compilation we only need to check lambda functions emitted on host |
855 | | // side. In such lambda functions, a reference capture is invalid only |
856 | | // if the lambda structure is populated by a device function or kernel then |
857 | | // is passed to and called by a host function. However that is impossible, |
858 | | // since a device function or kernel can only call a device function, also a |
859 | | // kernel cannot pass a lambda back to a host function since we cannot |
860 | | // define a kernel argument type which can hold the lambda before the lambda |
861 | | // itself is defined. |
862 | 98 | if (!LangOpts.CUDAIsDevice) |
863 | 32 | return; |
864 | | |
865 | | // File-scope lambda can only do init captures for global variables, which |
866 | | // results in passing by value for these global variables. |
867 | 66 | FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda=*/true); |
868 | 66 | if (!Caller) |
869 | 0 | return; |
870 | | |
871 | | // In device compilation, we only need to check lambda functions which are |
872 | | // emitted on device side. For such lambdas, a reference capture is invalid |
873 | | // only if the lambda structure is populated by a host function then passed |
874 | | // to and called in a device function or kernel. |
875 | 66 | bool CalleeIsDevice = Callee->hasAttr<CUDADeviceAttr>(); |
876 | 66 | bool CallerIsHost = |
877 | 66 | !Caller->hasAttr<CUDAGlobalAttr>() && !Caller->hasAttr<CUDADeviceAttr>(); |
878 | 66 | bool ShouldCheck = CalleeIsDevice && CallerIsHost; |
879 | 66 | if (!ShouldCheck || !Capture.isReferenceCapture())
880 | 26 | return; |
881 | 40 | auto DiagKind = SemaDiagnosticBuilder::K_Deferred; |
882 | 40 | if (Capture.isVariableCapture()) { |
883 | 19 | SemaDiagnosticBuilder(DiagKind, Capture.getLocation(), |
884 | 19 | diag::err_capture_bad_target, Callee, *this) |
885 | 19 | << Capture.getVariable(); |
886 | 21 | } else if (Capture.isThisCapture()) { |
887 | | // Capture of this pointer is allowed since this pointer may be pointing to |
888 | | // managed memory which is accessible on both device and host sides. It only |
889 | | // results in invalid memory access if this pointer points to memory not |
890 | | // accessible on device side. |
891 | 21 | SemaDiagnosticBuilder(DiagKind, Capture.getLocation(), |
892 | 21 | diag::warn_maybe_capture_bad_target_this_ptr, Callee, |
893 | 21 | *this); |
894 | 21 | } |
895 | 40 | } |
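A sketch of the capture pattern being diagnosed; the device-side use of the lambda is implied rather than shown:

void host_fn() {
  int x = 0;
  // 'x' lives on the host stack; the __device__ lambda captures it by
  // reference. If the lambda body is emitted for the device and called there,
  // the capture is reported via err_capture_bad_target (deferred until
  // emission).
  auto use_x = [&x] __device__ () { return x; };
  (void)use_x;
}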
896 | | |
897 | 227 | void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) { |
898 | 227 | assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); |
899 | 227 | if (Method->hasAttr<CUDAHostAttr>() || Method->hasAttr<CUDADeviceAttr>()187 ) |
900 | 90 | return; |
901 | 137 | Method->addAttr(CUDADeviceAttr::CreateImplicit(Context)); |
902 | 137 | Method->addAttr(CUDAHostAttr::CreateImplicit(Context)); |
903 | 137 | } |
904 | | |
905 | | void Sema::checkCUDATargetOverload(FunctionDecl *NewFD, |
906 | 9.67k | const LookupResult &Previous) { |
907 | 9.67k | assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); |
908 | 0 | CUDAFunctionTarget NewTarget = IdentifyCUDATarget(NewFD); |
909 | 9.67k | for (NamedDecl *OldND : Previous) { |
910 | 6.90k | FunctionDecl *OldFD = OldND->getAsFunction(); |
911 | 6.90k | if (!OldFD) |
912 | 211 | continue; |
913 | | |
914 | 6.69k | CUDAFunctionTarget OldTarget = IdentifyCUDATarget(OldFD); |
915 | | // Don't allow HD and global functions to overload other functions with the |
916 | | // same signature. We allow overloading based on CUDA attributes so that |
917 | | // functions can have different implementations on the host and device, but |
918 | | // HD/global functions "exist" in some sense on both the host and device, so |
919 | | // should have the same implementation on both sides. |
920 | 6.69k | if (NewTarget != OldTarget && |
921 | 6.69k | ((NewTarget == CFT_HostDevice) || (OldTarget == CFT_HostDevice) ||
922 | 2.24k | (NewTarget == CFT_Global) || (OldTarget == CFT_Global)) &&
923 | 6.69k | !IsOverload(NewFD, OldFD, /* UseMemberUsingDeclRules = */ false, |
924 | 216 | /* ConsiderCudaAttrs = */ false)) { |
925 | 4 | Diag(NewFD->getLocation(), diag::err_cuda_ovl_target) |
926 | 4 | << NewTarget << NewFD->getDeclName() << OldTarget << OldFD; |
927 | 4 | Diag(OldFD->getLocation(), diag::note_previous_declaration); |
928 | 4 | NewFD->setInvalidDecl(); |
929 | 4 | break; |
930 | 4 | } |
931 | 6.69k | } |
932 | 9.67k | } |
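Illustrative overload pairs for the rule above (hypothetical names):

__host__   int pick();   // host overload
__device__ int pick();   // OK: differs only by target, neither is HD/global

__host__ __device__ int pick2();
// A later __device__ declaration of pick2 with the same signature would be
// rejected with err_cuda_ovl_target, since an HD function already "exists"
// on both sides:
// __device__ int pick2();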
933 | | |
934 | | template <typename AttrTy> |
935 | | static void copyAttrIfPresent(Sema &S, FunctionDecl *FD, |
936 | 33 | const FunctionDecl &TemplateFD) { |
937 | 33 | if (AttrTy *Attribute = TemplateFD.getAttr<AttrTy>()) { |
938 | 18 | AttrTy *Clone = Attribute->clone(S.Context); |
939 | 18 | Clone->setInherited(true); |
940 | 18 | FD->addAttr(Clone); |
941 | 18 | } |
942 | 33 | }
943 | | |
944 | | void Sema::inheritCUDATargetAttrs(FunctionDecl *FD, |
945 | 11 | const FunctionTemplateDecl &TD) { |
946 | 11 | const FunctionDecl &TemplateFD = *TD.getTemplatedDecl(); |
947 | 11 | copyAttrIfPresent<CUDAGlobalAttr>(*this, FD, TemplateFD); |
948 | 11 | copyAttrIfPresent<CUDAHostAttr>(*this, FD, TemplateFD); |
949 | 11 | copyAttrIfPresent<CUDADeviceAttr>(*this, FD, TemplateFD); |
950 | 11 | } |
951 | | |
952 | 6.61k | std::string Sema::getCudaConfigureFuncName() const { |
953 | 6.61k | if (getLangOpts().HIP) |
954 | 4.95k | return getLangOpts().HIPUseNewLaunchAPI ? "__hipPushCallConfiguration"
955 | 4.95k | : "hipConfigureCall";
956 | | |
957 | | // New CUDA kernel launch sequence. |
958 | 1.65k | if (CudaFeatureEnabled(Context.getTargetInfo().getSDKVersion(), |
959 | 1.65k | CudaFeature::CUDA_USES_NEW_LAUNCH)) |
960 | 73 | return "__cudaPushCallConfiguration"; |
961 | | |
962 | | // Legacy CUDA kernel configuration call |
963 | 1.58k | return "cudaConfigureCall"; |
964 | 1.65k | } |