Coverage Report

Created: 2020-02-15 09:57

/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/Sema/SemaCUDA.cpp

//===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements semantic analysis for CUDA constructs.
///
//===----------------------------------------------------------------------===//

#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/AST/ExprCXX.h"
#include "clang/Basic/Cuda.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/Sema.h"
#include "clang/Sema/SemaDiagnostic.h"
#include "clang/Sema/SemaInternal.h"
#include "clang/Sema/Template.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
using namespace clang;

void Sema::PushForceCUDAHostDevice() {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  ForceCUDAHostDeviceDepth++;
}

bool Sema::PopForceCUDAHostDevice() {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  if (ForceCUDAHostDeviceDepth == 0)
    return false;
  ForceCUDAHostDeviceDepth--;
  return true;
}

ExprResult Sema::ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc,
                                         MultiExprArg ExecConfig,
                                         SourceLocation GGGLoc) {
  FunctionDecl *ConfigDecl = Context.getcudaConfigureCallDecl();
  if (!ConfigDecl)
    return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use)
                     << getCudaConfigureFuncName());
  QualType ConfigQTy = ConfigDecl->getType();

  DeclRefExpr *ConfigDR = new (Context)
      DeclRefExpr(Context, ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc);
  MarkFunctionReferenced(LLLLoc, ConfigDecl);

  return BuildCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr,
                       /*IsExecConfig=*/true);
}

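// Illustrative sketch, not part of SemaCUDA.cpp: the kind of user-side kernel
// launch whose <<<...>>> execution configuration reaches
// ActOnCUDAExecConfigExpr and is rebuilt as a call to the declaration returned
// by getcudaConfigureCallDecl().  The kernel name and launch parameters are
// hypothetical.
#if 0 // example only
__global__ void addOne(int *data) { data[threadIdx.x] += 1; }

void launch(int *devPtr) {
  // 4 blocks of 256 threads; Sema turns the <<<4, 256>>> part into an
  // exec-config call before building the call to addOne itself.
  addOne<<<4, 256>>>(devPtr);
}
#endif
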
Sema::CUDAFunctionTarget
Sema::IdentifyCUDATarget(const ParsedAttributesView &Attrs) {
  bool HasHostAttr = false;
  bool HasDeviceAttr = false;
  bool HasGlobalAttr = false;
  bool HasInvalidTargetAttr = false;
  for (const ParsedAttr &AL : Attrs) {
    switch (AL.getKind()) {
    case ParsedAttr::AT_CUDAGlobal:
      HasGlobalAttr = true;
      break;
    case ParsedAttr::AT_CUDAHost:
      HasHostAttr = true;
      break;
    case ParsedAttr::AT_CUDADevice:
      HasDeviceAttr = true;
      break;
    case ParsedAttr::AT_CUDAInvalidTarget:
      HasInvalidTargetAttr = true;
      break;
    default:
      break;
    }
  }

  if (HasInvalidTargetAttr)
    return CFT_InvalidTarget;

  if (HasGlobalAttr)
    return CFT_Global;

  if (HasHostAttr && HasDeviceAttr)
    return CFT_HostDevice;

  if (HasDeviceAttr)
    return CFT_Device;

  return CFT_Host;
}

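// Illustrative sketch, not part of SemaCUDA.cpp: how the parsed-attribute
// combinations map to CUDAFunctionTarget values in the function above.  The
// declarations are hypothetical user code.
#if 0 // example only
__global__ void kernelFn();       // CFT_Global
__device__ int deviceFn();        // CFT_Device
__host__ int hostFn();            // CFT_Host
__host__ __device__ int hdFn();   // CFT_HostDevice
int plainFn();                    // no attributes -> CFT_Host
#endif
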
template <typename A>
static bool hasAttr(const FunctionDecl *D, bool IgnoreImplicitAttr) {
  return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) {
           return isa<A>(Attribute) &&
                  !(IgnoreImplicitAttr && Attribute->isImplicit());
         });
}

/// IdentifyCUDATarget - Determine the CUDA compilation target for this function
Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D,
                                                  bool IgnoreImplicitHDAttr) {
  // Code that lives outside a function is run on the host.
  if (D == nullptr)
    return CFT_Host;

  if (D->hasAttr<CUDAInvalidTargetAttr>())
    return CFT_InvalidTarget;

  if (D->hasAttr<CUDAGlobalAttr>())
    return CFT_Global;

  if (hasAttr<CUDADeviceAttr>(D, IgnoreImplicitHDAttr)) {
    if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr))
      return CFT_HostDevice;
    return CFT_Device;
  } else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) {
    return CFT_Host;
  } else if (D->isImplicit() && !IgnoreImplicitHDAttr) {
    // Some implicit declarations (like intrinsic functions) are not marked.
    // Set the most lenient target on them for maximal flexibility.
    return CFT_HostDevice;
  }

  return CFT_Host;
}

// * CUDA Call preference table
//
// F - from,
// T - to
// Ph - preference in host mode
// Pd - preference in device mode
// H  - handled in (x)
// Preferences: N:native, SS:same side, HD:host-device, WS:wrong side, --:never.
//
// | F  | T  | Ph  | Pd  |  H  |
// |----+----+-----+-----+-----+
// | d  | d  | N   | N   | (c) |
// | d  | g  | --  | --  | (a) |
// | d  | h  | --  | --  | (e) |
// | d  | hd | HD  | HD  | (b) |
// | g  | d  | N   | N   | (c) |
// | g  | g  | --  | --  | (a) |
// | g  | h  | --  | --  | (e) |
// | g  | hd | HD  | HD  | (b) |
// | h  | d  | --  | --  | (e) |
// | h  | g  | N   | N   | (c) |
// | h  | h  | N   | N   | (c) |
// | h  | hd | HD  | HD  | (b) |
// | hd | d  | WS  | SS  | (d) |
// | hd | g  | SS  | --  |(d/a)|
// | hd | h  | SS  | WS  | (d) |
// | hd | hd | HD  | HD  | (b) |

Sema::CUDAFunctionPreference
Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
                             const FunctionDecl *Callee) {
  assert(Callee && "Callee must be valid.");
  CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller);
  CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee);

  // If one of the targets is invalid, the check always fails, no matter what
  // the other target is.
  if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget)
    return CFP_Never;

  // (a) Can't call global from some contexts until we support CUDA's
  // dynamic parallelism.
  if (CalleeTarget == CFT_Global &&
      (CallerTarget == CFT_Global || CallerTarget == CFT_Device))
    return CFP_Never;

  // (b) Calling HostDevice is OK for everyone.
  if (CalleeTarget == CFT_HostDevice)
    return CFP_HostDevice;

  // (c) Best case scenarios
  if (CalleeTarget == CallerTarget ||
      (CallerTarget == CFT_Host && CalleeTarget == CFT_Global) ||
      (CallerTarget == CFT_Global && CalleeTarget == CFT_Device))
    return CFP_Native;

  // (d) HostDevice behavior depends on compilation mode.
  if (CallerTarget == CFT_HostDevice) {
    // It's OK to call a compilation-mode matching function from an HD one.
    if ((getLangOpts().CUDAIsDevice && CalleeTarget == CFT_Device) ||
        (!getLangOpts().CUDAIsDevice &&
         (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global)))
      return CFP_SameSide;

    // Calls from HD to non-mode-matching functions (i.e., to host functions
    // when compiling in device mode or to device functions when compiling in
    // host mode) are allowed at the sema level, but eventually rejected if
    // they're ever codegened.  TODO: Reject said calls earlier.
    return CFP_WrongSide;
  }

  // (e) Calling across device/host boundary is not something you should do.
  if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) ||
      (CallerTarget == CFT_Device && CalleeTarget == CFT_Host) ||
      (CallerTarget == CFT_Global && CalleeTarget == CFT_Host))
    return CFP_Never;

  llvm_unreachable("All cases should've been handled by now.");
}

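// Illustrative sketch, not part of SemaCUDA.cpp: a few caller/callee pairs and
// the preference the table above assigns them.  All function names are
// hypothetical.
#if 0 // example only
__device__ int dev();
__host__   int host();
__host__ __device__ int hd() {
  return dev() + host(); // dev(): SS in device mode, WS in host mode;
                         // host(): SS in host mode, WS in device mode.
}
__global__ void kern() { dev(); }  // d called from g: CFP_Native
void hostCaller() { dev(); }       // d called from h: CFP_Never
#endif
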
void Sema::EraseUnwantedCUDAMatches(
    const FunctionDecl *Caller,
    SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) {
  if (Matches.size() <= 1)
    return;

  using Pair = std::pair<DeclAccessPair, FunctionDecl*>;

  // Gets the CUDA function preference for a call from Caller to Match.
  auto GetCFP = [&](const Pair &Match) {
    return IdentifyCUDAPreference(Caller, Match.second);
  };

  // Find the best call preference among the functions in Matches.
  CUDAFunctionPreference BestCFP = GetCFP(*std::max_element(
      Matches.begin(), Matches.end(),
      [&](const Pair &M1, const Pair &M2) { return GetCFP(M1) < GetCFP(M2); }));

  // Erase all functions with lower priority.
  llvm::erase_if(Matches,
                 [&](const Pair &Match) { return GetCFP(Match) < BestCFP; });
}

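// Illustrative sketch, not part of SemaCUDA.cpp: the kind of overload set this
// pruning acts on.  With separate __host__ and __device__ overloads of the
// same signature, only candidates whose preference for the caller equals the
// best preference survive.  Names are hypothetical.
#if 0 // example only
__host__   int pick(int);   // better match for host callers
__device__ int pick(int);   // better match for device callers

__device__ void devCaller() {
  int (*fp)(int) = pick;    // when resolving the address, the host overload
                            // has lower preference and is pruned from Matches.
}
#endif
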
/// When an implicitly-declared special member has to invoke more than one
/// base/field special member, conflicts may occur in the targets of these
/// members. For example, if one base's member is __host__ and another's is
/// __device__, it's a conflict.
/// This function figures out if the given targets \param Target1 and
/// \param Target2 conflict, and if they do not it fills in
/// \param ResolvedTarget with a target that resolves for both calls.
/// \return true if there's a conflict, false otherwise.
static bool
resolveCalleeCUDATargetConflict(Sema::CUDAFunctionTarget Target1,
                                Sema::CUDAFunctionTarget Target2,
                                Sema::CUDAFunctionTarget *ResolvedTarget) {
  // Only free functions and static member functions may be global.
  assert(Target1 != Sema::CFT_Global);
  assert(Target2 != Sema::CFT_Global);

  if (Target1 == Sema::CFT_HostDevice) {
    *ResolvedTarget = Target2;
  } else if (Target2 == Sema::CFT_HostDevice) {
    *ResolvedTarget = Target1;
  } else if (Target1 != Target2) {
    return true;
  } else {
    *ResolvedTarget = Target1;
  }

  return false;
}

bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
                                                   CXXSpecialMember CSM,
                                                   CXXMethodDecl *MemberDecl,
                                                   bool ConstRHS,
                                                   bool Diagnose) {
  // If the defaulted special member is defined lexically outside of its
  // owning class, or the special member already has explicit device or host
  // attributes, do not infer.
  bool InClass = MemberDecl->getLexicalParent() == MemberDecl->getParent();
  bool HasH = MemberDecl->hasAttr<CUDAHostAttr>();
  bool HasD = MemberDecl->hasAttr<CUDADeviceAttr>();
  bool HasExplicitAttr =
      (HasD && !MemberDecl->getAttr<CUDADeviceAttr>()->isImplicit()) ||
      (HasH && !MemberDecl->getAttr<CUDAHostAttr>()->isImplicit());
  if (!InClass || HasExplicitAttr)
    return false;

  llvm::Optional<CUDAFunctionTarget> InferredTarget;

  // We're going to invoke special member lookup; mark that these special
  // members are called from this one, and not from its caller.
  ContextRAII MethodContext(*this, MemberDecl);

  // Look for special members in base classes that should be invoked from here.
  // Infer the target of this member based on the ones it should call.
  // Skip direct and indirect virtual bases for abstract classes.
  llvm::SmallVector<const CXXBaseSpecifier *, 16> Bases;
  for (const auto &B : ClassDecl->bases()) {
    if (!B.isVirtual()) {
      Bases.push_back(&B);
    }
  }

  if (!ClassDecl->isAbstract()) {
    for (const auto &VB : ClassDecl->vbases()) {
      Bases.push_back(&VB);
    }
  }

  for (const auto *B : Bases) {
    const RecordType *BaseType = B->getType()->getAs<RecordType>();
    if (!BaseType) {
      continue;
    }

    CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl());
    Sema::SpecialMemberOverloadResult SMOR =
        LookupSpecialMember(BaseClassDecl, CSM,
                            /* ConstArg */ ConstRHS,
                            /* VolatileArg */ false,
                            /* RValueThis */ false,
                            /* ConstThis */ false,
                            /* VolatileThis */ false);

    if (!SMOR.getMethod())
      continue;

    CUDAFunctionTarget BaseMethodTarget = IdentifyCUDATarget(SMOR.getMethod());
    if (!InferredTarget.hasValue()) {
      InferredTarget = BaseMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          InferredTarget.getValue(), BaseMethodTarget,
          InferredTarget.getPointer());
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << InferredTarget.getValue() << BaseMethodTarget;
        }
        MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
        return true;
      }
    }
  }

  // Same as for bases, but now for special members of fields.
  for (const auto *F : ClassDecl->fields()) {
    if (F->isInvalidDecl()) {
      continue;
    }

    const RecordType *FieldType =
        Context.getBaseElementType(F->getType())->getAs<RecordType>();
    if (!FieldType) {
      continue;
    }

    CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl());
    Sema::SpecialMemberOverloadResult SMOR =
        LookupSpecialMember(FieldRecDecl, CSM,
                            /* ConstArg */ ConstRHS && !F->isMutable(),
                            /* VolatileArg */ false,
                            /* RValueThis */ false,
                            /* ConstThis */ false,
                            /* VolatileThis */ false);

    if (!SMOR.getMethod())
      continue;

    CUDAFunctionTarget FieldMethodTarget =
        IdentifyCUDATarget(SMOR.getMethod());
    if (!InferredTarget.hasValue()) {
      InferredTarget = FieldMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          InferredTarget.getValue(), FieldMethodTarget,
          InferredTarget.getPointer());
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << InferredTarget.getValue()
              << FieldMethodTarget;
        }
        MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
        return true;
      }
    }
  }

  // If no target was inferred, mark this member as __host__ __device__;
  // it's the least restrictive option that can be invoked from any target.
  bool NeedsH = true, NeedsD = true;
  if (InferredTarget.hasValue()) {
    if (InferredTarget.getValue() == CFT_Device)
      NeedsH = false;
    else if (InferredTarget.getValue() == CFT_Host)
      NeedsD = false;
  }

  // We are either setting attributes for the first time, or the inferred ones
  // must match previously set ones.
  if (NeedsD && !HasD)
    MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
  if (NeedsH && !HasH)
    MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));

  return false;
}

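// Illustrative sketch, not part of SemaCUDA.cpp: target inference for an
// implicit special member.  The class names are hypothetical.
#if 0 // example only
struct HostBase   { __host__   HostBase();   };
struct DeviceBase { __device__ DeviceBase(); };

// The implicit default constructor must call a __host__ and a __device__
// constructor: the targets conflict, so the member is marked with
// CUDAInvalidTargetAttr and any use of it is diagnosed.
struct Conflict : HostBase, DeviceBase {};

// The implicit default constructor only calls a __host__ constructor, so it
// is inferred as __host__.
struct HostOnly : HostBase {};
#endif
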
bool Sema::isEmptyCudaConstructor(SourceLocation Loc, CXXConstructorDecl *CD) {
  if (!CD->isDefined() && CD->isTemplateInstantiation())
    InstantiateFunctionDefinition(Loc, CD->getFirstDecl());

  // (E.2.3.1, CUDA 7.5) A constructor for a class type is considered
  // empty at a point in the translation unit, if it is either a
  // trivial constructor
  if (CD->isTrivial())
    return true;

  // ... or it satisfies all of the following conditions:
  // The constructor function has been defined.
  // The constructor function has no parameters,
  // and the function body is an empty compound statement.
  if (!(CD->hasTrivialBody() && CD->getNumParams() == 0))
    return false;

  // Its class has no virtual functions and no virtual base classes.
  if (CD->getParent()->isDynamicClass())
    return false;

  // The only form of initializer allowed is an empty constructor.
  // This will recursively check all base classes and member initializers
  if (!llvm::all_of(CD->inits(), [&](const CXXCtorInitializer *CI) {
        if (const CXXConstructExpr *CE =
                dyn_cast<CXXConstructExpr>(CI->getInit()))
          return isEmptyCudaConstructor(Loc, CE->getConstructor());
        return false;
      }))
    return false;

  return true;
}

bool Sema::isEmptyCudaDestructor(SourceLocation Loc, CXXDestructorDecl *DD) {
  // No destructor -> no problem.
  if (!DD)
    return true;

  if (!DD->isDefined() && DD->isTemplateInstantiation())
    InstantiateFunctionDefinition(Loc, DD->getFirstDecl());

  // (E.2.3.1, CUDA 7.5) A destructor for a class type is considered
  // empty at a point in the translation unit, if it is either a
  // trivial destructor
  if (DD->isTrivial())
    return true;

  // ... or it satisfies all of the following conditions:
  // The destructor function has been defined.
  // and the function body is an empty compound statement.
  if (!DD->hasTrivialBody())
    return false;

  const CXXRecordDecl *ClassDecl = DD->getParent();

  // Its class has no virtual functions and no virtual base classes.
  if (ClassDecl->isDynamicClass())
    return false;

  // Only empty destructors are allowed. This will recursively check
  // destructors for all base classes...
  if (!llvm::all_of(ClassDecl->bases(), [&](const CXXBaseSpecifier &BS) {
        if (CXXRecordDecl *RD = BS.getType()->getAsCXXRecordDecl())
          return isEmptyCudaDestructor(Loc, RD->getDestructor());
        return true;
      }))
    return false;

  // ... and member fields.
  if (!llvm::all_of(ClassDecl->fields(), [&](const FieldDecl *Field) {
        if (CXXRecordDecl *RD = Field->getType()
                                    ->getBaseElementTypeUnsafe()
                                    ->getAsCXXRecordDecl())
          return isEmptyCudaDestructor(Loc, RD->getDestructor());
        return true;
      }))
    return false;

  return true;
}

void Sema::checkAllowedCUDAInitializer(VarDecl *VD) {
  if (VD->isInvalidDecl() || !VD->hasInit() || !VD->hasGlobalStorage())
    return;
  const Expr *Init = VD->getInit();
  if (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>() ||
      VD->hasAttr<CUDASharedAttr>()) {
    if (LangOpts.GPUAllowDeviceInit)
      return;
    assert(!VD->isStaticLocal() || VD->hasAttr<CUDASharedAttr>());
    bool AllowedInit = false;
    if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init))
      AllowedInit =
          isEmptyCudaConstructor(VD->getLocation(), CE->getConstructor());
    // We'll allow constant initializers even if it's a non-empty
    // constructor according to CUDA rules. This deviates from NVCC,
    // but allows us to handle things like constexpr constructors.
    if (!AllowedInit &&
        (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>()))
      AllowedInit = VD->getInit()->isConstantInitializer(
          Context, VD->getType()->isReferenceType());

    // Also make sure that destructor, if there is one, is empty.
    if (AllowedInit)
      if (CXXRecordDecl *RD = VD->getType()->getAsCXXRecordDecl())
        AllowedInit =
            isEmptyCudaDestructor(VD->getLocation(), RD->getDestructor());

    if (!AllowedInit) {
      Diag(VD->getLocation(), VD->hasAttr<CUDASharedAttr>()
                                  ? diag::err_shared_var_init
                                  : diag::err_dynamic_var_init)
          << Init->getSourceRange();
      VD->setInvalidDecl();
    }
  } else {
    // This is a host-side global variable.  Check that the initializer is
    // callable from the host side.
    const FunctionDecl *InitFn = nullptr;
    if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init)) {
      InitFn = CE->getConstructor();
    } else if (const CallExpr *CE = dyn_cast<CallExpr>(Init)) {
      InitFn = CE->getDirectCallee();
    }
    if (InitFn) {
      CUDAFunctionTarget InitFnTarget = IdentifyCUDATarget(InitFn);
      if (InitFnTarget != CFT_Host && InitFnTarget != CFT_HostDevice) {
        Diag(VD->getLocation(), diag::err_ref_bad_target_global_initializer)
            << InitFnTarget << InitFn;
        Diag(InitFn->getLocation(), diag::note_previous_decl) << InitFn;
        VD->setInvalidDecl();
      }
    }
  }
}

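// Illustrative sketch, not part of SemaCUDA.cpp: initializers this check
// accepts and rejects for device-side variables.  Class and variable names are
// hypothetical.
#if 0 // example only
struct Empty   { Empty() {} };                   // empty ctor: allowed
struct Dynamic { int x; Dynamic() { x = 1; } };  // non-empty, non-constant ctor

__device__ Empty okEmpty;       // OK: empty constructor
__device__ int okConst = 42;    // OK: constant initializer
__device__ Dynamic bad;         // error: err_dynamic_var_init

__global__ void kern() {
  __shared__ int badShared = 1; // error: err_shared_var_init
}
#endif
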
// With -fcuda-host-device-constexpr, an unattributed constexpr function is
// treated as implicitly __host__ __device__, unless:
//  * it is a variadic function (device-side variadic functions are not
//    allowed), or
//  * a __device__ function with this signature was already declared, in which
//    case we output an error, unless the __device__ decl is in a system
//    header, in which case we leave the constexpr function unattributed.
//
// In addition, all function decls are treated as __host__ __device__ when
// ForceCUDAHostDeviceDepth > 0 (corresponding to code within a
//   #pragma clang force_cuda_host_device_begin/end
// pair).
void Sema::maybeAddCUDAHostDeviceAttrs(FunctionDecl *NewD,
                                       const LookupResult &Previous) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");

  if (ForceCUDAHostDeviceDepth > 0) {
    if (!NewD->hasAttr<CUDAHostAttr>())
      NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
    if (!NewD->hasAttr<CUDADeviceAttr>())
      NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
    return;
  }

  if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() ||
      NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() ||
      NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>())
    return;

  // Is D a __device__ function with the same signature as NewD, ignoring CUDA
  // attributes?
  auto IsMatchingDeviceFn = [&](NamedDecl *D) {
    if (UsingShadowDecl *Using = dyn_cast<UsingShadowDecl>(D))
      D = Using->getTargetDecl();
    FunctionDecl *OldD = D->getAsFunction();
    return OldD && OldD->hasAttr<CUDADeviceAttr>() &&
           !OldD->hasAttr<CUDAHostAttr>() &&
           !IsOverload(NewD, OldD, /* UseMemberUsingDeclRules = */ false,
                       /* ConsiderCudaAttrs = */ false);
  };
  auto It = llvm::find_if(Previous, IsMatchingDeviceFn);
  if (It != Previous.end()) {
    // We found a __device__ function with the same name and signature as NewD
    // (ignoring CUDA attrs).  This is an error unless that function is defined
    // in a system header, in which case we simply return without making NewD
    // host+device.
    NamedDecl *Match = *It;
    if (!getSourceManager().isInSystemHeader(Match->getLocation())) {
      Diag(NewD->getLocation(),
           diag::err_cuda_unattributed_constexpr_cannot_overload_device)
          << NewD;
      Diag(Match->getLocation(),
           diag::note_cuda_conflicting_device_function_declared_here);
    }
    return;
  }

  NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
  NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
}

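// Illustrative sketch, not part of SemaCUDA.cpp: declarations that pick up
// implicit __host__ __device__ attributes through this path.  Names are
// hypothetical, and the pragma spelling is assumed from the comment above.
#if 0 // example only
constexpr int square(int x) { return x * x; }  // implicitly HD with
                                               // -fcuda-host-device-constexpr

#pragma clang force_cuda_host_device begin
int forcedHD();                                // treated as __host__ __device__
#pragma clang force_cuda_host_device end
#endif
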
Sema::DeviceDiagBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc,
                                                   unsigned DiagID) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  DeviceDiagBuilder::Kind DiagKind = [this] {
    switch (CurrentCUDATarget()) {
    case CFT_Global:
    case CFT_Device:
      return DeviceDiagBuilder::K_Immediate;
    case CFT_HostDevice:
      // An HD function counts as host code if we're compiling for host, and
      // device code if we're compiling for device.  Defer any errors in device
      // mode until the function is known-emitted.
      if (getLangOpts().CUDAIsDevice) {
        return (getEmissionStatus(cast<FunctionDecl>(CurContext)) ==
                FunctionEmissionStatus::Emitted)
                   ? DeviceDiagBuilder::K_ImmediateWithCallStack
                   : DeviceDiagBuilder::K_Deferred;
      }
      return DeviceDiagBuilder::K_Nop;

    default:
      return DeviceDiagBuilder::K_Nop;
    }
  }();
  return DeviceDiagBuilder(DiagKind, Loc, DiagID,
                           dyn_cast<FunctionDecl>(CurContext), *this);
}

Sema::DeviceDiagBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc,
                                                 unsigned DiagID) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  DeviceDiagBuilder::Kind DiagKind = [this] {
    switch (CurrentCUDATarget()) {
    case CFT_Host:
      return DeviceDiagBuilder::K_Immediate;
    case CFT_HostDevice:
      // An HD function counts as host code if we're compiling for host, and
      // device code if we're compiling for device.  Defer any errors in device
      // mode until the function is known-emitted.
      if (getLangOpts().CUDAIsDevice)
        return DeviceDiagBuilder::K_Nop;

      return (getEmissionStatus(cast<FunctionDecl>(CurContext)) ==
              FunctionEmissionStatus::Emitted)
                 ? DeviceDiagBuilder::K_ImmediateWithCallStack
                 : DeviceDiagBuilder::K_Deferred;
    default:
      return DeviceDiagBuilder::K_Nop;
    }
  }();
  return DeviceDiagBuilder(DiagKind, Loc, DiagID,
                           dyn_cast<FunctionDecl>(CurContext), *this);
}

bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  assert(Callee && "Callee may not be null.");

  auto &ExprEvalCtx = ExprEvalContexts.back();
  if (ExprEvalCtx.isUnevaluated() || ExprEvalCtx.isConstantEvaluated())
    return true;

  // FIXME: Is bailing out early correct here?  Should we instead assume that
  // the caller is a global initializer?
  FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext);
  if (!Caller)
    return true;

  // If the caller is known-emitted, mark the callee as known-emitted.
  // Otherwise, mark the call in our call graph so we can traverse it later.
  bool CallerKnownEmitted =
      getEmissionStatus(Caller) == FunctionEmissionStatus::Emitted;
  if (CallerKnownEmitted) {
    // Host-side references to a __global__ function refer to the stub, so the
    // function itself is never emitted and therefore should not be marked.
    if (!shouldIgnoreInHostDeviceCheck(Callee))
      markKnownEmitted(
          *this, Caller, Callee, Loc, [](Sema &S, FunctionDecl *FD) {
            return S.getEmissionStatus(FD) == FunctionEmissionStatus::Emitted;
          });
  } else {
    // If we have
    //   host fn calls kernel fn calls host+device,
    // the HD function does not get instantiated on the host.  We model this by
    // omitting the call to the kernel from the callgraph.  This ensures that,
    // when compiling for host, only HD functions actually called from the
    // host get marked as known-emitted.
    if (!shouldIgnoreInHostDeviceCheck(Callee))
      DeviceCallGraph[Caller].insert({Callee, Loc});
  }

  DeviceDiagBuilder::Kind DiagKind = [this, Caller, Callee,
                                      CallerKnownEmitted] {
    switch (IdentifyCUDAPreference(Caller, Callee)) {
    case CFP_Never:
      return DeviceDiagBuilder::K_Immediate;
    case CFP_WrongSide:
      assert(Caller && "WrongSide calls require a non-null caller");
      // If we know the caller will be emitted, we know this wrong-side call
      // will be emitted, so it's an immediate error.  Otherwise, defer the
      // error until we know the caller is emitted.
      return CallerKnownEmitted ? DeviceDiagBuilder::K_ImmediateWithCallStack
                                : DeviceDiagBuilder::K_Deferred;
    default:
      return DeviceDiagBuilder::K_Nop;
    }
  }();

  if (DiagKind == DeviceDiagBuilder::K_Nop)
    return true;

  // Avoid emitting this error twice for the same location.  Using a hashtable
  // like this is unfortunate, but because we must continue parsing as normal
  // after encountering a deferred error, it's otherwise very tricky for us to
  // ensure that we only emit this deferred error once.
  if (!LocsWithCUDACallDiags.insert({Caller, Loc}).second)
    return true;

  DeviceDiagBuilder(DiagKind, Loc, diag::err_ref_bad_target, Caller, *this)
      << IdentifyCUDATarget(Callee) << Callee << IdentifyCUDATarget(Caller);
  DeviceDiagBuilder(DiagKind, Callee->getLocation(), diag::note_previous_decl,
                    Caller, *this)
      << Callee;
  return DiagKind != DeviceDiagBuilder::K_Immediate &&
         DiagKind != DeviceDiagBuilder::K_ImmediateWithCallStack;
}

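// Illustrative sketch, not part of SemaCUDA.cpp: calls that CheckCUDACall
// diagnoses immediately versus ones it defers.  Function names are
// hypothetical.
#if 0 // example only
__device__ int devOnly();

void hostFn() {
  devOnly();             // CFP_Never: immediate err_ref_bad_target
}

__host__ __device__ int hdFn() {
  return devOnly();      // CFP_WrongSide in host mode: deferred until hdFn()
                         // is known to be emitted for the host.
}
#endif
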
void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  if (Method->hasAttr<CUDAHostAttr>() || Method->hasAttr<CUDADeviceAttr>())
    return;
  FunctionDecl *CurFn = dyn_cast<FunctionDecl>(CurContext);
  if (!CurFn)
    return;
  CUDAFunctionTarget Target = IdentifyCUDATarget(CurFn);
  if (Target == CFT_Global || Target == CFT_Device) {
    Method->addAttr(CUDADeviceAttr::CreateImplicit(Context));
  } else if (Target == CFT_HostDevice) {
    Method->addAttr(CUDADeviceAttr::CreateImplicit(Context));
    Method->addAttr(CUDAHostAttr::CreateImplicit(Context));
  }
}

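// Illustrative sketch, not part of SemaCUDA.cpp: how an unattributed lambda
// inherits the target of its enclosing function.  Names are hypothetical.
#if 0 // example only
__global__ void kern(int *out) {
  auto doubler = [](int v) { return 2 * v; };  // implicitly __device__ here
  out[threadIdx.x] = doubler(threadIdx.x);
}

__host__ __device__ int hdFn(int v) {
  auto id = [](int x) { return x; };           // implicitly __host__ __device__
  return id(v);
}
#endif
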
void Sema::checkCUDATargetOverload(FunctionDecl *NewFD,
                                   const LookupResult &Previous) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  CUDAFunctionTarget NewTarget = IdentifyCUDATarget(NewFD);
  for (NamedDecl *OldND : Previous) {
    FunctionDecl *OldFD = OldND->getAsFunction();
    if (!OldFD)
      continue;

    CUDAFunctionTarget OldTarget = IdentifyCUDATarget(OldFD);
    // Don't allow HD and global functions to overload other functions with the
    // same signature.  We allow overloading based on CUDA attributes so that
    // functions can have different implementations on the host and device, but
    // HD/global functions "exist" in some sense on both the host and device, so
    // should have the same implementation on both sides.
    if (NewTarget != OldTarget &&
        ((NewTarget == CFT_HostDevice) || (OldTarget == CFT_HostDevice) ||
         (NewTarget == CFT_Global) || (OldTarget == CFT_Global)) &&
        !IsOverload(NewFD, OldFD, /* UseMemberUsingDeclRules = */ false,
                    /* ConsiderCudaAttrs = */ false)) {
      Diag(NewFD->getLocation(), diag::err_cuda_ovl_target)
          << NewTarget << NewFD->getDeclName() << OldTarget << OldFD;
      Diag(OldFD->getLocation(), diag::note_previous_declaration);
      NewFD->setInvalidDecl();
      break;
    }
  }
}

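// Illustrative sketch, not part of SemaCUDA.cpp: attribute-based overloading
// that is accepted versus the HD case this check rejects.  Names are
// hypothetical.
#if 0 // example only
__host__   int clamp(int v);     // host implementation
__device__ int clamp(int v);     // separate device implementation: OK

__host__ int scale(int v);
__host__ __device__ int scale(int v);  // error: err_cuda_ovl_target, an HD
                                       // function may not overload a
                                       // same-signature host function.
#endif
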
template <typename AttrTy>
static void copyAttrIfPresent(Sema &S, FunctionDecl *FD,
                              const FunctionDecl &TemplateFD) {
  if (AttrTy *Attribute = TemplateFD.getAttr<AttrTy>()) {
    AttrTy *Clone = Attribute->clone(S.Context);
    Clone->setInherited(true);
    FD->addAttr(Clone);
  }
}

void Sema::inheritCUDATargetAttrs(FunctionDecl *FD,
                                  const FunctionTemplateDecl &TD) {
  const FunctionDecl &TemplateFD = *TD.getTemplatedDecl();
  copyAttrIfPresent<CUDAGlobalAttr>(*this, FD, TemplateFD);
  copyAttrIfPresent<CUDAHostAttr>(*this, FD, TemplateFD);
  copyAttrIfPresent<CUDADeviceAttr>(*this, FD, TemplateFD);
}

std::string Sema::getCudaConfigureFuncName() const {
  if (getLangOpts().HIP)
    return getLangOpts().HIPUseNewLaunchAPI ? "__hipPushCallConfiguration"
                                            : "hipConfigureCall";

  // New CUDA kernel launch sequence.
  if (CudaFeatureEnabled(Context.getTargetInfo().getSDKVersion(),
                         CudaFeature::CUDA_USES_NEW_LAUNCH))
    return "__cudaPushCallConfiguration";

  // Legacy CUDA kernel configuration call
  return "cudaConfigureCall";
}