Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/tools/clang/lib/Sema/SemaCUDA.cpp

 Line|  Count|Source
    1|       |//===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs -------------===//
    2|       |//
    3|       |// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
    4|       |// See https://llvm.org/LICENSE.txt for license information.
    5|       |// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
    6|       |//
    7|       |//===----------------------------------------------------------------------===//
    8|       |/// \file
    9|       |/// This file implements semantic analysis for CUDA constructs.
   10|       |///
   11|       |//===----------------------------------------------------------------------===//
   12|       |
   13|       |#include "clang/AST/ASTContext.h"
   14|       |#include "clang/AST/Decl.h"
   15|       |#include "clang/AST/ExprCXX.h"
   16|       |#include "clang/Basic/Cuda.h"
   17|       |#include "clang/Lex/Preprocessor.h"
   18|       |#include "clang/Sema/Lookup.h"
   19|       |#include "clang/Sema/Sema.h"
   20|       |#include "clang/Sema/SemaDiagnostic.h"
   21|       |#include "clang/Sema/SemaInternal.h"
   22|       |#include "clang/Sema/Template.h"
   23|       |#include "llvm/ADT/Optional.h"
   24|       |#include "llvm/ADT/SmallVector.h"
   25|       |using namespace clang;
   26|       |
   27|      7|void Sema::PushForceCUDAHostDevice() {
   28|      7|  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
   29|      7|  ForceCUDAHostDeviceDepth++;
   30|      7|}
   31|       |
   32|      5|bool Sema::PopForceCUDAHostDevice() {
   33|      5|  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
   34|      5|  if (ForceCUDAHostDeviceDepth == 0)
   35|      0|    return false;
   36|      5|  ForceCUDAHostDeviceDepth--;
   37|      5|  return true;
   38|      5|}
   39|       |
   40|       |ExprResult Sema::ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc,
   41|       |                                         MultiExprArg ExecConfig,
   42|     42|                                         SourceLocation GGGLoc) {
   43|     42|  FunctionDecl *ConfigDecl = Context.getcudaConfigureCallDecl();
   44|     42|  if (!ConfigDecl)
   45|      0|    return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use)
   46|      0|                     << getCudaConfigureFuncName());
   47|     42|  QualType ConfigQTy = ConfigDecl->getType();
   48|     42|
   49|     42|  DeclRefExpr *ConfigDR = new (Context)
   50|     42|      DeclRefExpr(Context, ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc);
   51|     42|  MarkFunctionReferenced(LLLLoc, ConfigDecl);
   52|     42|
   53|     42|  return BuildCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr,
   54|     42|                       /*IsExecConfig=*/true);
   55|     42|}
   56|       |
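ActOnCUDAExecConfigExpr builds the call to the launch-configuration function (named by getCudaConfigureFuncName() later in this file) out of a kernel call's <<<...>>> execution config. For illustration, a minimal CUDA snippet (hypothetical names, not part of this file) that reaches this path:

    // launch.cu -- Sema turns <<<1, 32>>> into a call to the configuration
    // function before building the call to the kernel itself.
    __global__ void fill(int *p, int v) { p[threadIdx.x] = v; }

    void hostLaunch(int *devPtr) {
      fill<<<1, 32>>>(devPtr, 7);  // 1 block of 32 threads
    }
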
   57|       |Sema::CUDAFunctionTarget
   58|     26|Sema::IdentifyCUDATarget(const ParsedAttributesView &Attrs) {
   59|     26|  bool HasHostAttr = false;
   60|     26|  bool HasDeviceAttr = false;
   61|     26|  bool HasGlobalAttr = false;
   62|     26|  bool HasInvalidTargetAttr = false;
   63|     27|  for (const ParsedAttr &AL : Attrs) {
   64|     27|    switch (AL.getKind()) {
   65|     27|    case ParsedAttr::AT_CUDAGlobal:
   66|      9|      HasGlobalAttr = true;
   67|      9|      break;
   68|     27|    case ParsedAttr::AT_CUDAHost:
   69|     10|      HasHostAttr = true;
   70|     10|      break;
   71|     27|    case ParsedAttr::AT_CUDADevice:
   72|      8|      HasDeviceAttr = true;
   73|      8|      break;
   74|     27|    case ParsedAttr::AT_CUDAInvalidTarget:
   75|      0|      HasInvalidTargetAttr = true;
   76|      0|      break;
   77|     27|    default:
   78|      0|      break;
   79|     27|    }
   80|     27|  }
   81|     26|
   82|     26|  if (HasInvalidTargetAttr)
   83|      0|    return CFT_InvalidTarget;
   84|     26|
   85|     26|  if (HasGlobalAttr)
   86|      9|    return CFT_Global;
   87|     17|
   88|     17|  if (HasHostAttr && HasDeviceAttr)
   89|      6|    return CFT_HostDevice;
   90|     11|
   91|     11|  if (HasDeviceAttr)
   92|      2|    return CFT_Device;
   93|      9|
   94|      9|  return CFT_Host;
   95|      9|}
   96|       |
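For illustration, how the attribute combinations map to targets (hypothetical declarations; this spells out the meaning of the enum values, it is not code from this file):

    __global__ void kernel();        // CFT_Global
    __device__ int devFn();          // CFT_Device
    __host__ int hostFn();           // CFT_Host
    __host__ __device__ int hdFn();  // CFT_HostDevice
    int plainFn();                   // no target attributes: CFT_Host
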
   97|       |template <typename A>
   98|   105k|static bool hasAttr(const FunctionDecl *D, bool IgnoreImplicitAttr) {
   99|   120k|  return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) {
  100|   120k|           return isa<A>(Attribute) &&
  101|   120k|                  !(IgnoreImplicitAttr && Attribute->isImplicit());
  102|   120k|         });
  103|   105k|}
  ------------------
  | SemaCUDA.cpp:bool hasAttr<clang::CUDADeviceAttr>(clang::FunctionDecl const*, bool)::'lambda'(clang::Attr*)::operator()(clang::Attr*) const:
  |   99|  57.5k|  return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) {
  |  100|  57.5k|           return isa<A>(Attribute) &&
  |  101|  57.5k|                  !(IgnoreImplicitAttr && Attribute->isImplicit());
  |  102|  57.5k|         });
  ------------------
  | SemaCUDA.cpp:bool hasAttr<clang::CUDAHostAttr>(clang::FunctionDecl const*, bool)::'lambda'(clang::Attr*)::operator()(clang::Attr*) const:
  |   99|  62.7k|  return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) {
  |  100|  62.7k|           return isa<A>(Attribute) &&
  |  101|  62.7k|                  !(IgnoreImplicitAttr && Attribute->isImplicit());
  |  102|  62.7k|         });
  ------------------
  | SemaCUDA.cpp:bool hasAttr<clang::CUDADeviceAttr>(clang::FunctionDecl const*, bool):
  |   98|  52.9k|static bool hasAttr(const FunctionDecl *D, bool IgnoreImplicitAttr) {
  |   99|  52.9k|  return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) {
  |  100|  47.1k|           return isa<A>(Attribute) &&
  |  101|  47.1k|                  !(IgnoreImplicitAttr && Attribute->isImplicit());
  |  102|  47.1k|         });
  |  103|  52.9k|}
  ------------------
  | SemaCUDA.cpp:bool hasAttr<clang::CUDAHostAttr>(clang::FunctionDecl const*, bool):
  |   98|  52.9k|static bool hasAttr(const FunctionDecl *D, bool IgnoreImplicitAttr) {
  |   99|  52.9k|  return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) {
  |  100|  47.1k|           return isa<A>(Attribute) &&
  |  101|  47.1k|                  !(IgnoreImplicitAttr && Attribute->isImplicit());
  |  102|  47.1k|         });
  |  103|  52.9k|}
  ------------------
  104|       |
  105|       |/// IdentifyCUDATarget - Determine the CUDA compilation target for this function
  106|       |Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D,
  107|  55.5k|                                                  bool IgnoreImplicitHDAttr) {
  108|  55.5k|  // Code that lives outside a function is run on the host.
  109|  55.5k|  if (D == nullptr)
  110|  1.27k|    return CFT_Host;
  111|  54.3k|
  112|  54.3k|  if (D->hasAttr<CUDAInvalidTargetAttr>())
  113|     33|    return CFT_InvalidTarget;
  114|  54.2k|
  115|  54.2k|  if (D->hasAttr<CUDAGlobalAttr>())
  116|  1.29k|    return CFT_Global;
  117|  52.9k|
  118|  52.9k|  if (hasAttr<CUDADeviceAttr>(D, IgnoreImplicitHDAttr)) {
  119|  41.2k|    if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr))
  120|  16.8k|      return CFT_HostDevice;
  121|  24.3k|    return CFT_Device;
  122|  24.3k|  } else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) {
  123|  5.80k|    return CFT_Host;
  124|  5.92k|  } else if (D->isImplicit() && !IgnoreImplicitHDAttr) {
  125|    295|    // Some implicit declarations (like intrinsic functions) are not marked.
  126|    295|    // Set the most lenient target on them for maximal flexibility.
  127|    295|    return CFT_HostDevice;
  128|    295|  }
  129|  5.62k|
  130|  5.62k|  return CFT_Host;
  131|  5.62k|}
  132|       |
  133|       |// * CUDA Call preference table
  134|       |//
  135|       |// F - from,
  136|       |// T - to
  137|       |// Ph - preference in host mode
  138|       |// Pd - preference in device mode
  139|       |// H  - handled in (x)
  140|       |// Preferences: N:native, SS:same side, HD:host-device, WS:wrong side, --:never.
  141|       |//
  142|       |// | F  | T  | Ph  | Pd  |  H  |
  143|       |// |----+----+-----+-----+-----+
  144|       |// | d  | d  | N   | N   | (c) |
  145|       |// | d  | g  | --  | --  | (a) |
  146|       |// | d  | h  | --  | --  | (e) |
  147|       |// | d  | hd | HD  | HD  | (b) |
  148|       |// | g  | d  | N   | N   | (c) |
  149|       |// | g  | g  | --  | --  | (a) |
  150|       |// | g  | h  | --  | --  | (e) |
  151|       |// | g  | hd | HD  | HD  | (b) |
  152|       |// | h  | d  | --  | --  | (e) |
  153|       |// | h  | g  | N   | N   | (c) |
  154|       |// | h  | h  | N   | N   | (c) |
  155|       |// | h  | hd | HD  | HD  | (b) |
  156|       |// | hd | d  | WS  | SS  | (d) |
  157|       |// | hd | g  | SS  | --  |(d/a)|
  158|       |// | hd | h  | SS  | WS  | (d) |
  159|       |// | hd | hd | HD  | HD  | (b) |
  160|       |
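The mode-dependent rows are the hd ones, marked (d). A small sketch (hypothetical names) of same-side versus wrong-side calls from a host-device function:

    __device__ int onDevice();
    __host__ int onHost();

    __host__ __device__ int hd() {
      // Device compilation: onDevice() is CFP_SameSide, onHost() CFP_WrongSide;
      // host compilation reverses the two. Per the comment at (d) in the
      // function below, wrong-side calls pass Sema and are rejected only if
      // this HD function is ever codegened for that side.
      return onDevice() + onHost();
    }
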
  161|       |Sema::CUDAFunctionPreference
  162|       |Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
  163|  15.5k|                             const FunctionDecl *Callee) {
  164|  15.5k|  assert(Callee && "Callee must be valid.");
  165|  15.5k|  CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller);
  166|  15.5k|  CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee);
  167|  15.5k|
  168|  15.5k|  // If one of the targets is invalid, the check always fails, no matter what
  169|  15.5k|  // the other target is.
  170|  15.5k|  if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget)
  171|     12|    return CFP_Never;
  172|  15.5k|
  173|  15.5k|  // (a) Can't call global from some contexts until we support CUDA's
  174|  15.5k|  // dynamic parallelism.
  175|  15.5k|  if (CalleeTarget == CFT_Global &&
  176|  15.5k|      (CallerTarget == CFT_Global || CallerTarget == CFT_Device))
  177|     20|    return CFP_Never;
  178|  15.5k|
  179|  15.5k|  // (b) Calling HostDevice is OK for everyone.
  180|  15.5k|  if (CalleeTarget == CFT_HostDevice)
  181|  7.44k|    return CFP_HostDevice;
  182|  8.11k|
  183|  8.11k|  // (c) Best case scenarios
  184|  8.11k|  if (CalleeTarget == CallerTarget ||
  185|  8.11k|      (CallerTarget == CFT_Host && CalleeTarget == CFT_Global) ||
  186|  8.11k|      (CallerTarget == CFT_Global && CalleeTarget == CFT_Device))
  187|  6.28k|    return CFP_Native;
  188|  1.83k|
  189|  1.83k|  // (d) HostDevice behavior depends on compilation mode.
  190|  1.83k|  if (CallerTarget == CFT_HostDevice) {
  191|  1.16k|    // It's OK to call a compilation-mode matching function from an HD one.
  192|  1.16k|    if ((getLangOpts().CUDAIsDevice && CalleeTarget == CFT_Device) ||
  193|  1.16k|        (!getLangOpts().CUDAIsDevice &&
  194|    861|         (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global)))
  195|    668|      return CFP_SameSide;
  196|    500|
  197|    500|    // Calls from HD to non-mode-matching functions (i.e., to host functions
  198|    500|    // when compiling in device mode or to device functions when compiling in
  199|    500|    // host mode) are allowed at the sema level, but eventually rejected if
  200|    500|    // they're ever codegened.  TODO: Reject said calls earlier.
  201|    500|    return CFP_WrongSide;
  202|    500|  }
  203|    668|
  204|    668|  // (e) Calling across device/host boundary is not something you should do.
  205|    668|  if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) ||
  206|    668|      (CallerTarget == CFT_Device && CalleeTarget == CFT_Host) ||
  207|    668|      (CallerTarget == CFT_Global && CalleeTarget == CFT_Host))
  208|    668|    return CFP_Never;
  209|      0|
  210|      0|  llvm_unreachable("All cases should've been handled by now.");
  211|      0|}
  212|       |
  213|       |void Sema::EraseUnwantedCUDAMatches(
  214|       |    const FunctionDecl *Caller,
  215|      4|    SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) {
  216|      4|  if (Matches.size() <= 1)
  217|      0|    return;
  218|      4|
  219|      4|  using Pair = std::pair<DeclAccessPair, FunctionDecl*>;
  220|      4|
  221|      4|  // Gets the CUDA function preference for a call from Caller to Match.
  222|     20|  auto GetCFP = [&](const Pair &Match) {
  223|     20|    return IdentifyCUDAPreference(Caller, Match.second);
  224|     20|  };
  225|      4|
  226|      4|  // Find the best call preference among the functions in Matches.
  227|      4|  CUDAFunctionPreference BestCFP = GetCFP(*std::max_element(
  228|      4|      Matches.begin(), Matches.end(),
  229|      4|      [&](const Pair &M1, const Pair &M2) { return GetCFP(M1) < GetCFP(M2); }));
  230|      4|
  231|      4|  // Erase all functions with lower priority.
  232|      4|  llvm::erase_if(Matches,
  233|      8|                 [&](const Pair &Match) { return GetCFP(Match) < BestCFP; });
  234|      4|}
  235|       |
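EraseUnwantedCUDAMatches prunes an overload set down to the candidates with the best call preference. Sketch (hypothetical overloads): for a plain host caller, the __device__ overload scores CFP_Never and is erased, leaving only the __host__ one:

    __host__ int f(int);         // CFP_Native from a host caller: kept
    __device__ int f(int);       // CFP_Never from a host caller: erased
    void hostCaller() { f(0); }  // resolves against the __host__ f only
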
  236|       |/// When an implicitly-declared special member has to invoke more than one
  237|       |/// base/field special member, conflicts may occur in the targets of these
  238|       |/// members. For example, if one base's member is __host__ and another's is
  239|       |/// __device__, it's a conflict.
  240|       |/// This function figures out if the given targets \param Target1 and
  241|       |/// \param Target2 conflict, and if they do not it fills in
  242|       |/// \param ResolvedTarget with a target that resolves for both calls.
  243|       |/// \return true if there's a conflict, false otherwise.
  244|       |static bool
  245|       |resolveCalleeCUDATargetConflict(Sema::CUDAFunctionTarget Target1,
  246|       |                                Sema::CUDAFunctionTarget Target2,
  247|     48|                                Sema::CUDAFunctionTarget *ResolvedTarget) {
  248|     48|  // Only free functions and static member functions may be global.
  249|     48|  assert(Target1 != Sema::CFT_Global);
  250|     48|  assert(Target2 != Sema::CFT_Global);
  251|     48|
  252|     48|  if (Target1 == Sema::CFT_HostDevice) {
  253|     27|    *ResolvedTarget = Target2;
  254|     27|  } else if (Target2 == Sema::CFT_HostDevice) {
  255|      1|    *ResolvedTarget = Target1;
  256|     20|  } else if (Target1 != Target2) {
  257|     15|    return true;
  258|     15|  } else {
  259|      5|    *ResolvedTarget = Target1;
  260|      5|  }
  261|     48|
  262|     48|  return false;
  263|     48|}
  264|       |
  265|       |bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
  266|       |                                                   CXXSpecialMember CSM,
  267|       |                                                   CXXMethodDecl *MemberDecl,
  268|       |                                                   bool ConstRHS,
  269|  2.17k|                                                   bool Diagnose) {
  270|  2.17k|  llvm::Optional<CUDAFunctionTarget> InferredTarget;
  271|  2.17k|
  272|  2.17k|  // We're going to invoke special member lookup; mark that these special
  273|  2.17k|  // members are called from this one, and not from its caller.
  274|  2.17k|  ContextRAII MethodContext(*this, MemberDecl);
  275|  2.17k|
  276|  2.17k|  // Look for special members in base classes that should be invoked from here.
  277|  2.17k|  // Infer the target of this member based on the ones it should call.
  278|  2.17k|  // Skip direct and indirect virtual bases for abstract classes.
  279|  2.17k|  llvm::SmallVector<const CXXBaseSpecifier *, 16> Bases;
  280|  2.17k|  for (const auto &B : ClassDecl->bases()) {
  281|    411|    if (!B.isVirtual()) {
  282|    351|      Bases.push_back(&B);
  283|    351|    }
  284|    411|  }
  285|  2.17k|
  286|  2.17k|  if (!ClassDecl->isAbstract()) {
  287|  2.17k|    for (const auto &VB : ClassDecl->vbases()) {
  288|     60|      Bases.push_back(&VB);
  289|     60|    }
  290|  2.17k|  }
  291|  2.17k|
  292|  2.17k|  for (const auto *B : Bases) {
  293|    411|    const RecordType *BaseType = B->getType()->getAs<RecordType>();
  294|    411|    if (!BaseType) {
  295|      0|      continue;
  296|      0|    }
  297|    411|
  298|    411|    CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl());
  299|    411|    Sema::SpecialMemberOverloadResult SMOR =
  300|    411|        LookupSpecialMember(BaseClassDecl, CSM,
  301|    411|                            /* ConstArg */ ConstRHS,
  302|    411|                            /* VolatileArg */ false,
  303|    411|                            /* RValueThis */ false,
  304|    411|                            /* ConstThis */ false,
  305|    411|                            /* VolatileThis */ false);
  306|    411|
  307|    411|    if (!SMOR.getMethod())
  308|      7|      continue;
  309|    404|
  310|    404|    CUDAFunctionTarget BaseMethodTarget = IdentifyCUDATarget(SMOR.getMethod());
  311|    404|    if (!InferredTarget.hasValue()) {
  312|    385|      InferredTarget = BaseMethodTarget;
  313|    385|    } else {
  314|     19|      bool ResolutionError = resolveCalleeCUDATargetConflict(
  315|     19|          InferredTarget.getValue(), BaseMethodTarget,
  316|     19|          InferredTarget.getPointer());
  317|     19|      if (ResolutionError) {
  318|      5|        if (Diagnose) {
  319|      2|          Diag(ClassDecl->getLocation(),
  320|      2|               diag::note_implicit_member_target_infer_collision)
  321|      2|              << (unsigned)CSM << InferredTarget.getValue() << BaseMethodTarget;
  322|      2|        }
  323|      5|        MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
  324|      5|        return true;
  325|      5|      }
  326|     19|    }
  327|    404|  }
  328|  2.17k|
  329|  2.17k|  // Same as for bases, but now for special members of fields.
  330|  2.16k|  for (const auto *F : ClassDecl->fields()) {
  331|    978|    if (F->isInvalidDecl()) {
  332|      0|      continue;
  333|      0|    }
  334|    978|
  335|    978|    const RecordType *FieldType =
  336|    978|        Context.getBaseElementType(F->getType())->getAs<RecordType>();
  337|    978|    if (!FieldType) {
  338|    664|      continue;
  339|    664|    }
  340|    314|
  341|    314|    CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl());
  342|    314|    Sema::SpecialMemberOverloadResult SMOR =
  343|    314|        LookupSpecialMember(FieldRecDecl, CSM,
  344|    314|                            /* ConstArg */ ConstRHS && !F->isMutable(),
  345|    314|                            /* VolatileArg */ false,
  346|    314|                            /* RValueThis */ false,
  347|    314|                            /* ConstThis */ false,
  348|    314|                            /* VolatileThis */ false);
  349|    314|
  350|    314|    if (!SMOR.getMethod())
  351|      0|      continue;
  352|    314|
  353|    314|    CUDAFunctionTarget FieldMethodTarget =
  354|    314|        IdentifyCUDATarget(SMOR.getMethod());
  355|    314|    if (!InferredTarget.hasValue()) {
  356|    285|      InferredTarget = FieldMethodTarget;
  357|    285|    } else {
  358|     29|      bool ResolutionError = resolveCalleeCUDATargetConflict(
  359|     29|          InferredTarget.getValue(), FieldMethodTarget,
  360|     29|          InferredTarget.getPointer());
  361|     29|      if (ResolutionError) {
  362|     10|        if (Diagnose) {
  363|      4|          Diag(ClassDecl->getLocation(),
  364|      4|               diag::note_implicit_member_target_infer_collision)
  365|      4|              << (unsigned)CSM << InferredTarget.getValue()
  366|      4|              << FieldMethodTarget;
  367|      4|        }
  368|     10|        MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
  369|     10|        return true;
  370|     10|      }
  371|     29|    }
  372|    314|  }
  373|  2.16k|
  374|  2.16k|  if (InferredTarget.hasValue()) {
  375|    655|    if (InferredTarget.getValue() == CFT_Device) {
  376|    118|      MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
  377|    537|    } else if (InferredTarget.getValue() == CFT_Host) {
  378|     63|      MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
  379|    474|    } else {
  380|    474|      MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
  381|    474|      MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
  382|    474|    }
  383|  1.50k|  } else {
  384|  1.50k|    // If no target was inferred, mark this member as __host__ __device__;
  385|  1.50k|    // it's the least restrictive option that can be invoked from any target.
  386|  1.50k|    MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
  387|  1.50k|    MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
  388|  1.50k|  }
  389|  2.15k|
  390|  2.15k|  return false;
  391|  2.16k|}
  392|       |
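A sketch of the conflict this function detects (hypothetical types): the implicit default constructor of C would have to run a __host__-only and a __device__-only constructor, so C's constructor is given CUDAInvalidTargetAttr and any later use of it is diagnosed:

    struct A { __host__ A(); };
    struct B { __device__ B(); };
    struct C : A, B {};  // implicit C::C() cannot satisfy both targets
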
  393|    288|bool Sema::isEmptyCudaConstructor(SourceLocation Loc, CXXConstructorDecl *CD) {
  394|    288|  if (!CD->isDefined() && CD->isTemplateInstantiation())
  395|     23|    InstantiateFunctionDefinition(Loc, CD->getFirstDecl());
  396|    288|
  397|    288|  // (E.2.3.1, CUDA 7.5) A constructor for a class type is considered
  398|    288|  // empty at a point in the translation unit, if it is either a
  399|    288|  // trivial constructor
  400|    288|  if (CD->isTrivial())
  401|     81|    return true;
  402|    207|
  403|    207|  // ... or it satisfies all of the following conditions:
  404|    207|  // The constructor function has been defined.
  405|    207|  // The constructor function has no parameters,
  406|    207|  // and the function body is an empty compound statement.
  407|    207|  if (!(CD->hasTrivialBody() && CD->getNumParams() == 0))
  408|     90|    return false;
  409|    117|
  410|    117|  // Its class has no virtual functions and no virtual base classes.
  411|    117|  if (CD->getParent()->isDynamicClass())
  412|     18|    return false;
  413|     99|
  414|     99|  // The only form of initializer allowed is an empty constructor.
  415|     99|  // This will recursively check all base classes and member initializers
  416|     99|  if (!llvm::all_of(CD->inits(), [&](const CXXCtorInitializer *CI) {
  417|     57|        if (const CXXConstructExpr *CE =
  418|     33|                dyn_cast<CXXConstructExpr>(CI->getInit()))
  419|     33|          return isEmptyCudaConstructor(Loc, CE->getConstructor());
  420|     24|        return false;
  421|     24|      }))
  422|     48|    return false;
  423|     51|
  424|     51|  return true;
  425|     51|}
  426|       |
  427|    153|bool Sema::isEmptyCudaDestructor(SourceLocation Loc, CXXDestructorDecl *DD) {
  428|    153|  // No destructor -> no problem.
  429|    153|  if (!DD)
  430|     67|    return true;
  431|     86|
  432|     86|  if (!DD->isDefined() && DD->isTemplateInstantiation())
  433|      0|    InstantiateFunctionDefinition(Loc, DD->getFirstDecl());
  434|     86|
  435|     86|  // (E.2.3.1, CUDA 7.5) A destructor for a class type is considered
  436|     86|  // empty at a point in the translation unit, if it is either a
  437|     86|  // trivial destructor
  438|     86|  if (DD->isTrivial())
  439|     18|    return true;
  440|     68|
  441|     68|  // ... or it satisfies all of the following conditions:
  442|     68|  // The destructor function has been defined.
  443|     68|  // and the function body is an empty compound statement.
  444|     68|  if (!DD->hasTrivialBody())
  445|     32|    return false;
  446|     36|
  447|     36|  const CXXRecordDecl *ClassDecl = DD->getParent();
  448|     36|
  449|     36|  // Its class has no virtual functions and no virtual base classes.
  450|     36|  if (ClassDecl->isDynamicClass())
  451|      0|    return false;
  452|     36|
  453|     36|  // Only empty destructors are allowed. This will recursively check
  454|     36|  // destructors for all base classes...
  455|     36|  if (!llvm::all_of(ClassDecl->bases(), [&](const CXXBaseSpecifier &BS) {
  456|      6|        if (CXXRecordDecl *RD = BS.getType()->getAsCXXRecordDecl())
  457|      6|          return isEmptyCudaDestructor(Loc, RD->getDestructor());
  458|      0|        return true;
  459|      0|      }))
  460|      6|    return false;
  461|     30|
  462|     30|  // ... and member fields.
  463|     30|  if (!llvm::all_of(ClassDecl->fields(), [&](const FieldDecl *Field) {
  464|     12|        if (CXXRecordDecl *RD = Field->getType()
  465|     12|                                    ->getBaseElementTypeUnsafe()
  466|     12|                                    ->getAsCXXRecordDecl())
  467|     12|          return isEmptyCudaDestructor(Loc, RD->getDestructor());
  468|      0|        return true;
  469|      0|      }))
  470|     12|    return false;
  471|     18|
  472|     18|  return true;
  473|     18|}
  474|       |
  475|  1.43k|void Sema::checkAllowedCUDAInitializer(VarDecl *VD) {
  476|  1.43k|  if (VD->isInvalidDecl() || !VD->hasInit() || !VD->hasGlobalStorage())
  477|  1.09k|    return;
  478|    340|  const Expr *Init = VD->getInit();
  479|    340|  if (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>() ||
  480|    340|      VD->hasAttr<CUDASharedAttr>()) {
  481|    301|    assert(!VD->isStaticLocal() || VD->hasAttr<CUDASharedAttr>());
  482|    301|    bool AllowedInit = false;
  483|    301|    if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init))
  484|    255|      AllowedInit =
  485|    255|          isEmptyCudaConstructor(VD->getLocation(), CE->getConstructor());
  486|    301|    // We'll allow constant initializers even if it's a non-empty
  487|    301|    // constructor according to CUDA rules. This deviates from NVCC,
  488|    301|    // but allows us to handle things like constexpr constructors.
  489|    301|    if (!AllowedInit &&
  490|    301|        (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>()))
  491|    126|      AllowedInit = VD->getInit()->isConstantInitializer(
  492|    126|          Context, VD->getType()->isReferenceType());
  493|    301|
  494|    301|    // Also make sure that destructor, if there is one, is empty.
  495|    301|    if (AllowedInit)
  496|    165|      if (CXXRecordDecl *RD = VD->getType()->getAsCXXRecordDecl())
  497|    135|        AllowedInit =
  498|    135|            isEmptyCudaDestructor(VD->getLocation(), RD->getDestructor());
  499|    301|
  500|    301|    if (!AllowedInit) {
  501|    168|      Diag(VD->getLocation(), VD->hasAttr<CUDASharedAttr>()
  502|    168|                                  ? diag::err_shared_var_init
  503|    168|                                  : diag::err_dynamic_var_init)
  504|    168|          << Init->getSourceRange();
  505|    168|      VD->setInvalidDecl();
  506|    168|    }
  507|    301|  } else {
  508|     39|    // This is a host-side global variable.  Check that the initializer is
  509|     39|    // callable from the host side.
  510|     39|    const FunctionDecl *InitFn = nullptr;
  511|     39|    if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init)) {
  512|      9|      InitFn = CE->getConstructor();
  513|     30|    } else if (const CallExpr *CE = dyn_cast<CallExpr>(Init)) {
  514|      1|      InitFn = CE->getDirectCallee();
  515|      1|    }
  516|     39|    if (InitFn) {
  517|     10|      CUDAFunctionTarget InitFnTarget = IdentifyCUDATarget(InitFn);
  518|     10|      if (InitFnTarget != CFT_Host && InitFnTarget != CFT_HostDevice) {
  519|      3|        Diag(VD->getLocation(), diag::err_ref_bad_target_global_initializer)
  520|      3|            << InitFnTarget << InitFn;
  521|      3|        Diag(InitFn->getLocation(), diag::note_previous_decl) << InitFn;
  522|      3|        VD->setInvalidDecl();
  523|      3|      }
  524|     10|    }
  525|     39|  }
  526|    340|}
  527|       |
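The effect of these rules on device-side variables, sketched with hypothetical types (the first two declarations are accepted, the third is rejected):

    struct Empty { Empty() {} };  // empty constructor in the E.2.3.1 sense
    struct Heavy { Heavy(); };    // defined out of line, presumably non-empty

    __device__ int x = 42;   // OK: constant initializer
    __device__ Empty e;      // OK: empty constructor and no destructor
    __device__ Heavy h;      // error: err_dynamic_var_init
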
  528|       |// With -fcuda-host-device-constexpr, an unattributed constexpr function is
  529|       |// treated as implicitly __host__ __device__, unless:
  530|       |//  * it is a variadic function (device-side variadic functions are not
  531|       |//    allowed), or
  532|       |//  * a __device__ function with this signature was already declared, in which
  533|       |//    case we output an error, unless the __device__ decl is in a system
  534|       |//    header, in which case we leave the constexpr function unattributed.
  535|       |//
  536|       |// In addition, all function decls are treated as __host__ __device__ when
  537|       |// ForceCUDAHostDeviceDepth > 0 (corresponding to code within a
  538|       |//   #pragma clang force_cuda_host_device_begin/end
  539|       |// pair).
  540|       |void Sema::maybeAddCUDAHostDeviceAttrs(FunctionDecl *NewD,
  541|  2.48k|                                       const LookupResult &Previous) {
  542|  2.48k|  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  543|  2.48k|
  544|  2.48k|  if (ForceCUDAHostDeviceDepth > 0) {
  545|      7|    if (!NewD->hasAttr<CUDAHostAttr>())
  546|      5|      NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
  547|      7|    if (!NewD->hasAttr<CUDADeviceAttr>())
  548|      5|      NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
  549|      7|    return;
  550|      7|  }
  551|  2.47k|
  552|  2.47k|  if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() ||
  553|  2.47k|      NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() ||
  554|  2.47k|      NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>())
  555|  2.46k|    return;
  556|     11|
  557|     11|  // Is D a __device__ function with the same signature as NewD, ignoring CUDA
  558|     11|  // attributes?
  559|     11|  auto IsMatchingDeviceFn = [&](NamedDecl *D) {
  560|      6|    if (UsingShadowDecl *Using = dyn_cast<UsingShadowDecl>(D))
  561|      2|      D = Using->getTargetDecl();
  562|      6|    FunctionDecl *OldD = D->getAsFunction();
  563|      6|    return OldD && OldD->hasAttr<CUDADeviceAttr>() &&
  564|      6|           !OldD->hasAttr<CUDAHostAttr>() &&
  565|      6|           !IsOverload(NewD, OldD, /* UseMemberUsingDeclRules = */ false,
  566|      6|                       /* ConsiderCudaAttrs = */ false);
  567|      6|  };
  568|     11|  auto It = llvm::find_if(Previous, IsMatchingDeviceFn);
  569|     11|  if (It != Previous.end()) {
  570|      6|    // We found a __device__ function with the same name and signature as NewD
  571|      6|    // (ignoring CUDA attrs).  This is an error unless that function is defined
  572|      6|    // in a system header, in which case we simply return without making NewD
  573|      6|    // host+device.
  574|      6|    NamedDecl *Match = *It;
  575|      6|    if (!getSourceManager().isInSystemHeader(Match->getLocation())) {
  576|      2|      Diag(NewD->getLocation(),
  577|      2|           diag::err_cuda_unattributed_constexpr_cannot_overload_device)
  578|      2|          << NewD;
  579|      2|      Diag(Match->getLocation(),
  580|      2|           diag::note_cuda_conflicting_device_function_declared_here);
  581|      2|    }
  582|      6|    return;
  583|      6|  }
  584|      5|
  585|      5|  NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
  586|      5|  NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
  587|      5|}
  588|       |
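Under -fcuda-host-device-constexpr, sketched with hypothetical functions:

    constexpr int square(int x) { return x * x; }  // made implicitly HD

    __device__ int twice(int x);
    constexpr int twice(int x) { return 2 * x; }
    // error: err_cuda_unattributed_constexpr_cannot_overload_device, because a
    // same-signature __device__ twice() was already declared outside a system
    // header, so this constexpr function may not become __host__ __device__.
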
  589|       |// Do we know that we will eventually codegen the given function?
  590|  14.5k|static bool IsKnownEmitted(Sema &S, FunctionDecl *FD) {
  591|  14.5k|  // Templates are emitted when they're instantiated.
  592|  14.5k|  if (FD->isDependentContext())
  593|    210|    return false;
  594|  14.3k|
  595|  14.3k|  // When compiling for device, host functions are never emitted.  Similarly,
  596|  14.3k|  // when compiling for host, device and global functions are never emitted.
  597|  14.3k|  // (Technically, we do emit a host-side stub for global functions, but this
  598|  14.3k|  // doesn't count for our purposes here.)
  599|  14.3k|  Sema::CUDAFunctionTarget T = S.IdentifyCUDATarget(FD);
  600|  14.3k|  if (S.getLangOpts().CUDAIsDevice && T == Sema::CFT_Host)
  601|    631|    return false;
  602|  13.7k|  if (!S.getLangOpts().CUDAIsDevice &&
  603|  13.7k|      (T == Sema::CFT_Device || T == Sema::CFT_Global))
  604|    746|    return false;
  605|  12.9k|
  606|  12.9k|  // Check whether this function is externally visible -- if so, it's
  607|  12.9k|  // known-emitted.
  608|  12.9k|  //
  609|  12.9k|  // We have to check the GVA linkage of the function's *definition* -- if we
  610|  12.9k|  // only have a declaration, we don't know whether or not the function will be
  611|  12.9k|  // emitted, because (say) the definition could include "inline".
  612|  12.9k|  FunctionDecl *Def = FD->getDefinition();
  613|  12.9k|
  614|  12.9k|  if (Def &&
  615|  12.9k|      !isDiscardableGVALinkage(S.getASTContext().GetGVALinkageForFunction(Def)))
  616|  6.05k|    return true;
  617|  6.92k|
  618|  6.92k|  // Otherwise, the function is known-emitted if it's in our set of
  619|  6.92k|  // known-emitted functions.
  620|  6.92k|  return S.DeviceKnownEmittedFns.count(FD) > 0;
  621|  6.92k|}
  622|       |
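What "known-emitted" means in practice, sketched on hypothetical device-side functions:

    __global__ void k() {}             // non-discardable linkage: known-emitted
                                       // as soon as the definition is seen
    inline __device__ int helper() {   // discardable (inline) linkage: becomes
      return 1;                        // known-emitted only once reached from a
    }                                  // known-emitted caller
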
  623|       |Sema::DeviceDiagBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc,
  624|     65|                                                   unsigned DiagID) {
  625|     65|  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  626|     65|  DeviceDiagBuilder::Kind DiagKind = [this] {
  627|     65|    switch (CurrentCUDATarget()) {
  628|     65|    case CFT_Global:
  629|     22|    case CFT_Device:
  630|     22|      return DeviceDiagBuilder::K_Immediate;
  631|     37|    case CFT_HostDevice:
  632|     37|      // An HD function counts as host code if we're compiling for host, and
  633|     37|      // device code if we're compiling for device.  Defer any errors in device
  634|     37|      // mode until the function is known-emitted.
  635|     37|      if (getLangOpts().CUDAIsDevice) {
  636|     31|        return IsKnownEmitted(*this, dyn_cast<FunctionDecl>(CurContext))
  637|     31|                   ? DeviceDiagBuilder::K_ImmediateWithCallStack
  638|     31|                   : DeviceDiagBuilder::K_Deferred;
  639|     31|      }
  640|      6|      return DeviceDiagBuilder::K_Nop;
  641|      6|
  642|      6|    default:
  643|      6|      return DeviceDiagBuilder::K_Nop;
  644|     65|    }
  645|     65|  }();
  646|     65|  return DeviceDiagBuilder(DiagKind, Loc, DiagID,
  647|     65|                           dyn_cast<FunctionDecl>(CurContext), *this);
  648|     65|}
  649|       |
  650|       |Sema::DeviceDiagBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc,
  651|     21|                                                 unsigned DiagID) {
  652|     21|  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  653|     21|  DeviceDiagBuilder::Kind DiagKind = [this] {
  654|     21|    switch (CurrentCUDATarget()) {
  655|     21|    case CFT_Host:
  656|      2|      return DeviceDiagBuilder::K_Immediate;
  657|     21|    case CFT_HostDevice:
  658|      2|      // An HD function counts as host code if we're compiling for host, and
  659|      2|      // device code if we're compiling for device.  Defer any errors in device
  660|      2|      // mode until the function is known-emitted.
  661|      2|      if (getLangOpts().CUDAIsDevice)
  662|      0|        return DeviceDiagBuilder::K_Nop;
  663|      2|
  664|      2|      return IsKnownEmitted(*this, dyn_cast<FunctionDecl>(CurContext))
  665|      2|                 ? DeviceDiagBuilder::K_ImmediateWithCallStack
  666|      2|                 : DeviceDiagBuilder::K_Deferred;
  667|     17|    default:
  668|     17|      return DeviceDiagBuilder::K_Nop;
  669|     21|    }
  670|     21|  }();
  671|     21|  return DeviceDiagBuilder(DiagKind, Loc, DiagID,
  672|     21|                           dyn_cast<FunctionDecl>(CurContext), *this);
  673|     21|}
  674|       |
  675|  9.11k|bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
  676|  9.11k|  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  677|  9.11k|  assert(Callee && "Callee may not be null.");
  678|  9.11k|
  679|  9.11k|  auto &ExprEvalCtx = ExprEvalContexts.back();
  680|  9.11k|  if (ExprEvalCtx.isUnevaluated() || ExprEvalCtx.isConstantEvaluated())
  681|     24|    return true;
  682|  9.09k|
  683|  9.09k|  // FIXME: Is bailing out early correct here?  Should we instead assume that
  684|  9.09k|  // the caller is a global initializer?
  685|  9.09k|  FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext);
  686|  9.09k|  if (!Caller)
  687|    869|    return true;
  688|  8.22k|
  689|  8.22k|  // If the caller is known-emitted, mark the callee as known-emitted.
  690|  8.22k|  // Otherwise, mark the call in our call graph so we can traverse it later.
  691|  8.22k|  bool CallerKnownEmitted = IsKnownEmitted(*this, Caller);
  692|  8.22k|  if (CallerKnownEmitted) {
  693|  6.16k|    // Host-side references to a __global__ function refer to the stub, so the
  694|  6.16k|    // function itself is never emitted and therefore should not be marked.
  695|  6.16k|    if (getLangOpts().CUDAIsDevice || IdentifyCUDATarget(Callee) != CFT_Global)
  696|  6.09k|      markKnownEmitted(*this, Caller, Callee, Loc, IsKnownEmitted);
  697|  6.16k|  } else {
  698|  2.05k|    // If we have
  699|  2.05k|    //   host fn calls kernel fn calls host+device,
  700|  2.05k|    // the HD function does not get instantiated on the host.  We model this by
  701|  2.05k|    // omitting the call to the kernel from the callgraph.  This ensures
  702|  2.05k|    // that, when compiling for host, only HD functions actually called from the
  703|  2.05k|    // host get marked as known-emitted.
  704|  2.05k|    if (getLangOpts().CUDAIsDevice || IdentifyCUDATarget(Callee) != CFT_Global)
  705|  2.04k|      DeviceCallGraph[Caller].insert({Callee, Loc});
  706|  2.05k|  }
  707|  8.22k|
  708|  8.22k|  DeviceDiagBuilder::Kind DiagKind = [this, Caller, Callee,
  709|  8.22k|                                      CallerKnownEmitted] {
  710|  8.22k|    switch (IdentifyCUDAPreference(Caller, Callee)) {
  711|  8.22k|    case CFP_Never:
  712|     33|      return DeviceDiagBuilder::K_Immediate;
  713|  8.22k|    case CFP_WrongSide:
  714|    101|      assert(Caller && "WrongSide calls require a non-null caller");
  715|    101|      // If we know the caller will be emitted, we know this wrong-side call
  716|    101|      // will be emitted, so it's an immediate error.  Otherwise, defer the
  717|    101|      // error until we know the caller is emitted.
  718|    101|      return CallerKnownEmitted ? DeviceDiagBuilder::K_ImmediateWithCallStack
  719|    101|                                : DeviceDiagBuilder::K_Deferred;
  720|  8.22k|    default:
  721|  8.09k|      return DeviceDiagBuilder::K_Nop;
  722|  8.22k|    }
  723|  8.22k|  }();
  724|  8.22k|
  725|  8.22k|  if (DiagKind == DeviceDiagBuilder::K_Nop)
  726|  8.09k|    return true;
  727|    134|
  728|    134|  // Avoid emitting this error twice for the same location.  Using a hashtable
  729|    134|  // like this is unfortunate, but because we must continue parsing as normal
  730|    134|  // after encountering a deferred error, it's otherwise very tricky for us to
  731|    134|  // ensure that we only emit this deferred error once.
  732|    134|  if (!LocsWithCUDACallDiags.insert({Caller, Loc}).second)
  733|     33|    return true;
  734|    101|
  735|    101|  DeviceDiagBuilder(DiagKind, Loc, diag::err_ref_bad_target, Caller, *this)
  736|    101|      << IdentifyCUDATarget(Callee) << Callee << IdentifyCUDATarget(Caller);
  737|    101|  DeviceDiagBuilder(DiagKind, Callee->getLocation(), diag::note_previous_decl,
  738|    101|                    Caller, *this)
  739|    101|      << Callee;
  740|    101|  return DiagKind != DeviceDiagBuilder::K_Immediate &&
  741|    101|         DiagKind != DeviceDiagBuilder::K_ImmediateWithCallStack;
  742|    101|}
  743|       |
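How a deferred wrong-side diagnostic plays out when compiling for the host, sketched with hypothetical functions:

    __device__ int devOnly();

    inline __host__ __device__ int hd() {
      return devOnly();  // CFP_WrongSide in host mode; hd() is inline, so the
    }                    // error is deferred rather than emitted immediately

    void hostUser() { hd(); }  // marks hd() known-emitted on the host side,
                               // at which point the deferred error is emitted
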
  744|     82|void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) {
  745|     82|  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  746|     82|  if (Method->hasAttr<CUDAHostAttr>() || Method->hasAttr<CUDADeviceAttr>())
  747|     68|    return;
  748|     14|  FunctionDecl *CurFn = dyn_cast<FunctionDecl>(CurContext);
  749|     14|  if (!CurFn)
  750|      0|    return;
  751|     14|  CUDAFunctionTarget Target = IdentifyCUDATarget(CurFn);
  752|     14|  if (Target == CFT_Global || Target == CFT_Device) {
  753|      8|    Method->addAttr(CUDADeviceAttr::CreateImplicit(Context));
  754|      8|  } else if (Target == CFT_HostDevice) {
  755|      2|    Method->addAttr(CUDADeviceAttr::CreateImplicit(Context));
  756|      2|    Method->addAttr(CUDAHostAttr::CreateImplicit(Context));
  757|      2|  }
  758|     14|}
  759|       |
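The implicit lambda targets, sketched with a hypothetical kernel: an unattributed lambda written inside device code is usable there because its operator() inherits a device target from the enclosing function:

    __global__ void k(int *out) {
      auto f = [] { return 1; };  // Sema adds an implicit __device__ to the
      *out = f();                 // lambda's operator(), making this call
    }                             // same-side
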
  760|       |void Sema::checkCUDATargetOverload(FunctionDecl *NewFD,
  761|  3.75k|                                   const LookupResult &Previous) {
  762|  3.75k|  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  763|  3.75k|  CUDAFunctionTarget NewTarget = IdentifyCUDATarget(NewFD);
  764|  3.75k|  for (NamedDecl *OldND : Previous) {
  765|  1.84k|    FunctionDecl *OldFD = OldND->getAsFunction();
  766|  1.84k|    if (!OldFD)
  767|     80|      continue;
  768|  1.76k|
  769|  1.76k|    CUDAFunctionTarget OldTarget = IdentifyCUDATarget(OldFD);
  770|  1.76k|    // Don't allow HD and global functions to overload other functions with the
  771|  1.76k|    // same signature.  We allow overloading based on CUDA attributes so that
  772|  1.76k|    // functions can have different implementations on the host and device, but
  773|  1.76k|    // HD/global functions "exist" in some sense on both the host and device, so
  774|  1.76k|    // should have the same implementation on both sides.
  775|  1.76k|    if (NewTarget != OldTarget &&
  776|  1.76k|        ((NewTarget == CFT_HostDevice) || (OldTarget == CFT_HostDevice) ||
  777|    879|         (NewTarget == CFT_Global) || (OldTarget == CFT_Global)) &&
  778|  1.76k|        !IsOverload(NewFD, OldFD, /* UseMemberUsingDeclRules = */ false,
  779|    283|                    /* ConsiderCudaAttrs = */ false)) {
  780|     26|      Diag(NewFD->getLocation(), diag::err_cuda_ovl_target)
  781|     26|          << NewTarget << NewFD->getDeclName() << OldTarget << OldFD;
  782|     26|      Diag(OldFD->getLocation(), diag::note_previous_declaration);
  783|     26|      NewFD->setInvalidDecl();
  784|     26|      break;
  785|     26|    }
  786|  1.76k|  }
  787|  3.75k|}
  788|       |
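What this overload check allows and rejects, sketched on hypothetical declarations:

    __host__ int f();     // OK: overloading purely on host/device attributes,
    __device__ int f();   // so each side can get its own implementation

    __host__ __device__ int g();
    __device__ int g();   // error: err_cuda_ovl_target -- the HD g() already
                          // exists on the device side with the same signature
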
  789|       |template <typename AttrTy>
  790|       |static void copyAttrIfPresent(Sema &S, FunctionDecl *FD,
  791|     33|                              const FunctionDecl &TemplateFD) {
  792|     33|  if (AttrTy *Attribute = TemplateFD.getAttr<AttrTy>()) {
  793|     18|    AttrTy *Clone = Attribute->clone(S.Context);
  794|     18|    Clone->setInherited(true);
  795|     18|    FD->addAttr(Clone);
  796|     18|  }
  797|     33|}
  ------------------
  | SemaCUDA.cpp:void copyAttrIfPresent<clang::CUDAGlobalAttr>(clang::Sema&, clang::FunctionDecl*, clang::FunctionDecl const&):
  |  791|     11|                              const FunctionDecl &TemplateFD) {
  |  792|     11|  if (AttrTy *Attribute = TemplateFD.getAttr<AttrTy>()) {
  |  793|      0|    AttrTy *Clone = Attribute->clone(S.Context);
  |  794|      0|    Clone->setInherited(true);
  |  795|      0|    FD->addAttr(Clone);
  |  796|      0|  }
  |  797|     11|}
  ------------------
  | SemaCUDA.cpp:void copyAttrIfPresent<clang::CUDAHostAttr>(clang::Sema&, clang::FunctionDecl*, clang::FunctionDecl const&):
  |  791|     11|                              const FunctionDecl &TemplateFD) {
  |  792|     11|  if (AttrTy *Attribute = TemplateFD.getAttr<AttrTy>()) {
  |  793|      9|    AttrTy *Clone = Attribute->clone(S.Context);
  |  794|      9|    Clone->setInherited(true);
  |  795|      9|    FD->addAttr(Clone);
  |  796|      9|  }
  |  797|     11|}
  ------------------
  | SemaCUDA.cpp:void copyAttrIfPresent<clang::CUDADeviceAttr>(clang::Sema&, clang::FunctionDecl*, clang::FunctionDecl const&):
  |  791|     11|                              const FunctionDecl &TemplateFD) {
  |  792|     11|  if (AttrTy *Attribute = TemplateFD.getAttr<AttrTy>()) {
  |  793|      9|    AttrTy *Clone = Attribute->clone(S.Context);
  |  794|      9|    Clone->setInherited(true);
  |  795|      9|    FD->addAttr(Clone);
  |  796|      9|  }
  |  797|     11|}
  ------------------
  798|       |
  799|       |void Sema::inheritCUDATargetAttrs(FunctionDecl *FD,
  800|     11|                                  const FunctionTemplateDecl &TD) {
  801|     11|  const FunctionDecl &TemplateFD = *TD.getTemplatedDecl();
  802|     11|  copyAttrIfPresent<CUDAGlobalAttr>(*this, FD, TemplateFD);
  803|     11|  copyAttrIfPresent<CUDAHostAttr>(*this, FD, TemplateFD);
  804|     11|  copyAttrIfPresent<CUDADeviceAttr>(*this, FD, TemplateFD);
  805|     11|}
  806|       |
  807|  1.61k|std::string Sema::getCudaConfigureFuncName() const {
  808|  1.61k|  if (getLangOpts().HIP)
  809|     39|    return "hipConfigureCall";
  810|  1.57k|
  811|  1.57k|  // New CUDA kernel launch sequence.
  812|  1.57k|  if (CudaFeatureEnabled(Context.getTargetInfo().getSDKVersion(),
  813|  1.57k|                         CudaFeature::CUDA_USES_NEW_LAUNCH))
  814|     34|    return "__cudaPushCallConfiguration";
  815|  1.54k|
  816|  1.54k|  // Legacy CUDA kernel configuration call
  817|  1.54k|  return "cudaConfigureCall";
  818|  1.54k|}