Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp
Line
Count
Source (jump to first uncovered line)
1
//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This provides a class for OpenMP runtime code generation.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "CGCXXABI.h"
14
#include "CGCleanup.h"
15
#include "CGOpenMPRuntime.h"
16
#include "CGRecordLayout.h"
17
#include "CodeGenFunction.h"
18
#include "clang/CodeGen/ConstantInitBuilder.h"
19
#include "clang/AST/Decl.h"
20
#include "clang/AST/StmtOpenMP.h"
21
#include "clang/Basic/BitmaskEnum.h"
22
#include "llvm/ADT/ArrayRef.h"
23
#include "llvm/Bitcode/BitcodeReader.h"
24
#include "llvm/IR/DerivedTypes.h"
25
#include "llvm/IR/GlobalValue.h"
26
#include "llvm/IR/Value.h"
27
#include "llvm/Support/Format.h"
28
#include "llvm/Support/raw_ostream.h"
29
#include <cassert>
30
31
using namespace clang;
32
using namespace CodeGen;
33
34
namespace {
35
/// Base class for handling code generation inside OpenMP regions.
36
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
37
public:
38
  /// Kinds of OpenMP regions used in codegen.
39
  enum CGOpenMPRegionKind {
40
    /// Region with outlined function for standalone 'parallel'
41
    /// directive.
42
    ParallelOutlinedRegion,
43
    /// Region with outlined function for standalone 'task' directive.
44
    TaskOutlinedRegion,
45
    /// Region for constructs that do not require function outlining,
46
    /// like 'for', 'sections', 'atomic' etc. directives.
47
    InlinedRegion,
48
    /// Region with outlined function for standalone 'target' directive.
49
    TargetRegion,
50
  };
51
52
  CGOpenMPRegionInfo(const CapturedStmt &CS,
53
                     const CGOpenMPRegionKind RegionKind,
54
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
55
                     bool HasCancel)
56
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
57
14.1k
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
58
59
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
60
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
61
                     bool HasCancel)
62
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
63
25.5k
        Kind(Kind), HasCancel(HasCancel) {}
64
65
  /// Get a variable or parameter for storing global thread id
66
  /// inside OpenMP construct.
67
  virtual const VarDecl *getThreadIDVariable() const = 0;
68
69
  /// Emit the captured statement body.
70
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
71
72
  /// Get an LValue for the current ThreadID variable.
73
  /// \return LValue for thread id variable. This LValue always has type int32*.
74
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
75
76
4
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
77
78
0
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
79
80
100
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
81
82
250
  bool hasCancel() const { return HasCancel; }
83
84
16.5k
  static bool classof(const CGCapturedStmtInfo *Info) {
85
16.5k
    return Info->getKind() == CR_OpenMP;
86
16.5k
  }
87
88
39.6k
  ~CGOpenMPRegionInfo() override = default;
89
90
protected:
91
  CGOpenMPRegionKind RegionKind;
92
  RegionCodeGenTy CodeGen;
93
  OpenMPDirectiveKind Kind;
94
  bool HasCancel;
95
};
96
97
/// API for captured statement code generation in OpenMP constructs.
98
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
99
public:
100
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
101
                             const RegionCodeGenTy &CodeGen,
102
                             OpenMPDirectiveKind Kind, bool HasCancel,
103
                             StringRef HelperName)
104
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
105
                           HasCancel),
106
7.33k
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
107
7.33k
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
108
7.33k
  }
109
110
  /// Get a variable or parameter for storing global thread id
111
  /// inside OpenMP construct.
112
24.9k
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
113
114
  /// Get the name of the capture helper.
115
7.36k
  StringRef getHelperName() const override { return HelperName; }
116
117
0
  static bool classof(const CGCapturedStmtInfo *Info) {
118
0
    return CGOpenMPRegionInfo::classof(Info) &&
119
0
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
120
0
               ParallelOutlinedRegion;
121
0
  }
122
123
private:
124
  /// A variable or parameter storing global thread id for OpenMP
125
  /// constructs.
126
  const VarDecl *ThreadIDVar;
127
  StringRef HelperName;
128
};
129
130
/// API for captured statement code generation in OpenMP constructs.
131
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
132
public:
133
  class UntiedTaskActionTy final : public PrePostActionTy {
134
    bool Untied;
135
    const VarDecl *PartIDVar;
136
    const RegionCodeGenTy UntiedCodeGen;
137
    llvm::SwitchInst *UntiedSwitch = nullptr;
138
139
  public:
140
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
141
                       const RegionCodeGenTy &UntiedCodeGen)
142
442
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
143
442
    void Enter(CodeGenFunction &CGF) override {
144
442
      if (Untied) {
145
8
        // Emit task switching point.
146
8
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
147
8
            CGF.GetAddrOfLocalVar(PartIDVar),
148
8
            PartIDVar->getType()->castAs<PointerType>());
149
8
        llvm::Value *Res =
150
8
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
151
8
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
152
8
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
153
8
        CGF.EmitBlock(DoneBB);
154
8
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
155
8
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
156
8
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
157
8
                              CGF.Builder.GetInsertBlock());
158
8
        emitUntiedSwitch(CGF);
159
8
      }
160
442
    }
161
16
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
162
16
      if (Untied) {
163
14
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
164
14
            CGF.GetAddrOfLocalVar(PartIDVar),
165
14
            PartIDVar->getType()->castAs<PointerType>());
166
14
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
167
14
                              PartIdLVal);
168
14
        UntiedCodeGen(CGF);
169
14
        CodeGenFunction::JumpDest CurPoint =
170
14
            CGF.getJumpDestInCurrentScope(".untied.next.");
171
14
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
172
14
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
173
14
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
174
14
                              CGF.Builder.GetInsertBlock());
175
14
        CGF.EmitBranchThroughCleanup(CurPoint);
176
14
        CGF.EmitBlock(CurPoint.getBlock());
177
14
      }
178
16
    }
179
8
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
180
  };
181
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
182
                                 const VarDecl *ThreadIDVar,
183
                                 const RegionCodeGenTy &CodeGen,
184
                                 OpenMPDirectiveKind Kind, bool HasCancel,
185
                                 const UntiedTaskActionTy &Action)
186
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
187
442
        ThreadIDVar(ThreadIDVar), Action(Action) {
188
442
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
189
442
  }
190
191
  /// Get a variable or parameter for storing global thread id
192
  /// inside OpenMP construct.
193
120
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
194
195
  /// Get an LValue for the current ThreadID variable.
196
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
197
198
  /// Get the name of the capture helper.
199
442
  StringRef getHelperName() const override { return ".omp_outlined."; }
200
201
8
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
202
8
    Action.emitUntiedSwitch(CGF);
203
8
  }
204
205
0
  static bool classof(const CGCapturedStmtInfo *Info) {
206
0
    return CGOpenMPRegionInfo::classof(Info) &&
207
0
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
208
0
               TaskOutlinedRegion;
209
0
  }
210
211
private:
212
  /// A variable or parameter storing global thread id for OpenMP
213
  /// constructs.
214
  const VarDecl *ThreadIDVar;
215
  /// Action for emitting code for untied tasks.
216
  const UntiedTaskActionTy &Action;
217
};
218
219
/// API for inlined captured statement code generation in OpenMP
220
/// constructs.
221
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
222
public:
223
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
224
                            const RegionCodeGenTy &CodeGen,
225
                            OpenMPDirectiveKind Kind, bool HasCancel)
226
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
227
        OldCSI(OldCSI),
228
25.5k
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
229
230
  // Retrieve the value of the context parameter.
231
0
  llvm::Value *getContextValue() const override {
232
0
    if (OuterRegionInfo)
233
0
      return OuterRegionInfo->getContextValue();
234
0
    llvm_unreachable("No context value for inlined OpenMP region");
235
0
  }
236
237
0
  void setContextValue(llvm::Value *V) override {
238
0
    if (OuterRegionInfo) {
239
0
      OuterRegionInfo->setContextValue(V);
240
0
      return;
241
0
    }
242
0
    llvm_unreachable("No context value for inlined OpenMP region");
243
0
  }
244
245
  /// Lookup the captured field decl for a variable.
246
19.6k
  const FieldDecl *lookup(const VarDecl *VD) const override {
247
19.6k
    if (OuterRegionInfo)
248
9.88k
      return OuterRegionInfo->lookup(VD);
249
9.78k
    // If there is no outer outlined region, no need to look up in a list of
250
9.78k
    // captured variables, we can use the original one.
251
9.78k
    return nullptr;
252
9.78k
  }
253
254
0
  FieldDecl *getThisFieldDecl() const override {
255
0
    if (OuterRegionInfo)
256
0
      return OuterRegionInfo->getThisFieldDecl();
257
0
    return nullptr;
258
0
  }
259
260
  /// Get a variable or parameter for storing global thread id
261
  /// inside OpenMP construct.
262
4.75k
  const VarDecl *getThreadIDVariable() const override {
263
4.75k
    if (OuterRegionInfo)
264
4.67k
      return OuterRegionInfo->getThreadIDVariable();
265
74
    return nullptr;
266
74
  }
267
268
  /// Get an LValue for the current ThreadID variable.
269
4.67k
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
270
4.67k
    if (OuterRegionInfo)
271
4.67k
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
272
0
    llvm_unreachable("No LValue for inlined OpenMP construct");
273
0
  }
274
275
  /// Get the name of the capture helper.
276
0
  StringRef getHelperName() const override {
277
0
    if (auto *OuterRegionInfo = getOldCSI())
278
0
      return OuterRegionInfo->getHelperName();
279
0
    llvm_unreachable("No helper name for inlined OpenMP construct");
280
0
  }
281
282
6
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
283
6
    if (OuterRegionInfo)
284
4
      OuterRegionInfo->emitUntiedSwitch(CGF);
285
6
  }
286
287
25.3k
  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
288
289
0
  static bool classof(const CGCapturedStmtInfo *Info) {
290
0
    return CGOpenMPRegionInfo::classof(Info) &&
291
0
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
292
0
  }
293
294
25.5k
  ~CGOpenMPInlinedRegionInfo() override = default;
295
296
private:
297
  /// CodeGen info about outer OpenMP region.
298
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
299
  CGOpenMPRegionInfo *OuterRegionInfo;
300
};
301
302
/// API for captured statement code generation in OpenMP target
303
/// constructs. For these captures, implicit parameters are used instead of the
304
/// captured fields. The name of the target region has to be unique in a given
305
/// application so it is provided by the client, because only the client has
306
/// the information to generate that.
307
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
308
public:
309
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
310
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
311
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
312
                           /*HasCancel=*/false),
313
6.33k
        HelperName(HelperName) {}
314
315
  /// This is unused for target regions because each starts executing
316
  /// with a single thread.
317
1.43k
  const VarDecl *getThreadIDVariable() const override { return nullptr; }
318
319
  /// Get the name of the capture helper.
320
6.34k
  StringRef getHelperName() const override { return HelperName; }
321
322
0
  static bool classof(const CGCapturedStmtInfo *Info) {
323
0
    return CGOpenMPRegionInfo::classof(Info) &&
324
0
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
325
0
  }
326
327
private:
328
  StringRef HelperName;
329
};
330
331
0
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
332
0
  llvm_unreachable("No codegen for expressions");
333
0
}
334
/// API for generation of expressions captured in an innermost OpenMP
335
/// region.
336
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
337
public:
338
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
339
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
340
                                  OMPD_unknown,
341
                                  /*HasCancel=*/false),
342
168
        PrivScope(CGF) {
343
168
    // Make sure the globals captured in the provided statement are local by
344
168
    // using the privatization logic. We assume the same variable is not
345
168
    // captured more than once.
346
370
    for (const auto &C : CS.captures()) {
347
370
      if (!C.capturesVariable() && 
!C.capturesVariableByCopy()284
)
348
16
        continue;
349
354
350
354
      const VarDecl *VD = C.getCapturedVar();
351
354
      if (VD->isLocalVarDeclOrParm())
352
288
        continue;
353
66
354
66
      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
355
66
                      /*RefersToEnclosingVariableOrCapture=*/false,
356
66
                      VD->getType().getNonReferenceType(), VK_LValue,
357
66
                      C.getLocation());
358
66
      PrivScope.addPrivate(
359
66
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); });
360
66
    }
361
168
    (void)PrivScope.Privatize();
362
168
  }
363
364
  /// Lookup the captured field decl for a variable.
365
0
  const FieldDecl *lookup(const VarDecl *VD) const override {
366
0
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
367
0
      return FD;
368
0
    return nullptr;
369
0
  }
370
371
  /// Emit the captured statement body.
372
0
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
373
0
    llvm_unreachable("No body for expressions");
374
0
  }
375
376
  /// Get a variable or parameter for storing global thread id
377
  /// inside OpenMP construct.
378
0
  const VarDecl *getThreadIDVariable() const override {
379
0
    llvm_unreachable("No thread id for expressions");
380
0
  }
381
382
  /// Get the name of the capture helper.
383
0
  StringRef getHelperName() const override {
384
0
    llvm_unreachable("No helper name for expressions");
385
0
  }
386
387
0
  static bool classof(const CGCapturedStmtInfo *Info) { return false; }
388
389
private:
390
  /// Private scope to capture global variables.
391
  CodeGenFunction::OMPPrivateScope PrivScope;
392
};
393
394
/// RAII for emitting code of OpenMP constructs.
395
class InlinedOpenMPRegionRAII {
396
  CodeGenFunction &CGF;
397
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
398
  FieldDecl *LambdaThisCaptureField = nullptr;
399
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
400
401
public:
402
  /// Constructs region for combined constructs.
403
  /// \param CodeGen Code generation sequence for combined directives. Includes
404
  /// a list of functions used for code generation of implicitly inlined
405
  /// regions.
406
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
407
                          OpenMPDirectiveKind Kind, bool HasCancel)
408
25.3k
      : CGF(CGF) {
409
25.3k
    // Start emission for the construct.
410
25.3k
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
411
25.3k
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
412
25.3k
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
413
25.3k
    LambdaThisCaptureField = CGF.LambdaThisCaptureField;
414
25.3k
    CGF.LambdaThisCaptureField = nullptr;
415
25.3k
    BlockInfo = CGF.BlockInfo;
416
25.3k
    CGF.BlockInfo = nullptr;
417
25.3k
  }
418
419
25.3k
  ~InlinedOpenMPRegionRAII() {
420
25.3k
    // Restore original CapturedStmtInfo only if we're done with code emission.
421
25.3k
    auto *OldCSI =
422
25.3k
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
423
25.3k
    delete CGF.CapturedStmtInfo;
424
25.3k
    CGF.CapturedStmtInfo = OldCSI;
425
25.3k
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
426
25.3k
    CGF.LambdaThisCaptureField = LambdaThisCaptureField;
427
25.3k
    CGF.BlockInfo = BlockInfo;
428
25.3k
  }
429
};
430
431
/// Values for bit flags used in the ident_t to describe the fields.
432
/// All enumeration elements are named and described in accordance with the code
433
/// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
434
enum OpenMPLocationFlags : unsigned {
435
  /// Use trampoline for internal microtask.
436
  OMP_IDENT_IMD = 0x01,
437
  /// Use c-style ident structure.
438
  OMP_IDENT_KMPC = 0x02,
439
  /// Atomic reduction option for kmpc_reduce.
440
  OMP_ATOMIC_REDUCE = 0x10,
441
  /// Explicit 'barrier' directive.
442
  OMP_IDENT_BARRIER_EXPL = 0x20,
443
  /// Implicit barrier in code.
444
  OMP_IDENT_BARRIER_IMPL = 0x40,
445
  /// Implicit barrier in 'for' directive.
446
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
447
  /// Implicit barrier in 'sections' directive.
448
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
449
  /// Implicit barrier in 'single' directive.
450
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
451
  /// Call of __kmp_for_static_init for static loop.
452
  OMP_IDENT_WORK_LOOP = 0x200,
453
  /// Call of __kmp_for_static_init for sections.
454
  OMP_IDENT_WORK_SECTIONS = 0x400,
455
  /// Call of __kmp_for_static_init for distribute.
456
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
457
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
458
};
459
460
namespace {
461
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
462
/// Values for bit flags for marking which requires clauses have been used.
463
enum OpenMPOffloadingRequiresDirFlags : int64_t {
464
  /// flag undefined.
465
  OMP_REQ_UNDEFINED               = 0x000,
466
  /// no requires clause present.
467
  OMP_REQ_NONE                    = 0x001,
468
  /// reverse_offload clause.
469
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
470
  /// unified_address clause.
471
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
472
  /// unified_shared_memory clause.
473
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
474
  /// dynamic_allocators clause.
475
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
476
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
477
};
478
479
enum OpenMPOffloadingReservedDeviceIDs {
480
  /// Device ID if the device was not defined, runtime should get it
481
  /// from environment variables in the spec.
482
  OMP_DEVICEID_UNDEF = -1,
483
};
484
} // anonymous namespace
485
486
/// Describes ident structure that describes a source location.
487
/// All descriptions are taken from
488
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
489
/// Original structure:
490
/// typedef struct ident {
491
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
492
///                                  see above  */
493
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
494
///                                  KMP_IDENT_KMPC identifies this union
495
///                                  member  */
496
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
497
///                                  see above */
498
///#if USE_ITT_BUILD
499
///                            /*  but currently used for storing
500
///                                region-specific ITT */
501
///                            /*  contextual information. */
502
///#endif /* USE_ITT_BUILD */
503
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
504
///                                 C++  */
505
///    char const *psource;    /**< String describing the source location.
506
///                            The string is composed of semi-colon separated
507
///                            fields which describe the source file,
508
///                            the function and a pair of line numbers that
509
///                            delimit the construct.
510
///                             */
511
/// } ident_t;
512
enum IdentFieldIndex {
513
  /// might be used in Fortran
514
  IdentField_Reserved_1,
515
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
516
  IdentField_Flags,
517
  /// Not really used in Fortran any more
518
  IdentField_Reserved_2,
519
  /// Source[4] in Fortran, do not use for C++
520
  IdentField_Reserved_3,
521
  /// String describing the source location. The string is composed of
522
  /// semi-colon separated fields which describe the source file, the function
523
  /// and a pair of line numbers that delimit the construct.
524
  IdentField_PSource
525
};
526
527
/// Schedule types for 'omp for' loops (these enumerators are taken from
528
/// the enum sched_type in kmp.h).
529
enum OpenMPSchedType {
530
  /// Lower bound for default (unordered) versions.
531
  OMP_sch_lower = 32,
532
  OMP_sch_static_chunked = 33,
533
  OMP_sch_static = 34,
534
  OMP_sch_dynamic_chunked = 35,
535
  OMP_sch_guided_chunked = 36,
536
  OMP_sch_runtime = 37,
537
  OMP_sch_auto = 38,
538
  /// static with chunk adjustment (e.g., simd)
539
  OMP_sch_static_balanced_chunked = 45,
540
  /// Lower bound for 'ordered' versions.
541
  OMP_ord_lower = 64,
542
  OMP_ord_static_chunked = 65,
543
  OMP_ord_static = 66,
544
  OMP_ord_dynamic_chunked = 67,
545
  OMP_ord_guided_chunked = 68,
546
  OMP_ord_runtime = 69,
547
  OMP_ord_auto = 70,
548
  OMP_sch_default = OMP_sch_static,
549
  /// dist_schedule types
550
  OMP_dist_sch_static_chunked = 91,
551
  OMP_dist_sch_static = 92,
552
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
553
  /// Set if the monotonic schedule modifier was present.
554
  OMP_sch_modifier_monotonic = (1 << 29),
555
  /// Set if the nonmonotonic schedule modifier was present.
556
  OMP_sch_modifier_nonmonotonic = (1 << 30),
557
};
558
559
enum OpenMPRTLFunction {
560
  /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
561
  /// kmpc_micro microtask, ...);
562
  OMPRTL__kmpc_fork_call,
563
  /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
564
  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
565
  OMPRTL__kmpc_threadprivate_cached,
566
  /// Call to void __kmpc_threadprivate_register( ident_t *,
567
  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
568
  OMPRTL__kmpc_threadprivate_register,
569
  // Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
570
  OMPRTL__kmpc_global_thread_num,
571
  // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
572
  // kmp_critical_name *crit);
573
  OMPRTL__kmpc_critical,
574
  // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
575
  // global_tid, kmp_critical_name *crit, uintptr_t hint);
576
  OMPRTL__kmpc_critical_with_hint,
577
  // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
578
  // kmp_critical_name *crit);
579
  OMPRTL__kmpc_end_critical,
580
  // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
581
  // global_tid);
582
  OMPRTL__kmpc_cancel_barrier,
583
  // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
584
  OMPRTL__kmpc_barrier,
585
  // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
586
  OMPRTL__kmpc_for_static_fini,
587
  // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
588
  // global_tid);
589
  OMPRTL__kmpc_serialized_parallel,
590
  // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
591
  // global_tid);
592
  OMPRTL__kmpc_end_serialized_parallel,
593
  // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
594
  // kmp_int32 num_threads);
595
  OMPRTL__kmpc_push_num_threads,
596
  // Call to void __kmpc_flush(ident_t *loc);
597
  OMPRTL__kmpc_flush,
598
  // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
599
  OMPRTL__kmpc_master,
600
  // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
601
  OMPRTL__kmpc_end_master,
602
  // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
603
  // int end_part);
604
  OMPRTL__kmpc_omp_taskyield,
605
  // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
606
  OMPRTL__kmpc_single,
607
  // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
608
  OMPRTL__kmpc_end_single,
609
  // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
610
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
611
  // kmp_routine_entry_t *task_entry);
612
  OMPRTL__kmpc_omp_task_alloc,
613
  // Call to kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *,
614
  // kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
615
  // size_t sizeof_shareds, kmp_routine_entry_t *task_entry,
616
  // kmp_int64 device_id);
617
  OMPRTL__kmpc_omp_target_task_alloc,
618
  // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
619
  // new_task);
620
  OMPRTL__kmpc_omp_task,
621
  // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
622
  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
623
  // kmp_int32 didit);
624
  OMPRTL__kmpc_copyprivate,
625
  // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
626
  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
627
  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
628
  OMPRTL__kmpc_reduce,
629
  // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
630
  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
631
  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
632
  // *lck);
633
  OMPRTL__kmpc_reduce_nowait,
634
  // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
635
  // kmp_critical_name *lck);
636
  OMPRTL__kmpc_end_reduce,
637
  // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
638
  // kmp_critical_name *lck);
639
  OMPRTL__kmpc_end_reduce_nowait,
640
  // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
641
  // kmp_task_t * new_task);
642
  OMPRTL__kmpc_omp_task_begin_if0,
643
  // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
644
  // kmp_task_t * new_task);
645
  OMPRTL__kmpc_omp_task_complete_if0,
646
  // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
647
  OMPRTL__kmpc_ordered,
648
  // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
649
  OMPRTL__kmpc_end_ordered,
650
  // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
651
  // global_tid);
652
  OMPRTL__kmpc_omp_taskwait,
653
  // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
654
  OMPRTL__kmpc_taskgroup,
655
  // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
656
  OMPRTL__kmpc_end_taskgroup,
657
  // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
658
  // int proc_bind);
659
  OMPRTL__kmpc_push_proc_bind,
660
  // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
661
  // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
662
  // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
663
  OMPRTL__kmpc_omp_task_with_deps,
664
  // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
665
  // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
666
  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
667
  OMPRTL__kmpc_omp_wait_deps,
668
  // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
669
  // global_tid, kmp_int32 cncl_kind);
670
  OMPRTL__kmpc_cancellationpoint,
671
  // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
672
  // kmp_int32 cncl_kind);
673
  OMPRTL__kmpc_cancel,
674
  // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
675
  // kmp_int32 num_teams, kmp_int32 thread_limit);
676
  OMPRTL__kmpc_push_num_teams,
677
  // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
678
  // microtask, ...);
679
  OMPRTL__kmpc_fork_teams,
680
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
681
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
682
  // sched, kmp_uint64 grainsize, void *task_dup);
683
  OMPRTL__kmpc_taskloop,
684
  // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
685
  // num_dims, struct kmp_dim *dims);
686
  OMPRTL__kmpc_doacross_init,
687
  // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
688
  OMPRTL__kmpc_doacross_fini,
689
  // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
690
  // *vec);
691
  OMPRTL__kmpc_doacross_post,
692
  // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
693
  // *vec);
694
  OMPRTL__kmpc_doacross_wait,
695
  // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
696
  // *data);
697
  OMPRTL__kmpc_task_reduction_init,
698
  // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
699
  // *d);
700
  OMPRTL__kmpc_task_reduction_get_th_data,
701
  // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
702
  OMPRTL__kmpc_alloc,
703
  // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
704
  OMPRTL__kmpc_free,
705
706
  //
707
  // Offloading related calls
708
  //
709
  // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
710
  // size);
711
  OMPRTL__kmpc_push_target_tripcount,
712
  // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
713
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
714
  // *arg_types);
715
  OMPRTL__tgt_target,
716
  // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
717
  // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
718
  // *arg_types);
719
  OMPRTL__tgt_target_nowait,
720
  // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
721
  // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
722
  // *arg_types, int32_t num_teams, int32_t thread_limit);
723
  OMPRTL__tgt_target_teams,
724
  // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
725
  // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
726
  // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
727
  OMPRTL__tgt_target_teams_nowait,
728
  // Call to void __tgt_register_requires(int64_t flags);
729
  OMPRTL__tgt_register_requires,
730
  // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
731
  OMPRTL__tgt_register_lib,
732
  // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
733
  OMPRTL__tgt_unregister_lib,
734
  // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
735
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
736
  OMPRTL__tgt_target_data_begin,
737
  // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
738
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
739
  // *arg_types);
740
  OMPRTL__tgt_target_data_begin_nowait,
741
  // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
742
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
743
  OMPRTL__tgt_target_data_end,
744
  // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
745
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
746
  // *arg_types);
747
  OMPRTL__tgt_target_data_end_nowait,
748
  // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
749
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
750
  OMPRTL__tgt_target_data_update,
751
  // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
752
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
753
  // *arg_types);
754
  OMPRTL__tgt_target_data_update_nowait,
755
};
756
757
/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
758
/// region.
759
class CleanupTy final : public EHScopeStack::Cleanup {
760
  PrePostActionTy *Action;
761
762
public:
763
2.93k
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
764
2.99k
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
765
2.99k
    if (!CGF.HaveInsertPoint())
766
0
      return;
767
2.99k
    Action->Exit(CGF);
768
2.99k
  }
769
};
770
771
} // anonymous namespace
772
773
50.6k
/// Runs the stored region callback inside a fresh cleanups scope.
///
/// If a pre|post-action is attached, a CleanupTy for it is pushed first (for
/// both normal and EH exits) and the action is handed to the callback;
/// otherwise the callback receives a default-constructed PrePostActionTy.
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope CleanupsScope(CGF);
  if (!PrePostAction) {
    PrePostActionTy DefaultAction;
    Callback(CodeGen, CGF, DefaultAction);
    return;
  }
  CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
  Callback(CodeGen, CGF, *PrePostAction);
}
783
784
/// Check if the combiner is a call to UDR combiner and if it is so return the
785
/// UDR decl used for reduction.
786
static const OMPDeclareReductionDecl *
787
677
getReductionInit(const Expr *ReductionOp) {
788
677
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
789
89
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
790
53
      if (const auto *DRE =
791
53
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
792
53
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
793
53
          return DRD;
794
624
  return nullptr;
795
624
}
796
797
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
798
                                             const OMPDeclareReductionDecl *DRD,
799
                                             const Expr *InitOp,
800
                                             Address Private, Address Original,
801
45
                                             QualType Ty) {
802
45
  if (DRD->getInitializer()) {
803
39
    std::pair<llvm::Function *, llvm::Function *> Reduction =
804
39
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
805
39
    const auto *CE = cast<CallExpr>(InitOp);
806
39
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
807
39
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
808
39
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
809
39
    const auto *LHSDRE =
810
39
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
811
39
    const auto *RHSDRE =
812
39
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
813
39
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
814
39
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
815
39
                            [=]() { return Private; });
816
39
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
817
39
                            [=]() { return Original; });
818
39
    (void)PrivateScope.Privatize();
819
39
    RValue Func = RValue::get(Reduction.second);
820
39
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
821
39
    CGF.EmitIgnoredExpr(InitOp);
822
39
  } else {
823
6
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
824
6
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
825
6
    auto *GV = new llvm::GlobalVariable(
826
6
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
827
6
        llvm::GlobalValue::PrivateLinkage, Init, Name);
828
6
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
829
6
    RValue InitRVal;
830
6
    switch (CGF.getEvaluationKind(Ty)) {
831
6
    case TEK_Scalar:
832
6
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
833
6
      break;
834
6
    case TEK_Complex:
835
0
      InitRVal =
836
0
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
837
0
      break;
838
6
    case TEK_Aggregate:
839
0
      InitRVal = RValue::getAggregate(LV.getAddress());
840
0
      break;
841
6
    }
842
6
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
843
6
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
844
6
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
845
6
                         /*IsInitializer=*/false);
846
6
  }
847
45
}
848
849
/// Emit initialization of arrays of complex types.
850
/// \param DestAddr Address of the array.
851
/// \param Type Type of array.
852
/// \param Init Initial expression of array.
853
/// \param SrcAddr Address of the original array.
854
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
855
                                 QualType Type, bool EmitDeclareReductionInit,
856
                                 const Expr *Init,
857
                                 const OMPDeclareReductionDecl *DRD,
858
139
                                 Address SrcAddr = Address::invalid()) {
859
139
  // Perform element-by-element initialization.
860
139
  QualType ElementTy;
861
139
862
139
  // Drill down to the base element type on both arrays.
863
139
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
864
139
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
865
139
  DestAddr =
866
139
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
867
139
  if (DRD)
868
23
    SrcAddr =
869
23
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
870
139
871
139
  llvm::Value *SrcBegin = nullptr;
872
139
  if (DRD)
873
23
    SrcBegin = SrcAddr.getPointer();
874
139
  llvm::Value *DestBegin = DestAddr.getPointer();
875
139
  // Cast from pointer to array type to pointer to single element.
876
139
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
877
139
  // The basic structure here is a while-do loop.
878
139
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
879
139
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
880
139
  llvm::Value *IsEmpty =
881
139
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
882
139
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
883
139
884
139
  // Enter the loop body, making that address the current address.
885
139
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
886
139
  CGF.EmitBlock(BodyBB);
887
139
888
139
  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
889
139
890
139
  llvm::PHINode *SrcElementPHI = nullptr;
891
139
  Address SrcElementCurrent = Address::invalid();
892
139
  if (DRD) {
893
23
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
894
23
                                          "omp.arraycpy.srcElementPast");
895
23
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
896
23
    SrcElementCurrent =
897
23
        Address(SrcElementPHI,
898
23
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
899
23
  }
900
139
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
901
139
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
902
139
  DestElementPHI->addIncoming(DestBegin, EntryBB);
903
139
  Address DestElementCurrent =
904
139
      Address(DestElementPHI,
905
139
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
906
139
907
139
  // Emit copy.
908
139
  {
909
139
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
910
139
    if (EmitDeclareReductionInit) {
911
23
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
912
23
                                       SrcElementCurrent, ElementTy);
913
23
    } else
914
116
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
915
116
                           /*IsInitializer=*/false);
916
139
  }
917
139
918
139
  if (DRD) {
919
23
    // Shift the address forward by one element.
920
23
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
921
23
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
922
23
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
923
23
  }
924
139
925
139
  // Shift the address forward by one element.
926
139
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
927
139
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
928
139
  // Check whether we've reached the end.
929
139
  llvm::Value *Done =
930
139
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
931
139
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
932
139
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
933
139
934
139
  // Done.
935
139
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
936
139
}
937
938
584
/// Emits the lvalue of the shared reduction expression \p E by forwarding to
/// CodeGenFunction::EmitOMPSharedLValue().
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  LValue SharedLV = CGF.EmitOMPSharedLValue(E);
  return SharedLV;
}
941
942
/// Emits the non-lower-bound lvalue of \p E when it is an OpenMP array
/// section; for any other expression a default-constructed LValue is
/// returned.
LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  const auto *Section = dyn_cast<OMPArraySectionExpr>(E);
  if (!Section)
    return LValue();
  return CGF.EmitOMPArraySectionExpr(Section, /*IsLowerBound=*/false);
}
948
949
void ReductionCodeGen::emitAggregateInitialization(
950
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
951
139
    const OMPDeclareReductionDecl *DRD) {
952
139
  // Emit VarDecl with copy init for arrays.
953
139
  // Get the address of the original variable captured in current
954
139
  // captured region.
955
139
  const auto *PrivateVD =
956
139
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
957
139
  bool EmitDeclareReductionInit =
958
139
      DRD && 
(23
DRD->getInitializer()23
||
!PrivateVD->hasInit()2
);
959
139
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
960
139
                       EmitDeclareReductionInit,
961
139
                       EmitDeclareReductionInit ? 
ClausesData[N].ReductionOp23
962
139
                                                : 
PrivateVD->getInit()116
,
963
139
                       DRD, SharedLVal.getAddress());
964
139
}
965
966
/// Records one clause-data entry per shared expression, pairing the i-th
/// element of \p Shareds with the i-th elements of \p Privates and
/// \p ReductionOps. \p Privates and \p ReductionOps are read once per shared
/// expression without a bounds check, so they must hold at least as many
/// entries as \p Shareds.
ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  // Pre-size every per-item container to the number of reduction items.
  const auto NumItems = Shareds.size();
  ClausesData.reserve(NumItems);
  SharedAddresses.reserve(NumItems);
  Sizes.reserve(NumItems);
  BaseDecls.reserve(NumItems);

  // Walk the three lists in lock-step.
  const Expr *const *PrivIt = Privates.begin();
  const Expr *const *RedOpIt = ReductionOps.begin();
  for (const Expr *Shared : Shareds) {
    ClausesData.emplace_back(Shared, *PrivIt, *RedOpIt);
    ++PrivIt;
    ++RedOpIt;
  }
}
981
982
584
/// Emits and records the pair of shared lvalues for reduction item \p N.
/// Items must be processed in order: exactly \p N pairs must already have
/// been recorded.
void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  const Expr *SharedRef = ClausesData[N].Ref;
  // Emit the first lvalue before the upper-bound one to keep the emission
  // order stable.
  LValue LowerLV = emitSharedLValue(CGF, SharedRef);
  LValue UpperLV = emitSharedLValueUB(CGF, SharedRef);
  SharedAddresses.emplace_back(LowerLV, UpperLV);
}
989
990
584
/// Computes and records the size of reduction item \p N and, for variably
/// modified private types, emits the private type with its array size
/// expression bound to the computed element count.
///
/// For non-variably-modified types only the byte size is recorded (the
/// element count slot is null). For array sections the element count is
/// derived from the distance between the two recorded shared lvalues.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateRef = cast<DeclRefExpr>(ClausesData[N].Private);
  const auto *PrivateVD = cast<VarDecl>(PrivateRef->getDecl());
  QualType PrivateTy = PrivateVD->getType();
  const LValue &FirstLV = SharedAddresses[N].first;

  if (!PrivateTy->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(FirstLV.getType().getNonReferenceType()), nullptr);
    return;
  }

  // Size of a single element, taken from the pointee type of the shared
  // pointer.
  llvm::Value *FirstPtr = FirstLV.getPointer();
  auto *PointeeTy =
      cast<llvm::PointerType>(FirstPtr->getType())->getElementType();
  auto *PointeeSize = llvm::ConstantExpr::getSizeOf(PointeeTy);

  llvm::Value *NumElems;
  llvm::Value *NumBytes;
  if (isa<OMPArraySectionExpr>(ClausesData[N].Ref)) {
    // Array section: count = (second - first) + 1, bytes = count * elem size.
    NumElems = CGF.Builder.CreatePtrDiff(
        SharedAddresses[N].second.getPointer(), FirstPtr);
    NumElems = CGF.Builder.CreateNUWAdd(
        NumElems, llvm::ConstantInt::get(NumElems->getType(), /*V=*/1));
    NumBytes = CGF.Builder.CreateNUWMul(NumElems, PointeeSize);
  } else {
    // Whole variable: bytes come from the type, count = bytes / elem size.
    NumBytes = CGF.getTypeSize(FirstLV.getType().getNonReferenceType());
    NumElems = CGF.Builder.CreateExactUDiv(NumBytes, PointeeSize);
  }
  Sizes.emplace_back(NumBytes, NumElems);

  // Bind the private type's size expression to the element count while the
  // variably modified type is emitted.
  const auto *SizeOVE = cast<OpaqueValueExpr>(
      CGF.getContext().getAsVariableArrayType(PrivateTy)->getSizeExpr());
  CodeGenFunction::OpaqueValueMapping SizeMapping(CGF, SizeOVE,
                                                  RValue::get(NumElems));
  CGF.EmitVariablyModifiedType(PrivateTy);
}
1027
1028
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
1029
117
                                         llvm::Value *Size) {
1030
117
  const auto *PrivateVD =
1031
117
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1032
117
  QualType PrivateType = PrivateVD->getType();
1033
117
  if (!PrivateType->isVariablyModifiedType()) {
1034
89
    assert(!Size && !Sizes[N].second &&
1035
89
           "Size should be nullptr for non-variably modified reduction "
1036
89
           "items.");
1037
89
    return;
1038
89
  }
1039
28
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
1040
28
      CGF,
1041
28
      cast<OpaqueValueExpr>(
1042
28
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
1043
28
      RValue::get(Size));
1044
28
  CGF.EmitVariablyModifiedType(PrivateType);
1045
28
}
1046
1047
void ReductionCodeGen::emitInitialization(
1048
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
1049
564
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
1050
564
  assert(SharedAddresses.size() > N && "No variable was generated");
1051
564
  const auto *PrivateVD =
1052
564
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1053
564
  const OMPDeclareReductionDecl *DRD =
1054
564
      getReductionInit(ClausesData[N].ReductionOp);
1055
564
  QualType PrivateType = PrivateVD->getType();
1056
564
  PrivateAddr = CGF.Builder.CreateElementBitCast(
1057
564
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1058
564
  QualType SharedType = SharedAddresses[N].first.getType();
1059
564
  SharedLVal = CGF.MakeAddrLValue(
1060
564
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),
1061
564
                                       CGF.ConvertTypeForMem(SharedType)),
1062
564
      SharedType, SharedAddresses[N].first.getBaseInfo(),
1063
564
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
1064
564
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
1065
139
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
1066
425
  } else if (DRD && 
(26
DRD->getInitializer()26
||
!PrivateVD->hasInit()8
)) {
1067
22
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
1068
22
                                     PrivateAddr, SharedLVal.getAddress(),
1069
22
                                     SharedLVal.getType());
1070
403
  } else if (!DefaultInit(CGF) && 
PrivateVD->hasInit()31
&&
1071
403
             
!CGF.isTrivialInitializer(PrivateVD->getInit())31
) {
1072
31
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
1073
31
                         PrivateVD->getType().getQualifiers(),
1074
31
                         /*IsInitializer=*/false);
1075
31
  }
1076
564
}
1077
1078
64
bool ReductionCodeGen::needCleanups(unsigned N) {
1079
64
  const auto *PrivateVD =
1080
64
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1081
64
  QualType PrivateType = PrivateVD->getType();
1082
64
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1083
64
  return DTorKind != QualType::DK_none;
1084
64
}
1085
1086
void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
1087
11
                                    Address PrivateAddr) {
1088
11
  const auto *PrivateVD =
1089
11
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1090
11
  QualType PrivateType = PrivateVD->getType();
1091
11
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1092
11
  if (needCleanups(N)) {
1093
11
    PrivateAddr = CGF.Builder.CreateElementBitCast(
1094
11
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1095
11
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
1096
11
  }
1097
11
}
1098
1099
/// Starting from \p BaseLV of type \p BaseTy, repeatedly loads through
/// pointer (or reference) levels until the type is no longer a
/// pointer/reference or matches \p ElTy, then returns the resulting lvalue
/// with its address re-cast to the memory type of \p ElTy.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  ASTContext &Ctx = CGF.getContext();
  for (BaseTy = BaseTy.getNonReferenceType();
       (BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
       !Ctx.hasSameType(BaseTy, ElTy);
       BaseTy = BaseTy->getPointeeType()) {
    const auto *AsPointer = BaseTy->getAs<PointerType>();
    if (AsPointer) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), AsPointer);
      continue;
    }
    // Not a pointer, so this level is a reference: load through it.
    LValue RefLV = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
    BaseLV = CGF.EmitLoadOfReferenceLValue(RefLV);
  }
  // Keep the lvalue's own type, base info and TBAA info; only the address
  // element type changes.
  Address ElementAddr = CGF.Builder.CreateElementBitCast(
      BaseLV.getAddress(), CGF.ConvertTypeForMem(ElTy));
  return CGF.MakeAddrLValue(
      ElementAddr, BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
1118
1119
/// Wraps \p Addr in one memory temporary per pointer/reference level that
/// separates \p BaseTy from \p ElTy.
///
/// For each such level a temporary of that level's type is created and its
/// pointer is stored into the temporary of the previous level. \p Addr is
/// then cast to the element type of the last temporary and stored there, and
/// the first temporary is returned. If there are no levels, \p Addr is cast
/// to \p BaseLVType and returned with alignment \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Outermost = Address::invalid();
  Address Innermost = Address::invalid();
  ASTContext &Ctx = CGF.getContext();
  for (BaseTy = BaseTy.getNonReferenceType();
       (BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
       !Ctx.hasSameType(BaseTy, ElTy);
       BaseTy = BaseTy->getPointeeType()) {
    Address Level = CGF.CreateMemTemp(BaseTy);
    if (Innermost.isValid())
      CGF.Builder.CreateStore(Level.getPointer(), Innermost); // chain levels
    else
      Outermost = Level; // first level created is the one returned
    Innermost = Level;
  }

  if (!Innermost.isValid()) {
    // No indirection levels: just retype the pointer.
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, BaseLVType);
    return Address(Addr, BaseLVAlignment);
  }

  // Store the retyped pointer into the deepest temporary.
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, Innermost.getElementType());
  CGF.Builder.CreateStore(Addr, Innermost);
  return Outermost;
}
1146
1147
573
/// Finds the variable underlying an array section or array subscript
/// expression.
///
/// For an array section, nested array sections and then nested array
/// subscripts are stripped from the base; for an array subscript, nested
/// array subscripts are stripped. The remaining base must be a reference to
/// a variable: it is stored in \p DE and its declaration is returned.
/// For any other kind of \p Ref, nullptr is returned and \p DE is left
/// untouched.
static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const Expr *Base = nullptr;
  if (const auto *Section = dyn_cast<OMPArraySectionExpr>(Ref)) {
    Base = Section->getBase()->IgnoreParenImpCasts();
    while (const auto *InnerSection = dyn_cast<OMPArraySectionExpr>(Base))
      Base = InnerSection->getBase()->IgnoreParenImpCasts();
  } else if (const auto *Subscript = dyn_cast<ArraySubscriptExpr>(Ref)) {
    Base = Subscript->getBase()->IgnoreParenImpCasts();
  } else {
    return nullptr;
  }
  // Both forms may still sit on top of a chain of array subscripts.
  while (const auto *InnerSubscript = dyn_cast<ArraySubscriptExpr>(Base))
    Base = InnerSubscript->getBase()->IgnoreParenImpCasts();
  DE = cast<DeclRefExpr>(Base);
  return cast<VarDecl>(DE->getDecl());
}
1166
1167
/// Records the base declaration of reduction item \p N and returns the
/// address through which the private copy should be accessed.
///
/// When the item is an array section or array subscript, the private pointer
/// is offset by the distance between the start of the base variable's data
/// and the recorded shared lvalue, and the result is wrapped by castToBase()
/// so that it can stand in for the base variable. For any other item the
/// referenced variable is recorded and \p PrivateAddr is returned unchanged.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  // getBaseDecl() assigns DE only when it finds a base declaration, so start
  // from a well-defined null value instead of an indeterminate pointer.
  const DeclRefExpr *DE = nullptr;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    // Load through the base variable's pointer/reference levels.
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    // Offset of the base relative to the shared item.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
    // Apply the same offset to the private pointer, viewed with the shared
    // item's pointer type.
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress().getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress().getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
1192
1193
113
bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1194
113
  const OMPDeclareReductionDecl *DRD =
1195
113
      getReductionInit(ClausesData[N].ReductionOp);
1196
113
  return DRD && 
DRD->getInitializer()4
;
1197
113
}
1198
1199
8.30k
/// Returns the lvalue obtained by loading through the thread-id variable,
/// which is treated here as a pointer-typed local variable.
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  Address ThreadIDAddr = CGF.GetAddrOfLocalVar(getThreadIDVariable());
  const auto *ThreadIDPtrTy =
      getThreadIDVariable()->getType()->castAs<PointerType>();
  return CGF.EmitLoadOfPointerLValue(ThreadIDAddr, ThreadIDPtrTy);
}
1204
1205
39.5k
/// Emits the region body by invoking the stored code generator between a
/// pushTerminate()/popTerminate() pair on the EH stack. Nothing is emitted
/// when \p CGF has no insertion point.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  if (CGF.HaveInsertPoint()) {
    // OpenMP terminology (1.2.2): a structured block is an executable
    // statement with a single entry at the top and a single exit at the
    // bottom. The exit may not be a branch out of the block, and longjmp()
    // and throw() must not violate the entry/exit criteria - hence the
    // terminate scope around the body.
    CGF.EHStack.pushTerminate();
    CodeGen(CGF);
    CGF.EHStack.popTerminate();
  }
}
1217
1218
/// Returns an lvalue that refers directly to the thread-id local variable
/// (no load through a pointer), using the variable's own type.
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  Address ThreadIDAddr = CGF.GetAddrOfLocalVar(getThreadIDVariable());
  QualType ThreadIDTy = getThreadIDVariable()->getType();
  return CGF.MakeAddrLValue(ThreadIDAddr, ThreadIDTy, AlignmentSource::Decl);
}
1224
1225
/// Creates an unnamed, public, non-mutable field of type \p FieldTy with no
/// bit-width and no in-class initializer, adds it to \p DC and returns it.
static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  const SourceLocation NoLoc;
  TypeSourceInfo *FieldTSI = C.getTrivialTypeSourceInfo(FieldTy, NoLoc);
  FieldDecl *NewField = FieldDecl::Create(
      C, DC, /*StartLoc=*/NoLoc, /*IdLoc=*/NoLoc, /*Id=*/nullptr, FieldTy,
      FieldTSI, /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  NewField->setAccess(AS_public);
  DC->addDecl(NewField);
  return NewField;
}
1235
1236
/// Builds the implicit "ident_t" record (four 32-bit signed integer fields
/// followed by a void pointer), caches its AST and LLVM types, creates the
/// critical-name array type (8 x i32) and loads the offload info metadata.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OffloadEntriesInfoManager(CGM) {
  ASTContext &Ctx = CGM.getContext();
  RecordDecl *IdentRD = Ctx.buildImplicitRecord("ident_t");
  QualType Int32QTy =
      Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  IdentRD->startDefinition();
  // Integer fields, in order: reserved_1, flags, reserved_2, reserved_3.
  for (unsigned FieldIdx = 0; FieldIdx < 4; ++FieldIdx)
    addFieldToRecordDecl(Ctx, IdentRD, Int32QTy);
  // Pointer field: psource.
  addFieldToRecordDecl(Ctx, IdentRD, Ctx.VoidPtrTy);
  IdentRD->completeDefinition();

  IdentQTy = Ctx.getRecordType(IdentRD);
  IdentTy = CGM.getTypes().ConvertRecordDeclType(IdentRD);
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  loadOffloadInfoMetadata();
}
1261
1262
3.46k
void CGOpenMPRuntime::clear() {
1263
3.46k
  InternalVars.clear();
1264
3.46k
  // Clean non-target variable declarations possibly used only in debug info.
1265
3.46k
  for (const auto &Data : EmittedNonTargetVariables) {
1266
11
    if (!Data.getValue().pointsToAliveValue())
1267
0
      continue;
1268
11
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1269
11
    if (!GV)
1270
0
      continue;
1271
11
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
1272
10
      continue;
1273
1
    GV->eraseFromParent();
1274
1
  }
1275
3.46k
}
1276
1277
40.5k
/// Joins \p Parts into a single name: FirstSeparator is written before the
/// first part and Separator before every following part. An empty \p Parts
/// yields an empty string.
std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  SmallString<128> NameBuf;
  llvm::raw_svector_ostream Out(NameBuf);
  bool IsFirstPart = true;
  for (StringRef Part : Parts) {
    Out << (IsFirstPart ? FirstSeparator : Separator) << Part;
    IsFirstPart = false;
  }
  return Out.str();
}
1287
1288
/// Emits an internal helper function for a user-defined reduction: either
/// its combiner or its initializer.
///
/// The helper returns void and takes two restrict-qualified pointers to
/// \p Ty; the parameter created for \p Out comes first, the one for \p In
/// second. Inside the helper, \p In and \p Out are mapped to the pointees of
/// those parameters.
///
/// \param CombinerInitializer Expression emitted as the helper body; may be
/// null, in which case no expression is emitted.
/// \param IsCombiner Selects the name ("omp_combiner" vs "omp_initializer").
/// When false and \p Out has a non-trivial initializer, that initializer is
/// emitted into \p Out before \p CombinerInitializer.
/// \return The created function.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  ASTContext &Ctx = CGM.getContext();
  QualType ParamTy = Ctx.getPointerType(Ty).withRestrict();

  // Parameters: (out, in).
  FunctionArgList Params;
  ImplicitParamDecl OutParam(Ctx, /*DC=*/nullptr, Out->getLocation(),
                             /*Id=*/nullptr, ParamTy,
                             ImplicitParamDecl::Other);
  ImplicitParamDecl InParam(Ctx, /*DC=*/nullptr, In->getLocation(),
                            /*Id=*/nullptr, ParamTy,
                            ImplicitParamDecl::Other);
  Params.push_back(&OutParam);
  Params.push_back(&InParam);

  // Create the internal function.
  const CGFunctionInfo &FI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Params);
  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
  std::string FnName = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Helper = llvm::Function::Create(
      FTy, llvm::GlobalValue::InternalLinkage, FnName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Helper, FI);
  if (CGM.getLangOpts().Optimize) {
    // With optimizations enabled, force the helper to be inlined.
    Helper->removeFnAttr(llvm::Attribute::NoInline);
    Helper->removeFnAttr(llvm::Attribute::OptimizeNone);
    Helper->addFnAttr(llvm::Attribute::AlwaysInline);
  }

  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, Helper, FI, Params,
                    In->getLocation(), Out->getLocation());

  // Map "T omp_in;" to "*omp_in_parm" and "T omp_out;" to "*omp_out_parm"
  // in all expressions emitted below.
  const auto *ParamPtrTy = ParamTy->castAs<PointerType>();
  CodeGenFunction::OMPPrivateScope RemapScope(CGF);
  Address InParamAddr = CGF.GetAddrOfLocalVar(&InParam);
  RemapScope.addPrivate(In, [&CGF, InParamAddr, ParamPtrTy]() {
    return CGF.EmitLoadOfPointerLValue(InParamAddr, ParamPtrTy).getAddress();
  });
  Address OutParamAddr = CGF.GetAddrOfLocalVar(&OutParam);
  RemapScope.addPrivate(Out, [&CGF, OutParamAddr, ParamPtrTy]() {
    return CGF.EmitLoadOfPointerLValue(OutParamAddr, ParamPtrTy).getAddress();
  });
  (void)RemapScope.Privatize();

  // For initializers, first run the non-trivial initializer of Out (if any).
  const bool EmitOutInit = !IsCombiner && Out->hasInit() &&
                           !CGF.isTrivialInitializer(Out->getInit());
  if (EmitOutInit)
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);

  RemapScope.ForceCleanup();
  CGF.FinishFunction();
  return Helper;
}
1344
1345
void CGOpenMPRuntime::emitUserDefinedReduction(
1346
92
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1347
92
  if (UDRMap.count(D) > 0)
1348
1
    return;
1349
91
  llvm::Function *Combiner = emitCombinerOrInitializer(
1350
91
      CGM, D->getType(), D->getCombiner(),
1351
91
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1352
91
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1353
91
      /*IsCombiner=*/true);
1354
91
  llvm::Function *Initializer = nullptr;
1355
91
  if (const Expr *Init = D->getInitializer()) {
1356
51
    Initializer = emitCombinerOrInitializer(
1357
51
        CGM, D->getType(),
1358
51
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? 
Init47
1359
51
                                                                     : 
nullptr4
,
1360
51
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1361
51
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1362
51
        /*IsCombiner=*/false);
1363
51
  }
1364
91
  UDRMap.try_emplace(D, Combiner, Initializer);
1365
91
  if (CGF) {
1366
24
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1367
24
    Decls.second.push_back(D);
1368
24
  }
1369
91
}
1370
1371
std::pair<llvm::Function *, llvm::Function *>
1372
180
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1373
180
  auto I = UDRMap.find(D);
1374
180
  if (I != UDRMap.end())
1375
156
    return I->second;
1376
24
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
1377
24
  return UDRMap.lookup(D);
1378
24
}
1379
1380
/// Generate the outlined function for the captured statement \p CS of
/// directive \p D, using \p ThreadIDVar (which must have pointer type) as the
/// thread id variable and \p OutlinedHelperName as the helper name. The
/// region is flagged as cancellable when \p D is one of the parallel-based
/// directives checked below and reports hasCancel().
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);

  // Query the cancel flag of the directive kinds that carry one; every other
  // directive kind yields false.
  const auto DirectiveHasCancel = [&D]() -> bool {
    if (const auto *Par = dyn_cast<OMPParallelDirective>(&D))
      return Par->hasCancel();
    if (const auto *ParSections = dyn_cast<OMPParallelSectionsDirective>(&D))
      return ParSections->hasCancel();
    if (const auto *ParFor = dyn_cast<OMPParallelForDirective>(&D))
      return ParFor->hasCancel();
    if (const auto *TargetParFor =
            dyn_cast<OMPTargetParallelForDirective>(&D))
      return TargetParFor->hasCancel();
    if (const auto *DistParFor =
            dyn_cast<OMPDistributeParallelForDirective>(&D))
      return DistParFor->hasCancel();
    if (const auto *TeamsDistParFor =
            dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
      return TeamsDistParFor->hasCancel();
    if (const auto *TargetTeamsDistParFor =
            dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
      return TargetTeamsDistParFor->hasCancel();
    return false;
  };
  const bool HasCancel = DirectiveHasCancel();

  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
}
1409
1410
/// Emit the outlined function for the OMPD_parallel captured statement of
/// directive \p D.
llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, D.getCapturedStmt(OMPD_parallel), ThreadIDVar, InnermostKind,
      getOutlinedHelperName(), CodeGen);
}
1417
1418
/// Emit the outlined function for the OMPD_teams captured statement of
/// directive \p D.
llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, D.getCapturedStmt(OMPD_teams), ThreadIDVar, InnermostKind,
      getOutlinedHelperName(), CodeGen);
}
1425
1426
/// Emit the outlined function for the task or taskloop captured statement of
/// directive \p D. \p ThreadIDVar must not have pointer type. For untied
/// tasks (\p Tied is false) \p NumberOfParts receives the part count reported
/// by the untied-task action; for tied tasks it is left untouched.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Code generator handed to the untied-task action: emits a call to
  // __kmpc_omp_task(loc, thread id, task pointer loaded from TaskTVar).
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskPtr =
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer();
    llvm::Value *TaskArgs[] = {UpLoc, ThreadID, TaskPtr};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");

  // Taskloop directives capture under OMPD_taskloop, everything else under
  // OMPD_task.
  OpenMPDirectiveKind Region = OMPD_task;
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind()))
    Region = OMPD_taskloop;
  const CapturedStmt *CS = D.getCapturedStmt(Region);

  // Only a plain task directive can report a cancel flag here.
  const auto *TD = dyn_cast<OMPTaskDirective>(&D);
  const bool HasCancel = TD && TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *OutlinedFn = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return OutlinedFn;
}
1462
1463
static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1464
                             const RecordDecl *RD, const CGRecordLayout &RL,
1465
11.0k
                             ArrayRef<llvm::Constant *> Data) {
1466
11.0k
  llvm::StructType *StructTy = RL.getLLVMType();
1467
11.0k
  unsigned PrevIdx = 0;
1468
11.0k
  ConstantInitBuilder CIBuilder(CGM);
1469
11.0k
  auto DI = Data.begin();
1470
52.9k
  for (const FieldDecl *FD : RD->fields()) {
1471
52.9k
    unsigned Idx = RL.getLLVMFieldNo(FD);
1472
52.9k
    // Fill the alignment.
1473
52.9k
    for (unsigned I = PrevIdx; I < Idx; 
++I0
)
1474
0
      Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1475
52.9k
    PrevIdx = Idx + 1;
1476
52.9k
    Fields.add(*DI);
1477
52.9k
    ++DI;
1478
52.9k
  }
1479
11.0k
}
1480
1481
template <class... As>
1482
static llvm::GlobalVariable *
1483
createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1484
                   ArrayRef<llvm::Constant *> Data, const Twine &Name,
1485
9.89k
                   As &&... Args) {
1486
9.89k
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1487
9.89k
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1488
9.89k
  ConstantInitBuilder CIBuilder(CGM);
1489
9.89k
  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1490
9.89k
  buildStructValue(Fields, CGM, RD, RL, Data);
1491
9.89k
  return Fields.finishAndCreateGlobal(
1492
9.89k
      Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1493
9.89k
      std::forward<As>(Args)...);
1494
9.89k
}
CGOpenMPRuntime.cpp:llvm::GlobalVariable* createGlobalStruct<llvm::GlobalValue::LinkageTypes>(clang::CodeGen::CodeGenModule&, clang::QualType, bool, llvm::ArrayRef<llvm::Constant*>, llvm::Twine const&, llvm::GlobalValue::LinkageTypes&&)
Line
Count
Source
1485
8.76k
                   As &&... Args) {
1486
8.76k
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1487
8.76k
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1488
8.76k
  ConstantInitBuilder CIBuilder(CGM);
1489
8.76k
  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1490
8.76k
  buildStructValue(Fields, CGM, RD, RL, Data);
1491
8.76k
  return Fields.finishAndCreateGlobal(
1492
8.76k
      Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1493
8.76k
      std::forward<As>(Args)...);
1494
8.76k
}
CGOpenMPRuntime.cpp:llvm::GlobalVariable* createGlobalStruct<>(clang::CodeGen::CodeGenModule&, clang::QualType, bool, llvm::ArrayRef<llvm::Constant*>, llvm::Twine const&)
Line
Count
Source
1485
1.13k
                   As &&... Args) {
1486
1.13k
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1487
1.13k
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1488
1.13k
  ConstantInitBuilder CIBuilder(CGM);
1489
1.13k
  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1490
1.13k
  buildStructValue(Fields, CGM, RD, RL, Data);
1491
1.13k
  return Fields.finishAndCreateGlobal(
1492
1.13k
      Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1493
1.13k
      std::forward<As>(Args)...);
1494
1.13k
}
1495
1496
template <typename T>
1497
static void
1498
createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1499
                                         ArrayRef<llvm::Constant *> Data,
1500
1.13k
                                         T &Parent) {
1501
1.13k
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1502
1.13k
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1503
1.13k
  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1504
1.13k
  buildStructValue(Fields, CGM, RD, RL, Data);
1505
1.13k
  Fields.finishAndAddTo(Parent);
1506
1.13k
}
1507
1508
23.0k
/// Return the address of the default ident_t global for \p Flags, creating
/// it on first use. Globals are cached in OpenMPDefaultLocMap, keyed by
/// \p Flags together with the runtime's reserved_2 flags.
Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  FlagsTy FlagsKey(Flags, Reserved2Flags);

  // Reuse the global created earlier for the same flag combination.
  if (llvm::Value *Cached = OpenMPDefaultLocMap.lookup(FlagsKey))
    return Address(Cached, Align);

  if (!DefaultOpenMPPSource) {
    // Initialize default location for psource field of ident_t structure of
    // all ident_t objects. Format is ";file;function;line;column;;".
    // Taken from
    // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
    llvm::Constant *DefaultStr =
        CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
    DefaultOpenMPPSource =
        llvm::ConstantExpr::getBitCast(DefaultStr, CGM.Int8PtrTy);
  }

  // Initializers for the ident_t fields, in field order: a null 32-bit
  // value, Flags, Reserved2Flags, another null 32-bit value and the default
  // psource string.
  llvm::Constant *Data[] = {
      llvm::ConstantInt::getNullValue(CGM.Int32Ty),
      llvm::ConstantInt::get(CGM.Int32Ty, Flags),
      llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
      llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
  llvm::GlobalValue *DefaultOpenMPLocation =
      createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
                         llvm::GlobalValue::PrivateLinkage);
  DefaultOpenMPLocation->setUnnamedAddr(
      llvm::GlobalValue::UnnamedAddr::Global);

  OpenMPDefaultLocMap[FlagsKey] = DefaultOpenMPLocation;
  return Address(DefaultOpenMPLocation, Align);
}
1540
1541
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1542
1.60k
                                             bool AtCurrentPoint) {
1543
1.60k
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1544
1.60k
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1545
1.60k
1546
1.60k
  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1547
1.60k
  if (AtCurrentPoint) {
1548
677
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1549
677
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1550
927
  } else {
1551
927
    Elem.second.ServiceInsertPt =
1552
927
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1553
927
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1554
927
  }
1555
1.60k
}
1556
1557
6.42k
/// Remove the service insert point marker, if any, of the function currently
/// emitted by \p CGF. Note that the map entry for the function is created
/// when it does not exist yet.
void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Info = OpenMPLocThreadIDMap[CGF.CurFn];
  llvm::Instruction *Marker = Info.ServiceInsertPt;
  if (!Marker)
    return;
  // Drop the stored reference before the instruction itself is erased.
  Info.ServiceInsertPt = nullptr;
  Marker->eraseFromParent();
}
1565
1566
/// Return a pointer to an ident_t object describing \p Loc with \p Flags
/// (OMP_IDENT_KMPC is always added). Without debug info, or for an invalid
/// \p Loc, the shared default location is returned. Otherwise a per-function
/// ident_t temporary is used: it is created and initialized from the default
/// location on first use, and its psource field is updated with a
/// ";file;function;line;column;;" string for \p Loc.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated - return global default location.
  const bool NoDebugInfo =
      CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo;
  if (NoDebugInfo || Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  Address LocValue = Address::invalid();
  auto Cached = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (Cached != OpenMPLocThreadIDMap.end())
    LocValue = Address(Cached->second.DebugLoc, Align);

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // GetOpenMPThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    LocValue = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
    auto &Info = OpenMPLocThreadIDMap[CGF.CurFn];
    Info.DebugLoc = LocValue.getPointer();

    if (!Info.ServiceInsertPt)
      setLocThreadIdInsertPt(CGF);
    // Initialize the temporary from the default location at the service
    // insert point; the guard restores the builder position afterwards.
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(Info.ServiceInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGF.getTypeSize(IdentQTy));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
  auto FieldIt = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
  LValue PSource =
      CGF.EmitLValueForField(Base, *std::next(FieldIt, IdentField_PSource));

  // Location strings are cached per raw source-location encoding.
  const unsigned RawLoc = Loc.getRawEncoding();
  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(RawLoc);
  if (!OMPDebugLoc) {
    SmallString<128> LocText;
    llvm::raw_svector_ostream LocStream(LocText);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    LocStream << ";" << PLoc.getFilename() << ";";
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      LocStream << FD->getQualifiedNameAsString();
    LocStream << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(LocStream.str());
    OpenMPDebugLocMap[RawLoc] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}
1626
1627
/// Return the thread id value to use in the function currently emitted by
/// \p CGF. A value cached for the function is reused. Otherwise the id is
/// loaded from the region's thread id variable when that is permitted and
/// available, and as a last resort obtained from a call to
/// __kmpc_global_thread_num emitted at the service insert point.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  // Check whether we've already cached a load of the thread id in this
  // function.
  auto Cached = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (Cached != OpenMPLocThreadIDMap.end() && Cached->second.ThreadID)
    return Cached->second.ThreadID;

  // If exceptions are enabled, do not use parameter to avoid possible crash.
  // The checks run in this order and stop at the first one that allows it.
  const auto MayUseThreadIDParam = [&CGF]() -> bool {
    if (!CGF.EHStack.requiresLandingPad())
      return true;
    if (!CGF.getLangOpts().Exceptions)
      return true;
    if (!CGF.getLangOpts().CXXExceptions)
      return true;
    return CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent();
  };

  if (MayUseThreadIDParam()) {
    auto *OMPRegionInfo =
        dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
    if (OMPRegionInfo && OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::Value *Loaded = CGF.EmitLoadOfScalar(LVal, Loc);
      // If value loaded in entry block, cache it and use it everywhere in
      // function.
      if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent())
        OpenMPLocThreadIDMap[CGF.CurFn].ThreadID = Loaded;
      return Loaded;
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Info = OpenMPLocThreadIDMap[CGF.CurFn];
  if (!Info.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Info.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Info.ThreadID = Call;
  return Call;
}
1677
1678
43.7k
/// Drop the per-function state kept for the function \p CGF has just
/// emitted: its location/thread-id entry (after removing the service insert
/// point) and the user-defined reductions recorded for it, which are also
/// erased from UDRMap.
void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  if (OpenMPLocThreadIDMap.find(CGF.CurFn) != OpenMPLocThreadIDMap.end()) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }

  auto Recorded = FunctionUDRMap.find(CGF.CurFn);
  if (Recorded != FunctionUDRMap.end()) {
    for (const auto *D : Recorded->second)
      UDRMap.erase(D);
    FunctionUDRMap.erase(Recorded);
  }
}
1690
1691
25.0k
/// Return the pointer type (default address space) to IdentTy.
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return llvm::PointerType::getUnqual(IdentTy);
}
1694
1695
12.9k
/// Return a pointer to the kmpc_micro function type, building and caching
/// that function type in Kmpc_MicroTy on first use.
llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (!Kmpc_MicroTy) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
    llvm::Type *Int32PtrTy = llvm::PointerType::getUnqual(CGM.Int32Ty);
    llvm::Type *MicroParams[] = {Int32PtrTy, Int32PtrTy};
    Kmpc_MicroTy =
        llvm::FunctionType::get(CGM.VoidTy, MicroParams, /*isVarArg=*/true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}
1704
1705
27.9k
llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1706
27.9k
  llvm::FunctionCallee RTLFn = nullptr;
1707
27.9k
  switch (static_cast<OpenMPRTLFunction>(Function)) {
1708
27.9k
  case OMPRTL__kmpc_fork_call: {
1709
3.19k
    // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1710
3.19k
    // microtask, ...);
1711
3.19k
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1712
3.19k
                                getKmpc_MicroPointerTy()};
1713
3.19k
    auto *FnTy =
1714
3.19k
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1715
3.19k
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1716
3.19k
    if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
1717
3.19k
      if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
1718
827
        llvm::LLVMContext &Ctx = F->getContext();
1719
827
        llvm::MDBuilder MDB(Ctx);
1720
827
        // Annotate the callback behavior of the __kmpc_fork_call:
1721
827
        //  - The callback callee is argument number 2 (microtask).
1722
827
        //  - The first two arguments of the callback callee are unknown (-1).
1723
827
        //  - All variadic arguments to the __kmpc_fork_call are passed to the
1724
827
        //    callback callee.
1725
827
        F->addMetadata(
1726
827
            llvm::LLVMContext::MD_callback,
1727
827
            *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
1728
827
                                        2, {-1, -1},
1729
827
                                        /* VarArgsArePassed */ true)}));
1730
827
      }
1731
3.19k
    }
1732
3.19k
    break;
1733
27.9k
  }
1734
27.9k
  case OMPRTL__kmpc_global_thread_num: {
1735
1.46k
    // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1736
1.46k
    llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1737
1.46k
    auto *FnTy =
1738
1.46k
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1739
1.46k
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1740
1.46k
    break;
1741
27.9k
  }
1742
27.9k
  case OMPRTL__kmpc_threadprivate_cached: {
1743
138
    // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1744
138
    // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1745
138
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1746
138
                                CGM.VoidPtrTy, CGM.SizeTy,
1747
138
                                CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1748
138
    auto *FnTy =
1749
138
        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1750
138
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1751
138
    break;
1752
27.9k
  }
1753
27.9k
  case OMPRTL__kmpc_critical: {
1754
117
    // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1755
117
    // kmp_critical_name *crit);
1756
117
    llvm::Type *TypeParams[] = {
1757
117
        getIdentTyPointerTy(), CGM.Int32Ty,
1758
117
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1759
117
    auto *FnTy =
1760
117
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1761
117
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1762
117
    break;
1763
27.9k
  }
1764
27.9k
  case OMPRTL__kmpc_critical_with_hint: {
1765
3
    // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1766
3
    // kmp_critical_name *crit, uintptr_t hint);
1767
3
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1768
3
                                llvm::PointerType::getUnqual(KmpCriticalNameTy),
1769
3
                                CGM.IntPtrTy};
1770
3
    auto *FnTy =
1771
3
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1772
3
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1773
3
    break;
1774
27.9k
  }
1775
27.9k
  case OMPRTL__kmpc_threadprivate_register: {
1776
25
    // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1777
25
    // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1778
25
    // typedef void *(*kmpc_ctor)(void *);
1779
25
    auto *KmpcCtorTy =
1780
25
        llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1781
25
                                /*isVarArg*/ false)->getPointerTo();
1782
25
    // typedef void *(*kmpc_cctor)(void *, void *);
1783
25
    llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1784
25
    auto *KmpcCopyCtorTy =
1785
25
        llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1786
25
                                /*isVarArg*/ false)
1787
25
            ->getPointerTo();
1788
25
    // typedef void (*kmpc_dtor)(void *);
1789
25
    auto *KmpcDtorTy =
1790
25
        llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1791
25
            ->getPointerTo();
1792
25
    llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1793
25
                              KmpcCopyCtorTy, KmpcDtorTy};
1794
25
    auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1795
25
                                        /*isVarArg*/ false);
1796
25
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1797
25
    break;
1798
27.9k
  }
1799
27.9k
  case OMPRTL__kmpc_end_critical: {
1800
120
    // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1801
120
    // kmp_critical_name *crit);
1802
120
    llvm::Type *TypeParams[] = {
1803
120
        getIdentTyPointerTy(), CGM.Int32Ty,
1804
120
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1805
120
    auto *FnTy =
1806
120
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1807
120
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1808
120
    break;
1809
27.9k
  }
1810
27.9k
  case OMPRTL__kmpc_cancel_barrier: {
1811
2
    // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1812
2
    // global_tid);
1813
2
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1814
2
    auto *FnTy =
1815
2
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1816
2
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1817
2
    break;
1818
27.9k
  }
1819
27.9k
  case OMPRTL__kmpc_barrier: {
1820
515
    // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1821
515
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1822
515
    auto *FnTy =
1823
515
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1824
515
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1825
515
    break;
1826
27.9k
  }
1827
27.9k
  case OMPRTL__kmpc_for_static_fini: {
1828
5.47k
    // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1829
5.47k
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1830
5.47k
    auto *FnTy =
1831
5.47k
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1832
5.47k
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1833
5.47k
    break;
1834
27.9k
  }
1835
27.9k
  case OMPRTL__kmpc_push_num_threads: {
1836
127
    // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1837
127
    // kmp_int32 num_threads)
1838
127
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1839
127
                                CGM.Int32Ty};
1840
127
    auto *FnTy =
1841
127
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1842
127
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1843
127
    break;
1844
27.9k
  }
1845
27.9k
  case OMPRTL__kmpc_serialized_parallel: {
1846
167
    // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1847
167
    // global_tid);
1848
167
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1849
167
    auto *FnTy =
1850
167
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1851
167
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1852
167
    break;
1853
27.9k
  }
1854
27.9k
  case OMPRTL__kmpc_end_serialized_parallel: {
1855
167
    // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1856
167
    // global_tid);
1857
167
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1858
167
    auto *FnTy =
1859
167
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1860
167
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1861
167
    break;
1862
27.9k
  }
1863
27.9k
  case OMPRTL__kmpc_flush: {
1864
40
    // Build void __kmpc_flush(ident_t *loc);
1865
40
    llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1866
40
    auto *FnTy =
1867
40
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1868
40
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
1869
40
    break;
1870
27.9k
  }
1871
27.9k
  case OMPRTL__kmpc_master: {
1872
9
    // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
1873
9
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1874
9
    auto *FnTy =
1875
9
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1876
9
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
1877
9
    break;
1878
27.9k
  }
1879
27.9k
  case OMPRTL__kmpc_end_master: {
1880
9
    // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
1881
9
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1882
9
    auto *FnTy =
1883
9
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1884
9
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
1885
9
    break;
1886
27.9k
  }
1887
27.9k
  case OMPRTL__kmpc_omp_taskyield: {
1888
8
    // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
1889
8
    // int end_part);
1890
8
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1891
8
    auto *FnTy =
1892
8
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1893
8
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
1894
8
    break;
1895
27.9k
  }
1896
27.9k
  case OMPRTL__kmpc_single: {
1897
39
    // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
1898
39
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1899
39
    auto *FnTy =
1900
39
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1901
39
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
1902
39
    break;
1903
27.9k
  }
1904
27.9k
  case OMPRTL__kmpc_end_single: {
1905
39
    // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
1906
39
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1907
39
    auto *FnTy =
1908
39
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1909
39
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
1910
39
    break;
1911
27.9k
  }
1912
27.9k
  case OMPRTL__kmpc_omp_task_alloc: {
1913
306
    // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
1914
306
    // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1915
306
    // kmp_routine_entry_t *task_entry);
1916
306
    assert(KmpRoutineEntryPtrTy != nullptr &&
1917
306
           "Type kmp_routine_entry_t must be created.");
1918
306
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1919
306
                                CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
1920
306
    // Return void * and then cast to particular kmp_task_t type.
1921
306
    auto *FnTy =
1922
306
        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1923
306
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
1924
306
    break;
1925
27.9k
  }
1926
27.9k
  case OMPRTL__kmpc_omp_target_task_alloc: {
1927
136
    // Build kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *, kmp_int32 gtid,
1928
136
    // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1929
136
    // kmp_routine_entry_t *task_entry, kmp_int64 device_id);
1930
136
    assert(KmpRoutineEntryPtrTy != nullptr &&
1931
136
           "Type kmp_routine_entry_t must be created.");
1932
136
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1933
136
                                CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy,
1934
136
                                CGM.Int64Ty};
1935
136
    // Return void * and then cast to particular kmp_task_t type.
1936
136
    auto *FnTy =
1937
136
        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1938
136
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_target_task_alloc");
1939
136
    break;
1940
27.9k
  }
1941
27.9k
  case OMPRTL__kmpc_omp_task: {
1942
77
    // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1943
77
    // *new_task);
1944
77
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1945
77
                                CGM.VoidPtrTy};
1946
77
    auto *FnTy =
1947
77
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1948
77
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
1949
77
    break;
1950
27.9k
  }
1951
27.9k
  case OMPRTL__kmpc_copyprivate: {
1952
18
    // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
1953
18
    // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
1954
18
    // kmp_int32 didit);
1955
18
    llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1956
18
    auto *CpyFnTy =
1957
18
        llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
1958
18
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
1959
18
                                CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
1960
18
                                CGM.Int32Ty};
1961
18
    auto *FnTy =
1962
18
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1963
18
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
1964
18
    break;
1965
27.9k
  }
1966
27.9k
  case OMPRTL__kmpc_reduce: {
1967
136
    // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
1968
136
    // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
1969
136
    // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
1970
136
    llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1971
136
    auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1972
136
                                               /*isVarArg=*/false);
1973
136
    llvm::Type *TypeParams[] = {
1974
136
        getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1975
136
        CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1976
136
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1977
136
    auto *FnTy =
1978
136
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1979
136
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
1980
136
    break;
1981
27.9k
  }
1982
27.9k
  case OMPRTL__kmpc_reduce_nowait: {
1983
170
    // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
1984
170
    // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
1985
170
    // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
1986
170
    // *lck);
1987
170
    llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1988
170
    auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1989
170
                                               /*isVarArg=*/false);
1990
170
    llvm::Type *TypeParams[] = {
1991
170
        getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1992
170
        CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1993
170
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1994
170
    auto *FnTy =
1995
170
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1996
170
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
1997
170
    break;
1998
27.9k
  }
1999
27.9k
  case OMPRTL__kmpc_end_reduce: {
2000
272
    // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
2001
272
    // kmp_critical_name *lck);
2002
272
    llvm::Type *TypeParams[] = {
2003
272
        getIdentTyPointerTy(), CGM.Int32Ty,
2004
272
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2005
272
    auto *FnTy =
2006
272
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2007
272
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
2008
272
    break;
2009
27.9k
  }
2010
27.9k
  case OMPRTL__kmpc_end_reduce_nowait: {
2011
170
    // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
2012
170
    // kmp_critical_name *lck);
2013
170
    llvm::Type *TypeParams[] = {
2014
170
        getIdentTyPointerTy(), CGM.Int32Ty,
2015
170
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2016
170
    auto *FnTy =
2017
170
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2018
170
    RTLFn =
2019
170
        CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
2020
170
    break;
2021
27.9k
  }
2022
27.9k
  case OMPRTL__kmpc_omp_task_begin_if0: {
2023
174
    // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2024
174
    // *new_task);
2025
174
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2026
174
                                CGM.VoidPtrTy};
2027
174
    auto *FnTy =
2028
174
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2029
174
    RTLFn =
2030
174
        CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
2031
174
    break;
2032
27.9k
  }
2033
27.9k
  case OMPRTL__kmpc_omp_task_complete_if0: {
2034
174
    // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2035
174
    // *new_task);
2036
174
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2037
174
                                CGM.VoidPtrTy};
2038
174
    auto *FnTy =
2039
174
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2040
174
    RTLFn = CGM.CreateRuntimeFunction(FnTy,
2041
174
                                      /*Name=*/"__kmpc_omp_task_complete_if0");
2042
174
    break;
2043
27.9k
  }
2044
27.9k
  case OMPRTL__kmpc_ordered: {
2045
8
    // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
2046
8
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2047
8
    auto *FnTy =
2048
8
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2049
8
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
2050
8
    break;
2051
27.9k
  }
2052
27.9k
  case OMPRTL__kmpc_end_ordered: {
2053
8
    // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
2054
8
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2055
8
    auto *FnTy =
2056
8
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2057
8
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
2058
8
    break;
2059
27.9k
  }
2060
27.9k
  case OMPRTL__kmpc_omp_taskwait: {
2061
6
    // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
2062
6
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2063
6
    auto *FnTy =
2064
6
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2065
6
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
2066
6
    break;
2067
27.9k
  }
2068
27.9k
  case OMPRTL__kmpc_taskgroup: {
2069
92
    // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
2070
92
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2071
92
    auto *FnTy =
2072
92
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2073
92
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
2074
92
    break;
2075
27.9k
  }
2076
27.9k
  case OMPRTL__kmpc_end_taskgroup: {
2077
92
    // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
2078
92
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2079
92
    auto *FnTy =
2080
92
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2081
92
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
2082
92
    break;
2083
27.9k
  }
2084
27.9k
  case OMPRTL__kmpc_push_proc_bind: {
2085
52
    // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
2086
52
    // int proc_bind)
2087
52
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2088
52
    auto *FnTy =
2089
52
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2090
52
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
2091
52
    break;
2092
27.9k
  }
2093
27.9k
  case OMPRTL__kmpc_omp_task_with_deps: {
2094
152
    // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
2095
152
    // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
2096
152
    // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
2097
152
    llvm::Type *TypeParams[] = {
2098
152
        getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
2099
152
        CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
2100
152
    auto *FnTy =
2101
152
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2102
152
    RTLFn =
2103
152
        CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
2104
152
    break;
2105
27.9k
  }
2106
27.9k
  case OMPRTL__kmpc_omp_wait_deps: {
2107
164
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
2108
164
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
2109
164
    // kmp_depend_info_t *noalias_dep_list);
2110
164
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2111
164
                                CGM.Int32Ty,           CGM.VoidPtrTy,
2112
164
                                CGM.Int32Ty,           CGM.VoidPtrTy};
2113
164
    auto *FnTy =
2114
164
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2115
164
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
2116
164
    break;
2117
27.9k
  }
2118
27.9k
  case OMPRTL__kmpc_cancellationpoint: {
2119
32
    // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
2120
32
    // global_tid, kmp_int32 cncl_kind)
2121
32
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2122
32
    auto *FnTy =
2123
32
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2124
32
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
2125
32
    break;
2126
27.9k
  }
2127
27.9k
  case OMPRTL__kmpc_cancel: {
2128
66
    // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
2129
66
    // kmp_int32 cncl_kind)
2130
66
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2131
66
    auto *FnTy =
2132
66
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2133
66
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
2134
66
    break;
2135
27.9k
  }
2136
27.9k
  case OMPRTL__kmpc_push_num_teams: {
2137
257
    // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid,
2138
257
    // kmp_int32 num_teams, kmp_int32 num_threads)
2139
257
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2140
257
        CGM.Int32Ty};
2141
257
    auto *FnTy =
2142
257
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2143
257
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
2144
257
    break;
2145
27.9k
  }
2146
27.9k
  case OMPRTL__kmpc_fork_teams: {
2147
3.26k
    // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
2148
3.26k
    // microtask, ...);
2149
3.26k
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2150
3.26k
                                getKmpc_MicroPointerTy()};
2151
3.26k
    auto *FnTy =
2152
3.26k
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
2153
3.26k
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
2154
3.26k
    if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
2155
3.26k
      if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
2156
930
        llvm::LLVMContext &Ctx = F->getContext();
2157
930
        llvm::MDBuilder MDB(Ctx);
2158
930
        // Annotate the callback behavior of the __kmpc_fork_teams:
2159
930
        //  - The callback callee is argument number 2 (microtask).
2160
930
        //  - The first two arguments of the callback callee are unknown (-1).
2161
930
        //  - All variadic arguments to the __kmpc_fork_teams are passed to the
2162
930
        //    callback callee.
2163
930
        F->addMetadata(
2164
930
            llvm::LLVMContext::MD_callback,
2165
930
            *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
2166
930
                                        2, {-1, -1},
2167
930
                                        /* VarArgsArePassed */ true)}));
2168
930
      }
2169
3.26k
    }
2170
3.26k
    break;
2171
27.9k
  }
2172
27.9k
  case OMPRTL__kmpc_taskloop: {
2173
65
    // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
2174
65
    // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
2175
65
    // sched, kmp_uint64 grainsize, void *task_dup);
2176
65
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2177
65
                                CGM.IntTy,
2178
65
                                CGM.VoidPtrTy,
2179
65
                                CGM.IntTy,
2180
65
                                CGM.Int64Ty->getPointerTo(),
2181
65
                                CGM.Int64Ty->getPointerTo(),
2182
65
                                CGM.Int64Ty,
2183
65
                                CGM.IntTy,
2184
65
                                CGM.IntTy,
2185
65
                                CGM.Int64Ty,
2186
65
                                CGM.VoidPtrTy};
2187
65
    auto *FnTy =
2188
65
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2189
65
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
2190
65
    break;
2191
27.9k
  }
2192
27.9k
  case OMPRTL__kmpc_doacross_init: {
2193
12
    // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
2194
12
    // num_dims, struct kmp_dim *dims);
2195
12
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2196
12
                                CGM.Int32Ty,
2197
12
                                CGM.Int32Ty,
2198
12
                                CGM.VoidPtrTy};
2199
12
    auto *FnTy =
2200
12
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2201
12
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
2202
12
    break;
2203
27.9k
  }
2204
27.9k
  case OMPRTL__kmpc_doacross_fini: {
2205
12
    // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
2206
12
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2207
12
    auto *FnTy =
2208
12
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2209
12
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
2210
12
    break;
2211
27.9k
  }
2212
27.9k
  case OMPRTL__kmpc_doacross_post: {
2213
6
    // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
2214
6
    // *vec);
2215
6
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2216
6
                                CGM.Int64Ty->getPointerTo()};
2217
6
    auto *FnTy =
2218
6
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2219
6
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
2220
6
    break;
2221
27.9k
  }
2222
27.9k
  case OMPRTL__kmpc_doacross_wait: {
2223
8
    // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
2224
8
    // *vec);
2225
8
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2226
8
                                CGM.Int64Ty->getPointerTo()};
2227
8
    auto *FnTy =
2228
8
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2229
8
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
2230
8
    break;
2231
27.9k
  }
2232
27.9k
  case OMPRTL__kmpc_task_reduction_init: {
2233
20
    // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
2234
20
    // *data);
2235
20
    llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
2236
20
    auto *FnTy =
2237
20
        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2238
20
    RTLFn =
2239
20
        CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
2240
20
    break;
2241
27.9k
  }
2242
27.9k
  case OMPRTL__kmpc_task_reduction_get_th_data: {
2243
20
    // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
2244
20
    // *d);
2245
20
    llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2246
20
    auto *FnTy =
2247
20
        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2248
20
    RTLFn = CGM.CreateRuntimeFunction(
2249
20
        FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
2250
20
    break;
2251
27.9k
  }
2252
27.9k
  case OMPRTL__kmpc_alloc: {
2253
34
    // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t
2254
34
    // al); omp_allocator_handle_t type is void *.
2255
34
    llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy};
2256
34
    auto *FnTy =
2257
34
        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2258
34
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc");
2259
34
    break;
2260
27.9k
  }
2261
27.9k
  case OMPRTL__kmpc_free: {
2262
34
    // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t
2263
34
    // al); omp_allocator_handle_t type is void *.
2264
34
    llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2265
34
    auto *FnTy =
2266
34
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2267
34
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free");
2268
34
    break;
2269
27.9k
  }
2270
27.9k
  case OMPRTL__kmpc_push_target_tripcount: {
2271
2.24k
    // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
2272
2.24k
    // size);
2273
2.24k
    llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty};
2274
2.24k
    llvm::FunctionType *FnTy =
2275
2.24k
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2276
2.24k
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount");
2277
2.24k
    break;
2278
27.9k
  }
2279
27.9k
  case OMPRTL__tgt_target: {
2280
862
    // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
2281
862
    // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2282
862
    // *arg_types);
2283
862
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
2284
862
                                CGM.VoidPtrTy,
2285
862
                                CGM.Int32Ty,
2286
862
                                CGM.VoidPtrPtrTy,
2287
862
                                CGM.VoidPtrPtrTy,
2288
862
                                CGM.Int64Ty->getPointerTo(),
2289
862
                                CGM.Int64Ty->getPointerTo()};
2290
862
    auto *FnTy =
2291
862
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2292
862
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
2293
862
    break;
2294
27.9k
  }
2295
27.9k
  case OMPRTL__tgt_target_nowait: {
2296
16
    // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
2297
16
    // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2298
16
    // int64_t *arg_types);
2299
16
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
2300
16
                                CGM.VoidPtrTy,
2301
16
                                CGM.Int32Ty,
2302
16
                                CGM.VoidPtrPtrTy,
2303
16
                                CGM.VoidPtrPtrTy,
2304
16
                                CGM.Int64Ty->getPointerTo(),
2305
16
                                CGM.Int64Ty->getPointerTo()};
2306
16
    auto *FnTy =
2307
16
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2308
16
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
2309
16
    break;
2310
27.9k
  }
2311
27.9k
  case OMPRTL__tgt_target_teams: {
2312
3.32k
    // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
2313
3.32k
    // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2314
3.32k
    // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2315
3.32k
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
2316
3.32k
                                CGM.VoidPtrTy,
2317
3.32k
                                CGM.Int32Ty,
2318
3.32k
                                CGM.VoidPtrPtrTy,
2319
3.32k
                                CGM.VoidPtrPtrTy,
2320
3.32k
                                CGM.Int64Ty->getPointerTo(),
2321
3.32k
                                CGM.Int64Ty->getPointerTo(),
2322
3.32k
                                CGM.Int32Ty,
2323
3.32k
                                CGM.Int32Ty};
2324
3.32k
    auto *FnTy =
2325
3.32k
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2326
3.32k
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
2327
3.32k
    break;
2328
27.9k
  }
2329
27.9k
  case OMPRTL__tgt_target_teams_nowait: {
2330
96
    // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
2331
96
    // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
2332
96
    // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2333
96
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
2334
96
                                CGM.VoidPtrTy,
2335
96
                                CGM.Int32Ty,
2336
96
                                CGM.VoidPtrPtrTy,
2337
96
                                CGM.VoidPtrPtrTy,
2338
96
                                CGM.Int64Ty->getPointerTo(),
2339
96
                                CGM.Int64Ty->getPointerTo(),
2340
96
                                CGM.Int32Ty,
2341
96
                                CGM.Int32Ty};
2342
96
    auto *FnTy =
2343
96
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2344
96
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
2345
96
    break;
2346
27.9k
  }
2347
27.9k
  case OMPRTL__tgt_register_requires: {
2348
1.13k
    // Build void __tgt_register_requires(int64_t flags);
2349
1.13k
    llvm::Type *TypeParams[] = {CGM.Int64Ty};
2350
1.13k
    auto *FnTy =
2351
1.13k
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2352
1.13k
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires");
2353
1.13k
    break;
2354
27.9k
  }
2355
27.9k
  case OMPRTL__tgt_register_lib: {
2356
1.13k
    // Build void __tgt_register_lib(__tgt_bin_desc *desc);
2357
1.13k
    QualType ParamTy =
2358
1.13k
        CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2359
1.13k
    llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2360
1.13k
    auto *FnTy =
2361
1.13k
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2362
1.13k
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
2363
1.13k
    break;
2364
27.9k
  }
2365
27.9k
  case OMPRTL__tgt_unregister_lib: {
2366
1.13k
    // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
2367
1.13k
    QualType ParamTy =
2368
1.13k
        CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2369
1.13k
    llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2370
1.13k
    auto *FnTy =
2371
1.13k
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2372
1.13k
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
2373
1.13k
    break;
2374
27.9k
  }
2375
27.9k
  case OMPRTL__tgt_target_data_begin: {
2376
118
    // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
2377
118
    // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2378
118
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
2379
118
                                CGM.Int32Ty,
2380
118
                                CGM.VoidPtrPtrTy,
2381
118
                                CGM.VoidPtrPtrTy,
2382
118
                                CGM.Int64Ty->getPointerTo(),
2383
118
                                CGM.Int64Ty->getPointerTo()};
2384
118
    auto *FnTy =
2385
118
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2386
118
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
2387
118
    break;
2388
27.9k
  }
2389
27.9k
  case OMPRTL__tgt_target_data_begin_nowait: {
2390
8
    // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
2391
8
    // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2392
8
    // *arg_types);
2393
8
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
2394
8
                                CGM.Int32Ty,
2395
8
                                CGM.VoidPtrPtrTy,
2396
8
                                CGM.VoidPtrPtrTy,
2397
8
                                CGM.Int64Ty->getPointerTo(),
2398
8
                                CGM.Int64Ty->getPointerTo()};
2399
8
    auto *FnTy =
2400
8
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2401
8
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
2402
8
    break;
2403
27.9k
  }
2404
27.9k
  case OMPRTL__tgt_target_data_end: {
2405
112
    // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
2406
112
    // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2407
112
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
2408
112
                                CGM.Int32Ty,
2409
112
                                CGM.VoidPtrPtrTy,
2410
112
                                CGM.VoidPtrPtrTy,
2411
112
                                CGM.Int64Ty->getPointerTo(),
2412
112
                                CGM.Int64Ty->getPointerTo()};
2413
112
    auto *FnTy =
2414
112
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2415
112
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
2416
112
    break;
2417
27.9k
  }
2418
27.9k
  case OMPRTL__tgt_target_data_end_nowait: {
2419
8
    // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
2420
8
    // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2421
8
    // *arg_types);
2422
8
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
2423
8
                                CGM.Int32Ty,
2424
8
                                CGM.VoidPtrPtrTy,
2425
8
                                CGM.VoidPtrPtrTy,
2426
8
                                CGM.Int64Ty->getPointerTo(),
2427
8
                                CGM.Int64Ty->getPointerTo()};
2428
8
    auto *FnTy =
2429
8
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2430
8
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
2431
8
    break;
2432
27.9k
  }
2433
27.9k
  case OMPRTL__tgt_target_data_update: {
2434
42
    // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
2435
42
    // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2436
42
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
2437
42
                                CGM.Int32Ty,
2438
42
                                CGM.VoidPtrPtrTy,
2439
42
                                CGM.VoidPtrPtrTy,
2440
42
                                CGM.Int64Ty->getPointerTo(),
2441
42
                                CGM.Int64Ty->getPointerTo()};
2442
42
    auto *FnTy =
2443
42
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2444
42
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
2445
42
    break;
2446
27.9k
  }
2447
27.9k
  case OMPRTL__tgt_target_data_update_nowait: {
2448
8
    // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
2449
8
    // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2450
8
    // *arg_types);
2451
8
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
2452
8
                                CGM.Int32Ty,
2453
8
                                CGM.VoidPtrPtrTy,
2454
8
                                CGM.VoidPtrPtrTy,
2455
8
                                CGM.Int64Ty->getPointerTo(),
2456
8
                                CGM.Int64Ty->getPointerTo()};
2457
8
    auto *FnTy =
2458
8
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2459
8
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
2460
8
    break;
2461
27.9k
  }
2462
27.9k
  }
2463
27.9k
  assert(RTLFn && "Unable to find OpenMP runtime function");
2464
27.9k
  return RTLFn;
2465
27.9k
}
2466
2467
/// Declares the static-loop initialization entry point of the OpenMP runtime
/// that matches the given iteration variable.
///
/// \param IVSize   Width of the iteration variable in bits; must be 32 or 64.
/// \param IVSigned True when the iteration variable is signed.
/// \returns The callee for "__kmpc_for_static_init_{4,4u,8,8u}", where the
/// digit encodes the width in bytes and the 'u' suffix marks the unsigned
/// variant.
llvm::FunctionCallee
CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  const bool Is32Bit = IVSize == 32;

  // Select the entry point by width first, then by signedness.
  StringRef Name;
  if (Is32Bit)
    Name = IVSigned ? "__kmpc_for_static_init_4" : "__kmpc_for_static_init_4u";
  else
    Name = IVSigned ? "__kmpc_for_static_init_8" : "__kmpc_for_static_init_8u";

  // The bounds, stride, increment and chunk all use the iteration variable's
  // integer type (the first three are passed by pointer).
  llvm::Type *IVTy = CGM.Int64Ty;
  if (Is32Bit)
    IVTy = CGM.Int32Ty;
  auto *IVPtrTy = llvm::PointerType::getUnqual(IVTy);

  llvm::Type *LocTy = getIdentTyPointerTy();
  llvm::Type *LastIterPtrTy = llvm::PointerType::getUnqual(CGM.Int32Ty);
  llvm::Type *TypeParams[] = {
      LocTy,         // loc
      CGM.Int32Ty,   // tid
      CGM.Int32Ty,   // schedtype
      LastIterPtrTy, // p_lastiter
      IVPtrTy,       // p_lower
      IVPtrTy,       // p_upper
      IVPtrTy,       // p_stride
      IVTy,          // incr
      IVTy           // chunk
  };
  llvm::FunctionType *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}
2492
2493
/// Declares the dispatch initialization entry point of the OpenMP runtime
/// that matches the given iteration variable.
///
/// \param IVSize   Width of the iteration variable in bits; must be 32 or 64.
/// \param IVSigned True when the iteration variable is signed.
/// \returns The callee for "__kmpc_dispatch_init_{4,4u,8,8u}", where the digit
/// encodes the width in bytes and the 'u' suffix marks the unsigned variant.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  const bool Is32Bit = IVSize == 32;

  // Select the entry point by width first, then by signedness.
  StringRef Name;
  if (Is32Bit)
    Name = IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u";
  else
    Name = IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u";

  // Bounds, stride and chunk are passed by value in the iteration variable's
  // integer type.
  llvm::Type *IVTy = CGM.Int64Ty;
  if (Is32Bit)
    IVTy = CGM.Int32Ty;

  llvm::Type *LocTy = getIdentTyPointerTy();
  llvm::Type *TypeParams[] = {
      LocTy,       // loc
      CGM.Int32Ty, // tid
      CGM.Int32Ty, // schedtype
      IVTy,        // lower
      IVTy,        // upper
      IVTy,        // stride
      IVTy         // chunk
  };
  llvm::FunctionType *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}
2514
2515
/// Builds the declaration of the __kmpc_dispatch_fini_{4,4u,8,8u} runtime
/// entry selected by the bit width and signedness of the iteration variable.
/// The entry takes only the location and the thread id and returns void.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name;
  if (IVSize == 32)
    Name = IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u";
  else
    Name = IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u";
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(), // loc
      CGM.Int32Ty,           // tid
  };
  return CGM.CreateRuntimeFunction(
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false),
      Name);
}
2531
2532
/// Builds the declaration of the __kmpc_dispatch_next_{4,4u,8,8u} runtime
/// entry selected by the bit width and signedness of the iteration variable.
/// The entry returns a 32-bit integer and receives the bounds and stride
/// through pointers to the iteration variable's integer type.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  const bool Is32Bit = IVSize == 32;
  StringRef Name;
  if (Is32Bit)
    Name = IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u";
  else
    Name = IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u";
  llvm::Type *IVTy = Is32Bit ? CGM.Int32Ty : CGM.Int64Ty;
  auto *IVPtrTy = llvm::PointerType::getUnqual(IVTy);
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      IVPtrTy,                                   // p_lower
      IVPtrTy,                                   // p_upper
      IVPtrTy                                    // p_stride
  };
  return CGM.CreateRuntimeFunction(
      llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false),
      Name);
}
2554
2555
160
/// Returns the address of the "<mangled name>_decl_tgt_ref_ptr" reference
/// pointer for \p VD, creating and registering it on first use. An invalid
/// address is returned in simd-only mode, when \p VD is not a declare target
/// declaration, or when its map type is neither 'link' nor 'to' combined with
/// the unified-shared-memory requirement.
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();

  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res)
    return Address::invalid();
  const bool IsLink = *Res == OMPDeclareTargetDeclAttr::MT_Link;
  const bool IsToWithUSM = *Res == OMPDeclareTargetDeclAttr::MT_To &&
                           HasRequiresUnifiedSharedMemory;
  if (!IsLink && !IsToWithUSM)
    return Address::invalid();

  // Compose the name of the reference pointer.
  SmallString<64> RefPtrName;
  {
    llvm::raw_svector_ostream NameOS(RefPtrName);
    NameOS << CGM.getMangledName(GlobalDecl(VD)) << "_decl_tgt_ref_ptr";
  }

  llvm::Value *RefPtr = CGM.getModule().getNamedValue(RefPtrName);
  if (!RefPtr) {
    // First request in this module: create the pointer variable.
    QualType PtrQTy = CGM.getContext().getPointerType(VD->getType());
    RefPtr = getOrCreateInternalVariable(
        CGM.getTypes().ConvertTypeForMem(PtrQTy), RefPtrName);
    // Only the host side gives the pointer external linkage and initializes
    // it with the address of the variable itself.
    if (!CGM.getLangOpts().OpenMPIsDevice) {
      auto *RefPtrGV = cast<llvm::GlobalVariable>(RefPtr);
      RefPtrGV->setLinkage(llvm::GlobalValue::ExternalLinkage);
      RefPtrGV->setInitializer(CGM.GetAddrOfGlobal(VD));
    }
    CGM.addUsedGlobal(cast<llvm::GlobalValue>(RefPtr));
    registerTargetGlobalVariable(VD, cast<llvm::Constant>(RefPtr));
  }
  return Address(RefPtr, CGM.getContext().getDeclAlign(VD));
}
2585
2586
/// Returns the internal cache variable associated with the threadprivate
/// variable \p VD, creating it lazily. The variable is named after the mangled
/// name of \p VD followed by the runtime's "cache" suffix. Must not be used
/// when native TLS is both requested and supported.
llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!(CGM.getLangOpts().OpenMPUseTLS &&
           CGM.getContext().getTargetInfo().isTLSSupported()));
  std::string CacheSuffix = getName({"cache", ""});
  StringRef MangledName = CGM.getMangledName(VD);
  // Look the entry up by name; it is created on the first request.
  return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
                                     Twine(MangledName).concat(CacheSuffix));
}
2595
2596
/// Returns the address to use for the threadprivate variable \p VD. With
/// native TLS requested and supported this is \p VDAddr itself; otherwise it
/// is the result of a call to __kmpc_threadprivate_cached, carrying the
/// alignment of \p VDAddr.
Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  const bool UseNativeTLS =
      CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported();
  if (UseNativeTLS)
    return VDAddr;

  // Arguments: loc, thread id, variable address, variable size, cache.
  llvm::Value *LocVal = emitUpdateLocation(CGF, Loc);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *VarPtr =
      CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy);
  llvm::Value *VarSize =
      CGM.getSize(CGM.GetTargetTypeStoreSize(VDAddr.getElementType()));
  llvm::Value *Cache = getOrCreateThreadPrivateCache(VD);
  llvm::Value *Args[] = {LocVal, ThreadID, VarPtr, VarSize, Cache};

  llvm::Value *ThreadLocalPtr = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args);
  return Address(ThreadLocalPtr, VDAddr.getAlignment());
}
2614
2615
void CGOpenMPRuntime::emitThreadPrivateVarInit(
2616
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
2617
25
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
2618
25
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
2619
25
  // library.
2620
25
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
2621
25
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
2622
25
                      OMPLoc);
2623
25
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
2624
25
  // to register constructor/destructor for variable.
2625
25
  llvm::Value *Args[] = {
2626
25
      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
2627
25
      Ctor, CopyCtor, Dtor};
2628
25
  CGF.EmitRuntimeCall(
2629
25
      createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
2630
25
}
2631
2632
/// Generates, at most once per mangled variable name, the constructor and
/// destructor helpers for the threadprivate variable \p VD and registers them
/// with the runtime. When \p CGF is null the registration is emitted into a
/// freshly created init function, which is returned; when \p CGF is non-null
/// the registration is emitted into it and nullptr is returned. nullptr is
/// also returned when native TLS is used, when there is no definition, when
/// the variable was already handled, or when no helper is required.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Nothing to do without a definition or if this variable was seen before.
  if (!VD ||
      !ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second)
    return nullptr;

  QualType VarQTy = VD->getType();
  llvm::Value *Ctor = nullptr;
  llvm::Value *CopyCtor = nullptr;
  llvm::Value *Dtor = nullptr;
  const Expr *Init = VD->getAnyInitializer();

  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    // Build a helper "void *ctor(void *dst)" that evaluates the declaration's
    // initializer into the storage passed as its argument and returns that
    // argument.
    CodeGenFunction CtorCGF(CGM);
    FunctionArgList CtorArgs;
    ImplicitParamDecl DstParam(CGM.getContext(), /*DC=*/nullptr, Loc,
                               /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                               ImplicitParamDecl::Other);
    CtorArgs.push_back(&DstParam);

    const auto &CtorFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
        CGM.getContext().VoidPtrTy, CtorArgs);
    llvm::FunctionType *CtorFTy = CGM.getTypes().GetFunctionType(CtorFI);
    std::string CtorName = getName({"__kmpc_global_ctor_", ""});
    llvm::Function *CtorFn =
        CGM.CreateGlobalInitOrDestructFunction(CtorFTy, CtorName, CtorFI, Loc);
    CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, CtorFn,
                          CtorFI, CtorArgs, Loc, Loc);
    llvm::Value *DstVal = CtorCGF.EmitLoadOfScalar(
        CtorCGF.GetAddrOfLocalVar(&DstParam), /*Volatile=*/false,
        CGM.getContext().VoidPtrTy, DstParam.getLocation());
    Address DstAddr = Address(DstVal, VDAddr.getAlignment());
    DstAddr = CtorCGF.Builder.CreateElementBitCast(
        DstAddr, CtorCGF.ConvertTypeForMem(VarQTy));
    CtorCGF.EmitAnyExprToMem(Init, DstAddr, Init->getType().getQualifiers(),
                             /*IsInitializer=*/true);
    // Reload the argument and hand it back as the helper's result.
    DstVal = CtorCGF.EmitLoadOfScalar(
        CtorCGF.GetAddrOfLocalVar(&DstParam), /*Volatile=*/false,
        CGM.getContext().VoidPtrTy, DstParam.getLocation());
    CtorCGF.Builder.CreateStore(DstVal, CtorCGF.ReturnValue);
    CtorCGF.FinishFunction();
    Ctor = CtorFn;
  }

  if (VD->getType().isDestructedType() != QualType::DK_none) {
    // Build a helper "void dtor(void *dst)" that destroys the object stored
    // at the address passed as its argument.
    CodeGenFunction DtorCGF(CGM);
    FunctionArgList DtorArgs;
    ImplicitParamDecl DstParam(CGM.getContext(), /*DC=*/nullptr, Loc,
                               /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                               ImplicitParamDecl::Other);
    DtorArgs.push_back(&DstParam);

    const auto &DtorFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
        CGM.getContext().VoidTy, DtorArgs);
    llvm::FunctionType *DtorFTy = CGM.getTypes().GetFunctionType(DtorFI);
    std::string DtorName = getName({"__kmpc_global_dtor_", ""});
    llvm::Function *DtorFn =
        CGM.CreateGlobalInitOrDestructFunction(DtorFTy, DtorName, DtorFI, Loc);
    auto EmptyLoc = ApplyDebugLocation::CreateEmpty(DtorCGF);
    DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, DtorFn,
                          DtorFI, DtorArgs, Loc, Loc);
    // The body of the helper is emitted under an artificial debug location.
    auto ArtificialLoc = ApplyDebugLocation::CreateArtificial(DtorCGF);
    llvm::Value *DstVal = DtorCGF.EmitLoadOfScalar(
        DtorCGF.GetAddrOfLocalVar(&DstParam),
        /*Volatile=*/false, CGM.getContext().VoidPtrTy,
        DstParam.getLocation());
    DtorCGF.emitDestroy(Address(DstVal, VDAddr.getAlignment()), VarQTy,
                        DtorCGF.getDestroyer(VarQTy.isDestructedType()),
                        DtorCGF.needsEHCleanup(VarQTy.isDestructedType()));
    DtorCGF.FinishFunction();
    Dtor = DtorFn;
  }

  // Without any helper there is nothing to register.
  if (!Ctor && !Dtor)
    return nullptr;

  // The copy constructor slot is reserved by the runtime and must currently
  // always be NULL; the runtime asserts otherwise.
  llvm::Type *CopyCtorParamTys[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
  auto *CopyCtorPtrTy =
      llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorParamTys,
                              /*isVarArg=*/false)
          ->getPointerTo();
  CopyCtor = llvm::Constant::getNullValue(CopyCtorPtrTy);

  // Substitute typed null pointers for whichever helper was not generated.
  if (!Ctor) {
    auto *CtorPtrTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                              /*isVarArg=*/false)
                          ->getPointerTo();
    Ctor = llvm::Constant::getNullValue(CtorPtrTy);
  }
  if (!Dtor) {
    auto *DtorPtrTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                              /*isVarArg=*/false)
                          ->getPointerTo();
    Dtor = llvm::Constant::getNullValue(DtorPtrTy);
  }

  if (CGF) {
    // A function is already being emitted: register right there.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
    return nullptr;
  }

  // No enclosing function: wrap the registration in its own init function.
  auto *InitFnTy = llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
  std::string InitFnName = getName({"__omp_threadprivate_init_", ""});
  llvm::Function *InitFn = CGM.CreateGlobalInitOrDestructFunction(
      InitFnTy, InitFnName, CGM.getTypes().arrangeNullaryFunction());
  CodeGenFunction InitCGF(CGM);
  FunctionArgList NoArgs;
  InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFn,
                        CGM.getTypes().arrangeNullaryFunction(), NoArgs, Loc,
                        Loc);
  emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  InitCGF.FinishFunction();
  return InitFn;
}
2751
2752
/// Obtain information that uniquely identifies a target entry. This
2753
/// consists of the file and device IDs as well as line number associated with
2754
/// the relevant entry source location.
2755
static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
2756
                                     unsigned &DeviceID, unsigned &FileID,
2757
8.70k
                                     unsigned &LineNum) {
2758
8.70k
  SourceManager &SM = C.getSourceManager();
2759
8.70k
2760
8.70k
  // The loc should be always valid and have a file ID (the user cannot use
2761
8.70k
  // #pragma directives in macros)
2762
8.70k
2763
8.70k
  assert(Loc.isValid() && "Source location is expected to be always valid.");
2764
8.70k
2765
8.70k
  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
2766
8.70k
  assert(PLoc.isValid() && "Source location is expected to be always valid.");
2767
8.70k
2768
8.70k
  llvm::sys::fs::UniqueID ID;
2769
8.70k
  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
2770
0
    SM.getDiagnostics().Report(diag::err_cannot_open_file)
2771
0
        << PLoc.getFilename() << EC.message();
2772
8.70k
2773
8.70k
  DeviceID = ID.getDevice();
2774
8.70k
  FileID = ID.getFile();
2775
8.70k
  LineNum = PLoc.getLine();
2776
8.70k
}
2777
2778
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
2779
                                                     llvm::GlobalVariable *Addr,
2780
2.37k
                                                     bool PerformInit) {
2781
2.37k
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
2782
2.37k
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
2783
2.37k
  if (!Res || 
*Res == OMPDeclareTargetDeclAttr::MT_Link55
||
2784
2.37k
      
(55
*Res == OMPDeclareTargetDeclAttr::MT_To55
&&
2785
55
       HasRequiresUnifiedSharedMemory))
2786
2.32k
    return CGM.getLangOpts().OpenMPIsDevice;
2787
55
  VD = VD->getDefinition(CGM.getContext());
2788
55
  if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
2789
3
    return CGM.getLangOpts().OpenMPIsDevice;
2790
52
2791
52
  QualType ASTTy = VD->getType();
2792
52
2793
52
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
2794
52
  // Produce the unique prefix to identify the new target regions. We use
2795
52
  // the source location of the variable declaration which we know to not
2796
52
  // conflict with any target region.
2797
52
  unsigned DeviceID;
2798
52
  unsigned FileID;
2799
52
  unsigned Line;
2800
52
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
2801
52
  SmallString<128> Buffer, Out;
2802
52
  {
2803
52
    llvm::raw_svector_ostream OS(Buffer);
2804
52
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
2805
52
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
2806
52
  }
2807
52
2808
52
  const Expr *Init = VD->getAnyInitializer();
2809
52
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
2810
52
    llvm::Constant *Ctor;
2811
52
    llvm::Constant *ID;
2812
52
    if (CGM.getLangOpts().OpenMPIsDevice) {
2813
16
      // Generate function that re-emits the declaration's initializer into
2814
16
      // the threadprivate copy of the variable VD
2815
16
      CodeGenFunction CtorCGF(CGM);
2816
16
2817
16
      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
2818
16
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2819
16
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
2820
16
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
2821
16
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
2822
16
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
2823
16
                            FunctionArgList(), Loc, Loc);
2824
16
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
2825
16
      CtorCGF.EmitAnyExprToMem(Init,
2826
16
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
2827
16
                               Init->getType().getQualifiers(),
2828
16
                               /*IsInitializer=*/true);
2829
16
      CtorCGF.FinishFunction();
2830
16
      Ctor = Fn;
2831
16
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
2832
16
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
2833
36
    } else {
2834
36
      Ctor = new llvm::GlobalVariable(
2835
36
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
2836
36
          llvm::GlobalValue::PrivateLinkage,
2837
36
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
2838
36
      ID = Ctor;
2839
36
    }
2840
52
2841
52
    // Register the information for the entry associated with the constructor.
2842
52
    Out.clear();
2843
52
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
2844
52
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
2845
52
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
2846
52
  }
2847
52
  if (VD->getType().isDestructedType() != QualType::DK_none) {
2848
8
    llvm::Constant *Dtor;
2849
8
    llvm::Constant *ID;
2850
8
    if (CGM.getLangOpts().OpenMPIsDevice) {
2851
2
      // Generate function that emits destructor call for the threadprivate
2852
2
      // copy of the variable VD
2853
2
      CodeGenFunction DtorCGF(CGM);
2854
2
2855
2
      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
2856
2
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2857
2
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
2858
2
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
2859
2
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
2860
2
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
2861
2
                            FunctionArgList(), Loc, Loc);
2862
2
      // Create a scope with an artificial location for the body of this
2863
2
      // function.
2864
2
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
2865
2
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
2866
2
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
2867
2
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
2868
2
      DtorCGF.FinishFunction();
2869
2
      Dtor = Fn;
2870
2
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
2871
2
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
2872
6
    } else {
2873
6
      Dtor = new llvm::GlobalVariable(
2874
6
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
2875
6
          llvm::GlobalValue::PrivateLinkage,
2876
6
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
2877
6
      ID = Dtor;
2878
6
    }
2879
8
    // Register the information for the entry associated with the destructor.
2880
8
    Out.clear();
2881
8
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
2882
8
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
2883
8
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
2884
8
  }
2885
52
  return CGM.getLangOpts().OpenMPIsDevice;
2886
52
}
2887
2888
/// Returns the address produced by __kmpc_threadprivate_cached for an
/// internal variable of type \p VarType whose name is \p Name followed by the
/// runtime's "artificial" suffix. The internal variable and its cache are
/// created on demand; the returned address uses the pointer alignment.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));

  // Arguments: loc, thread id, variable address, variable size, cache.
  llvm::Value *LocVal = emitUpdateLocation(CGF, SourceLocation());
  llvm::Value *ThreadID = getThreadID(CGF, SourceLocation());
  llvm::Value *VarPtr =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy);
  llvm::Value *VarSize = CGF.Builder.CreateIntCast(
      CGF.getTypeSize(VarType), CGM.SizeTy, /*isSigned=*/false);
  llvm::Value *Cache = getOrCreateInternalVariable(
      CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix));
  llvm::Value *Args[] = {LocVal, ThreadID, VarPtr, VarSize, Cache};

  llvm::Value *RawPtr = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args);
  llvm::Value *TypedPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      RawPtr, VarLVType->getPointerTo(/*AddrSpace=*/0));
  return Address(TypedPtr, CGM.getPointerAlign());
}
2911
2912
/// Emits the code for an OpenMP 'if' clause: \p ThenGen when \p Cond holds,
/// \p ElseGen otherwise. If the condition folds to a constant, only the
/// selected arm is emitted and no branch is generated.
void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                      const RegionCodeGenTy &ThenGen,
                                      const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // Constant condition: skip the test and the dead arm altogether.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    const RegionCodeGenTy &LiveArm = CondConstant ? ThenGen : ElseGen;
    LiveArm(CGF);
    return;
  }

  // The condition did not fold (or could not be elided): emit a real
  // conditional branch over the two arms.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // 'then' arm.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);

  // 'else' arm. The unconditional branches are not meant to carry a line
  // number.
  // NOTE(review): the object returned by CreateEmpty is discarded right away
  // in both places below; confirm this has the intended effect on the debug
  // location of the code that follows.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);

  // Continuation block for the code following the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}
2950
2951
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2952
                                       llvm::Function *OutlinedFn,
2953
                                       ArrayRef<llvm::Value *> CapturedVars,
2954
3.27k
                                       const Expr *IfCond) {
2955
3.27k
  if (!CGF.HaveInsertPoint())
2956
0
    return;
2957
3.27k
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2958
3.27k
  auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
2959
3.27k
                                                     PrePostActionTy &) {
2960
3.19k
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2961
3.19k
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2962
3.19k
    llvm::Value *Args[] = {
2963
3.19k
        RTLoc,
2964
3.19k
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2965
3.19k
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2966
3.19k
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
2967
3.19k
    RealArgs.append(std::begin(Args), std::end(Args));
2968
3.19k
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2969
3.19k
2970
3.19k
    llvm::FunctionCallee RTLFn =
2971
3.19k
        RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
2972
3.19k
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
2973
3.19k
  };
2974
3.27k
  auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
2975
3.27k
                                                          PrePostActionTy &) {
2976
167
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2977
167
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2978
167
    // Build calls:
2979
167
    // __kmpc_serialized_parallel(&Loc, GTid);
2980
167
    llvm::Value *Args[] = {RTLoc, ThreadID};
2981
167
    CGF.EmitRuntimeCall(
2982
167
        RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
2983
167
2984
167
    // OutlinedFn(&GTid, &zero, CapturedStruct);
2985
167
    Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2986
167
                                                        /*Name*/ ".zero.addr");
2987
167
    CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
2988
167
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2989
167
    // ThreadId for serialized parallels is 0.
2990
167
    OutlinedFnArgs.push_back(ZeroAddr.getPointer());
2991
167
    OutlinedFnArgs.push_back(ZeroAddr.getPointer());
2992
167
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2993
167
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2994
167
2995
167
    // __kmpc_end_serialized_parallel(&Loc, GTid);
2996
167
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2997
167
    CGF.EmitRuntimeCall(
2998
167
        RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
2999
167
        EndArgs);
3000
167
  };
3001
3.27k
  if (IfCond) {
3002
207
    emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
3003
3.07k
  } else {
3004
3.07k
    RegionCodeGenTy ThenRCG(ThenGen);
3005
3.07k
    ThenRCG(CGF);
3006
3.07k
  }
3007
3.27k
}
3008
3009
// If we're inside an (outlined) parallel region, use the region info's
3010
// thread-ID variable (it is passed in a first argument of the outlined function
3011
// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
3012
// regular serial code region, get thread ID by calling kmp_int32
3013
// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
3014
// return the address of that temp.
3015
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
3016
768
                                             SourceLocation Loc) {
3017
768
  if (auto *OMPRegionInfo =
3018
768
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3019
768
    if (OMPRegionInfo->getThreadIDVariable())
3020
258
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
3021
510
3022
510
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
3023
510
  QualType Int32Ty =
3024
510
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
3025
510
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
3026
510
  CGF.EmitStoreOfScalar(ThreadID,
3027
510
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
3028
510
3029
510
  return ThreadIDTemp;
3030
510
}
3031
3032
/// Returns the internal variable registered under \p Name, creating it on the
/// first request as a zero-initialized, non-constant global with common
/// linkage of type \p Ty in address space \p AddressSpace. A repeated request
/// must ask for the same type as the one the variable was created with.
llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
    llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
  // Render the twine into a flat string usable as the map key.
  SmallString<256> NameStorage;
  llvm::raw_svector_ostream NameOS(NameStorage);
  NameOS << Name;
  StringRef RuntimeName = NameOS.str();

  auto &Entry = *InternalVars.try_emplace(RuntimeName, nullptr).first;
  if (!Entry.second) {
    // First request for this name: materialize the global.
    Entry.second = new llvm::GlobalVariable(
        CGM.getModule(), Ty, /*IsConstant*/ false,
        llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
        Entry.first(), /*InsertBefore=*/nullptr,
        llvm::GlobalValue::NotThreadLocal, AddressSpace);
  } else {
    assert(Entry.second->getType()->getPointerElementType() == Ty &&
           "OMP internal variable has different type than requested");
  }
  return &*Entry.second;
}
3051
3052
426
/// Returns the internal lock variable for the critical region named
/// \p CriticalName. The variable name is built by getName() from
/// "gomp_critical_user_<CriticalName>" and "var".
llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  std::string UserPrefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string LockName = getName({UserPrefix, "var"});
  return getOrCreateInternalVariable(KmpCriticalNameTy, LockName);
}
3057
3058
namespace {
3059
/// Common pre(post)-action for different OpenMP constructs.
3060
class CommonActionTy final : public PrePostActionTy {
3061
  llvm::FunctionCallee EnterCallee;
3062
  ArrayRef<llvm::Value *> EnterArgs;
3063
  llvm::FunctionCallee ExitCallee;
3064
  ArrayRef<llvm::Value *> ExitArgs;
3065
  bool Conditional;
3066
  llvm::BasicBlock *ContBlock = nullptr;
3067
3068
public:
3069
  CommonActionTy(llvm::FunctionCallee EnterCallee,
3070
                 ArrayRef<llvm::Value *> EnterArgs,
3071
                 llvm::FunctionCallee ExitCallee,
3072
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
3073
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
3074
884
        ExitArgs(ExitArgs), Conditional(Conditional) {}
3075
442
  void Enter(CodeGenFunction &CGF) override {
3076
442
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
3077
442
    if (Conditional) {
3078
48
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
3079
48
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
3080
48
      ContBlock = CGF.createBasicBlock("omp_if.end");
3081
48
      // Generate the branch (If-stmt)
3082
48
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
3083
48
      CGF.EmitBlock(ThenBlock);
3084
48
    }
3085
442
  }
3086
48
  void Done(CodeGenFunction &CGF) {
3087
48
    // Emit the rest of blocks/branches
3088
48
    CGF.EmitBranch(ContBlock);
3089
48
    CGF.EmitBlock(ContBlock, true);
3090
48
  }
3091
935
  void Exit(CodeGenFunction &CGF) override {
3092
935
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
3093
935
  }
3094
};
3095
} // anonymous namespace
3096
3097
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
3098
                                         StringRef CriticalName,
3099
                                         const RegionCodeGenTy &CriticalOpGen,
3100
120
                                         SourceLocation Loc, const Expr *Hint) {
3101
120
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
3102
120
  // CriticalOpGen();
3103
120
  // __kmpc_end_critical(ident_t *, gtid, Lock);
3104
120
  // Prepare arguments and build a call to __kmpc_critical
3105
120
  if (!CGF.HaveInsertPoint())
3106
0
    return;
3107
120
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3108
120
                         getCriticalRegionLock(CriticalName)};
3109
120
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
3110
120
                                                std::end(Args));
3111
120
  if (Hint) {
3112
3
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
3113
3
        CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
3114
3
  }
3115
120
  CommonActionTy Action(
3116
120
      createRuntimeFunction(Hint ? 
OMPRTL__kmpc_critical_with_hint3
3117
120
                                 : 
OMPRTL__kmpc_critical117
),
3118
120
      EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
3119
120
  CriticalOpGen.setAction(Action);
3120
120
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
3121
120
}
3122
3123
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
3124
                                       const RegionCodeGenTy &MasterOpGen,
3125
9
                                       SourceLocation Loc) {
3126
9
  if (!CGF.HaveInsertPoint())
3127
0
    return;
3128
9
  // if(__kmpc_master(ident_t *, gtid)) {
3129
9
  //   MasterOpGen();
3130
9
  //   __kmpc_end_master(ident_t *, gtid);
3131
9
  // }
3132
9
  // Prepare arguments and build a call to __kmpc_master
3133
9
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3134
9
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
3135
9
                        createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
3136
9
                        /*Conditional=*/true);
3137
9
  MasterOpGen.setAction(Action);
3138
9
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
3139
9
  Action.Done(CGF);
3140
9
}
3141
3142
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
3143
8
                                        SourceLocation Loc) {
3144
8
  if (!CGF.HaveInsertPoint())
3145
0
    return;
3146
8
  // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
3147
8
  llvm::Value *Args[] = {
3148
8
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3149
8
      llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
3150
8
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
3151
8
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3152
2
    Region->emitUntiedSwitch(CGF);
3153
8
}
3154
3155
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
3156
                                          const RegionCodeGenTy &TaskgroupOpGen,
3157
92
                                          SourceLocation Loc) {
3158
92
  if (!CGF.HaveInsertPoint())
3159
0
    return;
3160
92
  // __kmpc_taskgroup(ident_t *, gtid);
3161
92
  // TaskgroupOpGen();
3162
92
  // __kmpc_end_taskgroup(ident_t *, gtid);
3163
92
  // Prepare arguments and build a call to __kmpc_taskgroup
3164
92
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3165
92
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
3166
92
                        createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
3167
92
                        Args);
3168
92
  TaskgroupOpGen.setAction(Action);
3169
92
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
3170
92
}
3171
3172
/// Given an array of pointers to variables, project the address of a
3173
/// given variable.
3174
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
3175
930
                                      unsigned Index, const VarDecl *Var) {
3176
930
  // Pull out the pointer to the variable.
3177
930
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
3178
930
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
3179
930
3180
930
  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
3181
930
  Addr = CGF.Builder.CreateElementBitCast(
3182
930
      Addr, CGF.ConvertTypeForMem(Var->getType()));
3183
930
  return Addr;
3184
930
}
3185
3186
/// Emits the internal helper
///   void copy_func(void *LHSArg, void *RHSArg);
/// which treats both arguments as arrays of variable pointers (of type
/// \p ArgsType) and copies every element of the second into the matching
/// element of the first using the corresponding assignment operation.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &Ctx = CGM.getContext();

  // Declare the two 'void *' parameters.
  ImplicitParamDecl LHSArg(Ctx, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           Ctx.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(Ctx, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           Ctx.VoidPtrTy, ImplicitParamDecl::Other);
  FunctionArgList Params;
  Params.push_back(&LHSArg);
  Params.push_back(&RHSArg);

  // Create the internal function and start emitting its body.
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Params);
  std::string FnName =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(FnInfo),
                                    llvm::GlobalValue::InternalLinkage, FnName,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, Fn, FnInfo, Params, Loc, Loc);

  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  auto LoadAsPointerArray = [&CGF, ArgsType](const ImplicitParamDecl &Arg) {
    return Address(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&Arg)),
                       ArgsType),
                   CGF.getPointerAlign());
  };
  Address DestArray = LoadAsPointerArray(LHSArg);
  Address SrcArray = LoadAsPointerArray(RHSArg);

  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  const unsigned NumOps = AssignmentOps.size();
  for (unsigned Idx = 0; Idx != NumOps; ++Idx) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[Idx])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, DestArray, Idx, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[Idx])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, SrcArray, Idx, SrcVar);

    // The copied type is that of the original copyprivate variable.
    QualType CopyTy =
        cast<DeclRefExpr>(CopyprivateVars[Idx])->getDecl()->getType();
    CGF.EmitOMPCopy(CopyTy, DestAddr, SrcAddr, DestVar, SrcVar,
                    AssignmentOps[Idx]);
  }
  CGF.FinishFunction();
  return Fn;
}
3239
3240
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
3241
                                       const RegionCodeGenTy &SingleOpGen,
3242
                                       SourceLocation Loc,
3243
                                       ArrayRef<const Expr *> CopyprivateVars,
3244
                                       ArrayRef<const Expr *> SrcExprs,
3245
                                       ArrayRef<const Expr *> DstExprs,
3246
39
                                       ArrayRef<const Expr *> AssignmentOps) {
3247
39
  if (!CGF.HaveInsertPoint())
3248
0
    return;
3249
39
  assert(CopyprivateVars.size() == SrcExprs.size() &&
3250
39
         CopyprivateVars.size() == DstExprs.size() &&
3251
39
         CopyprivateVars.size() == AssignmentOps.size());
3252
39
  ASTContext &C = CGM.getContext();
3253
39
  // int32 did_it = 0;
3254
39
  // if(__kmpc_single(ident_t *, gtid)) {
3255
39
  //   SingleOpGen();
3256
39
  //   __kmpc_end_single(ident_t *, gtid);
3257
39
  //   did_it = 1;
3258
39
  // }
3259
39
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
3260
39
  // <copy_func>, did_it);
3261
39
3262
39
  Address DidIt = Address::invalid();
3263
39
  if (!CopyprivateVars.empty()) {
3264
18
    // int32 did_it = 0;
3265
18
    QualType KmpInt32Ty =
3266
18
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3267
18
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
3268
18
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
3269
18
  }
3270
39
  // Prepare arguments and build a call to __kmpc_single
3271
39
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3272
39
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
3273
39
                        createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
3274
39
                        /*Conditional=*/true);
3275
39
  SingleOpGen.setAction(Action);
3276
39
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
3277
39
  if (DidIt.isValid()) {
3278
18
    // did_it = 1;
3279
18
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
3280
18
  }
3281
39
  Action.Done(CGF);
3282
39
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
3283
39
  // <copy_func>, did_it);
3284
39
  if (DidIt.isValid()) {
3285
18
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
3286
18
    QualType CopyprivateArrayTy =
3287
18
        C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
3288
18
                               /*IndexTypeQuals=*/0);
3289
18
    // Create a list of all private variables for copyprivate.
3290
18
    Address CopyprivateList =
3291
18
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
3292
61
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; 
++I43
) {
3293
43
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
3294
43
      CGF.Builder.CreateStore(
3295
43
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3296
43
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
3297
43
          Elem);
3298
43
    }
3299
18
    // Build function that copies private values from single region to all other
3300
18
    // threads in the corresponding parallel region.
3301
18
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
3302
18
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
3303
18
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
3304
18
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
3305
18
    Address CL =
3306
18
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
3307
18
                                                      CGF.VoidPtrTy);
3308
18
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
3309
18
    llvm::Value *Args[] = {
3310
18
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
3311
18
        getThreadID(CGF, Loc),        // i32 <gtid>
3312
18
        BufSize,                      // size_t <buf_size>
3313
18
        CL.getPointer(),              // void *<copyprivate list>
3314
18
        CpyFn,                        // void (*) (void *, void *) <copy_func>
3315
18
        DidItVal                      // i32 did_it
3316
18
    };
3317
18
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
3318
18
  }
3319
39
}
3320
3321
void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
3322
                                        const RegionCodeGenTy &OrderedOpGen,
3323
12
                                        SourceLocation Loc, bool IsThreads) {
3324
12
  if (!CGF.HaveInsertPoint())
3325
0
    return;
3326
12
  // __kmpc_ordered(ident_t *, gtid);
3327
12
  // OrderedOpGen();
3328
12
  // __kmpc_end_ordered(ident_t *, gtid);
3329
12
  // Prepare arguments and build a call to __kmpc_ordered
3330
12
  if (IsThreads) {
3331
8
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3332
8
    CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
3333
8
                          createRuntimeFunction(OMPRTL__kmpc_end_ordered),
3334
8
                          Args);
3335
8
    OrderedOpGen.setAction(Action);
3336
8
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3337
8
    return;
3338
8
  }
3339
4
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3340
4
}
3341
3342
679
unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  // Pick the ident_t barrier flag that matches the directive; anything not
  // listed explicitly gets the generic implicit-barrier flag.
  switch (Kind) {
  case OMPD_for:
    return OMP_IDENT_BARRIER_IMPL_FOR;
  case OMPD_sections:
    return OMP_IDENT_BARRIER_IMPL_SECTIONS;
  case OMPD_single:
    return OMP_IDENT_BARRIER_IMPL_SINGLE;
  case OMPD_barrier:
    return OMP_IDENT_BARRIER_EXPL;
  default:
    return OMP_IDENT_BARRIER_IMPL;
  }
}
3356
3357
void CGOpenMPRuntime::getDefaultScheduleAndChunk(
3358
    CodeGenFunction &CGF, const OMPLoopDirective &S,
3359
1.95k
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
3360
1.95k
  // Check if the loop directive is actually a doacross loop directive. In this
3361
1.95k
  // case choose static, 1 schedule.
3362
1.95k
  if (llvm::any_of(
3363
1.95k
          S.getClausesOfKind<OMPOrderedClause>(),
3364
1.95k
          [](const OMPOrderedClause *C) 
{ return C->getNumForLoops(); }14
)) {
3365
8
    ScheduleKind = OMPC_SCHEDULE_static;
3366
8
    // Chunk size is 1 in this case.
3367
8
    llvm::APInt ChunkSize(32, 1);
3368
8
    ChunkExpr = IntegerLiteral::Create(
3369
8
        CGF.getContext(), ChunkSize,
3370
8
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
3371
8
        SourceLocation());
3372
8
  }
3373
1.95k
}
3374
3375
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
3376
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
3377
517
                                      bool ForceSimpleCall) {
3378
517
  if (!CGF.HaveInsertPoint())
3379
0
    return;
3380
517
  // Build call __kmpc_cancel_barrier(loc, thread_id);
3381
517
  // Build call __kmpc_barrier(loc, thread_id);
3382
517
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
3383
517
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
3384
517
  // thread_id);
3385
517
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
3386
517
                         getThreadID(CGF, Loc)};
3387
517
  if (auto *OMPRegionInfo =
3388
339
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
3389
339
    if (!ForceSimpleCall && 
OMPRegionInfo->hasCancel()222
) {
3390
2
      llvm::Value *Result = CGF.EmitRuntimeCall(
3391
2
          createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
3392
2
      if (EmitChecks) {
3393
2
        // if (__kmpc_cancel_barrier()) {
3394
2
        //   exit from construct;
3395
2
        // }
3396
2
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
3397
2
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
3398
2
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
3399
2
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
3400
2
        CGF.EmitBlock(ExitBB);
3401
2
        //   exit from construct;
3402
2
        CodeGenFunction::JumpDest CancelDestination =
3403
2
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
3404
2
        CGF.EmitBranchThroughCleanup(CancelDestination);
3405
2
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
3406
2
      }
3407
2
      return;
3408
2
    }
3409
515
  }
3410
515
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
3411
515
}
3412
3413
/// Map the OpenMP loop schedule to the runtime enumeration.
3414
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
3415
9.82k
                                          bool Chunked, bool Ordered) {
3416
9.82k
  switch (ScheduleKind) {
3417
9.82k
  case OMPC_SCHEDULE_static:
3418
1.65k
    return Chunked ? 
(Ordered 930
?
OMP_ord_static_chunked7
:
OMP_sch_static_chunked923
)
3419
1.65k
                   : 
(Ordered 726
?
OMP_ord_static4
:
OMP_sch_static722
);
3420
9.82k
  case OMPC_SCHEDULE_dynamic:
3421
1.07k
    return Ordered ? 
OMP_ord_dynamic_chunked2
:
OMP_sch_dynamic_chunked1.07k
;
3422
9.82k
  case OMPC_SCHEDULE_guided:
3423
419
    return Ordered ? 
OMP_ord_guided_chunked1
:
OMP_sch_guided_chunked418
;
3424
9.82k
  case OMPC_SCHEDULE_runtime:
3425
416
    return Ordered ? 
OMP_ord_runtime4
:
OMP_sch_runtime412
;
3426
9.82k
  case OMPC_SCHEDULE_auto:
3427
417
    return Ordered ? 
OMP_ord_auto3
:
OMP_sch_auto414
;
3428
9.82k
  case OMPC_SCHEDULE_unknown:
3429
5.83k
    assert(!Chunked && "chunk was specified but schedule kind not known");
3430
5.83k
    return Ordered ? 
OMP_ord_static6
:
OMP_sch_static5.83k
;
3431
0
  }
3432
0
  llvm_unreachable("Unexpected runtime schedule");
3433
0
}
3434
3435
/// Map the OpenMP distribute schedule to the runtime enumeration.
3436
static OpenMPSchedType
3437
8.89k
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
3438
8.89k
  // only static is allowed for dist_schedule
3439
8.89k
  return Chunked ? 
OMP_dist_sch_static_chunked1.36k
:
OMP_dist_sch_static7.52k
;
3440
8.89k
}
3441
3442
bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
3443
3.00k
                                         bool Chunked) const {
3444
3.00k
  OpenMPSchedType Schedule =
3445
3.00k
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3446
3.00k
  return Schedule == OMP_sch_static;
3447
3.00k
}
3448
3449
bool CGOpenMPRuntime::isStaticNonchunked(
3450
2.96k
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3451
2.96k
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3452
2.96k
  return Schedule == OMP_dist_sch_static;
3453
2.96k
}
3454
3455
bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
3456
3.00k
                                      bool Chunked) const {
3457
3.00k
  OpenMPSchedType Schedule =
3458
3.00k
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3459
3.00k
  return Schedule == OMP_sch_static_chunked;
3460
3.00k
}
3461
3462
bool CGOpenMPRuntime::isStaticChunked(
3463
2.96k
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3464
2.96k
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3465
2.96k
  return Schedule == OMP_dist_sch_static_chunked;
3466
2.96k
}
3467
3468
766
bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  // Resolve the schedule as non-chunked and unordered; anything other than
  // plain static counts as dynamic.
  const OpenMPSchedType Resolved =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Resolved != OMP_sch_static_chunked && "cannot be chunked here");
  return !(Resolved == OMP_sch_static);
}
3474
3475
static int addMonoNonMonoModifier(OpenMPSchedType Schedule,
3476
                                  OpenMPScheduleClauseModifier M1,
3477
6.01k
                                  OpenMPScheduleClauseModifier M2) {
3478
6.01k
  int Modifier = 0;
3479
6.01k
  switch (M1) {
3480
6.01k
  case OMPC_SCHEDULE_MODIFIER_monotonic:
3481
15
    Modifier = OMP_sch_modifier_monotonic;
3482
15
    break;
3483
6.01k
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3484
7
    Modifier = OMP_sch_modifier_nonmonotonic;
3485
7
    break;
3486
6.01k
  case OMPC_SCHEDULE_MODIFIER_simd:
3487
6
    if (Schedule == OMP_sch_static_chunked)
3488
3
      Schedule = OMP_sch_static_balanced_chunked;
3489
6
    break;
3490
6.01k
  case OMPC_SCHEDULE_MODIFIER_last:
3491
5.99k
  case OMPC_SCHEDULE_MODIFIER_unknown:
3492
5.99k
    break;
3493
6.01k
  }
3494
6.01k
  switch (M2) {
3495
6.01k
  case OMPC_SCHEDULE_MODIFIER_monotonic:
3496
0
    Modifier = OMP_sch_modifier_monotonic;
3497
0
    break;
3498
6.01k
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3499
3
    Modifier = OMP_sch_modifier_nonmonotonic;
3500
3
    break;
3501
6.01k
  case OMPC_SCHEDULE_MODIFIER_simd:
3502
0
    if (Schedule == OMP_sch_static_chunked)
3503
0
      Schedule = OMP_sch_static_balanced_chunked;
3504
0
    break;
3505
6.01k
  case OMPC_SCHEDULE_MODIFIER_last:
3506
6.01k
  case OMPC_SCHEDULE_MODIFIER_unknown:
3507
6.01k
    break;
3508
6.01k
  }
3509
6.01k
  return Schedule | Modifier;
3510
6.01k
}
3511
3512
void CGOpenMPRuntime::emitForDispatchInit(
3513
    CodeGenFunction &CGF, SourceLocation Loc,
3514
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
3515
602
    bool Ordered, const DispatchRTInput &DispatchValues) {
3516
602
  if (!CGF.HaveInsertPoint())
3517
0
    return;
3518
602
  OpenMPSchedType Schedule = getRuntimeSchedule(
3519
602
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
3520
602
  assert(Ordered ||
3521
602
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
3522
602
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
3523
602
          Schedule != OMP_sch_static_balanced_chunked));
3524
602
  // Call __kmpc_dispatch_init(
3525
602
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
3526
602
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
3527
602
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
3528
602
3529
602
  // If the Chunk was not specified in the clause - use default value 1.
3530
602
  llvm::Value *Chunk = DispatchValues.Chunk ? 
DispatchValues.Chunk87
3531
602
                                            : 
CGF.Builder.getIntN(IVSize, 1)515
;
3532
602
  llvm::Value *Args[] = {
3533
602
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3534
602
      CGF.Builder.getInt32(addMonoNonMonoModifier(
3535
602
          Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
3536
602
      DispatchValues.LB,                                // Lower
3537
602
      DispatchValues.UB,                                // Upper
3538
602
      CGF.Builder.getIntN(IVSize, 1),                   // Stride
3539
602
      Chunk                                             // Chunk
3540
602
  };
3541
602
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
3542
602
}
3543
3544
static void emitForStaticInitCall(
3545
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
3546
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
3547
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
3548
5.41k
    const CGOpenMPRuntime::StaticRTInput &Values) {
3549
5.41k
  if (!CGF.HaveInsertPoint())
3550
0
    return;
3551
5.41k
3552
5.41k
  assert(!Values.Ordered);
3553
5.41k
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
3554
5.41k
         Schedule == OMP_sch_static_balanced_chunked ||
3555
5.41k
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
3556
5.41k
         Schedule == OMP_dist_sch_static ||
3557
5.41k
         Schedule == OMP_dist_sch_static_chunked);
3558
5.41k
3559
5.41k
  // Call __kmpc_for_static_init(
3560
5.41k
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
3561
5.41k
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
3562
5.41k
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
3563
5.41k
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
3564
5.41k
  llvm::Value *Chunk = Values.Chunk;
3565
5.41k
  if (Chunk == nullptr) {
3566
4.65k
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
3567
4.65k
            Schedule == OMP_dist_sch_static) &&
3568
4.65k
           "expected static non-chunked schedule");
3569
4.65k
    // If the Chunk was not specified in the clause - use default value 1.
3570
4.65k
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
3571
4.65k
  } else {
3572
759
    assert((Schedule == OMP_sch_static_chunked ||
3573
759
            Schedule == OMP_sch_static_balanced_chunked ||
3574
759
            Schedule == OMP_ord_static_chunked ||
3575
759
            Schedule == OMP_dist_sch_static_chunked) &&
3576
759
           "expected static chunked schedule");
3577
759
  }
3578
5.41k
  llvm::Value *Args[] = {
3579
5.41k
      UpdateLocation,
3580
5.41k
      ThreadId,
3581
5.41k
      CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1,
3582
5.41k
                                                  M2)), // Schedule type
3583
5.41k
      Values.IL.getPointer(),                           // &isLastIter
3584
5.41k
      Values.LB.getPointer(),                           // &LB
3585
5.41k
      Values.UB.getPointer(),                           // &UB
3586
5.41k
      Values.ST.getPointer(),                           // &Stride
3587
5.41k
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
3588
5.41k
      Chunk                                             // Chunk
3589
5.41k
  };
3590
5.41k
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
3591
5.41k
}
3592
3593
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
3594
                                        SourceLocation Loc,
3595
                                        OpenMPDirectiveKind DKind,
3596
                                        const OpenMPScheduleTy &ScheduleKind,
3597
2.45k
                                        const StaticRTInput &Values) {
3598
2.45k
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
3599
2.45k
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
3600
2.45k
  assert(isOpenMPWorksharingDirective(DKind) &&
3601
2.45k
         "Expected loop-based or sections-based directive.");
3602
2.45k
  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
3603
2.45k
                                             isOpenMPLoopDirective(DKind)
3604
2.45k
                                                 ? 
OMP_IDENT_WORK_LOOP2.40k
3605
2.45k
                                                 : 
OMP_IDENT_WORK_SECTIONS52
);
3606
2.45k
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
3607
2.45k
  llvm::FunctionCallee StaticInitFunction =
3608
2.45k
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3609
2.45k
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3610
2.45k
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
3611
2.45k
}
3612
3613
void CGOpenMPRuntime::emitDistributeStaticInit(
3614
    CodeGenFunction &CGF, SourceLocation Loc,
3615
    OpenMPDistScheduleClauseKind SchedKind,
3616
2.96k
    const CGOpenMPRuntime::StaticRTInput &Values) {
3617
2.96k
  OpenMPSchedType ScheduleNum =
3618
2.96k
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
3619
2.96k
  llvm::Value *UpdatedLocation =
3620
2.96k
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
3621
2.96k
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
3622
2.96k
  llvm::FunctionCallee StaticInitFunction =
3623
2.96k
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3624
2.96k
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3625
2.96k
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
3626
2.96k
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
3627
2.96k
}
3628
3629
void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
3630
                                          SourceLocation Loc,
3631
5.47k
                                          OpenMPDirectiveKind DKind) {
3632
5.47k
  if (!CGF.HaveInsertPoint())
3633
0
    return;
3634
5.47k
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
3635
5.47k
  llvm::Value *Args[] = {
3636
5.47k
      emitUpdateLocation(CGF, Loc,
3637
5.47k
                         isOpenMPDistributeDirective(DKind)
3638
5.47k
                             ? 
OMP_IDENT_WORK_DISTRIBUTE4.45k
3639
5.47k
                             : isOpenMPLoopDirective(DKind)
3640
1.01k
                                   ? 
OMP_IDENT_WORK_LOOP944
3641
1.01k
                                   : 
OMP_IDENT_WORK_SECTIONS68
),
3642
5.47k
      getThreadID(CGF, Loc)};
3643
5.47k
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
3644
5.47k
                      Args);
3645
5.47k
}
3646
3647
void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
3648
                                                 SourceLocation Loc,
3649
                                                 unsigned IVSize,
3650
27
                                                 bool IVSigned) {
3651
27
  if (!CGF.HaveInsertPoint())
3652
0
    return;
3653
27
  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
3654
27
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3655
27
  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
3656
27
}
3657
3658
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *NextArgs[] = {
      emitUpdateLocation(CGF, Loc), // Location
      getThreadID(CGF, Loc),        // Thread id
      IL.getPointer(),              // &isLastIter
      LB.getPointer(),              // &Lower
      UB.getPointer(),              // &Upper
      ST.getPointer()               // &Stride
  };
  llvm::Value *RawResult = CGF.EmitRuntimeCall(
      createDispatchNextFunction(IVSize, IVSigned), NextArgs);

  // Convert the call result from a signed 32-bit integer to bool.
  ASTContext &Ctx = CGF.getContext();
  QualType KmpInt32Ty = Ctx.getIntTypeForBitwidth(32, /*Signed=*/1);
  return CGF.EmitScalarConversion(RawResult, KmpInt32Ty, Ctx.BoolTy, Loc);
}
3681
3682
void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
3683
                                           llvm::Value *NumThreads,
3684
127
                                           SourceLocation Loc) {
3685
127
  if (!CGF.HaveInsertPoint())
3686
0
    return;
3687
127
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
3688
127
  llvm::Value *Args[] = {
3689
127
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3690
127
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
3691
127
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
3692
127
                      Args);
3693
127
}
3694
3695
void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
3696
                                         OpenMPProcBindClauseKind ProcBind,
3697
52
                                         SourceLocation Loc) {
3698
52
  if (!CGF.HaveInsertPoint())
3699
0
    return;
3700
52
  // Constants for proc bind value accepted by the runtime.
3701
52
  enum ProcBindTy {
3702
52
    ProcBindFalse = 0,
3703
52
    ProcBindTrue,
3704
52
    ProcBindMaster,
3705
52
    ProcBindClose,
3706
52
    ProcBindSpread,
3707
52
    ProcBindIntel,
3708
52
    ProcBindDefault
3709
52
  } RuntimeProcBind;
3710
52
  switch (ProcBind) {
3711
52
  case OMPC_PROC_BIND_master:
3712
16
    RuntimeProcBind = ProcBindMaster;
3713
16
    break;
3714
52
  case OMPC_PROC_BIND_close:
3715
16
    RuntimeProcBind = ProcBindClose;
3716
16
    break;
3717
52
  case OMPC_PROC_BIND_spread:
3718
20
    RuntimeProcBind = ProcBindSpread;
3719
20
    break;
3720
52
  case OMPC_PROC_BIND_unknown:
3721
0
    llvm_unreachable("Unsupported proc_bind value.");
3722
52
  }
3723
52
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
3724
52
  llvm::Value *Args[] = {
3725
52
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3726
52
      llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
3727
52
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
3728
52
}
3729
3730
void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
3731
40
                                SourceLocation Loc) {
3732
40
  if (!CGF.HaveInsertPoint())
3733
0
    return;
3734
40
  // Build call void __kmpc_flush(ident_t *loc)
3735
40
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
3736
40
                      emitUpdateLocation(CGF, Loc));
3737
40
}
3738
3739
namespace {
/// Field indexes of the kmp_task_t record, in declaration order.
enum KmpTaskTFields {
  /// Pointer to the list of shared variables.
  KmpTaskTShareds = 0,
  /// Task entry routine.
  KmpTaskTRoutine,
  /// Partition id, used by untied tasks.
  KmpTaskTPartId,
  /// First data slot: function that calls destructors for private variables.
  Data1,
  /// Second data slot: task priority.
  Data2,
  /// Lower bound (taskloops only).
  KmpTaskTLowerBound,
  /// Upper bound (taskloops only).
  KmpTaskTUpperBound,
  /// Stride (taskloops only).
  KmpTaskTStride,
  /// Is-last-iteration flag (taskloops only).
  KmpTaskTLastIter,
  /// Reduction data (taskloops only).
  KmpTaskTReductions,
};
} // anonymous namespace
3764
3765
4.50k
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3766
4.50k
  return OffloadEntriesTargetRegion.empty() &&
3767
4.50k
         
OffloadEntriesDeviceGlobalVar.empty()797
;
3768
4.50k
}
3769
3770
/// Initialize target region entry.
3771
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3772
    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3773
                                    StringRef ParentName, unsigned LineNum,
3774
1.78k
                                    unsigned Order) {
3775
1.78k
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3776
1.78k
                                             "only required for the device "
3777
1.78k
                                             "code generation.");
3778
1.78k
  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3779
1.78k
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3780
1.78k
                                   OMPTargetRegionEntryTargetRegion);
3781
1.78k
  ++OffloadingEntriesNum;
3782
1.78k
}
3783
3784
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3785
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3786
                                  StringRef ParentName, unsigned LineNum,
3787
                                  llvm::Constant *Addr, llvm::Constant *ID,
3788
6.09k
                                  OMPTargetRegionEntryKind Flags) {
3789
6.09k
  // If we are emitting code for a target, the entry is already initialized,
3790
6.09k
  // only has to be registered.
3791
6.09k
  if (CGM.getLangOpts().OpenMPIsDevice) {
3792
1.75k
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
3793
0
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
3794
0
          DiagnosticsEngine::Error,
3795
0
          "Unable to find target region on line '%0' in the device code.");
3796
0
      CGM.getDiags().Report(DiagID) << LineNum;
3797
0
      return;
3798
0
    }
3799
1.75k
    auto &Entry =
3800
1.75k
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3801
1.75k
    assert(Entry.isValid() && "Entry not initialized!");
3802
1.75k
    Entry.setAddress(Addr);
3803
1.75k
    Entry.setID(ID);
3804
1.75k
    Entry.setFlags(Flags);
3805
4.33k
  } else {
3806
4.33k
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3807
4.33k
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3808
4.33k
    ++OffloadingEntriesNum;
3809
4.33k
  }
3810
6.09k
}
3811
3812
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3813
    unsigned DeviceID, unsigned FileID, StringRef ParentName,
3814
4.06k
    unsigned LineNum) const {
3815
4.06k
  auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3816
4.06k
  if (PerDevice == OffloadEntriesTargetRegion.end())
3817
0
    return false;
3818
4.06k
  auto PerFile = PerDevice->second.find(FileID);
3819
4.06k
  if (PerFile == PerDevice->second.end())
3820
0
    return false;
3821
4.06k
  auto PerParentName = PerFile->second.find(ParentName);
3822
4.06k
  if (PerParentName == PerFile->second.end())
3823
342
    return false;
3824
3.71k
  auto PerLine = PerParentName->second.find(LineNum);
3825
3.71k
  if (PerLine == PerParentName->second.end())
3826
87
    return false;
3827
3.63k
  // Fail if this entry is already registered.
3828
3.63k
  if (PerLine->second.getAddress() || 
PerLine->second.getID()3.48k
)
3829
145
    return false;
3830
3.48k
  return true;
3831
3.48k
}
3832
3833
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3834
1.44k
    const OffloadTargetRegionEntryInfoActTy &Action) {
3835
1.44k
  // Scan all target region entries and perform the provided action.
3836
1.44k
  for (const auto &D : OffloadEntriesTargetRegion)
3837
1.43k
    for (const auto &F : D.second)
3838
1.43k
      for (const auto &P : F.second)
3839
3.48k
        for (const auto &L : P.second)
3840
6.07k
          Action(D.first, F.first, P.first(), L.first, L.second);
3841
1.44k
}
3842
3843
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3844
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
3845
                                       OMPTargetGlobalVarEntryKind Flags,
3846
66
                                       unsigned Order) {
3847
66
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3848
66
                                             "only required for the device "
3849
66
                                             "code generation.");
3850
66
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3851
66
  ++OffloadingEntriesNum;
3852
66
}
3853
3854
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3855
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3856
                                     CharUnits VarSize,
3857
                                     OMPTargetGlobalVarEntryKind Flags,
3858
244
                                     llvm::GlobalValue::LinkageTypes Linkage) {
3859
244
  if (CGM.getLangOpts().OpenMPIsDevice) {
3860
89
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3861
89
    assert(Entry.isValid() && Entry.getFlags() == Flags &&
3862
89
           "Entry not initialized!");
3863
89
    assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3864
89
           "Resetting with the new address.");
3865
89
    if (Entry.getAddress() && 
hasDeviceGlobalVarEntryInfo(VarName)26
) {
3866
26
      if (Entry.getVarSize().isZero()) {
3867
2
        Entry.setVarSize(VarSize);
3868
2
        Entry.setLinkage(Linkage);
3869
2
      }
3870
26
      return;
3871
26
    }
3872
63
    Entry.setVarSize(VarSize);
3873
63
    Entry.setLinkage(Linkage);
3874
63
    Entry.setAddress(Addr);
3875
155
  } else {
3876
155
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
3877
75
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3878
75
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
3879
75
             "Entry not initialized!");
3880
75
      assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3881
75
             "Resetting with the new address.");
3882
75
      if (Entry.getVarSize().isZero()) {
3883
10
        Entry.setVarSize(VarSize);
3884
10
        Entry.setLinkage(Linkage);
3885
10
      }
3886
75
      return;
3887
75
    }
3888
80
    OffloadEntriesDeviceGlobalVar.try_emplace(
3889
80
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3890
80
    ++OffloadingEntriesNum;
3891
80
  }
3892
244
}
3893
3894
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3895
    actOnDeviceGlobalVarEntriesInfo(
3896
1.44k
        const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3897
1.44k
  // Scan all target region entries and perform the provided action.
3898
1.44k
  for (const auto &E : OffloadEntriesDeviceGlobalVar)
3899
143
    Action(E.getKey(), E.getValue());
3900
1.44k
}
3901
3902
/// Build the offloading binary descriptor and the functions that register and
/// unregister it with the offloading runtime.
///
/// \returns the registration function, or nullptr when compiling for the
/// device or when no offload entries were recorded.
llvm::Function *
CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
  // Device compilations and translation units without entries need nothing.
  if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
    return nullptr;

  llvm::Module &Mod = CGM.getModule();
  ASTContext &Ctx = CGM.getContext();

  // Offloading targets requested for this compilation.
  const std::vector<llvm::Triple> &Devices = CGM.getLangOpts().OMPTargetTriples;

  // A descriptor only makes sense when at least one device was specified.
  assert(!Devices.empty() && "No OpenMP offloading devices??");

  // External symbols delimiting the host entries section; they are declared
  // here without an initializer and are expected to be defined by the linker.
  llvm::Type *EntryTy =
      CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
  std::string BeginSymbol = getName({"omp_offloading", "entries_begin"});
  auto *HostEntriesBegin = new llvm::GlobalVariable(
      Mod, EntryTy, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
      /*Initializer=*/nullptr, BeginSymbol);
  std::string EndSymbol = getName({"omp_offloading", "entries_end"});
  auto *HostEntriesEnd = new llvm::GlobalVariable(
      Mod, EntryTy, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
      /*Initializer=*/nullptr, EndSymbol);

  // One device image record per offloading target.
  auto *ImageTy = cast<llvm::StructType>(
      CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy()));
  ConstantInitBuilder ImagesBuilder(CGM);
  ConstantArrayBuilder ImageEntries = ImagesBuilder.beginArray(ImageTy);

  for (const llvm::Triple &Device : Devices) {
    StringRef TripleStr = Device.getTriple();

    // Image start/end symbols are weak externals suffixed with the triple.
    std::string StartPrefix = getName({"omp_offloading", "img_start", ""});
    auto *ImgBegin = new llvm::GlobalVariable(
        Mod, CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::ExternalWeakLinkage,
        /*Initializer=*/nullptr, Twine(StartPrefix).concat(TripleStr));
    std::string EndPrefix = getName({"omp_offloading", "img_end", ""});
    auto *ImgEnd = new llvm::GlobalVariable(
        Mod, CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::ExternalWeakLinkage,
        /*Initializer=*/nullptr, Twine(EndPrefix).concat(TripleStr));

    llvm::Constant *ImageFields[] = {ImgBegin, ImgEnd, HostEntriesBegin,
                                     HostEntriesEnd};
    createConstantGlobalStructAndAddToParent(CGM, getTgtDeviceImageQTy(),
                                             ImageFields, ImageEntries);
  }

  // Materialize the array of device images as a global.
  std::string ImagesName = getName({"omp_offloading", "device_images"});
  llvm::GlobalVariable *DeviceImages = ImageEntries.finishAndCreateGlobal(
      ImagesName, CGM.getPointerAlign(), /*isConstant=*/true);
  DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  // Pair of zero indices used by the constant GEP below.
  llvm::Constant *ZeroZero[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
                                llvm::Constant::getNullValue(CGM.Int32Ty)};

  // Descriptor: device count, device images pointer, host entries range.
  llvm::Constant *DescFields[] = {
      llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()),
      llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(),
                                           DeviceImages, ZeroZero),
      HostEntriesBegin, HostEntriesEnd};
  std::string DescName = getName({"omp_offloading", "descriptor"});
  llvm::GlobalVariable *Desc =
      createGlobalStruct(CGM, getTgtBinaryDescriptorQTy(), /*IsConstant=*/true,
                         DescFields, DescName);

  // Unregistration function: takes one void* parameter and calls the
  // runtime's unregister entry point with the descriptor.
  llvm::Function *UnRegFn;
  {
    FunctionArgList UnregArgs;
    ImplicitParamDecl DummyPtr(Ctx, Ctx.VoidPtrTy, ImplicitParamDecl::Other);
    UnregArgs.push_back(&DummyPtr);

    CodeGenFunction CGF(CGM);
    // Global (de-)initializers do not belong to any particular construct, so
    // no debug info is produced for them.
    CGF.disableDebugInfo();
    const auto &FnInfo =
        CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, UnregArgs);
    llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
    std::string UnregName = getName({"omp_offloading", "descriptor_unreg"});
    UnRegFn = CGM.CreateGlobalInitOrDestructFunction(FnTy, UnregName, FnInfo);
    CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, UnRegFn, FnInfo, UnregArgs);
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
                        Desc);
    CGF.FinishFunction();
  }

  // Registration function: registers the descriptor and arranges for the
  // unregistration function to run as a global destructor.
  llvm::Function *RegFn;
  {
    CodeGenFunction CGF(CGM);
    // See above: no debug info for global (de-)initializers.
    CGF.disableDebugInfo();
    const auto &FnInfo = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);

    // The function name encodes the sorted list of offload target triples; it
    // later serves as the COMDAT key for this combination of targets.
    SmallVector<StringRef, 4U> NameParts(Devices.size() + 2U);
    NameParts[0] = "omp_offloading";
    NameParts[1] = "descriptor_reg";
    auto PartIt = std::next(NameParts.begin(), 2);
    for (const llvm::Triple &Target : Devices) {
      *PartIt = Target.getTriple();
      ++PartIt;
    }
    llvm::sort(std::next(NameParts.begin(), 2), NameParts.end());
    std::string RegName = getName(NameParts);
    RegFn = CGM.CreateGlobalInitOrDestructFunction(FnTy, RegName, FnInfo);
    CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, RegFn, FnInfo,
                      FunctionArgList());
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc);
    // A dummy variable drives registerGlobalDtor so the existing ctor/dtor
    // emission logic can be reused for the unregistration call.
    ImplicitParamDecl RegUnregVar(Ctx, Ctx.getTranslationUnitDecl(),
                                  SourceLocation(), nullptr, Ctx.CharTy,
                                  ImplicitParamDecl::Other);
    CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
    CGF.FinishFunction();
  }

  if (CGM.supportsCOMDAT()) {
    // Calling the registration function once is sufficient, so the
    // registration/unregistration functions and their data are placed in one
    // COMDAT group keyed by the registration function's name.
    llvm::Comdat *Group = Mod.getOrInsertComdat(RegFn->getName());
    RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
    RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility);
    RegFn->setComdat(Group);
    UnRegFn->setComdat(Group);
    DeviceImages->setComdat(Group);
    Desc->setComdat(Group);
  }
  return RegFn;
}
4052
4053
void CGOpenMPRuntime::createOffloadEntry(
4054
    llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
4055
5.54k
    llvm::GlobalValue::LinkageTypes Linkage) {
4056
5.54k
  StringRef Name = Addr->getName();
4057
5.54k
  llvm::Module &M = CGM.getModule();
4058
5.54k
  llvm::LLVMContext &C = M.getContext();
4059
5.54k
4060
5.54k
  // Create constant string with the name.
4061
5.54k
  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
4062
5.54k
4063
5.54k
  std::string StringName = getName({"omp_offloading", "entry_name"});
4064
5.54k
  auto *Str = new llvm::GlobalVariable(
4065
5.54k
      M, StrPtrInit->getType(), /*isConstant=*/true,
4066
5.54k
      llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
4067
5.54k
  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
4068
5.54k
4069
5.54k
  llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
4070
5.54k
                            llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
4071
5.54k
                            llvm::ConstantInt::get(CGM.SizeTy, Size),
4072
5.54k
                            llvm::ConstantInt::get(CGM.Int32Ty, Flags),
4073
5.54k
                            llvm::ConstantInt::get(CGM.Int32Ty, 0)};
4074
5.54k
  std::string EntryName = getName({"omp_offloading", "entry", ""});
4075
5.54k
  llvm::GlobalVariable *Entry = createGlobalStruct(
4076
5.54k
      CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
4077
5.54k
      Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
4078
5.54k
4079
5.54k
  // The entry has to be created in the section the linker expects it to be.
4080
5.54k
  std::string Section = getName({"omp_offloading", "entries"});
4081
5.54k
  Entry->setSection(Section);
4082
5.54k
}
4083
4084
1.80k
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
4085
1.80k
  // Emit the offloading entries and metadata so that the device codegen side
4086
1.80k
  // can easily figure out what to emit. The produced metadata looks like
4087
1.80k
  // this:
4088
1.80k
  //
4089
1.80k
  // !omp_offload.info = !{!1, ...}
4090
1.80k
  //
4091
1.80k
  // Right now we only generate metadata for function that contain target
4092
1.80k
  // regions.
4093
1.80k
4094
1.80k
  // If we do not have entries, we don't need to do anything.
4095
1.80k
  if (OffloadEntriesInfoManager.empty())
4096
367
    return;
4097
1.44k
4098
1.44k
  llvm::Module &M = CGM.getModule();
4099
1.44k
  llvm::LLVMContext &C = M.getContext();
4100
1.44k
  SmallVector<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
4101
1.44k
      OrderedEntries(OffloadEntriesInfoManager.size());
4102
1.44k
  llvm::SmallVector<StringRef, 16> ParentFunctions(
4103
1.44k
      OffloadEntriesInfoManager.size());
4104
1.44k
4105
1.44k
  // Auxiliary methods to create metadata values and strings.
4106
30.8k
  auto &&GetMDInt = [this](unsigned V) {
4107
30.8k
    return llvm::ConstantAsMetadata::get(
4108
30.8k
        llvm::ConstantInt::get(CGM.Int32Ty, V));
4109
30.8k
  };
4110
1.44k
4111
6.21k
  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
4112
1.44k
4113
1.44k
  // Create the offloading info metadata node.
4114
1.44k
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
4115
1.44k
4116
1.44k
  // Create function that emits metadata for each target region entry;
4117
1.44k
  auto &&TargetRegionMetadataEmitter =
4118
1.44k
      [&C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, &GetMDString](
4119
1.44k
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
4120
1.44k
          unsigned Line,
4121
6.07k
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
4122
6.07k
        // Generate metadata for target regions. Each entry of this metadata
4123
6.07k
        // contains:
4124
6.07k
        // - Entry 0 -> Kind of this type of metadata (0).
4125
6.07k
        // - Entry 1 -> Device ID of the file where the entry was identified.
4126
6.07k
        // - Entry 2 -> File ID of the file where the entry was identified.
4127
6.07k
        // - Entry 3 -> Mangled name of the function where the entry was
4128
6.07k
        // identified.
4129
6.07k
        // - Entry 4 -> Line in the file where the entry was identified.
4130
6.07k
        // - Entry 5 -> Order the entry was created.
4131
6.07k
        // The first element of the metadata node is the kind.
4132
6.07k
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
4133
6.07k
                                 GetMDInt(FileID),      GetMDString(ParentName),
4134
6.07k
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};
4135
6.07k
4136
6.07k
        // Save this entry in the right position of the ordered entries array.
4137
6.07k
        OrderedEntries[E.getOrder()] = &E;
4138
6.07k
        ParentFunctions[E.getOrder()] = ParentName;
4139
6.07k
4140
6.07k
        // Add metadata to the named metadata node.
4141
6.07k
        MD->addOperand(llvm::MDNode::get(C, Ops));
4142
6.07k
      };
4143
1.44k
4144
1.44k
  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
4145
1.44k
      TargetRegionMetadataEmitter);
4146
1.44k
4147
1.44k
  // Create function that emits metadata for each device global variable entry;
4148
1.44k
  auto &&DeviceGlobalVarMetadataEmitter =
4149
1.44k
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
4150
1.44k
       MD](StringRef MangledName,
4151
1.44k
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
4152
1.44k
               &E) {
4153
143
        // Generate metadata for global variables. Each entry of this metadata
4154
143
        // contains:
4155
143
        // - Entry 0 -> Kind of this type of metadata (1).
4156
143
        // - Entry 1 -> Mangled name of the variable.
4157
143
        // - Entry 2 -> Declare target kind.
4158
143
        // - Entry 3 -> Order the entry was created.
4159
143
        // The first element of the metadata node is the kind.
4160
143
        llvm::Metadata *Ops[] = {
4161
143
            GetMDInt(E.getKind()), GetMDString(MangledName),
4162
143
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
4163
143
4164
143
        // Save this entry in the right position of the ordered entries array.
4165
143
        OrderedEntries[E.getOrder()] = &E;
4166
143
4167
143
        // Add metadata to the named metadata node.
4168
143
        MD->addOperand(llvm::MDNode::get(C, Ops));
4169
143
      };
4170
1.44k
4171
1.44k
  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
4172
1.44k
      DeviceGlobalVarMetadataEmitter);
4173
1.44k
4174
6.21k
  for (const auto *E : OrderedEntries) {
4175
6.21k
    assert(E && "All ordered entries must exist!");
4176
6.21k
    if (const auto *CE =
4177
6.07k
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
4178
6.07k
                E)) {
4179
6.07k
      if (!CE->getID() || 
!CE->getAddress()6.07k
) {
4180
2
        // Do not blame the entry if the parent funtion is not emitted.
4181
2
        StringRef FnName = ParentFunctions[CE->getOrder()];
4182
2
        if (!CGM.GetGlobalValue(FnName))
4183
1
          continue;
4184
1
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
4185
1
            DiagnosticsEngine::Error,
4186
1
            "Offloading entry for target region is incorrect: either the "
4187
1
            "address or the ID is invalid.");
4188
1
        CGM.getDiags().Report(DiagID);
4189
1
        continue;
4190
1
      }
4191
6.07k
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
4192
6.07k
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
4193
6.07k
    } else 
if (const auto *143
CE143
=
4194
143
                   dyn_cast<OffloadEntriesInfoManagerTy::
4195
143
                                OffloadEntryInfoDeviceGlobalVar>(E)) {
4196
143
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
4197
143
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
4198
143
              CE->getFlags());
4199
143
      switch (Flags) {
4200
143
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
4201
124
        if (CGM.getLangOpts().OpenMPIsDevice &&
4202
124
            
CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()52
)
4203
1
          continue;
4204
123
        if (!CE->getAddress()) {
4205
0
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
4206
0
              DiagnosticsEngine::Error,
4207
0
              "Offloading entry for declare target variable is incorrect: the "
4208
0
              "address is invalid.");
4209
0
          CGM.getDiags().Report(DiagID);
4210
0
          continue;
4211
0
        }
4212
123
        // The vaiable has no definition - no need to add the entry.
4213
123
        if (CE->getVarSize().isZero())
4214
36
          continue;
4215
87
        break;
4216
87
      }
4217
87
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
4218
19
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
4219
19
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
4220
19
               "Declaret target link address is set.");
4221
19
        if (CGM.getLangOpts().OpenMPIsDevice)
4222
11
          continue;
4223
8
        if (!CE->getAddress()) {
4224
0
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
4225
0
              DiagnosticsEngine::Error,
4226
0
              "Offloading entry for declare target variable is incorrect: the "
4227
0
              "address is invalid.");
4228
0
          CGM.getDiags().Report(DiagID);
4229
0
          continue;
4230
0
        }
4231
8
        break;
4232
95
      }
4233
95
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
4234
95
                         CE->getVarSize().getQuantity(), Flags,
4235
95
                         CE->getLinkage());
4236
95
    } else {
4237
0
      llvm_unreachable("Unsupported entry kind.");
4238
0
    }
4239
6.21k
  }
4240
1.44k
}
4241
4242
/// Loads all the offload entries information from the host IR
4243
/// metadata.
4244
3.46k
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
4245
3.46k
  // If we are in target mode, load the metadata from the host IR. This code has
4246
3.46k
  // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
4247
3.46k
4248
3.46k
  if (!CGM.getLangOpts().OpenMPIsDevice)
4249
3.13k
    return;
4250
332
4251
332
  if (CGM.getLangOpts().OMPHostIRFile.empty())
4252
0
    return;
4253
332
4254
332
  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
4255
332
  if (auto EC = Buf.getError()) {
4256
0
    CGM.getDiags().Report(diag::err_cannot_open_file)
4257
0
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
4258
0
    return;
4259
0
  }
4260
332
4261
332
  llvm::LLVMContext C;
4262
332
  auto ME = expectedToErrorOrAndEmitErrors(
4263
332
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
4264
332
4265
332
  if (auto EC = ME.getError()) {
4266
0
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
4267
0
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
4268
0
    CGM.getDiags().Report(DiagID)
4269
0
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
4270
0
    return;
4271
0
  }
4272
332
4273
332
  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
4274
332
  if (!MD)
4275
23
    return;
4276
309
4277
1.85k
  
for (llvm::MDNode *MN : MD->operands())309
{
4278
9.11k
    auto &&GetMDInt = [MN](unsigned Idx) {
4279
9.11k
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
4280
9.11k
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
4281
9.11k
    };
4282
1.85k
4283
1.85k
    auto &&GetMDString = [MN](unsigned Idx) {
4284
1.85k
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
4285
1.85k
      return V->getString();
4286
1.85k
    };
4287
1.85k
4288
1.85k
    switch (GetMDInt(0)) {
4289
1.85k
    default:
4290
0
      llvm_unreachable("Unexpected metadata!");
4291
1.85k
      
break0
;
4292
1.85k
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
4293
1.78k
        OffloadingEntryInfoTargetRegion:
4294
1.78k
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
4295
1.78k
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
4296
1.78k
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
4297
1.78k
          /*Order=*/GetMDInt(5));
4298
1.78k
      break;
4299
1.85k
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
4300
66
        OffloadingEntryInfoDeviceGlobalVar:
4301
66
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
4302
66
          /*MangledName=*/GetMDString(1),
4303
66
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
4304
66
              /*Flags=*/GetMDInt(2)),
4305
66
          /*Order=*/GetMDInt(3));
4306
66
      break;
4307
1.85k
    }
4308
1.85k
  }
4309
309
}
4310
4311
442
/// Lazily build the kmp_routine_entry_t function-pointer type and cache both
/// its AST form (KmpRoutineEntryPtrQTy) and its LLVM form
/// (KmpRoutineEntryPtrTy). Does nothing once the LLVM type has been set.
///
/// \param KmpInt32Ty Type used for both the return value and the first
/// parameter of the routine.
void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  // Already created by an earlier call.
  if (KmpRoutineEntryPtrTy)
    return;

  // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
  ASTContext &Ctx = CGM.getContext();
  QualType ParamTys[] = {KmpInt32Ty, Ctx.VoidPtrTy};
  FunctionProtoType::ExtProtoInfo ProtoInfo;
  QualType RoutineFnTy = Ctx.getFunctionType(KmpInt32Ty, ParamTys, ProtoInfo);
  KmpRoutineEntryPtrQTy = Ctx.getPointerType(RoutineFnTy);
  KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
}
4322
4323
11.2k
/// Return the AST record type describing one offload entry, creating and
/// caching it in TgtOffloadEntryQTy on first use.
QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
  if (!TgtOffloadEntryQTy.isNull())
    return TgtOffloadEntryQTy;

  // The record built here mirrors:
  // struct __tgt_offload_entry{
  //   void      *addr;       // Pointer to the offload entry info.
  //                          // (function or global)
  //   char      *name;       // Name of the function or global.
  //   size_t     size;       // Size of the entry info (0 if it a function).
  //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
  //   int32_t    reserved;   // Reserved, to use by the runtime library.
  // };
  ASTContext &Ctx = CGM.getContext();
  RecordDecl *EntryRD = Ctx.buildImplicitRecord("__tgt_offload_entry");
  EntryRD->startDefinition();
  addFieldToRecordDecl(Ctx, EntryRD, Ctx.VoidPtrTy);
  addFieldToRecordDecl(Ctx, EntryRD, Ctx.getPointerType(Ctx.CharTy));
  addFieldToRecordDecl(Ctx, EntryRD, Ctx.getSizeType());
  // 'flags' and 'reserved' share the same signed 32-bit integer type.
  QualType Int32Ty =
      Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true);
  addFieldToRecordDecl(Ctx, EntryRD, Int32Ty);
  addFieldToRecordDecl(Ctx, EntryRD, Int32Ty);
  EntryRD->completeDefinition();
  // The packed attribute is attached after the definition is completed.
  EntryRD->addAttr(PackedAttr::CreateImplicit(Ctx));
  TgtOffloadEntryQTy = Ctx.getRecordType(EntryRD);
  return TgtOffloadEntryQTy;
}
4351
4352
3.40k
/// Return the AST record type describing one device image, creating and
/// caching it in TgtDeviceImageQTy on first use.
QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
  if (!TgtDeviceImageQTy.isNull())
    return TgtDeviceImageQTy;

  // The record built here mirrors:
  // struct __tgt_device_image{
  // void   *ImageStart;       // Pointer to the target code start.
  // void   *ImageEnd;         // Pointer to the target code end.
  // // We also add the host entries to the device image, as it may be useful
  // // for the target runtime to have access to that information.
  // __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all
  //                                       // the entries.
  // __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
  //                                       // entries (non inclusive).
  // };
  ASTContext &Ctx = CGM.getContext();
  RecordDecl *ImageRD = Ctx.buildImplicitRecord("__tgt_device_image");
  ImageRD->startDefinition();
  addFieldToRecordDecl(Ctx, ImageRD, Ctx.VoidPtrTy);
  addFieldToRecordDecl(Ctx, ImageRD, Ctx.VoidPtrTy);
  // Both table bounds are pointers to the offload entry record.
  QualType EntryPtrTy = Ctx.getPointerType(getTgtOffloadEntryQTy());
  addFieldToRecordDecl(Ctx, ImageRD, EntryPtrTy);
  addFieldToRecordDecl(Ctx, ImageRD, EntryPtrTy);
  ImageRD->completeDefinition();
  TgtDeviceImageQTy = Ctx.getRecordType(ImageRD);
  return TgtDeviceImageQTy;
}
4377
4378
3.40k
/// Return the AST record type of the binary descriptor, creating and caching
/// it in TgtBinaryDescriptorQTy on first use.
QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
  if (!TgtBinaryDescriptorQTy.isNull())
    return TgtBinaryDescriptorQTy;

  // The record built here mirrors:
  // struct __tgt_bin_desc{
  //   int32_t              NumDevices;      // Number of devices supported.
  //   __tgt_device_image   *DeviceImages;   // Arrays of device images
  //                                         // (one per device).
  //   __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all the
  //                                         // entries.
  //   __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
  //                                         // entries (non inclusive).
  // };
  ASTContext &Ctx = CGM.getContext();
  RecordDecl *DescRD = Ctx.buildImplicitRecord("__tgt_bin_desc");
  DescRD->startDefinition();
  addFieldToRecordDecl(
      Ctx, DescRD,
      Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
  addFieldToRecordDecl(Ctx, DescRD,
                       Ctx.getPointerType(getTgtDeviceImageQTy()));
  // Both table bounds are pointers to the offload entry record.
  QualType EntryPtrTy = Ctx.getPointerType(getTgtOffloadEntryQTy());
  addFieldToRecordDecl(Ctx, DescRD, EntryPtrTy);
  addFieldToRecordDecl(Ctx, DescRD, EntryPtrTy);
  DescRD->completeDefinition();
  TgtBinaryDescriptorQTy = Ctx.getRecordType(DescRD);
  return TgtBinaryDescriptorQTy;
}
4402
4403
namespace {
4404
struct PrivateHelpersTy {
4405
  PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
4406
                   const VarDecl *PrivateElemInit)
4407
      : Original(Original), PrivateCopy(PrivateCopy),
4408
842
        PrivateElemInit(PrivateElemInit) {}
4409
  const VarDecl *Original;
4410
  const VarDecl *PrivateCopy;
4411
  const VarDecl *PrivateElemInit;
4412
};
4413
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
4414
} // anonymous namespace
4415
4416
/// Build the implicit record that stores the private copies of a task, one
/// field per entry of \p Privates in the given order.
///
/// \returns the completed record, or nullptr when \p Privates is empty.
static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (Privates.empty())
    return nullptr;

  ASTContext &Ctx = CGM.getContext();
  // Build struct .kmp_privates_t. {
  //         /*  private vars  */
  //       };
  RecordDecl *PrivatesRD = Ctx.buildImplicitRecord(".kmp_privates.t");
  PrivatesRD->startDefinition();
  for (const PrivateDataTy &Entry : Privates) {
    const VarDecl *OrigVD = Entry.second.Original;
    // The field has the original variable's type with any reference removed.
    QualType FieldTy = OrigVD->getType().getNonReferenceType();
    FieldDecl *Field = addFieldToRecordDecl(Ctx, PrivatesRD, FieldTy);
    // Carry every 'aligned' attribute of the original over to the field.
    if (OrigVD->hasAttrs())
      for (AlignedAttr *Aligned : OrigVD->specific_attrs<AlignedAttr>())
        Field->addAttr(Aligned);
  }
  PrivatesRD->completeDefinition();
  return PrivatesRD;
}
4441
4442
/// Build the implicit kmp_task_t record (and the kmp_cmplrdata_t union it
/// uses). Taskloop directives get five extra trailing fields.
///
/// Other code in this file selects fields of this record by position (for
/// example via the KmpTaskT* indices), so the field order must not change.
static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  ASTContext &Ctx = CGM.getContext();
  // Build struct kmp_task_t {
  //         void *              shareds;
  //         kmp_routine_entry_t routine;
  //         kmp_int32           part_id;
  //         kmp_cmplrdata_t data1;
  //         kmp_cmplrdata_t data2;
  // For taskloops additional fields:
  //         kmp_uint64          lb;
  //         kmp_uint64          ub;
  //         kmp_int64           st;
  //         kmp_int32           liter;
  //         void *              reductions;
  //       };

  // Union of a kmp_int32 and a routine entry pointer.
  RecordDecl *CmplrDataUD =
      Ctx.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
  CmplrDataUD->startDefinition();
  addFieldToRecordDecl(Ctx, CmplrDataUD, KmpInt32Ty);
  addFieldToRecordDecl(Ctx, CmplrDataUD, KmpRoutineEntryPointerQTy);
  CmplrDataUD->completeDefinition();
  QualType CmplrDataTy = Ctx.getRecordType(CmplrDataUD);

  RecordDecl *TaskRD = Ctx.buildImplicitRecord("kmp_task_t");
  TaskRD->startDefinition();
  addFieldToRecordDecl(Ctx, TaskRD, Ctx.VoidPtrTy);
  addFieldToRecordDecl(Ctx, TaskRD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(Ctx, TaskRD, KmpInt32Ty);
  addFieldToRecordDecl(Ctx, TaskRD, CmplrDataTy);
  addFieldToRecordDecl(Ctx, TaskRD, CmplrDataTy);
  if (isOpenMPTaskLoopDirective(Kind)) {
    QualType UInt64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/false);
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
    addFieldToRecordDecl(Ctx, TaskRD, UInt64Ty);      // lb
    addFieldToRecordDecl(Ctx, TaskRD, UInt64Ty);      // ub
    addFieldToRecordDecl(Ctx, TaskRD, Int64Ty);       // st
    addFieldToRecordDecl(Ctx, TaskRD, KmpInt32Ty);    // liter
    addFieldToRecordDecl(Ctx, TaskRD, Ctx.VoidPtrTy); // reductions
  }
  TaskRD->completeDefinition();
  return TaskRD;
}
4487
4488
/// Build the implicit record that wraps a kmp_task_t together with the
/// task's privates. The privates field is only added when \p Privates is
/// non-empty, so the result has either one or two fields.
static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  ASTContext &Ctx = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //         kmp_task_t task_data;
  //         .kmp_privates_t. privates;
  //       };
  RecordDecl *WrapperRD = Ctx.buildImplicitRecord("kmp_task_t_with_privates");
  WrapperRD->startDefinition();
  addFieldToRecordDecl(Ctx, WrapperRD, KmpTaskTQTy);
  const RecordDecl *PrivatesRD = createPrivatesRecordDecl(CGM, Privates);
  if (PrivatesRD)
    addFieldToRecordDecl(Ctx, WrapperRD, Ctx.getRecordType(PrivatesRD));
  WrapperRD->completeDefinition();
  return WrapperRD;
}
4504
4505
/// Emit a proxy function which accepts kmp_task_t as the second
4506
/// argument.
4507
/// \code
4508
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
4509
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
4510
///   For taskloops:
4511
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
4512
///   tt->reductions, tt->shareds);
4513
///   return 0;
4514
/// }
4515
/// \endcode
4516
static llvm::Function *
4517
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
4518
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
4519
                      QualType KmpTaskTWithPrivatesPtrQTy,
4520
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
4521
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
4522
442
                      llvm::Value *TaskPrivatesMap) {
4523
442
  ASTContext &C = CGM.getContext();
4524
442
  FunctionArgList Args;
4525
442
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
4526
442
                            ImplicitParamDecl::Other);
4527
442
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4528
442
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
4529
442
                                ImplicitParamDecl::Other);
4530
442
  Args.push_back(&GtidArg);
4531
442
  Args.push_back(&TaskTypeArg);
4532
442
  const auto &TaskEntryFnInfo =
4533
442
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
4534
442
  llvm::FunctionType *TaskEntryTy =
4535
442
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
4536
442
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
4537
442
  auto *TaskEntry = llvm::Function::Create(
4538
442
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4539
442
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
4540
442
  TaskEntry->setDoesNotRecurse();
4541
442
  CodeGenFunction CGF(CGM);
4542
442
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
4543
442
                    Loc, Loc);
4544
442
4545
442
  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
4546
442
  // tt,
4547
442
  // For taskloops:
4548
442
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
4549
442
  // tt->task_data.shareds);
4550
442
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
4551
442
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
4552
442
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
4553
442
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
4554
442
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4555
442
  const auto *KmpTaskTWithPrivatesQTyRD =
4556
442
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
4557
442
  LValue Base =
4558
442
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4559
442
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
4560
442
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4561
442
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
4562
442
  llvm::Value *PartidParam = PartIdLVal.getPointer();
4563
442
4564
442
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
4565
442
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
4566
442
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4567
442
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
4568
442
      CGF.ConvertTypeForMem(SharedsPtrTy));
4569
442
4570
442
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4571
442
  llvm::Value *PrivatesParam;
4572
442
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
4573
298
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
4574
298
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4575
298
        PrivatesLVal.getPointer(), CGF.VoidPtrTy);
4576
298
  } else {
4577
144
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4578
144
  }
4579
442
4580
442
  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
4581
442
                               TaskPrivatesMap,
4582
442
                               CGF.Builder
4583
442
                                   .CreatePointerBitCastOrAddrSpaceCast(
4584
442
                                       TDBase.getAddress(), CGF.VoidPtrTy)
4585
442
                                   .getPointer()};
4586
442
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
4587
442
                                          std::end(CommonArgs));
4588
442
  if (isOpenMPTaskLoopDirective(Kind)) {
4589
65
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
4590
65
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
4591
65
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
4592
65
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
4593
65
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
4594
65
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
4595
65
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
4596
65
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
4597
65
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
4598
65
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4599
65
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4600
65
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
4601
65
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
4602
65
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
4603
65
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
4604
65
    CallArgs.push_back(LBParam);
4605
65
    CallArgs.push_back(UBParam);
4606
65
    CallArgs.push_back(StParam);
4607
65
    CallArgs.push_back(LIParam);
4608
65
    CallArgs.push_back(RParam);
4609
65
  }
4610
442
  CallArgs.push_back(SharedsParam);
4611
442
4612
442
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
4613
442
                                                  CallArgs);
4614
442
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
4615
442
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
4616
442
  CGF.FinishFunction();
4617
442
  return TaskEntry;
4618
442
}
4619
4620
/// Emit an internal function that takes the global thread id and a pointer
/// to the task-with-privates record and schedules destruction of every
/// privates field whose type needs it.
///
/// The second field of the record is dereferenced without a check, so this
/// presumably must only be called for records that have a privates field --
/// NOTE(review): confirm against the caller.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &Ctx = CGM.getContext();

  // Declare the two implicit parameters and create the function.
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(Ctx, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                            KmpInt32Ty, ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(Ctx, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DtorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DtorFnTy = CGM.getTypes().GetFunctionType(DtorFnInfo);
  std::string DtorName =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DtorFnTy, llvm::GlobalValue::InternalLinkage,
                             DtorName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, DtorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DtorFnInfo, Args,
                    Loc, Loc);

  // Step from the task pointer to its privates field (the second field).
  LValue TaskLVal = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *WithPrivatesRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  const FieldDecl *PrivatesField = *std::next(WithPrivatesRD->field_begin());
  LValue PrivatesLVal = CGF.EmitLValueForField(TaskLVal, PrivatesField);

  // Push a destroy cleanup for each private that has a destructed type.
  const auto *PrivatesRD =
      cast<RecordDecl>(PrivatesField->getType()->getAsTagDecl());
  for (const auto *Field : PrivatesRD->fields()) {
    QualType::DestructionKind DtorKind = Field->getType().isDestructedType();
    if (DtorKind == QualType::DK_none)
      continue;
    LValue FieldLValue = CGF.EmitLValueForField(PrivatesLVal, Field);
    CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
  }
  CGF.FinishFunction();
  return DestructorFn;
}
4668
4669
/// Emit a privates mapping function for correct handling of private and
4670
/// firstprivate variables.
4671
/// \code
4672
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
4673
/// **noalias priv1,...,  <tyn> **noalias privn) {
4674
///   *priv1 = &.privates.priv1;
4675
///   ...;
4676
///   *privn = &.privates.privn;
4677
/// }
4678
/// \endcode
4679
static llvm::Value *
4680
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
4681
                               ArrayRef<const Expr *> PrivateVars,
4682
                               ArrayRef<const Expr *> FirstprivateVars,
4683
                               ArrayRef<const Expr *> LastprivateVars,
4684
                               QualType PrivatesQTy,
4685
298
                               ArrayRef<PrivateDataTy> Privates) {
4686
298
  ASTContext &C = CGM.getContext();
4687
298
  FunctionArgList Args;
4688
298
  ImplicitParamDecl TaskPrivatesArg(
4689
298
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4690
298
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
4691
298
      ImplicitParamDecl::Other);
4692
298
  Args.push_back(&TaskPrivatesArg);
4693
298
  llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
4694
298
  unsigned Counter = 1;
4695
298
  for (const Expr *E : PrivateVars) {
4696
74
    Args.push_back(ImplicitParamDecl::Create(
4697
74
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4698
74
        C.getPointerType(C.getPointerType(E->getType()))
4699
74
            .withConst()
4700
74
            .withRestrict(),
4701
74
        ImplicitParamDecl::Other));
4702
74
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4703
74
    PrivateVarsPos[VD] = Counter;
4704
74
    ++Counter;
4705
74
  }
4706
719
  for (const Expr *E : FirstprivateVars) {
4707
719
    Args.push_back(ImplicitParamDecl::Create(
4708
719
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4709
719
        C.getPointerType(C.getPointerType(E->getType()))
4710
719
            .withConst()
4711
719
            .withRestrict(),
4712
719
        ImplicitParamDecl::Other));
4713
719
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4714
719
    PrivateVarsPos[VD] = Counter;
4715
719
    ++Counter;
4716
719
  }
4717
298
  for (const Expr *E : LastprivateVars) {
4718
49
    Args.push_back(ImplicitParamDecl::Create(
4719
49
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4720
49
        C.getPointerType(C.getPointerType(E->getType()))
4721
49
            .withConst()
4722
49
            .withRestrict(),
4723
49
        ImplicitParamDecl::Other));
4724
49
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4725
49
    PrivateVarsPos[VD] = Counter;
4726
49
    ++Counter;
4727
49
  }
4728
298
  const auto &TaskPrivatesMapFnInfo =
4729
298
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4730
298
  llvm::FunctionType *TaskPrivatesMapTy =
4731
298
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
4732
298
  std::string Name =
4733
298
      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
4734
298
  auto *TaskPrivatesMap = llvm::Function::Create(
4735
298
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
4736
298
      &CGM.getModule());
4737
298
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
4738
298
                                    TaskPrivatesMapFnInfo);
4739
298
  if (CGM.getLangOpts().Optimize) {
4740
0
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
4741
0
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
4742
0
    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
4743
0
  }
4744
298
  CodeGenFunction CGF(CGM);
4745
298
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
4746
298
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);
4747
298
4748
298
  // *privi = &.privates.privi;
4749
298
  LValue Base = CGF.EmitLoadOfPointerLValue(
4750
298
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
4751
298
      TaskPrivatesArg.getType()->castAs<PointerType>());
4752
298
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
4753
298
  Counter = 0;
4754
842
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
4755
842
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
4756
842
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
4757
842
    LValue RefLVal =
4758
842
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
4759
842
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
4760
842
        RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
4761
842
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
4762
842
    ++Counter;
4763
842
  }
4764
298
  CGF.FinishFunction();
4765
298
  return TaskPrivatesMap;
4766
298
}
4767
4768
/// Emit initialization for private variables in task-based directives.
4769
static void emitPrivatesInit(CodeGenFunction &CGF,
4770
                             const OMPExecutableDirective &D,
4771
                             Address KmpTaskSharedsPtr, LValue TDBase,
4772
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4773
                             QualType SharedsTy, QualType SharedsPtrTy,
4774
                             const OMPTaskDataTy &Data,
4775
329
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
4776
329
  ASTContext &C = CGF.getContext();
4777
329
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4778
329
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
4779
329
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
4780
329
                                 ? 
OMPD_taskloop80
4781
329
                                 : 
OMPD_task249
;
4782
329
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
4783
329
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
4784
329
  LValue SrcBase;
4785
329
  bool IsTargetTask =
4786
329
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
4787
329
      
isOpenMPTargetExecutionDirective(D.getDirectiveKind())281
;
4788
329
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
4789
329
  // PointersArray and SizesArray. The original variables for these arrays are
4790
329
  // not captured and we get their addresses explicitly.
4791
329
  if ((!IsTargetTask && 
!Data.FirstprivateVars.empty()101
) ||
4792
329
      
(288
IsTargetTask288
&&
KmpTaskSharedsPtr.isValid()228
)) {
4793
245
    SrcBase = CGF.MakeAddrLValue(
4794
245
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4795
245
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
4796
245
        SharedsTy);
4797
245
  }
4798
329
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
4799
963
  for (const PrivateDataTy &Pair : Privates) {
4800
963
    const VarDecl *VD = Pair.second.PrivateCopy;
4801
963
    const Expr *Init = VD->getAnyInitializer();
4802
963
    if (Init && 
(829
!ForDup829
||
(68
isa<CXXConstructExpr>(Init)68
&&
4803
809
                             
!CGF.isTrivialInitializer(Init)48
))) {
4804
809
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
4805
809
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
4806
735
        const VarDecl *OriginalVD = Pair.second.Original;
4807
735
        // Check if the variable is the target-based BasePointersArray,
4808
735
        // PointersArray or SizesArray.
4809
735
        LValue SharedRefLValue;
4810
735
        QualType Type = PrivateLValue.getType();
4811
735
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
4812
735
        if (IsTargetTask && 
!SharedField624
) {
4813
324
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
4814
324
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
4815
324
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
4816
324
                         ->getNumParams() == 0 &&
4817
324
                 isa<TranslationUnitDecl>(
4818
324
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
4819
324
                         ->getDeclContext()) &&
4820
324
                 "Expected artificial target data variable.");
4821
324
          SharedRefLValue =
4822
324
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
4823
411
        } else {
4824
411
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
4825
411
          SharedRefLValue = CGF.MakeAddrLValue(
4826
411
              Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
4827
411
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
4828
411
              SharedRefLValue.getTBAAInfo());
4829
411
        }
4830
735
        if (Type->isArrayType()) {
4831
361
          // Initialize firstprivate array.
4832
361
          if (!isa<CXXConstructExpr>(Init) || 
CGF.isTrivialInitializer(Init)20
) {
4833
341
            // Perform simple memcpy.
4834
341
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
4835
341
          } else {
4836
20
            // Initialize firstprivate array using element-by-element
4837
20
            // initialization.
4838
20
            CGF.EmitOMPAggregateAssign(
4839
20
                PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
4840
20
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
4841
20
                                                  Address SrcElement) {
4842
20
                  // Clean up any temporaries needed by the initialization.
4843
20
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
4844
20
                  InitScope.addPrivate(
4845
20
                      Elem, [SrcElement]() -> Address { return SrcElement; });
4846
20
                  (void)InitScope.Privatize();
4847
20
                  // Emit initialization for single element.
4848
20
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
4849
20
                      CGF, &CapturesInfo);
4850
20
                  CGF.EmitAnyExprToMem(Init, DestElement,
4851
20
                                       Init->getType().getQualifiers(),
4852
20
                                       /*IsInitializer=*/false);
4853
20
                });
4854
20
          }
4855
374
        } else {
4856
374
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
4857
374
          InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
4858
374
            return SharedRefLValue.getAddress();
4859
374
          });
4860
374
          (void)InitScope.Privatize();
4861
374
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
4862
374
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
4863
374
                             /*capturedByInit=*/false);
4864
374
        }
4865
735
      } else {
4866
74
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
4867
74
      }
4868
809
    }
4869
963
    ++FI;
4870
963
  }
4871
329
}
4872
4873
/// Check if duplication function is required for taskloops.
4874
static bool checkInitIsRequired(CodeGenFunction &CGF,
4875
34
                                ArrayRef<PrivateDataTy> Privates) {
4876
34
  bool InitRequired = false;
4877
70
  for (const PrivateDataTy &Pair : Privates) {
4878
70
    const VarDecl *VD = Pair.second.PrivateCopy;
4879
70
    const Expr *Init = VD->getAnyInitializer();
4880
70
    InitRequired = InitRequired || (Init && 
isa<CXXConstructExpr>(Init)50
&&
4881
70
                                    
!CGF.isTrivialInitializer(Init)16
);
4882
70
    if (InitRequired)
4883
16
      break;
4884
70
  }
4885
34
  return InitRequired;
4886
34
}
4887
4888
4889
/// Emit task_dup function (for initialization of
4890
/// private/firstprivate/lastprivate vars and last_iter flag)
4891
/// \code
4892
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
4893
/// lastpriv) {
4894
/// // setup lastprivate flag
4895
///    task_dst->last = lastpriv;
4896
/// // could be constructor calls here...
4897
/// }
4898
/// \endcode
///
/// The function is created with internal linkage, returns void, is marked as
/// non-recursing and takes three implicit parameters: destination task
/// pointer, source task pointer and an int "lastpriv" flag.
///
/// \param CGM Module the function is added to.
/// \param Loc Source location used for the implicit parameters, the function
/// start/end and the emitted loads.
/// \param D Directive, forwarded to emitPrivatesInit().
/// \param KmpTaskTWithPrivatesPtrQTy Pointer type of both task parameters.
/// \param KmpTaskTWithPrivatesQTyRD Record whose first field is the task
/// descriptor itself.
/// \param KmpTaskTQTyRD Record of the task descriptor; its fields are indexed
/// with KmpTaskTLastIter and KmpTaskTShareds.
/// \param SharedsTy Type of the shareds block; supplies the alignment of the
/// loaded shareds pointer and is forwarded to emitPrivatesInit().
/// \param SharedsPtrTy Forwarded to emitPrivatesInit().
/// \param Data Task data; Data.FirstprivateVars decides whether the shareds
/// pointer of the source task is loaded.
/// \param Privates Private-variable descriptors; must not be empty.
/// \param WithLastIter If true, the lastpriv argument is stored into the
/// destination task's last-iteration field.
/// \return The newly created function.
4899
static llvm::Value *
4900
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
4901
                    const OMPExecutableDirective &D,
4902
                    QualType KmpTaskTWithPrivatesPtrQTy,
4903
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4904
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
4905
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
4906
31
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
4907
31
  ASTContext &C = CGM.getContext();
4908
31
  // Build the (task_dst, task_src, lastpriv) parameter list.
  FunctionArgList Args;
4909
31
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4910
31
                           KmpTaskTWithPrivatesPtrQTy,
4911
31
                           ImplicitParamDecl::Other);
4912
31
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4913
31
                           KmpTaskTWithPrivatesPtrQTy,
4914
31
                           ImplicitParamDecl::Other);
4915
31
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
4916
31
                                ImplicitParamDecl::Other);
4917
31
  Args.push_back(&DstArg);
4918
31
  Args.push_back(&SrcArg);
4919
31
  Args.push_back(&LastprivArg);
4920
31
  const auto &TaskDupFnInfo =
4921
31
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4922
31
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
4923
31
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
4924
31
  auto *TaskDup = llvm::Function::Create(
4925
31
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4926
31
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
4927
31
  TaskDup->setDoesNotRecurse();
4928
31
  // The body is emitted with a fresh CodeGenFunction, independent of any
  // caller's function state.
  CodeGenFunction CGF(CGM);
4929
31
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
4930
31
                    Loc);
4931
31
4932
31
  // TDBase designates the destination task object (*task_dst).
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
4933
31
      CGF.GetAddrOfLocalVar(&DstArg),
4934
31
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4935
31
  // task_dst->last = lastpriv; (the field is selected by KmpTaskTLastIter)
4936
31
  if (WithLastIter) {
4937
15
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4938
15
    // The task descriptor is the first field of the with-privates record.
    LValue Base = CGF.EmitLValueForField(
4939
15
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4940
15
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4941
15
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
4942
15
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
4943
15
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
4944
15
  }
4945
31
4946
31
  // Emit initial values for private copies (if any).
  // This function must only be called with at least one private copy.
4947
31
  assert(!Privates.empty());
4948
31
  // Stays invalid unless there are firstprivate variables; in that case it is
  // the shareds pointer loaded from the source task.
  Address KmpTaskSharedsPtr = Address::invalid();
4949
31
  if (!Data.FirstprivateVars.empty()) {
4950
8
    // NOTE: this TDBase shadows the outer one and designates the *source*
    // task (*task_src). The outer, destination TDBase is the one passed to
    // emitPrivatesInit() below.
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
4951
8
        CGF.GetAddrOfLocalVar(&SrcArg),
4952
8
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4953
8
    LValue Base = CGF.EmitLValueForField(
4954
8
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4955
8
    // Load the KmpTaskTShareds field and wrap it with the natural alignment
    // of SharedsTy.
    KmpTaskSharedsPtr = Address(
4956
8
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
4957
8
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
4958
8
                                                  KmpTaskTShareds)),
4959
8
                             Loc),
4960
8
        CGF.getNaturalTypeAlignment(SharedsTy));
4961
8
  }
4962
31
  // Initialize the privates of the destination task; ForDup selects the
  // task-duplication mode of emitPrivatesInit().
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
4963
31
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
4964
31
  CGF.FinishFunction();
4965
31
  return TaskDup;
4966
31
}
4967
4968
/// Checks if destructor function is required to be generated.
/// Inspects the record type of the second field of
/// \p KmpTaskTWithPrivatesQTyRD and looks for a member whose type reports
/// isDestructedType().
/// \param KmpTaskTWithPrivatesQTyRD Record that must have at least two fields,
/// the second of which has a record type (enforced by the cast<> below).
4969
/// \return true if cleanups are required, false otherwise.
4970
static bool
4971
298
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
4972
298
  bool NeedsCleanup = false;
4973
298
  // Field #1 (the one after the first field) holds the privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4974
298
  const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
4975
761
  for (const FieldDecl *FD : PrivateRD->fields()) {
4976
761
    // Sticky flag: set by the first field whose type needs destruction.
    NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
4977
761
    // One such field is enough, so stop scanning.
    if (NeedsCleanup)
4978
33
      break;
4979
761
  }
4980
298
  return NeedsCleanup;
4981
298
}
4982
4983
CGOpenMPRuntime::TaskResultTy
4984
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
4985
                              const OMPExecutableDirective &D,
4986
                              llvm::Function *TaskFunction, QualType SharedsTy,
4987
442
                              Address Shareds, const OMPTaskDataTy &Data) {
4988
442
  ASTContext &C = CGM.getContext();
4989
442
  llvm::SmallVector<PrivateDataTy, 4> Privates;
4990
442
  // Aggregate privates and sort them by the alignment.
4991
442
  auto I = Data.PrivateCopies.begin();
4992
442
  for (const Expr *E : Data.PrivateVars) {
4993
74
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4994
74
    Privates.emplace_back(
4995
74
        C.getDeclAlign(VD),
4996
74
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4997
74
                         /*PrivateElemInit=*/nullptr));
4998
74
    ++I;
4999
74
  }
5000
442
  I = Data.FirstprivateCopies.begin();
5001
442
  auto IElemInitRef = Data.FirstprivateInits.begin();
5002
719
  for (const Expr *E : Data.FirstprivateVars) {
5003
719
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
5004
719
    Privates.emplace_back(
5005
719
        C.getDeclAlign(VD),
5006
719
        PrivateHelpersTy(
5007
719
            VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
5008
719
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
5009
719
    ++I;
5010
719
    ++IElemInitRef;
5011
719
  }
5012
442
  I = Data.LastprivateCopies.begin();
5013
442
  for (const Expr *E : Data.LastprivateVars) {
<