Coverage Report

Created: 2020-09-22 08:39

/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp
Line
Count
Source (jump to first uncovered line)
1
//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This provides a class for OpenMP runtime code generation.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "CGOpenMPRuntime.h"
14
#include "CGCXXABI.h"
15
#include "CGCleanup.h"
16
#include "CGRecordLayout.h"
17
#include "CodeGenFunction.h"
18
#include "clang/AST/Attr.h"
19
#include "clang/AST/Decl.h"
20
#include "clang/AST/OpenMPClause.h"
21
#include "clang/AST/StmtOpenMP.h"
22
#include "clang/AST/StmtVisitor.h"
23
#include "clang/Basic/BitmaskEnum.h"
24
#include "clang/Basic/FileManager.h"
25
#include "clang/Basic/OpenMPKinds.h"
26
#include "clang/Basic/SourceManager.h"
27
#include "clang/CodeGen/ConstantInitBuilder.h"
28
#include "llvm/ADT/ArrayRef.h"
29
#include "llvm/ADT/SetOperations.h"
30
#include "llvm/ADT/StringExtras.h"
31
#include "llvm/Bitcode/BitcodeReader.h"
32
#include "llvm/IR/Constants.h"
33
#include "llvm/IR/DerivedTypes.h"
34
#include "llvm/IR/GlobalValue.h"
35
#include "llvm/IR/Value.h"
36
#include "llvm/Support/AtomicOrdering.h"
37
#include "llvm/Support/Format.h"
38
#include "llvm/Support/raw_ostream.h"
39
#include <cassert>
40
#include <numeric>
41
42
using namespace clang;
43
using namespace CodeGen;
44
using namespace llvm::omp;
45
46
namespace {
47
/// Base class for handling code generation inside OpenMP regions.
48
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
49
public:
50
  /// Kinds of OpenMP regions used in codegen.
51
  enum CGOpenMPRegionKind {
52
    /// Region with outlined function for standalone 'parallel'
53
    /// directive.
54
    ParallelOutlinedRegion,
55
    /// Region with outlined function for standalone 'task' directive.
56
    TaskOutlinedRegion,
57
    /// Region for constructs that do not require function outlining,
58
    /// like 'for', 'sections', 'atomic' etc. directives.
59
    InlinedRegion,
60
    /// Region with outlined function for standalone 'target' directive.
61
    TargetRegion,
62
  };
63
64
  CGOpenMPRegionInfo(const CapturedStmt &CS,
65
                     const CGOpenMPRegionKind RegionKind,
66
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
67
                     bool HasCancel)
68
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
69
23.1k
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
70
71
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
72
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
73
                     bool HasCancel)
74
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
75
40.2k
        Kind(Kind), HasCancel(HasCancel) {}
76
77
  /// Get a variable or parameter for storing global thread id
78
  /// inside OpenMP construct.
79
  virtual const VarDecl *getThreadIDVariable() const = 0;
80
81
  /// Emit the captured statement body.
82
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
83
84
  /// Get an LValue for the current ThreadID variable.
85
  /// \return LValue for thread id variable. This LValue always has type int32*.
86
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
87
88
26
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
89
90
40.0k
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
91
92
211
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
93
94
326
  bool hasCancel() const { return HasCancel; }
95
96
134k
  static bool classof(const CGCapturedStmtInfo *Info) {
97
134k
    return Info->getKind() == CR_OpenMP;
98
134k
  }
99
100
63.3k
  ~CGOpenMPRegionInfo() override = default;
101
102
protected:
103
  CGOpenMPRegionKind RegionKind;
104
  RegionCodeGenTy CodeGen;
105
  OpenMPDirectiveKind Kind;
106
  bool HasCancel;
107
};
108
109
/// API for captured statement code generation in OpenMP constructs.
110
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
111
public:
112
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
113
                             const RegionCodeGenTy &CodeGen,
114
                             OpenMPDirectiveKind Kind, bool HasCancel,
115
                             StringRef HelperName)
116
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
117
                           HasCancel),
118
11.1k
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
119
11.1k
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
120
11.1k
  }
121
122
  /// Get a variable or parameter for storing global thread id
123
  /// inside OpenMP construct.
124
36.4k
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
125
126
  /// Get the name of the capture helper.
127
11.1k
  StringRef getHelperName() const override { return HelperName; }
128
129
0
  static bool classof(const CGCapturedStmtInfo *Info) {
130
0
    return CGOpenMPRegionInfo::classof(Info) &&
131
0
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
132
0
               ParallelOutlinedRegion;
133
0
  }
134
135
private:
136
  /// A variable or parameter storing global thread id for OpenMP
137
  /// constructs.
138
  const VarDecl *ThreadIDVar;
139
  StringRef HelperName;
140
};
141
142
/// API for captured statement code generation in OpenMP constructs.
143
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
144
public:
145
  class UntiedTaskActionTy final : public PrePostActionTy {
146
    bool Untied;
147
    const VarDecl *PartIDVar;
148
    const RegionCodeGenTy UntiedCodeGen;
149
    llvm::SwitchInst *UntiedSwitch = nullptr;
150
151
  public:
152
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
153
                       const RegionCodeGenTy &UntiedCodeGen)
154
707
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
155
707
    void Enter(CodeGenFunction &CGF) override {
156
707
      if (Untied) {
157
        // Emit task switching point.
158
16
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
159
16
            CGF.GetAddrOfLocalVar(PartIDVar),
160
16
            PartIDVar->getType()->castAs<PointerType>());
161
16
        llvm::Value *Res =
162
16
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
163
16
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
164
16
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
165
16
        CGF.EmitBlock(DoneBB);
166
16
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
167
16
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
168
16
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
169
16
                              CGF.Builder.GetInsertBlock());
170
16
        emitUntiedSwitch(CGF);
171
16
      }
172
707
    }
173
32
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
174
32
      if (Untied) {
175
30
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
176
30
            CGF.GetAddrOfLocalVar(PartIDVar),
177
30
            PartIDVar->getType()->castAs<PointerType>());
178
30
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
179
30
                              PartIdLVal);
180
30
        UntiedCodeGen(CGF);
181
30
        CodeGenFunction::JumpDest CurPoint =
182
30
            CGF.getJumpDestInCurrentScope(".untied.next.");
183
30
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
184
30
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
185
30
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
186
30
                              CGF.Builder.GetInsertBlock());
187
30
        CGF.EmitBranchThroughCleanup(CurPoint);
188
30
        CGF.EmitBlock(CurPoint.getBlock());
189
30
      }
190
32
    }
191
16
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
192
  };
193
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
194
                                 const VarDecl *ThreadIDVar,
195
                                 const RegionCodeGenTy &CodeGen,
196
                                 OpenMPDirectiveKind Kind, bool HasCancel,
197
                                 const UntiedTaskActionTy &Action)
198
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
199
707
        ThreadIDVar(ThreadIDVar), Action(Action) {
200
707
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
201
707
  }
202
203
  /// Get a variable or parameter for storing global thread id
204
  /// inside OpenMP construct.
205
276
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
206
207
  /// Get an LValue for the current ThreadID variable.
208
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
209
210
  /// Get the name of the capture helper.
211
707
  StringRef getHelperName() const override { return ".omp_outlined."; }
212
213
16
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
214
16
    Action.emitUntiedSwitch(CGF);
215
16
  }
216
217
0
  static bool classof(const CGCapturedStmtInfo *Info) {
218
0
    return CGOpenMPRegionInfo::classof(Info) &&
219
0
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
220
0
               TaskOutlinedRegion;
221
0
  }
222
223
private:
224
  /// A variable or parameter storing global thread id for OpenMP
225
  /// constructs.
226
  const VarDecl *ThreadIDVar;
227
  /// Action for emitting code for untied tasks.
228
  const UntiedTaskActionTy &Action;
229
};
230
231
/// API for inlined captured statement code generation in OpenMP
232
/// constructs.
233
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
234
public:
235
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
236
                            const RegionCodeGenTy &CodeGen,
237
                            OpenMPDirectiveKind Kind, bool HasCancel)
238
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
239
        OldCSI(OldCSI),
240
40.2k
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
241
242
  // Retrieve the value of the context parameter.
243
0
  llvm::Value *getContextValue() const override {
244
0
    if (OuterRegionInfo)
245
0
      return OuterRegionInfo->getContextValue();
246
0
    llvm_unreachable("No context value for inlined OpenMP region");
247
0
  }
248
249
0
  void setContextValue(llvm::Value *V) override {
250
0
    if (OuterRegionInfo) {
251
0
      OuterRegionInfo->setContextValue(V);
252
0
      return;
253
0
    }
254
0
    llvm_unreachable("No context value for inlined OpenMP region");
255
0
  }
256
257
  /// Lookup the captured field decl for a variable.
258
25.4k
  const FieldDecl *lookup(const VarDecl *VD) const override {
259
25.4k
    if (OuterRegionInfo)
260
12.9k
      return OuterRegionInfo->lookup(VD);
261
    // If there is no outer outlined region,no need to lookup in a list of
262
    // captured variables, we can use the original one.
263
12.4k
    return nullptr;
264
12.4k
  }
265
266
0
  FieldDecl *getThisFieldDecl() const override {
267
0
    if (OuterRegionInfo)
268
0
      return OuterRegionInfo->getThisFieldDecl();
269
0
    return nullptr;
270
0
  }
271
272
  /// Get a variable or parameter for storing global thread id
273
  /// inside OpenMP construct.
274
6.73k
  const VarDecl *getThreadIDVariable() const override {
275
6.73k
    if (OuterRegionInfo)
276
6.55k
      return OuterRegionInfo->getThreadIDVariable();
277
179
    return nullptr;
278
179
  }
279
280
  /// Get an LValue for the current ThreadID variable.
281
6.55k
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
282
6.55k
    if (OuterRegionInfo)
283
6.55k
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
284
0
    llvm_unreachable("No LValue for inlined OpenMP construct");
285
0
  }
286
287
  /// Get the name of the capture helper.
288
0
  StringRef getHelperName() const override {
289
0
    if (auto *OuterRegionInfo = getOldCSI())
290
0
      return OuterRegionInfo->getHelperName();
291
0
    llvm_unreachable("No helper name for inlined OpenMP construct");
292
0
  }
293
294
14
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
295
14
    if (OuterRegionInfo)
296
12
      OuterRegionInfo->emitUntiedSwitch(CGF);
297
14
  }
298
299
40.0k
  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
300
301
40.0k
  static bool classof(const CGCapturedStmtInfo *Info) {
302
40.0k
    return CGOpenMPRegionInfo::classof(Info) &&
303
40.0k
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
304
40.0k
  }
305
306
40.2k
  ~CGOpenMPInlinedRegionInfo() override = default;
307
308
private:
309
  /// CodeGen info about outer OpenMP region.
310
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
311
  CGOpenMPRegionInfo *OuterRegionInfo;
312
};
313
314
/// API for captured statement code generation in OpenMP target
315
/// constructs. For this captures, implicit parameters are used instead of the
316
/// captured fields. The name of the target region has to be unique in a given
317
/// application so it is provided by the client, because only the client has
318
/// the information to generate that.
319
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
320
public:
321
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
322
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
323
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
324
                           /*HasCancel=*/false),
325
11.2k
        HelperName(HelperName) {}
326
327
  /// This is unused for target regions because each starts executing
328
  /// with a single thread.
329
2.26k
  const VarDecl *getThreadIDVariable() const override { return nullptr; }
330
331
  /// Get the name of the capture helper.
332
11.3k
  StringRef getHelperName() const override { return HelperName; }
333
334
0
  static bool classof(const CGCapturedStmtInfo *Info) {
335
0
    return CGOpenMPRegionInfo::classof(Info) &&
336
0
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
337
0
  }
338
339
private:
340
  StringRef HelperName;
341
};
342
343
0
/// Placeholder region code generator that must never be invoked; calling it
/// hits llvm_unreachable.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
346
/// API for generation of expressions captured in a innermost OpenMP
347
/// region.
348
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
349
public:
350
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
351
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
352
                                  OMPD_unknown,
353
                                  /*HasCancel=*/false),
354
226
        PrivScope(CGF) {
355
    // Make sure the globals captured in the provided statement are local by
356
    // using the privatization logic. We assume the same variable is not
357
    // captured more than once.
358
404
    for (const auto &C : CS.captures()) {
359
404
      if (!C.capturesVariable() && 
!C.capturesVariableByCopy()306
)
360
16
        continue;
361
362
388
      const VarDecl *VD = C.getCapturedVar();
363
388
      if (VD->isLocalVarDeclOrParm())
364
316
        continue;
365
366
72
      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
367
72
                      /*RefersToEnclosingVariableOrCapture=*/false,
368
72
                      VD->getType().getNonReferenceType(), VK_LValue,
369
72
                      C.getLocation());
370
72
      PrivScope.addPrivate(
371
72
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
372
72
    }
373
226
    (void)PrivScope.Privatize();
374
226
  }
375
376
  /// Lookup the captured field decl for a variable.
377
0
  const FieldDecl *lookup(const VarDecl *VD) const override {
378
0
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
379
0
      return FD;
380
0
    return nullptr;
381
0
  }
382
383
  /// Emit the captured statement body.
384
0
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
385
0
    llvm_unreachable("No body for expressions");
386
0
  }
387
388
  /// Get a variable or parameter for storing global thread id
389
  /// inside OpenMP construct.
390
0
  const VarDecl *getThreadIDVariable() const override {
391
0
    llvm_unreachable("No thread id for expressions");
392
0
  }
393
394
  /// Get the name of the capture helper.
395
0
  StringRef getHelperName() const override {
396
0
    llvm_unreachable("No helper name for expressions");
397
0
  }
398
399
0
  static bool classof(const CGCapturedStmtInfo *Info) { return false; }
400
401
private:
402
  /// Private scope to capture global variables.
403
  CodeGenFunction::OMPPrivateScope PrivScope;
404
};
405
406
/// RAII for emitting code of OpenMP constructs.
407
class InlinedOpenMPRegionRAII {
408
  CodeGenFunction &CGF;
409
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
410
  FieldDecl *LambdaThisCaptureField = nullptr;
411
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
412
413
public:
414
  /// Constructs region for combined constructs.
415
  /// \param CodeGen Code generation sequence for combined directives. Includes
416
  /// a list of functions used for code generation of implicitly inlined
417
  /// regions.
418
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
419
                          OpenMPDirectiveKind Kind, bool HasCancel)
420
40.0k
      : CGF(CGF) {
421
    // Start emission for the construct.
422
40.0k
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
423
40.0k
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
424
40.0k
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
425
40.0k
    LambdaThisCaptureField = CGF.LambdaThisCaptureField;
426
40.0k
    CGF.LambdaThisCaptureField = nullptr;
427
40.0k
    BlockInfo = CGF.BlockInfo;
428
40.0k
    CGF.BlockInfo = nullptr;
429
40.0k
  }
430
431
40.0k
  ~InlinedOpenMPRegionRAII() {
432
    // Restore original CapturedStmtInfo only if we're done with code emission.
433
40.0k
    auto *OldCSI =
434
40.0k
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
435
40.0k
    delete CGF.CapturedStmtInfo;
436
40.0k
    CGF.CapturedStmtInfo = OldCSI;
437
40.0k
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
438
40.0k
    CGF.LambdaThisCaptureField = LambdaThisCaptureField;
439
40.0k
    CGF.BlockInfo = BlockInfo;
440
40.0k
  }
441
};
442
443
/// Values for bit flags used in the ident_t to describe the fields.
444
/// All enumeric elements are named and described in accordance with the code
445
/// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
446
enum OpenMPLocationFlags : unsigned {
447
  /// Use trampoline for internal microtask.
448
  OMP_IDENT_IMD = 0x01,
449
  /// Use c-style ident structure.
450
  OMP_IDENT_KMPC = 0x02,
451
  /// Atomic reduction option for kmpc_reduce.
452
  OMP_ATOMIC_REDUCE = 0x10,
453
  /// Explicit 'barrier' directive.
454
  OMP_IDENT_BARRIER_EXPL = 0x20,
455
  /// Implicit barrier in code.
456
  OMP_IDENT_BARRIER_IMPL = 0x40,
457
  /// Implicit barrier in 'for' directive.
458
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
459
  /// Implicit barrier in 'sections' directive.
460
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
461
  /// Implicit barrier in 'single' directive.
462
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
463
  /// Call of __kmp_for_static_init for static loop.
464
  OMP_IDENT_WORK_LOOP = 0x200,
465
  /// Call of __kmp_for_static_init for sections.
466
  OMP_IDENT_WORK_SECTIONS = 0x400,
467
  /// Call of __kmp_for_static_init for distribute.
468
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
469
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
470
};
471
472
namespace {
473
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
474
/// Values for bit flags for marking which requires clauses have been used.
475
enum OpenMPOffloadingRequiresDirFlags : int64_t {
476
  /// flag undefined.
477
  OMP_REQ_UNDEFINED               = 0x000,
478
  /// no requires clause present.
479
  OMP_REQ_NONE                    = 0x001,
480
  /// reverse_offload clause.
481
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
482
  /// unified_address clause.
483
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
484
  /// unified_shared_memory clause.
485
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
486
  /// dynamic_allocators clause.
487
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
488
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
489
};
490
491
enum OpenMPOffloadingReservedDeviceIDs {
492
  /// Device ID if the device was not defined, runtime should get it
493
  /// from environment variables in the spec.
494
  OMP_DEVICEID_UNDEF = -1,
495
};
496
} // anonymous namespace
497
498
/// Describes ident structure that describes a source location.
499
/// All descriptions are taken from
500
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
501
/// Original structure:
502
/// typedef struct ident {
503
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
504
///                                  see above  */
505
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
506
///                                  KMP_IDENT_KMPC identifies this union
507
///                                  member  */
508
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
509
///                                  see above */
510
///#if USE_ITT_BUILD
511
///                            /*  but currently used for storing
512
///                                region-specific ITT */
513
///                            /*  contextual information. */
514
///#endif /* USE_ITT_BUILD */
515
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
516
///                                 C++  */
517
///    char const *psource;    /**< String describing the source location.
518
///                            The string is composed of semi-colon separated
519
///                            fields which describe the source file,
520
///                            the function and a pair of line numbers that
521
///                            delimit the construct.
522
///                             */
523
/// } ident_t;
524
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1 = 0,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags = 1,
  /// Not really used in Fortran any more
  IdentField_Reserved_2 = 2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3 = 3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource = 4
};
538
539
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  // Default (unordered) schedules.
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,

  // 'ordered' schedules.
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,

  /// Schedule used by default.
  OMP_sch_default = OMP_sch_static,

  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,

  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
570
571
/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
572
/// region.
573
class CleanupTy final : public EHScopeStack::Cleanup {
574
  PrePostActionTy *Action;
575
576
public:
577
14.9k
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
578
15.0k
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
579
15.0k
    if (!CGF.HaveInsertPoint())
580
0
      return;
581
15.0k
    Action->Exit(CGF);
582
15.0k
  }
583
};
584
585
} // anonymous namespace
586
587
119k
/// Runs the stored code generation callback inside its own cleanups scope.
/// When a pre/post action is attached, a cleanup is pushed first so that the
/// action's Exit() runs when the scope is left; otherwise a default
/// PrePostActionTy is handed to the callback.
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (!PrePostAction) {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
    return;
  }
  CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
  Callback(CodeGen, CGF, *PrePostAction);
}
597
598
/// Check if the combiner is a call to UDR combiner and if it is so return the
599
/// UDR decl used for reduction.
600
static const OMPDeclareReductionDecl *
601
1.07k
getReductionInit(const Expr *ReductionOp) {
602
1.07k
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
603
117
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
604
77
      if (const auto *DRE =
605
77
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
606
77
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
607
77
          return DRD;
608
1.00k
  return nullptr;
609
1.00k
}
610
611
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
612
                                             const OMPDeclareReductionDecl *DRD,
613
                                             const Expr *InitOp,
614
                                             Address Private, Address Original,
615
61
                                             QualType Ty) {
616
61
  if (DRD->getInitializer()) {
617
53
    std::pair<llvm::Function *, llvm::Function *> Reduction =
618
53
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
619
53
    const auto *CE = cast<CallExpr>(InitOp);
620
53
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
621
53
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
622
53
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
623
53
    const auto *LHSDRE =
624
53
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
625
53
    const auto *RHSDRE =
626
53
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
627
53
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
628
53
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
629
53
                            [=]() { return Private; });
630
53
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
631
53
                            [=]() { return Original; });
632
53
    (void)PrivateScope.Privatize();
633
53
    RValue Func = RValue::get(Reduction.second);
634
53
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
635
53
    CGF.EmitIgnoredExpr(InitOp);
636
8
  } else {
637
8
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
638
8
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
639
8
    auto *GV = new llvm::GlobalVariable(
640
8
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
641
8
        llvm::GlobalValue::PrivateLinkage, Init, Name);
642
8
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
643
8
    RValue InitRVal;
644
8
    switch (CGF.getEvaluationKind(Ty)) {
645
8
    case TEK_Scalar:
646
8
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
647
8
      break;
648
0
    case TEK_Complex:
649
0
      InitRVal =
650
0
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
651
0
      break;
652
0
    case TEK_Aggregate:
653
0
      InitRVal = RValue::getAggregate(LV.getAddress(CGF));
654
0
      break;
655
8
    }
656
8
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
657
8
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
658
8
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
659
8
                         /*IsInitializer=*/false);
660
8
  }
661
61
}
662
663
/// Emit initialization of arrays of complex types.
664
/// \param DestAddr Address of the array.
665
/// \param Type Type of array.
666
/// \param Init Initial expression of array.
667
/// \param SrcAddr Address of the original array.
668
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
669
                                 QualType Type, bool EmitDeclareReductionInit,
670
                                 const Expr *Init,
671
                                 const OMPDeclareReductionDecl *DRD,
672
277
                                 Address SrcAddr = Address::invalid()) {
673
  // Perform element-by-element initialization.
674
277
  QualType ElementTy;
675
676
  // Drill down to the base element type on both arrays.
677
277
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
678
277
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
679
277
  DestAddr =
680
277
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
681
277
  if (DRD)
682
31
    SrcAddr =
683
31
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
684
685
277
  llvm::Value *SrcBegin = nullptr;
686
277
  if (DRD)
687
31
    SrcBegin = SrcAddr.getPointer();
688
277
  llvm::Value *DestBegin = DestAddr.getPointer();
689
  // Cast from pointer to array type to pointer to single element.
690
277
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
691
  // The basic structure here is a while-do loop.
692
277
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
693
277
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
694
277
  llvm::Value *IsEmpty =
695
277
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
696
277
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
697
698
  // Enter the loop body, making that address the current address.
699
277
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
700
277
  CGF.EmitBlock(BodyBB);
701
702
277
  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
703
704
277
  llvm::PHINode *SrcElementPHI = nullptr;
705
277
  Address SrcElementCurrent = Address::invalid();
706
277
  if (DRD) {
707
31
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
708
31
                                          "omp.arraycpy.srcElementPast");
709
31
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
710
31
    SrcElementCurrent =
711
31
        Address(SrcElementPHI,
712
31
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
713
31
  }
714
277
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
715
277
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
716
277
  DestElementPHI->addIncoming(DestBegin, EntryBB);
717
277
  Address DestElementCurrent =
718
277
      Address(DestElementPHI,
719
277
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
720
721
  // Emit copy.
722
277
  {
723
277
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
724
277
    if (EmitDeclareReductionInit) {
725
31
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
726
31
                                       SrcElementCurrent, ElementTy);
727
31
    } else
728
246
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
729
246
                           /*IsInitializer=*/false);
730
277
  }
731
732
277
  if (DRD) {
733
    // Shift the address forward by one element.
734
31
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
735
31
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
736
31
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
737
31
  }
738
739
  // Shift the address forward by one element.
740
277
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
741
277
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
742
  // Check whether we've reached the end.
743
277
  llvm::Value *Done =
744
277
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
745
277
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
746
277
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
747
748
  // Done.
749
277
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
750
277
}
751
752
1.09k
/// Emits the LValue of the shared reduction item \p E by forwarding to
/// CodeGenFunction::EmitOMPSharedLValue.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}
755
756
/// Emits the LValue for the non-lower-bound end of the shared reduction item
/// \p E when it is an array section.
/// \return The LValue emitted with IsLowerBound=false for an array section,
/// or a default-constructed LValue for any other expression.
LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  const auto *Section = dyn_cast<OMPArraySectionExpr>(E);
  if (!Section)
    return LValue();
  return CGF.EmitOMPArraySectionExpr(Section, /*IsLowerBound=*/false);
}
762
763
void ReductionCodeGen::emitAggregateInitialization(
764
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
765
277
    const OMPDeclareReductionDecl *DRD) {
766
  // Emit VarDecl with copy init for arrays.
767
  // Get the address of the original variable captured in current
768
  // captured region.
769
277
  const auto *PrivateVD =
770
277
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
771
277
  bool EmitDeclareReductionInit =
772
277
      DRD && 
(31
DRD->getInitializer()31
||
!PrivateVD->hasInit()4
);
773
277
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
774
277
                       EmitDeclareReductionInit,
775
31
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
776
246
                                                : PrivateVD->getInit(),
777
277
                       DRD, SharedLVal.getAddress(CGF));
778
277
}
779
780
ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
781
                                   ArrayRef<const Expr *> Origs,
782
                                   ArrayRef<const Expr *> Privates,
783
26.3k
                                   ArrayRef<const Expr *> ReductionOps) {
784
26.3k
  ClausesData.reserve(Shareds.size());
785
26.3k
  SharedAddresses.reserve(Shareds.size());
786
26.3k
  Sizes.reserve(Shareds.size());
787
26.3k
  BaseDecls.reserve(Shareds.size());
788
26.3k
  const auto *IOrig = Origs.begin();
789
26.3k
  const auto *IPriv = Privates.begin();
790
26.3k
  const auto *IRed = ReductionOps.begin();
791
1.05k
  for (const Expr *Ref : Shareds) {
792
1.05k
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
793
1.05k
    std::advance(IOrig, 1);
794
1.05k
    std::advance(IPriv, 1);
795
1.05k
    std::advance(IRed, 1);
796
1.05k
  }
797
26.3k
}
798
799
1.04k
void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
800
1.04k
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
801
1.04k
         "Number of generated lvalues must be exactly N.");
802
1.04k
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
803
1.04k
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
804
1.04k
  SharedAddresses.emplace_back(First, Second);
805
1.04k
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
806
991
    OrigAddresses.emplace_back(First, Second);
807
52
  } else {
808
52
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
809
52
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
810
52
    OrigAddresses.emplace_back(First, Second);
811
52
  }
812
1.04k
}
813
814
1.04k
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
815
1.04k
  const auto *PrivateVD =
816
1.04k
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
817
1.04k
  QualType PrivateType = PrivateVD->getType();
818
1.04k
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
819
1.04k
  if (!PrivateType->isVariablyModifiedType()) {
820
808
    Sizes.emplace_back(
821
808
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
822
808
        nullptr);
823
808
    return;
824
808
  }
825
235
  llvm::Value *Size;
826
235
  llvm::Value *SizeInChars;
827
235
  auto *ElemType =
828
235
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
829
235
          ->getElementType();
830
235
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
831
235
  if (AsArraySection) {
832
184
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
833
184
                                     OrigAddresses[N].first.getPointer(CGF));
834
184
    Size = CGF.Builder.CreateNUWAdd(
835
184
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
836
184
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
837
51
  } else {
838
51
    SizeInChars =
839
51
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
840
51
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
841
51
  }
842
235
  Sizes.emplace_back(SizeInChars, Size);
843
235
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
844
235
      CGF,
845
235
      cast<OpaqueValueExpr>(
846
235
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
847
235
      RValue::get(Size));
848
235
  CGF.EmitVariablyModifiedType(PrivateType);
849
235
}
850
851
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
852
301
                                         llvm::Value *Size) {
853
301
  const auto *PrivateVD =
854
301
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
855
301
  QualType PrivateType = PrivateVD->getType();
856
301
  if (!PrivateType->isVariablyModifiedType()) {
857
193
    assert(!Size && !Sizes[N].second &&
858
193
           "Size should be nullptr for non-variably modified reduction "
859
193
           "items.");
860
193
    return;
861
193
  }
862
108
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
863
108
      CGF,
864
108
      cast<OpaqueValueExpr>(
865
108
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
866
108
      RValue::get(Size));
867
108
  CGF.EmitVariablyModifiedType(PrivateType);
868
108
}
869
870
void ReductionCodeGen::emitInitialization(
871
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
872
937
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
873
937
  assert(SharedAddresses.size() > N && "No variable was generated");
874
937
  const auto *PrivateVD =
875
937
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
876
937
  const OMPDeclareReductionDecl *DRD =
877
937
      getReductionInit(ClausesData[N].ReductionOp);
878
937
  QualType PrivateType = PrivateVD->getType();
879
937
  PrivateAddr = CGF.Builder.CreateElementBitCast(
880
937
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
881
937
  QualType SharedType = SharedAddresses[N].first.getType();
882
937
  SharedLVal = CGF.MakeAddrLValue(
883
937
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
884
937
                                       CGF.ConvertTypeForMem(SharedType)),
885
937
      SharedType, SharedAddresses[N].first.getBaseInfo(),
886
937
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
887
937
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
888
277
    if (DRD && 
DRD->getInitializer()31
)
889
27
      (void)DefaultInit(CGF);
890
277
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
891
660
  } else if (DRD && 
(40
DRD->getInitializer()40
||
!PrivateVD->hasInit()14
)) {
892
30
    (void)DefaultInit(CGF);
893
30
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
894
30
                                     PrivateAddr, SharedLVal.getAddress(CGF),
895
30
                                     SharedLVal.getType());
896
630
  } else if (!DefaultInit(CGF) && 
PrivateVD->hasInit()77
&&
897
77
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
898
77
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
899
77
                         PrivateVD->getType().getQualifiers(),
900
77
                         /*IsInitializer=*/false);
901
77
  }
902
937
}
903
904
160
bool ReductionCodeGen::needCleanups(unsigned N) {
905
160
  const auto *PrivateVD =
906
160
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
907
160
  QualType PrivateType = PrivateVD->getType();
908
160
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
909
160
  return DTorKind != QualType::DK_none;
910
160
}
911
912
void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
913
19
                                    Address PrivateAddr) {
914
19
  const auto *PrivateVD =
915
19
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
916
19
  QualType PrivateType = PrivateVD->getType();
917
19
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
918
19
  if (needCleanups(N)) {
919
19
    PrivateAddr = CGF.Builder.CreateElementBitCast(
920
19
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
921
19
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
922
19
  }
923
19
}
924
925
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
926
179
                          LValue BaseLV) {
927
179
  BaseTy = BaseTy.getNonReferenceType();
928
299
  while ((BaseTy->isPointerType() || 
BaseTy->isReferenceType()179
) &&
929
120
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
930
120
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
931
120
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
932
0
    } else {
933
0
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
934
0
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
935
0
    }
936
120
    BaseTy = BaseTy->getPointeeType();
937
120
  }
938
179
  return CGF.MakeAddrLValue(
939
179
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
940
179
                                       CGF.ConvertTypeForMem(ElTy)),
941
179
      BaseLV.getType(), BaseLV.getBaseInfo(),
942
179
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
943
179
}
944
945
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
946
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
947
179
                          llvm::Value *Addr) {
948
179
  Address Tmp = Address::invalid();
949
179
  Address TopTmp = Address::invalid();
950
179
  Address MostTopTmp = Address::invalid();
951
179
  BaseTy = BaseTy.getNonReferenceType();
952
299
  while ((BaseTy->isPointerType() || 
BaseTy->isReferenceType()179
) &&
953
120
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
954
120
    Tmp = CGF.CreateMemTemp(BaseTy);
955
120
    if (TopTmp.isValid())
956
58
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
957
62
    else
958
62
      MostTopTmp = Tmp;
959
120
    TopTmp = Tmp;
960
120
    BaseTy = BaseTy->getPointeeType();
961
120
  }
962
179
  llvm::Type *Ty = BaseLVType;
963
179
  if (Tmp.isValid())
964
62
    Ty = Tmp.getElementType();
965
179
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
966
179
  if (Tmp.isValid()) {
967
62
    CGF.Builder.CreateStore(Addr, Tmp);
968
62
    return MostTopTmp;
969
62
  }
970
117
  return Address(Addr, BaseLVAlignment);
971
117
}
972
973
1.05k
static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
974
1.05k
  const VarDecl *OrigVD = nullptr;
975
1.05k
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
976
277
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
977
411
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
978
134
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
979
285
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
980
8
      Base = TempASE->getBase()->IgnoreParenImpCasts();
981
277
    DE = cast<DeclRefExpr>(Base);
982
277
    OrigVD = cast<VarDecl>(DE->getDecl());
983
773
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
984
0
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
985
0
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
986
0
      Base = TempASE->getBase()->IgnoreParenImpCasts();
987
0
    DE = cast<DeclRefExpr>(Base);
988
0
    OrigVD = cast<VarDecl>(DE->getDecl());
989
0
  }
990
1.05k
  return OrigVD;
991
1.05k
}
992
993
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
994
886
                                               Address PrivateAddr) {
995
886
  const DeclRefExpr *DE;
996
886
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
997
179
    BaseDecls.emplace_back(OrigVD);
998
179
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
999
179
    LValue BaseLValue =
1000
179
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1001
179
                    OriginalBaseLValue);
1002
179
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1003
179
        BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
1004
179
    llvm::Value *PrivatePointer =
1005
179
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1006
179
            PrivateAddr.getPointer(),
1007
179
            SharedAddresses[N].first.getAddress(CGF).getType());
1008
179
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
1009
179
    return castToBase(CGF, OrigVD->getType(),
1010
179
                      SharedAddresses[N].first.getType(),
1011
179
                      OriginalBaseLValue.getAddress(CGF).getType(),
1012
179
                      OriginalBaseLValue.getAlignment(), Ptr);
1013
179
  }
1014
707
  BaseDecls.emplace_back(
1015
707
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1016
707
  return PrivateAddr;
1017
707
}
1018
1019
141
bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1020
141
  const OMPDeclareReductionDecl *DRD =
1021
141
      getReductionInit(ClausesData[N].ReductionOp);
1022
141
  return DRD && 
DRD->getInitializer()6
;
1023
141
}
1024
1025
12.1k
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1026
12.1k
  return CGF.EmitLoadOfPointerLValue(
1027
12.1k
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1028
12.1k
      getThreadIDVariable()->getType()->castAs<PointerType>());
1029
12.1k
}
1030
1031
62.5k
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
1032
62.5k
  if (!CGF.HaveInsertPoint())
1033
0
    return;
1034
  // 1.2.2 OpenMP Language Terminology
1035
  // Structured block - An executable statement with a single entry at the
1036
  // top and a single exit at the bottom.
1037
  // The point of exit cannot be a branch out of the structured block.
1038
  // longjmp() and throw() must not violate the entry/exit criteria.
1039
62.5k
  CGF.EHStack.pushTerminate();
1040
62.5k
  CodeGen(CGF);
1041
62.5k
  CGF.EHStack.popTerminate();
1042
62.5k
}
1043
1044
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1045
92
    CodeGenFunction &CGF) {
1046
92
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1047
92
                            getThreadIDVariable()->getType(),
1048
92
                            AlignmentSource::Decl);
1049
92
}
1050
1051
static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1052
16.5k
                                       QualType FieldTy) {
1053
16.5k
  auto *Field = FieldDecl::Create(
1054
16.5k
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1055
16.5k
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1056
16.5k
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1057
16.5k
  Field->setAccess(AS_public);
1058
16.5k
  DC->addDecl(Field);
1059
16.5k
  return Field;
1060
16.5k
}
1061
1062
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
1063
                                 StringRef Separator)
1064
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
1065
5.47k
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
1066
5.47k
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1067
1068
  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
1069
5.47k
  OMPBuilder.initialize();
1070
5.47k
  loadOffloadInfoMetadata();
1071
5.47k
}
1072
1073
5.48k
void CGOpenMPRuntime::clear() {
1074
5.48k
  InternalVars.clear();
1075
  // Clean non-target variable declarations possibly used only in debug info.
1076
12
  for (const auto &Data : EmittedNonTargetVariables) {
1077
12
    if (!Data.getValue().pointsToAliveValue())
1078
0
      continue;
1079
12
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1080
12
    if (!GV)
1081
0
      continue;
1082
12
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
1083
11
      continue;
1084
1
    GV->eraseFromParent();
1085
1
  }
1086
5.48k
}
1087
1088
47.7k
std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1089
47.7k
  SmallString<128> Buffer;
1090
47.7k
  llvm::raw_svector_ostream OS(Buffer);
1091
47.7k
  StringRef Sep = FirstSeparator;
1092
92.4k
  for (StringRef Part : Parts) {
1093
92.4k
    OS << Sep << Part;
1094
92.4k
    Sep = Separator;
1095
92.4k
  }
1096
47.7k
  return std::string(OS.str());
1097
47.7k
}
1098
1099
static llvm::Function *
1100
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
1101
                          const Expr *CombinerInitializer, const VarDecl *In,
1102
216
                          const VarDecl *Out, bool IsCombiner) {
1103
  // void .omp_combiner.(Ty *in, Ty *out);
1104
216
  ASTContext &C = CGM.getContext();
1105
216
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
1106
216
  FunctionArgList Args;
1107
216
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
1108
216
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1109
216
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
1110
216
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1111
216
  Args.push_back(&OmpOutParm);
1112
216
  Args.push_back(&OmpInParm);
1113
216
  const CGFunctionInfo &FnInfo =
1114
216
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
1115
216
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
1116
216
  std::string Name = CGM.getOpenMPRuntime().getName(
1117
137
      {IsCombiner ? "omp_combiner" : 
"omp_initializer"79
, ""});
1118
216
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
1119
216
                                    Name, &CGM.getModule());
1120
216
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
1121
216
  if (CGM.getLangOpts().Optimize) {
1122
0
    Fn->removeFnAttr(llvm::Attribute::NoInline);
1123
0
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
1124
0
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
1125
0
  }
1126
216
  CodeGenFunction CGF(CGM);
1127
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
1128
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
1129
216
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
1130
216
                    Out->getLocation());
1131
216
  CodeGenFunction::OMPPrivateScope Scope(CGF);
1132
216
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
1133
216
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
1134
216
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
1135
216
        .getAddress(CGF);
1136
216
  });
1137
216
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
1138
216
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
1139
216
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
1140
216
        .getAddress(CGF);
1141
216
  });
1142
216
  (void)Scope.Privatize();
1143
216
  if (!IsCombiner && 
Out->hasInit()79
&&
1144
50
      !CGF.isTrivialInitializer(Out->getInit())) {
1145
50
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
1146
50
                         Out->getType().getQualifiers(),
1147
50
                         /*IsInitializer=*/true);
1148
50
  }
1149
216
  if (CombinerInitializer)
1150
166
    CGF.EmitIgnoredExpr(CombinerInitializer);
1151
216
  Scope.ForceCleanup();
1152
216
  CGF.FinishFunction();
1153
216
  return Fn;
1154
216
}
1155
1156
void CGOpenMPRuntime::emitUserDefinedReduction(
1157
138
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1158
138
  if (UDRMap.count(D) > 0)
1159
1
    return;
1160
137
  llvm::Function *Combiner = emitCombinerOrInitializer(
1161
137
      CGM, D->getType(), D->getCombiner(),
1162
137
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1163
137
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1164
137
      /*IsCombiner=*/true);
1165
137
  llvm::Function *Initializer = nullptr;
1166
137
  if (const Expr *Init = D->getInitializer()) {
1167
79
    Initializer = emitCombinerOrInitializer(
1168
79
        CGM, D->getType(),
1169
29
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1170
50
                                                                     : nullptr,
1171
79
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1172
79
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1173
79
        /*IsCombiner=*/false);
1174
79
  }
1175
137
  UDRMap.try_emplace(D, Combiner, Initializer);
1176
137
  if (CGF) {
1177
38
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1178
38
    Decls.second.push_back(D);
1179
38
  }
1180
137
}
1181
1182
std::pair<llvm::Function *, llvm::Function *>
1183
240
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1184
240
  auto I = UDRMap.find(D);
1185
240
  if (I != UDRMap.end())
1186
206
    return I->second;
1187
34
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
1188
34
  return UDRMap.lookup(D);
1189
34
}
1190
1191
namespace {
1192
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1193
// Builder if one is present.
1194
struct PushAndPopStackRAII {
1195
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
1196
                      bool HasCancel)
1197
11.1k
      : OMPBuilder(OMPBuilder) {
1198
11.1k
    if (!OMPBuilder)
1199
0
      return;
1200
1201
    // The following callback is the crucial part of clangs cleanup process.
1202
    //
1203
    // NOTE:
1204
    // Once the OpenMPIRBuilder is used to create parallel regions (and
1205
    // similar), the cancellation destination (Dest below) is determined via
1206
    // IP. That means if we have variables to finalize we split the block at IP,
1207
    // use the new block (=BB) as destination to build a JumpDest (via
1208
    // getJumpDestInCurrentScope(BB)) which then is fed to
1209
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
1210
    // to push & pop an FinalizationInfo object.
1211
    // The FiniCB will still be needed but at the point where the
1212
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
1213
11.1k
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
1214
0
      assert(IP.getBlock()->end() == IP.getPoint() &&
1215
0
             "Clang CG should cause non-terminated block!");
1216
0
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1217
0
      CGF.Builder.restoreIP(IP);
1218
0
      CodeGenFunction::JumpDest Dest =
1219
0
          CGF.getOMPCancelDestination(OMPD_parallel);
1220
0
      CGF.EmitBranchThroughCleanup(Dest);
1221
0
    };
1222
1223
    // TODO: Remove this once we emit parallel regions through the
1224
    //       OpenMPIRBuilder as it can do this setup internally.
1225
11.1k
    llvm::OpenMPIRBuilder::FinalizationInfo FI(
1226
11.1k
        {FiniCB, OMPD_parallel, HasCancel});
1227
11.1k
    OMPBuilder->pushFinalizationCB(std::move(FI));
1228
11.1k
  }
1229
11.1k
  ~PushAndPopStackRAII() {
1230
11.1k
    if (OMPBuilder)
1231
11.1k
      OMPBuilder->popFinalizationCB();
1232
11.1k
  }
1233
  llvm::OpenMPIRBuilder *OMPBuilder;
1234
};
1235
} // namespace
1236
1237
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1238
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1239
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1240
11.1k
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1241
11.1k
  assert(ThreadIDVar->getType()->isPointerType() &&
1242
11.1k
         "thread id variable must be of type kmp_int32 *");
1243
11.1k
  CodeGenFunction CGF(CGM, true);
1244
11.1k
  bool HasCancel = false;
1245
11.1k
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1246
903
    HasCancel = OPD->hasCancel();
1247
10.2k
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1248
909
    HasCancel = OPD->hasCancel();
1249
9.30k
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1250
26
    HasCancel = OPSD->hasCancel();
1251
9.28k
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1252
207
    HasCancel = OPFD->hasCancel();
1253
9.07k
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1254
556
    HasCancel = OPFD->hasCancel();
1255
8.51k
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1256
356
    HasCancel = OPFD->hasCancel();
1257
8.16k
  else if (const auto *OPFD =
1258
616
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1259
616
    HasCancel = OPFD->hasCancel();
1260
7.54k
  else if (const auto *OPFD =
1261
1.08k
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1262
1.08k
    HasCancel = OPFD->hasCancel();
1263
1264
  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1265
  //       parallel region to make cancellation barriers work properly.
1266
11.1k
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1267
11.1k
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel);
1268
11.1k
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1269
11.1k
                                    HasCancel, OutlinedHelperName);
1270
11.1k
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1271
11.1k
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1272
11.1k
}
1273
1274
llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1275
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1276
5.80k
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1277
5.80k
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1278
5.80k
  return emitParallelOrTeamsOutlinedFunction(
1279
5.80k
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1280
5.80k
}
1281
1282
llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1283
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1284
5.32k
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1285
5.32k
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1286
5.32k
  return emitParallelOrTeamsOutlinedFunction(
1287
5.32k
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1288
5.32k
}
1289
1290
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
1291
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1292
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1293
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1294
707
    bool Tied, unsigned &NumberOfParts) {
1295
707
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1296
30
                                              PrePostActionTy &) {
1297
30
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1298
30
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1299
30
    llvm::Value *TaskArgs[] = {
1300
30
        UpLoc, ThreadID,
1301
30
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1302
30
                                    TaskTVar->getType()->castAs<PointerType>())
1303
30
            .getPointer(CGF)};
1304
30
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1305
30
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
1306
30
                        TaskArgs);
1307
30
  };
1308
707
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1309
707
                                                            UntiedCodeGen);
1310
707
  CodeGen.setAction(Action);
1311
707
  assert(!ThreadIDVar->getType()->isPointerType() &&
1312
707
         "thread id variable must be of type kmp_int32 for tasks");
1313
707
  const OpenMPDirectiveKind Region =
1314
224
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1315
483
                                                      : OMPD_task;
1316
707
  const CapturedStmt *CS = D.getCapturedStmt(Region);
1317
707
  bool HasCancel = false;
1318
707
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
1319
175
    HasCancel = TD->hasCancel();
1320
532
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
1321
37
    HasCancel = TD->hasCancel();
1322
495
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
1323
35
    HasCancel = TD->hasCancel();
1324
460
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
1325
33
    HasCancel = TD->hasCancel();
1326
1327
707
  CodeGenFunction CGF(CGM, true);
1328
707
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1329
707
                                        InnermostKind, HasCancel, Action);
1330
707
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1331
707
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
1332
707
  if (!Tied)
1333
16
    NumberOfParts = Action.getNumberOfParts();
1334
707
  return Res;
1335
707
}
1336
1337
static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1338
                             const RecordDecl *RD, const CGRecordLayout &RL,
1339
10.2k
                             ArrayRef<llvm::Constant *> Data) {
1340
10.2k
  llvm::StructType *StructTy = RL.getLLVMType();
1341
10.2k
  unsigned PrevIdx = 0;
1342
10.2k
  ConstantInitBuilder CIBuilder(CGM);
1343
10.2k
  auto DI = Data.begin();
1344
51.1k
  for (const FieldDecl *FD : RD->fields()) {
1345
51.1k
    unsigned Idx = RL.getLLVMFieldNo(FD);
1346
    // Fill the alignment.
1347
51.1k
    for (unsigned I = PrevIdx; I < Idx; 
++I0
)
1348
0
      Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1349
51.1k
    PrevIdx = Idx + 1;
1350
51.1k
    Fields.add(*DI);
1351
51.1k
    ++DI;
1352
51.1k
  }
1353
10.2k
}
1354
1355
template <class... As>
1356
static llvm::GlobalVariable *
1357
createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1358
                   ArrayRef<llvm::Constant *> Data, const Twine &Name,
1359
10.2k
                   As &&... Args) {
1360
10.2k
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1361
10.2k
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1362
10.2k
  ConstantInitBuilder CIBuilder(CGM);
1363
10.2k
  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1364
10.2k
  buildStructValue(Fields, CGM, RD, RL, Data);
1365
10.2k
  return Fields.finishAndCreateGlobal(
1366
10.2k
      Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1367
10.2k
      std::forward<As>(Args)...);
1368
10.2k
}
1369
1370
template <typename T>
1371
static void
1372
createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1373
                                         ArrayRef<llvm::Constant *> Data,
1374
                                         T &Parent) {
1375
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1376
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1377
  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1378
  buildStructValue(Fields, CGM, RD, RL, Data);
1379
  Fields.finishAndAddTo(Parent);
1380
}
1381
1382
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1383
2.41k
                                             bool AtCurrentPoint) {
1384
2.41k
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1385
2.41k
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1386
1387
2.41k
  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1388
2.41k
  if (AtCurrentPoint) {
1389
1.05k
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1390
1.05k
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1391
1.36k
  } else {
1392
1.36k
    Elem.second.ServiceInsertPt =
1393
1.36k
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1394
1.36k
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1395
1.36k
  }
1396
2.41k
}
1397
1398
9.87k
/// Removes the service insert point marker of the current function, if one
/// was created.
///
/// The map entry is reset to null before the instruction is erased from its
/// parent, so the entry never refers to an erased instruction.
void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Entry = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Entry.second.ServiceInsertPt)
    return;

  llvm::Instruction *Marker = Entry.second.ServiceInsertPt;
  Entry.second.ServiceInsertPt = nullptr;
  Marker->eraseFromParent();
}
1406
1407
/// Formats \p Loc as ";<file>;<function>;<line>;<column>;;" into \p Buffer.
///
/// <function> is the qualified name of CGF.CurFuncDecl when that is a
/// FunctionDecl and is left empty otherwise. File, line and column come from
/// the presumed location of \p Loc.
///
/// \returns a reference to the text written through \p Buffer.
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  // Build debug location
  const PresumedLoc Presumed =
      CGF.getContext().getSourceManager().getPresumedLoc(Loc);

  llvm::raw_svector_ostream Out(Buffer);
  Out << ";" << Presumed.getFilename() << ";";
  if (const auto *Fn = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    Out << Fn->getQualifiedNameAsString();
  Out << ";" << Presumed.getLine() << ";" << Presumed.getColumn() << ";;";
  return Out.str();
}
1419
1420
/// Returns the ident value describing \p Loc for use in OpenMP runtime calls.
///
/// When debug info is disabled or \p Loc is invalid, the OpenMPIRBuilder's
/// default source location string is used. Otherwise the string is built from
/// the qualified name of the current function (empty if CGF.CurFuncDecl is
/// not a FunctionDecl) and the presumed file, line and column of \p Loc.
/// \p Flags is passed on as llvm::omp::IdentFlag together with the default
/// "reserved2" flags of this runtime.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  const bool HasDebugInfo =
      CGM.getCodeGenOpts().getDebugInfo() != codegenoptions::NoDebugInfo;

  llvm::Constant *LocString;
  if (HasDebugInfo && Loc.isValid()) {
    std::string FnName;
    if (const auto *Fn = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FnName = Fn->getQualifiedNameAsString();
    const PresumedLoc Presumed =
        CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    LocString = OMPBuilder.getOrCreateSrcLocStr(
        FnName.c_str(), Presumed.getFilename(), Presumed.getLine(),
        Presumed.getColumn());
  } else {
    LocString = OMPBuilder.getOrCreateDefaultSrcLocStr();
  }

  const unsigned Reserved2 = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(LocString, llvm::omp::IdentFlag(Flags),
                                     Reserved2);
}
1442
1443
/// Returns the OpenMP global thread id value to use in the current function.
///
/// Strategy, in order:
///  1. With the OpenMPIRBuilder enabled, delegate entirely to it.
///  2. Reuse the thread id cached for CGF.CurFn in OpenMPLocThreadIDMap.
///  3. Inside an OpenMP region that carries a thread id variable, load it
///     (subject to the exception-related checks below) and cache the load if
///     it was emitted in the block holding CGF.AllocaInsertPt.
///  4. Otherwise emit a call to __kmpc_global_thread_num at the service
///     insert point and cache the call.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> IdentBuf;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    auto *IdentStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, IdentBuf));
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(IdentStr));
  }

  // Check whether we've already cached a load of the thread id in this
  // function.
  auto CachedIt = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (CachedIt != OpenMPLocThreadIDMap.end())
    if (llvm::Value *CachedTID = CachedIt->second.ThreadID)
      return CachedTID;

  // If exceptions are enabled, do not use parameter to avoid possible crash.
  auto *RegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (RegionInfo && RegionInfo->getThreadIDVariable()) {
    // Check if this an outlined function with thread id passed as argument.
    LValue TIDLVal = RegionInfo->getThreadIDVariableLValue(CGF);
    llvm::BasicBlock *EntryBB = CGF.AllocaInsertPt->getParent();
    if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
        !CGF.getLangOpts().CXXExceptions ||
        CGF.Builder.GetInsertBlock() == EntryBB ||
        !isa<llvm::Instruction>(TIDLVal.getPointer(CGF)) ||
        cast<llvm::Instruction>(TIDLVal.getPointer(CGF))->getParent() ==
            EntryBB ||
        cast<llvm::Instruction>(TIDLVal.getPointer(CGF))->getParent() ==
            CGF.Builder.GetInsertBlock()) {
      llvm::Value *LoadedTID = CGF.EmitLoadOfScalar(TIDLVal, Loc);
      // If value loaded in entry block, cache it and use it everywhere in
      // function.
      if (CGF.Builder.GetInsertBlock() == EntryBB)
        OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn).second.ThreadID =
            LoadedTID;
      return LoadedTID;
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Entry = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Entry.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // The guard puts the builder back where it was once the call is emitted.
  CGBuilderTy::InsertPointGuard RestoreIP(CGF.Builder);
  CGF.Builder.SetInsertPoint(Entry.second.ServiceInsertPt);
  llvm::CallInst *TIDCall = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  TIDCall->setCallingConv(CGF.getRuntimeCC());
  Entry.second.ThreadID = TIDCall;
  return TIDCall;
}
1510
1511
66.7k
/// Drops all per-function state kept for CGF.CurFn once its code generation
/// is finished.
///
/// This clears the service insert point and thread id cache entry, forgets
/// the user-defined reductions and mappers that were registered for the
/// function (removing them from UDRMap / UDMMap as well), and erases the
/// function's entries in LastprivateConditionalToTypes and
/// FunctionToUntiedTaskStackMap.
void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  // Look the function up once and erase through the iterator, mirroring the
  // handling of FunctionUDMMap below (previously: count + operator[] + erase).
  auto UDRIt = FunctionUDRMap.find(CGF.CurFn);
  if (UDRIt != FunctionUDRMap.end()) {
    for (const auto *D : UDRIt->second)
      UDRMap.erase(D);
    FunctionUDRMap.erase(UDRIt);
  }
  auto UDMIt = FunctionUDMMap.find(CGF.CurFn);
  if (UDMIt != FunctionUDMMap.end()) {
    for (const auto *D : UDMIt->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(UDMIt);
  }
  LastprivateConditionalToTypes.erase(CGF.CurFn);
  FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
}
1531
1532
11.2k
/// Returns the ident pointer type held by the OpenMPIRBuilder (its IdentPtr
/// member); it is used as the "loc" parameter type of the runtime functions
/// declared in this file.
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  llvm::Type *IdentPtrTy = OMPBuilder.IdentPtr;
  return IdentPtrTy;
}
1535
1536
9.80k
/// Returns a pointer to the kmpc_micro function type, creating and caching
/// the function type in Kmpc_MicroTy on first use.
llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (Kmpc_MicroTy == nullptr) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
    llvm::Type *Int32PtrTy = llvm::PointerType::getUnqual(CGM.Int32Ty);
    llvm::Type *ParamTys[] = {Int32PtrTy, Int32PtrTy};
    Kmpc_MicroTy =
        llvm::FunctionType::get(CGM.VoidTy, ParamTys, /*isVarArg=*/true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}
1545
1546
/// Declares the __kmpc_for_static_init_{4,4u,8,8u} runtime entry matching the
/// iteration variable described by \p IVSize (32 or 64 bits) and \p IVSigned.
///
/// \returns the callee for
///   void fn(ident_t *loc, i32 tid, i32 schedtype, i32 *p_lastiter,
///           IV *p_lower, IV *p_upper, IV *p_stride, IV incr, IV chunk)
/// where IV is i32 or i64 according to \p IVSize.
llvm::FunctionCallee
CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  const bool Is32Bit = IVSize == 32;

  StringRef FnName;
  if (Is32Bit)
    FnName =
        IVSigned ? "__kmpc_for_static_init_4" : "__kmpc_for_static_init_4u";
  else
    FnName =
        IVSigned ? "__kmpc_for_static_init_8" : "__kmpc_for_static_init_8u";

  llvm::Type *IVTy = Is32Bit ? CGM.Int32Ty : CGM.Int64Ty;
  auto *IVPtrTy = llvm::PointerType::getUnqual(IVTy);
  llvm::Type *ParamTys[] = {
    getIdentTyPointerTy(),                     // loc
    CGM.Int32Ty,                               // tid
    CGM.Int32Ty,                               // schedtype
    llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
    IVPtrTy,                                   // p_lower
    IVPtrTy,                                   // p_upper
    IVPtrTy,                                   // p_stride
    IVTy,                                      // incr
    IVTy                                       // chunk
  };
  auto *Signature =
      llvm::FunctionType::get(CGM.VoidTy, ParamTys, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(Signature, FnName);
}
1571
1572
/// Declares the __kmpc_dispatch_init_{4,4u,8,8u} runtime entry matching the
/// iteration variable described by \p IVSize (32 or 64 bits) and \p IVSigned.
///
/// \returns the callee for
///   void fn(ident_t *loc, i32 tid, i32 schedtype,
///           IV lower, IV upper, IV stride, IV chunk)
/// where IV is i32 or i64 according to \p IVSize.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  const bool Is32Bit = IVSize == 32;

  StringRef FnName;
  if (Is32Bit)
    FnName = IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u";
  else
    FnName = IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u";

  llvm::Type *IVTy = Is32Bit ? CGM.Int32Ty : CGM.Int64Ty;
  llvm::Type *ParamTys[] = { getIdentTyPointerTy(), // loc
                             CGM.Int32Ty,           // tid
                             CGM.Int32Ty,           // schedtype
                             IVTy,                  // lower
                             IVTy,                  // upper
                             IVTy,                  // stride
                             IVTy                   // chunk
  };
  auto *Signature =
      llvm::FunctionType::get(CGM.VoidTy, ParamTys, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(Signature, FnName);
}
1593
1594
/// Declares the __kmpc_dispatch_fini_{4,4u,8,8u} runtime entry matching the
/// iteration variable described by \p IVSize (32 or 64 bits) and \p IVSigned.
///
/// \returns the callee for void fn(ident_t *loc, i32 tid).
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");

  StringRef FnName;
  if (IVSize == 32)
    FnName = IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u";
  else
    FnName = IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u";

  llvm::Type *ParamTys[] = {
      getIdentTyPointerTy(), // loc
      CGM.Int32Ty,           // tid
  };
  auto *Signature =
      llvm::FunctionType::get(CGM.VoidTy, ParamTys, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(Signature, FnName);
}
1610
1611
/// Declares the __kmpc_dispatch_next_{4,4u,8,8u} runtime entry matching the
/// iteration variable described by \p IVSize (32 or 64 bits) and \p IVSigned.
///
/// \returns the callee for
///   i32 fn(ident_t *loc, i32 tid, i32 *p_lastiter,
///          IV *p_lower, IV *p_upper, IV *p_stride)
/// where IV is i32 or i64 according to \p IVSize.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  const bool Is32Bit = IVSize == 32;

  StringRef FnName;
  if (Is32Bit)
    FnName = IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u";
  else
    FnName = IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u";

  llvm::Type *IVTy = Is32Bit ? CGM.Int32Ty : CGM.Int64Ty;
  auto *IVPtrTy = llvm::PointerType::getUnqual(IVTy);
  llvm::Type *ParamTys[] = {
    getIdentTyPointerTy(),                     // loc
    CGM.Int32Ty,                               // tid
    llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
    IVPtrTy,                                   // p_lower
    IVPtrTy,                                   // p_upper
    IVPtrTy                                    // p_stride
  };
  auto *Signature =
      llvm::FunctionType::get(CGM.Int32Ty, ParamTys, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(Signature, FnName);
}
1633
1634
/// Obtain information that uniquely identifies a target entry. This
1635
/// consists of the file and device IDs as well as line number associated with
1636
/// the relevant entry source location.
1637
static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1638
                                     unsigned &DeviceID, unsigned &FileID,
1639
15.3k
                                     unsigned &LineNum) {
1640
15.3k
  SourceManager &SM = C.getSourceManager();
1641
1642
  // The loc should be always valid and have a file ID (the user cannot use
1643
  // #pragma directives in macros)
1644
1645
15.3k
  assert(Loc.isValid() && "Source location is expected to be always valid.");
1646
1647
15.3k
  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1648
15.3k
  assert(PLoc.isValid() && "Source location is expected to be always valid.");
1649
1650
15.3k
  llvm::sys::fs::UniqueID ID;
1651
15.3k
  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1652
0
    SM.getDiagnostics().Report(diag::err_cannot_open_file)
1653
0
        << PLoc.getFilename() << EC.message();
1654
1655
15.3k
  DeviceID = ID.getDevice();
1656
15.3k
  FileID = ID.getFile();
1657
15.3k
  LineNum = PLoc.getLine();
1658
15.3k
}
1659
1660
502
/// Returns the address of the "<mangled-name>[_<fileid>]_decl_tgt_ref_ptr"
/// reference pointer for the declare target variable \p VD.
///
/// A reference pointer is only used for variables mapped with 'link', or with
/// 'to' when HasRequiresUnifiedSharedMemory is set; in every other case (and
/// always in OpenMPSimd mode) an invalid address is returned. The pointer
/// global is created on first request with weak linkage, initialized with the
/// address of \p VD when not compiling for the device, and registered via
/// registerTargetGlobalVariable.
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();

  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapTy =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  const bool UsesRefPtr =
      MapTy && (*MapTy == OMPDeclareTargetDeclAttr::MT_Link ||
                (*MapTy == OMPDeclareTargetDeclAttr::MT_To &&
                 HasRequiresUnifiedSharedMemory));
  if (!UsesRefPtr)
    return Address::invalid();

  SmallString<64> RefPtrName;
  {
    llvm::raw_svector_ostream NameOS(RefPtrName);
    NameOS << CGM.getMangledName(GlobalDecl(VD));
    // Names without external visibility get the file ID appended.
    if (!VD->isExternallyVisible()) {
      unsigned DeviceID, FileID, Line;
      getTargetEntryUniqueInfo(CGM.getContext(),
                               VD->getCanonicalDecl()->getBeginLoc(),
                               DeviceID, FileID, Line);
      NameOS << llvm::format("_%x", FileID);
    }
    NameOS << "_decl_tgt_ref_ptr";
  }

  llvm::Value *RefPtr = CGM.getModule().getNamedValue(RefPtrName);
  if (!RefPtr) {
    QualType PtrQTy = CGM.getContext().getPointerType(VD->getType());
    RefPtr = getOrCreateInternalVariable(
        CGM.getTypes().ConvertTypeForMem(PtrQTy), RefPtrName);

    auto *RefPtrGV = cast<llvm::GlobalVariable>(RefPtr);
    RefPtrGV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

    if (!CGM.getLangOpts().OpenMPIsDevice)
      RefPtrGV->setInitializer(CGM.GetAddrOfGlobal(VD));
    registerTargetGlobalVariable(VD, cast<llvm::Constant>(RefPtr));
  }
  return Address(RefPtr, CGM.getContext().getDeclAlign(VD));
}
1698
1699
/// Returns the internal i8** variable used as the runtime cache for the
/// threadprivate variable \p VD, creating it on first use.
///
/// The variable is named after the mangled name of \p VD followed by the
/// "cache" suffix produced by getName. Must not be called when TLS is both
/// requested and supported by the target.
llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
  const std::string CacheSuffix = getName({"cache", ""});
  const StringRef MangledName = CGM.getMangledName(VD);
  return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
                                     Twine(MangledName).concat(CacheSuffix));
}
1708
1709
/// Returns the address of the current thread's copy of the threadprivate
/// variable \p VD whose original storage is \p VDAddr.
///
/// When TLS is requested and supported by the target, \p VDAddr is returned
/// unchanged. Otherwise a call to __kmpc_threadprivate_cached(loc, tid, &var,
/// size, cache) is emitted and its result is returned with the alignment of
/// \p VDAddr.
Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  const bool UseTLS = CGM.getLangOpts().OpenMPUseTLS &&
                      CGM.getContext().getTargetInfo().isTLSSupported();
  if (UseTLS)
    return VDAddr;

  llvm::Type *ElemTy = VDAddr.getElementType();
  // The arguments are materialized one by one, in call order.
  llvm::Value *IdentLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *VarPtr =
      CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy);
  llvm::Value *VarSize = CGM.getSize(CGM.GetTargetTypeStoreSize(ElemTy));
  llvm::Value *Cache = getOrCreateThreadPrivateCache(VD);
  llvm::Value *CallArgs[] = {IdentLoc, ThreadID, VarPtr, VarSize, Cache};

  llvm::Value *PrivateCopy = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
      CallArgs);
  return Address(PrivateCopy, VDAddr.getAlignment());
}
1729
1730
void CGOpenMPRuntime::emitThreadPrivateVarInit(
1731
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1732
37
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1733
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1734
  // library.
1735
37
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1736
37
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1737
37
                          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1738
37
                      OMPLoc);
1739
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1740
  // to register constructor/destructor for variable.
1741
37
  llvm::Value *Args[] = {
1742
37
      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1743
37
      Ctor, CopyCtor, Dtor};
1744
37
  CGF.EmitRuntimeCall(
1745
37
      OMPBuilder.getOrCreateRuntimeFunction(
1746
37
          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1747
37
      Args);
1748
37
}
1749
1750
/// Emits the constructor/destructor helpers for the threadprivate variable
/// \p VD and registers them with the OpenMP runtime.
///
/// Nothing is emitted when TLS is requested and supported by the target, when
/// \p VD has no definition, when the variable was already processed, or when
/// neither a constructor nor a destructor helper is needed.
///
/// \param VDAddr      storage of the original variable.
/// \param PerformInit whether a constructor helper re-running the
///                    initializer should be generated (C++ only).
/// \param CGF         if non-null, the registration calls are emitted into
///                    this function; if null, a standalone
///                    "__omp_threadprivate_init_" function is created.
/// \returns the standalone init function when one was created, otherwise
///          nullptr.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Bail out without a definition or if this variable was seen before.
  if (!VD ||
      !ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second)
    return nullptr;

  ASTContext &Ctx = CGM.getContext();
  QualType VarQTy = VD->getType();

  llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    // Generate function that re-emits the declaration's initializer into the
    // threadprivate copy of the variable VD
    CodeGenFunction CtorCGF(CGM);
    FunctionArgList CtorArgs;
    ImplicitParamDecl DstParam(Ctx, /*DC=*/nullptr, Loc,
                               /*Id=*/nullptr, Ctx.VoidPtrTy,
                               ImplicitParamDecl::Other);
    CtorArgs.push_back(&DstParam);

    const auto &CtorFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
        Ctx.VoidPtrTy, CtorArgs);
    llvm::FunctionType *CtorFnTy = CGM.getTypes().GetFunctionType(CtorFI);
    std::string CtorName = getName({"__kmpc_global_ctor_", ""});
    llvm::Function *CtorFn =
        CGM.CreateGlobalInitOrCleanUpFunction(CtorFnTy, CtorName, CtorFI, Loc);
    CtorCGF.StartFunction(GlobalDecl(), Ctx.VoidPtrTy, CtorFn, CtorFI,
                          CtorArgs, Loc, Loc);

    // Initialize the object the single pointer argument points to.
    llvm::Value *DstVal = CtorCGF.EmitLoadOfScalar(
        CtorCGF.GetAddrOfLocalVar(&DstParam), /*Volatile=*/false,
        Ctx.VoidPtrTy, DstParam.getLocation());
    Address DstAddr = CtorCGF.Builder.CreateElementBitCast(
        Address(DstVal, VDAddr.getAlignment()),
        CtorCGF.ConvertTypeForMem(VarQTy));
    CtorCGF.EmitAnyExprToMem(Init, DstAddr, Init->getType().getQualifiers(),
                             /*IsInitializer=*/true);

    // Reload the argument and hand it back as the function result.
    DstVal = CtorCGF.EmitLoadOfScalar(
        CtorCGF.GetAddrOfLocalVar(&DstParam), /*Volatile=*/false,
        Ctx.VoidPtrTy, DstParam.getLocation());
    CtorCGF.Builder.CreateStore(DstVal, CtorCGF.ReturnValue);
    CtorCGF.FinishFunction();
    Ctor = CtorFn;
  }

  const auto DtorKind = VarQTy.isDestructedType();
  if (DtorKind != QualType::DK_none) {
    // Generate function that emits destructor call for the threadprivate copy
    // of the variable VD
    CodeGenFunction DtorCGF(CGM);
    FunctionArgList DtorArgs;
    ImplicitParamDecl ObjParam(Ctx, /*DC=*/nullptr, Loc,
                               /*Id=*/nullptr, Ctx.VoidPtrTy,
                               ImplicitParamDecl::Other);
    DtorArgs.push_back(&ObjParam);

    const auto &DtorFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
        Ctx.VoidTy, DtorArgs);
    llvm::FunctionType *DtorFnTy = CGM.getTypes().GetFunctionType(DtorFI);
    std::string DtorName = getName({"__kmpc_global_dtor_", ""});
    llvm::Function *DtorFn =
        CGM.CreateGlobalInitOrCleanUpFunction(DtorFnTy, DtorName, DtorFI, Loc);
    auto NoLoc = ApplyDebugLocation::CreateEmpty(DtorCGF);
    DtorCGF.StartFunction(GlobalDecl(), Ctx.VoidTy, DtorFn, DtorFI, DtorArgs,
                          Loc, Loc);
    // Create a scope with an artificial location for the body of this function.
    auto ArtificialLoc = ApplyDebugLocation::CreateArtificial(DtorCGF);
    llvm::Value *ObjVal = DtorCGF.EmitLoadOfScalar(
        DtorCGF.GetAddrOfLocalVar(&ObjParam),
        /*Volatile=*/false, Ctx.VoidPtrTy, ObjParam.getLocation());
    DtorCGF.emitDestroy(Address(ObjVal, VDAddr.getAlignment()), VarQTy,
                        DtorCGF.getDestroyer(DtorKind),
                        DtorCGF.needsEHCleanup(DtorKind));
    DtorCGF.FinishFunction();
    Dtor = DtorFn;
  }

  // Do not emit init function if it is not required.
  if (!Ctor && !Dtor)
    return nullptr;

  // Copying constructor for the threadprivate variable.
  // Must be NULL - reserved by runtime, but currently it requires that this
  // parameter is always NULL. Otherwise it fires assertion.
  llvm::Type *CopyCtorParamTys[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
  auto *CopyCtorPtrTy =
      llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorParamTys,
                              /*isVarArg=*/false)
          ->getPointerTo();
  CopyCtor = llvm::Constant::getNullValue(CopyCtorPtrTy);

  // Replace a missing helper by a typed null function pointer.
  if (!Ctor) {
    auto *CtorPtrTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                              /*isVarArg=*/false)
                          ->getPointerTo();
    Ctor = llvm::Constant::getNullValue(CtorPtrTy);
  }
  if (!Dtor) {
    auto *DtorPtrTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                              /*isVarArg=*/false)
                          ->getPointerTo();
    Dtor = llvm::Constant::getNullValue(DtorPtrTy);
  }

  if (CGF) {
    // Register from within the function supplied by the caller.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
    return nullptr;
  }

  // No enclosing function: wrap the registration in its own init function.
  auto *InitFnTy = llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
  std::string InitName = getName({"__omp_threadprivate_init_", ""});
  llvm::Function *InitFn = CGM.CreateGlobalInitOrCleanUpFunction(
      InitFnTy, InitName, CGM.getTypes().arrangeNullaryFunction());
  CodeGenFunction InitCGF(CGM);
  FunctionArgList NoArgs;
  InitCGF.StartFunction(GlobalDecl(), Ctx.VoidTy, InitFn,
                        CGM.getTypes().arrangeNullaryFunction(), NoArgs, Loc,
                        Loc);
  emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  InitCGF.FinishFunction();
  return InitFn;
}
1869
1870
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
1871
                                                     llvm::GlobalVariable *Addr,
1872
3.89k
                                                     bool PerformInit) {
1873
3.89k
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
1874
645
      !CGM.getLangOpts().OpenMPIsDevice)
1875
583
    return false;
1876
3.30k
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1877
3.30k
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1878
3.30k
  if (!Res || 
*Res == OMPDeclareTargetDeclAttr::MT_Link146
||
1879
146
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
1880
146
       HasRequiresUnifiedSharedMemory))
1881
3.16k
    return CGM.getLangOpts().OpenMPIsDevice;
1882
146
  VD = VD->getDefinition(CGM.getContext());
1883
146
  assert(VD && "Unknown VarDecl");
1884
1885
146
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
1886
3
    return CGM.getLangOpts().OpenMPIsDevice;
1887
1888
143
  QualType ASTTy = VD->getType();
1889
143
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
1890
1891
  // Produce the unique prefix to identify the new target regions. We use
1892
  // the source location of the variable declaration which we know to not
1893
  // conflict with any target region.
1894
143
  unsigned DeviceID;
1895
143
  unsigned FileID;
1896
143
  unsigned Line;
1897
143
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
1898
143
  SmallString<128> Buffer, Out;
1899
143
  {
1900
143
    llvm::raw_svector_ostream OS(Buffer);
1901
143
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
1902
143
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
1903
143
  }
1904
1905
143
  const Expr *Init = VD->getAnyInitializer();
1906
143
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1907
143
    llvm::Constant *Ctor;
1908
143
    llvm::Constant *ID;
1909
143
    if (CGM.getLangOpts().OpenMPIsDevice) {
1910
      // Generate function that re-emits the declaration's initializer into
1911
      // the threadprivate copy of the variable VD
1912
74
      CodeGenFunction CtorCGF(CGM);
1913
1914
74
      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
1915
74
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1916
74
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
1917
74
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
1918
74
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
1919
74
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
1920
74
                            FunctionArgList(), Loc, Loc);
1921
74
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
1922
74
      CtorCGF.EmitAnyExprToMem(Init,
1923
74
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
1924
74
                               Init->getType().getQualifiers(),
1925
74
                               /*IsInitializer=*/true);
1926
74
      CtorCGF.FinishFunction();
1927
74
      Ctor = Fn;
1928
74
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
1929
74
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
1930
69
    } else {
1931
69
      Ctor = new llvm::GlobalVariable(
1932
69
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
1933
69
          llvm::GlobalValue::PrivateLinkage,
1934
69
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
1935
69
      ID = Ctor;
1936
69
    }
1937
1938
    // Register the information for the entry associated with the constructor.
1939
143
    Out.clear();
1940
143
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
1941
143
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
1942
143
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
1943
143
  }
1944
143
  if (VD->getType().isDestructedType() != QualType::DK_none) {
1945
92
    llvm::Constant *Dtor;
1946
92
    llvm::Constant *ID;
1947
92
    if (CGM.getLangOpts().OpenMPIsDevice) {
1948
      // Generate function that emits destructor call for the threadprivate
1949
      // copy of the variable VD
1950
58
      CodeGenFunction DtorCGF(CGM);
1951
1952
58
      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
1953
58
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1954
58
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
1955
58
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
1956
58
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1957
58
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
1958
58
                            FunctionArgList(), Loc, Loc);
1959
      // Create a scope with an artificial location for the body of this
1960
      // function.
1961
58
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1962
58
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
1963
58
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1964
58
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1965
58
      DtorCGF.FinishFunction();
1966
58
      Dtor = Fn;
1967
58
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
1968
58
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
1969
34
    } else {
1970
34
      Dtor = new llvm::GlobalVariable(
1971
34
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
1972
34
          llvm::GlobalValue::PrivateLinkage,
1973
34
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
1974
34
      ID = Dtor;
1975
34
    }
1976
    // Register the information for the entry associated with the destructor.
1977
92
    Out.clear();
1978
92
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
1979
92
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
1980
92
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
1981
92
  }
1982
143
  return CGM.getLangOpts().OpenMPIsDevice;
1983
143
}
1984
1985
/// Returns the address of a compiler-generated threadprivate variable of type
/// \p VarType identified by \p Name.
///
/// The backing internal variable is named \p Name plus the "artificial"
/// suffix. With OpenMP TLS requested and supported by the target it is simply
/// marked thread-local and returned. Otherwise a call to
/// __kmpc_threadprivate_cached is emitted (using an additional internal
/// "cache" variable) and its result, cast to a pointer to the memory type of
/// \p VarType, is returned.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string ArtificialSuffix = getName({"artificial", ""});
  llvm::Type *VarMemTy = CGF.ConvertTypeForMem(VarType);
  llvm::Value *Global = getOrCreateInternalVariable(
      VarMemTy, Twine(Name).concat(ArtificialSuffix));

  const auto &LangOpts = CGM.getLangOpts();
  if (LangOpts.OpenMP && LangOpts.OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    cast<llvm::GlobalVariable>(Global)->setThreadLocal(/*Val=*/true);
    return Address(Global, CGM.getContext().getTypeAlignInChars(VarType));
  }

  std::string CacheSuffix = getName({"cache", ""});
  // The arguments are materialized one by one, in call order.
  llvm::Value *IdentLoc = emitUpdateLocation(CGF, SourceLocation());
  llvm::Value *ThreadID = getThreadID(CGF, SourceLocation());
  llvm::Value *GlobalPtr =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Global, CGM.VoidPtrTy);
  llvm::Value *VarSize =
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false);
  llvm::Value *Cache = getOrCreateInternalVariable(
      CGM.VoidPtrPtrTy,
      Twine(Name).concat(ArtificialSuffix).concat(CacheSuffix));
  llvm::Value *CallArgs[] = {IdentLoc, ThreadID, GlobalPtr, VarSize, Cache};

  llvm::Value *PrivateCopy = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
      CallArgs);
  return Address(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                     PrivateCopy, VarMemTy->getPointerTo(/*AddrSpace=*/0)),
                 CGM.getContext().getTypeAlignInChars(VarType));
}
2015
2016
/// Emit code for an OpenMP 'if' clause.
///
/// \p ThenGen is emitted for the case where \p Cond is true and \p ElseGen for
/// the case where it is false. If the condition constant-folds, only the live
/// arm is emitted and no branch is generated.
void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // Fast path: a condition that folds to a constant lets us skip both the
  // condition itself and the dead arm of the if/else.
  bool FoldedValue;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, FoldedValue)) {
    const RegionCodeGenTy &LiveArm = FoldedValue ? ThenGen : ElseGen;
    LiveArm(CGF);
    return;
  }

  // The condition did not fold (or could not be elided): emit a real
  // conditional branch with both arms.
  llvm::BasicBlock *ThenBB = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBB = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *EndBB = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBB, ElseBB, /*TrueCount=*/0);

  // 'then' arm.
  CGF.EmitBlock(ThenBB);
  ThenGen(CGF);
  CGF.EmitBranch(EndBB);

  // 'else' arm. The unconditional branches are not meant to carry a line
  // number.
  // NOTE(review): the ApplyDebugLocation object below is a discarded
  // temporary; confirm that it stays in effect for the emission that follows.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBB);
  ElseGen(CGF);
  // Same as above: no line number wanted for the unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(EndBB);

  // Continuation block for whatever follows the if.
  CGF.EmitBlock(EndBB, /*IsFinished=*/true);
}
2054
2055
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2056
                                       llvm::Function *OutlinedFn,
2057
                                       ArrayRef<llvm::Value *> CapturedVars,
2058
5.09k
                                       const Expr *IfCond) {
2059
5.09k
  if (!CGF.HaveInsertPoint())
2060
0
    return;
2061
5.09k
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2062
5.09k
  auto &M = CGM.getModule();
2063
5.09k
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
2064
4.95k
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
2065
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2066
4.95k
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2067
4.95k
    llvm::Value *Args[] = {
2068
4.95k
        RTLoc,
2069
4.95k
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2070
4.95k
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2071
4.95k
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
2072
4.95k
    RealArgs.append(std::begin(Args), std::end(Args));
2073
4.95k
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2074
2075
4.95k
    llvm::FunctionCallee RTLFn =
2076
4.95k
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
2077
4.95k
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
2078
4.95k
  };
2079
5.09k
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
2080
311
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
2081
311
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2082
311
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2083
    // Build calls:
2084
    // __kmpc_serialized_parallel(&Loc, GTid);
2085
311
    llvm::Value *Args[] = {RTLoc, ThreadID};
2086
311
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2087
311
                            M, OMPRTL___kmpc_serialized_parallel),
2088
311
                        Args);
2089
2090
    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
2091
311
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2092
311
    Address ZeroAddrBound =
2093
311
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2094
311
                                         /*Name=*/".bound.zero.addr");
2095
311
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
2096
311
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2097
    // ThreadId for serialized parallels is 0.
2098
311
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
2099
311
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
2100
311
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2101
311
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2102
2103
    // __kmpc_end_serialized_parallel(&Loc, GTid);
2104
311
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2105
311
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2106
311
                            M, OMPRTL___kmpc_end_serialized_parallel),
2107
311
                        EndArgs);
2108
311
  };
2109
5.09k
  if (IfCond) {
2110
389
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2111
4.70k
  } else {
2112
4.70k
    RegionCodeGenTy ThenRCG(ThenGen);
2113
4.70k
    ThenRCG(CGF);
2114
4.70k
  }
2115
5.09k
}
2116
2117
// If we're inside an (outlined) parallel region, use the region info's
2118
// thread-ID variable (it is passed in a first argument of the outlined function
2119
// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2120
// regular serial code region, get thread ID by calling kmp_int32
2121
// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2122
// return the address of that temp.
2123
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2124
1.40k
                                             SourceLocation Loc) {
2125
1.40k
  if (auto *OMPRegionInfo =
2126
1.37k
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2127
1.37k
    if (OMPRegionInfo->getThreadIDVariable())
2128
542
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2129
2130
866
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
2131
866
  QualType Int32Ty =
2132
866
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2133
866
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2134
866
  CGF.EmitStoreOfScalar(ThreadID,
2135
866
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2136
2137
866
  return ThreadIDTemp;
2138
866
}
2139
2140
/// Look up the internal global variable named \p Name, creating it on first
/// use.
///
/// A newly created variable has type \p Ty, common linkage, a null
/// initializer, is not thread-local and lives in \p AddressSpace. Requesting
/// an existing name with a different type trips an assertion.
llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
    llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
  // Render the Twine into a stable buffer so it can be used as the map key.
  SmallString<256> NameStorage;
  llvm::raw_svector_ostream NameStream(NameStorage);
  NameStream << Name;
  StringRef RuntimeName = NameStream.str();

  auto &Entry = *InternalVars.try_emplace(RuntimeName, nullptr).first;
  if (!Entry.second) {
    // First request for this name: create the global and remember it.
    return Entry.second = new llvm::GlobalVariable(
               CGM.getModule(), Ty, /*IsConstant*/ false,
               llvm::GlobalValue::CommonLinkage,
               llvm::Constant::getNullValue(Ty), Entry.first(),
               /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal,
               AddressSpace);
  }

  assert(Entry.second->getType()->getPointerElementType() == Ty &&
         "OMP internal variable has different type than requested");
  return &*Entry.second;
}
2159
2160
579
/// Return the internal lock variable for the critical region named
/// \p CriticalName. The variable's name is built by getName() from
/// "gomp_critical_user_<CriticalName>" and "var".
llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  const std::string LockPrefix =
      Twine("gomp_critical_user_", CriticalName).str();
  const std::string LockName = getName({LockPrefix, "var"});
  return getOrCreateInternalVariable(KmpCriticalNameTy, LockName);
}
2165
2166
namespace {
2167
/// Common pre(post)-action for different OpenMP constructs.
2168
class CommonActionTy final : public PrePostActionTy {
2169
  llvm::FunctionCallee EnterCallee;
2170
  ArrayRef<llvm::Value *> EnterArgs;
2171
  llvm::FunctionCallee ExitCallee;
2172
  ArrayRef<llvm::Value *> ExitArgs;
2173
  bool Conditional;
2174
  llvm::BasicBlock *ContBlock = nullptr;
2175
2176
public:
2177
  CommonActionTy(llvm::FunctionCallee EnterCallee,
2178
                 ArrayRef<llvm::Value *> EnterArgs,
2179
                 llvm::FunctionCallee ExitCallee,
2180
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2181
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2182
1.46k
        ExitArgs(ExitArgs), Conditional(Conditional) {}
2183
868
  void Enter(CodeGenFunction &CGF) override {
2184
868
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2185
868
    if (Conditional) {
2186
241
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2187
241
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2188
241
      ContBlock = CGF.createBasicBlock("omp_if.end");
2189
      // Generate the branch (If-stmt)
2190
241
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2191
241
      CGF.EmitBlock(ThenBlock);
2192
241
    }
2193
868
  }
2194
241
  void Done(CodeGenFunction &CGF) {
2195
    // Emit the rest of blocks/branches
2196
241
    CGF.EmitBranch(ContBlock);
2197
241
    CGF.EmitBlock(ContBlock, true);
2198
241
  }
2199
1.55k
  void Exit(CodeGenFunction &CGF) override {
2200
1.55k
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2201
1.55k
  }
2202
};
2203
} // anonymous namespace
2204
2205
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2206
                                         StringRef CriticalName,
2207
                                         const RegionCodeGenTy &CriticalOpGen,
2208
166
                                         SourceLocation Loc, const Expr *Hint) {
2209
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2210
  // CriticalOpGen();
2211
  // __kmpc_end_critical(ident_t *, gtid, Lock);
2212
  // Prepare arguments and build a call to __kmpc_critical
2213
166
  if (!CGF.HaveInsertPoint())
2214
0
    return;
2215
166
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2216
166
                         getCriticalRegionLock(CriticalName)};
2217
166
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2218
166
                                                std::end(Args));
2219
166
  if (Hint) {
2220
3
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
2221
3
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2222
3
  }
2223
166
  CommonActionTy Action(
2224
166
      OMPBuilder.getOrCreateRuntimeFunction(
2225
166
          CGM.getModule(),
2226
163
          Hint ? 
OMPRTL___kmpc_critical_with_hint3
: OMPRTL___kmpc_critical),
2227
166
      EnterArgs,
2228
166
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2229
166
                                            OMPRTL___kmpc_end_critical),
2230
166
      Args);
2231
166
  CriticalOpGen.setAction(Action);
2232
166
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2233
166
}
2234
2235
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2236
                                       const RegionCodeGenTy &MasterOpGen,
2237
186
                                       SourceLocation Loc) {
2238
186
  if (!CGF.HaveInsertPoint())
2239
0
    return;
2240
  // if(__kmpc_master(ident_t *, gtid)) {
2241
  //   MasterOpGen();
2242
  //   __kmpc_end_master(ident_t *, gtid);
2243
  // }
2244
  // Prepare arguments and build a call to __kmpc_master
2245
186
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2246
186
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2247
186
                            CGM.getModule(), OMPRTL___kmpc_master),
2248
186
                        Args,
2249
186
                        OMPBuilder.getOrCreateRuntimeFunction(
2250
186
                            CGM.getModule(), OMPRTL___kmpc_end_master),
2251
186
                        Args,
2252
186
                        /*Conditional=*/true);
2253
186
  MasterOpGen.setAction(Action);
2254
186
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2255
186
  Action.Done(CGF);
2256
186
}
2257
2258
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2259
16
                                        SourceLocation Loc) {
2260
16
  if (!CGF.HaveInsertPoint())
2261
0
    return;
2262
16
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2263
8
    OMPBuilder.CreateTaskyield(CGF.Builder);
2264
8
  } else {
2265
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2266
8
    llvm::Value *Args[] = {
2267
8
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2268
8
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2269
8
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2270
8
                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2271
8
                        Args);
2272
8
  }
2273
2274
16
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2275
4
    Region->emitUntiedSwitch(CGF);
2276
16
}
2277
2278
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2279
                                          const RegionCodeGenTy &TaskgroupOpGen,
2280
245
                                          SourceLocation Loc) {
2281
245
  if (!CGF.HaveInsertPoint())
2282
0
    return;
2283
  // __kmpc_taskgroup(ident_t *, gtid);
2284
  // TaskgroupOpGen();
2285
  // __kmpc_end_taskgroup(ident_t *, gtid);
2286
  // Prepare arguments and build a call to __kmpc_taskgroup
2287
245
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2288
245
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2289
245
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
2290
245
                        Args,
2291
245
                        OMPBuilder.getOrCreateRuntimeFunction(
2292
245
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2293
245
                        Args);
2294
245
  TaskgroupOpGen.setAction(Action);
2295
245
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2296
245
}
2297
2298
/// Given an array of pointers to variables, project the address of a
2299
/// given variable.
2300
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2301
1.32k
                                      unsigned Index, const VarDecl *Var) {
2302
  // Pull out the pointer to the variable.
2303
1.32k
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2304
1.32k
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2305
2306
1.32k
  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2307
1.32k
  Addr = CGF.Builder.CreateElementBitCast(
2308
1.32k
      Addr, CGF.ConvertTypeForMem(Var->getType()));
2309
1.32k
  return Addr;
2310
1.32k
}
2311
2312
/// Build the helper function used for 'copyprivate':
///
///   void copy_func(void *LHSArg, void *RHSArg);
///
/// Both arguments are cast to \p ArgsType and treated as arrays of pointers
/// to variables. For every index I, the variable referenced by DestExprs[I] is
/// assigned from the one referenced by SrcExprs[I] using AssignmentOps[I]; the
/// copied type is taken from CopyprivateVars[I].
///
/// \returns the newly created function, which has internal linkage.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &Ctx = CGM.getContext();

  // Declare the two 'void *' parameters.
  FunctionArgList Params;
  ImplicitParamDecl LHSArg(Ctx, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           Ctx.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(Ctx, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           Ctx.VoidPtrTy, ImplicitParamDecl::Other);
  Params.push_back(&LHSArg);
  Params.push_back(&RHSArg);

  // Create the function itself.
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Params);
  const std::string FnName =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *CopyFn = llvm::Function::Create(
      CGM.getTypes().GetFunctionType(FnInfo),
      llvm::GlobalValue::InternalLinkage, FnName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), CopyFn, FnInfo);
  CopyFn->setDoesNotRecurse();

  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, CopyFn, FnInfo, Params, Loc,
                    Loc);

  // Dest = (void*[n])(LHSArg);
  llvm::Value *LHSRaw = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg));
  Address DestArray(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LHSRaw, ArgsType),
      CGF.getPointerAlign());
  // Src = (void*[n])(RHSArg);
  llvm::Value *RHSRaw = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg));
  Address SrcArray(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(RHSRaw, ArgsType),
      CGF.getPointerAlign());

  // Each helper expression is a DeclRefExpr naming a VarDecl.
  auto ReferencedVar = [](const Expr *E) {
    return cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
  };

  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  const unsigned NumCopies = AssignmentOps.size();
  for (unsigned Idx = 0; Idx != NumCopies; ++Idx) {
    const VarDecl *DestVar = ReferencedVar(DestExprs[Idx]);
    Address DestAddr = emitAddrOfVarFromArray(CGF, DestArray, Idx, DestVar);

    const VarDecl *SrcVar = ReferencedVar(SrcExprs[Idx]);
    Address SrcAddr = emitAddrOfVarFromArray(CGF, SrcArray, Idx, SrcVar);

    QualType CopiedTy =
        cast<DeclRefExpr>(CopyprivateVars[Idx])->getDecl()->getType();
    CGF.EmitOMPCopy(CopiedTy, DestAddr, SrcAddr, DestVar, SrcVar,
                    AssignmentOps[Idx]);
  }

  CGF.FinishFunction();
  return CopyFn;
}
2365
2366
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2367
                                       const RegionCodeGenTy &SingleOpGen,
2368
                                       SourceLocation Loc,
2369
                                       ArrayRef<const Expr *> CopyprivateVars,
2370
                                       ArrayRef<const Expr *> SrcExprs,
2371
                                       ArrayRef<const Expr *> DstExprs,
2372
55
                                       ArrayRef<const Expr *> AssignmentOps) {
2373
55
  if (!CGF.HaveInsertPoint())
2374
0
    return;
2375
55
  assert(CopyprivateVars.size() == SrcExprs.size() &&
2376
55
         CopyprivateVars.size() == DstExprs.size() &&
2377
55
         CopyprivateVars.size() == AssignmentOps.size());
2378
55
  ASTContext &C = CGM.getContext();
2379
  // int32 did_it = 0;
2380
  // if(__kmpc_single(ident_t *, gtid)) {
2381
  //   SingleOpGen();
2382
  //   __kmpc_end_single(ident_t *, gtid);
2383
  //   did_it = 1;
2384
  // }
2385
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2386
  // <copy_func>, did_it);
2387
2388
55
  Address DidIt = Address::invalid();
2389
55
  if (!CopyprivateVars.empty()) {
2390
    // int32 did_it = 0;
2391
28
    QualType KmpInt32Ty =
2392
28
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2393
28
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2394
28
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2395
28
  }
2396
  // Prepare arguments and build a call to __kmpc_single
2397
55
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2398
55
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2399
55
                            CGM.getModule(), OMPRTL___kmpc_single),
2400
55
                        Args,
2401
55
                        OMPBuilder.getOrCreateRuntimeFunction(
2402
55
                            CGM.getModule(), OMPRTL___kmpc_end_single),
2403
55
                        Args,
2404
55
                        /*Conditional=*/true);
2405
55
  SingleOpGen.setAction(Action);
2406
55
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2407
55
  if (DidIt.isValid()) {
2408
    // did_it = 1;
2409
28
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2410
28
  }
2411
55
  Action.Done(CGF);
2412
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2413
  // <copy_func>, did_it);
2414
55
  if (DidIt.isValid()) {
2415
28
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2416
28
    QualType CopyprivateArrayTy = C.getConstantArrayType(
2417
28
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
2418
28
        /*IndexTypeQuals=*/0);
2419
    // Create a list of all private variables for copyprivate.
2420
28
    Address CopyprivateList =
2421
28
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2422
97
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; 
++I69
) {
2423
69
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2424
69
      CGF.Builder.CreateStore(
2425
69
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2426
69
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2427
69
              CGF.VoidPtrTy),
2428
69
          Elem);
2429
69
    }
2430
    // Build function that copies private values from single region to all other
2431
    // threads in the corresponding parallel region.
2432
28
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2433
28
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
2434
28
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
2435
28
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2436
28
    Address CL =
2437
28
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
2438
28
                                                      CGF.VoidPtrTy);
2439
28
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2440
28
    llvm::Value *Args[] = {
2441
28
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2442
28
        getThreadID(CGF, Loc),        // i32 <gtid>
2443
28
        BufSize,                      // size_t <buf_size>
2444
28
        CL.getPointer(),              // void *<copyprivate list>
2445
28
        CpyFn,                        // void (*) (void *, void *) <copy_func>
2446
28
        DidItVal                      // i32 did_it
2447
28
    };
2448
28
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2449
28
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
2450
28
                        Args);
2451
28
  }
2452
55
}
2453
2454
void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2455
                                        const RegionCodeGenTy &OrderedOpGen,
2456
24
                                        SourceLocation Loc, bool IsThreads) {
2457
24
  if (!CGF.HaveInsertPoint())
2458
0
    return;
2459
  // __kmpc_ordered(ident_t *, gtid);
2460
  // OrderedOpGen();
2461
  // __kmpc_end_ordered(ident_t *, gtid);
2462
  // Prepare arguments and build a call to __kmpc_ordered
2463
24
  if (IsThreads) {
2464
16
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2465
16
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2466
16
                              CGM.getModule(), OMPRTL___kmpc_ordered),
2467
16
                          Args,
2468
16
                          OMPBuilder.getOrCreateRuntimeFunction(
2469
16
                              CGM.getModule(), OMPRTL___kmpc_end_ordered),
2470
16
                          Args);
2471
16
    OrderedOpGen.setAction(Action);
2472
16
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2473
16
    return;
2474
16
  }
2475
8
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2476
8
}
2477
2478
962
/// Map the directive \p Kind that triggers a barrier to the matching
/// OMP_IDENT_BARRIER_* location flag. 'for', 'sections' and 'single' get their
/// own implicit-barrier flags, 'barrier' gets the explicit-barrier flag, and
/// every other directive gets the generic implicit-barrier flag.
unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  switch (Kind) {
  case OMPD_for:
    return OMP_IDENT_BARRIER_IMPL_FOR;
  case OMPD_sections:
    return OMP_IDENT_BARRIER_IMPL_SECTIONS;
  case OMPD_single:
    return OMP_IDENT_BARRIER_IMPL_SINGLE;
  case OMPD_barrier:
    return OMP_IDENT_BARRIER_EXPL;
  default:
    return OMP_IDENT_BARRIER_IMPL;
  }
}
2492
2493
void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2494
    CodeGenFunction &CGF, const OMPLoopDirective &S,
2495
2.98k
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2496
  // Check if the loop directive is actually a doacross loop directive. In this
2497
  // case choose static, 1 schedule.
2498
2.98k
  if (llvm::any_of(
2499
2.98k
          S.getClausesOfKind<OMPOrderedClause>(),
2500
20
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2501
12
    ScheduleKind = OMPC_SCHEDULE_static;
2502
    // Chunk size is 1 in this case.
2503
12
    llvm::APInt ChunkSize(32, 1);
2504
12
    ChunkExpr = IntegerLiteral::Create(
2505
12
        CGF.getContext(), ChunkSize,
2506
12
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2507
12
        SourceLocation());
2508
12
  }
2509
2.98k
}
2510
2511
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2512
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
2513
794
                                      bool ForceSimpleCall) {
2514
  // Check if we should use the OMPBuilder
2515
794
  auto *OMPRegionInfo =
2516
794
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2517
794
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2518
48
    CGF.Builder.restoreIP(OMPBuilder.CreateBarrier(
2519
48
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
2520
48
    return;
2521
48
  }
2522
2523
746
  if (!CGF.HaveInsertPoint())
2524
0
    return;
2525
  // Build call __kmpc_cancel_barrier(loc, thread_id);
2526
  // Build call __kmpc_barrier(loc, thread_id);
2527
746
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
2528
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2529
  // thread_id);
2530
746
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2531
746
                         getThreadID(CGF, Loc)};
2532
746
  if (OMPRegionInfo) {
2533
463
    if (!ForceSimpleCall && 
OMPRegionInfo->hasCancel()288
) {
2534
4
      llvm::Value *Result = CGF.EmitRuntimeCall(
2535
4
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2536
4
                                                OMPRTL___kmpc_cancel_barrier),
2537
4
          Args);
2538
4
      if (EmitChecks) {
2539
        // if (__kmpc_cancel_barrier()) {
2540
        //   exit from construct;
2541
        // }
2542
4
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2543
4
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2544
4
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2545
4
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2546
4
        CGF.EmitBlock(ExitBB);
2547
        //   exit from construct;
2548
4
        CodeGenFunction::JumpDest CancelDestination =
2549
4
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2550
4
        CGF.EmitBranchThroughCleanup(CancelDestination);
2551
4
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2552
4
      }
2553
4
      return;
2554
4
    }
2555
742
  }
2556
742
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2557
742
                          CGM.getModule(), OMPRTL___kmpc_barrier),
2558
742
                      Args);
2559
742
}
2560
2561
/// Map the OpenMP loop schedule to the runtime enumeration.
2562
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2563
15.4k
                                          bool Chunked, bool Ordered) {
2564
15.4k
  switch (ScheduleKind) {
2565
2.88k
  case OMPC_SCHEDULE_static:
2566
1.89k
    return Chunked ? (Ordered ? 
OMP_ord_static_chunked7
:
OMP_sch_static_chunked1.88k
)
2567
993
                   : (Ordered ? 
OMP_ord_static6
:
OMP_sch_static987
);
2568
1.94k
  case OMPC_SCHEDULE_dynamic:
2569
1.93k
    return Ordered ? 
OMP_ord_dynamic_chunked4
: OMP_sch_dynamic_chunked;
2570
543
  case OMPC_SCHEDULE_guided:
2571
542
    return Ordered ? 
OMP_ord_guided_chunked1
: OMP_sch_guided_chunked;
2572
548
  case OMPC_SCHEDULE_runtime:
2573
542
    return Ordered ? 
OMP_ord_runtime6
: OMP_sch_runtime;
2574
550
  case OMPC_SCHEDULE_auto:
2575
545
    return Ordered ? 
OMP_ord_auto5
: OMP_sch_auto;
2576
8.93k
  case OMPC_SCHEDULE_unknown:
2577
8.93k
    assert(!Chunked && "chunk was specified but schedule kind not known");
2578
8.92k
    return Ordered ? 
OMP_ord_static8
: OMP_sch_static;
2579
0
  }
2580
0
  llvm_unreachable("Unexpected runtime schedule");
2581
0
}
2582
2583
/// Map the OpenMP distribute schedule to the runtime enumeration.
2584
static OpenMPSchedType
2585
12.6k
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2586
  // only static is allowed for dist_schedule
2587
10.6k
  return Chunked ? 
OMP_dist_sch_static_chunked1.95k
: OMP_dist_sch_static;
2588
12.6k
}
2589
2590
bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2591
5.43k
                                         bool Chunked) const {
2592
5.43k
  OpenMPSchedType Schedule =
2593
5.43k
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2594
5.43k
  return Schedule == OMP_sch_static;
2595
5.43k
}
2596
2597
bool CGOpenMPRuntime::isStaticNonchunked(
2598
4.21k
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2599
4.21k
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2600
4.21k
  return Schedule == OMP_dist_sch_static;
2601
4.21k
}
2602
2603
bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2604
4.41k
                                      bool Chunked) const {
2605
4.41k
  OpenMPSchedType Schedule =
2606
4.41k
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2607
4.41k
  return Schedule == OMP_sch_static_chunked;
2608
4.41k
}
2609
2610
bool CGOpenMPRuntime::isStaticChunked(
2611
4.21k
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2612
4.21k
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2613
4.21k
  return Schedule == OMP_dist_sch_static_chunked;
2614
4.21k
}
2615
2616
1.01k
/// Return true if \p ScheduleKind maps to anything other than the plain
/// static runtime schedule. The mapping is queried as non-chunked and
/// non-ordered.
bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  const OpenMPSchedType RuntimeSchedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(RuntimeSchedule != OMP_sch_static_chunked && "cannot be chunked here");
  return RuntimeSchedule != OMP_sch_static;
}
2622
2623
static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2624
                                  OpenMPScheduleClauseModifier M1,
2625
8.75k
                                  OpenMPScheduleClauseModifier M2) {
2626
8.75k
  int Modifier = 0;
2627
8.75k
  switch (M1) {
2628
17
  case OMPC_SCHEDULE_MODIFIER_monotonic:
2629
17
    Modifier = OMP_sch_modifier_monotonic;
2630
17
    break;
2631
12
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2632
12
    Modifier = OMP_sch_modifier_nonmonotonic;
2633
12
    break;
2634
12
  case OMPC_SCHEDULE_MODIFIER_simd:
2635
12
    if (Schedule == OMP_sch_static_chunked)
2636
6
      Schedule = OMP_sch_static_balanced_chunked;
2637
12
    break;
2638
8.71k
  case OMPC_SCHEDULE_MODIFIER_last:
2639
8.71k
  case OMPC_SCHEDULE_MODIFIER_unknown:
2640
8.71k
    break;
2641
8.75k
  }
2642
8.75k
  switch (M2) {
2643
0
  case OMPC_SCHEDULE_MODIFIER_monotonic:
2644
0
    Modifier = OMP_sch_modifier_monotonic;
2645
0
    break;
2646
6
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2647
6
    Modifier = OMP_sch_modifier_nonmonotonic;
2648
6
    break;
2649
0
  case OMPC_SCHEDULE_MODIFIER_simd:
2650
0
    if (Schedule == OMP_sch_static_chunked)
2651
0
      Schedule = OMP_sch_static_balanced_chunked;
2652
0
    break;
2653
8.75k
  case OMPC_SCHEDULE_MODIFIER_last:
2654
8.75k
  case OMPC_SCHEDULE_MODIFIER_unknown:
2655
8.75k
    break;
2656
8.75k
  }
2657
  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2658
  // If the static schedule kind is specified or if the ordered clause is
2659
  // specified, and if the nonmonotonic modifier is not specified, the effect is
2660
  // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2661
  // modifier is specified, the effect is as if the nonmonotonic modifier is
2662
  // specified.
2663
8.75k
  if (CGM.getLangOpts().OpenMP >= 50 && 
Modifier == 05.77k
) {
2664
5.74k
    if (!(Schedule == OMP_sch_static_chunked || 
Schedule == OMP_sch_static5.40k
||
2665
3.32k
          Schedule == OMP_sch_static_balanced_chunked ||
2666
3.31k
          Schedule == OMP_ord_static_chunked || 
Schedule == OMP_ord_static3.31k
||
2667
3.30k
          Schedule == OMP_dist_sch_static_chunked ||
2668
2.83k
          Schedule == OMP_dist_sch_static))
2669
519
      Modifier = OMP_sch_modifier_nonmonotonic;
2670
5.74k
  }
2671
8.75k
  return Schedule | Modifier;
2672
8.75k
}
2673
2674
void CGOpenMPRuntime::emitForDispatchInit(
2675
    CodeGenFunction &CGF, SourceLocation Loc,
2676
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2677
744
    bool Ordered, const DispatchRTInput &DispatchValues) {
2678
744
  if (!CGF.HaveInsertPoint())
2679
0
    return;
2680
744
  OpenMPSchedType Schedule = getRuntimeSchedule(
2681
744
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2682
744
  assert(Ordered ||
2683
744
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2684
744
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2685
744
          Schedule != OMP_sch_static_balanced_chunked));
2686
  // Call __kmpc_dispatch_init(
2687
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2688
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2689
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2690
2691
  // If the Chunk was not specified in the clause - use default value 1.
2692
139
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2693
605
                                            : CGF.Builder.getIntN(IVSize, 1);
2694
744
  llvm::Value *Args[] = {
2695
744
      emitUpdateLocation(CGF, Loc),
2696
744
      getThreadID(CGF, Loc),
2697
744
      CGF.Builder.getInt32(addMonoNonMonoModifier(
2698
744
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2699
744
      DispatchValues.LB,                                     // Lower
2700
744
      DispatchValues.UB,                                     // Upper
2701
744
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
2702
744
      Chunk                                                  // Chunk
2703
744
  };
2704
744
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2705
744
}
2706
2707
static void emitForStaticInitCall(
2708
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2709
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2710
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2711
8.01k
    const CGOpenMPRuntime::StaticRTInput &Values) {
2712
8.01k
  if (!CGF.HaveInsertPoint())
2713
0
    return;
2714
2715
8.01k
  assert(!Values.Ordered);
2716
8.01k
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2717
8.01k
         Schedule == OMP_sch_static_balanced_chunked ||
2718
8.01k
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2719
8.01k
         Schedule == OMP_dist_sch_static ||
2720
8.01k
         Schedule == OMP_dist_sch_static_chunked);
2721
2722
  // Call __kmpc_for_static_init(
2723
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2724
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2725
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2726
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2727
8.01k
  llvm::Value *Chunk = Values.Chunk;
2728
8.01k
  if (Chunk == nullptr) {
2729
6.83k
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2730
6.83k
            Schedule == OMP_dist_sch_static) &&
2731
6.83k
           "expected static non-chunked schedule");
2732
    // If the Chunk was not specified in the clause - use default value 1.
2733
6.83k
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2734
1.17k
  } else {
2735
1.17k
    assert((Schedule == OMP_sch_static_chunked ||
2736
1.17k
            Schedule == OMP_sch_static_balanced_chunked ||
2737
1.17k
            Schedule == OMP_ord_static_chunked ||
2738
1.17k
            Schedule == OMP_dist_sch_static_chunked) &&
2739
1.17k
           "expected static chunked schedule");
2740
1.17k
  }
2741
8.01k
  llvm::Value *Args[] = {
2742
8.01k
      UpdateLocation,
2743
8.01k
      ThreadId,
2744
8.01k
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2745
8.01k
                                                  M2)), // Schedule type
2746
8.01k
      Values.IL.getPointer(),                           // &isLastIter
2747
8.01k
      Values.LB.getPointer(),                           // &LB
2748
8.01k
      Values.UB.getPointer(),                           // &UB
2749
8.01k
      Values.ST.getPointer(),                           // &Stride
2750
8.01k
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
2751
8.01k
      Chunk                                             // Chunk
2752
8.01k
  };
2753
8.01k
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2754
8.01k
}
2755
2756
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2757
                                        SourceLocation Loc,
2758
                                        OpenMPDirectiveKind DKind,
2759
                                        const OpenMPScheduleTy &ScheduleKind,
2760
3.79k
                                        const StaticRTInput &Values) {
2761
3.79k
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2762
3.79k
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2763
3.79k
  assert(isOpenMPWorksharingDirective(DKind) &&
2764
3.79k
         "Expected loop-based or sections-based directive.");
2765
3.79k
  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2766
3.79k
                                             isOpenMPLoopDirective(DKind)
2767
3.70k
                                                 ? OMP_IDENT_WORK_LOOP
2768
88
                                                 : OMP_IDENT_WORK_SECTIONS);
2769
3.79k
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
2770
3.79k
  llvm::FunctionCallee StaticInitFunction =
2771
3.79k
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2772
3.79k
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2773
3.79k
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2774
3.79k
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2775
3.79k
}
2776
2777
void CGOpenMPRuntime::emitDistributeStaticInit(
2778
    CodeGenFunction &CGF, SourceLocation Loc,
2779
    OpenMPDistScheduleClauseKind SchedKind,
2780
4.21k
    const CGOpenMPRuntime::StaticRTInput &Values) {
2781
4.21k
  OpenMPSchedType ScheduleNum =
2782
4.21k
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2783
4.21k
  llvm::Value *UpdatedLocation =
2784
4.21k
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2785
4.21k
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
2786
4.21k
  llvm::FunctionCallee StaticInitFunction =
2787
4.21k
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2788
4.21k
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2789
4.21k
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2790
4.21k
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
2791
4.21k
}
2792
2793
void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2794
                                          SourceLocation Loc,
2795
8.08k
                                          OpenMPDirectiveKind DKind) {
2796
8.08k
  if (!CGF.HaveInsertPoint())
2797
0
    return;
2798
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2799
8.08k
  llvm::Value *Args[] = {
2800
8.08k
      emitUpdateLocation(CGF, Loc,
2801
8.08k
                         isOpenMPDistributeDirective(DKind)
2802
6.32k
                             ? OMP_IDENT_WORK_DISTRIBUTE
2803
1.76k
                             : isOpenMPLoopDirective(DKind)
2804
1.63k
                                   ? OMP_IDENT_WORK_LOOP
2805
128
                                   : OMP_IDENT_WORK_SECTIONS),
2806
8.08k
      getThreadID(CGF, Loc)};
2807
8.08k
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2808
8.08k
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2809
8.08k
                          CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2810
8.08k
                      Args);
2811
8.08k
}
2812
2813
void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2814
                                                 SourceLocation Loc,
2815
                                                 unsigned IVSize,
2816
37
                                                 bool IVSigned) {
2817
37
  if (!CGF.HaveInsertPoint())
2818
0
    return;
2819
  // Call __kmpc_dispatch_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2820
37
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2821
37
  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2822
37
}
2823
2824
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2825
                                          SourceLocation Loc, unsigned IVSize,
2826
                                          bool IVSigned, Address IL,
2827
                                          Address LB, Address UB,
2828
744
                                          Address ST) {
2829
  // Call __kmpc_dispatch_next(
2830
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2831
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2832
  //          kmp_int[32|64] *p_stride);
2833
744
  llvm::Value *Args[] = {
2834
744
      emitUpdateLocation(CGF, Loc),
2835
744
      getThreadID(CGF, Loc),
2836
744
      IL.getPointer(), // &isLastIter
2837
744
      LB.getPointer(), // &Lower
2838
744
      UB.getPointer(), // &Upper
2839
744
      ST.getPointer()  // &Stride
2840
744
  };
2841
744
  llvm::Value *Call =
2842
744
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2843
744
  return CGF.EmitScalarConversion(
2844
744
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2845
744
      CGF.getContext().BoolTy, Loc);
2846
744
}
2847
2848
void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2849
                                           llvm::Value *NumThreads,
2850
244
                                           SourceLocation Loc) {
2851
244
  if (!CGF.HaveInsertPoint())
2852
0
    return;
2853
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2854
244
  llvm::Value *Args[] = {
2855
244
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2856
244
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2857
244
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2858
244
                          CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2859
244
                      Args);
2860
244
}
2861
2862
void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2863
                                         ProcBindKind ProcBind,
2864
68
                                         SourceLocation Loc) {
2865
68
  if (!CGF.HaveInsertPoint())
2866
0
    return;
2867
68
  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2868
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2869
68
  llvm::Value *Args[] = {
2870
68
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2871
68
      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2872
68
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2873
68
                          CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2874
68
                      Args);
2875
68
}
2876
2877
void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2878
104
                                SourceLocation Loc, llvm::AtomicOrdering AO) {
2879
104
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2880
20
    OMPBuilder.CreateFlush(CGF.Builder);
2881
84
  } else {
2882
84
    if (!CGF.HaveInsertPoint())
2883
0
      return;
2884
    // Build call void __kmpc_flush(ident_t *loc)
2885
84
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2886
84
                            CGM.getModule(), OMPRTL___kmpc_flush),
2887
84
                        emitUpdateLocation(CGF, Loc));
2888
84
  }
2889
104
}
2890
2891
namespace {
2892
/// Indexes of fields for type kmp_task_t.
2893
enum KmpTaskTFields {
2894
  /// List of shared variables.
2895
  KmpTaskTShareds,
2896
  /// Task routine.
2897
  KmpTaskTRoutine,
2898
  /// Partition id for the untied tasks.
2899
  KmpTaskTPartId,
2900
  /// Function with call of destructors for private variables.
2901
  Data1,
2902
  /// Task priority.
2903
  Data2,
2904
  /// (Taskloops only) Lower bound.
2905
  KmpTaskTLowerBound,
2906
  /// (Taskloops only) Upper bound.
2907
  KmpTaskTUpperBound,
2908
  /// (Taskloops only) Stride.
2909
  KmpTaskTStride,
2910
  /// (Taskloops only) Is last iteration flag.
2911
  KmpTaskTLastIter,
2912
  /// (Taskloops only) Reduction data.
2913
  KmpTaskTReductions,
2914
};
2915
} // anonymous namespace
2916
2917
5.14k
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2918
5.14k
  return OffloadEntriesTargetRegion.empty() &&
2919
962
         OffloadEntriesDeviceGlobalVar.empty();
2920
5.14k
}
2921
2922
/// Initialize target region entry.
2923
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2924
    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2925
                                    StringRef ParentName, unsigned LineNum,
2926
2.97k
                                    unsigned Order) {
2927
2.97k
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
2928
2.97k
                                             "only required for the device "
2929
2.97k
                                             "code generation.");
2930
2.97k
  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
2931
2.97k
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
2932
2.97k
                                   OMPTargetRegionEntryTargetRegion);
2933
2.97k
  ++OffloadingEntriesNum;
2934
2.97k
}
2935
2936
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2937
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2938
                                  StringRef ParentName, unsigned LineNum,
2939
                                  llvm::Constant *Addr, llvm::Constant *ID,
2940
10.9k
                                  OMPTargetRegionEntryKind Flags) {
2941
  // If we are emitting code for a target, the entry is already initialized,
2942
  // only has to be registered.
2943
10.9k
  if (CGM.getLangOpts().OpenMPIsDevice) {
2944
2.93k
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
2945
0
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
2946
0
          DiagnosticsEngine::Error,
2947
0
          "Unable to find target region on line '%0' in the device code.");
2948
0
      CGM.getDiags().Report(DiagID) << LineNum;
2949
0
      return;
2950
0
    }
2951
2.93k
    auto &Entry =
2952
2.93k
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
2953
2.93k
    assert(Entry.isValid() && "Entry not initialized!");
2954
2.93k
    Entry.setAddress(Addr);
2955
2.93k
    Entry.setID(ID);
2956
2.93k
    Entry.setFlags(Flags);
2957
8.05k
  } else {
2958
8.05k
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
2959
8.05k
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
2960
8.05k
    ++OffloadingEntriesNum;
2961
8.05k
  }
2962
10.9k
}
2963
2964
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
2965
    unsigned DeviceID, unsigned FileID, StringRef ParentName,
2966
6.78k
    unsigned LineNum) const {
2967
6.78k
  auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
2968
6.78k
  if (PerDevice == OffloadEntriesTargetRegion.end())
2969
0
    return false;
2970
6.78k
  auto PerFile = PerDevice->second.find(FileID);
2971
6.78k
  if (PerFile == PerDevice->second.end())
2972
0
    return false;
2973
6.78k
  auto PerParentName = PerFile->second.find(ParentName);
2974
6.78k
  if (PerParentName == PerFile->second.end())
2975
630
    return false;
2976
6.15k
  auto PerLine = PerParentName->second.find(LineNum);
2977
6.15k
  if (PerLine == PerParentName->second.end())
2978
128
    return false;
2979
  // Fail if this entry is already registered.
2980
6.02k
  if (PerLine->second.getAddress() || 
PerLine->second.getID()5.74k
)
2981
277
    return false;
2982
5.74k
  return true;
2983
5.74k
}
2984
2985
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
2986
2.35k
    const OffloadTargetRegionEntryInfoActTy &Action) {
2987
  // Scan all target region entries and perform the provided action.
2988
2.35k
  for (const auto &D : OffloadEntriesTargetRegion)
2989
2.35k
    for (const auto &F : D.second)
2990
2.35k
      for (const auto &P : F.second)
2991
6.03k
        for (const auto &L : P.second)
2992
10.9k
          Action(D.first, F.first, P.first(), L.first, L.second);
2993
2.35k
}
2994
2995
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2996
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
2997
                                       OMPTargetGlobalVarEntryKind Flags,
2998
141
                                       unsigned Order) {
2999
141
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3000
141
                                             "only required for the device "
3001
141
                                             "code generation.");
3002
141
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3003
141
  ++OffloadingEntriesNum;
3004
141
}
3005
3006
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3007
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3008
                                     CharUnits VarSize,
3009
                                     OMPTargetGlobalVarEntryKind Flags,
3010
641
                                     llvm::GlobalValue::LinkageTypes Linkage) {
3011
641
  if (CGM.getLangOpts().OpenMPIsDevice) {
3012
232
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3013
232
    assert(Entry.isValid() && Entry.getFlags() == Flags &&
3014
232
           "Entry not initialized!");
3015
232
    assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3016
232
           "Resetting with the new address.");
3017
232
    if (Entry.getAddress() && 
hasDeviceGlobalVarEntryInfo(VarName)95
) {
3018
95
      if (Entry.getVarSize().isZero()) {
3019
4
        Entry.setVarSize(VarSize);
3020
4
        Entry.setLinkage(Linkage);
3021
4
      }
3022
95
      return;
3023
95
    }
3024
137
    Entry.setVarSize(VarSize);
3025
137
    Entry.setLinkage(Linkage);
3026
137
    Entry.setAddress(Addr);
3027
409
  } else {
3028
409
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
3029
250
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3030
250
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
3031
250
             "Entry not initialized!");
3032
250
      assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3033
250
             "Resetting with the new address.");
3034
250
      if (Entry.getVarSize().isZero()) {
3035
27
        Entry.setVarSize(VarSize);
3036
27
        Entry.setLinkage(Linkage);
3037
27
      }
3038
250
      return;
3039
250
    }
3040
159
    OffloadEntriesDeviceGlobalVar.try_emplace(
3041
159
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3042
159
    ++OffloadingEntriesNum;
3043
159
  }
3044
641
}
3045
3046
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3047
    actOnDeviceGlobalVarEntriesInfo(
3048
2.35k
        const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3049
  // Scan all device global variable entries and perform the provided action.
3050
2.35k
  for (const auto &E : OffloadEntriesDeviceGlobalVar)
3051
296
    Action(E.getKey(), E.getValue());
3052
2.35k
}
3053
3054
void CGOpenMPRuntime::createOffloadEntry(
3055
    llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3056
10.2k
    llvm::GlobalValue::LinkageTypes Linkage) {
3057
10.2k
  StringRef Name = Addr->getName();
3058
10.2k
  llvm::Module &M = CGM.getModule();
3059
10.2k
  llvm::LLVMContext &C = M.getContext();
3060
3061
  // Create constant string with the name.
3062
10.2k
  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3063
3064
10.2k
  std::string StringName = getName({"omp_offloading", "entry_name"});
3065
10.2k
  auto *Str = new llvm::GlobalVariable(
3066
10.2k
      M, StrPtrInit->getType(), /*isConstant=*/true,
3067
10.2k
      llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3068
10.2k
  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3069
3070
10.2k
  llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
3071
10.2k
                            llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
3072
10.2k
                            llvm::ConstantInt::get(CGM.SizeTy, Size),
3073
10.2k
                            llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3074
10.2k
                            llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3075
10.2k
  std::string EntryName = getName({"omp_offloading", "entry", ""});
3076
10.2k
  llvm::GlobalVariable *Entry = createGlobalStruct(
3077
10.2k
      CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3078
10.2k
      Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3079
3080
  // The entry has to be created in the section the linker expects it to be.
3081
10.2k
  Entry->setSection("omp_offloading_entries");
3082
10.2k
}
3083
3084
5.45k
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
3085
  // Emit the offloading entries and metadata so that the device codegen side
3086
  // can easily figure out what to emit. The produced metadata looks like
3087
  // this:
3088
  //
3089
  // !omp_offload.info = !{!1, ...}
3090
  //
3091
  // Right now we only generate metadata for functions that contain target
3092
  // regions.
3093
3094
  // If we are in simd mode or there are no entries, we don't need to do
3095
  // anything.
3096
5.45k
  if (CGM.getLangOpts().OpenMPSimd || 
OffloadEntriesInfoManager.empty()3.12k
)
3097
3.10k
    return;
3098
3099
2.35k
  llvm::Module &M = CGM.getModule();
3100
2.35k
  llvm::LLVMContext &C = M.getContext();
3101
2.35k
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
3102
2.35k
                         SourceLocation, StringRef>,
3103
2.35k
              16>
3104
2.35k
      OrderedEntries(OffloadEntriesInfoManager.size());
3105
2.35k
  llvm::SmallVector<StringRef, 16> ParentFunctions(
3106
2.35k
      OffloadEntriesInfoManager.size());
3107
3108
  // Auxiliary methods to create metadata values and strings.
3109
55.7k
  auto &&GetMDInt = [this](unsigned V) {
3110
55.7k
    return llvm::ConstantAsMetadata::get(
3111
55.7k
        llvm::ConstantInt::get(CGM.Int32Ty, V));
3112
55.7k
  };
3113
3114
11.2k
  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
3115
3116
  // Create the offloading info metadata node.
3117
2.35k
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
3118
3119
  // Create a function that emits metadata for each target region entry.
3120
2.35k
  auto &&TargetRegionMetadataEmitter =
3121
2.35k
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
3122
2.35k
       &GetMDString](
3123
2.35k
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
3124
2.35k
          unsigned Line,
3125
10.9k
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
3126
        // Generate metadata for target regions. Each entry of this metadata
3127
        // contains:
3128
        // - Entry 0 -> Kind of this type of metadata (0).
3129
        // - Entry 1 -> Device ID of the file where the entry was identified.
3130
        // - Entry 2 -> File ID of the file where the entry was identified.
3131
        // - Entry 3 -> Mangled name of the function where the entry was
3132
        // identified.
3133
        // - Entry 4 -> Line in the file where the entry was identified.
3134
        // - Entry 5 -> Order the entry was created.
3135
        // The first element of the metadata node is the kind.
3136
10.9k
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
3137
10.9k
                                 GetMDInt(FileID),      GetMDString(ParentName),
3138
10.9k
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};
3139
3140
10.9k
        SourceLocation Loc;
3141
10.9k
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
3142
10.9k
                  E = CGM.getContext().getSourceManager().fileinfo_end();
3143
23.8k
             I != E; 
++I12.8k
) {
3144
12.8k
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
3145
12.8k
              I->getFirst()->getUniqueID().getFile() == FileID) {
3146
0
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
3147
0
                I->getFirst(), Line, 1);
3148
0
            break;
3149
0
          }
3150
12.8k
        }
3151
        // Save this entry in the right position of the ordered entries array.
3152
10.9k
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
3153
10.9k
        ParentFunctions[E.getOrder()] = ParentName;
3154
3155
        // Add metadata to the named metadata node.
3156
10.9k
        MD->addOperand(llvm::MDNode::get(C, Ops));
3157
10.9k
      };
3158
3159
2.35k
  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
3160
2.35k
      TargetRegionMetadataEmitter);
3161
3162
  // Create a function that emits metadata for each device global variable entry.
3163
2.35k
  auto &&DeviceGlobalVarMetadataEmitter =
3164
2.35k
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
3165
2.35k
       MD](StringRef MangledName,
3166
2.35k
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
3167
296
               &E) {
3168
        // Generate metadata for global variables. Each entry of this metadata
3169
        // contains:
3170
        // - Entry 0 -> Kind of this type of metadata (1).
3171
        // - Entry 1 -> Mangled name of the variable.
3172
        // - Entry 2 -> Declare target kind.
3173
        // - Entry 3 -> Order the entry was created.
3174
        // The first element of the metadata node is the kind.
3175
296
        llvm::Metadata *Ops[] = {
3176
296
            GetMDInt(E.getKind()), GetMDString(MangledName),
3177
296
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
3178
3179
        // Save this entry in the right position of the ordered entries array.
3180
296
        OrderedEntries[E.getOrder()] =
3181
296
            std::make_tuple(&E, SourceLocation(), MangledName);
3182
3183
        // Add metadata to the named metadata node.
3184
296
        MD->addOperand(llvm::MDNode::get(C, Ops));
3185
296
      };
3186
3187
2.35k
  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
3188
2.35k
      DeviceGlobalVarMetadataEmitter);
3189
3190
11.2k
  for (const auto &E : OrderedEntries) {
3191
11.2k
    assert(std::get<0>(E) && "All ordered entries must exist!");
3192
11.2k
    if (const auto *CE =
3193
10.9k
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
3194
10.9k
                std::get<0>(E))) {
3195
10.9k
      if (!CE->getID() || 
!CE->getAddress()10.9k
) {
3196
        // Do not blame the entry if the parent function is not emitted.
3197
4
        StringRef FnName = ParentFunctions[CE->getOrder()];
3198
4
        if (!CGM.GetGlobalValue(FnName))
3199
2
          continue;
3200
2
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
3201
2
            DiagnosticsEngine::Error,
3202
2
            "Offloading entry for target region in %0 is incorrect: either the "
3203
2
            "address or the ID is invalid.");
3204
2
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
3205
2
        continue;
3206
2
      }
3207
10.9k
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
3208
10.9k
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
3209
296
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
3210
296
                                             OffloadEntryInfoDeviceGlobalVar>(
3211
296
                   std::get<0>(E))) {
3212
296
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
3213
296
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3214
296
              CE->getFlags());
3215
296
      switch (Flags) {
3216
246
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
3217
246
        if (CGM.getLangOpts().OpenMPIsDevice &&
3218
119
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
3219
2
          continue;
3220
244
        if (!CE->getAddress()) {
3221
0
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
3222
0
              DiagnosticsEngine::Error, "Offloading entry for declare target "
3223
0
                                        "variable %0 is incorrect: the "
3224
0
                                        "address is invalid.");
3225
0
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
3226
0
          continue;
3227
0
        }
3228
        // The variable has no definition - no need to add the entry.
3229
244
        if (CE->getVarSize().isZero())
3230
49
          continue;
3231
195
        break;
3232
195
      }
3233
50
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
3234
50
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
3235
50
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
3236
50
               "Declaret target link address is set.");
3237
50
        if (CGM.getLangOpts().OpenMPIsDevice)
3238
18
          continue;
3239
32
        if (!CE->getAddress()) {
3240
0
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
3241
0
              DiagnosticsEngine::Error,
3242
0
              "Offloading entry for declare target variable is incorrect: the "
3243
0
              "address is invalid.");
3244
0
          CGM.getDiags().Report(DiagID);
3245
0
          continue;
3246
0
        }
3247
32
        break;
3248
227
      }
3249
227
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
3250
227
                         CE->getVarSize().getQuantity(), Flags,
3251
227
                         CE->getLinkage());
3252
0
    } else {
3253
0
      llvm_unreachable("Unsupported entry kind.");
3254
0
    }
3255
11.2k
  }
3256
2.35k
}
3257
3258
/// Loads all the offload entries information from the host IR
3259
/// metadata.
3260
5.47k
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
3261
  // If we are in target mode, load the metadata from the host IR. This code has
3262
  // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
3263
3264
5.47k
  if (!CGM.getLangOpts().OpenMPIsDevice)
3265
4.92k
    return;
3266
3267
552
  if (CGM.getLangOpts().OMPHostIRFile.empty())
3268
0
    return;
3269
3270
552
  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3271
552
  if (auto EC = Buf.getError()) {
3272
0
    CGM.getDiags().Report(diag::err_cannot_open_file)
3273
0
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
3274
0
    return;
3275
0
  }
3276
3277
552
  llvm::LLVMContext C;
3278
552
  auto ME = expectedToErrorOrAndEmitErrors(
3279
552
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
3280
3281
552
  if (auto EC = ME.getError()) {
3282
0
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
3283
0
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
3284
0
    CGM.getDiags().Report(DiagID)
3285
0
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
3286
0
    return;
3287
0
  }
3288
3289
552
  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
3290
552
  if (!MD)
3291
25
    return;
3292
3293
3.11k
  
for (llvm::MDNode *MN : MD->operands())527
{
3294
15.2k
    auto &&GetMDInt = [MN](unsigned Idx) {
3295
15.2k
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
3296
15.2k
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
3297
15.2k
    };
3298
3299
3.11k
    auto &&GetMDString = [MN](unsigned Idx) {
3300
3.11k
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
3301
3.11k
      return V->getString();
3302
3.11k
    };
3303
3304
3.11k
    switch (GetMDInt(0)) {
3305
0
    default:
3306
0
      llvm_unreachable("Unexpected metadata!");
3307
0
      break;
3308
2.97k
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3309
2.97k
        OffloadingEntryInfoTargetRegion:
3310
2.97k
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
3311
2.97k
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
3312
2.97k
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
3313
2.97k
          /*Order=*/GetMDInt(5));
3314
2.97k
      break;
3315
141
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3316
141
        OffloadingEntryInfoDeviceGlobalVar:
3317
141
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
3318
141
          /*MangledName=*/GetMDString(1),
3319
141
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3320
141
              /*Flags=*/GetMDInt(2)),
3321
141
          /*Order=*/GetMDInt(3));
3322
141
      break;
3323
3.11k
    }
3324
3.11k
  }
3325
527
}
3326
3327
707
/// Lazily builds the kmp_routine_entry_t pointer type and caches both its
/// QualType (KmpRoutineEntryPtrQTy) and its converted form
/// (KmpRoutineEntryPtrTy). Does nothing once KmpRoutineEntryPtrTy is set.
void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (KmpRoutineEntryPtrTy)
    return;

  // typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *);
  ASTContext &Ctx = CGM.getContext();
  QualType ParamTys[] = {KmpInt32Ty, Ctx.VoidPtrTy};
  FunctionProtoType::ExtProtoInfo ProtoInfo;
  QualType RoutineFnTy = Ctx.getFunctionType(KmpInt32Ty, ParamTys, ProtoInfo);
  KmpRoutineEntryPtrQTy = Ctx.getPointerType(RoutineFnTy);
  KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
}
3338
3339
10.2k
/// Returns the record type describing one offload entry, building and caching
/// it in TgtOffloadEntryQTy on first use. The implicit record is marked
/// packed and has this layout:
/// \code
/// struct __tgt_offload_entry{
///   void      *addr;       // Pointer to the offload entry info.
///                          // (function or global)
///   char      *name;       // Name of the function or global.
///   size_t     size;       // Size of the entry info (0 if it a function).
///   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
///   int32_t    reserved;   // Reserved, to use by the runtime library.
/// };
/// \endcode
QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
  if (!TgtOffloadEntryQTy.isNull())
    return TgtOffloadEntryQTy;

  ASTContext &Ctx = CGM.getContext();
  const QualType Int32Ty =
      Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true);

  RecordDecl *EntryRD = Ctx.buildImplicitRecord("__tgt_offload_entry");
  EntryRD->startDefinition();
  addFieldToRecordDecl(Ctx, EntryRD, Ctx.VoidPtrTy);                   // addr
  addFieldToRecordDecl(Ctx, EntryRD, Ctx.getPointerType(Ctx.CharTy)); // name
  addFieldToRecordDecl(Ctx, EntryRD, Ctx.getSizeType());              // size
  addFieldToRecordDecl(Ctx, EntryRD, Int32Ty);                        // flags
  addFieldToRecordDecl(Ctx, EntryRD, Int32Ty);                        // reserved
  EntryRD->completeDefinition();
  EntryRD->addAttr(PackedAttr::CreateImplicit(Ctx));

  TgtOffloadEntryQTy = Ctx.getRecordType(EntryRD);
  return TgtOffloadEntryQTy;
}
3367
3368
namespace {
3369
struct PrivateHelpersTy {
3370
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3371
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3372
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3373
1.33k
        PrivateElemInit(PrivateElemInit) {}
3374
8
  PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3375
  const Expr *OriginalRef = nullptr;
3376
  const VarDecl *Original = nullptr;
3377
  const VarDecl *PrivateCopy = nullptr;
3378
  const VarDecl *PrivateElemInit = nullptr;
3379
4.40k
  bool isLocalPrivate() const {
3380
4.40k
    return !OriginalRef && 
!PrivateCopy24
&&
!PrivateElemInit24
;
3381
4.40k
  }
3382
};
3383
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3384
} // anonymous namespace
3385
3386
73
static bool isAllocatableDecl(const VarDecl *VD) {
3387
73
  const VarDecl *CVD = VD->getCanonicalDecl();
3388
73
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3389
18
    return false;
3390
55
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3391
  // Use the default allocation.
3392
55
  return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
3393
42
            AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
3394
20
           !AA->getAllocator());
3395
55
}
3396
3397
/// Builds the implicit record that stores the privatized variables of a task:
/// \code
/// struct .kmp_privates_t. {
///   /*  private vars  */
/// };
/// \endcode
/// One field is added per entry of \p Privates, in order, and the AlignedAttr
/// attributes of the original variable are attached to its field.
/// \returns the completed record, or nullptr when \p Privates is empty.
static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (Privates.empty())
    return nullptr;

  ASTContext &Ctx = CGM.getContext();
  RecordDecl *PrivatesRD = Ctx.buildImplicitRecord(".kmp_privates.t");
  PrivatesRD->startDefinition();
  for (const PrivateDataTy &Entry : Privates) {
    const PrivateHelpersTy &Helper = Entry.second;
    const VarDecl *OrigVD = Helper.Original;
    QualType FieldTy = OrigVD->getType().getNonReferenceType();
    if (Helper.isLocalPrivate()) {
      // If the private variable is a local variable with lvalue ref type,
      // allocate the pointer instead of the pointee type.
      if (OrigVD->getType()->isLValueReferenceType())
        FieldTy = Ctx.getPointerType(FieldTy);
      // Allocatable locals get one more level of pointer.
      if (isAllocatableDecl(OrigVD))
        FieldTy = Ctx.getPointerType(FieldTy);
    }
    FieldDecl *Field = addFieldToRecordDecl(Ctx, PrivatesRD, FieldTy);
    // Copy the alignment attributes of the variable onto its field.
    for (AlignedAttr *Aligned : OrigVD->specific_attrs<AlignedAttr>())
      Field->addAttr(Aligned);
  }
  PrivatesRD->completeDefinition();
  return PrivatesRD;
}
3430
3431
/// Builds the implicit kmp_task_t record, together with the kmp_cmplrdata_t
/// union used by two of its fields:
/// \code
/// union kmp_cmplrdata_t {
///   kmp_int32           <first member>;
///   kmp_routine_entry_t <second member>;
/// };
/// struct kmp_task_t {
///   void *              shareds;
///   kmp_routine_entry_t routine;
///   kmp_int32           part_id;
///   kmp_cmplrdata_t     data1;
///   kmp_cmplrdata_t     data2;
///   // Only when Kind is a taskloop directive:
///   kmp_uint64          lb;
///   kmp_uint64          ub;
///   kmp_int64           st;
///   kmp_int32           liter;
///   void *              reductions;
/// };
/// \endcode
/// \returns the completed kmp_task_t record.
static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  ASTContext &Ctx = CGM.getContext();

  RecordDecl *CmplrDataRD =
      Ctx.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
  CmplrDataRD->startDefinition();
  addFieldToRecordDecl(Ctx, CmplrDataRD, KmpInt32Ty);
  addFieldToRecordDecl(Ctx, CmplrDataRD, KmpRoutineEntryPointerQTy);
  CmplrDataRD->completeDefinition();
  const QualType CmplrDataTy = Ctx.getRecordType(CmplrDataRD);

  RecordDecl *TaskRD = Ctx.buildImplicitRecord("kmp_task_t");
  TaskRD->startDefinition();
  addFieldToRecordDecl(Ctx, TaskRD, Ctx.VoidPtrTy);             // shareds
  addFieldToRecordDecl(Ctx, TaskRD, KmpRoutineEntryPointerQTy); // routine
  addFieldToRecordDecl(Ctx, TaskRD, KmpInt32Ty);                // part_id
  addFieldToRecordDecl(Ctx, TaskRD, CmplrDataTy);               // data1
  addFieldToRecordDecl(Ctx, TaskRD, CmplrDataTy);               // data2
  if (isOpenMPTaskLoopDirective(Kind)) {
    const QualType UInt64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/false);
    const QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
    addFieldToRecordDecl(Ctx, TaskRD, UInt64Ty);      // lb
    addFieldToRecordDecl(Ctx, TaskRD, UInt64Ty);      // ub
    addFieldToRecordDecl(Ctx, TaskRD, Int64Ty);       // st
    addFieldToRecordDecl(Ctx, TaskRD, KmpInt32Ty);    // liter
    addFieldToRecordDecl(Ctx, TaskRD, Ctx.VoidPtrTy); // reductions
  }
  TaskRD->completeDefinition();
  return TaskRD;
}
3476
3477
/// Builds the implicit record wrapping a task descriptor and its privates:
/// \code
/// struct kmp_task_t_with_privates {
///   kmp_task_t task_data;
///   .kmp_privates_t. privates;
/// };
/// \endcode
/// The second field is only added when createPrivatesRecordDecl() returns a
/// record for \p Privates.
static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  ASTContext &Ctx = CGM.getContext();
  RecordDecl *WrapperRD = Ctx.buildImplicitRecord("kmp_task_t_with_privates");
  WrapperRD->startDefinition();
  addFieldToRecordDecl(Ctx, WrapperRD, KmpTaskTQTy);
  const RecordDecl *PrivatesRD = createPrivatesRecordDecl(CGM, Privates);
  if (PrivatesRD)
    addFieldToRecordDecl(Ctx, WrapperRD, Ctx.getRecordType(PrivatesRD));
  WrapperRD->completeDefinition();
  return WrapperRD;
}
3493
3494
/// Emit a proxy function which accepts kmp_task_t as the second
3495
/// argument.
3496
/// \code
3497
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3498
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3499
///   For taskloops:
3500
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3501
///   tt->reductions, tt->shareds);
3502
///   return 0;
3503
/// }
3504
/// \endcode
3505
static llvm::Function *
3506
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
3507
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3508
                      QualType KmpTaskTWithPrivatesPtrQTy,
3509
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3510
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
3511
707
                      llvm::Value *TaskPrivatesMap) {
3512
707
  ASTContext &C = CGM.getContext();
3513
707
  FunctionArgList Args;
3514
707
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3515
707
                            ImplicitParamDecl::Other);
3516
707
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3517
707
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3518
707
                                ImplicitParamDecl::Other);
3519
707
  Args.push_back(&GtidArg);
3520
707
  Args.push_back(&TaskTypeArg);
3521
707
  const auto &TaskEntryFnInfo =
3522
707
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3523
707
  llvm::FunctionType *TaskEntryTy =
3524
707
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3525
707
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3526
707
  auto *TaskEntry = llvm::Function::Create(
3527
707
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3528
707
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3529
707
  TaskEntry->setDoesNotRecurse();
3530
707
  CodeGenFunction CGF(CGM);
3531
707
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3532
707
                    Loc, Loc);
3533
3534
  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3535
  // tt,
3536
  // For taskloops:
3537
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3538
  // tt->task_data.shareds);
3539
707
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3540
707
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3541
707
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
3542
707
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
3543
707
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3544
707
  const auto *KmpTaskTWithPrivatesQTyRD =
3545
707
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3546
707
  LValue Base =
3547
707
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3548
707
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3549
707
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3550
707
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3551
707
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3552
3553
707
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3554
707
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3555
707
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3556
707
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3557
707
      CGF.ConvertTypeForMem(SharedsPtrTy));
3558
3559
707
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3560
707
  llvm::Value *PrivatesParam;
3561
707
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3562
442
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3563
442
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3564
442
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3565
265
  } else {
3566
265
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3567
265
  }
3568
3569
707
  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
3570
707
                               TaskPrivatesMap,
3571
707
                               CGF.Builder
3572
707
                                   .CreatePointerBitCastOrAddrSpaceCast(
3573
707
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
3574
707
                                   .getPointer()};
3575
707
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3576
707
                                          std::end(CommonArgs));
3577
707
  if (isOpenMPTaskLoopDirective(Kind)) {
3578
224
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3579
224
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3580
224
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3581
224
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3582
224
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3583
224
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3584
224
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3585
224
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3586
224
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3587
224
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3588
224
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3589
224
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3590
224
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3591
224
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3592
224
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3593
224
    CallArgs.push_back(LBParam);
3594
224
    CallArgs.push_back(UBParam);
3595
224
    CallArgs.push_back(StParam);
3596
224
    CallArgs.push_back(LIParam);
3597
224
    CallArgs.push_back(RParam);
3598
224
  }
3599
707
  CallArgs.push_back(SharedsParam);
3600
3601
707
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3602
707
                                                  CallArgs);
3603
707
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3604
707
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3605
707
  CGF.FinishFunction();
3606
707
  return TaskEntry;
3607
707
}
3608
3609
/// Emit the internal function that destroys the privates stored in a task
/// descriptor. It takes (kmp_int32 gtid, kmp_task_t_with_privates *tt) and
/// pushes a destroy cleanup for every field of the privates record whose type
/// reports a destruction kind.
///
/// The second field of \p KmpTaskTWithPrivatesQTy is dereferenced without a
/// check, so the record is expected to contain a privates field.
/// \returns the newly created function.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &Ctx = CGM.getContext();

  FunctionArgList Args;
  ImplicitParamDecl GtidArg(Ctx, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                            KmpInt32Ty, ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(Ctx, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DtorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DtorFnTy = CGM.getTypes().GetFunctionType(DtorFnInfo);
  std::string DtorName =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DtorFnTy, llvm::GlobalValue::InternalLinkage,
                             DtorName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, DtorFnInfo);
  DestructorFn->setDoesNotRecurse();

  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DtorFnInfo, Args,
                    Loc, Loc);

  // *tt, then its second field tt->privates.
  LValue TaskLVal = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *WrapperRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto PrivatesField = std::next(WrapperRD->field_begin());
  LValue PrivatesLVal = CGF.EmitLValueForField(TaskLVal, *PrivatesField);

  const auto *PrivatesRD =
      cast<RecordDecl>(PrivatesField->getType()->getAsTagDecl());
  for (const auto *Field : PrivatesRD->fields()) {
    const QualType::DestructionKind DtorKind =
        Field->getType().isDestructedType();
    if (DtorKind == QualType::DK_none)
      continue;
    LValue FieldLVal = CGF.EmitLValueForField(PrivatesLVal, Field);
    CGF.pushDestroy(DtorKind, FieldLVal.getAddress(CGF), Field->getType());
  }
  CGF.FinishFunction();
  return DestructorFn;
}
3657
3658
/// Emit a privates mapping function for correct handling of private and
3659
/// firstprivate variables.
3660
/// \code
3661
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3662
/// **noalias priv1,...,  <tyn> **noalias privn) {
3663
///   *priv1 = &.privates.priv1;
3664
///   ...;
3665
///   *privn = &.privates.privn;
3666
/// }
3667
/// \endcode
3668
static llvm::Value *
3669
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3670
                               const OMPTaskDataTy &Data, QualType PrivatesQTy,
3671
442
                               ArrayRef<PrivateDataTy> Privates) {
3672
442
  ASTContext &C = CGM.getContext();
3673
442
  FunctionArgList Args;
3674
442
  ImplicitParamDecl TaskPrivatesArg(
3675
442
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3676
442
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3677
442
      ImplicitParamDecl::Other);
3678
442
  Args.push_back(&TaskPrivatesArg);
3679
442
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3680
442
  unsigned Counter = 1;
3681
170
  for (const Expr *E : Data.PrivateVars) {
3682
170
    Args.push_back(ImplicitParamDecl::Create(
3683
170
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3684
170
        C.getPointerType(C.getPointerType(E->getType()))
3685
170
            .withConst()
3686
170
            .withRestrict(),
3687
170
        ImplicitParamDecl::Other));
3688
170
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3689
170
    PrivateVarsPos[VD] = Counter;
3690
170
    ++Counter;
3691
170
  }
3692
1.01k
  for (const Expr *E : Data.FirstprivateVars) {
3693
1.01k
    Args.push_back(ImplicitParamDecl::Create(
3694
1.01k
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3695
1.01k
        C.getPointerType(C.getPointerType(E->getType()))
3696
1.01k
            .withConst()
3697
1.01k
            .withRestrict(),
3698
1.01k
        ImplicitParamDecl::Other));
3699
1.01k
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3700
1.01k
    PrivateVarsPos[VD] = Counter;
3701
1.01k
    ++Counter;
3702
1.01k
  }
3703
151
  for (const Expr *E : Data.LastprivateVars) {
3704
151
    Args.push_back(ImplicitParamDecl::Create(
3705
151
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3706
151
        C.getPointerType(C.getPointerType(E->getType()))
3707
151
            .withConst()
3708
151
            .withRestrict(),
3709
151
        ImplicitParamDecl::Other));
3710
151
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3711
151
    PrivateVarsPos[VD] = Counter;
3712
151
    ++Counter;
3713
151
  }
3714
8
  for (const VarDecl *VD : Data.PrivateLocals) {
3715
8
    QualType Ty = VD->getType().getNonReferenceType();
3716
8
    if (VD->getType()->isLValueReferenceType())
3717
0
      Ty = C.getPointerType(Ty);
3718
8
    if (isAllocatableDecl(VD))
3719
2
      Ty = C.getPointerType(Ty);
3720
8
    Args.push_back(ImplicitParamDecl::Create(
3721
8
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3722
8
        C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3723
8
        ImplicitParamDecl::Other));
3724
8
    PrivateVarsPos[VD] = Counter;
3725
8
    ++Counter;
3726
8
  }
3727
442
  const auto &TaskPrivatesMapFnInfo =
3728
442
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3729
442
  llvm::FunctionType *TaskPrivatesMapTy =
3730
442
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3731
442
  std::string Name =
3732
442
      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3733
442
  auto *TaskPrivatesMap = llvm::Function::Create(
3734
442
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3735
442
      &CGM.getModule());
3736
442
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3737
442
                                    TaskPrivatesMapFnInfo);
3738
442
  if (CGM.getLangOpts().Optimize) {
3739
0
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3740
0
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3741
0
    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3742
0
  }
3743
442
  CodeGenFunction CGF(CGM);
3744
442
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3745
442
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);
3746
3747
  // *privi = &.privates.privi;
3748
442
  LValue Base = CGF.EmitLoadOfPointerLValue(
3749
442
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3750
442
      TaskPrivatesArg.getType()->castAs<PointerType>());
3751
442
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3752
442
  Counter = 0;
3753
1.34k
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3754
1.34k
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3755
1.34k
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3756
1.34k
    LValue RefLVal =
3757
1.34k
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3758
1.34k
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3759
1.34k
        RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3760
1.34k
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3761
1.34k
    ++Counter;
3762
1.34k
  }
3763
442
  CGF.FinishFunction();
3764
442
  return TaskPrivatesMap;
3765
442
}
3766
3767
/// Emit initialization for private variables in task-based directives.
3768
static void emitPrivatesInit(CodeGenFunction &CGF,
3769
                             const OMPExecutableDirective &D,
3770
                             Address KmpTaskSharedsPtr, LValue TDBase,
3771
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3772
                             QualType SharedsTy, QualType SharedsPtrTy,
3773
                             const OMPTaskDataTy &Data,
3774
541
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3775
541
  ASTContext &C = CGF.getContext();
3776
541
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3777
541
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3778
541
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
3779
248
                                 ? OMPD_taskloop
3780
293
                                 : OMPD_task;
3781
541
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
3782
541
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
3783
541
  LValue SrcBase;
3784
541
  bool IsTargetTask =
3785
541
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
3786
493
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
3787
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
3788
  // PointersArray, SizesArray, and MappersArray. The original variables for
3789
  // these arrays are not captured and we get their addresses explicitly.
3790
541
  if ((!IsTargetTask && 
!Data.FirstprivateVars.empty()301
&&
ForDup129
) ||
3791
515
      (IsTargetTask && 
KmpTaskSharedsPtr.isValid()240
)) {
3792
242
    SrcBase = CGF.MakeAddrLValue(
3793
242
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3794
242
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
3795
242
        SharedsTy);
3796
242
  }
3797
541
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
3798
1.71k
  for (const PrivateDataTy &Pair : Privates) {
3799
    // Do not initialize private locals.
3800
1.71k
    if (Pair.second.isLocalPrivate()) {
3801
8
      ++FI;
3802
8
      continue;
3803
8
    }
3804
1.70k
    const VarDecl *VD = Pair.second.PrivateCopy;
3805
1.70k
    const Expr *Init = VD->getAnyInitializer();
3806
1.70k
    if (Init && 
(1.32k
!ForDup1.32k
||
(206
isa<CXXConstructExpr>(Init)206
&&
3807
1.26k
                             
!CGF.isTrivialInitializer(Init)146
))) {
3808
1.26k
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
3809
1.26k
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
3810
1.06k
        const VarDecl *OriginalVD = Pair.second.Original;
3811
        // Check if the variable is the target-based BasePointersArray,
3812
        // PointersArray, SizesArray, or MappersArray.
3813
1.06k
        LValue SharedRefLValue;
3814
1.06k
        QualType Type = PrivateLValue.getType();
3815
1.06k
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
3816
1.06k
        if (IsTargetTask && 
!SharedField768
) {
3817
448
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
3818
448
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
3819
448
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
3820
448
                         ->getNumParams() == 0 &&
3821
448
                 isa<TranslationUnitDecl>(
3822
448
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
3823
448
                         ->getDeclContext()) &&
3824
448
                 "Expected artificial target data variable.");
3825
448
          SharedRefLValue =
3826
448
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
3827
617
        } else if (ForDup) {
3828
50
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3829
50
          SharedRefLValue = CGF.MakeAddrLValue(
3830
50
              Address(SharedRefLValue.getPointer(CGF),
3831
50
                      C.getDeclAlign(OriginalVD)),
3832
50
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
3833
50
              SharedRefLValue.getTBAAInfo());
3834
567
        } else if (CGF.LambdaCaptureFields.count(
3835
567
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
3836
565
                   dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
3837
13
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3838
554
        } else {
3839
          // Processing for implicitly captured variables.
3840
554
          InlinedOpenMPRegionRAII Region(
3841
0
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
3842
554
              /*HasCancel=*/false);
3843
554
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3844
554
        }
3845
1.06k
        if (Type->isArrayType()) {
3846
          // Initialize firstprivate array.
3847
541
          if (!isa<CXXConstructExpr>(Init) || 
CGF.isTrivialInitializer(Init)52
) {
3848
            // Perform simple memcpy.
3849
489
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
3850
52
          } else {
3851
            // Initialize firstprivate array using element-by-element
3852
            // initialization.
3853
52
            CGF.EmitOMPAggregateAssign(
3854
52
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
3855
52
                Type,
3856
52
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3857
52
                                                  Address SrcElement) {
3858
                  // Clean up any temporaries needed by the initialization.
3859
52
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
3860
52
                  InitScope.addPrivate(
3861
52
                      Elem, [SrcElement]() -> Address { return SrcElement; });
3862
52
                  (void)InitScope.Privatize();
3863
                  // Emit initialization for single element.
3864
52
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3865
52
                      CGF, &CapturesInfo);
3866
52
                  CGF.EmitAnyExprToMem(Init, DestElement,
3867
52
                                       Init->getType().getQualifiers(),
3868
52
                                       /*IsInitializer=*/false);
3869
52
                });
3870
52
          }
3871
524
        } else {
3872
524
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
3873
524
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
3874
524
            return SharedRefLValue.getAddress(CGF);
3875
524
          });
3876
524
          (void)InitScope.Privatize();
3877
524
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3878
524
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3879
524
                             /*capturedByInit=*/false);
3880
524
        }
3881
202
      } else {
3882
202
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3883
202
      }
3884
1.26k
    }
3885
1.70k
    ++FI;
3886
1.70k
  }
3887
541
}
3888
3889
/// Check if duplication function is required for taskloops.
3890
static bool checkInitIsRequired(CodeGenFunction &CGF,
3891
100
                                ArrayRef<PrivateDataTy> Privates) {
3892
100
  bool InitRequired = false;
3893
204
  for (const PrivateDataTy &Pair : Privates) {
3894
204
    if (Pair.second.isLocalPrivate())
3895
0
      continue;
3896
204
    const VarDecl *VD = Pair.second.PrivateCopy;
3897
204
    const Expr *Init = VD->getAnyInitializer();
3898
204
    InitRequired = InitRequired || (Init && 
isa<CXXConstructExpr>(Init)144
&&
3899
50
                                    !CGF.isTrivialInitializer(Init));
3900
204
    if (InitRequired)
3901
50
      break;
3902
204
  }
3903
100
  return InitRequired;
3904
100
}
3905
3906
3907
/// Emit task_dup function (for initialization of
3908
/// private/firstprivate/lastprivate vars and last_iter flag)
3909
/// \code
3910
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3911
/// lastpriv) {
3912
/// // setup lastprivate flag
3913
///    task_dst->last = lastpriv;
3914
/// // could be constructor calls here...
3915
/// }
3916
/// \endcode
3917
static llvm::Value *
3918
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3919
                    const OMPExecutableDirective &D,
3920
                    QualType KmpTaskTWithPrivatesPtrQTy,
3921
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3922
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3923
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3924
99
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3925
99
  ASTContext &C = CGM.getContext();
3926
99
  FunctionArgList Args;
3927
99
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3928
99
                           KmpTaskTWithPrivatesPtrQTy,
3929
99
                           ImplicitParamDecl::Other);
3930
99
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3931
99
                           KmpTaskTWithPrivatesPtrQTy,
3932
99
                           ImplicitParamDecl::Other);
3933
99
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3934
99
                                ImplicitParamDecl::Other);
3935
99
  Args.push_back(&DstArg);
3936
99
  Args.push_back(&SrcArg);
3937
99
  Args.push_back(&LastprivArg);
3938
99
  const auto &TaskDupFnInfo =
3939
99
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3940
99
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3941
99
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3942
99
  auto *TaskDup = llvm::Function::Create(
3943
99
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3944
99
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
3945
99
  TaskDup->setDoesNotRecurse();
3946
99
  CodeGenFunction CGF(CGM);
3947
99
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3948
99
                    Loc);
3949
3950
99
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
3951
99
      CGF.GetAddrOfLocalVar(&DstArg),
3952
99
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3953
  // task_dst->liter = lastpriv;
3954
99
  if (WithLastIter) {
3955
49
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3956
49
    LValue Base = CGF.EmitLValueForField(
3957
49
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3958
49
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3959
49
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3960
49
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3961
49
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3962
49
  }
3963
3964
  // Emit initial values for private copies (if any).
3965
99
  assert(!Privates.empty());
3966
99
  Address KmpTaskSharedsPtr = Address::invalid();
3967
99
  if (!Data.FirstprivateVars.empty()) {
3968
26
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
3969
26
        CGF.GetAddrOfLocalVar(&SrcArg),
3970
26
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3971
26
    LValue Base = CGF.EmitLValueForField(
3972
26
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3973
26
    KmpTaskSharedsPtr = Address(
3974
26
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
3975
26
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
3976
26
                                                  KmpTaskTShareds)),
3977
26
                             Loc),
3978
26
        CGM.getNaturalTypeAlignment(SharedsTy));
3979
26
  }
3980
99
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3981
99
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3982
99
  CGF.FinishFunction();
3983
99
  return TaskDup;
3984
99
}
3985
3986
/// Checks if destructor function is required to be generated.
3987
/// \return true if cleanups are required, false otherwise.
3988
static bool
3989
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3990
442
                         ArrayRef<PrivateDataTy> Privates) {
3991
1.14k
  for (const PrivateDataTy &P : Privates) {
3992
1.14k
    if (P.second.isLocalPrivate())
3993
8
      continue;
3994
1.13k
    QualType Ty = P.second.Original->getType().getNonReferenceType();
3995
1.13k
    if (Ty.isDestructedType())
3996
81
      return true;
3997
1.13k
  }
3998
361
  return false;
3999
442
}
4000
4001
namespace {
4002
/// Loop generator for OpenMP iterator expression.
4003
class OMPIteratorGeneratorScope final
4004
    : public CodeGenFunction::OMPPrivateScope {
4005
  CodeGenFunction &CGF;
4006
  const OMPIteratorExpr *E = nullptr;
4007
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
4008
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
4009
  OMPIteratorGeneratorScope() = delete;
4010
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
4011
4012
public:
4013
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
4014
424
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
4015
424
    if (!E)
4016
418
      return;
4017
6
    SmallVector<llvm::Value *, 4> Uppers;
4018
12
    for (unsigned I = 0, End = E->numOfIterators(); I < End; 
++I6
) {
4019
6
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
4020
6
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
4021
6
      addPrivate(VD, [&CGF, VD]() {
4022
6
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
4023
6
      });
4024
6
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
4025
6
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
4026
6
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
4027
6
                                 "counter.addr");
4028
6
      });
4029
6
    }
4030
6
    Privatize();
4031
4032
12
    for (unsigned I = 0, End = E->numOfIterators(); I < End; 
++I6
) {
4033
6
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
4034
6
      LValue CLVal =
4035
6
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
4036
6
                             HelperData.CounterVD->getType());
4037
      // Counter = 0;
4038
6
      CGF.EmitStoreOfScalar(
4039
6
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
4040
6
          CLVal);
4041
6
      CodeGenFunction::JumpDest &ContDest =
4042
6
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
4043
6
      CodeGenFunction::JumpDest &ExitDest =
4044
6
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
4045
      // N = <number-of_iterations>;
4046
6
      llvm::Value *N = Uppers[I];
4047
      // cont:
4048
      // if (Counter < N) goto body; else goto exit;
4049
6
      CGF.EmitBlock(ContDest.getBlock());
4050
6
      auto *CVal =
4051
6
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
4052
6
      llvm::Value *Cmp =
4053
6
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
4054
4
              ? CGF.Builder.CreateICmpSLT(CVal, N)
4055
2
              : CGF.Builder.CreateICmpULT(CVal, N);
4056
6
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
4057
6
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
4058
      // body:
4059
6
      CGF.EmitBlock(BodyBB);
4060
      // Iteri = Begini + Counter * Stepi;
4061
6
      CGF.EmitIgnoredExpr(HelperData.Update);
4062
6
    }
4063
6
  }
4064
424
  ~OMPIteratorGeneratorScope() {
4065
424
    if (!E)
4066
418
      return;
4067
12
    
for (unsigned I = E->numOfIterators(); 6
I > 0;
--I6
) {
4068
      // Counter = Counter + 1;
4069
6
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
4070
6
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
4071
      // goto cont;
4072
6
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
4073
      // exit:
4074
6
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
4075
6
    }
4076
6
  }
4077
};
4078
} // namespace
4079
4080
static std::pair<llvm::Value *, llvm::Value *>
4081
958
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4082
958
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4083
958
  llvm::Value *Addr;
4084
958
  if (OASE) {
4085
6
    const Expr *Base = OASE->getBase();
4086
6
    Addr = CGF.EmitScalarExpr(Base);
4087
952
  } else {
4088
952
    Addr = CGF.EmitLValue(E).getPointer(CGF);
4089
952
  }
4090
958
  llvm::Value *SizeVal;
4091
958
  QualType Ty = E->getType();
4092
958
  if (OASE) {
4093
6
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4094
18
    for (const Expr *SE : OASE->getDimensions()) {
4095
18
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4096
18
      Sz = CGF.EmitScalarConversion(
4097
18
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4098
18
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4099
18
    }
4100
952
  } else if (const auto *ASE =
4101
28
                 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4102
28
    LValue UpAddrLVal =
4103
28
        CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4104
28
    llvm::Value *UpAddr =
4105
28
        CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
4106
28
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4107
28
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4108
28
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4109
924
  } else {
4110
924
    SizeVal = CGF.getTypeSize(Ty);
4111
924
  }
4112
958
  return std::make_pair(Addr, SizeVal);
4113
958
}
4114
4115
/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4116
4
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4117
4
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4118
4
  if (KmpTaskAffinityInfoTy.isNull()) {
4119
2
    RecordDecl *KmpAffinityInfoRD =
4120
2
        C.buildImplicitRecord("kmp_task_affinity_info_t");
4121
2
    KmpAffinityInfoRD->startDefinition();
4122
2
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4123
2
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4124
2
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4125
2
    KmpAffinityInfoRD->completeDefinition();
4126
2
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4127
2
  }
4128
4
}
4129
4130
// Emits the allocation and initialization of a task descriptor for directive
// D: collects the privates, builds the kmp_task_t-with-privates record, emits
// the proxy entry function, calls the task allocation runtime function,
// handles detach/affinity clauses, copies the shareds, initializes the private
// copies and fills in the destructor and priority fields. The returned
// TaskResultTy carries the new task, the task entry, the typed task pointer,
// the kmp_task_t base lvalue, its record declaration and, for taskloops that
// need it, the task duplication function.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &Ctx = CGM.getContext();
  const OpenMPDirectiveKind DKind = D.getDirectiveKind();

  // Variable declaration referenced by a DeclRefExpr.
  auto DeclOf = [](const Expr *Ref) {
    return cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  };

  // Aggregate privates and sort them by the alignment.
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  const auto *PrivCopyIt = Data.PrivateCopies.begin();
  for (const Expr *Ref : Data.PrivateVars) {
    const VarDecl *OrigVD = DeclOf(Ref);
    Privates.emplace_back(Ctx.getDeclAlign(OrigVD),
                          PrivateHelpersTy(Ref, OrigVD, DeclOf(*PrivCopyIt),
                                           /*PrivateElemInit=*/nullptr));
    ++PrivCopyIt;
  }
  const auto *FirstprivCopyIt = Data.FirstprivateCopies.begin();
  const auto *FirstprivInitIt = Data.FirstprivateInits.begin();
  for (const Expr *Ref : Data.FirstprivateVars) {
    const VarDecl *OrigVD = DeclOf(Ref);
    Privates.emplace_back(Ctx.getDeclAlign(OrigVD),
                          PrivateHelpersTy(Ref, OrigVD,
                                           DeclOf(*FirstprivCopyIt),
                                           DeclOf(*FirstprivInitIt)));
    ++FirstprivCopyIt;
    ++FirstprivInitIt;
  }
  const auto *LastprivCopyIt = Data.LastprivateCopies.begin();
  for (const Expr *Ref : Data.LastprivateVars) {
    const VarDecl *OrigVD = DeclOf(Ref);
    Privates.emplace_back(Ctx.getDeclAlign(OrigVD),
                          PrivateHelpersTy(Ref, OrigVD, DeclOf(*LastprivCopyIt),
                                           /*PrivateElemInit=*/nullptr));
    ++LastprivCopyIt;
  }
  for (const VarDecl *LocalVD : Data.PrivateLocals) {
    // Allocatable declarations are recorded with pointer alignment.
    const CharUnits LocalAlign = isAllocatableDecl(LocalVD)
                                     ? CGM.getPointerAlign()
                                     : Ctx.getDeclAlign(LocalVD);
    Privates.emplace_back(LocalAlign, PrivateHelpersTy(LocalVD));
  }
  // Descending alignment; the stable sort keeps the collection order among
  // entries with equal alignment.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &LHS, const PrivateDataTy &RHS) {
                      return LHS.first > RHS.first;
                    });

  QualType KmpInt32Ty =
      Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloop directives use their
  // own cached record type.
  if (isOpenMPTaskLoopDirective(DKind)) {
    if (SavedKmpTaskloopTQTy.isNull())
      SavedKmpTaskloopTQTy = Ctx.getRecordType(createKmpTaskTRecordDecl(
          CGM, DKind, KmpInt32Ty, KmpRoutineEntryPtrQTy));
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull())
      SavedKmpTaskTQTy = Ctx.getRecordType(createKmpTaskTRecordDecl(
          CGM, DKind, KmpInt32Ty, KmpRoutineEntryPtrQTy));
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());

  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy =
      Ctx.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      Ctx.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = Ctx.getPointerType(SharedsTy);

  // Emit the privates mapping function (or a null pointer of the type of the
  // task function's fourth parameter when there are no privates).
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  llvm::Value *TaskPrivatesMap = nullptr;
  if (Privates.empty()) {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  } else {
    auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data, PrivatesFI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  }

  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, DKind, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  const bool NeedsCleanup =
      !Privates.empty() &&
      checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
  if (NeedsCleanup)
    Flags |= DestructorsFlag;
  if (Data.Priority.getInt())
    Flags |= PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags |= DetachableFlag;

  // The final flag is either selected at run time from the final-clause
  // condition or folded to a constant.
  llvm::Value *TaskFlags;
  if (llvm::Value *FinalCond = Data.Final.getPointer())
    TaskFlags = CGF.Builder.CreateSelect(FinalCond,
                                         CGF.Builder.getInt32(FinalFlag),
                                         CGF.Builder.getInt32(/*C=*/0));
  else
    TaskFlags = CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));

  llvm::Value *SharedsSize = CGM.getSize(Ctx.getTypeSizeInChars(SharedsTy));
  llvm::Value *AllocLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *AllocTid = getThreadID(CGF, Loc);
  llvm::Value *TaskEntryPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      TaskEntry, KmpRoutineEntryPtrTy);
  SmallVector<llvm::Value *, 8> AllocArgs = {
      AllocLoc,    AllocTid,    TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, TaskEntryPtr};

  auto AllocFnID = OMPRTL___kmpc_omp_task_alloc;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *DeviceClause = D.getSingleClause<OMPDeviceClause>())
      Device = DeviceClause->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    AllocFnID = OMPRTL___kmpc_omp_target_task_alloc;
  }
  llvm::Value *NewTask = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), AllocFnID),
      AllocArgs);

  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *EvtLoc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *EvtTid = getThreadID(CGF, DC->getBeginLoc());
    EvtTid = CGF.Builder.CreateIntCast(EvtTid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {EvtLoc, EvtTid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, Ctx.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }

  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data:
    // clauses with an iterator modifier contribute a run-time count, the
    // others a compile-time count.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *Clause : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = Clause->getModifier();
      if (!Modifier) {
        NumAffinities += Clause->varlist_size();
        continue;
      }
      const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
      for (unsigned I = 0, NumIters = IE->numOfIterators(); I < NumIters;
           ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
        NumOfElements =
            NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      OpaqueValueExpr OVE(
          Loc,
          Ctx.getIntTypeForBitwidth(Ctx.getTypeSize(Ctx.getSizeType()),
                                    /*Signed=*/0),
          VK_RValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy = Ctx.getVariableArrayType(
          KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
          /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(Ctx, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      KmpTaskAffinityInfoArrayTy = Ctx.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(Ctx.getTypeSize(Ctx.getSizeType()), NumAffinities),
          nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Stores the address and the length into one element of the array.
    auto StoreAffinityEntry = [&CGF, KmpAffinityInfoRD](LValue Entry,
                                                        llvm::Value *Addr,
                                                        llvm::Value *Size) {
      // affs[i].base_addr = &<Affinities[i].second>;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          Entry, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
      CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                            BaseAddrLVal);
      // affs[i].len = sizeof(<Affinities[i].second>);
      LValue LenLVal = CGF.EmitLValueForField(
          Entry, *std::next(KmpAffinityInfoRD->field_begin(), Len));
      CGF.EmitStoreOfScalar(Size, LenLVal);
    };

    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *Clause : D.getClausesOfKind<OMPAffinityClause>()) {
      if (Clause->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *Item : Clause->varlists()) {
        std::pair<llvm::Value *, llvm::Value *> PtrAndSize =
            getPointerAndSize(CGF, Item);
        LValue Entry =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        StoreAffinityEntry(Entry, PtrAndSize.first, PtrAndSize.second);
        ++Pos;
      }
    }
    // Elements produced by iterators are placed after the constant ones; their
    // position is tracked in a run-time counter.
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(Ctx.getSizeType(), "affs.counter.addr"),
          Ctx.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *Clause : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = Clause->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *Item : Clause->varlists()) {
        std::pair<llvm::Value *, llvm::Value *> PtrAndSize =
            getPointerAndSize(CGF, Item);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, Item->getExprLoc());
        LValue Entry = CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx),
                    AffinitiesArray.getAlignment()),
            KmpTaskAffinityInfoTy);
        StoreAffinityEntry(Entry, PtrAndSize.first, PtrAndSize.second);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }

  // View the allocated task as a pointer to the task-with-privates record.
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());

  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    LValue SharedsFieldLVal = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds));
    llvm::Value *SharedsPtrVal = CGF.EmitLoadOfScalar(SharedsFieldLVal, Loc);
    KmpTaskSharedsPtr =
        Address(SharedsPtrVal, CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }

  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops additionally get a duplication function when they have
    // lastprivates or privates that need non-trivial construction.
    const bool HasLastprivates = !Data.LastprivateVars.empty();
    if (isOpenMPTaskLoopDirective(DKind) &&
        (HasLastprivates || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/HasLastprivates);
    }
  }

  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto Data1FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*Data1FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *Data1FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }

  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4519
4520
namespace {
/// Dependence kinds as they are written into the flags field of a
/// kmp_depend_info entry.
enum RTLDependenceKindTy {
  DepIn = 0x1,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4,
};
/// Indices of the fields in the kmp_depend_info record.
enum RTLDependInfoFieldsTy {
  BaseAddr = 0,
  Len = 1,
  Flags = 2,
};
} // namespace
4530
4531
/// Translates internal dependency kind into the runtime kind.
4532
956
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4533
956
  RTLDependenceKindTy DepKind;
4534
956
  switch (K) {
4535
170
  case OMPC_DEPEND_in:
4536
170
    DepKind = DepIn;
4537
170
    break;
4538
  // Out and InOut dependencies must use the same code.
4539
776
  case OMPC_DEPEND_out:
4540
776
  case OMPC_DEPEND_inout:
4541
776
    DepKind = DepInOut;
4542
776
    break;
4543
10
  case OMPC_DEPEND_mutexinoutset:
4544
10
    DepKind = DepMutexInOutSet;
4545
10
    break;
4546
0
  case OMPC_DEPEND_source:
4547
0
  case OMPC_DEPEND_sink:
4548
0
  case OMPC_DEPEND_depobj:
4549
0
  case OMPC_DEPEND_unknown:
4550
0
    llvm_unreachable("Unknown task dependence type");
4551
956
  }
4552
956
  return DepKind;
4553
956
}
4554
4555
/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4556
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4557
784
                           QualType &FlagsTy) {
4558
784
  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4559
784
  if (KmpDependInfoTy.isNull()) {
4560
90
    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4561
90
    KmpDependInfoRD->startDefinition();
4562
90
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4563
90
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4564
90
    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4565
90
    KmpDependInfoRD->completeDefinition();
4566
90
    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4567
90
  }
4568
784
}
4569
4570
std::pair<llvm::Value *, LValue>
4571
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4572
4
                                   SourceLocation Loc) {
4573
4
  ASTContext &C = CGM.getContext();
4574
4
  QualType FlagsTy;
4575
4
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
4576
4
  RecordDecl *KmpDependInfoRD =
4577
4
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4578
4
  LValue Base = CGF.EmitLoadOfPointerLValue(
4579
4
      DepobjLVal.getAddress(CGF),
4580
4
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4581
4
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4582
4
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4583
4
          Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
4584
4
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4585
4
                            Base.getTBAAInfo());
4586
4
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4587
4
      Addr.getPointer(),
4588
4
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4589
4
  LValue NumDepsBase = CGF.MakeAddrLValue(
4590
4
      Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4591
4
      Base.getBaseInfo(), Base.getTBAAInfo());
4592
  // NumDeps = deps[i].base_addr;
4593
4
  LValue BaseAddrLVal = CGF.EmitLValueForField(
4594
4
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4595
4
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4596
4
  return std::make_pair(NumDeps, Base);
4597
4
}
4598
4599
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4600
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
4601
                           const OMPTaskDataTy::DependData &Data,
4602
418
                           Address DependenciesArray) {
4603
418
  CodeGenModule &CGM = CGF.CGM;
4604
418
  ASTContext &C = CGM.getContext();
4605
418
  QualType FlagsTy;
4606
418
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
4607
418
  RecordDecl *KmpDependInfoRD =
4608
418
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4609
418
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4610
4611
418
  OMPIteratorGeneratorScope IteratorScope(
4612
418
      CGF, cast_or_null<OMPIteratorExpr>(
4613
4
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4614
414
                                 : nullptr));
4615
952
  for (const Expr *E : Data.DepExprs) {
4616
952
    llvm::Value *Addr;
4617
952
    llvm::Value *Size;
4618
952
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4619
952
    LValue Base;
4620
952
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4621
948
      Base = CGF.MakeAddrLValue(
4622
948
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4623
4
    } else {
4624
4
      LValue &PosLVal = *Pos.get<LValue *>();
4625
4
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4626
4
      Base = CGF.MakeAddrLValue(
4627
4
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
4628
4
                  DependenciesArray.getAlignment()),
4629
4
          KmpDependInfoTy);
4630
4
    }
4631
    // deps[i].base_addr = &<Dependencies[i].second>;
4632
952
    LValue BaseAddrLVal = CGF.EmitLValueForField(
4633
952
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4634
952
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4635
952
                          BaseAddrLVal);
4636
    // deps[i].len = sizeof(<Dependencies[i].second>);
4637
952
    LValue LenLVal = CGF.EmitLValueForField(
4638
952
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
4639
952
    CGF.EmitStoreOfScalar(Size, LenLVal);
4640
    // deps[i].flags = <Dependencies[i].first>;
4641
952
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4642
952
    LValue FlagsLVal = CGF.EmitLValueForField(
4643
952
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
4644
952
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
4645
952
                          FlagsLVal);
4646
952
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4647
948
      ++(*P);
4648
4
    } else {
4649
4
      LValue &PosLVal = *Pos.get<LValue *>();
4650
4
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4651
4
      Idx = CGF.Builder.CreateNUWAdd(Idx,
4652
4
                                     llvm::ConstantInt::get(Idx->getType(), 1));
4653
4
      CGF.EmitStoreOfScalar(Idx, PosLVal);
4654
4
    }
4655
952
  }
4656
418
}
4657
4658
/// For every depobj expression in \p Data, emits code that reads the element
/// count stored one entry before the depobj array and adds it to a
/// zero-initialized temporary.
/// \returns the values loaded back from those temporaries, one per
/// expression.
static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &Ctx = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(Ctx, KmpDependInfoTy, FlagsTy);
  auto *InfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *InfoPtrLLVMTy =
      CGF.ConvertTypeForMem(Ctx.getPointerType(KmpDependInfoTy));
  {
    const Expr *IterE =
        Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() : nullptr;
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(IterE));
    for (const Expr *DepobjExpr : Data.DepExprs) {
      // Load the pointer held by the depobj variable and view it as a
      // pointer to kmp_depend_info.
      LValue DepobjLVal = CGF.EmitLValue(DepobjExpr->IgnoreParenImpCasts());
      LValue DepobjPtr = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          Ctx.getPointerType(Ctx.VoidPtrTy).castAs<PointerType>());
      Address FirstEntry = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          DepobjPtr.getAddress(CGF), InfoPtrLLVMTy);
      LValue Entries = CGF.MakeAddrLValue(FirstEntry, KmpDependInfoTy,
                                          DepobjPtr.getBaseInfo(),
                                          DepobjPtr.getTBAAInfo());
      // The element count lives in the entry preceding the array.
      llvm::Value *HeaderPtr = CGF.Builder.CreateGEP(
          FirstEntry.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue Header = CGF.MakeAddrLValue(
          Address(HeaderPtr, FirstEntry.getAlignment()), KmpDependInfoTy,
          Entries.getBaseInfo(), Entries.getTBAAInfo());
      // NumDeps = deps[-1].base_addr;
      const FieldDecl *CountFD = *std::next(InfoRD->field_begin(), BaseAddr);
      LValue CountLVal = CGF.EmitLValueForField(Header, CountFD);
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(CountLVal, DepobjExpr->getExprLoc());
      // Accumulate the count into a zero-initialized temporary.
      LValue AccumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(Ctx.getUIntPtrType(), "depobj.size.addr"),
          Ctx.getUIntPtrType());
      CGF.InitTempAlloca(AccumLVal.getAddress(CGF),
                         llvm::ConstantInt::get(CGF.IntPtrTy, 0));
      llvm::Value *Prev =
          CGF.EmitLoadOfScalar(AccumLVal, DepobjExpr->getExprLoc());
      llvm::Value *Sum = CGF.Builder.CreateNUWAdd(Prev, NumDeps);
      CGF.EmitStoreOfScalar(Sum, AccumLVal);
      SizeLVals.push_back(AccumLVal);
    }
  }
  // Read the accumulated counts back once the iterator scope is closed.
  for (unsigned Idx = 0, NumLVals = SizeLVals.size(); Idx != NumLVals; ++Idx)
    Sizes.push_back(CGF.EmitLoadOfScalar(SizeLVals[Idx],
                                         Data.DepExprs[Idx]->getExprLoc()));
  return Sizes;
}
4715
4716
static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4717
                               LValue PosLVal,
4718
                               const OMPTaskDataTy::DependData &Data,
4719
2
                               Address DependenciesArray) {
4720
2
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
4721
2
         "Expected depobj dependecy kind.");
4722
2
  ASTContext &C = CGF.getContext();
4723
2
  QualType FlagsTy;
4724
2
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
4725
2
  RecordDecl *KmpDependInfoRD =
4726
2
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4727
2
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4728
2
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4729
2
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4730
2
  {
4731
2
    OMPIteratorGeneratorScope IteratorScope(
4732
2
        CGF, cast_or_null<OMPIteratorExpr>(
4733
0
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4734
2
                                   : nullptr));
4735
6
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; 
++I4
) {
4736
4
      const Expr *E = Data.DepExprs[I];
4737
4
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4738
4
      LValue Base = CGF.EmitLoadOfPointerLValue(
4739
4
          DepobjLVal.getAddress(CGF),
4740
4
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4741
4
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4742
4
          Base.getAddress(CGF), KmpDependInfoPtrT);
4743
4
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4744
4
                                Base.getTBAAInfo());
4745
4746
      // Get number of elements in a single depobj.
4747
4
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4748
4
          Addr.getPointer(),
4749
4
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4750
4
      LValue NumDepsBase = CGF.MakeAddrLValue(
4751
4
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4752
4
          Base.getBaseInfo(), Base.getTBAAInfo());
4753
      // NumDeps = deps[i].base_addr;
4754
4
      LValue BaseAddrLVal = CGF.EmitLValueForField(
4755
4
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4756
4
      llvm::Value *NumDeps =
4757
4
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4758
4759
      // memcopy dependency data.
4760
4
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
4761
4
          ElSize,
4762
4
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4763
4
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4764
4
      Address DepAddr =
4765
4
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
4766
4
                  DependenciesArray.getAlignment());
4767
4
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
4768
4769
      // Increase pos.
4770
      // pos += size;
4771
4
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4772
4
      CGF.EmitStoreOfScalar(Add, PosLVal);
4773
4
    }
4774
2
  }
4775
2
}
4776
4777
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4778
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4779
483
    SourceLocation Loc) {
4780
483
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4781
344
        return D.DepExprs.empty();
4782
344
      }))
4783
139
    return std::make_pair(nullptr, Address::invalid());
4784
  // Process list of dependencies.
4785
344
  ASTContext &C = CGM.getContext();
4786
344
  Address DependenciesArray = Address::invalid();
4787
344
  llvm::Value *NumOfElements = nullptr;
4788
344
  unsigned NumDependencies = std::accumulate(
4789
344
      Dependencies.begin(), Dependencies.end(), 0,
4790
414
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
4791
414
        return D.DepKind == OMPC_DEPEND_depobj
4792
2
                   ? V
4793
412
                   : (V + (D.IteratorExpr ? 
02
:
D.DepExprs.size()410
));
4794
414
      });
4795
344
  QualType FlagsTy;
4796
344
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
4797
344
  bool HasDepobjDeps = false;
4798
344
  bool HasRegularWithIterators = false;
4799
344
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4800
344
  llvm::Value *NumOfRegularWithIterators =
4801
344
      llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4802
  // Calculate number of depobj dependecies and regular deps with the iterators.
4803
414
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
4804
414
    if (D.DepKind == OMPC_DEPEND_depobj) {
4805
2
      SmallVector<llvm::Value *, 4> Sizes =
4806
2
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4807
4
      for (llvm::Value *Size : Sizes) {
4808
4
        NumOfDepobjElements =
4809
4
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4810
4
      }
4811
2
      HasDepobjDeps = true;
4812
2
      continue;
4813
2
    }
4814
    // Include number of iterations, if any.
4815
412
    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4816
4
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; 
++I2
) {
4817
2
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4818
2
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4819
2
        NumOfRegularWithIterators =
4820
2
            CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
4821
2
      }
4822
2
      HasRegularWithIterators = true;
4823
2
      continue;
4824
2
    }
4825
412
  }
4826
4827
344
  QualType KmpDependInfoArrayTy;
4828
344
  if (HasDepobjDeps || 
HasRegularWithIterators342
) {
4829
4
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4830
4
                                           /*isSigned=*/false);
4831
4
    if (HasDepobjDeps) {
4832
2
      NumOfElements =
4833
2
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4834
2
    }
4835
4
    if (HasRegularWithIterators) {
4836
2
      NumOfElements =
4837
2
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4838
2
    }
4839
4
    OpaqueValueExpr OVE(Loc,
4840
4
                        C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4841
4
                        VK_RValue);
4842
4
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
4843
4
                                                  RValue::get(NumOfElements));
4844
4
    KmpDependInfoArrayTy =
4845
4
        C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
4846
4
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4847
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4848
    // Properly emit variable-sized array.
4849
4
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4850
4
                                         ImplicitParamDecl::Other);
4851
4
    CGF.EmitVarDecl(*PD);
4852
4
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4853
4
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4854
4
                                              /*isSigned=*/false);
4855
340
  } else {
4856
340
    KmpDependInfoArrayTy = C.getConstantArrayType(
4857
340
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4858
340
        ArrayType::Normal, /*IndexTypeQuals=*/0);
4859
340
    DependenciesArray =
4860
340
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4861
340
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4862
340
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4863
340
                                           /*isSigned=*/false);
4864
340
  }
4865
344
  unsigned Pos = 0;
4866
758
  for (unsigned I = 0, End = Dependencies.size(); I < End; 
++I414
) {
4867
414
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4868
412
        Dependencies[I].IteratorExpr)
4869
4
      continue;
4870
410
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4871
410
                   DependenciesArray);
4872
410
  }
4873
  // Copy regular dependecies with iterators.
4874
344
  LValue PosLVal = CGF.MakeAddrLValue(
4875
344
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4876
344
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4877
758
  for (unsigned I = 0, End = Dependencies.size(); I < End; 
++I414
) {
4878
414
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4879
412
        !Dependencies[I].IteratorExpr)
4880
412
      continue;
4881
2
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4882
2
                   DependenciesArray);
4883
2
  }
4884
  // Copy final depobj arrays without iterators.
4885
344
  if (HasDepobjDeps) {
4886
6
    for (unsigned I = 0, End = Dependencies.size(); I < End; 
++I4
) {
4887
4
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4888
2
        continue;
4889
2
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4890
2
                         DependenciesArray);
4891
2
    }
4892
2
  }
4893
344
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4894
344
      DependenciesArray, CGF.VoidPtrTy);
4895
344
  return std::make_pair(NumOfElements, DependenciesArray);
4896
344
}
4897
4898
/// Emits the dynamically allocated kmp_depend_info array that backs a depobj
/// object. The array is obtained from __kmpc_alloc and has one extra leading
/// element whose base_addr field stores the number of dependence entries.
///
/// \returns the address of the first real entry (the element after the
/// leading one) cast to void *, or an invalid address when the clause has no
/// dependence expressions.
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // Every point of the iteration space emits one entry per dependence
    // expression, so the entry count is
    //   <number of expressions> * <product of the iterator sizes>.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, NumDependencies);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    // One more element for the leading entry holding the count.
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  DependenciesArray = Address(Addr, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[0].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // Real entries start at index 1. With an iterator the index advances at run
  // time and is kept in memory; otherwise it is tracked at compile time.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Hand out the address of the first real entry, past the leading element.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
  return DependenciesArray;
}
4980
4981
void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4982
4
                                        SourceLocation Loc) {
4983
4
  ASTContext &C = CGM.getContext();
4984
4
  QualType FlagsTy;
4985
4
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
4986
4
  LValue Base = CGF.EmitLoadOfPointerLValue(
4987
4
      DepobjLVal.getAddress(CGF),
4988
4
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4989
4
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4990
4
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4991
4
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
4992
4
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4993
4
      Addr.getPointer(),
4994
4
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4995
4
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
4996
4
                                                               CGF.VoidPtrTy);
4997
4
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
4998
  // Use default allocator.
4999
4
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5000
4
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5001
5002
  // _kmpc_free(gtid, addr, nullptr);
5003
4
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5004
4
                                CGM.getModule(), OMPRTL___kmpc_free),
5005
4
                            Args);
5006
4
}
5007
5008
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
5009
                                       OpenMPDependClauseKind NewDepKind,
5010
4
                                       SourceLocation Loc) {
5011
4
  ASTContext &C = CGM.getContext();
5012
4
  QualType FlagsTy;
5013
4
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
5014
4
  RecordDecl *KmpDependInfoRD =
5015
4
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5016
4
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5017
4
  llvm::Value *NumDeps;
5018
4
  LValue Base;
5019
4
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
5020
5021
4
  Address Begin = Base.getAddress(CGF);
5022
  // Cast from pointer to array type to pointer to single element.
5023
4
  llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
5024
  // The basic structure here is a while-do loop.
5025
4
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
5026
4
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
5027
4
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5028
4
  CGF.EmitBlock(BodyBB);
5029
4
  llvm::PHINode *ElementPHI =
5030
4
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
5031
4
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
5032
4
  Begin = Address(ElementPHI, Begin.getAlignment());
5033
4
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
5034
4
                            Base.getTBAAInfo());
5035
  // deps[i].flags = NewDepKind;
5036
4
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
5037
4
  LValue FlagsLVal = CGF.EmitLValueForField(
5038
4
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5039
4
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5040
4
                        FlagsLVal);
5041
5042
  // Shift the address forward by one element.
5043
4
  Address ElementNext =
5044
4
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
5045
4
  ElementPHI->addIncoming(ElementNext.getPointer(),
5046
4
                          CGF.Builder.GetInsertBlock());
5047
4
  llvm::Value *IsEmpty =
5048
4
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
5049
4
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5050
  // Done.
5051
4
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5052
4
}
5053
5054
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
5055
                                   const OMPExecutableDirective &D,
5056
                                   llvm::Function *TaskFunction,
5057
                                   QualType SharedsTy, Address Shareds,
5058
                                   const Expr *IfCond,
5059
483
                                   const OMPTaskDataTy &Data) {
5060
483
  if (!CGF.HaveInsertPoint())
5061
0
    return;
5062
5063
483
  TaskResultTy Result =
5064
483
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5065
483
  llvm::Value *NewTask = Result.NewTask;
5066
483
  llvm::Function *TaskEntry = Result.TaskEntry;
5067
483
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
5068
483
  LValue TDBase = Result.TDBase;
5069
483
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
5070
  // Process list of dependences.
5071
483
  Address DependenciesArray = Address::invalid();
5072
483
  llvm::Value *NumOfElements;
5073
483
  std::tie(NumOfElements, DependenciesArray) =
5074
483
      emitDependClause(CGF, Data.Dependences, Loc);
5075
5076
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5077
  // libcall.
5078
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5079
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5080
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence