Coverage Report

Created: 2021-01-23 06:44

/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp
Line
Count
Source (jump to first uncovered line)
1
//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This provides a class for OpenMP runtime code generation.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "CGOpenMPRuntime.h"
14
#include "CGCXXABI.h"
15
#include "CGCleanup.h"
16
#include "CGRecordLayout.h"
17
#include "CodeGenFunction.h"
18
#include "clang/AST/Attr.h"
19
#include "clang/AST/Decl.h"
20
#include "clang/AST/OpenMPClause.h"
21
#include "clang/AST/StmtOpenMP.h"
22
#include "clang/AST/StmtVisitor.h"
23
#include "clang/Basic/BitmaskEnum.h"
24
#include "clang/Basic/FileManager.h"
25
#include "clang/Basic/OpenMPKinds.h"
26
#include "clang/Basic/SourceManager.h"
27
#include "clang/CodeGen/ConstantInitBuilder.h"
28
#include "llvm/ADT/ArrayRef.h"
29
#include "llvm/ADT/SetOperations.h"
30
#include "llvm/ADT/StringExtras.h"
31
#include "llvm/Bitcode/BitcodeReader.h"
32
#include "llvm/IR/Constants.h"
33
#include "llvm/IR/DerivedTypes.h"
34
#include "llvm/IR/GlobalValue.h"
35
#include "llvm/IR/Value.h"
36
#include "llvm/Support/AtomicOrdering.h"
37
#include "llvm/Support/Format.h"
38
#include "llvm/Support/raw_ostream.h"
39
#include <cassert>
40
#include <numeric>
41
42
using namespace clang;
43
using namespace CodeGen;
44
using namespace llvm::omp;
45
46
namespace {
47
/// Base class for handling code generation inside OpenMP regions.
48
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
49
public:
50
  /// Kinds of OpenMP regions used in codegen.
51
  enum CGOpenMPRegionKind {
52
    /// Region with outlined function for standalone 'parallel'
53
    /// directive.
54
    ParallelOutlinedRegion,
55
    /// Region with outlined function for standalone 'task' directive.
56
    TaskOutlinedRegion,
57
    /// Region for constructs that do not require function outlining,
58
    /// like 'for', 'sections', 'atomic' etc. directives.
59
    InlinedRegion,
60
    /// Region with outlined function for standalone 'target' directive.
61
    TargetRegion,
62
  };
63
64
  CGOpenMPRegionInfo(const CapturedStmt &CS,
65
                     const CGOpenMPRegionKind RegionKind,
66
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
67
                     bool HasCancel)
68
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
69
24.1k
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
70
71
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
72
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
73
                     bool HasCancel)
74
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
75
42.0k
        Kind(Kind), HasCancel(HasCancel) {}
76
77
  /// Get a variable or parameter for storing global thread id
78
  /// inside OpenMP construct.
79
  virtual const VarDecl *getThreadIDVariable() const = 0;
80
81
  /// Emit the captured statement body.
82
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
83
84
  /// Get an LValue for the current ThreadID variable.
85
  /// \return LValue for thread id variable. This LValue always has type int32*.
86
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
87
88
28
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
89
90
41.7k
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
91
92
211
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
93
94
328
  bool hasCancel() const { return HasCancel; }
95
96
140k
  static bool classof(const CGCapturedStmtInfo *Info) {
97
140k
    return Info->getKind() == CR_OpenMP;
98
140k
  }
99
100
66.1k
  ~CGOpenMPRegionInfo() override = default;
101
102
protected:
103
  CGOpenMPRegionKind RegionKind;
104
  RegionCodeGenTy CodeGen;
105
  OpenMPDirectiveKind Kind;
106
  bool HasCancel;
107
};
108
109
/// API for captured statement code generation in OpenMP constructs.
110
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
111
public:
112
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
113
                             const RegionCodeGenTy &CodeGen,
114
                             OpenMPDirectiveKind Kind, bool HasCancel,
115
                             StringRef HelperName)
116
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
117
                           HasCancel),
118
11.6k
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
119
11.6k
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
120
11.6k
  }
121
122
  /// Get a variable or parameter for storing global thread id
123
  /// inside OpenMP construct.
124
38.3k
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
125
126
  /// Get the name of the capture helper.
127
11.6k
  StringRef getHelperName() const override { return HelperName; }
128
129
0
  static bool classof(const CGCapturedStmtInfo *Info) {
130
0
    return CGOpenMPRegionInfo::classof(Info) &&
131
0
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
132
0
               ParallelOutlinedRegion;
133
0
  }
134
135
private:
136
  /// A variable or parameter storing global thread id for OpenMP
137
  /// constructs.
138
  const VarDecl *ThreadIDVar;
139
  StringRef HelperName;
140
};
141
142
/// API for captured statement code generation in OpenMP constructs.
143
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
144
public:
145
  class UntiedTaskActionTy final : public PrePostActionTy {
146
    bool Untied;
147
    const VarDecl *PartIDVar;
148
    const RegionCodeGenTy UntiedCodeGen;
149
    llvm::SwitchInst *UntiedSwitch = nullptr;
150
151
  public:
152
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
153
                       const RegionCodeGenTy &UntiedCodeGen)
154
855
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
155
855
    void Enter(CodeGenFunction &CGF) override {
156
855
      if (Untied) {
157
        // Emit task switching point.
158
16
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
159
16
            CGF.GetAddrOfLocalVar(PartIDVar),
160
16
            PartIDVar->getType()->castAs<PointerType>());
161
16
        llvm::Value *Res =
162
16
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
163
16
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
164
16
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
165
16
        CGF.EmitBlock(DoneBB);
166
16
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
167
16
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
168
16
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
169
16
                              CGF.Builder.GetInsertBlock());
170
16
        emitUntiedSwitch(CGF);
171
16
      }
172
855
    }
173
34
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
174
34
      if (Untied) {
175
30
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
176
30
            CGF.GetAddrOfLocalVar(PartIDVar),
177
30
            PartIDVar->getType()->castAs<PointerType>());
178
30
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
179
30
                              PartIdLVal);
180
30
        UntiedCodeGen(CGF);
181
30
        CodeGenFunction::JumpDest CurPoint =
182
30
            CGF.getJumpDestInCurrentScope(".untied.next.");
183
30
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
184
30
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
185
30
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
186
30
                              CGF.Builder.GetInsertBlock());
187
30
        CGF.EmitBranchThroughCleanup(CurPoint);
188
30
        CGF.EmitBlock(CurPoint.getBlock());
189
30
      }
190
34
    }
191
16
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
192
  };
193
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
194
                                 const VarDecl *ThreadIDVar,
195
                                 const RegionCodeGenTy &CodeGen,
196
                                 OpenMPDirectiveKind Kind, bool HasCancel,
197
                                 const UntiedTaskActionTy &Action)
198
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
199
855
        ThreadIDVar(ThreadIDVar), Action(Action) {
200
855
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
201
855
  }
202
203
  /// Get a variable or parameter for storing global thread id
204
  /// inside OpenMP construct.
205
282
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
206
207
  /// Get an LValue for the current ThreadID variable.
208
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
209
210
  /// Get the name of the capture helper.
211
855
  StringRef getHelperName() const override { return ".omp_outlined."; }
212
213
18
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
214
18
    Action.emitUntiedSwitch(CGF);
215
18
  }
216
217
0
  static bool classof(const CGCapturedStmtInfo *Info) {
218
0
    return CGOpenMPRegionInfo::classof(Info) &&
219
0
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
220
0
               TaskOutlinedRegion;
221
0
  }
222
223
private:
224
  /// A variable or parameter storing global thread id for OpenMP
225
  /// constructs.
226
  const VarDecl *ThreadIDVar;
227
  /// Action for emitting code for untied tasks.
228
  const UntiedTaskActionTy &Action;
229
};
230
231
/// API for inlined captured statement code generation in OpenMP
232
/// constructs.
233
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
234
public:
235
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
236
                            const RegionCodeGenTy &CodeGen,
237
                            OpenMPDirectiveKind Kind, bool HasCancel)
238
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
239
        OldCSI(OldCSI),
240
42.0k
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
241
242
  // Retrieve the value of the context parameter.
243
0
  llvm::Value *getContextValue() const override {
244
0
    if (OuterRegionInfo)
245
0
      return OuterRegionInfo->getContextValue();
246
0
    llvm_unreachable("No context value for inlined OpenMP region");
247
0
  }
248
249
0
  void setContextValue(llvm::Value *V) override {
250
0
    if (OuterRegionInfo) {
251
0
      OuterRegionInfo->setContextValue(V);
252
0
      return;
253
0
    }
254
0
    llvm_unreachable("No context value for inlined OpenMP region");
255
0
  }
256
257
  /// Lookup the captured field decl for a variable.
258
26.0k
  const FieldDecl *lookup(const VarDecl *VD) const override {
259
26.0k
    if (OuterRegionInfo)
260
13.3k
      return OuterRegionInfo->lookup(VD);
261
    // If there is no outer outlined region,no need to lookup in a list of
262
    // captured variables, we can use the original one.
263
12.6k
    return nullptr;
264
12.6k
  }
265
266
0
  FieldDecl *getThisFieldDecl() const override {
267
0
    if (OuterRegionInfo)
268
0
      return OuterRegionInfo->getThisFieldDecl();
269
0
    return nullptr;
270
0
  }
271
272
  /// Get a variable or parameter for storing global thread id
273
  /// inside OpenMP construct.
274
7.10k
  const VarDecl *getThreadIDVariable() const override {
275
7.10k
    if (OuterRegionInfo)
276
6.92k
      return OuterRegionInfo->getThreadIDVariable();
277
179
    return nullptr;
278
179
  }
279
280
  /// Get an LValue for the current ThreadID variable.
281
6.92k
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
282
6.92k
    if (OuterRegionInfo)
283
6.92k
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
284
0
    llvm_unreachable("No LValue for inlined OpenMP construct");
285
0
  }
286
287
  /// Get the name of the capture helper.
288
0
  StringRef getHelperName() const override {
289
0
    if (auto *OuterRegionInfo = getOldCSI())
290
0
      return OuterRegionInfo->getHelperName();
291
0
    llvm_unreachable("No helper name for inlined OpenMP construct");
292
0
  }
293
294
16
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
295
16
    if (OuterRegionInfo)
296
14
      OuterRegionInfo->emitUntiedSwitch(CGF);
297
16
  }
298
299
41.7k
  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
300
301
41.7k
  static bool classof(const CGCapturedStmtInfo *Info) {
302
41.7k
    return CGOpenMPRegionInfo::classof(Info) &&
303
41.7k
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
304
41.7k
  }
305
306
42.0k
  ~CGOpenMPInlinedRegionInfo() override = default;
307
308
private:
309
  /// CodeGen info about outer OpenMP region.
310
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
311
  CGOpenMPRegionInfo *OuterRegionInfo;
312
};
313
314
/// API for captured statement code generation in OpenMP target
315
/// constructs. For this captures, implicit parameters are used instead of the
316
/// captured fields. The name of the target region has to be unique in a given
317
/// application so it is provided by the client, because only the client has
318
/// the information to generate that.
319
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
320
public:
321
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
322
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
323
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
324
                           /*HasCancel=*/false),
325
11.6k
        HelperName(HelperName) {}
326
327
  /// This is unused for target regions because each starts executing
328
  /// with a single thread.
329
2.26k
  const VarDecl *getThreadIDVariable() const override { return nullptr; }
330
331
  /// Get the name of the capture helper.
332
11.6k
  StringRef getHelperName() const override { return HelperName; }
333
334
0
  static bool classof(const CGCapturedStmtInfo *Info) {
335
0
    return CGOpenMPRegionInfo::classof(Info) &&
336
0
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
337
0
  }
338
339
private:
340
  StringRef HelperName;
341
};
342
343
0
/// Placeholder code generation callback for expression-only regions. It must
/// never be invoked.
static void EmptyCodeGen(CodeGenFunction & /*CGF*/,
                         PrePostActionTy & /*Action*/) {
  llvm_unreachable("No codegen for expressions");
}
346
/// API for generation of expressions captured in a innermost OpenMP
347
/// region.
348
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
349
public:
350
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
351
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
352
                                  OMPD_unknown,
353
                                  /*HasCancel=*/false),
354
282
        PrivScope(CGF) {
355
    // Make sure the globals captured in the provided statement are local by
356
    // using the privatization logic. We assume the same variable is not
357
    // captured more than once.
358
420
    for (const auto &C : CS.captures()) {
359
420
      if (!C.capturesVariable() && 
!C.capturesVariableByCopy()322
)
360
16
        continue;
361
362
404
      const VarDecl *VD = C.getCapturedVar();
363
404
      if (VD->isLocalVarDeclOrParm())
364
324
        continue;
365
366
80
      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
367
80
                      /*RefersToEnclosingVariableOrCapture=*/false,
368
80
                      VD->getType().getNonReferenceType(), VK_LValue,
369
80
                      C.getLocation());
370
80
      PrivScope.addPrivate(
371
80
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
372
80
    }
373
282
    (void)PrivScope.Privatize();
374
282
  }
375
376
  /// Lookup the captured field decl for a variable.
377
0
  const FieldDecl *lookup(const VarDecl *VD) const override {
378
0
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
379
0
      return FD;
380
0
    return nullptr;
381
0
  }
382
383
  /// Emit the captured statement body.
384
0
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
385
0
    llvm_unreachable("No body for expressions");
386
0
  }
387
388
  /// Get a variable or parameter for storing global thread id
389
  /// inside OpenMP construct.
390
0
  const VarDecl *getThreadIDVariable() const override {
391
0
    llvm_unreachable("No thread id for expressions");
392
0
  }
393
394
  /// Get the name of the capture helper.
395
0
  StringRef getHelperName() const override {
396
0
    llvm_unreachable("No helper name for expressions");
397
0
  }
398
399
0
  static bool classof(const CGCapturedStmtInfo *Info) { return false; }
400
401
private:
402
  /// Private scope to capture global variables.
403
  CodeGenFunction::OMPPrivateScope PrivScope;
404
};
405
406
/// RAII for emitting code of OpenMP constructs.
407
class InlinedOpenMPRegionRAII {
408
  CodeGenFunction &CGF;
409
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
410
  FieldDecl *LambdaThisCaptureField = nullptr;
411
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
412
413
public:
414
  /// Constructs region for combined constructs.
415
  /// \param CodeGen Code generation sequence for combined directives. Includes
416
  /// a list of functions used for code generation of implicitly inlined
417
  /// regions.
418
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
419
                          OpenMPDirectiveKind Kind, bool HasCancel)
420
41.7k
      : CGF(CGF) {
421
    // Start emission for the construct.
422
41.7k
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
423
41.7k
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
424
41.7k
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
425
41.7k
    LambdaThisCaptureField = CGF.LambdaThisCaptureField;
426
41.7k
    CGF.LambdaThisCaptureField = nullptr;
427
41.7k
    BlockInfo = CGF.BlockInfo;
428
41.7k
    CGF.BlockInfo = nullptr;
429
41.7k
  }
430
431
41.7k
  ~InlinedOpenMPRegionRAII() {
432
    // Restore original CapturedStmtInfo only if we're done with code emission.
433
41.7k
    auto *OldCSI =
434
41.7k
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
435
41.7k
    delete CGF.CapturedStmtInfo;
436
41.7k
    CGF.CapturedStmtInfo = OldCSI;
437
41.7k
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
438
41.7k
    CGF.LambdaThisCaptureField = LambdaThisCaptureField;
439
41.7k
    CGF.BlockInfo = BlockInfo;
440
41.7k
  }
441
};
442
443
/// Values for bit flags used in the ident_t to describe the fields.
444
/// All enumeric elements are named and described in accordance with the code
445
/// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
446
enum OpenMPLocationFlags : unsigned {
447
  /// Use trampoline for internal microtask.
448
  OMP_IDENT_IMD = 0x01,
449
  /// Use c-style ident structure.
450
  OMP_IDENT_KMPC = 0x02,
451
  /// Atomic reduction option for kmpc_reduce.
452
  OMP_ATOMIC_REDUCE = 0x10,
453
  /// Explicit 'barrier' directive.
454
  OMP_IDENT_BARRIER_EXPL = 0x20,
455
  /// Implicit barrier in code.
456
  OMP_IDENT_BARRIER_IMPL = 0x40,
457
  /// Implicit barrier in 'for' directive.
458
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
459
  /// Implicit barrier in 'sections' directive.
460
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
461
  /// Implicit barrier in 'single' directive.
462
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
463
  /// Call of __kmp_for_static_init for static loop.
464
  OMP_IDENT_WORK_LOOP = 0x200,
465
  /// Call of __kmp_for_static_init for sections.
466
  OMP_IDENT_WORK_SECTIONS = 0x400,
467
  /// Call of __kmp_for_static_init for distribute.
468
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
469
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
470
};
471
472
namespace {
473
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
474
/// Values for bit flags for marking which requires clauses have been used.
475
enum OpenMPOffloadingRequiresDirFlags : int64_t {
476
  /// flag undefined.
477
  OMP_REQ_UNDEFINED               = 0x000,
478
  /// no requires clause present.
479
  OMP_REQ_NONE                    = 0x001,
480
  /// reverse_offload clause.
481
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
482
  /// unified_address clause.
483
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
484
  /// unified_shared_memory clause.
485
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
486
  /// dynamic_allocators clause.
487
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
488
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
489
};
490
491
/// Reserved device id values.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
496
} // anonymous namespace
497
498
/// Describes ident structure that describes a source location.
499
/// All descriptions are taken from
500
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
501
/// Original structure:
502
/// typedef struct ident {
503
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
504
///                                  see above  */
505
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
506
///                                  KMP_IDENT_KMPC identifies this union
507
///                                  member  */
508
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
509
///                                  see above */
510
///#if USE_ITT_BUILD
511
///                            /*  but currently used for storing
512
///                                region-specific ITT */
513
///                            /*  contextual information. */
514
///#endif /* USE_ITT_BUILD */
515
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
516
///                                 C++  */
517
///    char const *psource;    /**< String describing the source location.
518
///                            The string is composed of semi-colon separated
519
///                            fields which describe the source file,
520
///                            the function and a pair of line numbers that
521
///                            delimit the construct.
522
///                             */
523
/// } ident_t;
524
/// Indices of the fields of the ident structure, in declaration order.
enum IdentFieldIndex {
  /// reserved_1: might be used in Fortran.
  IdentField_Reserved_1,
  /// flags: OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// reserved_2: not really used in Fortran any more.
  IdentField_Reserved_2,
  /// reserved_3: source[4] in Fortran, do not use for C++.
  IdentField_Reserved_3,
  /// psource: string describing the source location. The string is composed
  /// of semi-colon separated fields which describe the source file, the
  /// function and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
538
539
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// Static with chunk adjustment (e.g., simd).
  OMP_sch_static_balanced_chunked = 45,

  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,

  /// Default schedule: an alias of OMP_sch_static.
  OMP_sch_default = OMP_sch_static,

  /// dist_schedule types.
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,

  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
570
571
/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
572
/// region.
573
class CleanupTy final : public EHScopeStack::Cleanup {
574
  PrePostActionTy *Action;
575
576
public:
577
15.4k
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
578
15.5k
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
579
15.5k
    if (!CGF.HaveInsertPoint())
580
0
      return;
581
15.5k
    Action->Exit(CGF);
582
15.5k
  }
583
};
584
585
} // anonymous namespace
586
587
125k
/// Runs the region's code generation callback inside its own cleanups scope.
/// When a pre/post action is attached, a CleanupTy for it is pushed first and
/// the action is handed to the callback; otherwise the callback receives a
/// default-constructed PrePostActionTy.
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (!PrePostAction) {
    // No action attached: pass a default action to the callback.
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
    return;
  }
  CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
  Callback(CodeGen, CGF, *PrePostAction);
}
597
598
/// Check if the combiner is a call to UDR combiner and if it is so return the
599
/// UDR decl used for reduction.
600
static const OMPDeclareReductionDecl *
601
1.07k
getReductionInit(const Expr *ReductionOp) {
602
1.07k
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
603
117
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
604
77
      if (const auto *DRE =
605
77
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
606
77
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
607
77
          return DRD;
608
1.00k
  return nullptr;
609
1.00k
}
610
611
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
612
                                             const OMPDeclareReductionDecl *DRD,
613
                                             const Expr *InitOp,
614
                                             Address Private, Address Original,
615
61
                                             QualType Ty) {
616
61
  if (DRD->getInitializer()) {
617
53
    std::pair<llvm::Function *, llvm::Function *> Reduction =
618
53
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
619
53
    const auto *CE = cast<CallExpr>(InitOp);
620
53
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
621
53
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
622
53
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
623
53
    const auto *LHSDRE =
624
53
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
625
53
    const auto *RHSDRE =
626
53
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
627
53
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
628
53
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
629
53
                            [=]() { return Private; });
630
53
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
631
53
                            [=]() { return Original; });
632
53
    (void)PrivateScope.Privatize();
633
53
    RValue Func = RValue::get(Reduction.second);
634
53
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
635
53
    CGF.EmitIgnoredExpr(InitOp);
636
8
  } else {
637
8
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
638
8
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
639
8
    auto *GV = new llvm::GlobalVariable(
640
8
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
641
8
        llvm::GlobalValue::PrivateLinkage, Init, Name);
642
8
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
643
8
    RValue InitRVal;
644
8
    switch (CGF.getEvaluationKind(Ty)) {
645
8
    case TEK_Scalar:
646
8
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
647
8
      break;
648
0
    case TEK_Complex:
649
0
      InitRVal =
650
0
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
651
0
      break;
652
0
    case TEK_Aggregate:
653
0
      InitRVal = RValue::getAggregate(LV.getAddress(CGF));
654
0
      break;
655
8
    }
656
8
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
657
8
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
658
8
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
659
8
                         /*IsInitializer=*/false);
660
8
  }
661
61
}
662
663
/// Emit initialization of arrays of complex types.
664
/// \param DestAddr Address of the array.
665
/// \param Type Type of array.
666
/// \param Init Initial expression of array.
667
/// \param SrcAddr Address of the original array.
668
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
669
                                 QualType Type, bool EmitDeclareReductionInit,
670
                                 const Expr *Init,
671
                                 const OMPDeclareReductionDecl *DRD,
672
277
                                 Address SrcAddr = Address::invalid()) {
673
  // Perform element-by-element initialization.
674
277
  QualType ElementTy;
675
676
  // Drill down to the base element type on both arrays.
677
277
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
678
277
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
679
277
  DestAddr =
680
277
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
681
277
  if (DRD)
682
31
    SrcAddr =
683
31
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
684
685
277
  llvm::Value *SrcBegin = nullptr;
686
277
  if (DRD)
687
31
    SrcBegin = SrcAddr.getPointer();
688
277
  llvm::Value *DestBegin = DestAddr.getPointer();
689
  // Cast from pointer to array type to pointer to single element.
690
277
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
691
  // The basic structure here is a while-do loop.
692
277
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
693
277
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
694
277
  llvm::Value *IsEmpty =
695
277
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
696
277
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
697
698
  // Enter the loop body, making that address the current address.
699
277
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
700
277
  CGF.EmitBlock(BodyBB);
701
702
277
  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
703
704
277
  llvm::PHINode *SrcElementPHI = nullptr;
705
277
  Address SrcElementCurrent = Address::invalid();
706
277
  if (DRD) {
707
31
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
708
31
                                          "omp.arraycpy.srcElementPast");
709
31
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
710
31
    SrcElementCurrent =
711
31
        Address(SrcElementPHI,
712
31
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
713
31
  }
714
277
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
715
277
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
716
277
  DestElementPHI->addIncoming(DestBegin, EntryBB);
717
277
  Address DestElementCurrent =
718
277
      Address(DestElementPHI,
719
277
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
720
721
  // Emit copy.
722
277
  {
723
277
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
724
277
    if (EmitDeclareReductionInit) {
725
31
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
726
31
                                       SrcElementCurrent, ElementTy);
727
31
    } else
728
246
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
729
246
                           /*IsInitializer=*/false);
730
277
  }
731
732
277
  if (DRD) {
733
    // Shift the address forward by one element.
734
31
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
735
31
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
736
31
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
737
31
  }
738
739
  // Shift the address forward by one element.
740
277
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
741
277
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
742
  // Check whether we've reached the end.
743
277
  llvm::Value *Done =
744
277
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
745
277
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
746
277
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
747
748
  // Done.
749
277
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
750
277
}
751
752
1.09k
/// Emits the LValue of the shared reduction expression \p E by delegating to
/// CodeGenFunction::EmitOMPSharedLValue().
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  LValue SharedLV = CGF.EmitOMPSharedLValue(E);
  return SharedLV;
}
755
756
/// Emits the upper-bound LValue of \p E when it is an OpenMP array section
/// (the section is emitted with IsLowerBound set to false).
/// \return A default-constructed LValue for any other kind of expression.
LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  const auto *Section = dyn_cast<OMPArraySectionExpr>(E);
  if (!Section)
    return LValue();
  return CGF.EmitOMPArraySectionExpr(Section, /*IsLowerBound=*/false);
}
762
763
void ReductionCodeGen::emitAggregateInitialization(
764
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
765
277
    const OMPDeclareReductionDecl *DRD) {
766
  // Emit VarDecl with copy init for arrays.
767
  // Get the address of the original variable captured in current
768
  // captured region.
769
277
  const auto *PrivateVD =
770
277
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
771
277
  bool EmitDeclareReductionInit =
772
277
      DRD && 
(31
DRD->getInitializer()31
||
!PrivateVD->hasInit()4
);
773
277
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
774
277
                       EmitDeclareReductionInit,
775
31
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
776
246
                                                : PrivateVD->getInit(),
777
277
                       DRD, SharedLVal.getAddress(CGF));
778
277
}
779
780
/// Records one ClausesData entry per element of \p Shareds, pairing it with
/// the element at the same position in \p Origs, \p Privates and
/// \p ReductionOps. Storage for the per-item bookkeeping vectors is reserved
/// up front.
ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  const auto NumItems = Shareds.size();
  ClausesData.reserve(NumItems);
  SharedAddresses.reserve(NumItems);
  Sizes.reserve(NumItems);
  BaseDecls.reserve(NumItems);

  // Walk the three companion arrays in lock-step with Shareds.
  const auto *OrigIt = Origs.begin();
  const auto *PrivIt = Privates.begin();
  const auto *RedOpIt = ReductionOps.begin();
  for (const Expr *Shared : Shareds)
    ClausesData.emplace_back(Shared, *OrigIt++, *PrivIt++, *RedOpIt++);
}
798
799
1.04k
/// Emits the (first, upper-bound) lvalue pairs for the N-th reduction item and
/// appends them to SharedAddresses and OrigAddresses. When the item's Shared
/// and Ref expressions are the same pointer, the pair computed for Shared is
/// reused for OrigAddresses instead of being emitted a second time.
void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  const auto &Item = ClausesData[N];

  LValue SharedLB = emitSharedLValue(CGF, Item.Shared);
  LValue SharedUB = emitSharedLValueUB(CGF, Item.Shared);
  SharedAddresses.emplace_back(SharedLB, SharedUB);

  if (Item.Shared == Item.Ref) {
    OrigAddresses.emplace_back(SharedLB, SharedUB);
    return;
  }

  // Ref differs from Shared: emit a separate pair for it.
  LValue OrigLB = emitSharedLValue(CGF, Item.Ref);
  LValue OrigUB = emitSharedLValueUB(CGF, Item.Ref);
  OrigAddresses.emplace_back(OrigLB, OrigUB);
}
813
814
1.04k
/// Computes the size of the N-th reduction item and appends it to Sizes as a
/// (size in chars, element count) pair.
///
/// For a private type that is not variably modified only the size in chars is
/// recorded and the element count is null. Otherwise the element count is
/// derived either from the distance between the two lvalues of an array
/// section (plus one) or from the type size divided by the element size, the
/// private type's size expression is bound to that count, and the variably
/// modified type is emitted.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivVar =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivTy = PrivVar->getType();

  if (!PrivTy->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }

  auto *PointeeTy =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *PointeeSize = llvm::ConstantExpr::getSizeOf(PointeeTy);

  llvm::Value *NumElements = nullptr;
  llvm::Value *NumBytes = nullptr;
  if (isa<OMPArraySectionExpr>(ClausesData[N].Ref)) {
    // Element count = (second - first) + 1; bytes = count * sizeof(element).
    llvm::Value *Distance =
        CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                  OrigAddresses[N].first.getPointer(CGF));
    NumElements = CGF.Builder.CreateNUWAdd(
        Distance, llvm::ConstantInt::get(Distance->getType(), /*V=*/1));
    NumBytes = CGF.Builder.CreateNUWMul(NumElements, PointeeSize);
  } else {
    // Bytes come from the type; element count = bytes / sizeof(element).
    NumBytes =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    NumElements = CGF.Builder.CreateExactUDiv(NumBytes, PointeeSize);
  }
  Sizes.emplace_back(NumBytes, NumElements);

  // Bind the private type's opaque size expression to the computed count
  // while the variably modified type is emitted.
  CodeGenFunction::OpaqueValueMapping SizeBinding(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivTy)->getSizeExpr()),
      RValue::get(NumElements));
  CGF.EmitVariablyModifiedType(PrivTy);
}
850
851
/// Emits the variably modified private type of the N-th reduction item using
/// an already known element count \p Size. For a private type that is not
/// variably modified nothing is emitted and \p Size is expected to be null.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivVar =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivTy = PrivVar->getType();

  if (!PrivTy->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }

  // Bind the private type's opaque size expression to Size while the type is
  // emitted.
  CodeGenFunction::OpaqueValueMapping SizeBinding(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivTy)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivTy);
}
869
870
/// Emits the initialization of the private copy of the N-th reduction item.
///
/// Both addresses are first cast to the memory types of the private and shared
/// types. Then one of three paths is taken:
///  - array-typed private copy: aggregate initialization (after running
///    \p DefaultInit when the declare-reduction has an initializer);
///  - declare-reduction with an initializer, or with a private variable that
///    has no initializer: \p DefaultInit followed by the reduction initializer;
///  - otherwise: \p DefaultInit, and if it returns false and the private
///    variable has a non-trivial initializer, that initializer is emitted.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivVar =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);

  QualType PrivTy = PrivVar->getType();
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivTy));

  QualType SharedTy = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedTy)),
      SharedTy, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedTy));

  // Array-typed private copies are initialized element by element.
  if (CGF.getContext().getAsArrayType(PrivVar->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
    return;
  }

  // Scalar item initialized through the declare-reduction initializer.
  if (DRD && (DRD->getInitializer() || !PrivVar->hasInit())) {
    (void)DefaultInit(CGF);
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
    return;
  }

  // Fall back to the private variable's own initializer when the default
  // initialization reports false and that initializer is not trivial.
  if (!DefaultInit(CGF) && PrivVar->hasInit() &&
      !CGF.isTrivialInitializer(PrivVar->getInit())) {
    CGF.EmitAnyExprToMem(PrivVar->getInit(), PrivateAddr,
                         PrivVar->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
903
904
160
bool ReductionCodeGen::needCleanups(unsigned N) {
905
160
  const auto *PrivateVD =
906
160
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
907
160
  QualType PrivateType = PrivateVD->getType();
908
160
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
909
160
  return DTorKind != QualType::DK_none;
910
160
}
911
912
void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
913
19
                                    Address PrivateAddr) {
914
19
  const auto *PrivateVD =
915
19
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
916
19
  QualType PrivateType = PrivateVD->getType();
917
19
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
918
19
  if (needCleanups(N)) {
919
19
    PrivateAddr = CGF.Builder.CreateElementBitCast(
920
19
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
921
19
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
922
19
  }
923
19
}
924
925
/// Starting from \p BaseLV of type \p BaseTy, repeatedly loads through
/// pointer/reference levels until the type is neither a pointer nor a
/// reference, or it matches \p ElTy. The resulting address is returned as an
/// lvalue whose element type is the memory type of \p ElTy.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();

  // True while another level of indirection has to be peeled off.
  auto NeedsLoad = [&CGF, &BaseTy, &ElTy]() {
    return (BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
           !CGF.getContext().hasSameType(BaseTy, ElTy);
  };

  for (; NeedsLoad(); BaseTy = BaseTy->getPointeeType()) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
      continue;
    }
    LValue RefLV = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
    BaseLV = CGF.EmitLoadOfReferenceLValue(RefLV);
  }

  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
944
945
/// Wraps \p Addr in as many memory temporaries as \p BaseTy has
/// pointer/reference levels above \p ElTy.
///
/// Each level gets a temporary; every temporary after the first has its
/// pointer stored into the previous one, and \p Addr (cast to the last
/// temporary's element type) is stored into the last one. The first temporary
/// is returned. When no level is peeled, \p Addr is cast to \p BaseLVType and
/// returned with alignment \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Innermost = Address::invalid();
  Address Outermost = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();

  auto NeedsLevel = [&CGF, &BaseTy, &ElTy]() {
    return (BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
           !CGF.getContext().hasSameType(BaseTy, ElTy);
  };

  for (; NeedsLevel(); BaseTy = BaseTy->getPointeeType()) {
    Address Slot = CGF.CreateMemTemp(BaseTy);
    if (Innermost.isValid())
      CGF.Builder.CreateStore(Slot.getPointer(), Innermost); // chain levels
    else
      Outermost = Slot; // first temporary is the one handed back
    Innermost = Slot;
  }

  llvm::Type *TargetTy =
      Innermost.isValid() ? Innermost.getElementType() : BaseLVType;
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, TargetTy);

  if (!Innermost.isValid())
    return Address(Addr, BaseLVAlignment);

  CGF.Builder.CreateStore(Addr, Innermost);
  return Outermost;
}
972
973
1.05k
/// Finds the variable underlying an array section or array subscript
/// expression.
///
/// Nested array sections (when \p Ref is a section) and then nested array
/// subscripts are stripped from the base; the remaining DeclRefExpr is stored
/// in \p DE and its VarDecl is returned. For any other kind of expression
/// null is returned and \p DE is left untouched.
static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const Expr *Base;
  if (const auto *Section = dyn_cast<OMPArraySectionExpr>(Ref)) {
    Base = Section->getBase()->IgnoreParenImpCasts();
    while (const auto *Inner = dyn_cast<OMPArraySectionExpr>(Base))
      Base = Inner->getBase()->IgnoreParenImpCasts();
  } else if (const auto *Subscript = dyn_cast<ArraySubscriptExpr>(Ref)) {
    Base = Subscript->getBase()->IgnoreParenImpCasts();
  } else {
    return nullptr;
  }

  // Both forms may sit on top of a chain of plain subscripts.
  while (const auto *Inner = dyn_cast<ArraySubscriptExpr>(Base))
    Base = Inner->getBase()->IgnoreParenImpCasts();

  DE = cast<DeclRefExpr>(Base);
  return cast<VarDecl>(DE->getDecl());
}
992
993
/// Records the base declaration of the N-th reduction item and returns the
/// address to use for its private copy.
///
/// When the item is an array section or subscript, the private pointer is
/// offset by the pointer difference between the beginning of the original
/// base and the shared item address, and the result is wrapped by castToBase.
/// Otherwise \p PrivateAddr is returned unchanged.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *BaseRef = nullptr;
  const VarDecl *BaseVar = ::getBaseDecl(ClausesData[N].Ref, BaseRef);
  if (!BaseVar) {
    // Plain variable reference: nothing to adjust.
    BaseDecls.emplace_back(
        cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
    return PrivateAddr;
  }

  BaseDecls.emplace_back(BaseVar);
  LValue OrigBaseLV = CGF.EmitLValue(BaseRef);
  LValue BeginLV =
      loadToBegin(CGF, BaseVar->getType(), SharedAddresses[N].first.getType(),
                  OrigBaseLV);
  // Offset = base begin - shared item address.
  llvm::Value *Offset = CGF.Builder.CreatePtrDiff(
      BeginLV.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
  llvm::Value *TypedPrivate = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      PrivateAddr.getPointer(),
      SharedAddresses[N].first.getAddress(CGF).getType());
  llvm::Value *Adjusted = CGF.Builder.CreateGEP(TypedPrivate, Offset);
  return castToBase(CGF, BaseVar->getType(),
                    SharedAddresses[N].first.getType(),
                    OrigBaseLV.getAddress(CGF).getType(),
                    OrigBaseLV.getAlignment(), Adjusted);
}
1018
1019
141
bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1020
141
  const OMPDeclareReductionDecl *DRD =
1021
141
      getReductionInit(ClausesData[N].ReductionOp);
1022
141
  return DRD && 
DRD->getInitializer()6
;
1023
141
}
1024
1025
12.7k
/// Loads through the pointer-typed thread id variable of this region and
/// returns the lvalue it points to.
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  const VarDecl *TidVar = getThreadIDVariable();
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(TidVar),
      TidVar->getType()->castAs<PointerType>());
}
1030
1031
65.2k
/// Emits the region body through the stored CodeGen callback, bracketed by a
/// terminate scope on the EH stack. Nothing is emitted when there is no
/// insert point.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  if (CGF.HaveInsertPoint()) {
    // OpenMP Language Terminology, 1.2.2:
    // a structured block is an executable statement with a single entry at
    // the top and a single exit at the bottom. The exit point cannot be a
    // branch out of the block, and longjmp() and throw() must not violate the
    // entry/exit criteria.
    CGF.EHStack.pushTerminate();
    CodeGen(CGF);
    CGF.EHStack.popTerminate();
  }
}
1043
1044
/// Returns the lvalue of the task region's thread id variable itself (no
/// pointer load), using the declaration as the alignment source.
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  const VarDecl *TidVar = getThreadIDVariable();
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(TidVar), TidVar->getType(),
                            AlignmentSource::Decl);
}
1050
1051
/// Creates an unnamed, public, non-mutable, non-bitfield field of type
/// \p FieldTy without an in-class initializer, adds it to \p DC and returns
/// it. All source locations are left empty.
static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  const SourceLocation NoLoc;
  TypeSourceInfo *TInfo = C.getTrivialTypeSourceInfo(FieldTy, NoLoc);
  FieldDecl *NewField = FieldDecl::Create(
      C, DC, NoLoc, NoLoc, /*Id=*/nullptr, FieldTy, TInfo,
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  NewField->setAccess(AS_public);
  DC->addDecl(NewField);
  return NewField;
}
1061
1062
/// Sets up the runtime helper for module \p CGM: stores the name separators,
/// creates the critical-name array type, initializes the OpenMPIRBuilder and
/// loads the offload info metadata.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  // The critical name type is an array of eight 32-bit integers.
  constexpr unsigned CriticalNameWords = 8;
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, CriticalNameWords);

  // Set up the types the OpenMPIRBuilder takes from OMPKinds.def.
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}
1072
1073
5.68k
void CGOpenMPRuntime::clear() {
1074
5.68k
  InternalVars.clear();
1075
  // Clean non-target variable declarations possibly used only in debug info.
1076
12
  for (const auto &Data : EmittedNonTargetVariables) {
1077
12
    if (!Data.getValue().pointsToAliveValue())
1078
0
      continue;
1079
12
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1080
12
    if (!GV)
1081
0
      continue;
1082
12
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
1083
11
      continue;
1084
1
    GV->eraseFromParent();
1085
1
  }
1086
5.68k
}
1087
1088
50.0k
/// Concatenates \p Parts into a single name: the first part is prefixed with
/// FirstSeparator and every following part with Separator.
std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  SmallString<128> Storage;
  llvm::raw_svector_ostream Out(Storage);
  bool IsFirst = true;
  for (StringRef Part : Parts) {
    if (IsFirst)
      Out << FirstSeparator;
    else
      Out << Separator;
    Out << Part;
    IsFirst = false;
  }
  return std::string(Out.str());
}
1098
1099
/// Emits an internal helper function for a user-defined reduction:
///   void <name>(Ty *restrict out_parm, Ty *restrict in_parm);
///
/// Inside the function \p In is mapped to the pointee of the second parameter
/// and \p Out to the pointee of the first. For an initializer
/// (\p IsCombiner false) the non-trivial initializer of \p Out, if any, is
/// emitted first; then \p CombinerInitializer is emitted when it is non-null.
/// When optimizing, the function is marked always-inline.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  ASTContext &Ctx = CGM.getContext();
  QualType ParamTy = Ctx.getPointerType(Ty).withRestrict();

  // Build the parameter list: (out, in).
  FunctionArgList Params;
  ImplicitParamDecl OutParam(Ctx, /*DC=*/nullptr, Out->getLocation(),
                             /*Id=*/nullptr, ParamTy, ImplicitParamDecl::Other);
  ImplicitParamDecl InParam(Ctx, /*DC=*/nullptr, In->getLocation(),
                            /*Id=*/nullptr, ParamTy, ImplicitParamDecl::Other);
  Params.push_back(&OutParam);
  Params.push_back(&InParam);

  const CGFunctionInfo &FI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Params);
  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
  StringRef Prefix = IsCombiner ? "omp_combiner" : "omp_initializer";
  std::string FnName = CGM.getOpenMPRuntime().getName({Prefix, ""});
  auto *Fn = llvm::Function::Create(FTy, llvm::GlobalValue::InternalLinkage,
                                    FnName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }

  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, Fn, FI, Params,
                    In->getLocation(), Out->getLocation());

  // Redirect every use of the "in" and "out" variables to the pointees of
  // the corresponding parameters.
  CodeGenFunction::OMPPrivateScope Remap(CGF);
  Address InParamAddr = CGF.GetAddrOfLocalVar(&InParam);
  Remap.addPrivate(In, [&CGF, InParamAddr, ParamTy]() {
    return CGF.EmitLoadOfPointerLValue(InParamAddr,
                                       ParamTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address OutParamAddr = CGF.GetAddrOfLocalVar(&OutParam);
  Remap.addPrivate(Out, [&CGF, OutParamAddr, ParamTy]() {
    return CGF.EmitLoadOfPointerLValue(OutParamAddr,
                                       ParamTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Remap.Privatize();

  // An initializer first runs the non-trivial initializer of the "out"
  // variable, if there is one.
  const bool EmitOutInit = !IsCombiner && Out->hasInit() &&
                           !CGF.isTrivialInitializer(Out->getInit());
  if (EmitOutInit) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);

  Remap.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1155
1156
void CGOpenMPRuntime::emitUserDefinedReduction(
1157
138
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1158
138
  if (UDRMap.count(D) > 0)
1159
1
    return;
1160
137
  llvm::Function *Combiner = emitCombinerOrInitializer(
1161
137
      CGM, D->getType(), D->getCombiner(),
1162
137
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1163
137
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1164
137
      /*IsCombiner=*/true);
1165
137
  llvm::Function *Initializer = nullptr;
1166
137
  if (const Expr *Init = D->getInitializer()) {
1167
79
    Initializer = emitCombinerOrInitializer(
1168
79
        CGM, D->getType(),
1169
29
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1170
50
                                                                     : nullptr,
1171
79
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1172
79
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1173
79
        /*IsCombiner=*/false);
1174
79
  }
1175
137
  UDRMap.try_emplace(D, Combiner, Initializer);
1176
137
  if (CGF) {
1177
38
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1178
38
    Decls.second.push_back(D);
1179
38
  }
1180
137
}
1181
1182
std::pair<llvm::Function *, llvm::Function *>
1183
240
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1184
240
  auto I = UDRMap.find(D);
1185
240
  if (I != UDRMap.end())
1186
206
    return I->second;
1187
34
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
1188
34
  return UDRMap.lookup(D);
1189
34
}
1190
1191
namespace {
1192
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1193
// Builder if one is present.
1194
struct PushAndPopStackRAII {
1195
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
1196
                      bool HasCancel)
1197
11.6k
      : OMPBuilder(OMPBuilder) {
1198
11.6k
    if (!OMPBuilder)
1199
0
      return;
1200
1201
    // The following callback is the crucial part of clangs cleanup process.
1202
    //
1203
    // NOTE:
1204
    // Once the OpenMPIRBuilder is used to create parallel regions (and
1205
    // similar), the cancellation destination (Dest below) is determined via
1206
    // IP. That means if we have variables to finalize we split the block at IP,
1207
    // use the new block (=BB) as destination to build a JumpDest (via
1208
    // getJumpDestInCurrentScope(BB)) which then is fed to
1209
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
1210
    // to push & pop an FinalizationInfo object.
1211
    // The FiniCB will still be needed but at the point where the
1212
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
1213
11.6k
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
1214
0
      assert(IP.getBlock()->end() == IP.getPoint() &&
1215
0
             "Clang CG should cause non-terminated block!");
1216
0
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1217
0
      CGF.Builder.restoreIP(IP);
1218
0
      CodeGenFunction::JumpDest Dest =
1219
0
          CGF.getOMPCancelDestination(OMPD_parallel);
1220
0
      CGF.EmitBranchThroughCleanup(Dest);
1221
0
    };
1222
1223
    // TODO: Remove this once we emit parallel regions through the
1224
    //       OpenMPIRBuilder as it can do this setup internally.
1225
11.6k
    llvm::OpenMPIRBuilder::FinalizationInfo FI(
1226
11.6k
        {FiniCB, OMPD_parallel, HasCancel});
1227
11.6k
    OMPBuilder->pushFinalizationCB(std::move(FI));
1228
11.6k
  }
1229
11.6k
  ~PushAndPopStackRAII() {
1230
11.6k
    if (OMPBuilder)
1231
11.6k
      OMPBuilder->popFinalizationCB();
1232
11.6k
  }
1233
  llvm::OpenMPIRBuilder *OMPBuilder;
1234
};
1235
} // namespace
1236
1237
/// Generates the outlined function for the captured statement \p CS of a
/// parallel or teams directive \p D.
///
/// The cancel flag is taken from \p D when it is one of the parallel-based
/// directive kinds checked below and is false otherwise. While the function
/// is generated, a finalization callback is kept on the OpenMPIRBuilder.
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);

  // Ask the concrete directive class whether the region contains a cancel.
  auto QueryHasCancel = [&D]() -> bool {
    if (const auto *Dir = dyn_cast<OMPParallelDirective>(&D))
      return Dir->hasCancel();
    if (const auto *Dir = dyn_cast<OMPTargetParallelDirective>(&D))
      return Dir->hasCancel();
    if (const auto *Dir = dyn_cast<OMPParallelSectionsDirective>(&D))
      return Dir->hasCancel();
    if (const auto *Dir = dyn_cast<OMPParallelForDirective>(&D))
      return Dir->hasCancel();
    if (const auto *Dir = dyn_cast<OMPTargetParallelForDirective>(&D))
      return Dir->hasCancel();
    if (const auto *Dir = dyn_cast<OMPDistributeParallelForDirective>(&D))
      return Dir->hasCancel();
    if (const auto *Dir =
            dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
      return Dir->hasCancel();
    if (const auto *Dir =
            dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
      return Dir->hasCancel();
    return false;
  };
  const bool HasCancel = QueryHasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  //       parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &Builder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII FinalizationScope(&Builder, CGF, HasCancel);
  CGOpenMPOutlinedRegionInfo RegionInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel,
                                        OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapturedScope(CGF, &RegionInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}
1273
1274
/// Generates the outlined function for the OMPD_parallel captured statement
/// of \p D.
llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, D.getCapturedStmt(OMPD_parallel), ThreadIDVar, InnermostKind,
      getOutlinedHelperName(), CodeGen);
}
1281
1282
/// Generates the outlined function for the OMPD_teams captured statement of
/// \p D.
llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, D.getCapturedStmt(OMPD_teams), ThreadIDVar, InnermostKind,
      getOutlinedHelperName(), CodeGen);
}
1289
1290
/// Generates the outlined function for a task or taskloop directive \p D.
///
/// An untied-task action is installed on \p CodeGen; its callback emits a
/// call to __kmpc_omp_task with the location, the thread id and the task
/// pointer loaded from \p TaskTVar. For untied tasks (\p Tied false) the
/// number of parts reported by the action is written to \p NumberOfParts.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *Tid = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *Loc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *CallArgs[] = {
        Loc, Tid,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        CallArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");

  // Taskloop-based directives capture under OMPD_taskloop, everything else
  // under OMPD_task.
  OpenMPDirectiveKind CaptureRegion = OMPD_task;
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind()))
    CaptureRegion = OMPD_taskloop;
  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegion);

  // Ask the concrete directive class whether the region contains a cancel.
  auto QueryHasCancel = [&D]() -> bool {
    if (const auto *Dir = dyn_cast<OMPTaskDirective>(&D))
      return Dir->hasCancel();
    if (const auto *Dir = dyn_cast<OMPTaskLoopDirective>(&D))
      return Dir->hasCancel();
    if (const auto *Dir = dyn_cast<OMPMasterTaskLoopDirective>(&D))
      return Dir->hasCancel();
    if (const auto *Dir = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
      return Dir->hasCancel();
    return false;
  };
  const bool HasCancel = QueryHasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo RegionInfo(*CS, ThreadIDVar, CodeGen,
                                            InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapturedScope(CGF, &RegionInfo);
  llvm::Function *Outlined = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Outlined;
}
1336
1337
static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1338
                             const RecordDecl *RD, const CGRecordLayout &RL,
1339
10.5k
                             ArrayRef<llvm::Constant *> Data) {
1340
10.5k
  llvm::StructType *StructTy = RL.getLLVMType();
1341
10.5k
  unsigned PrevIdx = 0;
1342
10.5k
  ConstantInitBuilder CIBuilder(CGM);
1343
10.5k
  auto DI = Data.begin();
1344
52.8k
  for (const FieldDecl *FD : RD->fields()) {
1345
52.8k
    unsigned Idx = RL.getLLVMFieldNo(FD);
1346
    // Fill the alignment.
1347
52.8k
    for (unsigned I = PrevIdx; I < Idx; 
++I0
)
1348
0
      Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1349
52.8k
    PrevIdx = Idx + 1;
1350
52.8k
    Fields.add(*DI);
1351
52.8k
    ++DI;
1352
52.8k
  }
1353
10.5k
}
1354
1355
template <class... As>
1356
static llvm::GlobalVariable *
1357
createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1358
                   ArrayRef<llvm::Constant *> Data, const Twine &Name,
1359
10.5k
                   As &&... Args) {
1360
10.5k
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1361
10.5k
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1362
10.5k
  ConstantInitBuilder CIBuilder(CGM);
1363
10.5k
  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1364
10.5k
  buildStructValue(Fields, CGM, RD, RL, Data);
1365
10.5k
  return Fields.finishAndCreateGlobal(
1366
10.5k
      Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1367
10.5k
      std::forward<As>(Args)...);
1368
10.5k
}
1369
1370
template <typename T>
1371
static void
1372
createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1373
                                         ArrayRef<llvm::Constant *> Data,
1374
                                         T &Parent) {
1375
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1376
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1377
  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1378
  buildStructValue(Fields, CGM, RD, RL, Data);
1379
  Fields.finishAndAddTo(Parent);
1380
}
1381
1382
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1383
2.54k
                                             bool AtCurrentPoint) {
1384
2.54k
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1385
2.54k
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1386
1387
2.54k
  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1388
2.54k
  if (AtCurrentPoint) {
1389
1.06k
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1390
1.06k
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1391
1.47k
  } else {
1392
1.47k
    Elem.second.ServiceInsertPt =
1393
1.47k
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1394
1.47k
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1395
1.47k
  }
1396
2.54k
}
1397
1398
10.4k
// Remove the service insertion point placeholder of the current function, if
// one was created. Note that FindAndConstruct creates a map entry for
// CGF.CurFn when none exists yet.
void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &ServicePt =
      OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn).second.ServiceInsertPt;
  if (!ServicePt)
    return;

  // Reset the stored handle first, then erase the instruction itself.
  llvm::Instruction *Placeholder = ServicePt;
  ServicePt = nullptr;
  Placeholder->eraseFromParent();
}
1406
1407
/// Format \p Loc as ";file;function;line;column;;" into \p Buffer.
///
/// The function component is the qualified name of CGF.CurFuncDecl when that
/// is a FunctionDecl, and empty otherwise.
///
/// \returns A reference to the text written through the stream; the storage
/// is \p Buffer, so the caller must keep it alive.
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream Out(Buffer);

  SourceManager &SM = CGF.getContext().getSourceManager();
  PresumedLoc Where = SM.getPresumedLoc(Loc);

  // File component.
  Out << ";" << Where.getFilename() << ";";
  // Function component (may be empty).
  const auto *CurFD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl);
  if (CurFD)
    Out << CurFD->getQualifiedNameAsString();
  // Line and column components, followed by the terminator.
  Out << ";" << Where.getLine();
  Out << ";" << Where.getColumn() << ";;";
  return Out.str();
}
1419
1420
// Return the ident value describing Loc with the given Flags.
// The default source-location string is used when debug info is disabled or
// Loc is invalid; otherwise the string is built from the enclosing function
// name and the presumed file, line and column of Loc.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  const bool HasUsableLoc =
      CGM.getCodeGenOpts().getDebugInfo() != codegenoptions::NoDebugInfo &&
      Loc.isValid();

  llvm::Constant *SrcLocStr = nullptr;
  if (HasUsableLoc) {
    // The function name stays empty when the current declaration is not a
    // FunctionDecl.
    std::string FunctionName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();

    SourceManager &SM = CGF.getContext().getSourceManager();
    PresumedLoc PLoc = SM.getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    const unsigned Line = PLoc.getLine();
    const unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName,
                                                Line, Column);
  } else {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
  }

  const unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
                                     Reserved2Flags);
}
1442
1443
// Return the OpenMP global thread id value to use in the current function.
//
// Strategy, in order:
//  1. With the OpenMPIRBuilder enabled, delegate entirely to OMPBuilder.
//  2. Reuse the value cached for CGF.CurFn in OpenMPLocThreadIDMap.
//  3. Inside an OpenMP region that has a thread id variable, load it (subject
//     to the exception-related restrictions below).
//  4. Otherwise emit a call to __kmpc_global_thread_num at the function's
//     service insertion point and cache the result.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      // TopBlock is the block that holds the alloca insertion point.
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // The thread id variable is loaded directly when any of these holds:
      // no landing pad is required, C++ exceptions are off, the builder is
      // currently in TopBlock, the variable's pointer is not an instruction,
      // or that instruction lives in TopBlock or in the current block.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // Either this is not an outlined function region, or the thread id variable
  // could not be loaded above - need to call
  // kmp_int32 __kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the call at the service insertion point; IPG is declared before the
  // builder is repositioned so the previous insertion point can be restored.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1510
1511
68.5k
// Drop all per-function state recorded for CGF.CurFn once its code generation
// is finished: the cached thread id / service insertion point, the
// user-defined reductions and mappers declared in the function, and the
// lastprivate-conditional and untied-task bookkeeping.
void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    // Erase the placeholder instruction before forgetting the map entry.
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  // Forget the user-defined reductions declared in this function. A single
  // find() is used (as for the mappers below) instead of separate count(),
  // operator[] and erase(key) lookups of the same key.
  auto UDRIt = FunctionUDRMap.find(CGF.CurFn);
  if (UDRIt != FunctionUDRMap.end()) {
    for (const auto *D : UDRIt->second)
      UDRMap.erase(D);
    FunctionUDRMap.erase(UDRIt);
  }
  // Forget the user-defined mappers declared in this function.
  auto UDMIt = FunctionUDMMap.find(CGF.CurFn);
  if (UDMIt != FunctionUDMMap.end()) {
    for (const auto *D : UDMIt->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(UDMIt);
  }
  LastprivateConditionalToTypes.erase(CGF.CurFn);
  FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
}
1531
1532
10.7k
// Return the ident pointer type held by the OpenMP IR builder
// (OMPBuilder.IdentPtr); it is used below as the type of the `loc` parameter
// of the runtime entry points.
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}
1535
1536
10.2k
// Return a pointer to the kmpc_micro function type. The function type is
// created on first use and stored in Kmpc_MicroTy.
llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (Kmpc_MicroTy)
    return llvm::PointerType::getUnqual(Kmpc_MicroTy);

  // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
  llvm::PointerType *GlobalTidTy = llvm::PointerType::getUnqual(CGM.Int32Ty);
  llvm::PointerType *BoundTidTy = llvm::PointerType::getUnqual(CGM.Int32Ty);
  llvm::Type *Params[] = {GlobalTidTy, BoundTidTy};
  Kmpc_MicroTy =
      llvm::FunctionType::get(CGM.VoidTy, Params, /*isVarArg=*/true);
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}
1545
1546
// Declare the __kmpc_for_static_init_{4,4u,8,8u} runtime entry point that
// matches the induction variable width (IVSize, 32 or 64) and signedness.
llvm::FunctionCallee
CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  const bool Is32Bit = IVSize == 32;

  // The suffix encodes the byte width and a trailing 'u' for unsigned.
  StringRef Name;
  if (Is32Bit)
    Name = IVSigned ? "__kmpc_for_static_init_4" : "__kmpc_for_static_init_4u";
  else
    Name = IVSigned ? "__kmpc_for_static_init_8" : "__kmpc_for_static_init_8u";

  llvm::Type *IVTy = CGM.Int32Ty;
  if (!Is32Bit)
    IVTy = CGM.Int64Ty;
  auto *IVPtrTy = llvm::PointerType::getUnqual(IVTy);

  llvm::Type *Params[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      CGM.Int32Ty,                               // schedtype
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      IVPtrTy,                                   // p_lower
      IVPtrTy,                                   // p_upper
      IVPtrTy,                                   // p_stride
      IVTy,                                      // incr
      IVTy                                       // chunk
  };
  llvm::FunctionType *Signature =
      llvm::FunctionType::get(CGM.VoidTy, Params, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(Signature, Name);
}
1571
1572
// Declare the __kmpc_dispatch_init_{4,4u,8,8u} runtime entry point that
// matches the induction variable width (IVSize, 32 or 64) and signedness.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  const bool Is32Bit = IVSize == 32;

  // The suffix encodes the byte width and a trailing 'u' for unsigned.
  StringRef Name;
  if (Is32Bit)
    Name = IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u";
  else
    Name = IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u";

  llvm::Type *IVTy = CGM.Int32Ty;
  if (!Is32Bit)
    IVTy = CGM.Int64Ty;

  llvm::Type *Params[] = {
      getIdentTyPointerTy(), // loc
      CGM.Int32Ty,           // tid
      CGM.Int32Ty,           // schedtype
      IVTy,                  // lower
      IVTy,                  // upper
      IVTy,                  // stride
      IVTy                   // chunk
  };
  llvm::FunctionType *Signature =
      llvm::FunctionType::get(CGM.VoidTy, Params, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(Signature, Name);
}
1593
1594
// Declare the __kmpc_dispatch_fini_{4,4u,8,8u} runtime entry point that
// matches the induction variable width (IVSize, 32 or 64) and signedness.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");

  // The suffix encodes the byte width and a trailing 'u' for unsigned.
  StringRef Name;
  if (IVSize == 32)
    Name = IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u";
  else
    Name = IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u";

  // The signature does not depend on the induction variable type.
  llvm::Type *Params[] = {
      getIdentTyPointerTy(), // loc
      CGM.Int32Ty,           // tid
  };
  llvm::FunctionType *Signature =
      llvm::FunctionType::get(CGM.VoidTy, Params, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(Signature, Name);
}
1610
1611
// Declare the __kmpc_dispatch_next_{4,4u,8,8u} runtime entry point that
// matches the induction variable width (IVSize, 32 or 64) and signedness.
// Unlike the init/fini entry points, this one returns an i32.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  const bool Is32Bit = IVSize == 32;

  // The suffix encodes the byte width and a trailing 'u' for unsigned.
  StringRef Name;
  if (Is32Bit)
    Name = IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u";
  else
    Name = IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u";

  llvm::Type *IVTy = CGM.Int32Ty;
  if (!Is32Bit)
    IVTy = CGM.Int64Ty;
  auto *IVPtrTy = llvm::PointerType::getUnqual(IVTy);

  llvm::Type *Params[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      IVPtrTy,                                   // p_lower
      IVPtrTy,                                   // p_upper
      IVPtrTy                                    // p_stride
  };
  llvm::FunctionType *Signature =
      llvm::FunctionType::get(CGM.Int32Ty, Params, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(Signature, Name);
}
1633
1634
/// Obtain information that uniquely identifies a target entry. This
1635
/// consists of the file and device IDs as well as line number associated with
1636
/// the relevant entry source location.
1637
static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1638
                                     unsigned &DeviceID, unsigned &FileID,
1639
15.6k
                                     unsigned &LineNum) {
1640
15.6k
  SourceManager &SM = C.getSourceManager();
1641
1642
  // The loc should be always valid and have a file ID (the user cannot use
1643
  // #pragma directives in macros)
1644
1645
15.6k
  assert(Loc.isValid() && "Source location is expected to be always valid.");
1646
1647
15.6k
  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1648
15.6k
  assert(PLoc.isValid() && "Source location is expected to be always valid.");
1649
1650
15.6k
  llvm::sys::fs::UniqueID ID;
1651
15.6k
  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1652
0
    SM.getDiagnostics().Report(diag::err_cannot_open_file)
1653
0
        << PLoc.getFilename() << EC.message();
1654
1655
15.6k
  DeviceID = ID.getDevice();
1656
15.6k
  FileID = ID.getFile();
1657
15.6k
  LineNum = PLoc.getLine();
1658
15.6k
}
1659
1660
502
// Return the address of the "<mangled>[_<fileid>]_decl_tgt_ref_ptr" reference
// pointer for a declare target variable, creating the pointer on first use.
// An invalid address is returned in OpenMP SIMD-only mode and for variables
// that are neither 'link' nor 'to' under unified shared memory.
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();

  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  const bool UsesRefPtr =
      Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory));
  if (!UsesRefPtr)
    return Address::invalid();

  // Compose the name of the reference pointer.
  SmallString<64> RefPtrName;
  {
    llvm::raw_svector_ostream NameOS(RefPtrName);
    NameOS << CGM.getMangledName(GlobalDecl(VD));
    // Variables that are not externally visible additionally get the file id
    // of their canonical declaration appended.
    if (!VD->isExternallyVisible()) {
      unsigned DeviceID, FileID, Line;
      getTargetEntryUniqueInfo(CGM.getContext(),
                               VD->getCanonicalDecl()->getBeginLoc(),
                               DeviceID, FileID, Line);
      NameOS << llvm::format("_%x", FileID);
    }
    NameOS << "_decl_tgt_ref_ptr";
  }

  llvm::Value *RefPtr = CGM.getModule().getNamedValue(RefPtrName);
  if (!RefPtr) {
    QualType PointerTy = CGM.getContext().getPointerType(VD->getType());
    RefPtr = getOrCreateInternalVariable(
        CGM.getTypes().ConvertTypeForMem(PointerTy), RefPtrName);

    auto *RefPtrVar = cast<llvm::GlobalVariable>(RefPtr);
    RefPtrVar->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

    // Only the host side initializes the pointer with the variable's address.
    if (!CGM.getLangOpts().OpenMPIsDevice)
      RefPtrVar->setInitializer(CGM.GetAddrOfGlobal(VD));
    registerTargetGlobalVariable(VD, cast<llvm::Constant>(RefPtr));
  }
  return Address(RefPtr, CGM.getContext().getDeclAlign(VD));
}
1698
1699
// Return the internal variable used as the threadprivate cache of VD. Its
// name is VD's mangled name followed by the "cache" suffix produced by
// getName. Must not be called when TLS is both requested and supported.
llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!(CGM.getLangOpts().OpenMPUseTLS &&
           CGM.getContext().getTargetInfo().isTLSSupported()));
  // Lookup the entry, lazily creating it if necessary.
  const std::string CacheSuffix = getName({"cache", ""});
  StringRef MangledName = CGM.getMangledName(VD);
  return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
                                     Twine(MangledName).concat(CacheSuffix));
}
1708
1709
// Return the address of the calling thread's copy of threadprivate variable
// VD. With TLS requested and supported the original address is returned;
// otherwise the address comes from a call to __kmpc_threadprivate_cached.
Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  const bool UseTLS = CGM.getLangOpts().OpenMPUseTLS &&
                      CGM.getContext().getTargetInfo().isTLSSupported();
  if (UseTLS)
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();

  // The arguments are materialized in call order: loc, thread id, variable
  // address as i8*, store size of the variable, and the cache variable.
  llvm::Value *LocArg = emitUpdateLocation(CGF, Loc);
  llvm::Value *ThreadIDArg = getThreadID(CGF, Loc);
  llvm::Value *VarPtrArg =
      CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy);
  llvm::Value *SizeArg = CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy));
  llvm::Value *CacheArg = getOrCreateThreadPrivateCache(VD);
  llvm::Value *Args[] = {LocArg, ThreadIDArg, VarPtrArg, SizeArg, CacheArg};

  llvm::FunctionCallee CachedFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_threadprivate_cached);
  llvm::CallInst *ThreadCopy = CGF.EmitRuntimeCall(CachedFn, Args);
  return Address(ThreadCopy, VDAddr.getAlignment());
}
1729
1730
void CGOpenMPRuntime::emitThreadPrivateVarInit(
1731
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1732
37
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1733
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1734
  // library.
1735
37
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1736
37
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1737
37
                          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1738
37
                      OMPLoc);
1739
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1740
  // to register constructor/destructor for variable.
1741
37
  llvm::Value *Args[] = {
1742
37
      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1743
37
      Ctor, CopyCtor, Dtor};
1744
37
  CGF.EmitRuntimeCall(
1745
37
      OMPBuilder.getOrCreateRuntimeFunction(
1746
37
          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1747
37
      Args);
1748
37
}
1749
1750
// Emit the constructor/destructor helpers of threadprivate variable VD and
// register them with the runtime.
//
// Nothing is emitted (nullptr is returned) when TLS is requested and
// supported, when VD has no definition, or when VD's mangled name was already
// recorded in ThreadPrivateWithDefinition.
//
// \param VD Threadprivate variable; it is replaced by its definition.
// \param VDAddr Address of the original variable.
// \param Loc Location used for the generated functions and runtime calls.
// \param PerformInit Whether the initializer helper has to be generated.
// \param CGF If non-null, the registration calls are emitted into *CGF and
//        nullptr is returned; if null, a standalone
//        "__omp_threadprivate_init_" function is generated and returned.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Proceed only the first time this mangled name is seen.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      // The helper has the shape void *ctor(void *dst) and returns dst.
      // NOTE(review): Init is dereferenced below without a null check;
      // presumably PerformInit implies VD has an initializer - confirm.
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      // Load the destination pointer and view it as a pointer to VD's type.
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Reload the destination pointer and store it as the return value.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      // The helper has the shape void dtor(void *dst).
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // A missing constructor or destructor is passed as a typed null pointer.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function was supplied: wrap the registration calls in a
      // dedicated void() function and hand it back to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration calls directly into the given function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1869
1870
// Emit and register the offload constructor/destructor entries for declare
// target variable VD, whose global is Addr.
//
// Returns false when there are no offload target triples and this is not a
// device compilation. In every other case the result is
// CGM.getLangOpts().OpenMPIsDevice, including the early exits for variables
// that are not declare target, are 'link', are 'to' under unified shared
// memory, or whose mangled name was already handled.
//
// On the device the entries are real functions that initialize/destroy Addr;
// on the host they are private constant i8 placeholder globals with the same
// names.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Handle every variable (by mangled name) only once.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    // Note: the literal prefix ends in '_' and the device id format starts
    // with '_', so the resulting name has two underscores before the device
    // id.
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the global variable Addr of the declare target variable VD
      // NOTE(review): Init is dereferenced below without a null check;
      // presumably PerformInit implies VD has an initializer - confirm.
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // Host side: a private constant i8 global stands in for the
      // constructor and doubles as the entry ID.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the global
      // variable Addr of the declare target variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host side: a private constant i8 global stands in for the
      // destructor and doubles as the entry ID.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
1984
1985
// Return the address of a compiler-generated threadprivate variable of type
// VarType named Name plus the "artificial" suffix. With OpenMP TLS requested
// and supported the backing global is simply marked thread-local; otherwise
// the address comes from a call to __kmpc_threadprivate_cached.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  const std::string ArtificialSuffix = getName({"artificial", ""});
  llvm::Type *VarMemTy = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GlobalAddr = getOrCreateInternalVariable(
      VarMemTy, Twine(Name).concat(ArtificialSuffix));

  const bool UseTLS = CGM.getLangOpts().OpenMP &&
                      CGM.getLangOpts().OpenMPUseTLS &&
                      CGM.getTarget().isTLSSupported();
  if (UseTLS) {
    cast<llvm::GlobalVariable>(GlobalAddr)->setThreadLocal(/*Val=*/true);
    return Address(GlobalAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }

  const std::string CacheSuffix = getName({"cache", ""});

  // The arguments are materialized in call order: loc, thread id, address of
  // the global as void*, size of the type, and the cache variable.
  llvm::Value *LocArg = emitUpdateLocation(CGF, SourceLocation());
  llvm::Value *ThreadIDArg = getThreadID(CGF, SourceLocation());
  llvm::Value *DataArg =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GlobalAddr,
                                                      CGM.VoidPtrTy);
  llvm::Value *SizeArg = CGF.Builder.CreateIntCast(
      CGF.getTypeSize(VarType), CGM.SizeTy, /*isSigned=*/false);
  llvm::Value *CacheArg = getOrCreateInternalVariable(
      CGM.VoidPtrPtrTy,
      Twine(Name).concat(ArtificialSuffix).concat(CacheSuffix));
  llvm::Value *Args[] = {LocArg, ThreadIDArg, DataArg, SizeArg, CacheArg};

  llvm::FunctionCallee CachedFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_threadprivate_cached);
  llvm::Value *RawCopy = CGF.EmitRuntimeCall(CachedFn, Args);
  // Cast the returned pointer to a pointer to the variable's memory type.
  llvm::Value *TypedCopy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      RawCopy, VarMemTy->getPointerTo(/*AddrSpace=*/0));
  return Address(TypedCopy, CGM.getContext().getTypeAlignInChars(VarType));
}
2015
2016
/// Emits code for an OpenMP 'if' clause.
///
/// \param CGF     Function being emitted into.
/// \param Cond    Condition expression of the clause.
/// \param ThenGen Generator run when the condition is true.
/// \param ElseGen Generator run when the condition is false.
///
/// If \p Cond constant-folds to a simple integer only the selected generator
/// is run and no branch is emitted; otherwise an omp_if.then / omp_if.else /
/// omp_if.end diamond is emitted.
void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // Fast path: a foldable condition lets us skip both the test and the dead
  // arm entirely.
  bool FoldedValue;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, FoldedValue)) {
    const RegionCodeGenTy &LiveArm = FoldedValue ? ThenGen : ElseGen;
    LiveArm(CGF);
    return;
  }

  // Slow path: the condition did not fold (or could not be elided), so emit a
  // real conditional branch.
  llvm::BasicBlock *ThenBB = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBB = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *EndBB = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBB, ElseBB, /*TrueCount=*/0);

  // 'then' arm.
  CGF.EmitBlock(ThenBB);
  ThenGen(CGF);
  CGF.EmitBranch(EndBB);

  // 'else' arm. The intent stated by the original code is that the
  // unconditional branches need no line number.
  // NOTE(review): the object returned by CreateEmpty is discarded right away;
  // if ApplyDebugLocation is a scoped guard these two statements have no
  // lasting effect - confirm before relying on them.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBB);
  ElseGen(CGF);
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(EndBB);

  // Continuation block for the code following the 'if'.
  CGF.EmitBlock(EndBB, /*IsFinished=*/true);
}
2054
2055
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2056
                                       llvm::Function *OutlinedFn,
2057
                                       ArrayRef<llvm::Value *> CapturedVars,
2058
5.34k
                                       const Expr *IfCond) {
2059
5.34k
  if (!CGF.HaveInsertPoint())
2060
0
    return;
2061
5.34k
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2062
5.34k
  auto &M = CGM.getModule();
2063
5.34k
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
2064
5.13k
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
2065
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2066
5.13k
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2067
5.13k
    llvm::Value *Args[] = {
2068
5.13k
        RTLoc,
2069
5.13k
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2070
5.13k
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2071
5.13k
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
2072
5.13k
    RealArgs.append(std::begin(Args), std::end(Args));
2073
5.13k
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2074
2075
5.13k
    llvm::FunctionCallee RTLFn =
2076
5.13k
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
2077
5.13k
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
2078
5.13k
  };
2079
5.34k
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
2080
437
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
2081
437
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2082
437
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2083
    // Build calls:
2084
    // __kmpc_serialized_parallel(&Loc, GTid);
2085
437
    llvm::Value *Args[] = {RTLoc, ThreadID};
2086
437
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2087
437
                            M, OMPRTL___kmpc_serialized_parallel),
2088
437
                        Args);
2089
2090
    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
2091
437
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2092
437
    Address ZeroAddrBound =
2093
437
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2094
437
                                         /*Name=*/".bound.zero.addr");
2095
437
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
2096
437
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2097
    // ThreadId for serialized parallels is 0.
2098
437
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
2099
437
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
2100
437
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2101
437
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2102
2103
    // __kmpc_end_serialized_parallel(&Loc, GTid);
2104
437
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2105
437
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2106
437
                            M, OMPRTL___kmpc_end_serialized_parallel),
2107
437
                        EndArgs);
2108
437
  };
2109
5.34k
  if (IfCond) {
2110
563
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2111
4.78k
  } else {
2112
4.78k
    RegionCodeGenTy ThenRCG(ThenGen);
2113
4.78k
    ThenRCG(CGF);
2114
4.78k
  }
2115
5.34k
}
2116
2117
// If we're inside an (outlined) parallel region, use the region info's
2118
// thread-ID variable (it is passed in a first argument of the outlined function
2119
// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2120
// regular serial code region, get thread ID by calling kmp_int32
2121
// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2122
// return the address of that temp.
2123
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2124
1.53k
                                             SourceLocation Loc) {
2125
1.53k
  if (auto *OMPRegionInfo =
2126
1.50k
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2127
1.50k
    if (OMPRegionInfo->getThreadIDVariable())
2128
668
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2129
2130
866
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
2131
866
  QualType Int32Ty =
2132
866
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2133
866
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2134
866
  CGF.EmitStoreOfScalar(ThreadID,
2135
866
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2136
2137
866
  return ThreadIDTemp;
2138
866
}
2139
2140
/// Looks up, or creates on first use, the internal global named \p Name.
///
/// \param Ty           Value type of the global.
/// \param Name         Name of the global; also the key into InternalVars.
/// \param AddressSpace Address space the global is created in.
/// \returns The cached global if one exists for \p Name (asserting that its
///          pointee type equals \p Ty), otherwise a new zero-initialized,
///          non-constant, common-linkage, non-thread-local global that is
///          recorded in InternalVars.
llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
    llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
  // Render the twine once; the rendered text is the map key.
  SmallString<256> NameStorage;
  StringRef Key = Name.toStringRef(NameStorage);

  auto &Slot = *InternalVars.try_emplace(Key, nullptr).first;
  if (!Slot.second) {
    // First request for this name: create the global, named after the key
    // stored in the map entry.
    auto *NewGV = new llvm::GlobalVariable(
        CGM.getModule(), Ty, /*IsConstant*/ false,
        llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
        Slot.first(), /*InsertBefore=*/nullptr,
        llvm::GlobalValue::NotThreadLocal, AddressSpace);
    Slot.second = NewGV;
    return NewGV;
  }

  assert(Slot.second->getType()->getPointerElementType() == Ty &&
         "OMP internal variable has different type than requested");
  return &*Slot.second;
}
2159
2160
579
/// Returns the internal variable used as the lock for the critical region
/// named \p CriticalName. The variable has type KmpCriticalNameTy and its name
/// is built by getName() from "gomp_critical_user_<CriticalName>" and "var".
llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  const std::string LockPrefix = ("gomp_critical_user_" + CriticalName).str();
  const std::string LockName = getName({LockPrefix, "var"});
  return getOrCreateInternalVariable(KmpCriticalNameTy, LockName);
}
2165
2166
namespace {
2167
/// Common pre(post)-action for different OpenMP constructs.
2168
class CommonActionTy final : public PrePostActionTy {
2169
  llvm::FunctionCallee EnterCallee;
2170
  ArrayRef<llvm::Value *> EnterArgs;
2171
  llvm::FunctionCallee ExitCallee;
2172
  ArrayRef<llvm::Value *> ExitArgs;
2173
  bool Conditional;
2174
  llvm::BasicBlock *ContBlock = nullptr;
2175
2176
public:
2177
  CommonActionTy(llvm::FunctionCallee EnterCallee,
2178
                 ArrayRef<llvm::Value *> EnterArgs,
2179
                 llvm::FunctionCallee ExitCallee,
2180
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2181
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2182
1.46k
        ExitArgs(ExitArgs), Conditional(Conditional) {}
2183
872
  void Enter(CodeGenFunction &CGF) override {
2184
872
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2185
872
    if (Conditional) {
2186
243
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2187
243
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2188
243
      ContBlock = CGF.createBasicBlock("omp_if.end");
2189
      // Generate the branch (If-stmt)
2190
243
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2191
243
      CGF.EmitBlock(ThenBlock);
2192
243
    }
2193
872
  }
2194
243
  void Done(CodeGenFunction &CGF) {
2195
    // Emit the rest of blocks/branches
2196
243
    CGF.EmitBranch(ContBlock);
2197
243
    CGF.EmitBlock(ContBlock, true);
2198
243
  }
2199
1.56k
  void Exit(CodeGenFunction &CGF) override {
2200
1.56k
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2201
1.56k
  }
2202
};
2203
} // anonymous namespace
2204
2205
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2206
                                         StringRef CriticalName,
2207
                                         const RegionCodeGenTy &CriticalOpGen,
2208
166
                                         SourceLocation Loc, const Expr *Hint) {
2209
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2210
  // CriticalOpGen();
2211
  // __kmpc_end_critical(ident_t *, gtid, Lock);
2212
  // Prepare arguments and build a call to __kmpc_critical
2213
166
  if (!CGF.HaveInsertPoint())
2214
0
    return;
2215
166
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2216
166
                         getCriticalRegionLock(CriticalName)};
2217
166
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2218
166
                                                std::end(Args));
2219
166
  if (Hint) {
2220
3
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
2221
3
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2222
3
  }
2223
166
  CommonActionTy Action(
2224
166
      OMPBuilder.getOrCreateRuntimeFunction(
2225
166
          CGM.getModule(),
2226
163
          Hint ? 
OMPRTL___kmpc_critical_with_hint3
: OMPRTL___kmpc_critical),
2227
166
      EnterArgs,
2228
166
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2229
166
                                            OMPRTL___kmpc_end_critical),
2230
166
      Args);
2231
166
  CriticalOpGen.setAction(Action);
2232
166
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2233
166
}
2234
2235
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2236
                                       const RegionCodeGenTy &MasterOpGen,
2237
186
                                       SourceLocation Loc) {
2238
186
  if (!CGF.HaveInsertPoint())
2239
0
    return;
2240
  // if(__kmpc_master(ident_t *, gtid)) {
2241
  //   MasterOpGen();
2242
  //   __kmpc_end_master(ident_t *, gtid);
2243
  // }
2244
  // Prepare arguments and build a call to __kmpc_master
2245
186
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2246
186
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2247
186
                            CGM.getModule(), OMPRTL___kmpc_master),
2248
186
                        Args,
2249
186
                        OMPBuilder.getOrCreateRuntimeFunction(
2250
186
                            CGM.getModule(), OMPRTL___kmpc_end_master),
2251
186
                        Args,
2252
186
                        /*Conditional=*/true);
2253
186
  MasterOpGen.setAction(Action);
2254
186
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2255
186
  Action.Done(CGF);
2256
186
}
2257
2258
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2259
16
                                        SourceLocation Loc) {
2260
16
  if (!CGF.HaveInsertPoint())
2261
0
    return;
2262
16
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2263
8
    OMPBuilder.createTaskyield(CGF.Builder);
2264
8
  } else {
2265
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2266
8
    llvm::Value *Args[] = {
2267
8
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2268
8
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2269
8
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2270
8
                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2271
8
                        Args);
2272
8
  }
2273
2274
16
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2275
4
    Region->emitUntiedSwitch(CGF);
2276
16
}
2277
2278
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2279
                                          const RegionCodeGenTy &TaskgroupOpGen,
2280
247
                                          SourceLocation Loc) {
2281
247
  if (!CGF.HaveInsertPoint())
2282
0
    return;
2283
  // __kmpc_taskgroup(ident_t *, gtid);
2284
  // TaskgroupOpGen();
2285
  // __kmpc_end_taskgroup(ident_t *, gtid);
2286
  // Prepare arguments and build a call to __kmpc_taskgroup
2287
247
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2288
247
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2289
247
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
2290
247
                        Args,
2291
247
                        OMPBuilder.getOrCreateRuntimeFunction(
2292
247
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2293
247
                        Args);
2294
247
  TaskgroupOpGen.setAction(Action);
2295
247
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2296
247
}
2297
2298
/// Given an array of pointers to variables, project the address of a
2299
/// given variable.
2300
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2301
1.32k
                                      unsigned Index, const VarDecl *Var) {
2302
  // Pull out the pointer to the variable.
2303
1.32k
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2304
1.32k
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2305
2306
1.32k
  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2307
1.32k
  Addr = CGF.Builder.CreateElementBitCast(
2308
1.32k
      Addr, CGF.ConvertTypeForMem(Var->getType()));
2309
1.32k
  return Addr;
2310
1.32k
}
2311
2312
/// Emits the helper function
///   void copy_func(void *LHSArg, void *RHSArg);
/// used for copyprivate handling and returns it.
///
/// Both arguments are cast to \p ArgsType and treated as arrays of pointers to
/// variables. For each index I the generated body performs, via EmitOMPCopy
/// with \p AssignmentOps[I],
///   *(TypeI*)Dst[I] = *(TypeI*)Src[I];
/// where the destination comes from the first argument and the source from
/// the second.
///
/// \param ArgsType        Pointer type the two void* arguments are cast to.
/// \param CopyprivateVars References to the copied variables (supply the type).
/// \param DestExprs       References to the destination helper variables.
/// \param SrcExprs        References to the source helper variables.
/// \param AssignmentOps   Per-variable assignment operations.
/// \param Loc             Location used for the parameters and the function.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &Ctx = CGM.getContext();

  // Signature: void copy_func(void *LHSArg, void *RHSArg);
  ImplicitParamDecl LHSArg(Ctx, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           Ctx.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(Ctx, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           Ctx.VoidPtrTy, ImplicitParamDecl::Other);
  FunctionArgList Params;
  Params.push_back(&LHSArg);
  Params.push_back(&RHSArg);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Params);

  // Create the internal-linkage function itself.
  const std::string FnName =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *CopyFn = llvm::Function::Create(
      CGM.getTypes().GetFunctionType(FnInfo),
      llvm::GlobalValue::InternalLinkage, FnName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), CopyFn, FnInfo);
  CopyFn->setDoesNotRecurse();

  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, CopyFn, FnInfo, Params, Loc,
                    Loc);

  // Dest = (void*[n])(LHSArg);
  llvm::Value *LHSRaw = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg));
  Address DestArray(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LHSRaw, ArgsType),
      CGF.getPointerAlign());
  // Src = (void*[n])(RHSArg);
  llvm::Value *RHSRaw = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg));
  Address SrcArray(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(RHSRaw, ArgsType),
      CGF.getPointerAlign());

  // Element-wise copy, one assignment per copyprivate variable.
  const unsigned NumOps = AssignmentOps.size();
  for (unsigned Idx = 0; Idx < NumOps; ++Idx) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[Idx])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, DestArray, Idx, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[Idx])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, SrcArray, Idx, SrcVar);

    // The copied type is taken from the original copyprivate variable.
    QualType CopiedTy =
        cast<DeclRefExpr>(CopyprivateVars[Idx])->getDecl()->getType();
    CGF.EmitOMPCopy(CopiedTy, DestAddr, SrcAddr, DestVar, SrcVar,
                    AssignmentOps[Idx]);
  }

  CGF.FinishFunction();
  return CopyFn;
}
2365
2366
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2367
                                       const RegionCodeGenTy &SingleOpGen,
2368
                                       SourceLocation Loc,
2369
                                       ArrayRef<const Expr *> CopyprivateVars,
2370
                                       ArrayRef<const Expr *> SrcExprs,
2371
                                       ArrayRef<const Expr *> DstExprs,
2372
57
                                       ArrayRef<const Expr *> AssignmentOps) {
2373
57
  if (!CGF.HaveInsertPoint())
2374
0
    return;
2375
57
  assert(CopyprivateVars.size() == SrcExprs.size() &&
2376
57
         CopyprivateVars.size() == DstExprs.size() &&
2377
57
         CopyprivateVars.size() == AssignmentOps.size());
2378
57
  ASTContext &C = CGM.getContext();
2379
  // int32 did_it = 0;
2380
  // if(__kmpc_single(ident_t *, gtid)) {
2381
  //   SingleOpGen();
2382
  //   __kmpc_end_single(ident_t *, gtid);
2383
  //   did_it = 1;
2384
  // }
2385
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2386
  // <copy_func>, did_it);
2387
2388
57
  Address DidIt = Address::invalid();
2389
57
  if (!CopyprivateVars.empty()) {
2390
    // int32 did_it = 0;
2391
28
    QualType KmpInt32Ty =
2392
28
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2393
28
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2394
28
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2395
28
  }
2396
  // Prepare arguments and build a call to __kmpc_single
2397
57
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2398
57
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2399
57
                            CGM.getModule(), OMPRTL___kmpc_single),
2400
57
                        Args,
2401
57
                        OMPBuilder.getOrCreateRuntimeFunction(
2402
57
                            CGM.getModule(), OMPRTL___kmpc_end_single),
2403
57
                        Args,
2404
57
                        /*Conditional=*/true);
2405
57
  SingleOpGen.setAction(Action);
2406
57
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2407
57
  if (DidIt.isValid()) {
2408
    // did_it = 1;
2409
28
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2410
28
  }
2411
57
  Action.Done(CGF);
2412
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2413
  // <copy_func>, did_it);
2414
57
  if (DidIt.isValid()) {
2415
28
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2416
28
    QualType CopyprivateArrayTy = C.getConstantArrayType(
2417
28
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
2418
28
        /*IndexTypeQuals=*/0);
2419
    // Create a list of all private variables for copyprivate.
2420
28
    Address CopyprivateList =
2421
28
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2422
97
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; 
++I69
) {
2423
69
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2424
69
      CGF.Builder.CreateStore(
2425
69
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2426
69
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2427
69
              CGF.VoidPtrTy),
2428
69
          Elem);
2429
69
    }
2430
    // Build function that copies private values from single region to all other
2431
    // threads in the corresponding parallel region.
2432
28
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2433
28
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
2434
28
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
2435
28
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2436
28
    Address CL =
2437
28
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
2438
28
                                                      CGF.VoidPtrTy);
2439
28
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2440
28
    llvm::Value *Args[] = {
2441
28
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2442
28
        getThreadID(CGF, Loc),        // i32 <gtid>
2443
28
        BufSize,                      // size_t <buf_size>
2444
28
        CL.getPointer(),              // void *<copyprivate list>
2445
28
        CpyFn,                        // void (*) (void *, void *) <copy_func>
2446
28
        DidItVal                      // i32 did_it
2447
28
    };
2448
28
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2449
28
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
2450
28
                        Args);
2451
28
  }
2452
57
}
2453
2454
void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2455
                                        const RegionCodeGenTy &OrderedOpGen,
2456
24
                                        SourceLocation Loc, bool IsThreads) {
2457
24
  if (!CGF.HaveInsertPoint())
2458
0
    return;
2459
  // __kmpc_ordered(ident_t *, gtid);
2460
  // OrderedOpGen();
2461
  // __kmpc_end_ordered(ident_t *, gtid);
2462
  // Prepare arguments and build a call to __kmpc_ordered
2463
24
  if (IsThreads) {
2464
16
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2465
16
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2466
16
                              CGM.getModule(), OMPRTL___kmpc_ordered),
2467
16
                          Args,
2468
16
                          OMPBuilder.getOrCreateRuntimeFunction(
2469
16
                              CGM.getModule(), OMPRTL___kmpc_end_ordered),
2470
16
                          Args);
2471
16
    OrderedOpGen.setAction(Action);
2472
16
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2473
16
    return;
2474
16
  }
2475
8
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2476
8
}
2477
2478
972
/// Maps the directive kind that requests a barrier to the matching
/// OMP_IDENT_BARRIER_* location flag. 'for', 'sections' and 'single' get their
/// dedicated implicit-barrier flags, 'barrier' gets the explicit-barrier flag,
/// and every other kind gets the generic implicit-barrier flag.
unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  switch (Kind) {
  case OMPD_for:
    return OMP_IDENT_BARRIER_IMPL_FOR;
  case OMPD_sections:
    return OMP_IDENT_BARRIER_IMPL_SECTIONS;
  case OMPD_single:
    return OMP_IDENT_BARRIER_IMPL_SINGLE;
  case OMPD_barrier:
    return OMP_IDENT_BARRIER_EXPL;
  default:
    return OMP_IDENT_BARRIER_IMPL;
  }
}
2492
2493
void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2494
    CodeGenFunction &CGF, const OMPLoopDirective &S,
2495
3.22k
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2496
  // Check if the loop directive is actually a doacross loop directive. In this
2497
  // case choose static, 1 schedule.
2498
3.22k
  if (llvm::any_of(
2499
3.22k
          S.getClausesOfKind<OMPOrderedClause>(),
2500
20
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2501
12
    ScheduleKind = OMPC_SCHEDULE_static;
2502
    // Chunk size is 1 in this case.
2503
12
    llvm::APInt ChunkSize(32, 1);
2504
12
    ChunkExpr = IntegerLiteral::Create(
2505
12
        CGF.getContext(), ChunkSize,
2506
12
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2507
12
        SourceLocation());
2508
12
  }
2509
3.22k
}
2510
2511
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2512
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
2513
804
                                      bool ForceSimpleCall) {
2514
  // Check if we should use the OMPBuilder
2515
804
  auto *OMPRegionInfo =
2516
804
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2517
804
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2518
48
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
2519
48
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
2520
48
    return;
2521
48
  }
2522
2523
756
  if (!CGF.HaveInsertPoint())
2524
0
    return;
2525
  // Build call __kmpc_cancel_barrier(loc, thread_id);
2526
  // Build call __kmpc_barrier(loc, thread_id);
2527
756
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
2528
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2529
  // thread_id);
2530
756
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2531
756
                         getThreadID(CGF, Loc)};
2532
756
  if (OMPRegionInfo) {
2533
473
    if (!ForceSimpleCall && 
OMPRegionInfo->hasCancel()290
) {
2534
4
      llvm::Value *Result = CGF.EmitRuntimeCall(
2535
4
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2536
4
                                                OMPRTL___kmpc_cancel_barrier),
2537
4
          Args);
2538
4
      if (EmitChecks) {
2539
        // if (__kmpc_cancel_barrier()) {
2540
        //   exit from construct;
2541
        // }
2542
4
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2543
4
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2544
4
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2545
4
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2546
4
        CGF.EmitBlock(ExitBB);
2547
        //   exit from construct;
2548
4
        CodeGenFunction::JumpDest CancelDestination =
2549
4
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2550
4
        CGF.EmitBranchThroughCleanup(CancelDestination);
2551
4
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2552
4
      }
2553
4
      return;
2554
4
    }
2555
752
  }
2556
752
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2557
752
                          CGM.getModule(), OMPRTL___kmpc_barrier),
2558
752
                      Args);
2559
752
}
2560
2561
/// Map the OpenMP loop schedule to the runtime enumeration.
2562
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2563
16.1k
                                          bool Chunked, bool Ordered) {
2564
16.1k
  switch (ScheduleKind) {
2565
2.88k
  case OMPC_SCHEDULE_static:
2566
1.89k
    return Chunked ? (Ordered ? 
OMP_ord_static_chunked7
:
OMP_sch_static_chunked1.88k
)
2567
993
                   : (Ordered ? 
OMP_ord_static6
:
OMP_sch_static987
);
2568
1.94k
  case OMPC_SCHEDULE_dynamic:
2569
1.93k
    return Ordered ? 
OMP_ord_dynamic_chunked4
: OMP_sch_dynamic_chunked;
2570
543
  case OMPC_SCHEDULE_guided:
2571
542
    return Ordered ? 
OMP_ord_guided_chunked1
: OMP_sch_guided_chunked;
2572
548
  case OMPC_SCHEDULE_runtime:
2573
542
    return Ordered ? 
OMP_ord_runtime6
: OMP_sch_runtime;
2574
550
  case OMPC_SCHEDULE_auto:
2575
545
    return Ordered ? 
OMP_ord_auto5
: OMP_sch_auto;
2576
9.68k
  case OMPC_SCHEDULE_unknown:
2577
9.68k
    assert(!Chunked && "chunk was specified but schedule kind not known");
2578
9.67k
    return Ordered ? 
OMP_ord_static8
: OMP_sch_static;
2579
0
  }
2580
0
  llvm_unreachable("Unexpected runtime schedule");
2581
0
}
2582
2583
/// Map the OpenMP distribute schedule to the runtime enumeration.
2584
static OpenMPSchedType
2585
13.3k
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2586
  // only static is allowed for dist_schedule
2587
11.4k
  return Chunked ? 
OMP_dist_sch_static_chunked1.95k
: OMP_dist_sch_static;
2588
13.3k
}
2589
2590
bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2591
5.67k
                                         bool Chunked) const {
2592
5.67k
  OpenMPSchedType Schedule =
2593
5.67k
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2594
5.67k
  return Schedule == OMP_sch_static;
2595
5.67k
}
2596
2597
bool CGOpenMPRuntime::isStaticNonchunked(
2598
4.45k
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2599
4.45k
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2600
4.45k
  return Schedule == OMP_dist_sch_static;
2601
4.45k
}
2602
2603
bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2604
4.66k
                                      bool Chunked) const {
2605
4.66k
  OpenMPSchedType Schedule =
2606
4.66k
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2607
4.66k
  return Schedule == OMP_sch_static_chunked;
2608
4.66k
}
2609
2610
bool CGOpenMPRuntime::isStaticChunked(
2611
4.45k
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2612
4.45k
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2613
4.45k
  return Schedule == OMP_dist_sch_static_chunked;
2614
4.45k
}
2615
2616
1.01k
/// Returns true when the given 'schedule' clause kind, taken as non-chunked
/// and non-ordered, maps to anything other than OMP_sch_static.
bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  const OpenMPSchedType Unchunked =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Unchunked != OMP_sch_static_chunked && "cannot be chunked here");
  return Unchunked != OMP_sch_static;
}
2622
2623
static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2624
                                  OpenMPScheduleClauseModifier M1,
2625
9.25k
                                  OpenMPScheduleClauseModifier M2) {
2626
9.25k
  int Modifier = 0;
2627
9.25k
  switch (M1) {
2628
17
  case OMPC_SCHEDULE_MODIFIER_monotonic:
2629
17
    Modifier = OMP_sch_modifier_monotonic;
2630
17
    break;
2631
12
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2632
12
    Modifier = OMP_sch_modifier_nonmonotonic;
2633
12
    break;
2634
12
  case OMPC_SCHEDULE_MODIFIER_simd:
2635
12
    if (Schedule == OMP_sch_static_chunked)
2636
6
      Schedule = OMP_sch_static_balanced_chunked;
2637
12
    break;
2638
0
  case OMPC_SCHEDULE_MODIFIER_last:
2639
9.21k
  case OMPC_SCHEDULE_MODIFIER_unknown:
2640
9.21k
    break;
2641
9.25k
  }
2642
9.25k
  switch (M2) {
2643
0
  case OMPC_SCHEDULE_MODIFIER_monotonic:
2644
0
    Modifier = OMP_sch_modifier_monotonic;
2645
0
    break;
2646
6
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2647
6
    Modifier = OMP_sch_modifier_nonmonotonic;
2648
6
    break;
2649
0
  case OMPC_SCHEDULE_MODIFIER_simd:
2650
0
    if (Schedule == OMP_sch_static_chunked)
2651
0
      Schedule = OMP_sch_static_balanced_chunked;
2652
0
    break;
2653
0
  case OMPC_SCHEDULE_MODIFIER_last:
2654
9.25k
  case OMPC_SCHEDULE_MODIFIER_unknown:
2655
9.25k
    break;
2656
9.25k
  }
2657
  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2658
  // If the static schedule kind is specified or if the ordered clause is
2659
  // specified, and if the nonmonotonic modifier is not specified, the effect is
2660
  // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2661
  // modifier is specified, the effect is as if the nonmonotonic modifier is
2662
  // specified.
2663
9.25k
  if (CGM.getLangOpts().OpenMP >= 50 && 
Modifier == 06.03k
) {
2664
6.00k
    if (!(Schedule == OMP_sch_static_chunked || 
Schedule == OMP_sch_static5.66k
||
2665
3.44k
          Schedule == OMP_sch_static_balanced_chunked ||
2666
3.43k
          Schedule == OMP_ord_static_chunked || 
Schedule == OMP_ord_static3.43k
||
2667
3.42k
          Schedule == OMP_dist_sch_static_chunked ||
2668
2.95k
          Schedule == OMP_dist_sch_static))
2669
519
      Modifier = OMP_sch_modifier_nonmonotonic;
2670
6.00k
  }
2671
9.25k
  return Schedule | Modifier;
2672
9.25k
}
2673
2674
void CGOpenMPRuntime::emitForDispatchInit(
2675
    CodeGenFunction &CGF, SourceLocation Loc,
2676
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2677
744
    bool Ordered, const DispatchRTInput &DispatchValues) {
2678
744
  if (!CGF.HaveInsertPoint())
2679
0
    return;
2680
744
  OpenMPSchedType Schedule = getRuntimeSchedule(
2681
744
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2682
744
  assert(Ordered ||
2683
744
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2684
744
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2685
744
          Schedule != OMP_sch_static_balanced_chunked));
2686
  // Call __kmpc_dispatch_init(
2687
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2688
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2689
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2690
2691
  // If the Chunk was not specified in the clause - use default value 1.
2692
139
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2693
605
                                            : CGF.Builder.getIntN(IVSize, 1);
2694
744
  llvm::Value *Args[] = {
2695
744
      emitUpdateLocation(CGF, Loc),
2696
744
      getThreadID(CGF, Loc),
2697
744
      CGF.Builder.getInt32(addMonoNonMonoModifier(
2698
744
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2699
744
      DispatchValues.LB,                                     // Lower
2700
744
      DispatchValues.UB,                                     // Upper
2701
744
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
2702
744
      Chunk                                                  // Chunk
2703
744
  };
2704
744
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2705
744
}
2706
2707
static void emitForStaticInitCall(
2708
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2709
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2710
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2711
8.51k
    const CGOpenMPRuntime::StaticRTInput &Values) {
2712
8.51k
  if (!CGF.HaveInsertPoint())
2713
0
    return;
2714
2715
8.51k
  assert(!Values.Ordered);
2716
8.51k
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2717
8.51k
         Schedule == OMP_sch_static_balanced_chunked ||
2718
8.51k
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2719
8.51k
         Schedule == OMP_dist_sch_static ||
2720
8.51k
         Schedule == OMP_dist_sch_static_chunked);
2721
2722
  // Call __kmpc_for_static_init(
2723
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2724
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2725
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2726
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2727
8.51k
  llvm::Value *Chunk = Values.Chunk;
2728
8.51k
  if (Chunk == nullptr) {
2729
7.33k
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2730
7.33k
            Schedule == OMP_dist_sch_static) &&
2731
7.33k
           "expected static non-chunked schedule");
2732
    // If the Chunk was not specified in the clause - use default value 1.
2733
7.33k
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2734
1.17k
  } else {
2735
1.17k
    assert((Schedule == OMP_sch_static_chunked ||
2736
1.17k
            Schedule == OMP_sch_static_balanced_chunked ||
2737
1.17k
            Schedule == OMP_ord_static_chunked ||
2738
1.17k
            Schedule == OMP_dist_sch_static_chunked) &&
2739
1.17k
           "expected static chunked schedule");
2740
1.17k
  }
2741
8.51k
  llvm::Value *Args[] = {
2742
8.51k
      UpdateLocation,
2743
8.51k
      ThreadId,
2744
8.51k
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2745
8.51k
                                                  M2)), // Schedule type
2746
8.51k
      Values.IL.getPointer(),                           // &isLastIter
2747
8.51k
      Values.LB.getPointer(),                           // &LB
2748
8.51k
      Values.UB.getPointer(),                           // &UB
2749
8.51k
      Values.ST.getPointer(),                           // &Stride
2750
8.51k
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
2751
8.51k
      Chunk                                             // Chunk
2752
8.51k
  };
2753
8.51k
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2754
8.51k
}
2755
2756
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2757
                                        SourceLocation Loc,
2758
                                        OpenMPDirectiveKind DKind,
2759
                                        const OpenMPScheduleTy &ScheduleKind,
2760
4.05k
                                        const StaticRTInput &Values) {
2761
4.05k
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2762
4.05k
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2763
4.05k
  assert(isOpenMPWorksharingDirective(DKind) &&
2764
4.05k
         "Expected loop-based or sections-based directive.");
2765
4.05k
  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2766
4.05k
                                             isOpenMPLoopDirective(DKind)
2767
3.96k
                                                 ? OMP_IDENT_WORK_LOOP
2768
88
                                                 : OMP_IDENT_WORK_SECTIONS);
2769
4.05k
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
2770
4.05k
  llvm::FunctionCallee StaticInitFunction =
2771
4.05k
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2772
4.05k
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2773
4.05k
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2774
4.05k
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2775
4.05k
}
2776
2777
void CGOpenMPRuntime::emitDistributeStaticInit(
2778
    CodeGenFunction &CGF, SourceLocation Loc,
2779
    OpenMPDistScheduleClauseKind SchedKind,
2780
4.45k
    const CGOpenMPRuntime::StaticRTInput &Values) {
2781
4.45k
  OpenMPSchedType ScheduleNum =
2782
4.45k
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2783
4.45k
  llvm::Value *UpdatedLocation =
2784
4.45k
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2785
4.45k
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
2786
4.45k
  llvm::FunctionCallee StaticInitFunction =
2787
4.45k
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2788
4.45k
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2789
4.45k
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2790
4.45k
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
2791
4.45k
}
2792
2793
void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2794
                                          SourceLocation Loc,
2795
8.56k
                                          OpenMPDirectiveKind DKind) {
2796
8.56k
  if (!CGF.HaveInsertPoint())
2797
0
    return;
2798
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2799
8.56k
  llvm::Value *Args[] = {
2800
8.56k
      emitUpdateLocation(CGF, Loc,
2801
8.56k
                         isOpenMPDistributeDirective(DKind)
2802
6.80k
                             ? OMP_IDENT_WORK_DISTRIBUTE
2803
1.76k
                             : isOpenMPLoopDirective(DKind)
2804
1.63k
                                   ? OMP_IDENT_WORK_LOOP
2805
128
                                   : OMP_IDENT_WORK_SECTIONS),
2806
8.56k
      getThreadID(CGF, Loc)};
2807
8.56k
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2808
8.56k
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2809
8.56k
                          CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2810
8.56k
                      Args);
2811
8.56k
}
2812
2813
void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2814
                                                 SourceLocation Loc,
2815
                                                 unsigned IVSize,
2816
37
                                                 bool IVSigned) {
2817
37
  if (!CGF.HaveInsertPoint())
2818
0
    return;
2819
  // Call __kmpc_dispatch_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2820
37
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2821
37
  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2822
37
}
2823
2824
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2825
                                          SourceLocation Loc, unsigned IVSize,
2826
                                          bool IVSigned, Address IL,
2827
                                          Address LB, Address UB,
2828
744
                                          Address ST) {
2829
  // Call __kmpc_dispatch_next(
2830
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2831
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2832
  //          kmp_int[32|64] *p_stride);
2833
744
  llvm::Value *Args[] = {
2834
744
      emitUpdateLocation(CGF, Loc),
2835
744
      getThreadID(CGF, Loc),
2836
744
      IL.getPointer(), // &isLastIter
2837
744
      LB.getPointer(), // &Lower
2838
744
      UB.getPointer(), // &Upper
2839
744
      ST.getPointer()  // &Stride
2840
744
  };
2841
744
  llvm::Value *Call =
2842
744
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2843
744
  return CGF.EmitScalarConversion(
2844
744
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2845
744
      CGF.getContext().BoolTy, Loc);
2846
744
}
2847
2848
void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2849
                                           llvm::Value *NumThreads,
2850
292
                                           SourceLocation Loc) {
2851
292
  if (!CGF.HaveInsertPoint())
2852
0
    return;
2853
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2854
292
  llvm::Value *Args[] = {
2855
292
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2856
292
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2857
292
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2858
292
                          CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2859
292
                      Args);
2860
292
}
2861
2862
void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2863
                                         ProcBindKind ProcBind,
2864
68
                                         SourceLocation Loc) {
2865
68
  if (!CGF.HaveInsertPoint())
2866
0
    return;
2867
68
  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2868
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2869
68
  llvm::Value *Args[] = {
2870
68
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2871
68
      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2872
68
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2873
68
                          CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2874
68
                      Args);
2875
68
}
2876
2877
void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2878
104
                                SourceLocation Loc, llvm::AtomicOrdering AO) {
2879
104
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2880
20
    OMPBuilder.createFlush(CGF.Builder);
2881
84
  } else {
2882
84
    if (!CGF.HaveInsertPoint())
2883
0
      return;
2884
    // Build call void __kmpc_flush(ident_t *loc)
2885
84
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2886
84
                            CGM.getModule(), OMPRTL___kmpc_flush),
2887
84
                        emitUpdateLocation(CGF, Loc));
2888
84
  }
2889
104
}
2890
2891
namespace {
2892
/// Indexes of fields for type kmp_task_t.
2893
enum KmpTaskTFields {
2894
  /// List of shared variables.
2895
  KmpTaskTShareds,
2896
  /// Task routine.
2897
  KmpTaskTRoutine,
2898
  /// Partition id for the untied tasks.
2899
  KmpTaskTPartId,
2900
  /// Function with call of destructors for private variables.
2901
  Data1,
2902
  /// Task priority.
2903
  Data2,
2904
  /// (Taskloops only) Lower bound.
2905
  KmpTaskTLowerBound,
2906
  /// (Taskloops only) Upper bound.
2907
  KmpTaskTUpperBound,
2908
  /// (Taskloops only) Stride.
2909
  KmpTaskTStride,
2910
  /// (Taskloops only) Is last iteration flag.
2911
  KmpTaskTLastIter,
2912
  /// (Taskloops only) Reduction data.
2913
  KmpTaskTReductions,
2914
};
2915
} // anonymous namespace
2916
2917
5.36k
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2918
5.36k
  return OffloadEntriesTargetRegion.empty() &&
2919
1.05k
         OffloadEntriesDeviceGlobalVar.empty();
2920
5.36k
}
2921
2922
/// Initialize target region entry.
2923
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2924
    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2925
                                    StringRef ParentName, unsigned LineNum,
2926
2.98k
                                    unsigned Order) {
2927
2.98k
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
2928
2.98k
                                             "only required for the device "
2929
2.98k
                                             "code generation.");
2930
2.98k
  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
2931
2.98k
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
2932
2.98k
                                   OMPTargetRegionEntryTargetRegion);
2933
2.98k
  ++OffloadingEntriesNum;
2934
2.98k
}
2935
2936
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2937
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2938
                                  StringRef ParentName, unsigned LineNum,
2939
                                  llvm::Constant *Addr, llvm::Constant *ID,
2940
11.3k
                                  OMPTargetRegionEntryKind Flags) {
2941
  // If we are emitting code for a target, the entry is already initialized,
2942
  // only has to be registered.
2943
11.3k
  if (CGM.getLangOpts().OpenMPIsDevice) {
2944
    // This could happen if the device compilation is invoked standalone.
2945
2.94k
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
2946
0
      initializeTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
2947
0
                                      OffloadingEntriesNum);
2948
2.94k
    auto &Entry =
2949
2.94k
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
2950
2.94k
    Entry.setAddress(Addr);
2951
2.94k
    Entry.setID(ID);
2952
2.94k
    Entry.setFlags(Flags);
2953
8.38k
  } else {
2954
8.38k
    if (Flags ==
2955
8.38k
            OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
2956
8.28k
        hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
2957
8.28k
                                 /*IgnoreAddressId*/ true))
2958
4
      return;
2959
8.38k
    assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
2960
8.38k
           "Target region entry already registered!");
2961
8.38k
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
2962
8.38k
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
2963
8.38k
    ++OffloadingEntriesNum;
2964
8.38k
  }
2965
11.3k
}
2966
2967
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
2968
    unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
2969
23.4k
    bool IgnoreAddressId) const {
2970
23.4k
  auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
2971
23.4k
  if (PerDevice == OffloadEntriesTargetRegion.end())
2972
3.77k
    return false;
2973
19.6k
  auto PerFile = PerDevice->second.find(FileID);
2974
19.6k
  if (PerFile == PerDevice->second.end())
2975
0
    return false;
2976
19.6k
  auto PerParentName = PerFile->second.find(ParentName);
2977
19.6k
  if (PerParentName == PerFile->second.end())
2978
5.37k
    return false;
2979
14.3k
  auto PerLine = PerParentName->second.find(LineNum);
2980
14.3k
  if (PerLine == PerParentName->second.end())
2981
8.26k
    return false;
2982
  // Fail if this entry is already registered.
2983
6.04k
  if (!IgnoreAddressId &&
2984
6.03k
      (PerLine->second.getAddress() || 
PerLine->second.getID()5.76k
))
2985
277
    return false;
2986
5.76k
  return true;
2987
5.76k
}
2988
2989
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
2990
2.43k
    const OffloadTargetRegionEntryInfoActTy &Action) {
2991
  // Scan all target region entries and perform the provided action.
2992
2.43k
  for (const auto &D : OffloadEntriesTargetRegion)
2993
2.42k
    for (const auto &F : D.second)
2994
2.42k
      for (const auto &P : F.second)
2995
6.18k
        for (const auto &L : P.second)
2996
11.3k
          Action(D.first, F.first, P.first(), L.first, L.second);
2997
2.43k
}
2998
2999
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3000
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
3001
                                       OMPTargetGlobalVarEntryKind Flags,
3002
144
                                       unsigned Order) {
3003
144
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3004
144
                                             "only required for the device "
3005
144
                                             "code generation.");
3006
144
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3007
144
  ++OffloadingEntriesNum;
3008
144
}
3009
3010
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3011
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3012
                                     CharUnits VarSize,
3013
                                     OMPTargetGlobalVarEntryKind Flags,
3014
746
                                     llvm::GlobalValue::LinkageTypes Linkage) {
3015
746
  if (CGM.getLangOpts().OpenMPIsDevice) {
3016
    // This could happen if the device compilation is invoked standalone.
3017
235
    if (!hasDeviceGlobalVarEntryInfo(VarName))
3018
3
      initializeDeviceGlobalVarEntryInfo(VarName, Flags, OffloadingEntriesNum);
3019
235
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3020
235
    assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3021
235
           "Resetting with the new address.");
3022
235
    if (Entry.getAddress() && 
hasDeviceGlobalVarEntryInfo(VarName)95
) {
3023
95
      if (Entry.getVarSize().isZero()) {
3024
4
        Entry.setVarSize(VarSize);
3025
4
        Entry.setLinkage(Linkage);
3026
4
      }
3027
95
      return;
3028
95
    }
3029
140
    Entry.setVarSize(VarSize);
3030
140
    Entry.setLinkage(Linkage);
3031
140
    Entry.setAddress(Addr);
3032
511
  } else {
3033
511
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
3034
342
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3035
342
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
3036
342
             "Entry not initialized!");
3037
342
      assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3038
342
             "Resetting with the new address.");
3039
342
      if (Entry.getVarSize().isZero()) {
3040
27
        Entry.setVarSize(VarSize);
3041
27
        Entry.setLinkage(Linkage);
3042
27
      }
3043
342
      return;
3044
342
    }
3045
169
    OffloadEntriesDeviceGlobalVar.try_emplace(
3046
169
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3047
169
    ++OffloadingEntriesNum;
3048
169
  }
3049
746
}
3050
3051
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3052
    actOnDeviceGlobalVarEntriesInfo(
3053
2.43k
        const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3054
  // Scan all device global variable entries and perform the provided action.
3055
2.43k
  for (const auto &E : OffloadEntriesDeviceGlobalVar)
3056
309
    Action(E.getKey(), E.getValue());
3057
2.43k
}
3058
3059
void CGOpenMPRuntime::createOffloadEntry(
3060
    llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3061
10.5k
    llvm::GlobalValue::LinkageTypes Linkage) {
3062
10.5k
  StringRef Name = Addr->getName();
3063
10.5k
  llvm::Module &M = CGM.getModule();
3064
10.5k
  llvm::LLVMContext &C = M.getContext();
3065
3066
  // Create constant string with the name.
3067
10.5k
  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3068
3069
10.5k
  std::string StringName = getName({"omp_offloading", "entry_name"});
3070
10.5k
  auto *Str = new llvm::GlobalVariable(
3071
10.5k
      M, StrPtrInit->getType(), /*isConstant=*/true,
3072
10.5k
      llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3073
10.5k
  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3074
3075
10.5k
  llvm::Constant *Data[] = {
3076
10.5k
      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
3077
10.5k
      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
3078
10.5k
      llvm::ConstantInt::get(CGM.SizeTy, Size),
3079
10.5k
      llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3080
10.5k
      llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3081
10.5k
  std::string EntryName = getName({"omp_offloading", "entry", ""});
3082
10.5k
  llvm::GlobalVariable *Entry = createGlobalStruct(
3083
10.5k
      CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3084
10.5k
      Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3085
3086
  // The entry has to be created in the section the linker expects it to be.
3087
10.5k
  Entry->setSection("omp_offloading_entries");
3088
10.5k
}
3089
3090
5.65k
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
3091
  // Emit the offloading entries and metadata so that the device codegen side
3092
  // can easily figure out what to emit. The produced metadata looks like
3093
  // this:
3094
  //
3095
  // !omp_offload.info = !{!1, ...}
3096
  //
3097
  // Right now we only generate metadata for functions that contain target
3098
  // regions.
3099
3100
  // If we are in simd mode or there are no entries, we don't need to do
3101
  // anything.
3102
5.65k
  if (CGM.getLangOpts().OpenMPSimd || 
OffloadEntriesInfoManager.empty()3.24k
)
3103
3.22k
    return;
3104
3105
2.43k
  llvm::Module &M = CGM.getModule();
3106
2.43k
  llvm::LLVMContext &C = M.getContext();
3107
2.43k
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
3108
2.43k
                         SourceLocation, StringRef>,
3109
2.43k
              16>
3110
2.43k
      OrderedEntries(OffloadEntriesInfoManager.size());
3111
2.43k
  llvm::SmallVector<StringRef, 16> ParentFunctions(
3112
2.43k
      OffloadEntriesInfoManager.size());
3113
3114
  // Auxiliary methods to create metadata values and strings.
3115
57.4k
  auto &&GetMDInt = [this](unsigned V) {
3116
57.4k
    return llvm::ConstantAsMetadata::get(
3117
57.4k
        llvm::ConstantInt::get(CGM.Int32Ty, V));
3118
57.4k
  };
3119
3120
11.6k
  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
3121
3122
  // Create the offloading info metadata node.
3123
2.43k
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
3124
3125
  // Create function that emits metadata for each target region entry;
3126
2.43k
  auto &&TargetRegionMetadataEmitter =
3127
2.43k
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
3128
2.43k
       &GetMDString](
3129
2.43k
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
3130
2.43k
          unsigned Line,
3131
11.3k
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
3132
        // Generate metadata for target regions. Each entry of this metadata
3133
        // contains:
3134
        // - Entry 0 -> Kind of this type of metadata (0).
3135
        // - Entry 1 -> Device ID of the file where the entry was identified.
3136
        // - Entry 2 -> File ID of the file where the entry was identified.
3137
        // - Entry 3 -> Mangled name of the function where the entry was
3138
        // identified.
3139
        // - Entry 4 -> Line in the file where the entry was identified.
3140
        // - Entry 5 -> Order the entry was created.
3141
        // The first element of the metadata node is the kind.
3142
11.3k
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
3143
11.3k
                                 GetMDInt(FileID),      GetMDString(ParentName),
3144
11.3k
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};
3145
3146
11.3k
        SourceLocation Loc;
3147
11.3k
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
3148
11.3k
                  E = CGM.getContext().getSourceManager().fileinfo_end();
3149
24.5k
             I != E; 
++I13.2k
) {
3150
13.2k
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
3151
13.2k
              I->getFirst()->getUniqueID().getFile() == FileID) {
3152
0
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
3153
0
                I->getFirst(), Line, 1);
3154
0
            break;
3155
0
          }
3156
13.2k
        }
3157
        // Save this entry in the right position of the ordered entries array.
3158
11.3k
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
3159
11.3k
        ParentFunctions[E.getOrder()] = ParentName;
3160
3161
        // Add metadata to the named metadata node.
3162
11.3k
        MD->addOperand(llvm::MDNode::get(C, Ops));
3163
11.3k
      };
3164
3165
2.43k
  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
3166
2.43k
      TargetRegionMetadataEmitter);
3167
3168
  // Create function that emits metadata for each device global variable entry;
3169
2.43k
  auto &&DeviceGlobalVarMetadataEmitter =
3170
2.43k
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
3171
2.43k
       MD](StringRef MangledName,
3172
2.43k
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
3173
309
               &E) {
3174
        // Generate metadata for global variables. Each entry of this metadata
3175
        // contains:
3176
        // - Entry 0 -> Kind of this type of metadata (1).
3177
        // - Entry 1 -> Mangled name of the variable.
3178
        // - Entry 2 -> Declare target kind.
3179
        // - Entry 3 -> Order the entry was created.
3180
        // The first element of the metadata node is the kind.
3181
309
        llvm::Metadata *Ops[] = {
3182
309
            GetMDInt(E.getKind()), GetMDString(MangledName),
3183
309
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
3184
3185
        // Save this entry in the right position of the ordered entries array.
3186
309
        OrderedEntries[E.getOrder()] =
3187
309
            std::make_tuple(&E, SourceLocation(), MangledName);
3188
3189
        // Add metadata to the named metadata node.
3190
309
        MD->addOperand(llvm::MDNode::get(C, Ops));
3191
309
      };
3192
3193
2.43k
  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
3194
2.43k
      DeviceGlobalVarMetadataEmitter);
3195
3196
11.6k
  for (const auto &E : OrderedEntries) {
3197
11.6k
    assert(std::get<0>(E) && "All ordered entries must exist!");
3198
11.6k
    if (const auto *CE =
3199
11.3k
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
3200
11.3k
                std::get<0>(E))) {
3201
11.3k
      if (!CE->getID() || 
!CE->getAddress()11.3k
) {
3202
        // Do not blame the entry if the parent function is not emitted.
3203
4
        StringRef FnName = ParentFunctions[CE->getOrder()];
3204
4
        if (!CGM.GetGlobalValue(FnName))
3205
2
          continue;
3206
2
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
3207
2
            DiagnosticsEngine::Error,
3208
2
            "Offloading entry for target region in %0 is incorrect: either the "
3209
2
            "address or the ID is invalid.");
3210
2
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
3211
2
        continue;
3212
2
      }
3213
11.3k
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
3214
11.3k
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
3215
309
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
3216
309
                                             OffloadEntryInfoDeviceGlobalVar>(
3217
309
                   std::get<0>(E))) {
3218
309
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
3219
309
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3220
309
              CE->getFlags());
3221
309
      switch (Flags) {
3222
259
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
3223
259
        if (CGM.getLangOpts().OpenMPIsDevice &&
3224
122
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
3225
2
          continue;
3226
257
        if (!CE->getAddress()) {
3227
0
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
3228
0
              DiagnosticsEngine::Error, "Offloading entry for declare target "
3229
0
                                        "variable %0 is incorrect: the "
3230
0
                                        "address is invalid.");
3231
0
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
3232
0
          continue;
3233
0
        }
3234
        // The variable has no definition - no need to add the entry.
3235
257
        if (CE->getVarSize().isZero())
3236
49
          continue;
3237
208
        break;
3238
208
      }
3239
50
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
3240
50
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
3241
50
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
3242
50
               "Declaret target link address is set.");
3243
50
        if (CGM.getLangOpts().OpenMPIsDevice)
3244
18
          continue;
3245
32
        if (!CE->getAddress()) {
3246
0
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
3247
0
              DiagnosticsEngine::Error,
3248
0
              "Offloading entry for declare target variable is incorrect: the "
3249
0
              "address is invalid.");
3250
0
          CGM.getDiags().Report(DiagID);
3251
0
          continue;
3252
0
        }
3253
32
        break;
3254
240
      }
3255
240
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
3256
240
                         CE->getVarSize().getQuantity(), Flags,
3257
240
                         CE->getLinkage());
3258
0
    } else {
3259
0
      llvm_unreachable("Unsupported entry kind.");
3260
0
    }
3261
11.6k
  }
3262
2.43k
}
3263
3264
/// Loads all the offload entries information from the host IR
3265
/// metadata.
3266
5.68k
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
3267
  // If we are in target mode, load the metadata from the host IR. This code has
3268
  // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
3269
3270
5.68k
  if (!CGM.getLangOpts().OpenMPIsDevice)
3271
5.12k
    return;
3272
3273
559
  if (CGM.getLangOpts().OMPHostIRFile.empty())
3274
0
    return;
3275
3276
559
  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3277
559
  if (auto EC = Buf.getError()) {
3278
0
    CGM.getDiags().Report(diag::err_cannot_open_file)
3279
0
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
3280
0
    return;
3281
0
  }
3282
3283
559
  llvm::LLVMContext C;
3284
559
  auto ME = expectedToErrorOrAndEmitErrors(
3285
559
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
3286
3287
559
  if (auto EC = ME.getError()) {
3288
0
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
3289
0
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
3290
0
    CGM.getDiags().Report(DiagID)
3291
0
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
3292
0
    return;
3293
0
  }
3294
3295
559
  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
3296
559
  if (!MD)
3297
28
    return;
3298
3299
3.12k
  
for (llvm::MDNode *MN : MD->operands())531
{
3300
15.3k
    auto &&GetMDInt = [MN](unsigned Idx) {
3301
15.3k
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
3302
15.3k
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
3303
15.3k
    };
3304
3305
3.12k
    auto &&GetMDString = [MN](unsigned Idx) {
3306
3.12k
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
3307
3.12k
      return V->getString();
3308
3.12k
    };
3309
3310
3.12k
    switch (GetMDInt(0)) {
3311
0
    default:
3312
0
      llvm_unreachable("Unexpected metadata!");
3313
0
      break;
3314
2.98k
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3315
2.98k
        OffloadingEntryInfoTargetRegion:
3316
2.98k
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
3317
2.98k
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
3318
2.98k
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
3319
2.98k
          /*Order=*/GetMDInt(5));
3320
2.98k
      break;
3321
141
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3322
141
        OffloadingEntryInfoDeviceGlobalVar:
3323
141
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
3324
141
          /*MangledName=*/GetMDString(1),
3325
141
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3326
141
              /*Flags=*/GetMDInt(2)),
3327
141
          /*Order=*/GetMDInt(3));
3328
141
      break;
3329
3.12k
    }
3330
3.12k
  }
3331
531
}
3332
3333
855
/// Builds the kmp_routine_entry_t pointer type on first use and records it in
/// KmpRoutineEntryPtrQTy (AST type) and KmpRoutineEntryPtrTy (converted type).
/// Later calls return immediately once KmpRoutineEntryPtrTy is set.
void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (KmpRoutineEntryPtrTy)
    return;

  // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
  ASTContext &Ctx = CGM.getContext();
  QualType RoutineParamTys[] = {KmpInt32Ty, Ctx.VoidPtrTy};
  FunctionProtoType::ExtProtoInfo EPI;
  QualType RoutineFnTy = Ctx.getFunctionType(KmpInt32Ty, RoutineParamTys, EPI);
  KmpRoutineEntryPtrQTy = Ctx.getPointerType(RoutineFnTy);
  KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
}
3344
3345
10.5k
/// Returns the record type for __tgt_offload_entry, building it on the first
/// call and reusing TgtOffloadEntryQTy afterwards.
QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
  if (!TgtOffloadEntryQTy.isNull())
    return TgtOffloadEntryQTy;

  // The type being created is:
  // struct __tgt_offload_entry{
  //   void      *addr;       // Pointer to the offload entry info.
  //                          // (function or global)
  //   char      *name;       // Name of the function or global.
  //   size_t     size;       // Size of the entry info (0 if it a function).
  //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
  //   int32_t    reserved;   // Reserved, to use by the runtime library.
  // };
  ASTContext &Ctx = CGM.getContext();
  const QualType Int32Ty =
      Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true);

  RecordDecl *EntryRD = Ctx.buildImplicitRecord("__tgt_offload_entry");
  EntryRD->startDefinition();
  addFieldToRecordDecl(Ctx, EntryRD, Ctx.VoidPtrTy);                  // addr
  addFieldToRecordDecl(Ctx, EntryRD, Ctx.getPointerType(Ctx.CharTy)); // name
  addFieldToRecordDecl(Ctx, EntryRD, Ctx.getSizeType());              // size
  addFieldToRecordDecl(Ctx, EntryRD, Int32Ty);                        // flags
  addFieldToRecordDecl(Ctx, EntryRD, Int32Ty);                        // reserved
  EntryRD->completeDefinition();
  // The packed attribute is attached after the definition is completed.
  EntryRD->addAttr(PackedAttr::CreateImplicit(Ctx));

  TgtOffloadEntryQTy = Ctx.getRecordType(EntryRD);
  return TgtOffloadEntryQTy;
}
3373
3374
namespace {
3375
struct PrivateHelpersTy {
3376
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3377
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3378
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3379
1.64k
        PrivateElemInit(PrivateElemInit) {}
3380
8
  PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3381
  const Expr *OriginalRef = nullptr;
3382
  const VarDecl *Original = nullptr;
3383
  const VarDecl *PrivateCopy = nullptr;
3384
  const VarDecl *PrivateElemInit = nullptr;
3385
5.32k
  bool isLocalPrivate() const {
3386
5.32k
    return !OriginalRef && 
!PrivateCopy24
&&
!PrivateElemInit24
;
3387
5.32k
  }
3388
};
3389
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3390
} // anonymous namespace
3391
3392
73
static bool isAllocatableDecl(const VarDecl *VD) {
3393
73
  const VarDecl *CVD = VD->getCanonicalDecl();
3394
73
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3395
18
    return false;
3396
55
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3397
  // Use the default allocation.
3398
55
  return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
3399
42
            AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
3400
20
           !AA->getAllocator());
3401
55
}
3402
3403
/// Builds the implicit record ".kmp_privates.t" with one field per entry of
/// \p Privates, in the same order. Returns nullptr when \p Privates is empty.
static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (Privates.empty())
    return nullptr;

  ASTContext &Ctx = CGM.getContext();
  // Build struct .kmp_privates_t. {
  //         /*  private vars  */
  //       };
  RecordDecl *PrivatesRD = Ctx.buildImplicitRecord(".kmp_privates.t");
  PrivatesRD->startDefinition();
  for (const PrivateDataTy &Private : Privates) {
    const PrivateHelpersTy &Helper = Private.second;
    const VarDecl *OrigVD = Helper.Original;
    QualType FieldTy = OrigVD->getType().getNonReferenceType();
    if (Helper.isLocalPrivate()) {
      // If the private variable is a local variable with lvalue ref type,
      // allocate the pointer instead of the pointee type.
      if (OrigVD->getType()->isLValueReferenceType())
        FieldTy = Ctx.getPointerType(FieldTy);
      // Allocatable locals get one more pointer level.
      if (isAllocatableDecl(OrigVD))
        FieldTy = Ctx.getPointerType(FieldTy);
    }
    FieldDecl *Field = addFieldToRecordDecl(Ctx, PrivatesRD, FieldTy);
    if (!OrigVD->hasAttrs())
      continue;
    // Carry every 'aligned' attribute of the variable over to its field.
    using AlignedAttrIter = specific_attr_iterator<AlignedAttr>;
    for (AlignedAttrIter It(OrigVD->getAttrs().begin()),
         End(OrigVD->getAttrs().end());
         It != End; ++It)
      Field->addAttr(*It);
  }
  PrivatesRD->completeDefinition();
  return PrivatesRD;
}
3436
3437
/// Builds the implicit union "kmp_cmplrdata_t" and the implicit record
/// "kmp_task_t" that uses it. For taskloop directives five extra fields are
/// appended to kmp_task_t. Returns the kmp_task_t record.
static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  ASTContext &Ctx = CGM.getContext();
  // Build struct kmp_task_t {
  //         void *              shareds;
  //         kmp_routine_entry_t routine;
  //         kmp_int32           part_id;
  //         kmp_cmplrdata_t data1;
  //         kmp_cmplrdata_t data2;
  // For taskloops additional fields:
  //         kmp_uint64          lb;
  //         kmp_uint64          ub;
  //         kmp_int64           st;
  //         kmp_int32           liter;
  //         void *              reductions;
  //       };

  // The union type shared by the data1 and data2 fields.
  RecordDecl *CmplrDataUD =
      Ctx.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
  CmplrDataUD->startDefinition();
  addFieldToRecordDecl(Ctx, CmplrDataUD, KmpInt32Ty);
  addFieldToRecordDecl(Ctx, CmplrDataUD, KmpRoutineEntryPointerQTy);
  CmplrDataUD->completeDefinition();
  QualType KmpCmplrdataTy = Ctx.getRecordType(CmplrDataUD);

  RecordDecl *TaskRD = Ctx.buildImplicitRecord("kmp_task_t");
  TaskRD->startDefinition();
  addFieldToRecordDecl(Ctx, TaskRD, Ctx.VoidPtrTy);             // shareds
  addFieldToRecordDecl(Ctx, TaskRD, KmpRoutineEntryPointerQTy); // routine
  addFieldToRecordDecl(Ctx, TaskRD, KmpInt32Ty);                // part_id
  addFieldToRecordDecl(Ctx, TaskRD, KmpCmplrdataTy);            // data1
  addFieldToRecordDecl(Ctx, TaskRD, KmpCmplrdataTy);            // data2
  if (isOpenMPTaskLoopDirective(Kind)) {
    QualType KmpUInt64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
    QualType KmpInt64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    addFieldToRecordDecl(Ctx, TaskRD, KmpUInt64Ty);   // lb
    addFieldToRecordDecl(Ctx, TaskRD, KmpUInt64Ty);   // ub
    addFieldToRecordDecl(Ctx, TaskRD, KmpInt64Ty);    // st
    addFieldToRecordDecl(Ctx, TaskRD, KmpInt32Ty);    // liter
    addFieldToRecordDecl(Ctx, TaskRD, Ctx.VoidPtrTy); // reductions
  }
  TaskRD->completeDefinition();
  return TaskRD;
}
3482
3483
/// Builds the implicit record "kmp_task_t_with_privates": a field of type
/// \p KmpTaskTQTy followed, only when \p Privates is non-empty, by a field
/// holding the privates record.
static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  ASTContext &Ctx = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //         kmp_task_t task_data;
  //         .kmp_privates_t. privates;
  //       };
  RecordDecl *WithPrivatesRD =
      Ctx.buildImplicitRecord("kmp_task_t_with_privates");
  WithPrivatesRD->startDefinition();
  addFieldToRecordDecl(Ctx, WithPrivatesRD, KmpTaskTQTy);
  // createPrivatesRecordDecl() yields null when there is nothing to store; in
  // that case the second field is omitted.
  const RecordDecl *PrivatesRD = createPrivatesRecordDecl(CGM, Privates);
  if (PrivatesRD)
    addFieldToRecordDecl(Ctx, WithPrivatesRD, Ctx.getRecordType(PrivatesRD));
  WithPrivatesRD->completeDefinition();
  return WithPrivatesRD;
}
3499
3500
/// Emit a proxy function which accepts kmp_task_t as the second
3501
/// argument.
3502
/// \code
3503
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3504
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3505
///   For taskloops:
3506
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3507
///   tt->reductions, tt->shareds);
3508
///   return 0;
3509
/// }
3510
/// \endcode
3511
static llvm::Function *
3512
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
3513
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3514
                      QualType KmpTaskTWithPrivatesPtrQTy,
3515
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3516
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
3517
855
                      llvm::Value *TaskPrivatesMap) {
3518
855
  ASTContext &C = CGM.getContext();
3519
855
  FunctionArgList Args;
3520
855
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3521
855
                            ImplicitParamDecl::Other);
3522
855
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3523
855
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3524
855
                                ImplicitParamDecl::Other);
3525
855
  Args.push_back(&GtidArg);
3526
855
  Args.push_back(&TaskTypeArg);
3527
855
  const auto &TaskEntryFnInfo =
3528
855
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3529
855
  llvm::FunctionType *TaskEntryTy =
3530
855
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3531
855
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3532
855
  auto *TaskEntry = llvm::Function::Create(
3533
855
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3534
855
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3535
855
  TaskEntry->setDoesNotRecurse();
3536
855
  CodeGenFunction CGF(CGM);
3537
855
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3538
855
                    Loc, Loc);
3539
3540
  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3541
  // tt,
3542
  // For taskloops:
3543
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3544
  // tt->task_data.shareds);
3545
855
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3546
855
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3547
855
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
3548
855
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
3549
855
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3550
855
  const auto *KmpTaskTWithPrivatesQTyRD =
3551
855
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3552
855
  LValue Base =
3553
855
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3554
855
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3555
855
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3556
855
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3557
855
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3558
3559
855
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3560
855
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3561
855
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3562
855
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3563
855
      CGF.ConvertTypeForMem(SharedsPtrTy));
3564
3565
855
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3566
855
  llvm::Value *PrivatesParam;
3567
855
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3568
554
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3569
554
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3570
554
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3571
301
  } else {
3572
301
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3573
301
  }
3574
3575
855
  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
3576
855
                               TaskPrivatesMap,
3577
855
                               CGF.Builder
3578
855
                                   .CreatePointerBitCastOrAddrSpaceCast(
3579
855
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
3580
855
                                   .getPointer()};
3581
855
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3582
855
                                          std::end(CommonArgs));
3583
855
  if (isOpenMPTaskLoopDirective(Kind)) {
3584
226
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3585
226
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3586
226
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3587
226
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3588
226
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3589
226
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3590
226
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3591
226
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3592
226
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3593
226
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3594
226
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3595
226
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3596
226
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3597
226
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3598
226
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3599
226
    CallArgs.push_back(LBParam);
3600
226
    CallArgs.push_back(UBParam);
3601
226
    CallArgs.push_back(StParam);
3602
226
    CallArgs.push_back(LIParam);
3603
226
    CallArgs.push_back(RParam);
3604
226
  }
3605
855
  CallArgs.push_back(SharedsParam);
3606
3607
855
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3608
855
                                                  CallArgs);
3609
855
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3610
855
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3611
855
  CGF.FinishFunction();
3612
855
  return TaskEntry;
3613
855
}
3614
3615
/// Emits an internal function taking (kmp_int32 gtid,
/// kmp_task_t_with_privates *tt) that schedules destruction of every field of
/// the privates record whose type requires it. The privates record is read
/// from the second field of \p KmpTaskTWithPrivatesQTy.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(Ctx, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                            KmpInt32Ty, ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(Ctx, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DtorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DtorFnTy = CGM.getTypes().GetFunctionType(DtorFnInfo);
  std::string DtorName =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DtorFnTy, llvm::GlobalValue::InternalLinkage,
                             DtorName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, DtorFnInfo);
  DestructorFn->setDoesNotRecurse();

  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DtorFnInfo, Args,
                    Loc, Loc);

  // *tt
  LValue TaskLVal = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *WithPrivatesRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // The second field holds the privates.
  auto PrivatesFI = std::next(WithPrivatesRD->field_begin());
  LValue PrivatesLVal = CGF.EmitLValueForField(TaskLVal, *PrivatesFI);
  const auto *PrivatesRD =
      cast<RecordDecl>(PrivatesFI->getType()->getAsTagDecl());
  for (const auto *Field : PrivatesRD->fields()) {
    QualType::DestructionKind DtorKind = Field->getType().isDestructedType();
    if (!DtorKind)
      continue;
    LValue FieldLVal = CGF.EmitLValueForField(PrivatesLVal, Field);
    CGF.pushDestroy(DtorKind, FieldLVal.getAddress(CGF), Field->getType());
  }
  CGF.FinishFunction();
  return DestructorFn;
}
3663
3664
/// Emit a privates mapping function for correct handling of private and
3665
/// firstprivate variables.
3666
/// \code
3667
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3668
/// **noalias priv1,...,  <tyn> **noalias privn) {
3669
///   *priv1 = &.privates.priv1;
3670
///   ...;
3671
///   *privn = &.privates.privn;
3672
/// }
3673
/// \endcode
3674
static llvm::Value *
3675
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3676
                               const OMPTaskDataTy &Data, QualType PrivatesQTy,
3677
554
                               ArrayRef<PrivateDataTy> Privates) {
3678
554
  ASTContext &C = CGM.getContext();
3679
554
  FunctionArgList Args;
3680
554
  ImplicitParamDecl TaskPrivatesArg(
3681
554
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3682
554
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3683
554
      ImplicitParamDecl::Other);
3684
554
  Args.push_back(&TaskPrivatesArg);
3685
554
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3686
554
  unsigned Counter = 1;
3687
170
  for (const Expr *E : Data.PrivateVars) {
3688
170
    Args.push_back(ImplicitParamDecl::Create(
3689
170
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3690
170
        C.getPointerType(C.getPointerType(E->getType()))
3691
170
            .withConst()
3692
170
            .withRestrict(),
3693
170
        ImplicitParamDecl::Other));
3694
170
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3695
170
    PrivateVarsPos[VD] = Counter;
3696
170
    ++Counter;
3697
170
  }
3698
1.32k
  for (const Expr *E : Data.FirstprivateVars) {
3699
1.32k
    Args.push_back(ImplicitParamDecl::Create(
3700
1.32k
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3701
1.32k
        C.getPointerType(C.getPointerType(E->getType()))
3702
1.32k
            .withConst()
3703
1.32k
            .withRestrict(),
3704
1.32k
        ImplicitParamDecl::Other));
3705
1.32k
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3706
1.32k
    PrivateVarsPos[VD] = Counter;
3707
1.32k
    ++Counter;
3708
1.32k
  }
3709
151
  for (const Expr *E : Data.LastprivateVars) {
3710
151
    Args.push_back(ImplicitParamDecl::Create(
3711
151
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3712
151
        C.getPointerType(C.getPointerType(E->getType()))
3713
151
            .withConst()
3714
151
            .withRestrict(),
3715
151
        ImplicitParamDecl::Other));
3716
151
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3717
151
    PrivateVarsPos[VD] = Counter;
3718
151
    ++Counter;
3719
151
  }
3720
8
  for (const VarDecl *VD : Data.PrivateLocals) {
3721
8
    QualType Ty = VD->getType().getNonReferenceType();
3722
8
    if (VD->getType()->isLValueReferenceType())
3723
0
      Ty = C.getPointerType(Ty);
3724
8
    if (isAllocatableDecl(VD))
3725
2
      Ty = C.getPointerType(Ty);
3726
8
    Args.push_back(ImplicitParamDecl::Create(
3727
8
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3728
8
        C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3729
8
        ImplicitParamDecl::Other));
3730
8
    PrivateVarsPos[VD] = Counter;
3731
8
    ++Counter;
3732
8
  }
3733
554
  const auto &TaskPrivatesMapFnInfo =
3734
554
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3735
554
  llvm::FunctionType *TaskPrivatesMapTy =
3736
554
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3737
554
  std::string Name =
3738
554
      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3739
554
  auto *TaskPrivatesMap = llvm::Function::Create(
3740
554
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3741
554
      &CGM.getModule());
3742
554
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3743
554
                                    TaskPrivatesMapFnInfo);
3744
554
  if (CGM.getLangOpts().Optimize) {
3745
0
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3746
0
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3747
0
    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3748
0
  }
3749
554
  CodeGenFunction CGF(CGM);
3750
554
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3751
554
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);
3752
3753
  // *privi = &.privates.privi;
3754
554
  LValue Base = CGF.EmitLoadOfPointerLValue(
3755
554
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3756
554
      TaskPrivatesArg.getType()->castAs<PointerType>());
3757
554
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3758
554
  Counter = 0;
3759
1.65k
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3760
1.65k
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3761
1.65k
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3762
1.65k
    LValue RefLVal =
3763
1.65k
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3764
1.65k
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3765
1.65k
        RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3766
1.65k
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3767
1.65k
    ++Counter;
3768
1.65k
  }
3769
554
  CGF.FinishFunction();
3770
554
  return TaskPrivatesMap;
3771
554
}
3772
3773
/// Emit initialization for private variables in task-based directives.
3774
static void emitPrivatesInit(CodeGenFunction &CGF,
3775
                             const OMPExecutableDirective &D,
3776
                             Address KmpTaskSharedsPtr, LValue TDBase,
3777
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3778
                             QualType SharedsTy, QualType SharedsPtrTy,
3779
                             const OMPTaskDataTy &Data,
3780
653
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3781
653
  ASTContext &C = CGF.getContext();
3782
653
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3783
653
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3784
653
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
3785
250
                                 ? OMPD_taskloop
3786
403
                                 : OMPD_task;
3787
653
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
3788
653
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
3789
653
  LValue SrcBase;
3790
653
  bool IsTargetTask =
3791
653
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
3792
573
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
3793
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
3794
  // PointersArray, SizesArray, and MappersArray. The original variables for
3795
  // these arrays are not captured and we get their addresses explicitly.
3796
653
  if ((!IsTargetTask && 
!Data.FirstprivateVars.empty()309
&&
ForDup137
) ||
3797
627
      (IsTargetTask && 
KmpTaskSharedsPtr.isValid()344
)) {
3798
322
    SrcBase = CGF.MakeAddrLValue(
3799
322
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3800
322
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
3801
322
        SharedsTy);
3802
322
  }
3803
653
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
3804
2.01k
  for (const PrivateDataTy &Pair : Privates) {
3805
    // Do not initialize private locals.
3806
2.01k
    if (Pair.second.isLocalPrivate()) {
3807
8
      ++FI;
3808
8
      continue;
3809
8
    }
3810
2.01k
    const VarDecl *VD = Pair.second.PrivateCopy;
3811
2.01k
    const Expr *Init = VD->getAnyInitializer();
3812
2.01k
    if (Init && 
(1.63k
!ForDup1.63k
||
(206
isa<CXXConstructExpr>(Init)206
&&
3813
1.57k
                             
!CGF.isTrivialInitializer(Init)146
))) {
3814
1.57k
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
3815
1.57k
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
3816
1.37k
        const VarDecl *OriginalVD = Pair.second.Original;
3817
        // Check if the variable is the target-based BasePointersArray,
3818
        // PointersArray, SizesArray, or MappersArray.
3819
1.37k
        LValue SharedRefLValue;
3820
1.37k
        QualType Type = PrivateLValue.getType();
3821
1.37k
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
3822
1.37k
        if (IsTargetTask && 
!SharedField1.06k
) {
3823
674
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
3824
674
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
3825
674
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
3826
674
                         ->getNumParams() == 0 &&
3827
674
                 isa<TranslationUnitDecl>(
3828
674
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
3829
674
                         ->getDeclContext()) &&
3830
674
                 "Expected artificial target data variable.");
3831
674
          SharedRefLValue =
3832
674
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
3833
697
        } else if (ForDup) {
3834
50
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3835
50
          SharedRefLValue = CGF.MakeAddrLValue(
3836
50
              Address(SharedRefLValue.getPointer(CGF),
3837
50
                      C.getDeclAlign(OriginalVD)),
3838
50
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
3839
50
              SharedRefLValue.getTBAAInfo());
3840
647
        } else if (CGF.LambdaCaptureFields.count(
3841
647
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
3842
645
                   dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
3843
13
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3844
634
        } else {
3845
          // Processing for implicitly captured variables.
3846
634
          InlinedOpenMPRegionRAII Region(
3847
0
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
3848
634
              /*HasCancel=*/false);
3849
634
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3850
634
        }
3851
1.37k
        if (Type->isArrayType()) {
3852
          // Initialize firstprivate array.
3853
767
          if (!isa<CXXConstructExpr>(Init) || 
CGF.isTrivialInitializer(Init)52
) {
3854
            // Perform simple memcpy.
3855
715
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
3856
52
          } else {
3857
            // Initialize firstprivate array using element-by-element
3858
            // initialization.
3859
52
            CGF.EmitOMPAggregateAssign(
3860
52
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
3861
52
                Type,
3862
52
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3863
52
                                                  Address SrcElement) {
3864
                  // Clean up any temporaries needed by the initialization.
3865
52
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
3866
52
                  InitScope.addPrivate(
3867
52
                      Elem, [SrcElement]() -> Address { return SrcElement; });
3868
52
                  (void)InitScope.Privatize();
3869
                  // Emit initialization for single element.
3870
52
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3871
52
                      CGF, &CapturesInfo);
3872
52
                  CGF.EmitAnyExprToMem(Init, DestElement,
3873
52
                                       Init->getType().getQualifiers(),
3874
52
                                       /*IsInitializer=*/false);
3875
52
                });
3876
52
          }
3877
604
        } else {
3878
604
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
3879
604
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
3880
604
            return SharedRefLValue.getAddress(CGF);
3881
604
          });
3882
604
          (void)InitScope.Privatize();
3883
604
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3884
604
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3885
604
                             /*capturedByInit=*/false);
3886
604
        }
3887
202
      } else {
3888
202
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3889
202
      }
3890
1.57k
    }
3891
2.01k
    ++FI;
3892
2.01k
  }
3893
653
}
3894
3895
/// Check if duplication function is required for taskloops.
///
/// Walks \p Privates in order, ignoring local privates, and stops at the first
/// private copy whose initializer is a CXXConstructExpr that
/// CGF.isTrivialInitializer() does not accept.
/// \return true if such an initializer was found, false otherwise.
3896
static bool checkInitIsRequired(CodeGenFunction &CGF,
3897
102
                                ArrayRef<PrivateDataTy> Privates) {
3898
102
  bool InitRequired = false;
3899
206
  for (const PrivateDataTy &Pair : Privates) {
3900
206
    // Local privates are not considered here.
    if (Pair.second.isLocalPrivate())
3901
0
      continue;
3902
206
    const VarDecl *VD = Pair.second.PrivateCopy;
3903
206
    const Expr *Init = VD->getAnyInitializer();
3904
206
    // Only a non-trivial constructor call counts as "init required".
    InitRequired = InitRequired || (Init && 
isa<CXXConstructExpr>(Init)146
&&
3905
50
                                    !CGF.isTrivialInitializer(Init));
3906
206
    // One match is enough; no need to inspect the remaining privates.
    if (InitRequired)
3907
50
      break;
3908
206
  }
3909
102
  return InitRequired;
3910
102
}
3911
3912
3913
/// Emit task_dup function (for initialization of
3914
/// private/firstprivate/lastprivate vars and last_iter flag)
3915
/// \code
3916
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3917
/// lastpriv) {
3918
/// // setup lastprivate flag
3919
///    task_dst->last = lastpriv;
3920
/// // could be constructor calls here...
3921
/// }
3922
/// \endcode
///
/// The generated function has internal linkage and takes three arguments:
/// destination task, source task and the lastprivate flag. When
/// \p WithLastIter is set, the flag is stored into the KmpTaskTLastIter field
/// of the destination task. Private copies are then initialized through
/// emitPrivatesInit() with ForDup=true; the shareds pointer is read from the
/// source task only when \p Data has firstprivate variables.
/// \p Privates must not be empty (asserted).
/// \return the emitted llvm::Function.
3923
static llvm::Value *
3924
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3925
                    const OMPExecutableDirective &D,
3926
                    QualType KmpTaskTWithPrivatesPtrQTy,
3927
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3928
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3929
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3930
99
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3931
99
  ASTContext &C = CGM.getContext();
3932
99
  // Declare the three implicit parameters: dst task, src task, lastpriv flag.
  FunctionArgList Args;
3933
99
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3934
99
                           KmpTaskTWithPrivatesPtrQTy,
3935
99
                           ImplicitParamDecl::Other);
3936
99
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3937
99
                           KmpTaskTWithPrivatesPtrQTy,
3938
99
                           ImplicitParamDecl::Other);
3939
99
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3940
99
                                ImplicitParamDecl::Other);
3941
99
  Args.push_back(&DstArg);
3942
99
  Args.push_back(&SrcArg);
3943
99
  Args.push_back(&LastprivArg);
3944
99
  // Create the void-returning function with internal linkage.
  const auto &TaskDupFnInfo =
3945
99
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3946
99
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3947
99
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3948
99
  auto *TaskDup = llvm::Function::Create(
3949
99
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3950
99
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
3951
99
  TaskDup->setDoesNotRecurse();
3952
99
  // The body is emitted with a fresh CodeGenFunction, not the caller's.
  CodeGenFunction CGF(CGM);
3953
99
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3954
99
                    Loc);
3955
3956
99
  // Lvalue of the destination task (*task_dst).
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
3957
99
      CGF.GetAddrOfLocalVar(&DstArg),
3958
99
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3959
  // task_dst->liter = lastpriv;
3960
99
  if (WithLastIter) {
3961
49
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3962
49
    LValue Base = CGF.EmitLValueForField(
3963
49
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3964
49
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3965
49
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3966
49
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3967
49
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3968
49
  }
3969
3970
  // Emit initial values for private copies (if any).
3971
99
  assert(!Privates.empty());
3972
99
  Address KmpTaskSharedsPtr = Address::invalid();
3973
99
  if (!Data.FirstprivateVars.empty()) {
3974
26
    // Load the shareds pointer from the source task (task_src); this inner
    // TDBase intentionally shadows the destination one declared above.
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
3975
26
        CGF.GetAddrOfLocalVar(&SrcArg),
3976
26
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3977
26
    LValue Base = CGF.EmitLValueForField(
3978
26
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3979
26
    KmpTaskSharedsPtr = Address(
3980
26
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
3981
26
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
3982
26
                                                  KmpTaskTShareds)),
3983
26
                             Loc),
3984
26
        CGM.getNaturalTypeAlignment(SharedsTy));
3985
26
  }
3986
99
  // Initialize the privates of the destination task (outer TDBase).
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3987
99
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3988
99
  CGF.FinishFunction();
3989
99
  return TaskDup;
3990
99
}
3991
3992
/// Checks if destructor function is required to be generated.
///
/// Local privates are skipped; for every other entry the non-reference type of
/// the original variable is tested with QualType::isDestructedType().
/// NOTE(review): \p KmpTaskTWithPrivatesQTyRD is not used in the body.
3993
/// \return true if cleanups are required, false otherwise.
3994
static bool
3995
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3996
554
                         ArrayRef<PrivateDataTy> Privates) {
3997
1.44k
  for (const PrivateDataTy &P : Privates) {
3998
1.44k
    if (P.second.isLocalPrivate())
3999
8
      continue;
4000
1.44k
    QualType Ty = P.second.Original->getType().getNonReferenceType();
4001
1.44k
    // A single destructed type is enough to require the destructor function.
    if (Ty.isDestructedType())
4002
81
      return true;
4003
1.44k
  }
4004
473
  return false;
4005
554
}
4006
4007
namespace {
4008
/// Loop generator for OpenMP iterator expression.
///
/// The constructor opens one loop per iterator of the expression (blocks
/// "iter.cont" / "iter.body"); the destructor closes them in reverse order
/// (counter update, branch back to "iter.cont", then "iter.exit"). Code
/// emitted while the object is alive therefore lands in the innermost loop
/// body. With a null expression both constructor and destructor emit nothing.
4009
class OMPIteratorGeneratorScope final
4010
    : public CodeGenFunction::OMPPrivateScope {
4011
  CodeGenFunction &CGF;
4012
  const OMPIteratorExpr *E = nullptr;
4013
  // Per-iterator jump destinations, indexed by iterator number.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
4014
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
4015
  OMPIteratorGeneratorScope() = delete;
4016
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
4017
4018
public:
4019
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
4020
424
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
4021
424
    if (!E)
4022
418
      return;
4023
6
    // First pass: evaluate every upper bound and register private storage for
    // each iterator variable and its counter, before any loop block is opened.
    SmallVector<llvm::Value *, 4> Uppers;
4024
12
    for (unsigned I = 0, End = E->numOfIterators(); I < End; 
++I6
) {
4025
6
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
4026
6
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
4027
6
      addPrivate(VD, [&CGF, VD]() {
4028
6
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
4029
6
      });
4030
6
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
4031
6
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
4032
6
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
4033
6
                                 "counter.addr");
4034
6
      });
4035
6
    }
4036
6
    Privatize();
4037
4038
12
    // Second pass: emit the loop header for each iterator, outermost first.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; 
++I6
) {
4039
6
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
4040
6
      LValue CLVal =
4041
6
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
4042
6
                             HelperData.CounterVD->getType());
4043
      // Counter = 0;
4044
6
      CGF.EmitStoreOfScalar(
4045
6
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
4046
6
          CLVal);
4047
6
      CodeGenFunction::JumpDest &ContDest =
4048
6
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
4049
6
      CodeGenFunction::JumpDest &ExitDest =
4050
6
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
4051
      // N = <number-of_iterations>;
4052
6
      llvm::Value *N = Uppers[I];
4053
      // cont:
4054
      // if (Counter < N) goto body; else goto exit;
4055
6
      CGF.EmitBlock(ContDest.getBlock());
4056
6
      auto *CVal =
4057
6
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
4058
6
      // Signedness of the comparison follows the counter's declared type.
      llvm::Value *Cmp =
4059
6
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
4060
4
              ? CGF.Builder.CreateICmpSLT(CVal, N)
4061
2
              : CGF.Builder.CreateICmpULT(CVal, N);
4062
6
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
4063
6
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
4064
      // body:
4065
6
      CGF.EmitBlock(BodyBB);
4066
      // Iteri = Begini + Counter * Stepi;
4067
6
      CGF.EmitIgnoredExpr(HelperData.Update);
4068
6
    }
4069
6
  }
4070
424
  ~OMPIteratorGeneratorScope() {
4071
424
    if (!E)
4072
418
      return;
4073
12
    
for (unsigned I = E->numOfIterators(); 6
I > 0;
--I6
) {
4074
      // Counter = Counter + 1;
4075
6
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
4076
6
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
4077
      // goto cont;
4078
6
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
4079
      // exit:
4080
6
      // IsFinished is passed as true only for the outermost loop (I == 1).
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
4081
6
    }
4082
6
  }
4083
};
4084
} // namespace
4085
4086
/// Emits the address and the size value for expression \p E.
///
/// - OMPArrayShapingExpr: the address is the scalar value of the base
///   expression; the size is the size of the base's pointee type multiplied
///   by every dimension (each converted to size_t).
/// - OMPArraySectionExpr (after stripping parens/implicit casts): the address
///   is the lvalue pointer of \p E; the size is the distance between that
///   address and one element past the section's upper-bound element.
/// - Anything else: the address is the lvalue pointer of \p E and the size is
///   CGF.getTypeSize() of its type.
/// \return pair (address, size).
static std::pair<llvm::Value *, llvm::Value *>
4087
958
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4088
958
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4089
958
  llvm::Value *Addr;
4090
958
  if (OASE) {
4091
6
    const Expr *Base = OASE->getBase();
4092
6
    Addr = CGF.EmitScalarExpr(Base);
4093
952
  } else {
4094
952
    Addr = CGF.EmitLValue(E).getPointer(CGF);
4095
952
  }
4096
958
  llvm::Value *SizeVal;
4097
958
  QualType Ty = E->getType();
4098
958
  if (OASE) {
4099
6
    // size = sizeof(pointee) * dim0 * dim1 * ...
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4100
18
    for (const Expr *SE : OASE->getDimensions()) {
4101
18
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4102
18
      Sz = CGF.EmitScalarConversion(
4103
18
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4104
18
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4105
18
    }
4106
952
  } else if (const auto *ASE =
4107
28
                 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4108
28
    // size = (address of upper-bound element + 1 element) - lower address
    LValue UpAddrLVal =
4109
28
        CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4110
28
    llvm::Value *UpAddr =
4111
28
        CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
4112
28
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4113
28
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4114
28
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4115
924
  } else {
4116
924
    SizeVal = CGF.getTypeSize(Ty);
4117
924
  }
4118
958
  return std::make_pair(Addr, SizeVal);
4119
958
}
4120
4121
/// Builds kmp_task_affinity_info_t, if it is not built yet.
///
/// When \p KmpTaskAffinityInfoTy is null it is set to an implicit record with
/// three fields, in this order: an intptr_t, a size_t and a 32-bit unsigned
/// integer. A non-null \p KmpTaskAffinityInfoTy is left untouched.
4122
4
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4123
4
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4124
4
  if (KmpTaskAffinityInfoTy.isNull()) {
4125
2
    RecordDecl *KmpAffinityInfoRD =
4126
2
        C.buildImplicitRecord("kmp_task_affinity_info_t");
4127
2
    KmpAffinityInfoRD->startDefinition();
4128
2
    // Field order must match the RTLAffinityInfoFieldsTy indices used by
    // emitTaskInit (BaseAddr, Len, Flags).
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4129
2
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4130
2
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4131
2
    KmpAffinityInfoRD->completeDefinition();
4132
2
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4133
2
  }
4134
4
}
4135
4136
/// Emits the allocation and initialization of a task descriptor for
/// directive \p D.
///
/// Steps, in emission order:
///  1. collect private/firstprivate/lastprivate/local-private entries and
///     stable-sort them by decreasing alignment;
///  2. pick (creating on first use) the kmp_task_t record for taskloop or
///     non-taskloop directives and build the "with privates" record;
///  3. emit the privates mapping function and the proxy task entry;
///  4. compute the task flags and call __kmpc_omp_task_alloc, or
///     __kmpc_omp_target_task_alloc when \p D has a nowait clause;
///  5. handle the detach and affinity clauses;
///  6. copy the shareds, initialize the privates, and emit the task_dup,
///     destructors and priority data when required.
/// \return a TaskResultTy with NewTask, TaskEntry, NewTaskNewTaskTTy, TDBase
/// and KmpTaskTQTyRD set, plus TaskDupFn when a task_dup function was emitted.
CGOpenMPRuntime::TaskResultTy
4137
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
4138
                              const OMPExecutableDirective &D,
4139
                              llvm::Function *TaskFunction, QualType SharedsTy,
4140
855
                              Address Shareds, const OMPTaskDataTy &Data) {
4141
855
  ASTContext &C = CGM.getContext();
4142
855
  llvm::SmallVector<PrivateDataTy, 4> Privates;
4143
  // Aggregate privates and sort them by the alignment.
4144
855
  const auto *I = Data.PrivateCopies.begin();
4145
170
  for (const Expr *E : Data.PrivateVars) {
4146
170
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4147
170
    Privates.emplace_back(
4148
170
        C.getDeclAlign(VD),
4149
170
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4150
170
                         /*PrivateElemInit=*/nullptr));
4151
170
    ++I;
4152
170
  }
4153
855
  I = Data.FirstprivateCopies.begin();
4154
855
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
4155
1.32k
  for (const Expr *E : Data.FirstprivateVars) {
4156
1.32k
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4157
1.32k
    Privates.emplace_back(
4158
1.32k
        C.getDeclAlign(VD),
4159
1.32k
        PrivateHelpersTy(
4160
1.32k
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4161
1.32k
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
4162
1.32k
    ++I;
4163
1.32k
    ++IElemInitRef;
4164
1.32k
  }
4165
855
  I = Data.LastprivateCopies.begin();
4166
151
  for (const Expr *E : Data.LastprivateVars) {
4167
151
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4168
151
    Privates.emplace_back(
4169
151
        C.getDeclAlign(VD),
4170
151
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4171
151
                         /*PrivateElemInit=*/nullptr));
4172
151
    ++I;
4173
151
  }
4174
8
  // Allocatable local privates are recorded with pointer alignment instead of
  // the declaration's own alignment.
  for (const VarDecl *VD : Data.PrivateLocals) {
4175
8
    if (isAllocatableDecl(VD))
4176
2
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
4177
6
    else
4178
6
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
4179
8
  }
4180
855
  // Largest alignment first; stable, so equal alignments keep insertion order.
  llvm::stable_sort(Privates,
4181
1.60k
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
4182
1.60k
                      return L.first > R.first;
4183
1.60k
                    });
4184
855
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
4185
  // Build type kmp_routine_entry_t (if not built yet).
4186
855
  emitKmpRoutineEntryT(KmpInt32Ty);
4187
  // Build type kmp_task_t (if not built yet).
4188
855
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
4189
226
    if (SavedKmpTaskloopTQTy.isNull()) {
4190
128
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4191
128
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4192
128
    }
4193
226
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
4194
629
  } else {
4195
629
    assert((D.getDirectiveKind() == OMPD_task ||
4196
629
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
4197
629
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
4198
629
           "Expected taskloop, task or target directive");
4199
629
    if (SavedKmpTaskTQTy.isNull()) {
4200
269
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4201
269
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4202
269
    }
4203
629
    KmpTaskTQTy = SavedKmpTaskTQTy;
4204
629
  }
4205
855
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
4206
  // Build particular struct kmp_task_t for the given task.
4207
855
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
4208
855
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
4209
855
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
4210
855
  QualType KmpTaskTWithPrivatesPtrQTy =
4211
855
      C.getPointerType(KmpTaskTWithPrivatesQTy);
4212
855
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
4213
855
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
4214
855
      KmpTaskTWithPrivatesTy->getPointerTo();
4215
855
  llvm::Value *KmpTaskTWithPrivatesTySize =
4216
855
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
4217
855
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);
4218
4219
  // Emit initial values for private copies (if any).
4220
855
  llvm::Value *TaskPrivatesMap = nullptr;
4221
855
  // The privates map is cast to the type of the task function's fourth
  // parameter (index 3); without privates a null pointer of that type is used.
  llvm::Type *TaskPrivatesMapTy =
4222
855
      std::next(TaskFunction->arg_begin(), 3)->getType();
4223
855
  if (!Privates.empty()) {
4224
554
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4225
554
    TaskPrivatesMap =
4226
554
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
4227
554
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4228
554
        TaskPrivatesMap, TaskPrivatesMapTy);
4229
301
  } else {
4230
301
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
4231
301
        cast<llvm::PointerType>(TaskPrivatesMapTy));
4232
301
  }
4233
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
4234
  // kmp_task_t *tt);
4235
855
  llvm::Function *TaskEntry = emitProxyTaskFunction(
4236
855
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4237
855
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
4238
855
      TaskPrivatesMap);
4239
4240
  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
4241
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
4242
  // kmp_routine_entry_t *task_entry);
4243
  // Task flags. Format is taken from
4244
  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
4245
  // description of kmp_tasking_flags struct.
4246
855
  enum {
4247
855
    TiedFlag = 0x1,
4248
855
    FinalFlag = 0x2,
4249
855
    DestructorsFlag = 0x8,
4250
855
    PriorityFlag = 0x20,
4251
855
    DetachableFlag = 0x40,
4252
855
  };
4253
839
  unsigned Flags = Data.Tied ? TiedFlag : 
016
;
4254
855
  bool NeedsCleanup = false;
4255
855
  if (!Privates.empty()) {
4256
554
    NeedsCleanup =
4257
554
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
4258
554
    if (NeedsCleanup)
4259
81
      Flags = Flags | DestructorsFlag;
4260
554
  }
4261
855
  if (Data.Priority.getInt())
4262
22
    Flags = Flags | PriorityFlag;
4263
855
  if (D.hasClausesOfKind<OMPDetachClause>())
4264
2
    Flags = Flags | DetachableFlag;
4265
855
  // The final flag is a select on Data.Final's pointer value when one is
  // present, otherwise a constant taken from Data.Final's int part; it is then
  // OR-ed with the statically known flags.
  llvm::Value *TaskFlags =
4266
855
      Data.Final.getPointer()
4267
10
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
4268
10
                                     CGF.Builder.getInt32(FinalFlag),
4269
10
                                     CGF.Builder.getInt32(/*C=*/0))
4270
845
          : CGF.Builder.getInt32(Data.Final.getInt() ? 
FinalFlag8
:
0837
);
4271
855
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
4272
855
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
4273
855
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
4274
855
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
4275
855
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4276
855
          TaskEntry, KmpRoutineEntryPtrTy)};
4277
855
  llvm::Value *NewTask;
4278
855
  // With a nowait clause the target-task allocator is called, which takes the
  // device id as an additional trailing argument.
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
4279
    // Check if we have any device clause associated with the directive.
4280
284
    const Expr *Device = nullptr;
4281
284
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
4282
166
      Device = C->getDevice();
4283
    // Emit device ID if any otherwise use default value.
4284
284
    llvm::Value *DeviceID;
4285
284
    if (Device)
4286
166
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
4287
166
                                           CGF.Int64Ty, /*isSigned=*/true);
4288
118
    else
4289
118
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
4290
284
    AllocArgs.push_back(DeviceID);
4291
284
    NewTask = CGF.EmitRuntimeCall(
4292
284
        OMPBuilder.getOrCreateRuntimeFunction(
4293
284
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
4294
284
        AllocArgs);
4295
571
  } else {
4296
571
    NewTask =
4297
571
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4298
571
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
4299
571
                            AllocArgs);
4300
571
  }
4301
  // Emit detach clause initialization.
4302
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
4303
  // task_descriptor);
4304
855
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
4305
2
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
4306
2
    LValue EvtLVal = CGF.EmitLValue(Evt);
4307
4308
    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
4309
    // int gtid, kmp_task_t *task);
4310
2
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
4311
2
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
4312
2
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
4313
2
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
4314
2
        OMPBuilder.getOrCreateRuntimeFunction(
4315
2
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
4316
2
        {Loc, Tid, NewTask});
4317
2
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
4318
2
                                      Evt->getExprLoc());
4319
2
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
4320
2
  }
4321
  // Process affinity clauses.
4322
855
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
4323
    // Process list of affinity data.
4324
4
    ASTContext &C = CGM.getContext();
4325
4
    Address AffinitiesArray = Address::invalid();
4326
    // Calculate number of elements to form the array of affinity data.
4327
4
    // NumOfElements accumulates the iterator upper bounds of clauses with an
    // iterator modifier; NumAffinities counts the items of clauses without.
    llvm::Value *NumOfElements = nullptr;
4328
4
    unsigned NumAffinities = 0;
4329
6
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4330
6
      if (const Expr *Modifier = C->getModifier()) {
4331
2
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
4332
4
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; 
++I2
) {
4333
2
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4334
2
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4335
2
          NumOfElements =
4336
2
              NumOfElements ? 
CGF.Builder.CreateNUWMul(NumOfElements, Sz)0
: Sz;
4337
2
        }
4338
4
      } else {
4339
4
        NumAffinities += C->varlist_size();
4340
4
      }
4341
6
    }
4342
4
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
4343
    // Fields ids in kmp_task_affinity_info record.
4344
4
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
4345
4346
4
    QualType KmpTaskAffinityInfoArrayTy;
4347
4
    // A runtime element count requires a variable-length array; otherwise a
    // constant-size temporary is used.
    if (NumOfElements) {
4348
2
      NumOfElements = CGF.Builder.CreateNUWAdd(
4349
2
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
4350
2
      OpaqueValueExpr OVE(
4351
2
          Loc,
4352
2
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
4353
2
          VK_RValue);
4354
2
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
4355
2
                                                    RValue::get(NumOfElements));
4356
2
      KmpTaskAffinityInfoArrayTy =
4357
2
          C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
4358
2
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4359
      // Properly emit variable-sized array.
4360
2
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
4361
2
                                           ImplicitParamDecl::Other);
4362
2
      CGF.EmitVarDecl(*PD);
4363
2
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
4364
2
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4365
2
                                                /*isSigned=*/false);
4366
2
    } else {
4367
2
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
4368
2
          KmpTaskAffinityInfoTy,
4369
2
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
4370
2
          ArrayType::Normal, /*IndexTypeQuals=*/0);
4371
2
      AffinitiesArray =
4372
2
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
4373
2
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
4374
2
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
4375
2
                                             /*isSigned=*/false);
4376
2
    }
4377
4378
4
    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
4379
    // Fill array by elements without iterators.
4380
4
    unsigned Pos = 0;
4381
4
    bool HasIterator = false;
4382
6
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4383
6
      if (C->getModifier()) {
4384
2
        HasIterator = true;
4385
2
        continue;
4386
2
      }
4387
4
      for (const Expr *E : C->varlists()) {
4388
4
        llvm::Value *Addr;
4389
4
        llvm::Value *Size;
4390
4
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4391
4
        LValue Base =
4392
4
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
4393
4
                               KmpTaskAffinityInfoTy);
4394
        // affs[i].base_addr = &<Affinities[i].second>;
4395
4
        LValue BaseAddrLVal = CGF.EmitLValueForField(
4396
4
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4397
4
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4398
4
                              BaseAddrLVal);
4399
        // affs[i].len = sizeof(<Affinities[i].second>);
4400
4
        LValue LenLVal = CGF.EmitLValueForField(
4401
4
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4402
4
        CGF.EmitStoreOfScalar(Size, LenLVal);
4403
4
        ++Pos;
4404
4
      }
4405
4
    }
4406
4
    // Entries produced by iterator clauses are written at a position kept in
    // memory, starting right after the statically placed entries (Pos).
    LValue PosLVal;
4407
4
    if (HasIterator) {
4408
2
      PosLVal = CGF.MakeAddrLValue(
4409
2
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
4410
2
          C.getSizeType());
4411
2
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4412
2
    }
4413
    // Process elements with iterators.
4414
6
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4415
6
      const Expr *Modifier = C->getModifier();
4416
6
      if (!Modifier)
4417
4
        continue;
4418
2
      // The scope object opens the iterator loops here and closes them when it
      // goes out of scope at the end of this clause's iteration.
      OMPIteratorGeneratorScope IteratorScope(
4419
2
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
4420
2
      for (const Expr *E : C->varlists()) {
4421
2
        llvm::Value *Addr;
4422
2
        llvm::Value *Size;
4423
2
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4424
2
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4425
2
        LValue Base = CGF.MakeAddrLValue(
4426
2
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx),
4427
2
                    AffinitiesArray.getAlignment()),
4428
2
            KmpTaskAffinityInfoTy);
4429
        // affs[i].base_addr = &<Affinities[i].second>;
4430
2
        LValue BaseAddrLVal = CGF.EmitLValueForField(
4431
2
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4432
2
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4433
2
                              BaseAddrLVal);
4434
        // affs[i].len = sizeof(<Affinities[i].second>);
4435
2
        LValue LenLVal = CGF.EmitLValueForField(
4436
2
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4437
2
        CGF.EmitStoreOfScalar(Size, LenLVal);
4438
2
        Idx = CGF.Builder.CreateNUWAdd(
4439
2
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
4440
2
        CGF.EmitStoreOfScalar(Idx, PosLVal);
4441
2
      }
4442
2
    }
4443
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
4444
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
4445
    // naffins, kmp_task_affinity_info_t *affin_list);
4446
4
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
4447
4
    llvm::Value *GTid = getThreadID(CGF, Loc);
4448
4
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4449
4
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
4450
    // FIXME: Emit the function and ignore its result for now unless the
4451
    // runtime function is properly implemented.
4452
4
    (void)CGF.EmitRuntimeCall(
4453
4
        OMPBuilder.getOrCreateRuntimeFunction(
4454
4
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
4455
4
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
4456
4
  }
4457
855
  // View the allocated task as a pointer to the "with privates" record; its
  // first field is the kmp_task_t part (TDBase).
  llvm::Value *NewTaskNewTaskTTy =
4458
855
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4459
855
          NewTask, KmpTaskTWithPrivatesPtrTy);
4460
855
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
4461
855
                                               KmpTaskTWithPrivatesQTy);
4462
855
  LValue TDBase =
4463
855
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
4464
  // Fill the data in the resulting kmp_task_t record.
4465
  // Copy shareds if there are any.
4466
855
  Address KmpTaskSharedsPtr = Address::invalid();
4467
855
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
4468
544
    KmpTaskSharedsPtr =
4469
544
        Address(CGF.EmitLoadOfScalar(
4470
544
                    CGF.EmitLValueForField(
4471
544
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
4472
544
                                           KmpTaskTShareds)),
4473
544
                    Loc),
4474
544
                CGM.getNaturalTypeAlignment(SharedsTy));
4475
544
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
4476
544
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
4477
544
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
4478
544
  }
4479
  // Emit initial values for private copies (if any).
4480
855
  TaskResultTy Result;
4481
855
  if (!Privates.empty()) {
4482
554
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
4483
554
                     SharedsTy, SharedsPtrTy, Data, Privates,
4484
554
                     /*ForDup=*/false);
4485
554
    // A task_dup function is emitted only for taskloop directives that have
    // lastprivates or a private needing non-trivial construction.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
4486
151
        (!Data.LastprivateVars.empty() || 
checkInitIsRequired(CGF, Privates)102
)) {
4487
99
      Result.TaskDupFn = emitTaskDupFunction(
4488
99
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
4489
99
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
4490
99
          /*WithLastIter=*/!Data.LastprivateVars.empty());
4491
99
    }
4492
554
  }
4493
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
4494
855
  enum { Priority = 0, Destructors = 1 };
4495
  // Provide pointer to function with destructors for privates.
4496
855
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
4497
855
  const RecordDecl *KmpCmplrdataUD =
4498
855
      (*FI)->getType()->getAsUnionType()->getDecl();
4499
855
  if (NeedsCleanup) {
4500
81
    llvm::Value *DestructorFn = emitDestructorsFunction(
4501
81
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4502
81
        KmpTaskTWithPrivatesQTy);
4503
81
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
4504
81
    LValue DestructorsLV = CGF.EmitLValueForField(
4505
81
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
4506
81
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4507
81
                              DestructorFn, KmpRoutineEntryPtrTy),
4508
81
                          DestructorsLV);
4509
81
  }
4510
  // Set priority.
4511
855
  if (Data.Priority.getInt()) {
4512
22
    LValue Data2LV = CGF.EmitLValueForField(
4513
22
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
4514
22
    LValue PriorityLV = CGF.EmitLValueForField(
4515
22
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
4516
22
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
4517
22
  }
4518
855
  Result.NewTask = NewTask;
4519
855
  Result.TaskEntry = TaskEntry;
4520
855
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
4521
855
  Result.TDBase = TDBase;
4522
855
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
4523
855
  return Result;
4524
855
}
4525
4526
namespace {
/// Dependence kinds in the numeric encoding handed to the runtime library.
/// translateDependencyKind() produces these values from the depend clause
/// kind; they end up in the flags field of a kmp_depend_info record.
enum RTLDependenceKindTy {
  DepIn = 0x1,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4,
};
/// Indices of the fields of the kmp_depend_info record, in declaration order.
/// They are used as offsets from the record's field_begin().
enum RTLDependInfoFieldsTy { BaseAddr = 0, Len = 1, Flags = 2 };
} // namespace
4536
4537
/// Translates internal dependency kind into the runtime kind.
4538
956
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4539
956
  RTLDependenceKindTy DepKind;
4540
956
  switch (K) {
4541
170
  case OMPC_DEPEND_in:
4542
170
    DepKind = DepIn;
4543
170
    break;
4544
  // Out and InOut dependencies must use the same code.
4545
334
  case OMPC_DEPEND_out:
4546
776
  case OMPC_DEPEND_inout:
4547
776
    DepKind = DepInOut;
4548
776
    break;
4549
10
  case OMPC_DEPEND_mutexinoutset:
4550
10
    DepKind = DepMutexInOutSet;
4551
10
    break;
4552
0
  case OMPC_DEPEND_source:
4553
0
  case OMPC_DEPEND_sink:
4554
0
  case OMPC_DEPEND_depobj:
4555
0
  case OMPC_DEPEND_unknown:
4556
0
    llvm_unreachable("Unknown task dependence type");
4557
956
  }
4558
956
  return DepKind;
4559
956
}
4560
4561
/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4562
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4563
784
                           QualType &FlagsTy) {
4564
784
  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4565
784
  if (KmpDependInfoTy.isNull()) {
4566
90
    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4567
90
    KmpDependInfoRD->startDefinition();
4568
90
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4569
90
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4570
90
    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4571
90
    KmpDependInfoRD->completeDefinition();
4572
90
    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4573
90
  }
4574
784
}
4575
4576
std::pair<llvm::Value *, LValue>
4577
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4578
4
                                   SourceLocation Loc) {
4579
4
  ASTContext &C = CGM.getContext();
4580
4
  QualType FlagsTy;
4581
4
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
4582
4
  RecordDecl *KmpDependInfoRD =
4583
4
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4584
4
  LValue Base = CGF.EmitLoadOfPointerLValue(
4585
4
      DepobjLVal.getAddress(CGF),
4586
4
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4587
4
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4588
4
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4589
4
          Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
4590
4
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4591
4
                            Base.getTBAAInfo());
4592
4
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4593
4
      Addr.getPointer(),
4594
4
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4595
4
  LValue NumDepsBase = CGF.MakeAddrLValue(
4596
4
      Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4597
4
      Base.getBaseInfo(), Base.getTBAAInfo());
4598
  // NumDeps = deps[i].base_addr;
4599
4
  LValue BaseAddrLVal = CGF.EmitLValueForField(
4600
4
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4601
4
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4602
4
  return std::make_pair(NumDeps, Base);
4603
4
}
4604
4605
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4606
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
4607
                           const OMPTaskDataTy::DependData &Data,
4608
418
                           Address DependenciesArray) {
4609
418
  CodeGenModule &CGM = CGF.CGM;
4610
418
  ASTContext &C = CGM.getContext();
4611
418
  QualType FlagsTy;
4612
418
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
4613
418
  RecordDecl *KmpDependInfoRD =
4614
418
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4615
418
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4616
4617
418
  OMPIteratorGeneratorScope IteratorScope(
4618
418
      CGF, cast_or_null<OMPIteratorExpr>(
4619
4
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4620
414
                                 : nullptr));
4621
952
  for (const Expr *E : Data.DepExprs) {
4622
952
    llvm::Value *Addr;
4623
952
    llvm::Value *Size;
4624
952
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4625
952
    LValue Base;
4626
952
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4627
948
      Base = CGF.MakeAddrLValue(
4628
948
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4629
4
    } else {
4630
4
      LValue &PosLVal = *Pos.get<LValue *>();
4631
4
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4632
4
      Base = CGF.MakeAddrLValue(
4633
4
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
4634
4
                  DependenciesArray.getAlignment()),
4635
4
          KmpDependInfoTy);
4636
4
    }
4637
    // deps[i].base_addr = &<Dependencies[i].second>;
4638
952
    LValue BaseAddrLVal = CGF.EmitLValueForField(
4639
952
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4640
952
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4641
952
                          BaseAddrLVal);
4642
    // deps[i].len = sizeof(<Dependencies[i].second>);
4643
952
    LValue LenLVal = CGF.EmitLValueForField(
4644
952
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
4645
952
    CGF.EmitStoreOfScalar(Size, LenLVal);
4646
    // deps[i].flags = <Dependencies[i].first>;
4647
952
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4648
952
    LValue FlagsLVal = CGF.EmitLValueForField(
4649
952
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
4650
952
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
4651
952
                          FlagsLVal);
4652
952
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4653
948
      ++(*P);
4654
4
    } else {
4655
4
      LValue &PosLVal = *Pos.get<LValue *>();
4656
4
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4657
4
      Idx = CGF.Builder.CreateNUWAdd(Idx,
4658
4
                                     llvm::ConstantInt::get(Idx->getType(), 1));
4659
4
      CGF.EmitStoreOfScalar(Idx, PosLVal);
4660
4
    }
4661
952
  }
4662
418
}
4663
4664
/// Emits code that determines how many kmp_depend_info elements each depobj
/// listed in \p Data contributes.
///
/// For every depobj expression a zero-initialized temporary
/// ("depobj.size.addr") is created and the element count stored in the record
/// preceding the depobj's array is added to it inside the iterator scope.
///
/// \returns one loaded counter value per expression in Data.DepExprs, in the
/// same order; the loads are emitted after the iterator scope is closed.
static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> Counters;
  ASTContext &Ctx = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(Ctx, KmpDependInfoTy, FlagsTy);
  auto *DependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *DependInfoPtrT =
      CGF.ConvertTypeForMem(Ctx.getPointerType(KmpDependInfoTy));
  {
    auto *IterExpr = cast_or_null<OMPIteratorExpr>(
        Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                          : nullptr);
    OMPIteratorGeneratorScope IteratorScope(CGF, IterExpr);
    for (const Expr *DepobjExpr : Data.DepExprs) {
      // The depobj variable stores a void * to its first dependency.
      LValue DepobjLVal = CGF.EmitLValue(DepobjExpr->IgnoreParenImpCasts());
      LValue PtrLVal = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          Ctx.getPointerType(Ctx.VoidPtrTy).castAs<PointerType>());
      Address DepsAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          PtrLVal.getAddress(CGF), DependInfoPtrT);

      // The record right before the array carries the element count.
      llvm::Value *MinusOne =
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true);
      llvm::Value *HeaderPtr =
          CGF.Builder.CreateGEP(DepsAddr.getPointer(), MinusOne);
      LValue HeaderLVal = CGF.MakeAddrLValue(
          Address(HeaderPtr, DepsAddr.getAlignment()), KmpDependInfoTy,
          PtrLVal.getBaseInfo(), PtrLVal.getTBAAInfo());
      // NumDeps = deps[-1].base_addr;
      LValue CountLVal = CGF.EmitLValueForField(
          HeaderLVal, *std::next(DependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(CountLVal, DepobjExpr->getExprLoc());

      // Accumulate the count into a per-expression temporary.
      LValue Counter = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(Ctx.getUIntPtrType(), "depobj.size.addr"),
          Ctx.getUIntPtrType());
      CGF.InitTempAlloca(Counter.getAddress(CGF),
                         llvm::ConstantInt::get(CGF.IntPtrTy, 0));
      llvm::Value *Prev =
          CGF.EmitLoadOfScalar(Counter, DepobjExpr->getExprLoc());
      llvm::Value *Sum = CGF.Builder.CreateNUWAdd(Prev, NumDeps);
      CGF.EmitStoreOfScalar(Sum, Counter);
      Counters.push_back(Counter);
    }
  }
  // Read the accumulated totals back once the iterator scope is finished.
  for (unsigned I = 0, N = Counters.size(); I != N; ++I)
    Sizes.push_back(
        CGF.EmitLoadOfScalar(Counters[I], Data.DepExprs[I]->getExprLoc()));
  return Sizes;
}
4721
4722
static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4723
                               LValue PosLVal,
4724
                               const OMPTaskDataTy::DependData &Data,
4725
2
                               Address DependenciesArray) {
4726
2
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
4727
2
         "Expected depobj dependecy kind.");
4728
2
  ASTContext &C = CGF.getContext();
4729
2
  QualType FlagsTy;
4730
2
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
4731
2
  RecordDecl *KmpDependInfoRD =
4732
2
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4733
2
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4734
2
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4735
2
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4736
2
  {
4737
2
    OMPIteratorGeneratorScope IteratorScope(
4738
2
        CGF, cast_or_null<OMPIteratorExpr>(
4739
0
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4740
2
                                   : nullptr));
4741
6
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; 
++I4
) {
4742
4
      const Expr *E = Data.DepExprs[I];
4743
4
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4744
4
      LValue Base = CGF.EmitLoadOfPointerLValue(
4745
4
          DepobjLVal.getAddress(CGF),
4746
4
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4747
4
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4748
4
          Base.getAddress(CGF), KmpDependInfoPtrT);
4749
4
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4750
4
                                Base.getTBAAInfo());
4751
4752
      // Get number of elements in a single depobj.
4753
4
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4754
4
          Addr.getPointer(),
4755
4
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4756
4
      LValue NumDepsBase = CGF.MakeAddrLValue(
4757
4
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4758
4
          Base.getBaseInfo(), Base.getTBAAInfo());
4759
      // NumDeps = deps[i].base_addr;
4760
4
      LValue BaseAddrLVal = CGF.EmitLValueForField(
4761
4
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4762
4
      llvm::Value *NumDeps =
4763
4
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4764
4765
      // memcopy dependency data.
4766
4
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
4767
4
          ElSize,
4768
4
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4769
4
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4770
4
      Address DepAddr =
4771
4
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
4772
4
                  DependenciesArray.getAlignment());
4773
4
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
4774
4775
      // Increase pos.
4776
      // pos += size;
4777
4
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4778
4
      CGF.EmitStoreOfScalar(Add, PosLVal);
4779
4
    }
4780
2
  }
4781
2
}
4782
4783
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4784
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4785
629
    SourceLocation Loc) {
4786
629
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4787
344
        return D.DepExprs.empty();
4788
344
      }))
4789
285
    return std::make_pair(nullptr, Address::invalid());
4790
  // Process list of dependencies.
4791
344
  ASTContext &C = CGM.getContext();
4792
344
  Address DependenciesArray = Address::invalid();
4793
344
  llvm::Value *NumOfElements = nullptr;
4794
344
  unsigned NumDependencies = std::accumulate(
4795
344
      Dependencies.begin(), Dependencies.end(), 0,
4796
414
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
4797
414
        return D.DepKind == OMPC_DEPEND_depobj
4798
2
                   ? V
4799
412
                   : (V + (D.IteratorExpr ? 
02
:
D.DepExprs.size()410
));
4800
414
      });
4801
344
  QualType FlagsTy;
4802
344
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
4803
344
  bool HasDepobjDeps = false;
4804
344
  bool HasRegularWithIterators = false;
4805
344
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4806
344
  llvm::Value *NumOfRegularWithIterators =
4807
344
      llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4808
  // Calculate number of depobj dependecies and regular deps with the iterators.
4809
414
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
4810
414
    if (D.DepKind == OMPC_DEPEND_depobj) {
4811
2
      SmallVector<llvm::Value *, 4> Sizes =
4812
2
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4813
4
      for (llvm::Value *Size : Sizes) {
4814
4
        NumOfDepobjElements =
4815
4
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4816
4
      }
4817
2
      HasDepobjDeps = true;
4818
2
      continue;
4819
2
    }
4820
    // Include number of iterations, if any.
4821
412
    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4822
4
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; 
++I2
) {
4823
2
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4824
2
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4825
2
        NumOfRegularWithIterators =
4826
2
            CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
4827
2
      }
4828
2
      HasRegularWithIterators = true;
4829
2
      continue;
4830
2
    }
4831
412
  }
4832
4833
344
  QualType KmpDependInfoArrayTy;
4834
344
  if (HasDepobjDeps || 
HasRegularWithIterators342
) {
4835
4
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4836
4
                                           /*isSigned=*/false);
4837
4
    if (HasDepobjDeps) {
4838
2
      NumOfElements =
4839
2
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4840
2
    }
4841
4
    if (HasRegularWithIterators) {
4842
2
      NumOfElements =
4843
2
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4844
2
    }
4845
4
    OpaqueValueExpr OVE(Loc,
4846
4
                        C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4847
4
                        VK_RValue);
4848
4
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
4849
4
                                                  RValue::get(NumOfElements));
4850
4
    KmpDependInfoArrayTy =
4851
4
        C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
4852
4
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4853
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4854
    // Properly emit variable-sized array.
4855
4
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4856
4
                                         ImplicitParamDecl::Other);
4857
4
    CGF.EmitVarDecl(*PD);
4858
4
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4859
4
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4860
4
                                              /*isSigned=*/false);
4861
340
  } else {
4862
340
    KmpDependInfoArrayTy = C.getConstantArrayType(
4863
340
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4864
340
        ArrayType::Normal, /*IndexTypeQuals=*/0);
4865
340
    DependenciesArray =
4866
340
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4867
340
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4868
340
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4869
340
                                           /*isSigned=*/false);
4870
340
  }
4871
344
  unsigned Pos = 0;
4872
758
  for (unsigned I = 0, End = Dependencies.size(); I < End; 
++I414
) {
4873
414
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4874
412
        Dependencies[I].IteratorExpr)
4875
4
      continue;
4876
410
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4877
410
                   DependenciesArray);
4878
410
  }
4879
  // Copy regular dependecies with iterators.
4880
344
  LValue PosLVal = CGF.MakeAddrLValue(
4881
344
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4882
344
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4883
758
  for (unsigned I = 0, End = Dependencies.size(); I < End; 
++I414
) {
4884
414
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4885
412
        !Dependencies[I].IteratorExpr)
4886
412
      continue;
4887
2
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4888
2
                   DependenciesArray);
4889
2
  }
4890
  // Copy final depobj arrays without iterators.
4891
344
  if (HasDepobjDeps) {
4892
6
    for (unsigned I = 0, End = Dependencies.size(); I < End; 
++I4
) {
4893
4
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4894
2
        continue;
4895
2
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4896
2
                         DependenciesArray);
4897
2
    }
4898
2
  }
4899
344
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4900
344
      DependenciesArray, CGF.VoidPtrTy);
4901
344
  return std::make_pair(NumOfElements, DependenciesArray);
4902
344
}
4903
4904
/// Emits the dynamically allocated kmp_depend_info array backing a depobj.
///
/// The array is obtained from __kmpc_alloc with a null allocator argument and
/// holds one extra leading record whose base_addr field stores the number of
/// dependencies; the dependencies themselves follow, starting at index 1.
///
/// \param Dependencies The dependence to store in the depobj.
/// \param Loc Source location used to obtain the thread id.
/// \returns the address of the first real dependency (index 1) cast to
/// void *, or an invalid address when \p Dependencies has no expressions.
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();

  ASTContext &Ctx = CGM.getContext();
  const unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(Ctx, KmpDependInfoTy, FlagsTy);
  auto *DependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  const CharUnits RecAlign = Ctx.getTypeAlignInChars(KmpDependInfoTy);

  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  llvm::Value *AllocSize = nullptr;
  llvm::Value *NumDepsVal = nullptr;
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // Run-time count: product of the upper bounds of all iterators.
    // NOTE(review): the number of expressions in the clause is not part of
    // this product - confirm that is intended when DepExprs has several
    // entries.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I != E; ++I) {
      llvm::Value *Upper = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Upper = CGF.Builder.CreateIntCast(Upper, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Upper);
    }
    // bytes = (1 + NumDeps) * aligned sizeof(kmp_depend_info)
    llvm::Value *NumRecords = CGF.Builder.CreateNUWAdd(
        llvm::ConstantInt::get(CGF.SizeTy, 1), NumDepsVal);
    CharUnits RecBytes =
        Ctx.getTypeSizeInChars(KmpDependInfoTy).alignTo(RecAlign);
    llvm::Value *RecSize = CGM.getSize(RecBytes);
    AllocSize = CGF.Builder.CreateNUWMul(NumRecords, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Compile-time count: kmp_depend_info[NumDependencies + 1].
    QualType ArrayTy = Ctx.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits ArrayBytes = Ctx.getTypeSizeInChars(ArrayTy);
    AllocSize = CGM.getSize(ArrayBytes.alignTo(RecAlign));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }

  // Need to allocate on the dynamic memory, using the default allocator.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, AllocSize, Allocator};
  llvm::Value *RawPtr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  RawPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      RawPtr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  Address DependenciesArray(RawPtr, RecAlign);

  // Write number of elements in the first element of array for depobj.
  // deps[0].base_addr = NumDependencies;
  LValue HeaderLVal = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  LValue CountLVal = CGF.EmitLValueForField(
      HeaderLVal, *std::next(DependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, CountLVal);

  // Real dependencies start right after the header record. With iterators the
  // position has to live in memory; otherwise a compile-time index suffices.
  unsigned Idx = 1;
  LValue PosLVal;
  llvm::PointerUnion<unsigned *, LValue *> Pos = &Idx;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(Ctx.getSizeType(), "iterator.counter.addr"),
        Ctx.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);

  // Hand out the address of the first real dependency, skipping the header.
  Address FirstDep = CGF.Builder.CreateConstGEP(DependenciesArray, 1);
  return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(FirstDep,
                                                         CGF.VoidPtrTy);
}
4986
4987
void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4988
4
                                        SourceLocation Loc) {
4989
4
  ASTContext &C = CGM.getContext();
4990
4
  QualType FlagsTy;
4991
4
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
4992
4
  LValue Base = CGF.EmitLoadOfPointerLValue(
4993
4
      DepobjLVal.getAddress(CGF),
4994
4
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4995
4
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4996
4
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4997
4
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
4998
4
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4999
4
      Addr.getPointer(),
5000
4
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5001
4
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
5002
4
                                                               CGF.VoidPtrTy);
5003
4
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
5004
  // Use default allocator.
5005
4
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5006
4
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5007
5008
  // _kmpc_free(gtid, addr, nullptr);
5009
4
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5010
4
                                CGM.getModule(), OMPRTL___kmpc_free),
5011
4
                            Args);
5012
4
}
5013
5014
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
5015
                                       OpenMPDependClauseKind NewDepKind,
5016
4
                                       SourceLocation Loc) {
5017
4
  ASTContext &C = CGM.getContext();
5018
4
  QualType FlagsTy;
5019
4
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
5020
4
  RecordDecl *KmpDependInfoRD =
5021
4
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5022
4
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5023
4
  llvm::Value *NumDeps;
5024
4
  LValue Base;
5025
4
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
5026
5027
4
  Address Begin = Base.getAddress(CGF);
5028
  // Cast from pointer to array type to pointer to single element.
5029
4
  llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
5030
  // The basic structure here is a while-do loop.
5031
4
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
5032
4
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
5033
4
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5034
4
  CGF.EmitBlock(BodyBB);
5035
4
  llvm::PHINode *ElementPHI =
5036
4
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
5037
4
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
5038
4
  Begin = Address(ElementPHI, Begin.getAlignment());
5039
4
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
5040
4
                            Base.getTBAAInfo());
5041
  // deps[i].flags = NewDepKind;
5042
4
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
5043
4
  LValue FlagsLVal = CGF.EmitLValueForField(
5044
4
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5045
4
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5046
4
                        FlagsLVal);
5047
5048
  // Shift the address forward by one element.
5049
4
  Address ElementNext =
5050
4
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
5051
4
  ElementPHI->addIncoming(ElementNext.getPointer(),
5052
4
                          CGF.Builder.GetInsertBlock());
5053
4
  llvm::Value *IsEmpty =
5054
4
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
5055
4
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5056
  // Done.
5057
4
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5058
4
}
5059
5060
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
5061
                                   const OMPExecutableDirective &D,
5062
                                   llvm::Function *TaskFunction,
5063
                                   QualType SharedsTy, Address Shareds,
5064
                                   const Expr *IfCond,
5065
629
                                   const OMPTaskDataTy &Data) {
5066
629
  if (!CGF.HaveInsertPoint())
5067
0
    return;
5068
5069
629
  TaskResultTy Result =
5070
629
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5071
629
  llvm::Value *NewTask = Result.NewTask;
5072
629
  llvm::Function *TaskEntry = Result.TaskEntry;
5073
629
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
5074
629
  LValue TDBase = Result.TDBase;
5075
629
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
5076
  // Process list of dependences.
5077
629
  Address DependenciesArray = Address::invalid();
5078
629
  llvm::Value *NumOfElements;
5079
629
  std::tie(NumOfElements, DependenciesArray) =
5080
629
      emitDependClause(