Coverage Report

Created: 2020-02-25 14:32

/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/CodeGen/CGStmtOpenMP.cpp
Line
Count
Source (jump to first uncovered line)
1
//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This contains code to emit OpenMP nodes as LLVM code.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "CGCleanup.h"
14
#include "CGOpenMPRuntime.h"
15
#include "CodeGenFunction.h"
16
#include "CodeGenModule.h"
17
#include "TargetInfo.h"
18
#include "clang/AST/ASTContext.h"
19
#include "clang/AST/Attr.h"
20
#include "clang/AST/DeclOpenMP.h"
21
#include "clang/AST/OpenMPClause.h"
22
#include "clang/AST/Stmt.h"
23
#include "clang/AST/StmtOpenMP.h"
24
#include "clang/Basic/OpenMPKinds.h"
25
#include "clang/Basic/PrettyStackTrace.h"
26
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
27
#include "llvm/IR/Instructions.h"
28
#include "llvm/Support/AtomicOrdering.h"
29
using namespace clang;
30
using namespace CodeGen;
31
using namespace llvm::omp;
32
33
namespace {
34
/// Lexical scope for OpenMP executable constructs, that handles correct codegen
35
/// for captured expressions.
36
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
37
10.3k
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
38
12.4k
    for (const auto *C : S.clauses()) {
39
12.4k
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
40
7.58k
        if (const auto *PreInit =
41
756
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
42
822
          for (const auto *I : PreInit->decls()) {
43
822
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
44
804
              CGF.EmitVarDecl(cast<VarDecl>(*I));
45
804
            } else {
46
18
              CodeGenFunction::AutoVarEmission Emission =
47
18
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
48
18
              CGF.EmitAutoVarCleanups(Emission);
49
18
            }
50
822
          }
51
756
        }
52
7.58k
      }
53
12.4k
    }
54
10.3k
  }
55
  CodeGenFunction::OMPPrivateScope InlinedShareds;
56
57
12.5k
  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
58
12.5k
    return CGF.LambdaCaptureFields.lookup(VD) ||
59
12.5k
           
(12.1k
CGF.CapturedStmtInfo12.1k
&&
CGF.CapturedStmtInfo->lookup(VD)3.59k
) ||
60
12.5k
           
(8.92k
CGF.CurCodeDecl8.92k
&&
isa<BlockDecl>(CGF.CurCodeDecl)8.92k
);
61
12.5k
  }
62
63
public:
64
  OMPLexicalScope(
65
      CodeGenFunction &CGF, const OMPExecutableDirective &S,
66
      const llvm::Optional<OpenMPDirectiveKind> CapturedRegion = llvm::None,
67
      const bool EmitPreInitStmt = true)
68
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
69
16.5k
        InlinedShareds(CGF) {
70
16.5k
    if (EmitPreInitStmt)
71
10.3k
      emitPreInitStmt(CGF, S);
72
16.5k
    if (!CapturedRegion.hasValue())
73
8.57k
      return;
74
8.02k
    assert(S.hasAssociatedStmt() &&
75
8.02k
           "Expected associated statement for inlined directive.");
76
8.02k
    const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
77
14.0k
    for (const auto &C : CS->captures()) {
78
14.0k
      if (C.capturesVariable() || 
C.capturesVariableByCopy()7.87k
) {
79
12.5k
        auto *VD = C.getCapturedVar();
80
12.5k
        assert(VD == VD->getCanonicalDecl() &&
81
12.5k
               "Canonical decl must be captured.");
82
12.5k
        DeclRefExpr DRE(
83
12.5k
            CGF.getContext(), const_cast<VarDecl *>(VD),
84
12.5k
            isCapturedVar(CGF, VD) || 
(8.92k
CGF.CapturedStmtInfo8.92k
&&
85
8.92k
                                       
InlinedShareds.isGlobalVarCaptured(VD)322
),
86
12.5k
            VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
87
12.5k
        InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
88
12.5k
          return CGF.EmitLValue(&DRE).getAddress(CGF);
89
12.5k
        });
90
12.5k
      }
91
14.0k
    }
92
8.02k
    (void)InlinedShareds.Privatize();
93
8.02k
  }
94
};
95
96
/// Lexical scope for OpenMP parallel construct, that handles correct codegen
97
/// for captured expressions.
98
class OMPParallelScope final : public OMPLexicalScope {
99
4.13k
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
100
4.13k
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
101
4.13k
    return !(isOpenMPTargetExecutionDirective(Kind) ||
102
4.13k
             
isOpenMPLoopBoundSharingDirective(Kind)2.04k
) &&
103
4.13k
           
isOpenMPParallelDirective(Kind)1.04k
;
104
4.13k
  }
105
106
public:
107
  OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
108
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
109
4.13k
                        EmitPreInitStmt(S)) {}
110
};
111
112
/// Lexical scope for OpenMP teams construct, that handles correct codegen
113
/// for captured expressions.
114
class OMPTeamsScope final : public OMPLexicalScope {
115
3.86k
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
116
3.86k
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
117
3.86k
    return !isOpenMPTargetExecutionDirective(Kind) &&
118
3.86k
           
isOpenMPTeamsDirective(Kind)1.51k
;
119
3.86k
  }
120
121
public:
122
  OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
123
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
124
3.86k
                        EmitPreInitStmt(S)) {}
125
};
126
127
/// Private scope for OpenMP loop-based directives, that supports capturing
128
/// of used expression from loop statement.
129
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
130
12.5k
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
131
12.5k
    CodeGenFunction::OMPMapVars PreCondVars;
132
12.5k
    llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
133
13.1k
    for (const auto *E : S.counters()) {
134
13.1k
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
135
13.1k
      EmittedAsPrivate.insert(VD->getCanonicalDecl());
136
13.1k
      (void)PreCondVars.setVarAddr(
137
13.1k
          CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
138
13.1k
    }
139
12.5k
    // Mark private vars as undefs.
140
12.5k
    for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
141
2.84k
      for (const Expr *IRef : C->varlists()) {
142
2.84k
        const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
143
2.84k
        if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
144
2.50k
          (void)PreCondVars.setVarAddr(
145
2.50k
              CGF, OrigVD,
146
2.50k
              Address(llvm::UndefValue::get(
147
2.50k
                          CGF.ConvertTypeForMem(CGF.getContext().getPointerType(
148
2.50k
                              OrigVD->getType().getNonReferenceType()))),
149
2.50k
                      CGF.getContext().getDeclAlign(OrigVD)));
150
2.50k
        }
151
2.84k
      }
152
705
    }
153
12.5k
    (void)PreCondVars.apply(CGF);
154
12.5k
    // Emit init, __range and __end variables for C++ range loops.
155
12.5k
    const Stmt *Body =
156
12.5k
        S.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
157
25.7k
    for (unsigned Cnt = 0; Cnt < S.getCollapsedNumber(); 
++Cnt13.1k
) {
158
13.1k
      Body = OMPLoopDirective::tryToFindNextInnerLoop(
159
13.1k
          Body, /*TryImperfectlyNestedLoops=*/true);
160
13.1k
      if (auto *For = dyn_cast<ForStmt>(Body)) {
161
13.1k
        Body = For->getBody();
162
13.1k
      } else {
163
6
        assert(isa<CXXForRangeStmt>(Body) &&
164
6
               "Expected canonical for loop or range-based for loop.");
165
6
        auto *CXXFor = cast<CXXForRangeStmt>(Body);
166
6
        if (const Stmt *Init = CXXFor->getInit())
167
0
          CGF.EmitStmt(Init);
168
6
        CGF.EmitStmt(CXXFor->getRangeStmt());
169
6
        CGF.EmitStmt(CXXFor->getEndStmt());
170
6
        Body = CXXFor->getBody();
171
6
      }
172
13.1k
    }
173
12.5k
    if (const auto *PreInits = cast_or_null<DeclStmt>(S.getPreInits())) {
174
2.57k
      for (const auto *I : PreInits->decls())
175
5.60k
        CGF.EmitVarDecl(cast<VarDecl>(*I));
176
2.57k
    }
177
12.5k
    PreCondVars.restore(CGF);
178
12.5k
  }
179
180
public:
181
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
182
12.5k
      : CodeGenFunction::RunCleanupsScope(CGF) {
183
12.5k
    emitPreInitStmt(CGF, S);
184
12.5k
  }
185
};
186
187
class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
188
  CodeGenFunction::OMPPrivateScope InlinedShareds;
189
190
33.9k
  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
191
33.9k
    return CGF.LambdaCaptureFields.lookup(VD) ||
192
33.9k
           
(32.9k
CGF.CapturedStmtInfo32.9k
&&
CGF.CapturedStmtInfo->lookup(VD)6.39k
) ||
193
33.9k
           
(32.9k
CGF.CurCodeDecl32.9k
&&
isa<BlockDecl>(CGF.CurCodeDecl)32.9k
&&
194
32.9k
            
cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD)109
);
195
33.9k
  }
196
197
public:
198
  OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
199
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
200
10.5k
        InlinedShareds(CGF) {
201
13.9k
    for (const auto *C : S.clauses()) {
202
13.9k
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
203
8.66k
        if (const auto *PreInit =
204
988
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
205
1.05k
          for (const auto *I : PreInit->decls()) {
206
1.05k
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
207
1.03k
              CGF.EmitVarDecl(cast<VarDecl>(*I));
208
1.03k
            } else {
209
18
              CodeGenFunction::AutoVarEmission Emission =
210
18
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
211
18
              CGF.EmitAutoVarCleanups(Emission);
212
18
            }
213
1.05k
          }
214
988
        }
215
8.66k
      } else 
if (const auto *5.24k
UDP5.24k
= dyn_cast<OMPUseDevicePtrClause>(C)) {
216
80
        for (const Expr *E : UDP->varlists()) {
217
80
          const Decl *D = cast<DeclRefExpr>(E)->getDecl();
218
80
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
219
20
            CGF.EmitVarDecl(*OED);
220
80
        }
221
72
      }
222
13.9k
    }
223
10.5k
    if (!isOpenMPSimdDirective(S.getDirectiveKind()))
224
7.85k
      CGF.EmitOMPPrivateClause(S, InlinedShareds);
225
10.5k
    if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
226
37
      if (const Expr *E = TG->getReductionRef())
227
26
        CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
228
37
    }
229
10.5k
    const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
230
31.2k
    while (CS) {
231
39.0k
      for (auto &C : CS->captures()) {
232
39.0k
        if (C.capturesVariable() || 
C.capturesVariableByCopy()23.6k
) {
233
33.9k
          auto *VD = C.getCapturedVar();
234
33.9k
          assert(VD == VD->getCanonicalDecl() &&
235
33.9k
                 "Canonical decl must be captured.");
236
33.9k
          DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
237
33.9k
                          isCapturedVar(CGF, VD) ||
238
33.9k
                              
(32.8k
CGF.CapturedStmtInfo32.8k
&&
239
32.8k
                               
InlinedShareds.isGlobalVarCaptured(VD)6.37k
),
240
33.9k
                          VD->getType().getNonReferenceType(), VK_LValue,
241
33.9k
                          C.getLocation());
242
33.9k
          InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
243
33.9k
            return CGF.EmitLValue(&DRE).getAddress(CGF);
244
33.9k
          });
245
33.9k
        }
246
39.0k
      }
247
20.6k
      CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
248
20.6k
    }
249
10.5k
    (void)InlinedShareds.Privatize();
250
10.5k
  }
251
};
252
253
} // namespace
254
255
static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
256
                                         const OMPExecutableDirective &S,
257
                                         const RegionCodeGenTy &CodeGen);
258
259
6.45k
LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
260
6.45k
  if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
261
5.28k
    if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
262
5.28k
      OrigVD = OrigVD->getCanonicalDecl();
263
5.28k
      bool IsCaptured =
264
5.28k
          LambdaCaptureFields.lookup(OrigVD) ||
265
5.28k
          
(5.24k
CapturedStmtInfo5.24k
&&
CapturedStmtInfo->lookup(OrigVD)665
) ||
266
5.28k
          
(4.76k
CurCodeDecl4.76k
&&
isa<BlockDecl>(CurCodeDecl)4.75k
);
267
5.28k
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured,
268
5.28k
                      OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
269
5.28k
      return EmitLValue(&DRE);
270
5.28k
    }
271
1.16k
  }
272
1.16k
  return EmitLValue(E);
273
1.16k
}
274
275
11.4k
llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
276
11.4k
  ASTContext &C = getContext();
277
11.4k
  llvm::Value *Size = nullptr;
278
11.4k
  auto SizeInChars = C.getTypeSizeInChars(Ty);
279
11.4k
  if (SizeInChars.isZero()) {
280
828
    // getTypeSizeInChars() returns 0 for a VLA.
281
1.65k
    while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) {
282
828
      VlaSizePair VlaSize = getVLASize(VAT);
283
828
      Ty = VlaSize.Type;
284
828
      Size = Size ? 
Builder.CreateNUWMul(Size, VlaSize.NumElts)0
285
828
                  : VlaSize.NumElts;
286
828
    }
287
828
    SizeInChars = C.getTypeSizeInChars(Ty);
288
828
    if (SizeInChars.isZero())
289
0
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
290
828
    return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
291
828
  }
292
10.6k
  return CGM.getSize(SizeInChars);
293
10.6k
}
294
295
void CodeGenFunction::GenerateOpenMPCapturedVars(
296
13.3k
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
297
13.3k
  const RecordDecl *RD = S.getCapturedRecordDecl();
298
13.3k
  auto CurField = RD->field_begin();
299
13.3k
  auto CurCap = S.captures().begin();
300
13.3k
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
301
13.3k
                                                 E = S.capture_init_end();
302
36.3k
       I != E; 
++I, ++CurField, ++CurCap23.0k
) {
303
23.0k
    if (CurField->hasCapturedVLAType()) {
304
1.78k
      const VariableArrayType *VAT = CurField->getCapturedVLAType();
305
1.78k
      llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
306
1.78k
      CapturedVars.push_back(Val);
307
21.2k
    } else if (CurCap->capturesThis()) {
308
1.11k
      CapturedVars.push_back(CXXThisValue);
309
20.1k
    } else if (CurCap->capturesVariableByCopy()) {
310
10.0k
      llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());
311
10.0k
312
10.0k
      // If the field is not a pointer, we need to save the actual value
313
10.0k
      // and load it as a void pointer.
314
10.0k
      if (!CurField->getType()->isAnyPointerType()) {
315
8.63k
        ASTContext &Ctx = getContext();
316
8.63k
        Address DstAddr = CreateMemTemp(
317
8.63k
            Ctx.getUIntPtrType(),
318
8.63k
            Twine(CurCap->getCapturedVar()->getName(), ".casted"));
319
8.63k
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());
320
8.63k
321
8.63k
        llvm::Value *SrcAddrVal = EmitScalarConversion(
322
8.63k
            DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
323
8.63k
            Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
324
8.63k
        LValue SrcLV =
325
8.63k
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());
326
8.63k
327
8.63k
        // Store the value using the source type pointer.
328
8.63k
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);
329
8.63k
330
8.63k
        // Load the value using the destination type pointer.
331
8.63k
        CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
332
8.63k
      }
333
10.0k
      CapturedVars.push_back(CV);
334
10.1k
    } else {
335
10.1k
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
336
10.1k
      CapturedVars.push_back(EmitLValue(*I).getAddress(*this).getPointer());
337
10.1k
    }
338
23.0k
  }
339
13.3k
}
340
341
static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
342
                                    QualType DstType, StringRef Name,
343
12.1k
                                    LValue AddrLV) {
344
12.1k
  ASTContext &Ctx = CGF.getContext();
345
12.1k
346
12.1k
  llvm::Value *CastedPtr = CGF.EmitScalarConversion(
347
12.1k
      AddrLV.getAddress(CGF).getPointer(), Ctx.getUIntPtrType(),
348
12.1k
      Ctx.getPointerType(DstType), Loc);
349
12.1k
  Address TmpAddr =
350
12.1k
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
351
12.1k
          .getAddress(CGF);
352
12.1k
  return TmpAddr;
353
12.1k
}
354
355
4.93k
static QualType getCanonicalParamType(ASTContext &C, QualType T) {
356
4.93k
  if (T->isLValueReferenceType())
357
1.44k
    return C.getLValueReferenceType(
358
1.44k
        getCanonicalParamType(C, T.getNonReferenceType()),
359
1.44k
        /*SpelledAsLValue=*/false);
360
3.49k
  if (T->isPointerType())
361
27
    return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
362
3.47k
  if (const ArrayType *A = T->getAsArrayTypeUnsafe()) {
363
2.02k
    if (const auto *VLA = dyn_cast<VariableArrayType>(A))
364
2.00k
      return getCanonicalParamType(C, VLA->getElementType());
365
20
    if (!A->isVariablyModifiedType())
366
20
      return C.getCanonicalType(T);
367
1.44k
  }
368
1.44k
  return C.getCanonicalParamType(T);
369
1.44k
}
370
371
namespace {
372
/// Contains required data for proper outlined function codegen.
373
struct FunctionOptions {
374
  /// Captured statement for which the function is generated.
375
  const CapturedStmt *S = nullptr;
376
  /// true if cast to/from  UIntPtr is required for variables captured by
377
  /// value.
378
  const bool UIntPtrCastRequired = true;
379
  /// true if only casted arguments must be registered as local args or VLA
380
  /// sizes.
381
  const bool RegisterCastedArgsOnly = false;
382
  /// Name of the generated function.
383
  const StringRef FunctionName;
384
  /// Location of the non-debug version of the outlined function.
385
  SourceLocation Loc;
386
  explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
387
                           bool RegisterCastedArgsOnly, StringRef FunctionName,
388
                           SourceLocation Loc)
389
      : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
390
        RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
391
15.0k
        FunctionName(FunctionName), Loc(Loc) {}
392
};
393
} // namespace
394
395
static llvm::Function *emitOutlinedFunctionPrologue(
396
    CodeGenFunction &CGF, FunctionArgList &Args,
397
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
398
        &LocalAddrs,
399
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
400
        &VLASizes,
401
15.0k
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
402
15.0k
  const CapturedDecl *CD = FO.S->getCapturedDecl();
403
15.0k
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
404
15.0k
  assert(CD->hasBody() && "missing CapturedDecl body");
405
15.0k
406
15.0k
  CXXThisValue = nullptr;
407
15.0k
  // Build the argument list.
408
15.0k
  CodeGenModule &CGM = CGF.CGM;
409
15.0k
  ASTContext &Ctx = CGM.getContext();
410
15.0k
  FunctionArgList TargetArgs;
411
15.0k
  Args.append(CD->param_begin(),
412
15.0k
              std::next(CD->param_begin(), CD->getContextParamPosition()));
413
15.0k
  TargetArgs.append(
414
15.0k
      CD->param_begin(),
415
15.0k
      std::next(CD->param_begin(), CD->getContextParamPosition()));
416
15.0k
  auto I = FO.S->captures().begin();
417
15.0k
  FunctionDecl *DebugFunctionDecl = nullptr;
418
15.0k
  if (!FO.UIntPtrCastRequired) {
419
69
    FunctionProtoType::ExtProtoInfo EPI;
420
69
    QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI);
421
69
    DebugFunctionDecl = FunctionDecl::Create(
422
69
        Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(),
423
69
        SourceLocation(), DeclarationName(), FunctionTy,
424
69
        Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static,
425
69
        /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false);
426
69
  }
427
25.9k
  for (const FieldDecl *FD : RD->fields()) {
428
25.9k
    QualType ArgType = FD->getType();
429
25.9k
    IdentifierInfo *II = nullptr;
430
25.9k
    VarDecl *CapVar = nullptr;
431
25.9k
432
25.9k
    // If this is a capture by copy and the type is not a pointer, the outlined
433
25.9k
    // function argument type should be uintptr and the value properly casted to
434
25.9k
    // uintptr. This is necessary given that the runtime library is only able to
435
25.9k
    // deal with pointers. We can pass in the same way the VLA type sizes to the
436
25.9k
    // outlined function.
437
25.9k
    if (FO.UIntPtrCastRequired &&
438
25.9k
        
(25.8k
(25.8k
I->capturesVariableByCopy()25.8k
&&
!ArgType->isAnyPointerType()11.5k
) ||
439
25.8k
         
I->capturesVariableArrayType()15.7k
))
440
12.1k
      ArgType = Ctx.getUIntPtrType();
441
25.9k
442
25.9k
    if (I->capturesVariable() || 
I->capturesVariableByCopy()14.9k
) {
443
22.6k
      CapVar = I->getCapturedVar();
444
22.6k
      II = CapVar->getIdentifier();
445
22.6k
    } else 
if (3.31k
I->capturesThis()3.31k
) {
446
1.22k
      II = &Ctx.Idents.get("this");
447
2.08k
    } else {
448
2.08k
      assert(I->capturesVariableArrayType());
449
2.08k
      II = &Ctx.Idents.get("vla");
450
2.08k
    }
451
25.9k
    if (ArgType->isVariablyModifiedType())
452
1.46k
      ArgType = getCanonicalParamType(Ctx, ArgType);
453
25.9k
    VarDecl *Arg;
454
25.9k
    if (DebugFunctionDecl && 
(177
CapVar177
||
I->capturesThis()11
)) {
455
172
      Arg = ParmVarDecl::Create(
456
172
          Ctx, DebugFunctionDecl,
457
172
          CapVar ? 
CapVar->getBeginLoc()166
:
FD->getBeginLoc()6
,
458
172
          CapVar ? 
CapVar->getLocation()166
:
FD->getLocation()6
, II, ArgType,
459
172
          /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
460
25.8k
    } else {
461
25.8k
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
462
25.8k
                                      II, ArgType, ImplicitParamDecl::Other);
463
25.8k
    }
464
25.9k
    Args.emplace_back(Arg);
465
25.9k
    // Do not cast arguments if we emit function with non-original types.
466
25.9k
    TargetArgs.emplace_back(
467
25.9k
        FO.UIntPtrCastRequired
468
25.9k
            ? 
Arg25.8k
469
25.9k
            : 
CGM.getOpenMPRuntime().translateParameter(FD, Arg)177
);
470
25.9k
    ++I;
471
25.9k
  }
472
15.0k
  Args.append(
473
15.0k
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
474
15.0k
      CD->param_end());
475
15.0k
  TargetArgs.append(
476
15.0k
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
477
15.0k
      CD->param_end());
478
15.0k
479
15.0k
  // Create the function declaration.
480
15.0k
  const CGFunctionInfo &FuncInfo =
481
15.0k
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
482
15.0k
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
483
15.0k
484
15.0k
  auto *F =
485
15.0k
      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
486
15.0k
                             FO.FunctionName, &CGM.getModule());
487
15.0k
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
488
15.0k
  if (CD->isNothrow())
489
15.0k
    F->setDoesNotThrow();
490
15.0k
  F->setDoesNotRecurse();
491
15.0k
492
15.0k
  // Generate the function.
493
15.0k
  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
494
15.0k
                    FO.UIntPtrCastRequired ? 
FO.Loc14.9k
:
FO.S->getBeginLoc()69
,
495
15.0k
                    FO.UIntPtrCastRequired ? 
FO.Loc14.9k
496
15.0k
                                           : 
CD->getBody()->getBeginLoc()69
);
497
15.0k
  unsigned Cnt = CD->getContextParamPosition();
498
15.0k
  I = FO.S->captures().begin();
499
25.9k
  for (const FieldDecl *FD : RD->fields()) {
500
25.9k
    // Do not map arguments if we emit function with non-original types.
501
25.9k
    Address LocalAddr(Address::invalid());
502
25.9k
    if (!FO.UIntPtrCastRequired && 
Args[Cnt] != TargetArgs[Cnt]177
) {
503
54
      LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
504
54
                                                             TargetArgs[Cnt]);
505
25.9k
    } else {
506
25.9k
      LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
507
25.9k
    }
508
25.9k
    // If we are capturing a pointer by copy we don't need to do anything, just
509
25.9k
    // use the value that we get from the arguments.
510
25.9k
    if (I->capturesVariableByCopy() && 
FD->getType()->isAnyPointerType()11.6k
) {
511
1.52k
      const VarDecl *CurVD = I->getCapturedVar();
512
1.52k
      if (!FO.RegisterCastedArgsOnly)
513
1.51k
        LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
514
1.52k
      ++Cnt;
515
1.52k
      ++I;
516
1.52k
      continue;
517
1.52k
    }
518
24.4k
519
24.4k
    LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
520
24.4k
                                        AlignmentSource::Decl);
521
24.4k
    if (FD->hasCapturedVLAType()) {
522
2.08k
      if (FO.UIntPtrCastRequired) {
523
2.08k
        ArgLVal = CGF.MakeAddrLValue(
524
2.08k
            castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
525
2.08k
                                 Args[Cnt]->getName(), ArgLVal),
526
2.08k
            FD->getType(), AlignmentSource::Decl);
527
2.08k
      }
528
2.08k
      llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
529
2.08k
      const VariableArrayType *VAT = FD->getCapturedVLAType();
530
2.08k
      VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg);
531
22.3k
    } else if (I->capturesVariable()) {
532
11.0k
      const VarDecl *Var = I->getCapturedVar();
533
11.0k
      QualType VarTy = Var->getType();
534
11.0k
      Address ArgAddr = ArgLVal.getAddress(CGF);
535
11.0k
      if (ArgLVal.getType()->isLValueReferenceType()) {
536
11.0k
        ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
537
11.0k
      } else 
if (0
!VarTy->isVariablyModifiedType()0
||
!VarTy->isPointerType()0
) {
538
0
        assert(ArgLVal.getType()->isPointerType());
539
0
        ArgAddr = CGF.EmitLoadOfPointer(
540
0
            ArgAddr, ArgLVal.getType()->castAs<PointerType>());
541
0
      }
542
11.0k
      if (!FO.RegisterCastedArgsOnly) {
543
10.9k
        LocalAddrs.insert(
544
10.9k
            {Args[Cnt],
545
10.9k
             {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
546
10.9k
      }
547
11.3k
    } else if (I->capturesVariableByCopy()) {
548
10.0k
      assert(!FD->getType()->isAnyPointerType() &&
549
10.0k
             "Not expecting a captured pointer.");
550
10.0k
      const VarDecl *Var = I->getCapturedVar();
551
10.0k
      LocalAddrs.insert({Args[Cnt],
552
10.0k
                         {Var, FO.UIntPtrCastRequired
553
10.0k
                                   ? castValueFromUintptr(
554
10.0k
                                         CGF, I->getLocation(), FD->getType(),
555
10.0k
                                         Args[Cnt]->getName(), ArgLVal)
556
10.0k
                                   : 
ArgLVal.getAddress(CGF)22
}});
557
10.0k
    } else {
558
1.22k
      // If 'this' is captured, load it into CXXThisValue.
559
1.22k
      assert(I->capturesThis());
560
1.22k
      CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
561
1.22k
      LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress(CGF)}});
562
1.22k
    }
563
24.4k
    ++Cnt;
564
24.4k
    ++I;
565
24.4k
  }
566
15.0k
567
15.0k
  return F;
568
15.0k
}
569
570
llvm::Function *
571
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
572
14.9k
                                                    SourceLocation Loc) {
573
14.9k
  assert(
574
14.9k
      CapturedStmtInfo &&
575
14.9k
      "CapturedStmtInfo should be set when generating the captured function");
576
14.9k
  const CapturedDecl *CD = S.getCapturedDecl();
577
14.9k
  // Build the argument list.
578
14.9k
  bool NeedWrapperFunction =
579
14.9k
      getDebugInfo() && 
CGM.getCodeGenOpts().hasReducedDebugInfo()127
;
580
14.9k
  FunctionArgList Args;
581
14.9k
  llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
582
14.9k
  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
583
14.9k
  SmallString<256> Buffer;
584
14.9k
  llvm::raw_svector_ostream Out(Buffer);
585
14.9k
  Out << CapturedStmtInfo->getHelperName();
586
14.9k
  if (NeedWrapperFunction)
587
69
    Out << "_debug__";
588
14.9k
  FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
589
14.9k
                     Out.str(), Loc);
590
14.9k
  llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
591
14.9k
                                                   VLASizes, CXXThisValue, FO);
592
14.9k
  CodeGenFunction::OMPPrivateScope LocalScope(*this);
593
23.7k
  for (const auto &LocalAddrPair : LocalAddrs) {
594
23.7k
    if (LocalAddrPair.second.first) {
595
22.5k
      LocalScope.addPrivate(LocalAddrPair.second.first, [&LocalAddrPair]() {
596
22.5k
        return LocalAddrPair.second.second;
597
22.5k
      });
598
22.5k
    }
599
23.7k
  }
600
14.9k
  (void)LocalScope.Privatize();
601
14.9k
  for (const auto &VLASizePair : VLASizes)
602
2.08k
    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
603
14.9k
  PGO.assignRegionCounters(GlobalDecl(CD), F);
604
14.9k
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
605
14.9k
  (void)LocalScope.ForceCleanup();
606
14.9k
  FinishFunction(CD->getBodyRBrace());
607
14.9k
  if (!NeedWrapperFunction)
608
14.9k
    return F;
609
69
610
69
  FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
611
69
                            /*RegisterCastedArgsOnly=*/true,
612
69
                            CapturedStmtInfo->getHelperName(), Loc);
613
69
  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
614
69
  WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
615
69
  Args.clear();
616
69
  LocalAddrs.clear();
617
69
  VLASizes.clear();
618
69
  llvm::Function *WrapperF =
619
69
      emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
620
69
                                   WrapperCGF.CXXThisValue, WrapperFO);
621
69
  llvm::SmallVector<llvm::Value *, 4> CallArgs;
622
279
  for (const auto *Arg : Args) {
623
279
    llvm::Value *CallArg;
624
279
    auto I = LocalAddrs.find(Arg);
625
279
    if (I != LocalAddrs.end()) {
626
28
      LValue LV = WrapperCGF.MakeAddrLValue(
627
28
          I->second.second,
628
28
          I->second.first ? 
I->second.first->getType()22
:
Arg->getType()6
,
629
28
          AlignmentSource::Decl);
630
28
      CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
631
251
    } else {
632
251
      auto EI = VLASizes.find(Arg);
633
251
      if (EI != VLASizes.end()) {
634
5
        CallArg = EI->second.second;
635
246
      } else {
636
246
        LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
637
246
                                              Arg->getType(),
638
246
                                              AlignmentSource::Decl);
639
246
        CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
640
246
      }
641
251
    }
642
279
    CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
643
279
  }
644
69
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs);
645
69
  WrapperCGF.FinishFunction();
646
69
  return WrapperF;
647
69
}
648
649
//===----------------------------------------------------------------------===//
650
//                              OpenMP Directive Emission
651
//===----------------------------------------------------------------------===//
652
/// Emit an element-by-element copy loop between two arrays of the same
/// (possibly multi-dimensional / variably modified) array type.
///
/// \param DestAddr     Address of the destination array.
/// \param SrcAddr      Address of the source array.
/// \param OriginalType The array type shared by source and destination.
/// \param CopyGen      Callback that emits the copy of one element, given
///                     (destination element address, source element address).
///
/// The generated IR is a guarded while-do loop: an emptiness check branches
/// straight to the done block, and the body advances two pointer PHIs (one
/// for source, one for destination) until the destination end pointer is
/// reached.
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  // emitArrayLength also rewrites DestAddr to point at the first scalar
  // element and fills in ElementTy with the base element type.
  llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely for zero-length arrays (begin == end).
  llvm::Value *IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  // PHI over the current source element pointer; the back-edge incoming
  // value is added after the body is emitted (see below).
  llvm::PHINode *SrcElementPHI =
    Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI,
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Matching PHI for the destination element pointer.
  llvm::PHINode *DestElementPHI =
    Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
    Address(DestElementPHI,
            DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *SrcElementNext = Builder.CreateConstGEP1_32(
      SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // The back-edge block is queried *after* CopyGen ran, since the copy may
  // have emitted additional basic blocks; GetInsertBlock() is the true
  // predecessor of the loop header on the back edge.
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}
712
713
/// Emit a copy of \p SrcVD into \p DestVD using the Sema-provided copy
/// expression \p Copy.
///
/// Non-array values are copied by privatizing the pseudo source/destination
/// variables to the given addresses and evaluating \p Copy once for the
/// whole variable. Arrays whose copy expression is a plain '=' assignment
/// are lowered to a single aggregate assignment; arrays with a more complex
/// copy expression are copied element by element.
void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (!OriginalType->isArrayType()) {
    // Remap the pseudo source/destination variables to the given addresses
    // and evaluate the copy expression for the whole variable.
    CodeGenFunction::OMPPrivateScope ScalarRemap(*this);
    ScalarRemap.addPrivate(SrcVD, [SrcAddr]() { return SrcAddr; });
    ScalarRemap.addPrivate(DestVD, [DestAddr]() { return DestAddr; });
    (void)ScalarRemap.Privatize();
    EmitIgnoredExpr(Copy);
    return;
  }

  // Array case: a simple '=' assignment can be lowered to one aggregate
  // copy (memcpy-like).
  const auto *AssignBO = dyn_cast<BinaryOperator>(Copy);
  if (AssignBO && AssignBO->getOpcode() == BO_Assign) {
    LValue DstLV = MakeAddrLValue(DestAddr, OriginalType);
    LValue SrcLV = MakeAddrLValue(SrcAddr, OriginalType);
    EmitAggregateAssign(DstLV, SrcLV, OriginalType);
    return;
  }

  // Arrays with complex element types are copied element by element.
  EmitOMPAggregateAssign(
      DestAddr, SrcAddr, OriginalType,
      [this, Copy, SrcVD, DestVD](Address DstElem, Address SrcElem) {
        // Working with a single array element, so remap the destination and
        // source variables to the corresponding array elements before
        // evaluating the per-element copy expression.
        CodeGenFunction::OMPPrivateScope ElemRemap(*this);
        ElemRemap.addPrivate(DestVD, [DstElem]() { return DstElem; });
        ElemRemap.addPrivate(SrcVD, [SrcElem]() { return SrcElem; });
        (void)ElemRemap.Privatize();
        EmitIgnoredExpr(Copy);
      });
}
749
750
/// Emit copy-initialized private copies for every variable in the
/// 'firstprivate' clauses of directive \p D and register them in
/// \p PrivateScope.
///
/// \returns true if at least one emitted firstprivate variable is also
/// listed in a 'lastprivate' clause (and something was actually emitted),
/// so the caller knows a final copy-back is required.
bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  // Constant firstprivates in device target regions get special handling
  // below (no local copy is emitted).
  bool DeviceConstTarget =
      getLangOpts().OpenMPIsDevice &&
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  bool FirstprivateIsLastprivate = false;
  // Map each lastprivate variable to its lastprivate kind so we can detect
  // firstprivate+lastprivate combinations and conditional lastprivates.
  llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.try_emplace(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl(),
          C->getKind());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  // Force emission of the firstprivate copy if the directive does not emit
  // outlined function, like omp for, omp simd, omp distribute etc.
  bool MustEmitFirstprivateCopy =
      CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    // IRef/InitsRef are advanced in lockstep with the private_copies() loop;
    // every 'continue' below must step both before leaving the iteration.
    const auto *IRef = C->varlist_begin();
    const auto *InitsRef = C->inits().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      // Captured-by-value variables in an outlined region already behave as
      // firstprivate copies; just record them and move on.
      if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
          !FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      // Do not emit copy for firstprivate constant variables in target regions,
      // captured by reference.
      if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) &&
          FD && FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        (void)CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(*this,
                                                                    OrigVD);
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VDInit =
            cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        LValue OriginalLVal;
        if (!FD) {
          // Check if the firstprivate variable is just a constant value.
          ConstantEmission CE = tryEmitAsConstant(&DRE);
          if (CE && !CE.isReference()) {
            // Constant value, no need to create a copy.
            ++IRef;
            ++InitsRef;
            continue;
          }
          if (CE && CE.isReference()) {
            OriginalLVal = CE.getReferenceLValue(*this, &DRE);
          } else {
            assert(!CE && "Expected non-constant firstprivate.");
            OriginalLVal = EmitLValue(&DRE);
          }
        } else {
          OriginalLVal = EmitLValue(&DRE);
        }
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(
              OrigVD, [this, VD, Type, OriginalLVal, VDInit]() {
                AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
                const Expr *Init = VD->getInit();
                if (!isa<CXXConstructExpr>(Init) ||
                    isTrivialInitializer(Init)) {
                  // Perform simple memcpy.
                  LValue Dest =
                      MakeAddrLValue(Emission.getAllocatedAddress(), Type);
                  EmitAggregateAssign(Dest, OriginalLVal, Type);
                } else {
                  // Non-trivial constructor: copy element by element, with
                  // VDInit temporarily mapped to the source element.
                  EmitOMPAggregateAssign(
                      Emission.getAllocatedAddress(),
                      OriginalLVal.getAddress(*this), Type,
                      [this, VDInit, Init](Address DestElement,
                                           Address SrcElement) {
                        // Clean up any temporaries needed by the
                        // initialization.
                        RunCleanupsScope InitScope(*this);
                        // Emit initialization for single element.
                        setAddrOfLocalVar(VDInit, SrcElement);
                        EmitAnyExprToMem(Init, DestElement,
                                         Init->getType().getQualifiers(),
                                         /*IsInitializer*/ false);
                        LocalDeclMap.erase(VDInit);
                      });
                }
                EmitAutoVarCleanups(Emission);
                return Emission.getAllocatedAddress();
              });
        } else {
          Address OriginalAddr = OriginalLVal.getAddress(*this);
          IsRegistered =
              PrivateScope.addPrivate(OrigVD, [this, VDInit, OriginalAddr, VD,
                                               ThisFirstprivateIsLastprivate,
                                               OrigVD, &Lastprivates, IRef]() {
                // Emit private VarDecl with copy init.
                // Remap temp VDInit variable to the address of the original
                // variable (for proper handling of captured global variables).
                setAddrOfLocalVar(VDInit, OriginalAddr);
                EmitDecl(*VD);
                LocalDeclMap.erase(VDInit);
                if (ThisFirstprivateIsLastprivate &&
                    Lastprivates[OrigVD->getCanonicalDecl()] ==
                        OMPC_LASTPRIVATE_conditional) {
                  // Create/init special variable for lastprivate conditionals.
                  Address VDAddr =
                      CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                          *this, OrigVD);
                  llvm::Value *V = EmitLoadOfScalar(
                      MakeAddrLValue(GetAddrOfLocalVar(VD), (*IRef)->getType(),
                                     AlignmentSource::Decl),
                      (*IRef)->getExprLoc());
                  EmitStoreOfScalar(V,
                                    MakeAddrLValue(VDAddr, (*IRef)->getType(),
                                                   AlignmentSource::Decl));
                  LocalDeclMap.erase(VD);
                  setAddrOfLocalVar(VD, VDAddr);
                  return VDAddr;
                }
                return GetAddrOfLocalVar(VD);
              });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}
907
908
void CodeGenFunction::EmitOMPPrivateClause(
909
    const OMPExecutableDirective &D,
910
23.3k
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
911
23.3k
  if (!HaveInsertPoint())
912
0
    return;
913
23.3k
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
914
23.3k
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
915
914
    auto IRef = C->varlist_begin();
916
3.05k
    for (const Expr *IInit : C->private_copies()) {
917
3.05k
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
918
3.05k
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
919
2.79k
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
920
2.79k
        bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD]() {
921
2.79k
          // Emit private VarDecl with copy init.
922
2.79k
          EmitDecl(*VD);
923
2.79k
          return GetAddrOfLocalVar(VD);
924
2.79k
        });
925
2.79k
        assert(IsRegistered && "private var already registered as private");
926
2.79k
        // Silence the warning about unused variable.
927
2.79k
        (void)IsRegistered;
928
2.79k
      }
929
3.05k
      ++IRef;
930
3.05k
    }
931
914
  }
932
23.3k
}
933
934
730
/// Emit the copy of threadprivate master values into the per-thread copies
/// for all 'copyin' clauses on directive \p D.
///
/// \returns true if any copyin code was emitted (callers then emit the
/// required barrier after the copy region).
bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    // IRef/ISrcRef/IDestRef advance in lockstep with assignment_ops().
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code with
        // TLS support, the address is passed from the master as field in the
        // captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true,
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress(*this);
          // Drop the cached mapping so later EmitLValue(*IRef) below
          // resolves to the thread-local copy, not the captured field.
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
            Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                        : CGM.GetAddrOfGlobal(VD),
                    getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress(*this);
        // First copied variable: emit the "am I the master?" guard once.
        if (CopiedVars.size() == 1) {
          // At first check if current thread is a master thread. If it is, no
          // need to copy data.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          // Master and private addresses compare equal on the master thread,
          // so the copy region is skipped there.
          Builder.CreateCondBr(
              Builder.CreateICmpNE(
                  Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
                  Builder.CreatePtrToInt(PrivateAddr.getPointer(),
                                         CGM.IntPtrTy)),
              CopyBegin, CopyEnd);
          EmitBlock(CopyBegin);
        }
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}
1002
1003
/// Emit initialization for the variables of the 'lastprivate' clauses on
/// directive \p D: register the original variable's address (for the final
/// copy-back) and, where needed, emit and register the private copy.
///
/// \returns true if the directive has at least one lastprivate clause, so
/// the caller knows to emit EmitOMPLastprivateClauseFinal later.
bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  // Loop control variables of simd directives are privatized elsewhere;
  // collect them so we do not emit a second private copy below.
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    const auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (const Expr *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    // Taskloops do not require additional initialization here, it is done
    // in the runtime support library (still return true above).
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        !getLangOpts().OpenMPSimd)
      break;
    const auto *IRef = C->varlist_begin();
    const auto *IDestRef = C->destination_exprs().begin();
    for (const Expr *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the end
      // of the loop.
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // DestVD resolves lazily to the address of the *original* variable
        // so the final copy-back has somewhere to write.
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() {
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                          /*RefersToEnclosingVariableOrCapture=*/
                              CapturedStmtInfo->lookup(OrigVD) != nullptr,
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress(*this);
        });
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in codegen
        // for 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD, C,
                                                               OrigVD]() {
            if (C->getKind() == OMPC_LASTPRIVATE_conditional) {
              // Create/init the special runtime-managed variable used for
              // 'lastprivate(conditional:...)'.
              Address VDAddr =
                  CGM.getOpenMPRuntime().emitLastprivateConditionalInit(*this,
                                                                        OrigVD);
              setAddrOfLocalVar(VD, VDAddr);
              return VDAddr;
            }
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            return GetAddrOfLocalVar(VD);
          });
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}
1069
1070
/// Emit the final copy-back for the 'lastprivate' clauses on directive \p D:
/// in the thread/iteration that executed the sequentially-last iteration,
/// copy each private copy back into the original variable.
///
/// \param NoFinals       If true, loop counters are not updated with their
///                       final expressions (they are just marked as handled).
/// \param IsLastIterCond If non-null, guard the whole copy-back with
///                       `if (IsLastIterCond)`; if null, emit it
///                       unconditionally.
void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    // Emit implicit barrier if at least one lastprivate conditional is found
    // and this is not a simd mode.
    if (!getLangOpts().OpenMPSimd &&
        llvm::any_of(D.getClausesOfKind<OMPLastprivateClause>(),
                     [](const OMPLastprivateClause *C) {
                       return C->getKind() == OMPC_LASTPRIVATE_conditional;
                     })) {
      CGM.getOpenMPRuntime().emitBarrierCall(*this, D.getBeginLoc(),
                                             OMPD_unknown,
                                             /*EmitChecks=*/false,
                                             /*ForceSimpleCall=*/true);
    }
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (const Expr *F : LoopDirective->finals()) {
      // NOTE: this local 'D' (the counter's VarDecl) intentionally shadows
      // the directive parameter within this loop body.
      const auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    // The three iterators advance in lockstep with assignment_ops().
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If lastprivate variable is a loop control variable for loop-based
        // directive, update its value before copyin back to original
        // variable.
        if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        // Reference-typed privates store an address; load through it.
        if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr =
              Address(Builder.CreateLoad(PrivateAddr),
                      getNaturalTypeAlignment(RefTy->getPointeeType()));
        // Store the last value to the private copy in the last iteration.
        if (C->getKind() == OMPC_LASTPRIVATE_conditional)
          CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate(
              *this, MakeAddrLValue(PrivateAddr, (*IRef)->getType()), PrivateVD,
              (*IRef)->getExprLoc());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}
1159
1160
/// Emit initialization for the variables of all 'reduction' clauses on
/// directive \p D: create and initialize the thread-private reduction
/// copies and register both the private copies and the LHS/RHS helper
/// variables in \p PrivateScope.
void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  // Flatten all reduction clauses into parallel arrays; the five lists stay
  // index-aligned for the emission loop below.
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    for (const Expr *Ref : C->varlists()) {
      Shareds.emplace_back(Ref);
      Privates.emplace_back(*IPriv);
      ReductionOps.emplace_back(*IRed);
      LHSs.emplace_back(*ILHS);
      RHSs.emplace_back(*IRHS);
      std::advance(IPriv, 1);
      std::advance(IRed, 1);
      std::advance(ILHS, 1);
      std::advance(IRHS, 1);
    }
  }
  ReductionCodeGen RedCG(Shareds, Privates, ReductionOps);
  unsigned Count = 0;
  auto ILHS = LHSs.begin();
  auto IRHS = RHSs.begin();
  auto IPriv = Privates.begin();
  for (const Expr *IRef : Shareds) {
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
    // Emit private VarDecl with reduction init.
    RedCG.emitSharedLValue(*this, Count);
    RedCG.emitAggregateType(*this, Count);
    AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
    RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
                             RedCG.getSharedLValue(Count),
                             [&Emission](CodeGenFunction &CGF) {
                               CGF.EmitAutoVarInit(Emission);
                               return true;
                             });
    EmitAutoVarCleanups(Emission);
    Address BaseAddr = RedCG.adjustPrivateAddress(
        *this, Count, Emission.getAllocatedAddress());
    bool IsRegistered = PrivateScope.addPrivate(
        RedCG.getBaseDecl(Count), [BaseAddr]() { return BaseAddr; });
    assert(IsRegistered && "private var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
    const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
    QualType Type = PrivateVD->getType();
    bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
    // Three cases: VLA-typed array sections, scalar sections/subscripts, and
    // everything else (plain variables and constant-size arrays).
    if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count, this]() {
        return RedCG.getSharedLValue(Count).getAddress(*this);
      });
      PrivateScope.addPrivate(
          RHSVD, [this, PrivateVD]() { return GetAddrOfLocalVar(PrivateVD); });
    } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
               isa<ArraySubscriptExpr>(IRef)) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count, this]() {
        return RedCG.getSharedLValue(Count).getAddress(*this);
      });
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() {
        return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
                                            ConvertTypeForMem(RHSVD->getType()),
                                            "rhs.begin");
      });
    } else {
      QualType Type = PrivateVD->getType();
      bool IsArray = getContext().getAsArrayType(Type) != nullptr;
      Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress(*this);
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      if (IsArray) {
        // Arrays are decayed to a pointer to the element type expected by
        // the LHS helper variable.
        OriginalAddr = Builder.CreateElementBitCast(
            OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
      }
      PrivateScope.addPrivate(LHSVD, [OriginalAddr]() { return OriginalAddr; });
      PrivateScope.addPrivate(
          RHSVD, [this, PrivateVD, RHSVD, IsArray]() {
            return IsArray
                       ? Builder.CreateElementBitCast(
                             GetAddrOfLocalVar(PrivateVD),
                             ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
                       : GetAddrOfLocalVar(PrivateVD);
          });
    }
    ++ILHS;
    ++IRHS;
    ++IPriv;
    ++Count;
  }
}
1263
1264
void CodeGenFunction::EmitOMPReductionClauseFinal(
1265
11.7k
    const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
1266
11.7k
  if (!HaveInsertPoint())
1267
2
    return;
1268
11.7k
  llvm::SmallVector<const Expr *, 8> Privates;
1269
11.7k
  llvm::SmallVector<const Expr *, 8> LHSExprs;
1270
11.7k
  llvm::SmallVector<const Expr *, 8> RHSExprs;
1271
11.7k
  llvm::SmallVector<const Expr *, 8> ReductionOps;
1272
11.7k
  bool HasAtLeastOneReduction = false;
1273
11.7k
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1274
566
    HasAtLeastOneReduction = true;
1275
566
    Privates.append(C->privates().begin(), C->privates().end());
1276
566
    LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1277
566
    RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1278
566
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1279
566
  }
1280
11.7k
  if (HasAtLeastOneReduction) {
1281
475
    bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
1282
475
                      
isOpenMPParallelDirective(D.getDirectiveKind())469
||
1283
475
                      
ReductionKind == OMPD_simd255
;
1284
475
    bool SimpleReduction = ReductionKind == OMPD_simd;
1285
475
    // Emit nowait reduction if nowait clause is present or directive is a
1286
475
    // parallel directive (it always has implicit barrier).
1287
475
    CGM.getOpenMPRuntime().emitReduction(
1288
475
        *this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps,
1289
475
        {WithNowait, SimpleReduction, ReductionKind});
1290
475
  }
1291
11.7k
}
1292
1293
static void emitPostUpdateForReductionClause(
1294
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
1295
11.7k
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
1296
11.7k
  if (!CGF.HaveInsertPoint())
1297
0
    return;
1298
11.7k
  llvm::BasicBlock *DoneBB = nullptr;
1299
11.7k
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1300
574
    if (const Expr *PostUpdate = C->getPostUpdateExpr()) {
1301
4
      if (!DoneBB) {
1302
4
        if (llvm::Value *Cond = CondGen(CGF)) {
1303
0
          // If the first post-update expression is found, emit conditional
1304
0
          // block if it was requested.
1305
0
          llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
1306
0
          DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
1307
0
          CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1308
0
          CGF.EmitBlock(ThenBB);
1309
0
        }
1310
4
      }
1311
4
      CGF.EmitIgnoredExpr(PostUpdate);
1312
4
    }
1313
574
  }
1314
11.7k
  if (DoneBB)
1315
0
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
1316
11.7k
}
1317
1318
namespace {
/// Codegen lambda for appending distribute lower and upper bounds to an
/// outlined parallel function. This is necessary for combined constructs
/// such as 'distribute parallel for', where the 'for' part must see the
/// chunk bounds computed by the enclosing 'distribute'.
typedef llvm::function_ref<void(CodeGenFunction &,
                                const OMPExecutableDirective &,
                                llvm::SmallVectorImpl<llvm::Value *> &)>
    CodeGenBoundParametersTy;
} // anonymous namespace
1327
1328
static void
1329
checkForLastprivateConditionalUpdate(CodeGenFunction &CGF,
1330
12.3k
                                     const OMPExecutableDirective &S) {
1331
12.3k
  if (CGF.getLangOpts().OpenMP < 50)
1332
10.6k
    return;
1333
1.68k
  llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls;
1334
1.68k
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
1335
20
    for (const Expr *Ref : C->varlists()) {
1336
20
      if (!Ref->getType()->isScalarType())
1337
0
        continue;
1338
20
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1339
20
      if (!DRE)
1340
0
        continue;
1341
20
      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1342
20
      CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1343
20
    }
1344
20
  }
1345
1.68k
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
1346
188
    for (const Expr *Ref : C->varlists()) {
1347
188
      if (!Ref->getType()->isScalarType())
1348
48
        continue;
1349
140
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1350
140
      if (!DRE)
1351
0
        continue;
1352
140
      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1353
140
      CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1354
140
    }
1355
88
  }
1356
1.68k
  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
1357
140
    for (const Expr *Ref : C->varlists()) {
1358
140
      if (!Ref->getType()->isScalarType())
1359
0
        continue;
1360
140
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1361
140
      if (!DRE)
1362
0
        continue;
1363
140
      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1364
140
      CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1365
140
    }
1366
120
  }
1367
1.68k
  // Privates should ne analyzed since they are not captured at all.
1368
1.68k
  // Task reductions may be skipped - tasks are ignored.
1369
1.68k
  // Firstprivates do not return value but may be passed by reference - no need
1370
1.68k
  // to check for updated lastprivate conditional.
1371
1.68k
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
1372
612
    for (const Expr *Ref : C->varlists()) {
1373
612
      if (!Ref->getType()->isScalarType())
1374
4
        continue;
1375
608
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1376
608
      if (!DRE)
1377
0
        continue;
1378
608
      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1379
608
    }
1380
335
  }
1381
1.68k
  CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional(
1382
1.68k
      CGF, S, PrivateDecls);
1383
1.68k
}
1384
1385
/// Common codegen for 'parallel'-based directives: outlines the region body,
/// applies num_threads/proc_bind/if clauses, and emits the runtime call that
/// forks the parallel region. \p CodeGenBoundParameters appends extra
/// arguments (e.g. distribute chunk bounds) for combined constructs.
static void emitCommonOMPParallelDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    const CodeGenBoundParametersTy &CodeGenBoundParameters) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  // Outline the region body into a function callable by the runtime.
  llvm::Function *OutlinedFn =
      CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
          S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
  // Evaluate and communicate num_threads before forking; the cleanup scope
  // runs destructors of any temporaries produced by the clause expression.
  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    llvm::Value *NumThreads =
        CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                           /*IgnoreResultAssign=*/true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getBeginLoc());
  }
  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
    CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
    CGF.CGM.getOpenMPRuntime().emitProcBindClause(
        CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getBeginLoc());
  }
  // Pick the first 'if' clause that applies to 'parallel' (either unmodified
  // or with an explicit 'parallel' name modifier).
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_parallel) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPParallelScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
  // lower and upper bounds with the pragma 'for' chunking mechanism.
  // The following lambda takes care of appending the lower and upper bound
  // parameters when necessary.
  CodeGenBoundParameters(CGF, S, CapturedVars);
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn,
                                              CapturedVars, IfCond);
}
1426
1427
/// No-op CodeGenBoundParametersTy callback for directives that have no
/// 'distribute' bounds to forward to the outlined parallel function.
static void emitEmptyBoundParameters(CodeGenFunction &,
                                     const OMPExecutableDirective &,
                                     llvm::SmallVectorImpl<llvm::Value *> &) {}
1430
1431
716
// Emits code for '#pragma omp parallel'. When the experimental
// OpenMPIRBuilder is enabled, the region is generated through its
// CreateParallel API; otherwise the classic CGOpenMPRuntime outlining path
// is used.
void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder()) {
    // Check if we have any if clause associated with the directive.
    llvm::Value *IfCond = nullptr;
    if (const auto *C = S.getSingleClause<OMPIfClause>())
      IfCond = EmitScalarExpr(C->getCondition(),
                              /*IgnoreResultAssign=*/true);

    llvm::Value *NumThreads = nullptr;
    if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>())
      NumThreads = EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                  /*IgnoreResultAssign=*/true);

    ProcBindKind ProcBind = OMP_PROC_BIND_default;
    if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>())
      ProcBind = ProcBindClause->getProcBindKind();

    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    // The cleanup callback that finalizes all variables at the given location,
    // thus calls destructors etc.
    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    // Privatization callback that performs appropriate action for
    // shared/private/firstprivate/lastprivate/copyin/... variables.
    //
    // TODO: This defaults to shared right now.
    auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                     llvm::Value &Val, llvm::Value *&ReplVal) {
      // The next line is appropriate only for variables (Val) with the
      // data-sharing attribute "shared".
      ReplVal = &Val;

      return CodeGenIP;
    };

    const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
    const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt();

    // Body generator: emits the captured statement at the given insert point,
    // branching to ContinuationBB when the region is left.
    auto BodyGenCB = [ParallelRegionBodyStmt,
                      this](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                            llvm::BasicBlock &ContinuationBB) {
      OMPBuilderCBHelpers::OutlinedRegionBodyRAII ORB(*this, AllocaIP,
                                                      ContinuationBB);
      OMPBuilderCBHelpers::EmitOMPRegionBody(*this, ParallelRegionBodyStmt,
                                             CodeGenIP, ContinuationBB);
    };

    CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
    Builder.restoreIP(OMPBuilder->CreateParallel(Builder, BodyGenCB, PrivCB,
                                                 FiniCB, IfCond, NumThreads,
                                                 ProcBind, S.hasCancel()));
    return;
  }

  // Emit parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    bool Copyins = CGF.EmitOMPCopyinClause(S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    if (Copyins) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // propagation master's thread values of threadprivate variables to local
      // instances of that variables of all other implicit threads.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  {
    // Lastprivate-conditional tracking is disabled inside the region; it is
    // checked for the outer context after the region is emitted (below).
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
                                   emitEmptyBoundParameters);
    emitPostUpdateForReductionClause(*this, S,
                                     [](CodeGenFunction &) { return nullptr; });
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}
1520
1521
/// Recursively emits the body of a collapsed loop nest: walks through
/// \p MaxLevel nested loops (following \p NextLoop at each level, possibly
/// through imperfectly nested compound statements) and emits the innermost
/// body plus any intervening statements.
static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop,
                     int MaxLevel, int Level = 0) {
  assert(Level < MaxLevel && "Too deep lookup during loop body codegen.");
  const Stmt *SimplifiedS = S->IgnoreContainers();
  if (const auto *CS = dyn_cast<CompoundStmt>(SimplifiedS)) {
    PrettyStackTraceLoc CrashInfo(
        CGF.getContext().getSourceManager(), CS->getLBracLoc(),
        "LLVM IR generation of compound statement ('{}')");

    // Keep track of the current cleanup stack depth, including debug scopes.
    CodeGenFunction::LexicalScope Scope(CGF, S->getSourceRange());
    // Recurse at the same level: the next collapsed loop may be one of the
    // compound statement's children (imperfect nesting).
    for (const Stmt *CurStmt : CS->body())
      emitBody(CGF, CurStmt, NextLoop, MaxLevel, Level);
    return;
  }
  if (SimplifiedS == NextLoop) {
    // This statement is the next loop of the collapsed nest; descend into
    // its body rather than emitting the loop itself (its control flow is
    // handled by the flattened iteration space).
    if (const auto *For = dyn_cast<ForStmt>(SimplifiedS)) {
      S = For->getBody();
    } else {
      assert(isa<CXXForRangeStmt>(SimplifiedS) &&
             "Expected canonical for loop or range-based for loop.");
      const auto *CXXFor = cast<CXXForRangeStmt>(SimplifiedS);
      // Range-based for: the loop variable declaration still must be emitted.
      CGF.EmitStmt(CXXFor->getLoopVarStmt());
      S = CXXFor->getBody();
    }
    if (Level + 1 < MaxLevel) {
      // More collapsed levels remain: locate the next inner loop and recurse.
      NextLoop = OMPLoopDirective::tryToFindNextInnerLoop(
          S, /*TryImperfectlyNestedLoops=*/true);
      emitBody(CGF, S, NextLoop, MaxLevel, Level + 1);
      return;
    }
  }
  // Innermost level (or a non-loop statement): emit it directly.
  CGF.EmitStmt(S);
}
1555
1556
// Emits one iteration of an OpenMP loop directive's body: updates the loop
// counters and linear variables from the logical iteration variable, checks
// non-rectangular bounds, then emits the (possibly collapsed) body.
void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
                                      JumpDest LoopExit) {
  RunCleanupsScope BodyScope(*this);
  // Update counters values on current iteration.
  for (const Expr *UE : D.updates())
    EmitIgnoredExpr(UE);
  // Update the linear variables.
  // In distribute directives only loop counters may be marked as linear, no
  // need to generate the code for them.
  if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
    for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
      for (const Expr *UE : C->updates())
        EmitIgnoredExpr(UE);
    }
  }

  // On a continue in the body, jump to the end.
  JumpDest Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
  for (const Expr *E : D.finals_conditions()) {
    // Null entries correspond to rectangular dimensions with no extra check.
    if (!E)
      continue;
    // Check that loop counter in non-rectangular nest fits into the iteration
    // space.
    llvm::BasicBlock *NextBB = createBasicBlock("omp.body.next");
    EmitBranchOnBoolExpr(E, NextBB, Continue.getBlock(),
                         getProfileCount(D.getBody()));
    EmitBlock(NextBB);
  }
  // Emit loop variables for C++ range loops.
  const Stmt *Body =
      D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
  // Emit loop body.
  emitBody(*this, Body,
           OMPLoopDirective::tryToFindNextInnerLoop(
               Body, /*TryImperfectlyNestedLoops=*/true),
           D.getCollapsedNumber());

  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
}
1598
1599
// Emits the generic inner-loop skeleton (cond / body / inc blocks) used by
// OpenMP worksharing and simd codegen. BodyGen emits the body, PostIncGen
// runs extra code after the increment (e.g. combiner dispatch).
void CodeGenFunction::EmitOMPInnerLoop(
    const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> BodyGen,
    const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  const SourceRange R = S.getSourceRange();
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  llvm::BasicBlock *LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
  if (ExitBlock != LoopExit.getBlock()) {
    // Route the exit through the cleanup staging block.
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  incrementProfileCounter(&S);

  // Create a block for the increment.
  JumpDest Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  BodyGen(*this);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  PostIncGen(*this);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}
1647
1648
6.64k
// Emits the initializers of all 'linear' clause variables on the directive.
// Returns true if any linear variable was emitted.
bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // Emit inits for the linear variables.
  bool HasLinears = false;
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Init : C->inits()) {
      HasLinears = true;
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
      if (const auto *Ref =
              dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
        // The init refers to the original variable: build a DeclRefExpr so
        // the captured copy (if any) is resolved via CapturedStmtInfo.
        AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
        const auto *OrigVD = cast<VarDecl>(Ref->getDecl());
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        VD->getInit()->getType(), VK_LValue,
                        VD->getInit()->getExprLoc());
        EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
                                                VD->getType()),
                       /*capturedByInit=*/false);
        EmitAutoVarCleanups(Emission);
      } else {
        // Otherwise the init is self-contained; emit the declaration as-is.
        EmitVarDecl(*VD);
      }
    }
    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    if (const auto *CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
      if (const auto *SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
        EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
        // Emit calculation of the linear step.
        EmitIgnoredExpr(CS);
      }
  }
  return HasLinears;
}
1684
1685
// Emits the final values of 'linear' clause variables back into the original
// variables, optionally guarded by a condition produced by CondGen (e.g.
// "only on the thread that executed the last iteration").
void CodeGenFunction::EmitOMPLinearClauseFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  // Emit the final values of the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto IC = C->varlist_begin();
    for (const Expr *F : C->finals()) {
      if (!DoneBB) {
        if (llvm::Value *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          llvm::BasicBlock *ThenBB = createBasicBlock(".omp.linear.pu");
          DoneBB = createBasicBlock(".omp.linear.pu.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = EmitLValue(&DRE).getAddress(*this);
      // Temporarily map the variable to its original address so the final
      // expression stores into the original variable, not the private copy.
      CodeGenFunction::OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
      ++IC;
    }
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}
1722
1723
static void emitAlignedClause(CodeGenFunction &CGF,
1724
9.59k
                              const OMPExecutableDirective &D) {
1725
9.59k
  if (!CGF.HaveInsertPoint())
1726
0
    return;
1727
9.59k
  for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
1728
310
    llvm::APInt ClauseAlignment(64, 0);
1729
310
    if (const Expr *AlignmentExpr = Clause->getAlignment()) {
1730
106
      auto *AlignmentCI =
1731
106
          cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
1732
106
      ClauseAlignment = AlignmentCI->getValue();
1733
106
    }
1734
358
    for (const Expr *E : Clause->varlists()) {
1735
358
      llvm::APInt Alignment(ClauseAlignment);
1736
358
      if (Alignment == 0) {
1737
252
        // OpenMP [2.8.1, Description]
1738
252
        // If no optional parameter is specified, implementation-defined default
1739
252
        // alignments for SIMD instructions on the target platforms are assumed.
1740
252
        Alignment =
1741
252
            CGF.getContext()
1742
252
                .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
1743
252
                    E->getType()->getPointeeType()))
1744
252
                .getQuantity();
1745
252
      }
1746
358
      assert((Alignment == 0 || Alignment.isPowerOf2()) &&
1747
358
             "alignment is not power of 2");
1748
358
      if (Alignment != 0) {
1749
358
        llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
1750
358
        CGF.emitAlignmentAssumption(
1751
358
            PtrValue, E, /*No second loc needed*/ SourceLocation(),
1752
358
            llvm::ConstantInt::get(CGF.getLLVMContext(), Alignment));
1753
358
      }
1754
358
    }
1755
310
  }
1756
9.59k
}
1757
1758
// Creates private copies of the loop counters of \p S (and of the extra
// counters required by ordered(n) clauses) and registers them in \p LoopScope.
void CodeGenFunction::EmitOMPPrivateLoopCounters(
    const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
  if (!HaveInsertPoint())
    return;
  auto I = S.private_counters().begin();
  for (const Expr *E : S.counters()) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
    // Emit var without initialization.
    AutoVarEmission VarEmission = EmitAutoVarAlloca(*PrivateVD);
    EmitAutoVarCleanups(VarEmission);
    // Erase so the mapping below (not the raw alloca) is authoritative.
    LocalDeclMap.erase(PrivateVD);
    (void)LoopScope.addPrivate(VD, [&VarEmission]() {
      return VarEmission.getAllocatedAddress();
    });
    if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
        VD->hasGlobalStorage()) {
      // The original counter is addressable in this function (local, captured
      // or global): map the private counter to the original's address so
      // both names resolve to the same storage.
      (void)LoopScope.addPrivate(PrivateVD, [this, VD, E]() {
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD),
                        LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
                        E->getType(), VK_LValue, E->getExprLoc());
        return EmitLValue(&DRE).getAddress(*this);
      });
    } else {
      // Otherwise both names use the freshly allocated private storage.
      (void)LoopScope.addPrivate(PrivateVD, [&VarEmission]() {
        return VarEmission.getAllocatedAddress();
      });
    }
    ++I;
  }
  // Privatize extra loop counters used in loops for ordered(n) clauses.
  for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) {
    if (!C->getNumForLoops())
      continue;
    // Counters beyond the collapsed depth, up to the ordered(n) depth.
    for (unsigned I = S.getCollapsedNumber(),
                  E = C->getLoopNumIterations().size();
         I < E; ++I) {
      const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I));
      const auto *VD = cast<VarDecl>(DRE->getDecl());
      // Override only those variables that can be captured to avoid re-emission
      // of the variables declared within the loops.
      if (DRE->refersToEnclosingVariableOrCapture()) {
        (void)LoopScope.addPrivate(VD, [this, DRE, VD]() {
          return CreateMemTemp(DRE->getType(), VD->getName());
        });
      }
    }
  }
}
1807
1808
/// Emits the pre-condition check of an OpenMP loop: initializes (temporary)
/// copies of the loop counters and branches to \p TrueBlock when the loop
/// executes at least one iteration, \p FalseBlock otherwise.
static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
                        const Expr *Cond, llvm::BasicBlock *TrueBlock,
                        llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
  if (!CGF.HaveInsertPoint())
    return;
  {
    // Scope the counter privatization so the temporaries do not leak into
    // the actual loop emission that follows.
    CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
    CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
    (void)PreCondScope.Privatize();
    // Get initial values of real counters.
    for (const Expr *I : S.inits()) {
      CGF.EmitIgnoredExpr(I);
    }
  }
  // Create temp loop control variables with their init values to support
  // non-rectangular loops.
  CodeGenFunction::OMPMapVars PreCondVars;
  for (const Expr *E : S.dependent_counters()) {
    // Null entries mark counters that do not depend on an outer counter.
    if (!E)
      continue;
    assert(!E->getType().getNonReferenceType()->isRecordType() &&
           "dependent counter must not be an iterator.");
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Address CounterAddr =
        CGF.CreateMemTemp(VD->getType().getNonReferenceType());
    (void)PreCondVars.setVarAddr(CGF, VD, CounterAddr);
  }
  (void)PreCondVars.apply(CGF);
  for (const Expr *E : S.dependent_inits()) {
    if (!E)
      continue;
    CGF.EmitIgnoredExpr(E);
  }
  // Check that loop is executed at least one time.
  CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
  PreCondVars.restore(CGF);
}
1845
1846
// Registers private copies for all 'linear' clause variables of \p D in
// \p PrivateScope. Variables that are also simd loop counters get their
// private declaration emitted directly (they are privatized as counters).
void CodeGenFunction::EmitOMPLinearClause(
    const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  // Canonical decls of the simd loop counters, which must not be privatized
  // twice.
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    const auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (const Expr *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto CurPrivate = C->privates().begin();
    for (const Expr *E : C->varlists()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      const auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
      if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
        bool IsRegistered = PrivateScope.addPrivate(VD, [this, PrivateVD]() {
          // Emit private VarDecl with copy init.
          EmitVarDecl(*PrivateVD);
          return GetAddrOfLocalVar(PrivateVD);
        });
        assert(IsRegistered && "linear var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      } else {
        // Loop counter: already privatized elsewhere, only emit the decl.
        EmitVarDecl(*PrivateVD);
      }
      ++CurPrivate;
    }
  }
}
1880
1881
static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
1882
                                     const OMPExecutableDirective &D,
1883
6.55k
                                     bool IsMonotonic) {
1884
6.55k
  if (!CGF.HaveInsertPoint())
1885
0
    return;
1886
6.55k
  if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
1887
244
    RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
1888
244
                                 /*ignoreResult=*/true);
1889
244
    auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1890
244
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1891
244
    // In presence of finite 'safelen', it may be unsafe to mark all
1892
244
    // the memory instructions parallel, because loop-carried
1893
244
    // dependences of 'safelen' iterations are possible.
1894
244
    if (!IsMonotonic)
1895
144
      CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
1896
6.31k
  } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
1897
168
    RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
1898
168
                                 /*ignoreResult=*/true);
1899
168
    auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1900
168
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1901
168
    // In presence of finite 'safelen', it may be unsafe to mark all
1902
168
    // the memory instructions parallel, because loop-carried
1903
168
    // dependences of 'safelen' iterations are possible.
1904
168
    CGF.LoopStack.setParallel(/*Enable=*/false);
1905
168
  }
1906
6.55k
}
1907
1908
/// Configure the loop metadata stack for a simd loop: enable vectorization,
/// mark accesses parallel (unless the schedule is monotonic), and apply
/// 'simdlen'/'safelen' and 'order(concurrent)' clauses of \p D.
void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
                                      bool IsMonotonic) {
  // Walk clauses and process safelen/lastprivate.
  LoopStack.setParallel(!IsMonotonic);
  LoopStack.setVectorizeEnable();
  emitSimdlenSafelenClause(*this, D, IsMonotonic);
  // 'order(concurrent)' overrides the monotonic/safelen decisions above and
  // forces parallel marking.
  if (const auto *C = D.getSingleClause<OMPOrderClause>())
    if (C->getKind() == OMPC_ORDER_concurrent)
      LoopStack.setParallel(/*Enable=*/true);
}
1918
1919
/// Emit the final-value updates of the loop counters of \p D after a simd
/// loop: for each counter whose original variable is visible here, assign it
/// the value computed by the corresponding 'finals()' expression (evaluated
/// against the private counter copy).
/// \param CondGen Returns a condition value to guard the updates with, or
/// null for unconditional updates; invoked at most once, lazily, before the
/// first update is emitted.
void CodeGenFunction::EmitOMPSimdFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  // counters(), private_counters() and finals() are parallel lists; walk
  // them in lockstep.
  auto IC = D.counters().begin();
  auto IPC = D.private_counters().begin();
  for (const Expr *F : D.finals()) {
    const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
    const auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
    // Only update counters that are actually addressable from this function
    // (local, captured, global, or a captured-expression decl).
    if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
        OrigVD->hasGlobalStorage() || CED) {
      if (!DoneBB) {
        if (llvm::Value *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          llvm::BasicBlock *ThenBB = createBasicBlock(".omp.final.then");
          DoneBB = createBasicBlock(".omp.final.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      Address OrigAddr = Address::invalid();
      if (CED) {
        // Captured-expression decl: evaluate its init to get the address of
        // the underlying variable.
        OrigAddr =
            EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(*this);
      } else {
        // Build a synthetic reference to the private counter copy.
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD),
                        /*RefersToEnclosingVariableOrCapture=*/false,
                        (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
        OrigAddr = EmitLValue(&DRE).getAddress(*this);
      }
      // Temporarily remap the original variable to OrigAddr so the final
      // expression F writes through the intended storage.
      OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
    }
    ++IC;
    ++IPC;
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}
1964
1965
/// Emit the body of loop directive \p S followed by a debug stop point, so a
/// debugger can associate the end of each iteration with the directive.
/// Matches the CodeGenLoopTy callback signature used by the outer-loop
/// emitters.
static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
                                         const OMPLoopDirective &S,
                                         CodeGenFunction::JumpDest LoopExit) {
  CGF.EmitOMPLoopBody(S, LoopExit);
  CGF.EmitStopPoint(&S);
}
1971
1972
/// Emit a helper variable and return corresponding lvalue.
1973
static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
1974
30.1k
                               const DeclRefExpr *Helper) {
1975
30.1k
  auto VDecl = cast<VarDecl>(Helper->getDecl());
1976
30.1k
  CGF.EmitVarDecl(*VDecl);
1977
30.1k
  return CGF.EmitLValue(Helper);
1978
30.1k
}
1979
1980
/// Shared driver for emitting a (possibly simd) loop body.  Runs
/// \p SimdInitGen followed by \p BodyCodeGen, honoring a 'simd'-modified
/// 'if' clause (OpenMP 5.0): when the clause condition is false at run time,
/// the body is emitted with vectorization explicitly disabled instead.
static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S,
                               const RegionCodeGenTy &SimdInitGen,
                               const RegionCodeGenTy &BodyCodeGen) {
  // "if" condition holds: set up simd loop metadata, then emit the body.
  auto &&ThenGen = [&S, &SimdInitGen, &BodyCodeGen](CodeGenFunction &CGF,
                                                    PrePostActionTy &) {
    CGOpenMPRuntime::NontemporalDeclsRAII NontemporalsRegion(CGF.CGM, S);
    CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
    SimdInitGen(CGF);

    BodyCodeGen(CGF);
  };
  // "if" condition fails: same body, but vectorization disabled.
  auto &&ElseGen = [&BodyCodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
    CGF.LoopStack.setVectorizeEnable(/*Enable=*/false);

    BodyCodeGen(CGF);
  };
  const Expr *IfCond = nullptr;
  if (isOpenMPSimdDirective(S.getDirectiveKind())) {
    for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
      // Only OpenMP >= 5.0 allows 'if' to affect 'simd'; pick the clause
      // with no modifier or with the 'simd' modifier.
      if (CGF.getLangOpts().OpenMP >= 50 &&
          (C->getNameModifier() == OMPD_unknown ||
           C->getNameModifier() == OMPD_simd)) {
        IfCond = C->getCondition();
        break;
      }
    }
  }
  if (IfCond) {
    CGF.CGM.getOpenMPRuntime().emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    // No applicable 'if' clause: unconditionally take the "then" path.
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2015
2016
/// Emit the full codegen for a simd region of directive \p S: the precondition
/// check, iteration variable, clause init (aligned/linear/private/reduction/
/// lastprivate), the inner loop itself, and the final updates.
static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
                              PrePostActionTy &Action) {
  Action.Enter(CGF);
  assert(isOpenMPSimdDirective(S.getDirectiveKind()) &&
         "Expected simd directive");
  OMPLoopScope PreInitScope(CGF, S);
  // if (PreCond) {
  //   for (IV in 0..LastIteration) BODY;
  //   <Final counter/linear vars updates>;
  // }
  //
  // Combined distribute/worksharing/taskloop directives reference LB/UB
  // helper variables; emit them first so later expressions can use them.
  if (isOpenMPDistributeDirective(S.getDirectiveKind()) ||
      isOpenMPWorksharingDirective(S.getDirectiveKind()) ||
      isOpenMPTaskLoopDirective(S.getDirectiveKind())) {
    (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()));
    (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()));
  }

  // Emit: if (PreCond) - begin.
  // If the condition constant folds and can be elided, avoid emitting the
  // whole loop.
  bool CondConstant;
  llvm::BasicBlock *ContBlock = nullptr;
  if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
    if (!CondConstant)
      return;
  } else {
    llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("simd.if.then");
    ContBlock = CGF.createBasicBlock("simd.if.end");
    emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                CGF.getProfileCount(&S));
    CGF.EmitBlock(ThenBlock);
    CGF.incrementProfileCounter(&S);
  }

  // Emit the loop iteration variable.
  const Expr *IVExpr = S.getIterationVariable();
  const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
  CGF.EmitVarDecl(*IVDecl);
  CGF.EmitIgnoredExpr(S.getInit());

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on
  // each iteration (e.g., it is foldable into a constant).
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    CGF.EmitIgnoredExpr(S.getCalcLastIteration());
  }

  emitAlignedClause(CGF, S);
  (void)CGF.EmitOMPLinearClauseInit(S);
  {
    // Privatization scope for loop counters and data-sharing clauses; it
    // must close before the linear-clause final updates below.
    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    CGF.EmitOMPLinearClause(S, LoopScope);
    CGF.EmitOMPPrivateClause(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
        CGF, S, CGF.EmitLValue(S.getIterationVariable()));
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);

    emitCommonSimdLoop(
        CGF, S,
        [&S](CodeGenFunction &CGF, PrePostActionTy &) {
          CGF.EmitOMPSimdInit(S);
        },
        [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
          CGF.EmitOMPInnerLoop(
              S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
              [&S](CodeGenFunction &CGF) {
                CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest());
                CGF.EmitStopPoint(&S);
              },
              [](CodeGenFunction &) {});
        });
    // Unconditional final counter updates (null condition generator).
    CGF.EmitOMPSimdFinal(S, [](CodeGenFunction &) { return nullptr; });
    // Emit final copy of the lastprivate variables at the end of loops.
    if (HasLastprivateClause)
      CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
    emitPostUpdateForReductionClause(CGF, S,
                                     [](CodeGenFunction &) { return nullptr; });
  }
  CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; });
  // Emit: if (PreCond) - end.
  if (ContBlock) {
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
}
2110
2111
166
void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
2112
166
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2113
166
    emitOMPSimdRegion(CGF, S, Action);
2114
166
  };
2115
166
  {
2116
166
    auto LPCRegion =
2117
166
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
2118
166
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
2119
166
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2120
166
  }
2121
166
  // Check for outer lastprivate conditional update.
2122
166
  checkForLastprivateConditionalUpdate(*this, S);
2123
166
}
2124
2125
/// Emit the outer dispatch loop of a worksharing/distribute construct whose
/// schedule requires one: the loop that repeatedly fetches the next chunk
/// (dynamically via the runtime, or statically by stride-advancing LB/UB)
/// and runs the inner loop over that chunk via \p CodeGenLoop.
/// \param DynamicOrOrdered True for dynamic/guided/auto/runtime schedules or
/// ordered loops, where chunks come from __kmpc_dispatch_next.
/// \param CodeGenOrdered Callback emitting the ordered-iteration-end call.
void CodeGenFunction::EmitOMPOuterLoop(
    bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
    CodeGenFunction::OMPPrivateScope &LoopScope,
    const CodeGenFunction::OMPLoopArguments &LoopArgs,
    const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
    const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  JumpDest LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");

  // Start the loop with a block that tests the condition.
  llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  const SourceRange R = S.getSourceRange();
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));

  llvm::Value *BoolCondVal = nullptr;
  if (!DynamicOrOrdered) {
    // UB = min(UB, GlobalUB) or
    // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
    // 'distribute parallel for')
    EmitIgnoredExpr(LoopArgs.EUB);
    // IV = LB
    EmitIgnoredExpr(LoopArgs.Init);
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
  } else {
    // Ask the runtime for the next chunk; it returns false when exhausted.
    BoolCondVal =
        RT.emitForNext(*this, S.getBeginLoc(), IVSize, IVSigned, LoopArgs.IL,
                       LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
  }

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");

  llvm::BasicBlock *LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);

  // Emit "IV = LB" (in case of static schedule, we have already calculated new
  // LB for loop condition and emitted it above).
  if (DynamicOrOrdered)
    EmitIgnoredExpr(LoopArgs.Init);

  // Create a block for the increment.
  JumpDest Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  emitCommonSimdLoop(
      *this, S,
      [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) {
        // Generate !llvm.loop.parallel metadata for loads and stores for loops
        // with dynamic/guided scheduling and without ordered clause.
        if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
          CGF.LoopStack.setParallel(!IsMonotonic);
          if (const auto *C = S.getSingleClause<OMPOrderClause>())
            if (C->getKind() == OMPC_ORDER_concurrent)
              CGF.LoopStack.setParallel(/*Enable=*/true);
        } else {
          CGF.EmitOMPSimdInit(S, IsMonotonic);
        }
      },
      [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered,
       &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
        SourceLocation Loc = S.getBeginLoc();
        // when 'distribute' is not combined with a 'for':
        // while (idx <= UB) { BODY; ++idx; }
        // when 'distribute' is combined with a 'for'
        // (e.g. 'distribute parallel for')
        // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
        CGF.EmitOMPInnerLoop(
            S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
            [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
              CodeGenLoop(CGF, S, LoopExit);
            },
            [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
              CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
            });
      });

  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (!DynamicOrOrdered) {
    // Emit "LB = LB + Stride", "UB = UB + Stride".
    EmitIgnoredExpr(LoopArgs.NextLB);
    EmitIgnoredExpr(LoopArgs.NextUB);
  }

  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());

  // Tell the runtime we are done.
  auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
    if (!DynamicOrOrdered)
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
                                                     S.getDirectiveKind());
  };
  OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
}
2238
2239
/// Emit the outer loop of a worksharing 'for' whose schedule needs one
/// (dynamic/guided/auto/runtime, ordered, or static chunked): initialize the
/// runtime (dispatch_init or for_static_init) and delegate the chunk loop to
/// EmitOMPOuterLoop.
void CodeGenFunction::EmitOMPForOuterLoop(
    const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
    const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
    const OMPLoopArguments &LoopArgs,
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
  const bool DynamicOrOrdered =
      Ordered || RT.isDynamic(ScheduleKind.Schedule);

  assert((Ordered ||
          !RT.isStaticNonchunked(ScheduleKind.Schedule,
                                 LoopArgs.Chunk != nullptr)) &&
         "static non-chunked schedule does not need outer loop");

  // Emit outer loop.
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(dynamic,chunk_size) is specified, the iterations are
  // distributed to threads in the team in chunks as the threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be distributed. Each chunk contains chunk_size
  // iterations, except for the last chunk to be distributed, which may have
  // fewer iterations. When no chunk_size is specified, it defaults to 1.
  //
  // When schedule(guided,chunk_size) is specified, the iterations are assigned
  // to threads in the team in chunks as the executing threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be assigned. For a chunk_size of 1, the size of
  // each chunk is proportional to the number of unassigned iterations divided
  // by the number of threads in the team, decreasing to 1. For a chunk_size
  // with value k (greater than 1), the size of each chunk is determined in the
  // same way, with the restriction that the chunks do not contain fewer than k
  // iterations (except for the last chunk to be assigned, which may have fewer
  // than k iterations).
  //
  // When schedule(auto) is specified, the decision regarding scheduling is
  // delegated to the compiler and/or runtime system. The programmer gives the
  // implementation the freedom to choose any possible mapping of iterations to
  // threads in the team.
  //
  // When schedule(runtime) is specified, the decision regarding scheduling is
  // deferred until run time, and the schedule and chunk size are taken from the
  // run-sched-var ICV. If the ICV is set to auto, the schedule is
  // implementation defined
  //
  // while(__kmpc_dispatch_next(&LB, &UB)) {
  //   idx = LB;
  //   while (idx <= UB) { BODY; ++idx;
  //   __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
  //   } // inner loop
  // }
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(static, chunk_size) is specified, iterations are divided into
  // chunks of size chunk_size, and the chunks are assigned to the threads in
  // the team in a round-robin fashion in the order of the thread number.
  //
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  //   LB = LB + ST;
  //   UB = UB + ST;
  // }
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  if (DynamicOrOrdered) {
    // Dispatch init takes the (possibly non-normalized) LB/UB values
    // computed by the directive-specific bounds callback.
    const std::pair<llvm::Value *, llvm::Value *> DispatchBounds =
        CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
    llvm::Value *LBVal = DispatchBounds.first;
    llvm::Value *UBVal = DispatchBounds.second;
    CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal,
                                                             LoopArgs.Chunk};
    RT.emitForDispatchInit(*this, S.getBeginLoc(), ScheduleKind, IVSize,
                           IVSigned, Ordered, DipatchRTInputValues);
  } else {
    CGOpenMPRuntime::StaticRTInput StaticInit(
        IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
        LoopArgs.ST, LoopArgs.Chunk);
    RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
                         ScheduleKind, StaticInit);
  }

  // For 'ordered' loops, each iteration must notify the runtime when done.
  auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
                                    const unsigned IVSize,
                                    const bool IVSigned) {
    if (Ordered) {
      CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
                                                            IVSigned);
    }
  };

  OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
                                 LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
  OuterLoopArgs.IncExpr = S.getInc();
  OuterLoopArgs.Init = S.getInit();
  OuterLoopArgs.Cond = S.getCond();
  OuterLoopArgs.NextLB = S.getNextLowerBound();
  OuterLoopArgs.NextUB = S.getNextUpperBound();
  EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
                   emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
}
2345
2346
/// No-op CodeGenOrderedTy callback for constructs ('distribute') that never
/// have ordered iterations.
static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
                             const unsigned IVSize, const bool IVSigned) {}
2348
2349
/// Emit the outer chunking loop of a 'distribute' construct (always a static
/// schedule): initialize the distribute runtime and delegate to
/// EmitOMPOuterLoop, selecting the combined-directive ("distribute parallel
/// for") or standalone-'distribute' loop expressions as appropriate.
void CodeGenFunction::EmitOMPDistributeOuterLoop(
    OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
    OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
    const CodeGenLoopTy &CodeGenLoopContent) {

  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  // Emit outer loop.
  // Same behavior as a OMPForOuterLoop, except that schedule cannot be
  // dynamic
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  CGOpenMPRuntime::StaticRTInput StaticInit(
      IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
      LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
  RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit);

  // for combined 'distribute' and 'for' the increment expression of distribute
  // is stored in DistInc. For 'distribute' alone, it is in Inc.
  Expr *IncExpr;
  if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
    IncExpr = S.getDistInc();
  else
    IncExpr = S.getInc();

  // this routine is shared by 'omp distribute parallel for' and
  // 'omp distribute': select the right EUB expression depending on the
  // directive
  OMPLoopArguments OuterLoopArgs;
  OuterLoopArgs.LB = LoopArgs.LB;
  OuterLoopArgs.UB = LoopArgs.UB;
  OuterLoopArgs.ST = LoopArgs.ST;
  OuterLoopArgs.IL = LoopArgs.IL;
  OuterLoopArgs.Chunk = LoopArgs.Chunk;
  OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedEnsureUpperBound()
                          : S.getEnsureUpperBound();
  OuterLoopArgs.IncExpr = IncExpr;
  OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                           ? S.getCombinedInit()
                           : S.getInit();
  OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                           ? S.getCombinedCond()
                           : S.getCond();
  OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                             ? S.getCombinedNextLowerBound()
                             : S.getNextLowerBound();
  OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                             ? S.getCombinedNextUpperBound()
                             : S.getNextUpperBound();

  EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
                   LoopScope, OuterLoopArgs, CodeGenLoopContent,
                   emitEmptyOrdered);
}
2408
2409
/// For the inner 'for' of a combined 'distribute parallel for': emit the
/// inner LB/UB helper variables and seed them from the previous (distribute)
/// chunk bounds, converting to the iteration variable's type.
/// \return The {LB, UB} lvalues of the inner worksharing loop.
static std::pair<LValue, LValue>
emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &S) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  LValue LB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
  LValue UB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));

  // When composing 'distribute' with 'for' (e.g. as in 'distribute
  // parallel for') we need to use the 'distribute'
  // chunk lower and upper bounds rather than the whole loop iteration
  // space. These are parameters to the outlined function for 'parallel'
  // and we copy the bounds of the previous schedule into the
  // the current ones.
  LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
  LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
  llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
      PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc());
  // The previous bounds may have a different integer type than the
  // iteration variable; convert before storing.
  PrevLBVal = CGF.EmitScalarConversion(
      PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
      LS.getIterationVariable()->getType(),
      LS.getPrevLowerBoundVariable()->getExprLoc());
  llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
      PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc());
  PrevUBVal = CGF.EmitScalarConversion(
      PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
      LS.getIterationVariable()->getType(),
      LS.getPrevUpperBoundVariable()->getExprLoc());

  CGF.EmitStoreOfScalar(PrevLBVal, LB);
  CGF.EmitStoreOfScalar(PrevUBVal, UB);

  return {LB, UB};
}
2444
2445
/// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
/// we need to use the LB and UB expressions generated by the worksharing
/// code generation support, whereas in non combined situations we would
/// just emit 0 and the LastIteration expression
/// This function is necessary due to the difference of the LB and UB
/// types for the RT emission routines for 'for_static_init' and
/// 'for_dispatch_init'
static std::pair<llvm::Value *, llvm::Value *>
emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        Address LB, Address UB) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  const Expr *IVExpr = LS.getIterationVariable();
  // when implementing a dynamic schedule for a 'for' combined with a
  // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
  // is not normalized as each team only executes its own assigned
  // distribute chunk
  QualType IteratorTy = IVExpr->getType();
  llvm::Value *LBVal =
      CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
  llvm::Value *UBVal =
      CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
  return {LBVal, UBVal};
}
2469
2470
/// Append the current distribute chunk's combined LB and UB (zero-extended to
/// size_t) to \p CapturedVars, so they are passed as the bound parameters of
/// the outlined 'parallel' region of a combined 'distribute parallel for'.
/// The push order (LB then UB) matches the outlined function's parameter
/// order.
static void emitDistributeParallelForDistributeInnerBoundParams(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const auto &Dir = cast<OMPLoopDirective>(S);
  LValue LB =
      CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
  llvm::Value *LBCast =
      CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(LB.getAddress(CGF)),
                                CGF.SizeTy, /*isSigned=*/false);
  CapturedVars.push_back(LBCast);
  LValue UB =
      CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));

  llvm::Value *UBCast =
      CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(UB.getAddress(CGF)),
                                CGF.SizeTy, /*isSigned=*/false);
  CapturedVars.push_back(UBCast);
}
2488
2489
/// Emit the inner 'parallel for[simd]' region of a combined
/// 'distribute parallel for[simd]': outline a parallel region whose body is
/// the worksharing loop over the current distribute chunk.
static void
emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
                                 const OMPLoopDirective &S,
                                 CodeGenFunction::JumpDest LoopExit) {
  auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
                                         PrePostActionTy &Action) {
    Action.Enter(CGF);
    bool HasCancel = false;
    // 'cancel' is only allowed on the non-simd combined forms; query the
    // concrete directive kind for its cancel flag.
    if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
      if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
      else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
      else if (const auto *D =
                   dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
    }
    CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
                                                     HasCancel);
    // Emit the worksharing loop, bounded by the distribute chunk.
    CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
                               emitDistributeParallelForInnerBounds,
                               emitDistributeParallelForDispatchBounds);
  };

  emitCommonOMPParallelDirective(
      CGF, S,
      isOpenMPSimdDirective(S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for,
      CGInlinedWorksharingLoop,
      emitDistributeParallelForDistributeInnerBoundParams);
}
2519
2520
void CodeGenFunction::EmitOMPDistributeParallelForDirective(
2521
307
    const OMPDistributeParallelForDirective &S) {
2522
307
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2523
307
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
2524
307
                              S.getDistInc());
2525
307
  };
2526
307
  OMPLexicalScope Scope(*this, S, OMPD_parallel);
2527
307
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
2528
307
}
2529
2530
void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
2531
240
    const OMPDistributeParallelForSimdDirective &S) {
2532
240
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2533
240
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
2534
240
                              S.getDistInc());
2535
240
  };
2536
240
  OMPLexicalScope Scope(*this, S, OMPD_parallel);
2537
240
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
2538
240
}
2539
2540
void CodeGenFunction::EmitOMPDistributeSimdDirective(
2541
150
    const OMPDistributeSimdDirective &S) {
2542
150
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2543
150
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
2544
150
  };
2545
150
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
2546
150
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2547
150
}
2548
2549
void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
2550
132
    CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
2551
132
  // Emit SPMD target parallel for region as a standalone region.
2552
132
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2553
132
    emitOMPSimdRegion(CGF, S, Action);
2554
132
  };
2555
132
  llvm::Function *Fn;
2556
132
  llvm::Constant *Addr;
2557
132
  // Emit target region as a standalone region.
2558
132
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
2559
132
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
2560
132
  assert(Fn && Addr && "Target device function emission failed.");
2561
132
}
2562
2563
void CodeGenFunction::EmitOMPTargetSimdDirective(
2564
225
    const OMPTargetSimdDirective &S) {
2565
225
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2566
225
    emitOMPSimdRegion(CGF, S, Action);
2567
225
  };
2568
225
  emitCommonOMPTargetDirective(*this, S, CodeGen);
2569
225
}
2570
2571
namespace {
  /// Bundles an OpenMP 'schedule' clause kind together with its two optional
  /// schedule modifiers.
  struct ScheduleKindModifiersTy {
    OpenMPScheduleClauseKind Kind;
    OpenMPScheduleClauseModifier M1;
    OpenMPScheduleClauseModifier M2;
    ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
                            OpenMPScheduleClauseModifier M1,
                            OpenMPScheduleClauseModifier M2)
        : Kind(Kind), M1(M1), M2(M2) {}
  };
} // namespace
2582
2583
/// Emits a worksharing loop: the loop part of 'for'/'for simd' and the inner
/// loop of the combined distribute forms.
/// \param EUB expression enforcing the upper bound for the outer (dispatch)
///        loop path — callers pass S.getEnsureUpperBound() or the combined
///        variant; TODO confirm exact semantics against the callers.
/// \param CodeGenLoopBounds callback materializing the LB/UB helper variables.
/// \param CGDispatchBounds callback producing the bounds handed to
///        dispatch-schedule runtime calls.
/// \return true if the directive carried a lastprivate clause.
bool CodeGenFunction::EmitOMPWorksharingLoop(
    const OMPLoopDirective &S, Expr *EUB,
    const CodeGenLoopBoundsTy &CodeGenLoopBounds,
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
  // Emit the loop iteration variable.
  const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on each
  // iteration (e.g., it is foldable into a constant).
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return false;
    } else {
      llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    RunCleanupsScope DoacrossCleanupScope(*this);
    bool Ordered = false;
    // An 'ordered' clause with a loop count sets up doacross dependences;
    // a bare 'ordered' requests ordered runtime scheduling instead.
    if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
      if (OrderedClause->getNumForLoops())
        RT.emitDoacrossInit(*this, S, OrderedClause->getLoopNumIterations());
      else
        Ordered = true;
    }

    // NOTE(review): EmittedFinals appears unused in this function.
    llvm::DenseSet<const Expr *> EmittedFinals;
    emitAlignedClause(*this, S);
    bool HasLinears = EmitOMPLinearClauseInit(S);
    // Emit helper vars inits.

    std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
    LValue LB = Bounds.first;
    LValue UB = Bounds.second;
    LValue ST =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
    LValue IL =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

    // Emit 'then' code.
    {
      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
        // Emit implicit barrier to synchronize threads and avoid data races on
        // initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
          *this, S, EmitLValue(S.getIterationVariable()));
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPReductionClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      EmitOMPLinearClause(S, LoopScope);
      (void)LoopScope.Privatize();
      if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
        CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);

      // Detect the loop schedule kind and chunk.
      const Expr *ChunkExpr = nullptr;
      OpenMPScheduleTy ScheduleKind;
      if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
        ScheduleKind.Schedule = C->getScheduleKind();
        ScheduleKind.M1 = C->getFirstScheduleModifier();
        ScheduleKind.M2 = C->getSecondScheduleModifier();
        ChunkExpr = C->getChunkSize();
      } else {
        // Default behaviour for schedule clause.
        CGM.getOpenMPRuntime().getDefaultScheduleAndChunk(
            *this, S, ScheduleKind.Schedule, ChunkExpr);
      }
      bool HasChunkSizeOne = false;
      llvm::Value *Chunk = nullptr;
      if (ChunkExpr) {
        // Evaluate the chunk and convert it to the iteration variable's type;
        // a compile-time chunk of exactly 1 enables the chunk-one fast path.
        Chunk = EmitScalarExpr(ChunkExpr);
        Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(),
                                     S.getIterationVariable()->getType(),
                                     S.getBeginLoc());
        Expr::EvalResult Result;
        if (ChunkExpr->EvaluateAsInt(Result, getContext())) {
          llvm::APSInt EvaluatedChunk = Result.Val.getInt();
          HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1);
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
      // OpenMP 4.5, 2.7.1 Loop Construct, Description.
      // If the static schedule kind is specified or if the ordered clause is
      // specified, and if no monotonic modifier is specified, the effect will
      // be as if the monotonic modifier was specified.
      bool StaticChunkedOne = RT.isStaticChunked(ScheduleKind.Schedule,
          /* Chunked */ Chunk != nullptr) && HasChunkSizeOne &&
          isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
      // Static non-chunked (or chunk-1 bound-sharing) schedules without an
      // 'ordered' clause need no outer dispatch loop: a single static-init
      // call plus the inner loop suffices.
      if ((RT.isStaticNonchunked(ScheduleKind.Schedule,
                                 /* Chunked */ Chunk != nullptr) ||
           StaticChunkedOne) &&
          !Ordered) {
        JumpDest LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        emitCommonSimdLoop(
            *this, S,
            [&S](CodeGenFunction &CGF, PrePostActionTy &) {
              if (isOpenMPSimdDirective(S.getDirectiveKind())) {
                CGF.EmitOMPSimdInit(S, /*IsMonotonic=*/true);
              } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
                if (C->getKind() == OMPC_ORDER_concurrent)
                  CGF.LoopStack.setParallel(/*Enable=*/true);
              }
            },
            [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk,
             &S, ScheduleKind, LoopExit,
             &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
              // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
              // When no chunk_size is specified, the iteration space is divided
              // into chunks that are approximately equal in size, and at most
              // one chunk is distributed to each thread. Note that the size of
              // the chunks is unspecified in this case.
              CGOpenMPRuntime::StaticRTInput StaticInit(
                  IVSize, IVSigned, Ordered, IL.getAddress(CGF),
                  LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF),
                  StaticChunkedOne ? Chunk : nullptr);
              CGF.CGM.getOpenMPRuntime().emitForStaticInit(
                  CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind,
                  StaticInit);
              // UB = min(UB, GlobalUB);
              if (!StaticChunkedOne)
                CGF.EmitIgnoredExpr(S.getEnsureUpperBound());
              // IV = LB;
              CGF.EmitIgnoredExpr(S.getInit());
              // For unchunked static schedule generate:
              //
              // while (idx <= UB) {
              //   BODY;
              //   ++idx;
              // }
              //
              // For static schedule with chunk one:
              //
              // while (IV <= PrevUB) {
              //   BODY;
              //   IV += ST;
              // }
              CGF.EmitOMPInnerLoop(
                  S, LoopScope.requiresCleanups(),
                  StaticChunkedOne ? S.getCombinedParForInDistCond()
                                   : S.getCond(),
                  StaticChunkedOne ? S.getDistInc() : S.getInc(),
                  [&S, LoopExit](CodeGenFunction &CGF) {
                    CGF.EmitOMPLoopBody(S, LoopExit);
                    CGF.EmitStopPoint(&S);
                  },
                  [](CodeGenFunction &) {});
            });
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        auto &&CodeGen = [&S](CodeGenFunction &CGF) {
          CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
                                                         S.getDirectiveKind());
        };
        OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
      } else {
        // Dynamic/guided/runtime/ordered (or chunked static) schedules need
        // the outer dispatch loop.
        const bool IsMonotonic =
            Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
            ScheduleKind.Schedule == OMPC_SCHEDULE_unknown ||
            ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
            ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        const OMPLoopArguments LoopArguments(
            LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
            IL.getAddress(*this), Chunk, EUB);
        EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
                            LoopArguments, CGDispatchBounds);
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind())) {
        EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
        });
      }
      EmitOMPReductionClauseFinal(
          S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
                 ? /*Parallel and Simd*/ OMPD_parallel_for_simd
                 : /*Parallel only*/ OMPD_parallel);
      // Emit post-update of the reduction variables if IsLastIter != 0.
      emitPostUpdateForReductionClause(
          *this, S, [IL, &S](CodeGenFunction &CGF) {
            return CGF.Builder.CreateIsNotNull(
                CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
          });
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause)
        EmitOMPLastprivateClauseFinal(
            S, isOpenMPSimdDirective(S.getDirectiveKind()),
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
    }
    EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
    });
    DoacrossCleanupScope.ForceCleanup();
    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, /*IsFinished=*/true);
    }
  }
  return HasLastprivateClause;
}
2819
2820
/// The following two functions generate expressions for the loop lower
2821
/// and upper bounds in case of static and dynamic (dispatch) schedule
2822
/// of the associated 'for' or 'distribute' loop.
2823
static std::pair<LValue, LValue>
2824
1.38k
emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
2825
1.38k
  const auto &LS = cast<OMPLoopDirective>(S);
2826
1.38k
  LValue LB =
2827
1.38k
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
2828
1.38k
  LValue UB =
2829
1.38k
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
2830
1.38k
  return {LB, UB};
2831
1.38k
}
2832
2833
/// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
2834
/// consider the lower and upper bound expressions generated by the
2835
/// worksharing loop support, but we use 0 and the iteration space size as
2836
/// constants
2837
static std::pair<llvm::Value *, llvm::Value *>
2838
emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
2839
274
                          Address LB, Address UB) {
2840
274
  const auto &LS = cast<OMPLoopDirective>(S);
2841
274
  const Expr *IVExpr = LS.getIterationVariable();
2842
274
  const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
2843
274
  llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
2844
274
  llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
2845
274
  return {LBVal, UBVal};
2846
274
}
2847
2848
281
void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
2849
281
  bool HasLastprivates = false;
2850
281
  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2851
281
                                          PrePostActionTy &) {
2852
281
    OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
2853
281
    HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
2854
281
                                                 emitForLoopBounds,
2855
281
                                                 emitDispatchForLoopBounds);
2856
281
  };
2857
281
  {
2858
281
    auto LPCRegion =
2859
281
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
2860
281
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
2861
281
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
2862
281
                                                S.hasCancel());
2863
281
  }
2864
281
2865
281
  // Emit an implicit barrier at the end.
2866
281
  if (!S.getSingleClause<OMPNowaitClause>() || 
HasLastprivates10
)
2867
271
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
2868
281
  // Check for outer lastprivate conditional update.
2869
281
  checkForLastprivateConditionalUpdate(*this, S);
2870
281
}
2871
2872
241
void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
2873
241
  bool HasLastprivates = false;
2874
241
  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2875
241
                                          PrePostActionTy &) {
2876
241
    HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
2877
241
                                                 emitForLoopBounds,
2878
241
                                                 emitDispatchForLoopBounds);
2879
241
  };
2880
241
  {
2881
241
    auto LPCRegion =
2882
241
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
2883
241
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
2884
241
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2885
241
  }
2886
241
2887
241
  // Emit an implicit barrier at the end.
2888
241
  if (!S.getSingleClause<OMPNowaitClause>() || 
HasLastprivates0
)
2889
241
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
2890
241
  // Check for outer lastprivate conditional update.
2891
241
  checkForLastprivateConditionalUpdate(*this, S);
2892
241
}
2893
2894
/// Creates a temporary, alloca-backed lvalue of type \p Ty named \p Name;
/// when \p Init is provided it is stored as the initial value.
static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
                                const Twine &Name,
                                llvm::Value *Init = nullptr) {
  Address Tmp = CGF.CreateMemTemp(Ty, Name);
  LValue Result = CGF.MakeAddrLValue(Tmp, Ty);
  if (Init != nullptr)
    CGF.EmitStoreThroughLValue(RValue::get(Init), Result, /*isInit*/ true);
  return Result;
}
2902
2903
68
/// Emits the shared lowering for 'sections' / 'parallel sections': the set of
/// section statements becomes a statically scheduled loop over section
/// indices whose body is a switch dispatching to each section.
void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
  const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
  // When the captured statement is a compound statement, each child is a
  // section; otherwise the whole statement is treated as a single section.
  const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, CapturedStmt, CS,
                    &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) {
    ASTContext &C = CGF.getContext();
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    // Emit helper vars inits.
    LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
                                  CGF.Builder.getInt32(0));
    // NOTE(review): CS->size() - 1 assumes at least one section statement;
    // presumably Sema guarantees a non-empty sections region — confirm.
    llvm::ConstantInt *GlobalUBVal = CS != nullptr
                                         ? CGF.Builder.getInt32(CS->size() - 1)
                                         : CGF.Builder.getInt32(0);
    LValue UB =
        createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
    LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
                                  CGF.Builder.getInt32(1));
    LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
                                  CGF.Builder.getInt32(0));
    // Loop counter.
    LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
    OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
    OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
    // Generate condition for loop.
    BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
                        OK_Ordinary, S.getBeginLoc(), FPOptions());
    // Increment for loop counter.
    UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
                      S.getBeginLoc(), true);
    auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) {
      // Iterate through all sections and emit a switch construct:
      // switch (IV) {
      //   case 0:
      //     <SectionStmt[0]>;
      //     break;
      // ...
      //   case <NumSection> - 1:
      //     <SectionStmt[<NumSection> - 1]>;
      //     break;
      // }
      // .omp.sections.exit:
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
      llvm::SwitchInst *SwitchStmt =
          CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()),
                                   ExitBB, CS == nullptr ? 1 : CS->size());
      if (CS) {
        unsigned CaseNumber = 0;
        for (const Stmt *SubStmt : CS->children()) {
          auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
          CGF.EmitBlock(CaseBB);
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
          CGF.EmitStmt(SubStmt);
          CGF.EmitBranch(ExitBB);
          ++CaseNumber;
        }
      } else {
        // Single implicit section: the whole captured statement is case 0.
        llvm::BasicBlock *CaseBB = CGF.createBasicBlock(".omp.sections.case");
        CGF.EmitBlock(CaseBB);
        SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
        CGF.EmitStmt(CapturedStmt);
        CGF.EmitBranch(ExitBB);
      }
      CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
    };

    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // initialization of firstprivate variables and post-update of lastprivate
      // variables.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, LoopScope);
    CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(CGF, S, IV);
    HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);

    // Emit static non-chunked loop.
    OpenMPScheduleTy ScheduleKind;
    ScheduleKind.Schedule = OMPC_SCHEDULE_static;
    CGOpenMPRuntime::StaticRTInput StaticInit(
        /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(CGF),
        LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF));
    CGF.CGM.getOpenMPRuntime().emitForStaticInit(
        CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, StaticInit);
    // UB = min(UB, GlobalUB);
    llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, S.getBeginLoc());
    llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
    CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
    // IV = LB;
    CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getBeginLoc()), IV);
    // while (idx <= UB) { BODY; ++idx; }
    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
                         [](CodeGenFunction &) {});
    // Tell the runtime we are done.
    auto &&CodeGen = [&S](CodeGenFunction &CGF) {
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
                                                     S.getDirectiveKind());
    };
    CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
    // Emit post-update of the reduction variables if IsLastIter != 0.
    emitPostUpdateForReductionClause(CGF, S, [IL, &S](CodeGenFunction &CGF) {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
    });

    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivates)
      CGF.EmitOMPLastprivateClauseFinal(
          S, /*NoFinals=*/false,
          CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc())));
  };

  // 'sections' and 'parallel sections' are the only expected directives here
  // that can carry 'cancel'.
  bool HasCancel = false;
  if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
    HasCancel = OSD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
    HasCancel = OPSD->hasCancel();
  OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
                                              HasCancel);
  // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
  // clause. Otherwise the barrier will be generated by the codegen for the
  // directive.
  if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
    // Emit implicit barrier to synchronize threads and avoid data races on
    // initialization of firstprivate variables.
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
                                           OMPD_unknown);
  }
}
3046
3047
52
void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
3048
52
  {
3049
52
    auto LPCRegion =
3050
52
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3051
52
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
3052
52
    EmitSections(S);
3053
52
  }
3054
52
  // Emit an implicit barrier at the end.
3055
52
  if (!S.getSingleClause<OMPNowaitClause>()) {
3056
46
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
3057
46
                                           OMPD_sections);
3058
46
  }
3059
52
  // Check for outer lastprivate conditional update.
3060
52
  checkForLastprivateConditionalUpdate(*this, S);
3061
52
}
3062
3063
46
void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
3064
46
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3065
46
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
3066
46
  };
3067
46
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
3068
46
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
3069
46
                                              S.hasCancel());
3070
46
}
3071
3072
39
/// Emits a 'single' construct, including copyprivate broadcast support and the
/// implicit closing barrier.
void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
  llvm::SmallVector<const Expr *, 8> CopyprivateVars;
  llvm::SmallVector<const Expr *, 8> DestExprs;
  llvm::SmallVector<const Expr *, 8> SrcExprs;
  llvm::SmallVector<const Expr *, 8> AssignmentOps;
  // Check if there are any 'copyprivate' clauses associated with this
  // 'single' construct.
  // Build a list of copyprivate variables along with helper expressions
  // (<source>, <destination>, <destination>=<source> expressions)
  for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
    CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
    DestExprs.append(C->destination_exprs().begin(),
                     C->destination_exprs().end());
    SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
    AssignmentOps.append(C->assignment_ops().begin(),
                         C->assignment_ops().end());
  }
  // Emit code for 'single' region along with 'copyprivate' clauses
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope SingleScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
    CGF.EmitOMPPrivateClause(S, SingleScope);
    (void)SingleScope.Privatize();
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getBeginLoc(),
                                            CopyprivateVars, DestExprs,
                                            SrcExprs, AssignmentOps);
  }
  // Emit an implicit barrier at the end (to avoid data race on firstprivate
  // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
  if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
    // NOTE(review): under this guard the nowait clause is known absent, so the
    // ternary below always selects OMPD_single (the OMPD_unknown arm is dead).
    CGM.getOpenMPRuntime().emitBarrierCall(
        *this, S.getBeginLoc(),
        S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}
3116
3117
33
/// Emit an OpenMP 'master' region via the OpenMP runtime: the captured
/// statement of \p S is wrapped in a region-codegen callback and handed to
/// CGOpenMPRuntime::emitMasterRegion, which emits the master-thread check
/// around it.
static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Run any pending pre-action before emitting the region body.
    Action.Enter(CGF);
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc());
}
3124
3125
15
/// Emit code for an '#pragma omp master' directive. If the OpenMPIRBuilder is
/// enabled it is used to emit the region (body and finalization are provided
/// as callbacks); otherwise codegen falls back to the classic
/// CGOpenMPRuntime path via emitMaster().
void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
  if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder()) {
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    const CapturedStmt *CS = S.getInnermostCapturedStmt();
    const Stmt *MasterRegionBodyStmt = CS->getCapturedStmt();

    // Called by the builder at the region's finalization point.
    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    // Emits the captured statement at the insert point the builder chooses;
    // the InlinedRegionBodyRAII keeps alloca/finalization placement correct
    // for an inlined (non-outlined) region.
    auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP,
                                                  InsertPointTy CodeGenIP,
                                                  llvm::BasicBlock &FiniBB) {
      OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
      OMPBuilderCBHelpers::EmitOMPRegionBody(*this, MasterRegionBodyStmt,
                                             CodeGenIP, FiniBB);
    };

    CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
    // Continue emitting at the point the builder returns, after the region.
    Builder.restoreIP(OMPBuilder->CreateMaster(Builder, BodyGenCB, FiniCB));

    return;
  }
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  emitMaster(*this, S);
}
3153
3154
40
/// Emit code for an '#pragma omp critical' directive, honoring an optional
/// 'hint' clause. Uses the OpenMPIRBuilder when enabled; otherwise emits via
/// CGOpenMPRuntime::emitCriticalRegion.
void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
  if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder()) {
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    const CapturedStmt *CS = S.getInnermostCapturedStmt();
    const Stmt *CriticalRegionBodyStmt = CS->getCapturedStmt();
    const Expr *Hint = nullptr;
    if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
      Hint = HintClause->getHint();

    // TODO: This is slightly different from what's currently being done in
    // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything
    // about typing is final.
    llvm::Value *HintInst = nullptr;
    if (Hint)
      HintInst =
          Builder.CreateIntCast(EmitScalarExpr(Hint), CGM.Int32Ty, false);

    // Called by the builder at the region's finalization point.
    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    // Emits the captured statement at the builder-chosen insert point, with
    // alloca/finalization placement handled by InlinedRegionBodyRAII.
    auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP,
                                                    InsertPointTy CodeGenIP,
                                                    llvm::BasicBlock &FiniBB) {
      OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
      OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CriticalRegionBodyStmt,
                                             CodeGenIP, FiniBB);
    };

    CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
    // The critical section is identified by the directive's name; the hint
    // (if any) is forwarded to the runtime lock.
    Builder.restoreIP(OMPBuilder->CreateCritical(
        Builder, BodyGenCB, FiniCB, S.getDirectiveName().getAsString(),
        HintInst));

    return;
  }

  // Classic codegen path: wrap the captured body and let the runtime emit
  // the critical-section enter/exit calls.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  const Expr *Hint = nullptr;
  if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
    Hint = HintClause->getHint();
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitCriticalRegion(*this,
                                            S.getDirectiveName().getAsString(),
                                            CodeGen, S.getBeginLoc(), Hint);
}
3205
3206
/// Emit code for an '#pragma omp parallel for' directive.
void CodeGenFunction::EmitOMPParallelForDirective(
    const OMPParallelForDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    // Track cancellation state so 'cancel for' inside the region branches to
    // the proper exit block.
    OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel());
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  {
    // Lastprivate-conditional tracking is disabled while emitting the region
    // itself; updates are checked after the region below.
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
                                   emitEmptyBoundParameters);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}
3225
3226
/// Emit code for an '#pragma omp parallel for simd' directive.
void CodeGenFunction::EmitOMPParallelForSimdDirective(
    const OMPParallelForSimdDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  {
    // Lastprivate-conditional tracking is disabled while emitting the region
    // itself; updates are checked after the region below.
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen,
                                   emitEmptyBoundParameters);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}
3244
3245
/// Emit code for an '#pragma omp parallel master' directive.
void CodeGenFunction::EmitOMPParallelMasterDirective(
    const OMPParallelMasterDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'master' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    // Copyin must be emitted before firstprivate init so the barrier below
    // can synchronize threadprivate propagation before any value is read.
    bool Copyins = CGF.EmitOMPCopyinClause(S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    if (Copyins) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // propagation master's thread values of threadprivate variables to local
      // instances of that variables of all other implicit threads.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    // The 'master' part of the combined construct.
    emitMaster(CGF, S);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  {
    // Lastprivate-conditional tracking is disabled while emitting the region
    // itself; updates are checked after the region below.
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_master, CodeGen,
                                   emitEmptyBoundParameters);
    // No post-update expression is needed for this construct.
    emitPostUpdateForReductionClause(*this, S,
                                     [](CodeGenFunction &) { return nullptr; });
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}
3279
3280
/// Emit code for an '#pragma omp parallel sections' directive.
void CodeGenFunction::EmitOMPParallelSectionsDirective(
    const OMPParallelSectionsDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'sections' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitSections(S);
  };
  {
    // Lastprivate-conditional tracking is disabled while emitting the region
    // itself; updates are checked after the region below.
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
                                   emitEmptyBoundParameters);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}
3297
3298
/// Common codegen for task-based directives (task, taskloop, and their
/// combined forms). Collects clause information into \p Data, emits the
/// outlined task function (whose body is produced by \p BodyGen with all
/// private/firstprivate/lastprivate/reduction variables privatized), and
/// finally invokes \p TaskGen to emit the actual task-creation call.
void CodeGenFunction::EmitOMPTaskBasedDirective(
    const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion,
    const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen,
    OMPTaskDataTy &Data) {
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion);
  auto I = CS->getCapturedDecl()->param_begin();
  // Captured-decl parameter layout: param 1 is the part id, param 4 is the
  // task descriptor.
  auto PartId = std::next(I);
  auto TaskT = std::next(I, 4);
  // Check if the task is final
  if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
    // If the condition constant folds and can be elided, try to avoid emitting
    // the condition and the dead arm of the if/else.
    const Expr *Cond = Clause->getCondition();
    bool CondConstant;
    if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
      Data.Final.setInt(CondConstant);
    else
      Data.Final.setPointer(EvaluateExprAsBool(Cond));
  } else {
    // By default the task is not final.
    Data.Final.setInt(/*IntVal=*/false);
  }
  // Check if the task has 'priority' clause.
  if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
    const Expr *Prio = Clause->getPriority();
    Data.Priority.setInt(/*IntVal=*/true);
    // The runtime expects a signed 32-bit priority value.
    Data.Priority.setPointer(EmitScalarConversion(
        EmitScalarExpr(Prio), Prio->getType(),
        getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
        Prio->getExprLoc()));
  }
  // The first function argument for tasks is a thread id, the second one is a
  // part id (0 for tied tasks, >=0 for untied task).
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  // Get list of private variables. EmittedAsPrivate de-duplicates variables
  // that appear in several clauses.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.PrivateVars.push_back(*IRef);
        Data.PrivateCopies.push_back(IInit);
      }
      ++IRef;
    }
  }
  EmittedAsPrivate.clear();
  // Get list of firstprivate variables.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.FirstprivateVars.push_back(*IRef);
        Data.FirstprivateCopies.push_back(IInit);
        Data.FirstprivateInits.push_back(*IElemInitRef);
      }
      ++IRef;
      ++IElemInitRef;
    }
  }
  // Get list of lastprivate variables (for taskloops).
  llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ID = C->destination_exprs().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.LastprivateVars.push_back(*IRef);
        Data.LastprivateCopies.push_back(IInit);
      }
      // Map destination decl to its original reference (recorded even for
      // duplicates so all destinations are privatized below).
      LastprivateDstsOrigs.insert(
          {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
           cast<DeclRefExpr>(*IRef)});
      ++IRef;
      ++ID;
    }
  }
  // Collect reduction clause components in parallel lists.
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    for (const Expr *Ref : C->varlists()) {
      Data.ReductionVars.emplace_back(Ref);
      Data.ReductionCopies.emplace_back(*IPriv);
      Data.ReductionOps.emplace_back(*IRed);
      LHSs.emplace_back(*ILHS);
      RHSs.emplace_back(*IRHS);
      std::advance(IPriv, 1);
      std::advance(IRed, 1);
      std::advance(ILHS, 1);
      std::advance(IRHS, 1);
    }
  }
  Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
      *this, S.getBeginLoc(), LHSs, RHSs, Data);
  // Build list of dependences.
  for (const auto *C : S.getClausesOfKind<OMPDependClause>())
    for (const Expr *IRef : C->varlists())
      Data.Dependences.emplace_back(C->getDependencyKind(), IRef);
  auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
                    CapturedRegion](CodeGenFunction &CGF,
                                    PrePostActionTy &Action) {
    // Set proper addresses for generated private copies.
    OMPPrivateScope Scope(CGF);
    if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
        !Data.LastprivateVars.empty()) {
      // The task copy function is variadic: it receives the privates pointer
      // followed by one out-pointer per privatized variable.
      llvm::FunctionType *CopyFnTy = llvm::FunctionType::get(
          CGF.Builder.getVoidTy(), {CGF.Builder.getInt8PtrTy()}, true);
      enum { PrivatesParam = 2, CopyFnParam = 3 };
      llvm::Value *CopyFn = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
      llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
          CS->getCapturedDecl()->getParam(PrivatesParam)));
      // Map privates.
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      CallArgs.push_back(PrivatesPtr);
      for (const Expr *E : Data.PrivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr = CGF.CreateMemTemp(
            CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      for (const Expr *E : Data.FirstprivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".firstpriv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      for (const Expr *E : Data.LastprivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".lastpriv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
          CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
      // Redirect lastprivate destination decls to their original variables.
      for (const auto &Pair : LastprivateDstsOrigs) {
        const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
        DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/
                            CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        Pair.second->getType(), VK_LValue,
                        Pair.second->getExprLoc());
        Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
          return CGF.EmitLValue(&DRE).getAddress(CGF);
        });
      }
      // Bind each privatized decl to the address the copy function filled in.
      for (const auto &Pair : PrivatePtrs) {
        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
                            CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
      }
    }
    if (Data.Reductions) {
      OMPLexicalScope LexScope(CGF, S, CapturedRegion);
      ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies,
                             Data.ReductionOps);
      llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
      for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
        RedCG.emitSharedLValue(CGF, Cnt);
        RedCG.emitAggregateType(CGF, Cnt);
        // FIXME: This must removed once the runtime library is fixed.
        // Emit required threadprivate variables for
        // initializer/combiner/finalizer.
        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
                                                           RedCG, Cnt);
        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
            CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
        // Cast the runtime's void* result to a pointer to the private copy's
        // type before privatizing.
        Replacement =
            Address(CGF.EmitScalarConversion(
                        Replacement.getPointer(), CGF.getContext().VoidPtrTy,
                        CGF.getContext().getPointerType(
                            Data.ReductionCopies[Cnt]->getType()),
                        Data.ReductionCopies[Cnt]->getExprLoc()),
                    Replacement.getAlignment());
        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
        Scope.addPrivate(RedCG.getBaseDecl(Cnt),
                         [Replacement]() { return Replacement; });
      }
    }
    // Privatize all private variables except for in_reduction items.
    (void)Scope.Privatize();
    SmallVector<const Expr *, 4> InRedVars;
    SmallVector<const Expr *, 4> InRedPrivs;
    SmallVector<const Expr *, 4> InRedOps;
    SmallVector<const Expr *, 4> TaskgroupDescriptors;
    for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
      auto IPriv = C->privates().begin();
      auto IRed = C->reduction_ops().begin();
      auto ITD = C->taskgroup_descriptors().begin();
      for (const Expr *Ref : C->varlists()) {
        InRedVars.emplace_back(Ref);
        InRedPrivs.emplace_back(*IPriv);
        InRedOps.emplace_back(*IRed);
        TaskgroupDescriptors.emplace_back(*ITD);
        std::advance(IPriv, 1);
        std::advance(IRed, 1);
        std::advance(ITD, 1);
      }
    }
    // Privatize in_reduction items here, because taskgroup descriptors must be
    // privatized earlier.
    OMPPrivateScope InRedScope(CGF);
    if (!InRedVars.empty()) {
      ReductionCodeGen RedCG(InRedVars, InRedPrivs, InRedOps);
      for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
        RedCG.emitSharedLValue(CGF, Cnt);
        RedCG.emitAggregateType(CGF, Cnt);
        // The taskgroup descriptor variable is always implicit firstprivate and
        // privatized already during processing of the firstprivates.
        // FIXME: This must removed once the runtime library is fixed.
        // Emit required threadprivate variables for
        // initializer/combiner/finalizer.
        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
                                                           RedCG, Cnt);
        llvm::Value *ReductionsPtr =
            CGF.EmitLoadOfScalar(CGF.EmitLValue(TaskgroupDescriptors[Cnt]),
                                 TaskgroupDescriptors[Cnt]->getExprLoc());
        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
            CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
        Replacement = Address(
            CGF.EmitScalarConversion(
                Replacement.getPointer(), CGF.getContext().VoidPtrTy,
                CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
                InRedPrivs[Cnt]->getExprLoc()),
            Replacement.getAlignment());
        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
        InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
                              [Replacement]() { return Replacement; });
      }
    }
    (void)InRedScope.Privatize();

    Action.Enter(CGF);
    BodyGen(CGF);
  };
  llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
      Data.NumberOfParts);
  OMPLexicalScope Scope(*this, S, llvm::None,
                        !isOpenMPParallelDirective(S.getDirectiveKind()) &&
                            !isOpenMPSimdDirective(S.getDirectiveKind()));
  TaskGen(*this, OutlinedFn, Data);
}
3556
3557
/// Create an implicit firstprivate variable of type \p Ty for a target task.
/// Builds three implicit parameter decls — the original variable, its private
/// copy, and the per-element init variable — wires the private copy's
/// initializer to an lvalue-to-rvalue load of the init variable, records all
/// three in \p Data's firstprivate lists, and returns the original decl.
static ImplicitParamDecl *
createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
                                  QualType Ty, CapturedDecl *CD,
                                  SourceLocation Loc) {
  // The "original" variable and a DeclRefExpr to it.
  auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
                                           ImplicitParamDecl::Other);
  auto *OrigRef = DeclRefExpr::Create(
      C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD,
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
  // The private copy used inside the task region.
  auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
                                              ImplicitParamDecl::Other);
  auto *PrivateRef = DeclRefExpr::Create(
      C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD,
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
  // The init variable has the element type so array firstprivates are
  // initialized element-wise.
  QualType ElemType = C.getBaseElementType(Ty);
  auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType,
                                           ImplicitParamDecl::Other);
  auto *InitRef = DeclRefExpr::Create(
      C, NestedNameSpecifierLoc(), SourceLocation(), InitVD,
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue);
  // The private copy is initialized from the init variable's value.
  PrivateVD->setInitStyle(VarDecl::CInit);
  PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue,
                                              InitRef, /*BasePath=*/nullptr,
                                              VK_RValue));
  Data.FirstprivateVars.emplace_back(OrigRef);
  Data.FirstprivateCopies.emplace_back(PrivateRef);
  Data.FirstprivateInits.emplace_back(InitRef);
  return OrigVD;
}
3586
3587
/// Emit a target directive as a task (used when the target construct carries
/// depend/nowait semantics). The base/pointer/size arrays describing the map
/// arguments in \p InputInfo are captured as implicit firstprivates of the
/// task, the outlined task function is emitted around \p BodyGen, and the
/// task-creation call is emitted with the if-condition derived from 'nowait'.
void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
    const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen,
    OMPTargetDataInfo &InputInfo) {
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
  Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  auto I = CS->getCapturedDecl()->param_begin();
  // Captured-decl parameter layout: param 1 is the part id, param 4 is the
  // task descriptor.
  auto PartId = std::next(I);
  auto TaskT = std::next(I, 4);
  OMPTaskDataTy Data;
  // The task is not final.
  Data.Final.setInt(/*IntVal=*/false);
  // Get list of firstprivate variables.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (auto *IInit : C->private_copies()) {
      Data.FirstprivateVars.push_back(*IRef);
      Data.FirstprivateCopies.push_back(IInit);
      Data.FirstprivateInits.push_back(*IElemInitRef);
      ++IRef;
      ++IElemInitRef;
    }
  }
  // Capture the map-argument arrays (base pointers, pointers, sizes) as
  // implicit firstprivates so the task sees them after deferral.
  OMPPrivateScope TargetScope(*this);
  VarDecl *BPVD = nullptr;
  VarDecl *PVD = nullptr;
  VarDecl *SVD = nullptr;
  if (InputInfo.NumberOfTargetItems > 0) {
    auto *CD = CapturedDecl::Create(
        getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0);
    llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems);
    // void*[NumberOfTargetItems] for both the base-pointer and pointer arrays.
    QualType BaseAndPointersType = getContext().getConstantArrayType(
        getContext().VoidPtrTy, ArrSize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    BPVD = createImplicitFirstprivateForType(
        getContext(), Data, BaseAndPointersType, CD, S.getBeginLoc());
    PVD = createImplicitFirstprivateForType(
        getContext(), Data, BaseAndPointersType, CD, S.getBeginLoc());
    // int64_t[NumberOfTargetItems] for the sizes array.
    QualType SizesType = getContext().getConstantArrayType(
        getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1),
        ArrSize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD,
                                            S.getBeginLoc());
    TargetScope.addPrivate(
        BPVD, [&InputInfo]() { return InputInfo.BasePointersArray; });
    TargetScope.addPrivate(PVD,
                           [&InputInfo]() { return InputInfo.PointersArray; });
    TargetScope.addPrivate(SVD,
                           [&InputInfo]() { return InputInfo.SizesArray; });
  }
  (void)TargetScope.Privatize();
  // Build list of dependences.
  for (const auto *C : S.getClausesOfKind<OMPDependClause>())
    for (const Expr *IRef : C->varlists())
      Data.Dependences.emplace_back(C->getDependencyKind(), IRef);
  auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD,
                    &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Set proper addresses for generated private copies.
    OMPPrivateScope Scope(CGF);
    if (!Data.FirstprivateVars.empty()) {
      // The task copy function is variadic: it receives the privates pointer
      // followed by one out-pointer per firstprivate variable.
      llvm::FunctionType *CopyFnTy = llvm::FunctionType::get(
          CGF.Builder.getVoidTy(), {CGF.Builder.getInt8PtrTy()}, true);
      enum { PrivatesParam = 2, CopyFnParam = 3 };
      llvm::Value *CopyFn = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
      llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
          CS->getCapturedDecl()->getParam(PrivatesParam)));
      // Map privates.
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      CallArgs.push_back(PrivatesPtr);
      for (const Expr *E : Data.FirstprivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".firstpriv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
          CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
      // Bind each privatized decl to the address the copy function filled in.
      for (const auto &Pair : PrivatePtrs) {
        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
                            CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
      }
    }
    // Privatize all private variables except for in_reduction items.
    (void)Scope.Privatize();
    if (InputInfo.NumberOfTargetItems > 0) {
      // Point InputInfo at the privatized copies so BodyGen emits the target
      // call against the task-local arrays.
      InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP(
          CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0);
      InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP(
          CGF.GetAddrOfLocalVar(PVD), /*Index=*/0);
      InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
          CGF.GetAddrOfLocalVar(SVD), /*Index=*/0);
    }

    Action.Enter(CGF);
    OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false);
    BodyGen(CGF);
  };
  llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true,
      Data.NumberOfParts);
  // With 'nowait' the task may be deferred (if-condition true); without it
  // the task must execute immediately (if-condition false).
  llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0);
  IntegerLiteral IfCond(getContext(), TrueOrFalse,
                        getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
                        SourceLocation());

  CGM.getOpenMPRuntime().emitTaskCall(*this, S.getBeginLoc(), S, OutlinedFn,
                                      SharedsTy, CapturedStruct, &IfCond, Data);
}
3703
3704
121
/// Emit code for '#pragma omp task': capture the associated statement,
/// outline it via EmitOMPTaskBasedDirective, and emit the runtime call that
/// enqueues the task.
void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
  Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  // Pick the first 'if' clause that applies to 'task': either an unmodified
  // 'if' or one with the 'task' directive-name modifier.
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_task) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if we should emit tied or untied task.
  Data.Tied = !S.getSingleClause<OMPUntiedClause>();
  // BodyGen emits the task region body inside the outlined function.
  auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitStmt(CS->getCapturedStmt());
  };
  // TaskGen emits the runtime task-creation call for the outlined function.
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getBeginLoc(), S, OutlinedFn,
                                            SharedsTy, CapturedStruct, IfCond,
                                            Data);
  };
  // Suspend lastprivate-conditional tracking for the duration of this
  // directive (RAII re-enables it on scope exit).
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data);
}
3735
3736
/// Emit '#pragma omp taskyield' as a single runtime call.
void CodeGenFunction::EmitOMPTaskyieldDirective(
    const OMPTaskyieldDirective &S) {
  CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getBeginLoc());
}
3740
3741
27
/// Emit '#pragma omp barrier' as a runtime barrier call tagged as an explicit
/// barrier directive.
void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_barrier);
}
3744
3745
12
/// Emit '#pragma omp taskwait' as a single runtime call.
void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
  CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc());
}
3748
3749
/// Emit '#pragma omp taskgroup'. If 'task_reduction' clauses are present,
/// initialize the task-reduction descriptor before emitting the region body.
void CodeGenFunction::EmitOMPTaskgroupDirective(
    const OMPTaskgroupDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    if (const Expr *E = S.getReductionRef()) {
      SmallVector<const Expr *, 4> LHSs;
      SmallVector<const Expr *, 4> RHSs;
      OMPTaskDataTy Data;
      // Gather reduction info from all 'task_reduction' clauses. The five
      // clause lists (varlist, privates, reduction_ops, lhs_exprs, rhs_exprs)
      // are parallel, so the iterators are advanced in lockstep.
      for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
        auto IPriv = C->privates().begin();
        auto IRed = C->reduction_ops().begin();
        auto ILHS = C->lhs_exprs().begin();
        auto IRHS = C->rhs_exprs().begin();
        for (const Expr *Ref : C->varlists()) {
          Data.ReductionVars.emplace_back(Ref);
          Data.ReductionCopies.emplace_back(*IPriv);
          Data.ReductionOps.emplace_back(*IRed);
          LHSs.emplace_back(*ILHS);
          RHSs.emplace_back(*IRHS);
          std::advance(IPriv, 1);
          std::advance(IRed, 1);
          std::advance(ILHS, 1);
          std::advance(IRHS, 1);
        }
      }
      // Create the runtime task-reduction descriptor and store it in the
      // variable Sema created for the reduction reference so the region body
      // can use it.
      llvm::Value *ReductionDesc =
          CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(),
                                                           LHSs, RHSs, Data);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      CGF.EmitVarDecl(*VD);
      CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
                            /*Volatile=*/false, E->getType());
    }
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getBeginLoc());
}
3787
3788
40
/// Emit '#pragma omp flush'. With a flush-list the runtime call receives the
/// listed variables and NotAtomic ordering; without one, a full
/// acquire-release flush is requested.
void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
  // NOTE(review): ordering selection depends on how emitFlush interprets
  // NotAtomic for list-flushes — confirm against CGOpenMPRuntime::emitFlush.
  llvm::AtomicOrdering AO = S.getSingleClause<OMPFlushClause>()
                                ? llvm::AtomicOrdering::NotAtomic
                                : llvm::AtomicOrdering::AcquireRelease;
  CGM.getOpenMPRuntime().emitFlush(
      *this,
      // Immediately-invoked lambda: collect the flush-list variables if a
      // flush clause is present, otherwise pass an empty list.
      [&S]() -> ArrayRef<const Expr *> {
        if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>())
          return llvm::makeArrayRef(FlushClause->varlist_begin(),
                                    FlushClause->varlist_end());
        return llvm::None;
      }(),
      S.getBeginLoc(), AO);
}
3802
3803
/// Common codegen for 'distribute'-based loop directives: emits the iteration
/// variable and helper bounds, selects the distribute schedule, then either
/// emits a statically scheduled inner loop (with static init/fini runtime
/// calls) or delegates to the dynamic outer-loop emitter. Finalizes simd,
/// reduction, and lastprivate state afterwards.
void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
                                            const CodeGenLoopTy &CodeGenLoop,
                                            Expr *IncExpr) {
  // Emit the loop iteration variable.
  const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on each
  // iteration (e.g., it is foldable into a constant).
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause = false;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    emitAlignedClause(*this, S);
    // Emit 'then' code.
    {
      // Emit helper vars inits. Loop-bound-sharing directives (combined
      // 'distribute parallel for' forms) use the combined bound variables.
      LValue LB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedLowerBoundVariable()
                          : S.getLowerBoundVariable())));
      LValue UB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedUpperBoundVariable()
                          : S.getUpperBoundVariable())));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope)) {
        // Emit implicit barrier to synchronize threads and avoid data races
        // on initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      // Reductions are initialized here only for standalone 'distribute simd'
      // forms; combined parallel/teams forms handle them elsewhere.
      if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
          !isOpenMPParallelDirective(S.getDirectiveKind()) &&
          !isOpenMPTeamsDirective(S.getDirectiveKind()))
        EmitOMPReductionClauseInit(S, LoopScope);
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      (void)LoopScope.Privatize();
      if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
        CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);

      // Detect the distribute schedule kind and chunk.
      llvm::Value *Chunk = nullptr;
      OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
      if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
        ScheduleKind = C->getDistScheduleKind();
        if (const Expr *Ch = C->getChunkSize()) {
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType(),
                                       S.getBeginLoc());
        }
      } else {
        // Default behaviour for dist_schedule clause.
        CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk(
            *this, S, ScheduleKind, Chunk);
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

      // OpenMP [2.10.8, distribute Construct, Description]
      // If dist_schedule is specified, kind must be static. If specified,
      // iterations are divided into chunks of size chunk_size, chunks are
      // assigned to the teams of the league in a round-robin fashion in the
      // order of the team number. When no chunk_size is specified, the
      // iteration space is divided into chunks that are approximately equal
      // in size, and at most one chunk is distributed to each team of the
      // league. The size of the chunks is unspecified in this case.
      bool StaticChunked = RT.isStaticChunked(
          ScheduleKind, /* Chunked */ Chunk != nullptr) &&
          isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
      if (RT.isStaticNonchunked(ScheduleKind,
                                /* Chunked */ Chunk != nullptr) ||
          StaticChunked) {
        CGOpenMPRuntime::StaticRTInput StaticInit(
            IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(*this),
            LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
            StaticChunked ? Chunk : nullptr);
        RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind,
                                    StaticInit);
        JumpDest LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedEnsureUpperBound()
                            : S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedInit()
                            : S.getInit());

        const Expr *Cond =
            isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                ? S.getCombinedCond()
                : S.getCond();

        if (StaticChunked)
          Cond = S.getCombinedDistCond();

        // For static unchunked schedules generate:
        //
        //  1. For distribute alone, codegen
        //    while (idx <= UB) {
        //      BODY;
        //      ++idx;
        //    }
        //
        //  2. When combined with 'for' (e.g. as in 'distribute parallel for')
        //    while (idx <= UB) {
        //      <CodeGen rest of pragma>(LB, UB);
        //      idx += ST;
        //    }
        //
        // For static chunk one schedule generate:
        //
        // while (IV <= GlobalUB) {
        //   <CodeGen rest of pragma>(LB, UB);
        //   LB += ST;
        //   UB += ST;
        //   UB = min(UB, GlobalUB);
        //   IV = LB;
        // }
        //
        emitCommonSimdLoop(
            *this, S,
            [&S](CodeGenFunction &CGF, PrePostActionTy &) {
              if (isOpenMPSimdDirective(S.getDirectiveKind()))
                CGF.EmitOMPSimdInit(S, /*IsMonotonic=*/true);
            },
            [&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop,
             StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) {
              CGF.EmitOMPInnerLoop(
                  S, LoopScope.requiresCleanups(), Cond, IncExpr,
                  [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
                    CodeGenLoop(CGF, S, LoopExit);
                  },
                  [&S, StaticChunked](CodeGenFunction &CGF) {
                    if (StaticChunked) {
                      CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound());
                      CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound());
                      CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound());
                      CGF.EmitIgnoredExpr(S.getCombinedInit());
                    }
                  });
            });
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        RT.emitForStaticFinish(*this, S.getEndLoc(), S.getDirectiveKind());
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        const OMPLoopArguments LoopArguments = {
            LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
            IL.getAddress(*this), Chunk};
        EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
                                   CodeGenLoop);
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind())) {
        EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
        });
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
          !isOpenMPParallelDirective(S.getDirectiveKind()) &&
          !isOpenMPTeamsDirective(S.getDirectiveKind())) {
        EmitOMPReductionClauseFinal(S, OMPD_simd);
        // Emit post-update of the reduction variables if IsLastIter != 0.
        emitPostUpdateForReductionClause(
            *this, S, [IL, &S](CodeGenFunction &CGF) {
              return CGF.Builder.CreateIsNotNull(
                  CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
            });
      }
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause) {
        EmitOMPLastprivateClauseFinal(
            S, /*NoFinals=*/false,
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
      }
    }

    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
}
4031
4032
void CodeGenFunction::EmitOMPDistributeDirective(
4033
110
    const OMPDistributeDirective &S) {
4034
110
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4035
110
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
4036
110
  };
4037
110
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
4038
110
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
4039
110
}
4040
4041
/// Outline the captured statement of an 'ordered simd' region into its own
/// function using a fresh CodeGenFunction, and mark it non-recursive.
static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
                                                   const CapturedStmt *S,
                                                   SourceLocation Loc) {
  // A dedicated CodeGenFunction is used so the outlined body does not share
  // state with the enclosing function's emission.
  CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
  CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
  CGF.CapturedStmtInfo = &CapStmtInfo;
  llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S, Loc);
  Fn->setDoesNotRecurse();
  return Fn;
}
4051
4052
24
/// Emit '#pragma omp ordered'. 'ordered depend(...)' becomes doacross runtime
/// calls with no region body; otherwise the region is emitted, outlined into
/// a separate function when the 'simd' clause is present.
void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
  if (S.hasClausesOfKind<OMPDependClause>()) {
    assert(!S.getAssociatedStmt() &&
           "No associated statement must be in ordered depend construct.");
    // One doacross runtime call per depend clause.
    for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
      CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
    return;
  }
  const auto *C = S.getSingleClause<OMPSIMDClause>();
  auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
                                 PrePostActionTy &Action) {
    const CapturedStmt *CS = S.getInnermostCapturedStmt();
    if (C) {
      // With the 'simd' clause, outline the body and call it with the
      // captured variables instead of emitting it inline.
      llvm::SmallVector<llvm::Value *, 16> CapturedVars;
      CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
      llvm::Function *OutlinedFn =
          emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc());
      CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(),
                                                      OutlinedFn, CapturedVars);
    } else {
      Action.Enter(CGF);
      CGF.EmitStmt(CS->getCapturedStmt());
    }
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  // The last argument requests the threaded (non-simd) ordered region only
  // when the 'simd' clause is absent.
  CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getBeginLoc(), !C);
}
4079
4080
/// Convert \p Val (scalar or complex) to a scalar of \p DestType.
static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
                                         QualType SrcType, QualType DestType,
                                         SourceLocation Loc) {
  assert(CGF.hasScalarEvaluationKind(DestType) &&
         "DestType must have scalar evaluation kind.");
  assert(!Val.isAggregate() && "Must be a scalar or complex.");
  // Scalar source: a plain scalar-to-scalar conversion suffices.
  if (Val.isScalar())
    return CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType,
                                    Loc);
  // Complex source: narrow to scalar via the complex-to-scalar conversion.
  return CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
                                           DestType, Loc);
}
4091
4092
/// Convert \p Val (scalar or complex) to a complex value of \p DestType.
static CodeGenFunction::ComplexPairTy
convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
                      QualType DestType, SourceLocation Loc) {
  assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
         "DestType must have complex evaluation kind.");
  // Both branches convert into the destination's element type.
  QualType DestElementType =
      DestType->castAs<ComplexType>()->getElementType();
  if (Val.isScalar()) {
    // Promote the scalar to the complex element type and pair it with a zero
    // imaginary part.
    llvm::Value *Real = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
                                                 DestElementType, Loc);
    return CodeGenFunction::ComplexPairTy(
        Real, llvm::Constant::getNullValue(Real->getType()));
  }
  assert(Val.isComplex() && "Must be a scalar or complex.");
  QualType SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
  // Convert real then imaginary parts; kept as separate statements so the
  // conversion instructions are emitted in a deterministic order.
  CodeGenFunction::ComplexPairTy Result;
  Result.first = CGF.EmitScalarConversion(Val.getComplexVal().first,
                                          SrcElementType, DestElementType, Loc);
  Result.second = CGF.EmitScalarConversion(
      Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
  return Result;
}
4118
4119
static void emitSimpleAtomicStore(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
4120
110
                                  LValue LVal, RValue RVal) {
4121
110
  if (LVal.isGlobalReg())
4122
0
    CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
4123
110
  else
4124
110
    CGF.EmitAtomicStore(RVal, LVal, AO, LVal.isVolatile(), /*isInit=*/false);
4125
110
}
4126
4127
/// Load from \p LVal, atomically unless the lvalue is a global register
/// (which cannot be loaded atomically).
static RValue emitSimpleAtomicLoad(CodeGenFunction &CGF,
                                   llvm::AtomicOrdering AO, LValue LVal,
                                   SourceLocation Loc) {
  if (!LVal.isGlobalReg()) {
    // Derive the strongest failure ordering compatible with AO for the
    // cmpxchg-based fallback path inside EmitAtomicLoad.
    llvm::AtomicOrdering FailureOrder =
        llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO);
    return CGF.EmitAtomicLoad(LVal, Loc, FailureOrder, LVal.isVolatile());
  }
  // Global-register lvalue: plain lvalue load.
  return CGF.EmitLoadOfLValue(LVal, Loc);
}
4136
4137
/// Store \p RVal of type \p RValTy into \p LVal, converting the value to the
/// lvalue's evaluation kind (scalar or complex) first. Aggregates are not
/// valid here.
void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
                                         QualType RValTy, SourceLocation Loc) {
  switch (getEvaluationKind(LVal.getType())) {
  case TEK_Scalar:
    EmitStoreThroughLValue(RValue::get(convertToScalarValue(
                               *this, RVal, RValTy, LVal.getType(), Loc)),
                           LVal);
    break;
  case TEK_Complex:
    EmitStoreOfComplex(
        convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
        /*isInit=*/false);
    break;
  case TEK_Aggregate:
    llvm_unreachable("Must be a scalar or complex.");
  }
}
4154
4155
/// Emit 'omp atomic read': atomically load 'x' and store the (converted)
/// result into 'v', with the spec-mandated acquire flush for stronger
/// orderings.
static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
                                  const Expr *X, const Expr *V,
                                  SourceLocation Loc) {
  // v = x;
  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  LValue VLValue = CGF.EmitLValue(V);
  RValue Res = emitSimpleAtomicLoad(CGF, AO, XLValue, Loc);
  // OpenMP, 2.17.7, atomic Construct
  // If the read or capture clause is specified and the acquire, acq_rel, or
  // seq_cst clause is specified then the strong flush on exit from the atomic
  // operation is also an acquire flush.
  switch (AO) {
  case llvm::AtomicOrdering::Acquire:
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
                                         llvm::AtomicOrdering::Acquire);
    break;
  case llvm::AtomicOrdering::Monotonic:
  case llvm::AtomicOrdering::Release:
    // No flush required for relaxed/release-only orderings on a read.
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
  // Store the loaded value into 'v', converting from x's type if needed.
  CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
}
4185
4186
/// Emit 'omp atomic write': atomically store the value of 'expr' into 'x',
/// with the spec-mandated release flush for stronger orderings.
static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF,
                                   llvm::AtomicOrdering AO, const Expr *X,
                                   const Expr *E, SourceLocation Loc) {
  // x = expr;
  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
  emitSimpleAtomicStore(CGF, AO, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
  // OpenMP, 2.17.7, atomic Construct
  // If the write, update, or capture clause is specified and the release,
  // acq_rel, or seq_cst clause is specified then the strong flush on entry to
  // the atomic operation is also a release flush.
  switch (AO) {
  case llvm::AtomicOrdering::Release:
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
                                         llvm::AtomicOrdering::Release);
    break;
  case llvm::AtomicOrdering::Acquire:
  case llvm::AtomicOrdering::Monotonic:
    // No flush required for relaxed/acquire-only orderings on a write.
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
}
4212
4213
/// Try to lower an atomic update to a single 'atomicrmw' instruction.
/// Returns {true, result} on success; {false, null} when the operation,
/// operand types, or target support rule it out (the caller then falls back
/// to a compare-and-swap loop or a plain load/store).
static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
                                                RValue Update,
                                                BinaryOperatorKind BO,
                                                llvm::AtomicOrdering AO,
                                                bool IsXLHSInRHSPart) {
  ASTContext &Context = CGF.getContext();
  // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
  // expression is simple and atomic is allowed for the given type for the
  // target platform.
  if (BO == BO_Comma || !Update.isScalar() ||
      !Update.getScalarVal()->getType()->isIntegerTy() || !X.isSimple() ||
      (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
       (Update.getScalarVal()->getType() !=
        X.getAddress(CGF).getElementType())) ||
      !X.getAddress(CGF).getElementType()->isIntegerTy() ||
      !Context.getTargetInfo().hasBuiltinAtomic(
          Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
    return std::make_pair(false, RValue::get(nullptr));

  // Map the AST binary operator to the corresponding atomicrmw operation.
  llvm::AtomicRMWInst::BinOp RMWOp;
  switch (BO) {
  case BO_Add:
    RMWOp = llvm::AtomicRMWInst::Add;
    break;
  case BO_Sub:
    // 'expr - x' is not commutative, so atomicrmw sub only works when x is
    // the LHS of the RHS expression.
    if (!IsXLHSInRHSPart)
      return std::make_pair(false, RValue::get(nullptr));
    RMWOp = llvm::AtomicRMWInst::Sub;
    break;
  case BO_And:
    RMWOp = llvm::AtomicRMWInst::And;
    break;
  case BO_Or:
    RMWOp = llvm::AtomicRMWInst::Or;
    break;
  case BO_Xor:
    RMWOp = llvm::AtomicRMWInst::Xor;
    break;
  case BO_LT:
    // 'x = x < e ? x : e' is min; with operands swapped it is max. Pick the
    // signed/unsigned variant from x's representation.
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
                                   : llvm::AtomicRMWInst::Max)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
                                   : llvm::AtomicRMWInst::UMax);
    break;
  case BO_GT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
                                   : llvm::AtomicRMWInst::Min)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
                                   : llvm::AtomicRMWInst::UMin);
    break;
  case BO_Assign:
    RMWOp = llvm::AtomicRMWInst::Xchg;
    break;
  case BO_Mul:
  case BO_Div:
  case BO_Rem:
  case BO_Shl:
  case BO_Shr:
  case BO_LAnd:
  case BO_LOr:
    // These have no atomicrmw equivalent; caller falls back to cmpxchg.
    return std::make_pair(false, RValue::get(nullptr));
  case BO_PtrMemD:
  case BO_PtrMemI:
  case BO_LE:
  case BO_GE:
  case BO_EQ:
  case BO_NE:
  case BO_Cmp:
  case BO_AddAssign:
  case BO_SubAssign:
  case BO_AndAssign:
  case BO_OrAssign:
  case BO_XorAssign:
  case BO_MulAssign:
  case BO_DivAssign:
  case BO_RemAssign:
  case BO_ShlAssign:
  case BO_ShrAssign:
  case BO_Comma:
    // Sema normalizes compound assignments before codegen, so none of these
    // should reach this point.
    llvm_unreachable("Unsupported atomic update operation");
  }
  llvm::Value *UpdateVal = Update.getScalarVal();
  if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
    // Widen/truncate constant updates to x's storage type.
    UpdateVal = CGF.Builder.CreateIntCast(
        IC, X.getAddress(CGF).getElementType(),
        X.getType()->hasSignedIntegerRepresentation());
  }
  llvm::Value *Res =
      CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(CGF), UpdateVal, AO);
  return std::make_pair(true, RValue::get(Res));
}
4306
4307
/// Emit an atomic update of \p X. First tries a single atomicrmw; if that is
/// not possible, falls back to \p CommonGen via either a plain
/// read-modify-write (global register) or an atomic compare-and-swap loop.
/// Returns whether atomicrmw was used and, if so, the old value of X.
std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
    LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
    llvm::AtomicOrdering AO, SourceLocation Loc,
    const llvm::function_ref<RValue(RValue)> CommonGen) {
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval + expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr
  // x = expr Op x; - > expr binop xrval;
  auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
  if (!Res.first) {
    if (X.isGlobalReg()) {
      // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
      // 'xrval'.
      EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
    } else {
      // Perform compare-and-swap procedure.
      EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
    }
  }
  return Res;
}
4330
4331
/// Emit codegen for the '#pragma omp atomic update' (or clause-less 'atomic')
/// form: evaluates X and E, then performs the atomic update described by the
/// pre-built binary update expression \p UE, followed by any release flush
/// the memory ordering requires.
static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF,
                                    llvm::AtomicOrdering AO, const Expr *X,
                                    const Expr *E, const Expr *UE,
                                    bool IsXLHSInRHSPart, SourceLocation Loc) {
  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
         "Update expr in 'atomic update' must be a binary operator.");
  const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval + expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr
  // x = expr Op x; - > expr binop xrval;
  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  // Sema wraps both operands of UE in OpaqueValueExprs; map the one standing
  // for 'x' and the one standing for 'expr' depending on operand order.
  const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
  const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
  const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
  const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
  // Fallback generator: binds the opaque values and re-emits UE with the old
  // value of 'x' supplied by the caller.
  auto &&Gen = [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) {
    CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
    CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
    return CGF.EmitAnyExpr(UE);
  };
  (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
      XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
  // OpenMP, 2.17.7, atomic Construct
  // If the write, update, or capture clause is specified and the release,
  // acq_rel, or seq_cst clause is specified then the strong flush on entry to
  // the atomic operation is also a release flush.
  switch (AO) {
  case llvm::AtomicOrdering::Release:
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
                                         llvm::AtomicOrdering::Release);
    break;
  case llvm::AtomicOrdering::Acquire:
  case llvm::AtomicOrdering::Monotonic:
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
}
4378
4379
static RValue convertToType(CodeGenFunction &CGF, RValue Value,
4380
                            QualType SourceType, QualType ResType,
4381
2
                            SourceLocation Loc) {
4382
2
  switch (CGF.getEvaluationKind(ResType)) {
4383
2
  case TEK_Scalar:
4384
2
    return RValue::get(
4385
2
        convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
4386
0
  case TEK_Complex: {
4387
0
    auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
4388
0
    return RValue::getComplex(Res.first, Res.second);
4389
0
  }
4390
0
  case TEK_Aggregate:
4391
0
    break;
4392
0
  }
4393
0
  llvm_unreachable("Must be a scalar or complex.");
4394
0
}
4395
4396
static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF,
4397
                                     llvm::AtomicOrdering AO,
4398
                                     bool IsPostfixUpdate, const Expr *V,
4399
                                     const Expr *X, const Expr *E,
4400
                                     const Expr *UE, bool IsXLHSInRHSPart,
4401
110
                                     SourceLocation Loc) {
4402
110
  assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
4403
110
  assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
4404
110
  RValue NewVVal;
4405
110
  LValue VLValue = CGF.EmitLValue(V);
4406
110
  LValue XLValue = CGF.EmitLValue(X);
4407
110
  RValue ExprRValue = CGF.EmitAnyExpr(E);
4408
110
  QualType NewVValType;
4409
110
  if (UE) {
4410
108
    // 'x' is updated with some additional value.
4411
108
    assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
4412
108
           "Update expr in 'atomic capture' must be a binary operator.");
4413
108
    const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
4414
108
    // Update expressions are allowed to have the following forms:
4415
108
    // x binop= expr; -> xrval + expr;
4416
108
    // x++, ++x -> xrval + 1;
4417
108
    // x--, --x -> xrval - 1;
4418
108
    // x = x binop expr; -> xrval binop expr
4419
108
    // x = expr Op x; - > expr binop xrval;
4420
108
    const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
4421
108
    const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
4422
108
    const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? 
LHS82
:
RHS26
;
4423
108
    NewVValType = XRValExpr->getType();
4424
108
    const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? 
RHS82
:
LHS26
;
4425
108
    auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
4426
108
                  IsPostfixUpdate](RValue XRValue) {
4427
76
      CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
4428
76
      CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
4429
76
      RValue Res = CGF.EmitAnyExpr(UE);
4430
76
      NewVVal = IsPostfixUpdate ? 
XRValue24
:
Res52
;
4431
76
      return Res;
4432
76
    };
4433
108
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
4434
108
        XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
4435
108
    CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
4436
108
    if (Res.first) {
4437
32
      // 'atomicrmw' instruction was generated.
4438
32
      if (IsPostfixUpdate) {
4439
16
        // Use old value from 'atomicrmw'.
4440
16
        NewVVal = Res.second;
4441
16
      } else {
4442
16
        // 'atomicrmw' does not provide new value, so evaluate it using old
4443
16
        // value of 'x'.
4444
16
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
4445
16
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
4446
16
        NewVVal = CGF.EmitAnyExpr(UE);
4447
16
      }
4448
32
    }
4449
108
  } else {
4450
2
    // 'x' is simply rewritten with some 'expr'.
4451
2
    NewVValType = X->getType().getNonReferenceType();
4452
2
    ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
4453
2
                               X->getType().getNonReferenceType(), Loc);
4454
2
    auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) {
4455
0
      NewVVal = XRValue;
4456
0
      return ExprRValue;
4457
0
    };
4458
2
    // Try to perform atomicrmw xchg, otherwise simple exchange.
4459
2
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
4460
2
        XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
4461
2
        Loc, Gen);
4462
2
    CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
4463
2
    if (Res.first) {
4464
2
      // 'atomicrmw' instruction was generated.
4465
2
      NewVVal = IsPostfixUpdate ? Res.second : 
ExprRValue0
;
4466
2
    }
4467
2
  }
4468
110
  // Emit post-update store to 'v' of old/new 'x' value.
4469
110
  CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
4470
110
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
4471
110
  // OpenMP, 2.17.7, atomic Construct
4472
110
  // If the write, update, or capture clause is specified and the release,
4473
110
  // acq_rel, or seq_cst clause is specified then the strong flush on entry to
4474
110
  // the atomic operation is also a release flush.
4475
110
  // If the read or capture clause is specified and the acquire, acq_rel, or
4476
110
  // seq_cst clause is specified then the strong flush on exit from the atomic
4477
110
  // operation is also an acquire flush.
4478
110
  switch (AO) {
4479
2
  case llvm::AtomicOrdering::Release:
4480
2
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
4481
2
                                         llvm::AtomicOrdering::Release);
4482
2
    break;
4483
2
  case llvm::AtomicOrdering::Acquire:
4484
2
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
4485
2
                                         llvm::AtomicOrdering::Acquire);
4486
2
    break;
4487
14
  case llvm::AtomicOrdering::AcquireRelease:
4488
14
  case llvm::AtomicOrdering::SequentiallyConsistent:
4489
14
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
4490
14
                                         llvm::AtomicOrdering::AcquireRelease);
4491
14
    break;
4492
92
  case llvm::AtomicOrdering::Monotonic:
4493
92
    break;
4494
14
  case llvm::AtomicOrdering::NotAtomic:
4495
0
  case llvm::AtomicOrdering::Unordered:
4496
0
    llvm_unreachable("Unexpected ordering.");
4497
110
  }
4498
110
}
4499
4500
static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
4501
                              llvm::AtomicOrdering AO, bool IsPostfixUpdate,
4502
                              const Expr *X, const Expr *V, const Expr *E,
4503
                              const Expr *UE, bool IsXLHSInRHSPart,
4504
452
                              SourceLocation Loc) {
4505
452
  switch (Kind) {
4506
108
  case OMPC_read:
4507
108
    emitOMPAtomicReadExpr(CGF, AO, X, V, Loc);
4508
108
    break;
4509
110
  case OMPC_write:
4510
110
    emitOMPAtomicWriteExpr(CGF, AO, X, E, Loc);
4511
110
    break;
4512
124
  case OMPC_unknown:
4513
124
  case OMPC_update:
4514
124
    emitOMPAtomicUpdateExpr(CGF, AO, X, E, UE, IsXLHSInRHSPart, Loc);
4515
124
    break;
4516
124
  case OMPC_capture:
4517
110
    emitOMPAtomicCaptureExpr(CGF, AO, IsPostfixUpdate, V, X, E, UE,
4518
110
                             IsXLHSInRHSPart, Loc);
4519
110
    break;
4520
124
  case OMPC_if:
4521
0
  case OMPC_final:
4522
0
  case OMPC_num_threads:
4523
0
  case OMPC_private:
4524
0
  case OMPC_firstprivate:
4525
0
  case OMPC_lastprivate:
4526
0
  case OMPC_reduction:
4527
0
  case OMPC_task_reduction:
4528
0
  case OMPC_in_reduction:
4529
0
  case OMPC_safelen:
4530
0
  case OMPC_simdlen:
4531
0
  case OMPC_allocator:
4532
0
  case OMPC_allocate:
4533
0
  case OMPC_collapse:
4534
0
  case OMPC_default:
4535
0
  case OMPC_seq_cst:
4536
0
  case OMPC_acq_rel:
4537
0
  case OMPC_acquire:
4538
0
  case OMPC_release:
4539
0
  case OMPC_relaxed:
4540
0
  case OMPC_shared:
4541
0
  case OMPC_linear:
4542
0
  case OMPC_aligned:
4543
0
  case OMPC_copyin:
4544
0
  case OMPC_copyprivate:
4545
0
  case OMPC_flush:
4546
0
  case OMPC_proc_bind:
4547
0
  case OMPC_schedule:
4548
0
  case OMPC_ordered:
4549
0
  case OMPC_nowait:
4550
0
  case OMPC_untied:
4551
0
  case OMPC_threadprivate:
4552
0
  case OMPC_depend:
4553
0
  case OMPC_mergeable:
4554
0
  case OMPC_device:
4555
0
  case OMPC_threads:
4556
0
  case OMPC_simd:
4557
0
  case OMPC_map:
4558
0
  case OMPC_num_teams:
4559
0
  case OMPC_thread_limit:
4560
0
  case OMPC_priority:
4561
0
  case OMPC_grainsize:
4562
0
  case OMPC_nogroup:
4563
0
  case OMPC_num_tasks:
4564
0
  case OMPC_hint:
4565
0
  case OMPC_dist_schedule:
4566
0
  case OMPC_defaultmap:
4567
0
  case OMPC_uniform:
4568
0
  case OMPC_to:
4569
0
  case OMPC_from:
4570
0
  case OMPC_use_device_ptr:
4571
0
  case OMPC_is_device_ptr:
4572
0
  case OMPC_unified_address:
4573
0
  case OMPC_unified_shared_memory:
4574
0
  case OMPC_reverse_offload:
4575
0
  case OMPC_dynamic_allocators:
4576
0
  case OMPC_atomic_default_mem_order:
4577
0
  case OMPC_device_type:
4578
0
  case OMPC_match:
4579
0
  case OMPC_nontemporal:
4580
0
  case OMPC_order:
4581
0
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
4582
452
  }
4583
452
}
4584
4585
452
/// Emit code for an '#pragma omp atomic' directive: determine the memory
/// ordering (explicit clause, or the 'atomic_default_mem_order'-derived
/// default), find the atomic clause kind, and emit the inlined atomic region.
void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
  llvm::AtomicOrdering AO = llvm::AtomicOrdering::Monotonic;
  bool MemOrderingSpecified = false;
  // Map an explicit memory-order clause, if any, to an LLVM atomic ordering.
  if (S.getSingleClause<OMPSeqCstClause>()) {
    AO = llvm::AtomicOrdering::SequentiallyConsistent;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPAcqRelClause>()) {
    AO = llvm::AtomicOrdering::AcquireRelease;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPAcquireClause>()) {
    AO = llvm::AtomicOrdering::Acquire;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPReleaseClause>()) {
    AO = llvm::AtomicOrdering::Release;
    MemOrderingSpecified = true;
  } else if (S.getSingleClause<OMPRelaxedClause>()) {
    AO = llvm::AtomicOrdering::Monotonic;
    MemOrderingSpecified = true;
  }
  OpenMPClauseKind Kind = OMPC_unknown;
  for (const OMPClause *C : S.clauses()) {
    // Find first clause (skip seq_cst|acq_rel|aqcuire|release|relaxed clause,
    // if it is first).
    if (C->getClauseKind() != OMPC_seq_cst &&
        C->getClauseKind() != OMPC_acq_rel &&
        C->getClauseKind() != OMPC_acquire &&
        C->getClauseKind() != OMPC_release &&
        C->getClauseKind() != OMPC_relaxed) {
      Kind = C->getClauseKind();
      break;
    }
  }
  if (!MemOrderingSpecified) {
    // No explicit ordering clause: derive the ordering from the runtime's
    // default ('requires atomic_default_mem_order'), constrained per kind.
    llvm::AtomicOrdering DefaultOrder =
        CGM.getOpenMPRuntime().getDefaultMemoryOrdering();
    if (DefaultOrder == llvm::AtomicOrdering::Monotonic ||
        DefaultOrder == llvm::AtomicOrdering::SequentiallyConsistent ||
        (DefaultOrder == llvm::AtomicOrdering::AcquireRelease &&
         Kind == OMPC_capture)) {
      AO = DefaultOrder;
    } else if (DefaultOrder == llvm::AtomicOrdering::AcquireRelease) {
      // acq_rel is only valid for 'capture'; degrade to release/acquire for
      // the other kinds.
      if (Kind == OMPC_unknown || Kind == OMPC_update || Kind == OMPC_write) {
        AO = llvm::AtomicOrdering::Release;
      } else if (Kind == OMPC_read) {
        assert(Kind == OMPC_read && "Unexpected atomic kind.");
        AO = llvm::AtomicOrdering::Acquire;
      }
    }
  }

  const Stmt *CS = S.getInnermostCapturedStmt()->IgnoreContainers();
  if (const auto *FE = dyn_cast<FullExpr>(CS))
    enterFullExpression(FE);
  // Processing for statements under 'atomic capture'.
  if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
    for (const Stmt *C : Compound->body()) {
      if (const auto *FE = dyn_cast<FullExpr>(C))
        enterFullExpression(FE);
    }
  }

  auto &&CodeGen = [&S, Kind, AO, CS](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.EmitStopPoint(CS);
    emitOMPAtomicExpr(CGF, Kind, AO, S.isPostfixUpdate(), S.getX(), S.getV(),
                      S.getExpr(), S.getUpdateExpr(), S.isXLHSInRHSPart(),
                      S.getBeginLoc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
}
4656
4657
static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
4658
                                         const OMPExecutableDirective &S,
4659
5.08k
                                         const RegionCodeGenTy &CodeGen) {
4660
5.08k
  assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
4661
5.08k
  CodeGenModule &CGM = CGF.CGM;
4662
5.08k
4663
5.08k
  // On device emit this construct as inlined code.
4664
5.08k
  if (CGM.getLangOpts().OpenMPIsDevice) {
4665
6
    OMPLexicalScope Scope(CGF, S, OMPD_target);
4666
6
    CGM.getOpenMPRuntime().emitInlinedDirective(
4667
6
        CGF, OMPD_target, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4668
6
          CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
4669
6
        });
4670
6
    return;
4671
6
  }
4672
5.08k
4673
5.08k
  auto LPCRegion =
4674
5.08k
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
4675
5.08k
  llvm::Function *Fn = nullptr;
4676
5.08k
  llvm::Constant *FnID = nullptr;
4677
5.08k
4678
5.08k
  const Expr *IfCond = nullptr;
4679
5.08k
  // Check for the at most one if clause associated with the target region.
4680
5.08k
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
4681
781
    if (C->getNameModifier() == OMPD_unknown ||
4682
781
        
C->getNameModifier() == OMPD_target535
) {
4683
749
      IfCond = C->getCondition();
4684
749
      break;
4685
749
    }
4686
781
  }
4687
5.08k
4688
5.08k
  // Check if we have any device clause associated with the directive.
4689
5.08k
  const Expr *Device = nullptr;
4690
5.08k
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
4691
134
    Device = C->getDevice();
4692
5.08k
4693
5.08k
  // Check if we have an if clause whose conditional always evaluates to false
4694
5.08k
  // or if we do not have any targets specified. If so the target region is not
4695
5.08k
  // an offload entry point.
4696
5.08k
  bool IsOffloadEntry = true;
4697
5.08k
  if (IfCond) {
4698
749
    bool Val;
4699
749
    if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && 
!Val291
)
4700
209
      IsOffloadEntry = false;
4701
749
  }
4702
5.08k
  if (CGM.getLangOpts().OMPTargetTriples.empty())
4703
129
    IsOffloadEntry = false;
4704
5.08k
4705
5.08k
  assert(CGF.CurFuncDecl && "No parent declaration for target region!");
4706
5.08k
  StringRef ParentName;
4707
5.08k
  // In case we have Ctors/Dtors we use the complete type variant to produce
4708
5.08k
  // the mangling of the device outlined kernel.
4709
5.08k
  if (const auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
4710
280
    ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
4711
4.80k
  else if (const auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
4712
252
    ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
4713
4.55k
  else
4714
4.55k
    ParentName =
4715
4.55k
        CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));
4716
5.08k
4717
5.08k
  // Emit target region as a standalone region.
4718
5.08k
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
4719
5.08k
                                                    IsOffloadEntry, CodeGen);
4720
5.08k
  OMPLexicalScope Scope(CGF, S, OMPD_task);
4721
5.08k
  auto &&SizeEmitter =
4722
5.08k
      [IsOffloadEntry](CodeGenFunction &CGF,
4723
5.08k
                       const OMPLoopDirective &D) -> llvm::Value * {
4724
2.39k
    if (IsOffloadEntry) {
4725
2.39k
      OMPLoopScope(CGF, D);
4726
2.39k
      // Emit calculation of the iterations count.
4727
2.39k
      llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations());
4728
2.39k
      NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty,
4729
2.39k
                                                /*isSigned=*/false);
4730
2.39k
      return NumIterations;
4731
2.39k
    }
4732
0
    return nullptr;
4733
0
  };
4734
5.08k
  CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
4735
5.08k
                                        SizeEmitter);
4736
5.08k
}
4737
4738
static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
4739
3.06k
                             PrePostActionTy &Action) {
4740
3.06k
  Action.Enter(CGF);
4741
3.06k
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4742
3.06k
  (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
4743
3.06k
  CGF.EmitOMPPrivateClause(S, PrivateScope);
4744
3.06k
  (void)PrivateScope.Privatize();
4745
3.06k
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
4746
3.06k
    CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
4747
3.06k
4748
3.06k
  CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt());
4749
3.06k
}
4750
4751
void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
4752
                                                  StringRef ParentName,
4753
521
                                                  const OMPTargetDirective &S) {
4754
521
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4755
521
    emitTargetRegion(CGF, S, Action);
4756
521
  };
4757
521
  llvm::Function *Fn;
4758
521
  llvm::Constant *Addr;
4759
521
  // Emit target region as a standalone region.
4760
521
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4761
521
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4762
521
  assert(Fn && Addr && "Target device function emission failed.");
4763
521
}
4764
4765
2.54k
/// Host-side entry point for '#pragma omp target': wraps the region codegen
/// in the common target outlining/offload-call logic.
void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}
4771
4772
static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
4773
                                        const OMPExecutableDirective &S,
4774
                                        OpenMPDirectiveKind InnermostKind,
4775
3.86k
                                        const RegionCodeGenTy &CodeGen) {
4776
3.86k
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
4777
3.86k
  llvm::Function *OutlinedFn =
4778
3.86k
      CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
4779
3.86k
          S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
4780
3.86k
4781
3.86k
  const auto *NT = S.getSingleClause<OMPNumTeamsClause>();
4782
3.86k
  const auto *TL = S.getSingleClause<OMPThreadLimitClause>();
4783
3.86k
  if (NT || 
TL3.62k
) {
4784
285
    const Expr *NumTeams = NT ? 
NT->getNumTeams()241
:
nullptr44
;
4785
285
    const Expr *ThreadLimit = TL ? 
TL->getThreadLimit()221
:
nullptr64
;
4786
285
4787
285
    CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
4788
285
                                                  S.getBeginLoc());
4789
285
  }
4790
3.86k
4791
3.86k
  OMPTeamsScope Scope(CGF, S);
4792
3.86k
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
4793
3.86k
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
4794
3.86k
  CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getBeginLoc(), OutlinedFn,
4795
3.86k
                                           CapturedVars);
4796
3.86k
}
4797
4798
835
/// Emit code for a plain '#pragma omp teams' directive: privatization,
/// reduction init/final, and the captured statement, via the common teams
/// outlining path.
void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
4814
4815
static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
4816
665
                                  const OMPTargetTeamsDirective &S) {
4817
665
  auto *CS = S.getCapturedStmt(OMPD_teams);
4818
665
  Action.Enter(CGF);
4819
665
  // Emit teams region as a standalone region.
4820
665
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
4821
665
    Action.Enter(CGF);
4822
665
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4823
665
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
4824
665
    CGF.EmitOMPPrivateClause(S, PrivateScope);
4825
665
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4826
665
    (void)PrivateScope.Privatize();
4827
665
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
4828
665
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
4829
665
    CGF.EmitStmt(CS->getCapturedStmt());
4830
665
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
4831
665
  };
4832
665
  emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
4833
665
  emitPostUpdateForReductionClause(CGF, S,
4834
665
                                   [](CodeGenFunction &) 
{ return nullptr; }0
);
4835
665
}
4836
4837
/// Emit the device-side outlined function for '#pragma omp target teams'.
/// Always emitted as an offload entry, mangled with \p ParentName.
void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}
4850
4851
/// Host-side entry point for '#pragma omp target teams'.
void CodeGenFunction::EmitOMPTargetTeamsDirective(
    const OMPTargetTeamsDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}
4858
4859
/// Region codegen for '#pragma omp target teams distribute': nests the
/// distribute-loop codegen inside a standalone teams region with reduction
/// handling.
static void
emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
                                const OMPTargetTeamsDistributeDirective &S) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
4882
4883
/// Emit the device-side outlined function for
/// '#pragma omp target teams distribute'. Always an offload entry.
void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}
4896
4897
/// Host-side entry point for '#pragma omp target teams distribute'.
void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
    const OMPTargetTeamsDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}
4904
4905
/// Region codegen for '#pragma omp target teams distribute simd': same shape
/// as the non-simd variant but the teams region is outlined with the
/// 'distribute simd' innermost kind.
static void emitTargetTeamsDistributeSimdRegion(
    CodeGenFunction &CGF, PrePostActionTy &Action,
    const OMPTargetTeamsDistributeSimdDirective &S) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_simd, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
4928
4929
/// Emit the device-side outlined function for
/// '#pragma omp target teams distribute simd'. Always an offload entry.
void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}
4942
4943
/// Host-side entry point for '#pragma omp target teams distribute simd'.
void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
    const OMPTargetTeamsDistributeSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}
4950
4951
void CodeGenFunction::EmitOMPTeamsDis