Coverage Report

Created: 2021-08-24 07:12

/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/CodeGen/CGStmtOpenMP.cpp
<
Line
Count
Source (jump to first uncovered line)
1
//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This contains code to emit OpenMP nodes as LLVM code.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "CGCleanup.h"
14
#include "CGOpenMPRuntime.h"
15
#include "CodeGenFunction.h"
16
#include "CodeGenModule.h"
17
#include "TargetInfo.h"
18
#include "clang/AST/ASTContext.h"
19
#include "clang/AST/Attr.h"
20
#include "clang/AST/DeclOpenMP.h"
21
#include "clang/AST/OpenMPClause.h"
22
#include "clang/AST/Stmt.h"
23
#include "clang/AST/StmtOpenMP.h"
24
#include "clang/AST/StmtVisitor.h"
25
#include "clang/Basic/OpenMPKinds.h"
26
#include "clang/Basic/PrettyStackTrace.h"
27
#include "llvm/Frontend/OpenMP/OMPConstants.h"
28
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
29
#include "llvm/IR/Constants.h"
30
#include "llvm/IR/Instructions.h"
31
#include "llvm/Support/AtomicOrdering.h"
32
using namespace clang;
33
using namespace CodeGen;
34
using namespace llvm::omp;
35
36
static const VarDecl *getBaseDecl(const Expr *Ref);
37
38
namespace {
39
/// Lexical scope for OpenMP executable constructs, that handles correct codegen
40
/// for captured expressions.
41
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
42
15.4k
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
43
18.3k
    for (const auto *C : S.clauses()) {
44
18.3k
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
45
10.7k
        if (const auto *PreInit =
46
10.7k
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
47
1.14k
          for (const auto *I : PreInit->decls()) {
48
1.14k
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
49
1.12k
              CGF.EmitVarDecl(cast<VarDecl>(*I));
50
1.12k
            } else {
51
18
              CodeGenFunction::AutoVarEmission Emission =
52
18
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
53
18
              CGF.EmitAutoVarCleanups(Emission);
54
18
            }
55
1.14k
          }
56
1.06k
        }
57
10.7k
      }
58
18.3k
    }
59
15.4k
  }
60
  CodeGenFunction::OMPPrivateScope InlinedShareds;
61
62
16.6k
  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
63
16.6k
    return CGF.LambdaCaptureFields.lookup(VD) ||
64
16.6k
           
(16.1k
CGF.CapturedStmtInfo16.1k
&&
CGF.CapturedStmtInfo->lookup(VD)4.11k
) ||
65
16.6k
           
(12.6k
CGF.CurCodeDecl12.6k
&&
isa<BlockDecl>(CGF.CurCodeDecl)12.6k
&&
66
12.6k
            
cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD)8
);
67
16.6k
  }
68
69
public:
70
  OMPLexicalScope(
71
      CodeGenFunction &CGF, const OMPExecutableDirective &S,
72
      const llvm::Optional<OpenMPDirectiveKind> CapturedRegion = llvm::None,
73
      const bool EmitPreInitStmt = true)
74
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
75
25.1k
        InlinedShareds(CGF) {
76
25.1k
    if (EmitPreInitStmt)
77
15.4k
      emitPreInitStmt(CGF, S);
78
25.1k
    if (!CapturedRegion.hasValue())
79
12.4k
      return;
80
12.6k
    assert(S.hasAssociatedStmt() &&
81
12.6k
           "Expected associated statement for inlined directive.");
82
0
    const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
83
18.8k
    for (const auto &C : CS->captures()) {
84
18.8k
      if (C.capturesVariable() || 
C.capturesVariableByCopy()11.0k
) {
85
16.6k
        auto *VD = C.getCapturedVar();
86
16.6k
        assert(VD == VD->getCanonicalDecl() &&
87
16.6k
               "Canonical decl must be captured.");
88
0
        DeclRefExpr DRE(
89
16.6k
            CGF.getContext(), const_cast<VarDecl *>(VD),
90
16.6k
            isCapturedVar(CGF, VD) || 
(12.6k
CGF.CapturedStmtInfo12.6k
&&
91
12.6k
                                       
InlinedShareds.isGlobalVarCaptured(VD)562
),
92
16.6k
            VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
93
16.6k
        InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
94
16.6k
          return CGF.EmitLValue(&DRE).getAddress(CGF);
95
16.6k
        });
96
16.6k
      }
97
18.8k
    }
98
12.6k
    (void)InlinedShareds.Privatize();
99
12.6k
  }
100
};
101
102
/// Lexical scope for OpenMP parallel construct, that handles correct codegen
103
/// for captured expressions.
104
class OMPParallelScope final : public OMPLexicalScope {
105
6.08k
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
106
6.08k
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
107
6.08k
    return !(isOpenMPTargetExecutionDirective(Kind) ||
108
6.08k
             
isOpenMPLoopBoundSharingDirective(Kind)2.77k
) &&
109
6.08k
           
isOpenMPParallelDirective(Kind)1.38k
;
110
6.08k
  }
111
112
public:
113
  OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
114
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
115
6.08k
                        EmitPreInitStmt(S)) {}
116
};
117
118
/// Lexical scope for OpenMP teams construct, that handles correct codegen
119
/// for captured expressions.
120
class OMPTeamsScope final : public OMPLexicalScope {
121
5.69k
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
122
5.69k
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
123
5.69k
    return !isOpenMPTargetExecutionDirective(Kind) &&
124
5.69k
           
isOpenMPTeamsDirective(Kind)1.90k
;
125
5.69k
  }
126
127
public:
128
  OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
129
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
130
5.69k
                        EmitPreInitStmt(S)) {}
131
};
132
133
/// Private scope for OpenMP loop-based directives, that supports capturing
134
/// of used expression from loop statement.
135
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
136
17.2k
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopBasedDirective &S) {
137
17.2k
    const DeclStmt *PreInits;
138
17.2k
    CodeGenFunction::OMPMapVars PreCondVars;
139
17.2k
    if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
140
17.2k
      llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
141
17.9k
      for (const auto *E : LD->counters()) {
142
17.9k
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
143
17.9k
        EmittedAsPrivate.insert(VD->getCanonicalDecl());
144
17.9k
        (void)PreCondVars.setVarAddr(
145
17.9k
            CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
146
17.9k
      }
147
      // Mark private vars as undefs.
148
17.2k
      for (const auto *C : LD->getClausesOfKind<OMPPrivateClause>()) {
149
2.85k
        for (const Expr *IRef : C->varlists()) {
150
2.85k
          const auto *OrigVD =
151
2.85k
              cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
152
2.85k
          if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
153
2.52k
            (void)PreCondVars.setVarAddr(
154
2.52k
                CGF, OrigVD,
155
2.52k
                Address(llvm::UndefValue::get(CGF.ConvertTypeForMem(
156
2.52k
                            CGF.getContext().getPointerType(
157
2.52k
                                OrigVD->getType().getNonReferenceType()))),
158
2.52k
                        CGF.getContext().getDeclAlign(OrigVD)));
159
2.52k
          }
160
2.85k
        }
161
716
      }
162
17.2k
      (void)PreCondVars.apply(CGF);
163
      // Emit init, __range and __end variables for C++ range loops.
164
17.2k
      (void)OMPLoopBasedDirective::doForAllLoops(
165
17.2k
          LD->getInnermostCapturedStmt()->getCapturedStmt(),
166
17.2k
          /*TryImperfectlyNestedLoops=*/true, LD->getLoopsNumber(),
167
17.9k
          [&CGF](unsigned Cnt, const Stmt *CurStmt) {
168
17.9k
            if (const auto *CXXFor = dyn_cast<CXXForRangeStmt>(CurStmt)) {
169
6
              if (const Stmt *Init = CXXFor->getInit())
170
0
                CGF.EmitStmt(Init);
171
6
              CGF.EmitStmt(CXXFor->getRangeStmt());
172
6
              CGF.EmitStmt(CXXFor->getEndStmt());
173
6
            }
174
17.9k
            return false;
175
17.9k
          });
176
17.2k
      PreInits = cast_or_null<DeclStmt>(LD->getPreInits());
177
17.2k
    } else 
if (const auto *8
Tile8
= dyn_cast<OMPTileDirective>(&S)) {
178
8
      PreInits = cast_or_null<DeclStmt>(Tile->getPreInits());
179
8
    } else 
if (const auto *0
Unroll0
= dyn_cast<OMPUnrollDirective>(&S)) {
180
0
      PreInits = cast_or_null<DeclStmt>(Unroll->getPreInits());
181
0
    } else {
182
0
      llvm_unreachable("Unknown loop-based directive kind.");
183
0
    }
184
17.2k
    if (PreInits) {
185
3.02k
      for (const auto *I : PreInits->decls())
186
6.69k
        CGF.EmitVarDecl(cast<VarDecl>(*I));
187
3.02k
    }
188
17.2k
    PreCondVars.restore(CGF);
189
17.2k
  }
190
191
public:
192
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopBasedDirective &S)
193
17.2k
      : CodeGenFunction::RunCleanupsScope(CGF) {
194
17.2k
    emitPreInitStmt(CGF, S);
195
17.2k
  }
196
};
197
198
class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
199
  CodeGenFunction::OMPPrivateScope InlinedShareds;
200
201
41.4k
  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
202
41.4k
    return CGF.LambdaCaptureFields.lookup(VD) ||
203
41.4k
           
(40.4k
CGF.CapturedStmtInfo40.4k
&&
CGF.CapturedStmtInfo->lookup(VD)6.51k
) ||
204
41.4k
           
(40.4k
CGF.CurCodeDecl40.4k
&&
isa<BlockDecl>(CGF.CurCodeDecl)40.4k
&&
205
40.4k
            
cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD)78
);
206
41.4k
  }
207
208
public:
209
  OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
210
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
211
13.3k
        InlinedShareds(CGF) {
212
17.4k
    for (const auto *C : S.clauses()) {
213
17.4k
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
214
11.4k
        if (const auto *PreInit =
215
11.4k
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
216
1.35k
          for (const auto *I : PreInit->decls()) {
217
1.35k
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
218
1.34k
              CGF.EmitVarDecl(cast<VarDecl>(*I));
219
1.34k
            } else {
220
18
              CodeGenFunction::AutoVarEmission Emission =
221
18
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
222
18
              CGF.EmitAutoVarCleanups(Emission);
223
18
            }
224
1.35k
          }
225
1.29k
        }
226
11.4k
      } else 
if (const auto *5.95k
UDP5.95k
= dyn_cast<OMPUseDevicePtrClause>(C)) {
227
86
        for (const Expr *E : UDP->varlists()) {
228
86
          const Decl *D = cast<DeclRefExpr>(E)->getDecl();
229
86
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
230
20
            CGF.EmitVarDecl(*OED);
231
86
        }
232
5.87k
      } else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(C)) {
233
24
        for (const Expr *E : UDP->varlists()) {
234
24
          const Decl *D = getBaseDecl(E);
235
24
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
236
10
            CGF.EmitVarDecl(*OED);
237
24
        }
238
6
      }
239
17.4k
    }
240
13.3k
    if (!isOpenMPSimdDirective(S.getDirectiveKind()))
241
9.88k
      CGF.EmitOMPPrivateClause(S, InlinedShareds);
242
13.3k
    if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
243
37
      if (const Expr *E = TG->getReductionRef())
244
26
        CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
245
37
    }
246
    // Temp copy arrays for inscan reductions should not be emitted as they are
247
    // not used in simd only mode.
248
13.3k
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> CopyArrayTemps;
249
13.3k
    for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
250
466
      if (C->getModifier() != OMPC_REDUCTION_inscan)
251
446
        continue;
252
20
      for (const Expr *E : C->copy_array_temps())
253
36
        CopyArrayTemps.insert(cast<DeclRefExpr>(E)->getDecl());
254
20
    }
255
13.3k
    const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
256
41.1k
    while (CS) {
257
47.5k
      for (auto &C : CS->captures()) {
258
47.5k
        if (C.capturesVariable() || 
C.capturesVariableByCopy()30.9k
) {
259
41.4k
          auto *VD = C.getCapturedVar();
260
41.4k
          if (CopyArrayTemps.contains(VD))
261
16
            continue;
262
41.4k
          assert(VD == VD->getCanonicalDecl() &&
263
41.4k
                 "Canonical decl must be captured.");
264
0
          DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
265
41.4k
                          isCapturedVar(CGF, VD) ||
266
41.4k
                              
(40.3k
CGF.CapturedStmtInfo40.3k
&&
267
40.3k
                               
InlinedShareds.isGlobalVarCaptured(VD)6.50k
),
268
41.4k
                          VD->getType().getNonReferenceType(), VK_LValue,
269
41.4k
                          C.getLocation());
270
41.4k
          InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
271
41.4k
            return CGF.EmitLValue(&DRE).getAddress(CGF);
272
41.4k
          });
273
41.4k
        }
274
47.5k
      }
275
27.8k
      CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
276
27.8k
    }
277
13.3k
    (void)InlinedShareds.Privatize();
278
13.3k
  }
279
};
280
281
} // namespace
282
283
static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
284
                                         const OMPExecutableDirective &S,
285
                                         const RegionCodeGenTy &CodeGen);
286
287
12.7k
LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
288
12.7k
  if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
289
9.36k
    if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
290
9.36k
      OrigVD = OrigVD->getCanonicalDecl();
291
9.36k
      bool IsCaptured =
292
9.36k
          LambdaCaptureFields.lookup(OrigVD) ||
293
9.36k
          
(9.24k
CapturedStmtInfo9.24k
&&
CapturedStmtInfo->lookup(OrigVD)972
) ||
294
9.36k
          
(8.57k
CurCodeDecl8.57k
&&
isa<BlockDecl>(CurCodeDecl)8.51k
);
295
9.36k
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured,
296
9.36k
                      OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
297
9.36k
      return EmitLValue(&DRE);
298
9.36k
    }
299
9.36k
  }
300
3.41k
  return EmitLValue(E);
301
12.7k
}
302
303
17.5k
llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
304
17.5k
  ASTContext &C = getContext();
305
17.5k
  llvm::Value *Size = nullptr;
306
17.5k
  auto SizeInChars = C.getTypeSizeInChars(Ty);
307
17.5k
  if (SizeInChars.isZero()) {
308
    // getTypeSizeInChars() returns 0 for a VLA.
309
2.33k
    while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) {
310
1.16k
      VlaSizePair VlaSize = getVLASize(VAT);
311
1.16k
      Ty = VlaSize.Type;
312
1.16k
      Size =
313
1.16k
          Size ? 
Builder.CreateNUWMul(Size, VlaSize.NumElts)0
: VlaSize.NumElts;
314
1.16k
    }
315
1.16k
    SizeInChars = C.getTypeSizeInChars(Ty);
316
1.16k
    if (SizeInChars.isZero())
317
0
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
318
1.16k
    return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
319
1.16k
  }
320
16.4k
  return CGM.getSize(SizeInChars);
321
17.5k
}
322
323
void CodeGenFunction::GenerateOpenMPCapturedVars(
324
21.4k
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
325
21.4k
  const RecordDecl *RD = S.getCapturedRecordDecl();
326
21.4k
  auto CurField = RD->field_begin();
327
21.4k
  auto CurCap = S.captures().begin();
328
21.4k
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
329
21.4k
                                                 E = S.capture_init_end();
330
52.9k
       I != E; 
++I, ++CurField, ++CurCap31.5k
) {
331
31.5k
    if (CurField->hasCapturedVLAType()) {
332
2.63k
      const VariableArrayType *VAT = CurField->getCapturedVLAType();
333
2.63k
      llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
334
2.63k
      CapturedVars.push_back(Val);
335
28.9k
    } else if (CurCap->capturesThis()) {
336
1.69k
      CapturedVars.push_back(CXXThisValue);
337
27.2k
    } else if (CurCap->capturesVariableByCopy()) {
338
14.5k
      llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());
339
340
      // If the field is not a pointer, we need to save the actual value
341
      // and load it as a void pointer.
342
14.5k
      if (!CurField->getType()->isAnyPointerType()) {
343
12.7k
        ASTContext &Ctx = getContext();
344
12.7k
        Address DstAddr = CreateMemTemp(
345
12.7k
            Ctx.getUIntPtrType(),
346
12.7k
            Twine(CurCap->getCapturedVar()->getName(), ".casted"));
347
12.7k
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());
348
349
12.7k
        llvm::Value *SrcAddrVal = EmitScalarConversion(
350
12.7k
            DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
351
12.7k
            Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
352
12.7k
        LValue SrcLV =
353
12.7k
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());
354
355
        // Store the value using the source type pointer.
356
12.7k
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);
357
358
        // Load the value using the destination type pointer.
359
12.7k
        CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
360
12.7k
      }
361
14.5k
      CapturedVars.push_back(CV);
362
14.5k
    } else {
363
12.6k
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
364
0
      CapturedVars.push_back(EmitLValue(*I).getAddress(*this).getPointer());
365
12.6k
    }
366
31.5k
  }
367
21.4k
}
368
369
static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
370
                                    QualType DstType, StringRef Name,
371
17.8k
                                    LValue AddrLV) {
372
17.8k
  ASTContext &Ctx = CGF.getContext();
373
374
17.8k
  llvm::Value *CastedPtr = CGF.EmitScalarConversion(
375
17.8k
      AddrLV.getAddress(CGF).getPointer(), Ctx.getUIntPtrType(),
376
17.8k
      Ctx.getPointerType(DstType), Loc);
377
17.8k
  Address TmpAddr =
378
17.8k
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
379
17.8k
          .getAddress(CGF);
380
17.8k
  return TmpAddr;
381
17.8k
}
382
383
7.19k
static QualType getCanonicalParamType(ASTContext &C, QualType T) {
384
7.19k
  if (T->isLValueReferenceType())
385
2.10k
    return C.getLValueReferenceType(
386
2.10k
        getCanonicalParamType(C, T.getNonReferenceType()),
387
2.10k
        /*SpelledAsLValue=*/false);
388
5.08k
  if (T->isPointerType())
389
27
    return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
390
5.06k
  if (const ArrayType *A = T->getAsArrayTypeUnsafe()) {
391
2.99k
    if (const auto *VLA = dyn_cast<VariableArrayType>(A))
392
2.93k
      return getCanonicalParamType(C, VLA->getElementType());
393
60
    if (!A->isVariablyModifiedType())
394
60
      return C.getCanonicalType(T);
395
60
  }
396
2.06k
  return C.getCanonicalParamType(T);
397
5.06k
}
398
399
namespace {
400
/// Contains required data for proper outlined function codegen.
401
struct FunctionOptions {
402
  /// Captured statement for which the function is generated.
403
  const CapturedStmt *S = nullptr;
404
  /// true if cast to/from  UIntPtr is required for variables captured by
405
  /// value.
406
  const bool UIntPtrCastRequired = true;
407
  /// true if only casted arguments must be registered as local args or VLA
408
  /// sizes.
409
  const bool RegisterCastedArgsOnly = false;
410
  /// Name of the generated function.
411
  const StringRef FunctionName;
412
  /// Location of the non-debug version of the outlined function.
413
  SourceLocation Loc;
414
  explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
415
                           bool RegisterCastedArgsOnly, StringRef FunctionName,
416
                           SourceLocation Loc)
417
      : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
418
        RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
419
23.8k
        FunctionName(FunctionName), Loc(Loc) {}
420
};
421
} // namespace
422
423
static llvm::Function *emitOutlinedFunctionPrologue(
424
    CodeGenFunction &CGF, FunctionArgList &Args,
425
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
426
        &LocalAddrs,
427
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
428
        &VLASizes,
429
23.8k
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
430
23.8k
  const CapturedDecl *CD = FO.S->getCapturedDecl();
431
23.8k
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
432
23.8k
  assert(CD->hasBody() && "missing CapturedDecl body");
433
434
0
  CXXThisValue = nullptr;
435
  // Build the argument list.
436
23.8k
  CodeGenModule &CGM = CGF.CGM;
437
23.8k
  ASTContext &Ctx = CGM.getContext();
438
23.8k
  FunctionArgList TargetArgs;
439
23.8k
  Args.append(CD->param_begin(),
440
23.8k
              std::next(CD->param_begin(), CD->getContextParamPosition()));
441
23.8k
  TargetArgs.append(
442
23.8k
      CD->param_begin(),
443
23.8k
      std::next(CD->param_begin(), CD->getContextParamPosition()));
444
23.8k
  auto I = FO.S->captures().begin();
445
23.8k
  FunctionDecl *DebugFunctionDecl = nullptr;
446
23.8k
  if (!FO.UIntPtrCastRequired) {
447
152
    FunctionProtoType::ExtProtoInfo EPI;
448
152
    QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI);
449
152
    DebugFunctionDecl = FunctionDecl::Create(
450
152
        Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(),
451
152
        SourceLocation(), DeclarationName(), FunctionTy,
452
152
        Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static,
453
152
        /*UsesFPIntrin=*/false, /*isInlineSpecified=*/false,
454
152
        /*hasWrittenPrototype=*/false);
455
152
  }
456
35.5k
  for (const FieldDecl *FD : RD->fields()) {
457
35.5k
    QualType ArgType = FD->getType();
458
35.5k
    IdentifierInfo *II = nullptr;
459
35.5k
    VarDecl *CapVar = nullptr;
460
461
    // If this is a capture by copy and the type is not a pointer, the outlined
462
    // function argument type should be uintptr and the value properly casted to
463
    // uintptr. This is necessary given that the runtime library is only able to
464
    // deal with pointers. We can pass in the same way the VLA type sizes to the
465
    // outlined function.
466
35.5k
    if (FO.UIntPtrCastRequired &&
467
35.5k
        
(35.3k
(35.3k
I->capturesVariableByCopy()35.3k
&&
!ArgType->isAnyPointerType()16.7k
) ||
468
35.3k
         
I->capturesVariableArrayType()20.5k
))
469
17.8k
      ArgType = Ctx.getUIntPtrType();
470
471
35.5k
    if (I->capturesVariable() || 
I->capturesVariableByCopy()21.6k
) {
472
30.6k
      CapVar = I->getCapturedVar();
473
30.6k
      II = CapVar->getIdentifier();
474
30.6k
    } else 
if (4.89k
I->capturesThis()4.89k
) {
475
1.85k
      II = &Ctx.Idents.get("this");
476
3.04k
    } else {
477
3.04k
      assert(I->capturesVariableArrayType());
478
0
      II = &Ctx.Idents.get("vla");
479
3.04k
    }
480
35.5k
    if (ArgType->isVariablyModifiedType())
481
2.12k
      ArgType = getCanonicalParamType(Ctx, ArgType);
482
35.5k
    VarDecl *Arg;
483
35.5k
    if (DebugFunctionDecl && 
(204
CapVar204
||
I->capturesThis()17
)) {
484
193
      Arg = ParmVarDecl::Create(
485
193
          Ctx, DebugFunctionDecl,
486
193
          CapVar ? 
CapVar->getBeginLoc()187
:
FD->getBeginLoc()6
,
487
193
          CapVar ? 
CapVar->getLocation()187
:
FD->getLocation()6
, II, ArgType,
488
193
          /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
489
35.3k
    } else {
490
35.3k
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
491
35.3k
                                      II, ArgType, ImplicitParamDecl::Other);
492
35.3k
    }
493
35.5k
    Args.emplace_back(Arg);
494
    // Do not cast arguments if we emit function with non-original types.
495
35.5k
    TargetArgs.emplace_back(
496
35.5k
        FO.UIntPtrCastRequired
497
35.5k
            ? 
Arg35.3k
498
35.5k
            : 
CGM.getOpenMPRuntime().translateParameter(FD, Arg)204
);
499
35.5k
    ++I;
500
35.5k
  }
501
23.8k
  Args.append(std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
502
23.8k
              CD->param_end());
503
23.8k
  TargetArgs.append(
504
23.8k
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
505
23.8k
      CD->param_end());
506
507
  // Create the function declaration.
508
23.8k
  const CGFunctionInfo &FuncInfo =
509
23.8k
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
510
23.8k
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
511
512
23.8k
  auto *F =
513
23.8k
      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
514
23.8k
                             FO.FunctionName, &CGM.getModule());
515
23.8k
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
516
23.8k
  if (CD->isNothrow())
517
23.8k
    F->setDoesNotThrow();
518
23.8k
  F->setDoesNotRecurse();
519
520
  // Always inline the outlined function if optimizations are enabled.
521
23.8k
  if (CGM.getCodeGenOpts().OptimizationLevel != 0)
522
104
    F->addFnAttr(llvm::Attribute::AlwaysInline);
523
524
  // Generate the function.
525
23.8k
  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
526
23.8k
                    FO.UIntPtrCastRequired ? 
FO.Loc23.7k
:
FO.S->getBeginLoc()152
,
527
23.8k
                    FO.UIntPtrCastRequired ? 
FO.Loc23.7k
528
23.8k
                                           : 
CD->getBody()->getBeginLoc()152
);
529
23.8k
  unsigned Cnt = CD->getContextParamPosition();
530
23.8k
  I = FO.S->captures().begin();
531
35.5k
  for (const FieldDecl *FD : RD->fields()) {
532
    // Do not map arguments if we emit function with non-original types.
533
35.5k
    Address LocalAddr(Address::invalid());
534
35.5k
    if (!FO.UIntPtrCastRequired && 
Args[Cnt] != TargetArgs[Cnt]204
) {
535
58
      LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
536
58
                                                             TargetArgs[Cnt]);
537
35.5k
    } else {
538
35.5k
      LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
539
35.5k
    }
540
    // If we are capturing a pointer by copy we don't need to do anything, just
541
    // use the value that we get from the arguments.
542
35.5k
    if (I->capturesVariableByCopy() && 
FD->getType()->isAnyPointerType()16.7k
) {
543
1.89k
      const VarDecl *CurVD = I->getCapturedVar();
544
1.89k
      if (!FO.RegisterCastedArgsOnly)
545
1.89k
        LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
546
1.89k
      ++Cnt;
547
1.89k
      ++I;
548
1.89k
      continue;
549
1.89k
    }
550
551
33.6k
    LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
552
33.6k
                                        AlignmentSource::Decl);
553
33.6k
    if (FD->hasCapturedVLAType()) {
554
3.04k
      if (FO.UIntPtrCastRequired) {
555
3.03k
        ArgLVal = CGF.MakeAddrLValue(
556
3.03k
            castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
557
3.03k
                                 Args[Cnt]->getName(), ArgLVal),
558
3.03k
            FD->getType(), AlignmentSource::Decl);
559
3.03k
      }
560
3.04k
      llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
561
3.04k
      const VariableArrayType *VAT = FD->getCapturedVLAType();
562
3.04k
      VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg);
563
30.6k
    } else if (I->capturesVariable()) {
564
13.8k
      const VarDecl *Var = I->getCapturedVar();
565
13.8k
      QualType VarTy = Var->getType();
566
13.8k
      Address ArgAddr = ArgLVal.getAddress(CGF);
567
13.8k
      if (ArgLVal.getType()->isLValueReferenceType()) {
568
13.8k
        ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
569
13.8k
      } else 
if (0
!VarTy->isVariablyModifiedType()0
||
!VarTy->isPointerType()0
) {
570
0
        assert(ArgLVal.getType()->isPointerType());
571
0
        ArgAddr = CGF.EmitLoadOfPointer(
572
0
            ArgAddr, ArgLVal.getType()->castAs<PointerType>());
573
0
      }
574
13.8k
      if (!FO.RegisterCastedArgsOnly) {
575
13.7k
        LocalAddrs.insert(
576
13.7k
            {Args[Cnt],
577
13.7k
             {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
578
13.7k
      }
579
16.7k
    } else if (I->capturesVariableByCopy()) {
580
14.8k
      assert(!FD->getType()->isAnyPointerType() &&
581
14.8k
             "Not expecting a captured pointer.");
582
0
      const VarDecl *Var = I->getCapturedVar();
583
14.8k
      LocalAddrs.insert({Args[Cnt],
584
14.8k
                         {Var, FO.UIntPtrCastRequired
585
14.8k
                                   ? castValueFromUintptr(
586
14.8k
                                         CGF, I->getLocation(), FD->getType(),
587
14.8k
                                         Args[Cnt]->getName(), ArgLVal)
588
14.8k
                                   : 
ArgLVal.getAddress(CGF)23
}});
589
14.8k
    } else {
590
      // If 'this' is captured, load it into CXXThisValue.
591
1.85k
      assert(I->capturesThis());
592
0
      CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
593
1.85k
      LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress(CGF)}});
594
1.85k
    }
595
0
    ++Cnt;
596
33.6k
    ++I;
597
33.6k
  }
598
599
23.8k
  return F;
600
23.8k
}
601
602
llvm::Function *
603
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
604
23.7k
                                                    SourceLocation Loc) {
605
23.7k
  assert(
606
23.7k
      CapturedStmtInfo &&
607
23.7k
      "CapturedStmtInfo should be set when generating the captured function");
608
0
  const CapturedDecl *CD = S.getCapturedDecl();
609
  // Build the argument list.
610
23.7k
  bool NeedWrapperFunction =
611
23.7k
      getDebugInfo() && 
CGM.getCodeGenOpts().hasReducedDebugInfo()312
;
612
23.7k
  FunctionArgList Args;
613
23.7k
  llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
614
23.7k
  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
615
23.7k
  SmallString<256> Buffer;
616
23.7k
  llvm::raw_svector_ostream Out(Buffer);
617
23.7k
  Out << CapturedStmtInfo->getHelperName();
618
23.7k
  if (NeedWrapperFunction)
619
152
    Out << "_debug__";
620
23.7k
  FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
621
23.7k
                     Out.str(), Loc);
622
23.7k
  llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
623
23.7k
                                                   VLASizes, CXXThisValue, FO);
624
23.7k
  CodeGenFunction::OMPPrivateScope LocalScope(*this);
625
32.3k
  for (const auto &LocalAddrPair : LocalAddrs) {
626
32.3k
    if (LocalAddrPair.second.first) {
627
30.4k
      LocalScope.addPrivate(LocalAddrPair.second.first, [&LocalAddrPair]() {
628
30.4k
        return LocalAddrPair.second.second;
629
30.4k
      });
630
30.4k
    }
631
32.3k
  }
632
23.7k
  (void)LocalScope.Privatize();
633
23.7k
  for (const auto &VLASizePair : VLASizes)
634
3.03k
    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
635
23.7k
  PGO.assignRegionCounters(GlobalDecl(CD), F);
636
23.7k
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
637
23.7k
  (void)LocalScope.ForceCleanup();
638
23.7k
  FinishFunction(CD->getBodyRBrace());
639
23.7k
  if (!NeedWrapperFunction)
640
23.5k
    return F;
641
642
152
  FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
643
152
                            /*RegisterCastedArgsOnly=*/true,
644
152
                            CapturedStmtInfo->getHelperName(), Loc);
645
152
  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
646
152
  WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
647
152
  Args.clear();
648
152
  LocalAddrs.clear();
649
152
  VLASizes.clear();
650
152
  llvm::Function *WrapperF =
651
152
      emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
652
152
                                   WrapperCGF.CXXThisValue, WrapperFO);
653
152
  llvm::SmallVector<llvm::Value *, 4> CallArgs;
654
152
  auto *PI = F->arg_begin();
655
324
  for (const auto *Arg : Args) {
656
324
    llvm::Value *CallArg;
657
324
    auto I = LocalAddrs.find(Arg);
658
324
    if (I != LocalAddrs.end()) {
659
29
      LValue LV = WrapperCGF.MakeAddrLValue(
660
29
          I->second.second,
661
29
          I->second.first ? 
I->second.first->getType()23
:
Arg->getType()6
,
662
29
          AlignmentSource::Decl);
663
29
      if (LV.getType()->isAnyComplexType())
664
1
        LV.setAddress(WrapperCGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
665
1
            LV.getAddress(WrapperCGF),
666
1
            PI->getType()->getPointerTo(
667
1
                LV.getAddress(WrapperCGF).getAddressSpace())));
668
29
      CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
669
295
    } else {
670
295
      auto EI = VLASizes.find(Arg);
671
295
      if (EI != VLASizes.end()) {
672
11
        CallArg = EI->second.second;
673
284
      } else {
674
284
        LValue LV =
675
284
            WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
676
284
                                      Arg->getType(), AlignmentSource::Decl);
677
284
        CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
678
284
      }
679
295
    }
680
324
    CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
681
324
    ++PI;
682
324
  }
683
152
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs);
684
152
  WrapperCGF.FinishFunction();
685
152
  return WrapperF;
686
23.7k
}
687
688
//===----------------------------------------------------------------------===//
689
//                              OpenMP Directive Emission
690
//===----------------------------------------------------------------------===//
691
void CodeGenFunction::EmitOMPAggregateAssign(
692
    Address DestAddr, Address SrcAddr, QualType OriginalType,
693
539
    const llvm::function_ref<void(Address, Address)> CopyGen) {
694
  // Perform element-by-element initialization.
695
539
  QualType ElementTy;
696
697
  // Drill down to the base element type on both arrays.
698
539
  const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
699
539
  llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
700
539
  SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
701
702
539
  llvm::Value *SrcBegin = SrcAddr.getPointer();
703
539
  llvm::Value *DestBegin = DestAddr.getPointer();
704
  // Cast from pointer to array type to pointer to single element.
705
539
  llvm::Value *DestEnd =
706
539
      Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
707
  // The basic structure here is a while-do loop.
708
539
  llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body");
709
539
  llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done");
710
539
  llvm::Value *IsEmpty =
711
539
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
712
539
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
713
714
  // Enter the loop body, making that address the current address.
715
539
  llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
716
539
  EmitBlock(BodyBB);
717
718
539
  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);
719
720
539
  llvm::PHINode *SrcElementPHI =
721
539
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
722
539
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
723
539
  Address SrcElementCurrent =
724
539
      Address(SrcElementPHI,
725
539
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
726
727
539
  llvm::PHINode *DestElementPHI = Builder.CreatePHI(
728
539
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
729
539
  DestElementPHI->addIncoming(DestBegin, EntryBB);
730
539
  Address DestElementCurrent =
731
539
      Address(DestElementPHI,
732
539
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
733
734
  // Emit copy.
735
539
  CopyGen(DestElementCurrent, SrcElementCurrent);
736
737
  // Shift the address forward by one element.
738
539
  llvm::Value *DestElementNext =
739
539
      Builder.CreateConstGEP1_32(DestAddr.getElementType(), DestElementPHI,
740
539
                                 /*Idx0=*/1, "omp.arraycpy.dest.element");
741
539
  llvm::Value *SrcElementNext =
742
539
      Builder.CreateConstGEP1_32(SrcAddr.getElementType(), SrcElementPHI,
743
539
                                 /*Idx0=*/1, "omp.arraycpy.src.element");
744
  // Check whether we've reached the end.
745
539
  llvm::Value *Done =
746
539
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
747
539
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
748
539
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
749
539
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());
750
751
  // Done.
752
539
  EmitBlock(DoneBB, /*IsFinished=*/true);
753
539
}
754
755
void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
756
                                  Address SrcAddr, const VarDecl *DestVD,
757
1.95k
                                  const VarDecl *SrcVD, const Expr *Copy) {
758
1.95k
  if (OriginalType->isArrayType()) {
759
580
    const auto *BO = dyn_cast<BinaryOperator>(Copy);
760
580
    if (BO && 
BO->getOpcode() == BO_Assign313
) {
761
      // Perform simple memcpy for simple copying.
762
313
      LValue Dest = MakeAddrLValue(DestAddr, OriginalType);
763
313
      LValue Src = MakeAddrLValue(SrcAddr, OriginalType);
764
313
      EmitAggregateAssign(Dest, Src, OriginalType);
765
313
    } else {
766
      // For arrays with complex element types perform element by element
767
      // copying.
768
267
      EmitOMPAggregateAssign(
769
267
          DestAddr, SrcAddr, OriginalType,
770
267
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
771
            // Working with the single array element, so have to remap
772
            // destination and source variables to corresponding array
773
            // elements.
774
267
            CodeGenFunction::OMPPrivateScope Remap(*this);
775
267
            Remap.addPrivate(DestVD, [DestElement]() { return DestElement; });
776
267
            Remap.addPrivate(SrcVD, [SrcElement]() { return SrcElement; });
777
267
            (void)Remap.Privatize();
778
267
            EmitIgnoredExpr(Copy);
779
267
          });
780
267
    }
781
1.37k
  } else {
782
    // Remap pseudo source variable to private copy.
783
1.37k
    CodeGenFunction::OMPPrivateScope Remap(*this);
784
1.37k
    Remap.addPrivate(SrcVD, [SrcAddr]() { return SrcAddr; });
785
1.37k
    Remap.addPrivate(DestVD, [DestAddr]() { return DestAddr; });
786
1.37k
    (void)Remap.Privatize();
787
    // Emit copying of the whole variable.
788
1.37k
    EmitIgnoredExpr(Copy);
789
1.37k
  }
790
1.95k
}
791
792
bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
793
19.0k
                                                OMPPrivateScope &PrivateScope) {
794
19.0k
  if (!HaveInsertPoint())
795
0
    return false;
796
19.0k
  bool DeviceConstTarget =
797
19.0k
      getLangOpts().OpenMPIsDevice &&
798
19.0k
      
isOpenMPTargetExecutionDirective(D.getDirectiveKind())3.53k
;
799
19.0k
  bool FirstprivateIsLastprivate = false;
800
19.0k
  llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates;
801
19.0k
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
802
364
    for (const auto *D : C->varlists())
803
1.48k
      Lastprivates.try_emplace(
804
1.48k
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl(),
805
1.48k
          C->getKind());
806
364
  }
807
19.0k
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
808
19.0k
  llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
809
19.0k
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
810
  // Force emission of the firstprivate copy if the directive does not emit
811
  // outlined function, like omp for, omp simd, omp distribute etc.
812
19.0k
  bool MustEmitFirstprivateCopy =
813
19.0k
      CaptureRegions.size() == 1 && 
CaptureRegions.back() == OMPD_unknown4.94k
;
814
19.0k
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
815
6.66k
    const auto *IRef = C->varlist_begin();
816
6.66k
    const auto *InitsRef = C->inits().begin();
817
10.2k
    for (const Expr *IInit : C->private_copies()) {
818
10.2k
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
819
10.2k
      bool ThisFirstprivateIsLastprivate =
820
10.2k
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
821
10.2k
      const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD);
822
10.2k
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
823
10.2k
      if (!MustEmitFirstprivateCopy && 
!ThisFirstprivateIsLastprivate10.0k
&&
FD10.0k
&&
824
10.2k
          
!FD->getType()->isReferenceType()10.0k
&&
825
10.2k
          
(8.92k
!VD8.92k
||
!VD->hasAttr<OMPAllocateDeclAttr>()8.92k
)) {
826
8.90k
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
827
8.90k
        ++IRef;
828
8.90k
        ++InitsRef;
829
8.90k
        continue;
830
8.90k
      }
831
      // Do not emit copy for firstprivate constant variables in target regions,
832
      // captured by reference.
833
1.36k
      if (DeviceConstTarget && 
OrigVD->getType().isConstant(getContext())204
&&
834
1.36k
          
FD10
&&
FD->getType()->isReferenceType()10
&&
835
1.36k
          
(10
!VD10
||
!VD->hasAttr<OMPAllocateDeclAttr>()10
)) {
836
10
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
837
10
        ++IRef;
838
10
        ++InitsRef;
839
10
        continue;
840
10
      }
841
1.35k
      FirstprivateIsLastprivate =
842
1.35k
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
843
1.35k
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
844
1.25k
        const auto *VDInit =
845
1.25k
            cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
846
1.25k
        bool IsRegistered;
847
1.25k
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
848
1.25k
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
849
1.25k
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
850
1.25k
        LValue OriginalLVal;
851
1.25k
        if (!FD) {
852
          // Check if the firstprivate variable is just a constant value.
853
58
          ConstantEmission CE = tryEmitAsConstant(&DRE);
854
58
          if (CE && 
!CE.isReference()6
) {
855
            // Constant value, no need to create a copy.
856
4
            ++IRef;
857
4
            ++InitsRef;
858
4
            continue;
859
4
          }
860
54
          if (CE && 
CE.isReference()2
) {
861
2
            OriginalLVal = CE.getReferenceLValue(*this, &DRE);
862
52
          } else {
863
52
            assert(!CE && "Expected non-constant firstprivate.");
864
0
            OriginalLVal = EmitLValue(&DRE);
865
52
          }
866
1.19k
        } else {
867
1.19k
          OriginalLVal = EmitLValue(&DRE);
868
1.19k
        }
869
1.25k
        QualType Type = VD->getType();
870
1.25k
        if (Type->isArrayType()) {
871
          // Emit VarDecl with copy init for arrays.
872
          // Get the address of the original variable captured in current
873
          // captured region.
874
603
          IsRegistered = PrivateScope.addPrivate(
875
603
              OrigVD, [this, VD, Type, OriginalLVal, VDInit]() {
876
603
                AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
877
603
                const Expr *Init = VD->getInit();
878
603
                if (!isa<CXXConstructExpr>(Init) ||
879
603
                    
isTrivialInitializer(Init)220
) {
880
                  // Perform simple memcpy.
881
383
                  LValue Dest =
882
383
                      MakeAddrLValue(Emission.getAllocatedAddress(), Type);
883
383
                  EmitAggregateAssign(Dest, OriginalLVal, Type);
884
383
                } else {
885
220
                  EmitOMPAggregateAssign(
886
220
                      Emission.getAllocatedAddress(),
887
220
                      OriginalLVal.getAddress(*this), Type,
888
220
                      [this, VDInit, Init](Address DestElement,
889
220
                                           Address SrcElement) {
890
                        // Clean up any temporaries needed by the
891
                        // initialization.
892
220
                        RunCleanupsScope InitScope(*this);
893
                        // Emit initialization for single element.
894
220
                        setAddrOfLocalVar(VDInit, SrcElement);
895
220
                        EmitAnyExprToMem(Init, DestElement,
896
220
                                         Init->getType().getQualifiers(),
897
220
                                         /*IsInitializer*/ false);
898
220
                        LocalDeclMap.erase(VDInit);
899
220
                      });
900
220
                }
901
603
                EmitAutoVarCleanups(Emission);
902
603
                return Emission.getAllocatedAddress();
903
603
              });
904
648
        } else {
905
648
          Address OriginalAddr = OriginalLVal.getAddress(*this);
906
648
          IsRegistered =
907
648
              PrivateScope.addPrivate(OrigVD, [this, VDInit, OriginalAddr, VD,
908
648
                                               ThisFirstprivateIsLastprivate,
909
648
                                               OrigVD, &Lastprivates, IRef]() {
910
                // Emit private VarDecl with copy init.
911
                // Remap temp VDInit variable to the address of the original
912
                // variable (for proper handling of captured global variables).
913
648
                setAddrOfLocalVar(VDInit, OriginalAddr);
914
648
                EmitDecl(*VD);
915
648
                LocalDeclMap.erase(VDInit);
916
648
                if (ThisFirstprivateIsLastprivate &&
917
648
                    Lastprivates[OrigVD->getCanonicalDecl()] ==
918
8
                        OMPC_LASTPRIVATE_conditional) {
919
                  // Create/init special variable for lastprivate conditionals.
920
0
                  Address VDAddr =
921
0
                      CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
922
0
                          *this, OrigVD);
923
0
                  llvm::Value *V = EmitLoadOfScalar(
924
0
                      MakeAddrLValue(GetAddrOfLocalVar(VD), (*IRef)->getType(),
925
0
                                     AlignmentSource::Decl),
926
0
                      (*IRef)->getExprLoc());
927
0
                  EmitStoreOfScalar(V,
928
0
                                    MakeAddrLValue(VDAddr, (*IRef)->getType(),
929
0
                                                   AlignmentSource::Decl));
930
0
                  LocalDeclMap.erase(VD);
931
0
                  setAddrOfLocalVar(VD, VDAddr);
932
0
                  return VDAddr;
933
0
                }
934
648
                return GetAddrOfLocalVar(VD);
935
648
              });
936
648
        }
937
1.25k
        assert(IsRegistered &&
938
1.25k
               "firstprivate var already registered as private");
939
        // Silence the warning about unused variable.
940
0
        (void)IsRegistered;
941
1.25k
      }
942
1.34k
      ++IRef;
943
1.34k
      ++InitsRef;
944
1.34k
    }
945
6.66k
  }
946
19.0k
  return FirstprivateIsLastprivate && 
!EmittedAsFirstprivate.empty()16
;
947
19.0k
}
948
949
void CodeGenFunction::EmitOMPPrivateClause(
950
    const OMPExecutableDirective &D,
951
32.9k
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
952
32.9k
  if (!HaveInsertPoint())
953
0
    return;
954
32.9k
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
955
32.9k
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
956
983
    auto IRef = C->varlist_begin();
957
3.14k
    for (const Expr *IInit : C->private_copies()) {
958
3.14k
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
959
3.14k
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
960
2.87k
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
961
2.87k
        bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD]() {
962
          // Emit private VarDecl with copy init.
963
2.87k
          EmitDecl(*VD);
964
2.87k
          return GetAddrOfLocalVar(VD);
965
2.87k
        });
966
2.87k
        assert(IsRegistered && "private var already registered as private");
967
        // Silence the warning about unused variable.
968
0
        (void)IsRegistered;
969
2.87k
      }
970
0
      ++IRef;
971
3.14k
    }
972
983
  }
973
32.9k
}
974
975
952
bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
976
952
  if (!HaveInsertPoint())
977
0
    return false;
978
  // threadprivate_var1 = master_threadprivate_var1;
979
  // operator=(threadprivate_var2, master_threadprivate_var2);
980
  // ...
981
  // __kmpc_barrier(&loc, global_tid);
982
952
  llvm::DenseSet<const VarDecl *> CopiedVars;
983
952
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
984
952
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
985
26
    auto IRef = C->varlist_begin();
986
26
    auto ISrcRef = C->source_exprs().begin();
987
26
    auto IDestRef = C->destination_exprs().begin();
988
52
    for (const Expr *AssignOp : C->assignment_ops()) {
989
52
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
990
52
      QualType Type = VD->getType();
991
52
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
992
        // Get the address of the master variable. If we are emitting code with
993
        // TLS support, the address is passed from the master as field in the
994
        // captured declaration.
995
52
        Address MasterAddr = Address::invalid();
996
52
        if (getLangOpts().OpenMPUseTLS &&
997
52
            
getContext().getTargetInfo().isTLSSupported()26
) {
998
26
          assert(CapturedStmtInfo->lookup(VD) &&
999
26
                 "Copyin threadprivates should have been captured!");
1000
0
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true,
1001
26
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
1002
26
          MasterAddr = EmitLValue(&DRE).getAddress(*this);
1003
26
          LocalDeclMap.erase(VD);
1004
26
        } else {
1005
26
          MasterAddr =
1006
26
              Address(VD->isStaticLocal() ? 
CGM.getStaticLocalDeclAddress(VD)22
1007
26
                                          : 
CGM.GetAddrOfGlobal(VD)4
,
1008
26
                      getContext().getDeclAlign(VD));
1009
26
        }
1010
        // Get the address of the threadprivate variable.
1011
0
        Address PrivateAddr = EmitLValue(*IRef).getAddress(*this);
1012
52
        if (CopiedVars.size() == 1) {
1013
          // At first check if current thread is a master thread. If it is, no
1014
          // need to copy data.
1015
26
          CopyBegin = createBasicBlock("copyin.not.master");
1016
26
          CopyEnd = createBasicBlock("copyin.not.master.end");
1017
          // TODO: Avoid ptrtoint conversion.
1018
26
          auto *MasterAddrInt =
1019
26
              Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy);
1020
26
          auto *PrivateAddrInt =
1021
26
              Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy);
1022
26
          Builder.CreateCondBr(
1023
26
              Builder.CreateICmpNE(MasterAddrInt, PrivateAddrInt), CopyBegin,
1024
26
              CopyEnd);
1025
26
          EmitBlock(CopyBegin);
1026
26
        }
1027
52
        const auto *SrcVD =
1028
52
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
1029
52
        const auto *DestVD =
1030
52
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
1031
52
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
1032
52
      }
1033
0
      ++IRef;
1034
52
      ++ISrcRef;
1035
52
      ++IDestRef;
1036
52
    }
1037
26
  }
1038
952
  if (CopyEnd) {
1039
    // Exit out of copying procedure for non-master thread.
1040
26
    EmitBlock(CopyEnd, /*IsFinished=*/true);
1041
26
    return true;
1042
26
  }
1043
926
  return false;
1044
952
}
1045
1046
bool CodeGenFunction::EmitOMPLastprivateClauseInit(
1047
13.5k
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
1048
13.5k
  if (!HaveInsertPoint())
1049
0
    return false;
1050
13.5k
  bool HasAtLeastOneLastprivate = false;
1051
13.5k
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
1052
13.5k
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
1053
8.67k
    const auto *LoopDirective = cast<OMPLoopDirective>(&D);
1054
9.02k
    for (const Expr *C : LoopDirective->counters()) {
1055
9.02k
      SIMDLCVs.insert(
1056
9.02k
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
1057
9.02k
    }
1058
8.67k
  }
1059
13.5k
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
1060
13.5k
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
1061
540
    HasAtLeastOneLastprivate = true;
1062
540
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
1063
540
        
!getLangOpts().OpenMPSimd74
)
1064
49
      break;
1065
491
    const auto *IRef = C->varlist_begin();
1066
491
    const auto *IDestRef = C->destination_exprs().begin();
1067
2.01k
    for (const Expr *IInit : C->private_copies()) {
1068
      // Keep the address of the original variable for future update at the end
1069
      // of the loop.
1070
2.01k
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
1071
      // Taskloops do not require additional initialization, it is done in
1072
      // runtime support library.
1073
2.01k
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
1074
1.60k
        const auto *DestVD =
1075
1.60k
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
1076
1.60k
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() {
1077
1.60k
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
1078
                          /*RefersToEnclosingVariableOrCapture=*/
1079
1.60k
                          CapturedStmtInfo->lookup(OrigVD) != nullptr,
1080
1.60k
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
1081
1.60k
          return EmitLValue(&DRE).getAddress(*this);
1082
1.60k
        });
1083
        // Check if the variable is also a firstprivate: in this case IInit is
1084
        // not generated. Initialization of this variable will happen in codegen
1085
        // for 'firstprivate' clause.
1086
1.60k
        if (IInit && 
!SIMDLCVs.count(OrigVD->getCanonicalDecl())1.56k
) {
1087
1.55k
          const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
1088
1.55k
          bool IsRegistered =
1089
1.55k
              PrivateScope.addPrivate(OrigVD, [this, VD, C, OrigVD]() {
1090
1.55k
                if (C->getKind() == OMPC_LASTPRIVATE_conditional) {
1091
10
                  Address VDAddr =
1092
10
                      CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
1093
10
                          *this, OrigVD);
1094
10
                  setAddrOfLocalVar(VD, VDAddr);
1095
10
                  return VDAddr;
1096
10
                }
1097
                // Emit private VarDecl with copy init.
1098
1.54k
                EmitDecl(*VD);
1099
1.54k
                return GetAddrOfLocalVar(VD);
1100
1.55k
              });
1101
1.55k
          assert(IsRegistered &&
1102
1.55k
                 "lastprivate var already registered as private");
1103
0
          (void)IsRegistered;
1104
1.55k
        }
1105
1.60k
      }
1106
0
      ++IRef;
1107
2.01k
      ++IDestRef;
1108
2.01k
    }
1109
491
  }
1110
13.5k
  return HasAtLeastOneLastprivate;
1111
13.5k
}
1112
1113
void CodeGenFunction::EmitOMPLastprivateClauseFinal(
1114
    const OMPExecutableDirective &D, bool NoFinals,
1115
532
    llvm::Value *IsLastIterCond) {
1116
532
  if (!HaveInsertPoint())
1117
0
    return;
1118
  // Emit following code:
1119
  // if (<IsLastIterCond>) {
1120
  //   orig_var1 = private_orig_var1;
1121
  //   ...
1122
  //   orig_varn = private_orig_varn;
1123
  // }
1124
532
  llvm::BasicBlock *ThenBB = nullptr;
1125
532
  llvm::BasicBlock *DoneBB = nullptr;
1126
532
  if (IsLastIterCond) {
1127
    // Emit implicit barrier if at least one lastprivate conditional is found
1128
    // and this is not a simd mode.
1129
405
    if (!getLangOpts().OpenMPSimd &&
1130
405
        llvm::any_of(D.getClausesOfKind<OMPLastprivateClause>(),
1131
413
                     [](const OMPLastprivateClause *C) {
1132
413
                       return C->getKind() == OMPC_LASTPRIVATE_conditional;
1133
413
                     })) {
1134
6
      CGM.getOpenMPRuntime().emitBarrierCall(*this, D.getBeginLoc(),
1135
6
                                             OMPD_unknown,
1136
6
                                             /*EmitChecks=*/false,
1137
6
                                             /*ForceSimpleCall=*/true);
1138
6
    }
1139
405
    ThenBB = createBasicBlock(".omp.lastprivate.then");
1140
405
    DoneBB = createBasicBlock(".omp.lastprivate.done");
1141
405
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
1142
405
    EmitBlock(ThenBB);
1143
405
  }
1144
532
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
1145
532
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
1146
532
  if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
1147
516
    auto IC = LoopDirective->counters().begin();
1148
536
    for (const Expr *F : LoopDirective->finals()) {
1149
536
      const auto *D =
1150
536
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
1151
536
      if (NoFinals)
1152
221
        AlreadyEmittedVars.insert(D);
1153
315
      else
1154
315
        LoopCountersAndUpdates[D] = F;
1155
536
      ++IC;
1156
536
    }
1157
516
  }
1158
540
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
1159
540
    auto IRef = C->varlist_begin();
1160
540
    auto ISrcRef = C->source_exprs().begin();
1161
540
    auto IDestRef = C->destination_exprs().begin();
1162
2.21k
    for (const Expr *AssignOp : C->assignment_ops()) {
1163
2.21k
      const auto *PrivateVD =
1164
2.21k
          cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
1165
2.21k
      QualType Type = PrivateVD->getType();
1166
2.21k
      const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
1167
2.21k
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
1168
        // If lastprivate variable is a loop control variable for loop-based
1169
        // directive, update its value before copyin back to original
1170
        // variable.
1171
1.73k
        if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
1172
13
          EmitIgnoredExpr(FinalExpr);
1173
1.73k
        const auto *SrcVD =
1174
1.73k
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
1175
1.73k
        const auto *DestVD =
1176
1.73k
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
1177
        // Get the address of the private variable.
1178
1.73k
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
1179
1.73k
        if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
1180
334
          PrivateAddr =
1181
334
              Address(Builder.CreateLoad(PrivateAddr),
1182
334
                      CGM.getNaturalTypeAlignment(RefTy->getPointeeType()));
1183
        // Store the last value to the private copy in the last iteration.
1184
1.73k
        if (C->getKind() == OMPC_LASTPRIVATE_conditional)
1185
10
          CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate(
1186
10
              *this, MakeAddrLValue(PrivateAddr, (*IRef)->getType()), PrivateVD,
1187
10
              (*IRef)->getExprLoc());
1188
        // Get the address of the original variable.
1189
1.73k
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
1190
1.73k
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
1191
1.73k
      }
1192
2.21k
      ++IRef;
1193
2.21k
      ++ISrcRef;
1194
2.21k
      ++IDestRef;
1195
2.21k
    }
1196
540
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
1197
10
      EmitIgnoredExpr(PostUpdate);
1198
540
  }
1199
532
  if (IsLastIterCond)
1200
405
    EmitBlock(DoneBB, /*IsFinished=*/true);
1201
532
}
1202
1203
void CodeGenFunction::EmitOMPReductionClauseInit(
1204
    const OMPExecutableDirective &D,
1205
27.5k
    CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) {
1206
27.5k
  if (!HaveInsertPoint())
1207
0
    return;
1208
27.5k
  SmallVector<const Expr *, 4> Shareds;
1209
27.5k
  SmallVector<const Expr *, 4> Privates;
1210
27.5k
  SmallVector<const Expr *, 4> ReductionOps;
1211
27.5k
  SmallVector<const Expr *, 4> LHSs;
1212
27.5k
  SmallVector<const Expr *, 4> RHSs;
1213
27.5k
  OMPTaskDataTy Data;
1214
27.5k
  SmallVector<const Expr *, 4> TaskLHSs;
1215
27.5k
  SmallVector<const Expr *, 4> TaskRHSs;
1216
27.5k
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1217
1.16k
    if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan))
1218
447
      continue;
1219
721
    Shareds.append(C->varlist_begin(), C->varlist_end());
1220
721
    Privates.append(C->privates().begin(), C->privates().end());
1221
721
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1222
721
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1223
721
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1224
721
    if (C->getModifier() == OMPC_REDUCTION_task) {
1225
27
      Data.ReductionVars.append(C->privates().begin(), C->privates().end());
1226
27
      Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
1227
27
      Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
1228
27
      Data.ReductionOps.append(C->reduction_ops().begin(),
1229
27
                               C->reduction_ops().end());
1230
27
      TaskLHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1231
27
      TaskRHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1232
27
    }
1233
721
  }
1234
27.5k
  ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
1235
27.5k
  unsigned Count = 0;
1236
27.5k
  auto *ILHS = LHSs.begin();
1237
27.5k
  auto *IRHS = RHSs.begin();
1238
27.5k
  auto *IPriv = Privates.begin();
1239
27.5k
  for (const Expr *IRef : Shareds) {
1240
813
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
1241
    // Emit private VarDecl with reduction init.
1242
813
    RedCG.emitSharedOrigLValue(*this, Count);
1243
813
    RedCG.emitAggregateType(*this, Count);
1244
813
    AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
1245
813
    RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
1246
813
                             RedCG.getSharedLValue(Count),
1247
813
                             [&Emission](CodeGenFunction &CGF) {
1248
617
                               CGF.EmitAutoVarInit(Emission);
1249
617
                               return true;
1250
617
                             });
1251
813
    EmitAutoVarCleanups(Emission);
1252
813
    Address BaseAddr = RedCG.adjustPrivateAddress(
1253
813
        *this, Count, Emission.getAllocatedAddress());
1254
813
    bool IsRegistered = PrivateScope.addPrivate(
1255
813
        RedCG.getBaseDecl(Count), [BaseAddr]() { return BaseAddr; });
1256
813
    assert(IsRegistered && "private var already registered as private");
1257
    // Silence the warning about unused variable.
1258
0
    (void)IsRegistered;
1259
1260
813
    const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
1261
813
    const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
1262
813
    QualType Type = PrivateVD->getType();
1263
813
    bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
1264
813
    if (isaOMPArraySectionExpr && 
Type->isVariablyModifiedType()157
) {
1265
      // Store the address of the original variable associated with the LHS
1266
      // implicit variable.
1267
109
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count, this]() {
1268
109
        return RedCG.getSharedLValue(Count).getAddress(*this);
1269
109
      });
1270
109
      PrivateScope.addPrivate(
1271
109
          RHSVD, [this, PrivateVD]() { return GetAddrOfLocalVar(PrivateVD); });
1272
704
    } else if ((isaOMPArraySectionExpr && 
Type->isScalarType()48
) ||
1273
704
               
isa<ArraySubscriptExpr>(IRef)702
) {
1274
      // Store the address of the original variable associated with the LHS
1275
      // implicit variable.
1276
6
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count, this]() {
1277
6
        return RedCG.getSharedLValue(Count).getAddress(*this);
1278
6
      });
1279
6
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() {
1280
6
        return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
1281
6
                                            ConvertTypeForMem(RHSVD->getType()),
1282
6
                                            "rhs.begin");
1283
6
      });
1284
698
    } else {
1285
698
      QualType Type = PrivateVD->getType();
1286
698
      bool IsArray = getContext().getAsArrayType(Type) != nullptr;
1287
698
      Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress(*this);
1288
      // Store the address of the original variable associated with the LHS
1289
      // implicit variable.
1290
698
      if (IsArray) {
1291
108
        OriginalAddr = Builder.CreateElementBitCast(
1292
108
            OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
1293
108
      }
1294
698
      PrivateScope.addPrivate(LHSVD, [OriginalAddr]() { return OriginalAddr; });
1295
698
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD, IsArray]() {
1296
698
        return IsArray ? Builder.CreateElementBitCast(
1297
108
                             GetAddrOfLocalVar(PrivateVD),
1298
108
                             ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
1299
698
                       : 
GetAddrOfLocalVar(PrivateVD)590
;
1300
698
      });
1301
698
    }
1302
813
    ++ILHS;
1303
813
    ++IRHS;
1304
813
    ++IPriv;
1305
813
    ++Count;
1306
813
  }
1307
27.5k
  if (!Data.ReductionVars.empty()) {
1308
27
    Data.IsReductionWithTaskMod = true;
1309
27
    Data.IsWorksharingReduction =
1310
27
        isOpenMPWorksharingDirective(D.getDirectiveKind());
1311
27
    llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit(
1312
27
        *this, D.getBeginLoc(), TaskLHSs, TaskRHSs, Data);
1313
27
    const Expr *TaskRedRef = nullptr;
1314
27
    switch (D.getDirectiveKind()) {
1315
2
    case OMPD_parallel:
1316
2
      TaskRedRef = cast<OMPParallelDirective>(D).getTaskReductionRefExpr();
1317
2
      break;
1318
2
    case OMPD_for:
1319
2
      TaskRedRef = cast<OMPForDirective>(D).getTaskReductionRefExpr();
1320
2
      break;
1321
2
    case OMPD_sections:
1322
2
      TaskRedRef = cast<OMPSectionsDirective>(D).getTaskReductionRefExpr();
1323
2
      break;
1324
2
    case OMPD_parallel_for:
1325
2
      TaskRedRef = cast<OMPParallelForDirective>(D).getTaskReductionRefExpr();
1326
2
      break;
1327
2
    case OMPD_parallel_master:
1328
2
      TaskRedRef =
1329
2
          cast<OMPParallelMasterDirective>(D).getTaskReductionRefExpr();
1330
2
      break;
1331
2
    case OMPD_parallel_sections:
1332
2
      TaskRedRef =
1333
2
          cast<OMPParallelSectionsDirective>(D).getTaskReductionRefExpr();
1334
2
      break;
1335
2
    case OMPD_target_parallel:
1336
2
      TaskRedRef =
1337
2
          cast<OMPTargetParallelDirective>(D).getTaskReductionRefExpr();
1338
2
      break;
1339
3
    case OMPD_target_parallel_for:
1340
3
      TaskRedRef =
1341
3
          cast<OMPTargetParallelForDirective>(D).getTaskReductionRefExpr();
1342
3
      break;
1343
2
    case OMPD_distribute_parallel_for:
1344
2
      TaskRedRef =
1345
2
          cast<OMPDistributeParallelForDirective>(D).getTaskReductionRefExpr();
1346
2
      break;
1347
4
    case OMPD_teams_distribute_parallel_for:
1348
4
      TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(D)
1349
4
                       .getTaskReductionRefExpr();
1350
4
      break;
1351
4
    case OMPD_target_teams_distribute_parallel_for:
1352
4
      TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(D)
1353
4
                       .getTaskReductionRefExpr();
1354
4
      break;
1355
0
    case OMPD_simd:
1356
0
    case OMPD_for_simd:
1357
0
    case OMPD_section:
1358
0
    case OMPD_single:
1359
0
    case OMPD_master:
1360
0
    case OMPD_critical:
1361
0
    case OMPD_parallel_for_simd:
1362
0
    case OMPD_task:
1363
0
    case OMPD_taskyield:
1364
0
    case OMPD_barrier:
1365
0
    case OMPD_taskwait:
1366
0
    case OMPD_taskgroup:
1367
0
    case OMPD_flush:
1368
0
    case OMPD_depobj:
1369
0
    case OMPD_scan:
1370
0
    case OMPD_ordered:
1371
0
    case OMPD_atomic:
1372
0
    case OMPD_teams:
1373
0
    case OMPD_target:
1374
0
    case OMPD_cancellation_point:
1375
0
    case OMPD_cancel:
1376
0
    case OMPD_target_data:
1377
0
    case OMPD_target_enter_data:
1378
0
    case OMPD_target_exit_data:
1379
0
    case OMPD_taskloop:
1380
0
    case OMPD_taskloop_simd:
1381
0
    case OMPD_master_taskloop:
1382
0
    case OMPD_master_taskloop_simd:
1383
0
    case OMPD_parallel_master_taskloop:
1384
0
    case OMPD_parallel_master_taskloop_simd:
1385
0
    case OMPD_distribute:
1386
0
    case OMPD_target_update:
1387
0
    case OMPD_distribute_parallel_for_simd:
1388
0
    case OMPD_distribute_simd:
1389
0
    case OMPD_target_parallel_for_simd:
1390
0
    case OMPD_target_simd:
1391
0
    case OMPD_teams_distribute:
1392
0
    case OMPD_teams_distribute_simd:
1393
0
    case OMPD_teams_distribute_parallel_for_simd:
1394
0
    case OMPD_target_teams:
1395
0
    case OMPD_target_teams_distribute:
1396
0
    case OMPD_target_teams_distribute_parallel_for_simd:
1397
0
    case OMPD_target_teams_distribute_simd:
1398
0
    case OMPD_declare_target:
1399
0
    case OMPD_end_declare_target:
1400
0
    case OMPD_threadprivate:
1401
0
    case OMPD_allocate:
1402
0
    case OMPD_declare_reduction:
1403
0
    case OMPD_declare_mapper:
1404
0
    case OMPD_declare_simd:
1405
0
    case OMPD_requires:
1406
0
    case OMPD_declare_variant:
1407
0
    case OMPD_begin_declare_variant:
1408
0
    case OMPD_end_declare_variant:
1409
0
    case OMPD_unknown:
1410
0
    default:
1411
0
      llvm_unreachable("Enexpected directive with task reductions.");
1412
27
    }
1413
1414
27
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(TaskRedRef)->getDecl());
1415
27
    EmitVarDecl(*VD);
1416
27
    EmitStoreOfScalar(ReductionDesc, GetAddrOfLocalVar(VD),
1417
27
                      /*Volatile=*/false, TaskRedRef->getType());
1418
27
  }
1419
27.5k
}
1420
1421
void CodeGenFunction::EmitOMPReductionClauseFinal(
1422
16.5k
    const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
1423
16.5k
  if (!HaveInsertPoint())
1424
2
    return;
1425
16.5k
  llvm::SmallVector<const Expr *, 8> Privates;
1426
16.5k
  llvm::SmallVector<const Expr *, 8> LHSExprs;
1427
16.5k
  llvm::SmallVector<const Expr *, 8> RHSExprs;
1428
16.5k
  llvm::SmallVector<const Expr *, 8> ReductionOps;
1429
16.5k
  bool HasAtLeastOneReduction = false;
1430
16.5k
  bool IsReductionWithTaskMod = false;
1431
16.5k
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1432
    // Do not emit for inscan reductions.
1433
713
    if (C->getModifier() == OMPC_REDUCTION_inscan)
1434
48
      continue;
1435
665
    HasAtLeastOneReduction = true;
1436
665
    Privates.append(C->privates().begin(), C->privates().end());
1437
665
    LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1438
665
    RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1439
665
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1440
665
    IsReductionWithTaskMod =
1441
665
        IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task;
1442
665
  }
1443
16.5k
  if (HasAtLeastOneReduction) {
1444
574
    if (IsReductionWithTaskMod) {
1445
27
      CGM.getOpenMPRuntime().emitTaskReductionFini(
1446
27
          *this, D.getBeginLoc(),
1447
27
          isOpenMPWorksharingDirective(D.getDirectiveKind()));
1448
27
    }
1449
574
    bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
1450
574
                      
isOpenMPParallelDirective(D.getDirectiveKind())568
||
1451
574
                      
ReductionKind == OMPD_simd268
;
1452
574
    bool SimpleReduction = ReductionKind == OMPD_simd;
1453
    // Emit nowait reduction if nowait clause is present or directive is a
1454
    // parallel directive (it always has implicit barrier).
1455
574
    CGM.getOpenMPRuntime().emitReduction(
1456
574
        *this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps,
1457
574
        {WithNowait, SimpleReduction, ReductionKind});
1458
574
  }
1459
16.5k
}
1460
1461
static void emitPostUpdateForReductionClause(
1462
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
1463
16.5k
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
1464
16.5k
  if (!CGF.HaveInsertPoint())
1465
0
    return;
1466
16.5k
  llvm::BasicBlock *DoneBB = nullptr;
1467
16.5k
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1468
721
    if (const Expr *PostUpdate = C->getPostUpdateExpr()) {
1469
4
      if (!DoneBB) {
1470
4
        if (llvm::Value *Cond = CondGen(CGF)) {
1471
          // If the first post-update expression is found, emit conditional
1472
          // block if it was requested.
1473
0
          llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
1474
0
          DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
1475
0
          CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1476
0
          CGF.EmitBlock(ThenBB);
1477
0
        }
1478
4
      }
1479
4
      CGF.EmitIgnoredExpr(PostUpdate);
1480
4
    }
1481
721
  }
1482
16.5k
  if (DoneBB)
1483
0
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
1484
16.5k
}
1485
1486
namespace {
1487
/// Codegen lambda for appending distribute lower and upper bounds to outlined
1488
/// parallel function. This is necessary for combined constructs such as
1489
/// 'distribute parallel for'
1490
typedef llvm::function_ref<void(CodeGenFunction &,
1491
                                const OMPExecutableDirective &,
1492
                                llvm::SmallVectorImpl<llvm::Value *> &)>
1493
    CodeGenBoundParametersTy;
1494
} // anonymous namespace
1495
1496
static void
1497
checkForLastprivateConditionalUpdate(CodeGenFunction &CGF,
1498
16.2k
                                     const OMPExecutableDirective &S) {
1499
16.2k
  if (CGF.getLangOpts().OpenMP < 50)
1500
4.11k
    return;
1501
12.1k
  llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls;
1502
12.1k
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
1503
902
    for (const Expr *Ref : C->varlists()) {
1504
902
      if (!Ref->getType()->isScalarType())
1505
473
        continue;
1506
429
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1507
429
      if (!DRE)
1508
0
        continue;
1509
429
      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1510
429
      CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1511
429
    }
1512
744
  }
1513
12.1k
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
1514
1.22k
    for (const Expr *Ref : C->varlists()) {
1515
1.22k
      if (!Ref->getType()->isScalarType())
1516
658
        continue;
1517
567
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1518
567
      if (!DRE)
1519
0
        continue;
1520
567
      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1521
567
      CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1522
567
    }
1523
295
  }
1524
12.1k
  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
1525
350
    for (const Expr *Ref : C->varlists()) {
1526
350
      if (!Ref->getType()->isScalarType())
1527
0
        continue;
1528
350
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1529
350
      if (!DRE)
1530
0
        continue;
1531
350
      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1532
350
      CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1533
350
    }
1534
279
  }
1535
  // Privates should ne analyzed since they are not captured at all.
1536
  // Task reductions may be skipped - tasks are ignored.
1537
  // Firstprivates do not return value but may be passed by reference - no need
1538
  // to check for updated lastprivate conditional.
1539
12.1k
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
1540
6.40k
    for (const Expr *Ref : C->varlists()) {
1541
6.40k
      if (!Ref->getType()->isScalarType())
1542
853
        continue;
1543
5.55k
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1544
5.55k
      if (!DRE)
1545
0
        continue;
1546
5.55k
      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1547
5.55k
    }
1548
3.95k
  }
1549
12.1k
  CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional(
1550
12.1k
      CGF, S, PrivateDecls);
1551
12.1k
}
1552
1553
static void emitCommonOMPParallelDirective(
1554
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
1555
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1556
6.08k
    const CodeGenBoundParametersTy &CodeGenBoundParameters) {
1557
6.08k
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
1558
6.08k
  llvm::Function *OutlinedFn =
1559
6.08k
      CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
1560
6.08k
          S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
1561
6.08k
  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
1562
345
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
1563
345
    llvm::Value *NumThreads =
1564
345
        CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
1565
345
                           /*IgnoreResultAssign=*/true);
1566
345
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
1567
345
        CGF, NumThreads, NumThreadsClause->getBeginLoc());
1568
345
  }
1569
6.08k
  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
1570
87
    CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
1571
87
    CGF.CGM.getOpenMPRuntime().emitProcBindClause(
1572
87
        CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getBeginLoc());
1573
87
  }
1574
6.08k
  const Expr *IfCond = nullptr;
1575
6.08k
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
1576
1.17k
    if (C->getNameModifier() == OMPD_unknown ||
1577
1.17k
        
C->getNameModifier() == OMPD_parallel769
) {
1578
589
      IfCond = C->getCondition();
1579
589
      break;
1580
589
    }
1581
1.17k
  }
1582
1583
6.08k
  OMPParallelScope Scope(CGF, S);
1584
6.08k
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
1585
  // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
1586
  // lower and upper bounds with the pragma 'for' chunking mechanism.
1587
  // The following lambda takes care of appending the lower and upper bound
1588
  // parameters when necessary
1589
6.08k
  CodeGenBoundParameters(CGF, S, CapturedVars);
1590
6.08k
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
1591
6.08k
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn,
1592
6.08k
                                              CapturedVars, IfCond);
1593
6.08k
}
1594
1595
270
static bool isAllocatableDecl(const VarDecl *VD) {
1596
270
  const VarDecl *CVD = VD->getCanonicalDecl();
1597
270
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
1598
266
    return false;
1599
4
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
1600
  // Use the default allocation.
1601
4
  return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
1602
4
            AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
1603
4
           
!AA->getAllocator()0
);
1604
270
}
1605
1606
static void emitEmptyBoundParameters(CodeGenFunction &,
1607
                                     const OMPExecutableDirective &,
1608
3.34k
                                     llvm::SmallVectorImpl<llvm::Value *> &) {}
1609
1610
Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable(
1611
254
    CodeGenFunction &CGF, const VarDecl *VD) {
1612
254
  CodeGenModule &CGM = CGF.CGM;
1613
254
  auto &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1614
1615
254
  if (!VD)
1616
0
    return Address::invalid();
1617
254
  const VarDecl *CVD = VD->getCanonicalDecl();
1618
254
  if (!isAllocatableDecl(CVD))
1619
254
    return Address::invalid();
1620
0
  llvm::Value *Size;
1621
0
  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
1622
0
  if (CVD->getType()->isVariablyModifiedType()) {
1623
0
    Size = CGF.getTypeSize(CVD->getType());
1624
    // Align the size: ((size + align - 1) / align) * align
1625
0
    Size = CGF.Builder.CreateNUWAdd(
1626
0
        Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
1627
0
    Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
1628
0
    Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
1629
0
  } else {
1630
0
    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
1631
0
    Size = CGM.getSize(Sz.alignTo(Align));
1632
0
  }
1633
1634
0
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
1635
0
  assert(AA->getAllocator() &&
1636
0
         "Expected allocator expression for non-default allocator.");
1637
0
  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
1638
  // According to the standard, the original allocator type is a enum (integer).
1639
  // Convert to pointer type, if required.
1640
0
  if (Allocator->getType()->isIntegerTy())
1641
0
    Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
1642
0
  else if (Allocator->getType()->isPointerTy())
1643
0
    Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
1644
0
                                                                CGM.VoidPtrTy);
1645
1646
0
  llvm::Value *Addr = OMPBuilder.createOMPAlloc(
1647
0
      CGF.Builder, Size, Allocator,
1648
0
      getNameWithSeparators({CVD->getName(), ".void.addr"}, ".", "."));
1649
0
  llvm::CallInst *FreeCI =
1650
0
      OMPBuilder.createOMPFree(CGF.Builder, Addr, Allocator);
1651
1652
0
  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FreeCI);
1653
0
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1654
0
      Addr,
1655
0
      CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
1656
0
      getNameWithSeparators({CVD->getName(), ".addr"}, ".", "."));
1657
0
  return Address(Addr, Align);
1658
254
}
1659
1660
Address CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate(
1661
    CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr,
1662
0
    SourceLocation Loc) {
1663
0
  CodeGenModule &CGM = CGF.CGM;
1664
0
  if (CGM.getLangOpts().OpenMPUseTLS &&
1665
0
      CGM.getContext().getTargetInfo().isTLSSupported())
1666
0
    return VDAddr;
1667
1668
0
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1669
1670
0
  llvm::Type *VarTy = VDAddr.getElementType();
1671
0
  llvm::Value *Data =
1672
0
      CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy);
1673
0
  llvm::ConstantInt *Size = CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy));
1674
0
  std::string Suffix = getNameWithSeparators({"cache", ""});
1675
0
  llvm::Twine CacheName = Twine(CGM.getMangledName(VD)).concat(Suffix);
1676
1677
0
  llvm::CallInst *ThreadPrivateCacheCall =
1678
0
      OMPBuilder.createCachedThreadPrivate(CGF.Builder, Data, Size, CacheName);
1679
1680
0
  return Address(ThreadPrivateCacheCall, VDAddr.getAlignment());
1681
0
}
1682
1683
std::string CodeGenFunction::OMPBuilderCBHelpers::getNameWithSeparators(
1684
0
    ArrayRef<StringRef> Parts, StringRef FirstSeparator, StringRef Separator) {
1685
0
  SmallString<128> Buffer;
1686
0
  llvm::raw_svector_ostream OS(Buffer);
1687
0
  StringRef Sep = FirstSeparator;
1688
0
  for (StringRef Part : Parts) {
1689
0
    OS << Sep << Part;
1690
0
    Sep = Separator;
1691
0
  }
1692
0
  return OS.str().str();
1693
0
}
1694
980
void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
1695
980
  if (CGM.getLangOpts().OpenMPIRBuilder) {
1696
50
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1697
    // Check if we have any if clause associated with the directive.
1698
50
    llvm::Value *IfCond = nullptr;
1699
50
    if (const auto *C = S.getSingleClause<OMPIfClause>())
1700
0
      IfCond = EmitScalarExpr(C->getCondition(),
1701
0
                              /*IgnoreResultAssign=*/true);
1702
1703
50
    llvm::Value *NumThreads = nullptr;
1704
50
    if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>())
1705
0
      NumThreads = EmitScalarExpr(NumThreadsClause->getNumThreads(),
1706
0
                                  /*IgnoreResultAssign=*/true);
1707
1708
50
    ProcBindKind ProcBind = OMP_PROC_BIND_default;
1709
50
    if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>())
1710
0
      ProcBind = ProcBindClause->getProcBindKind();
1711
1712
50
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1713
1714
    // The cleanup callback that finalizes all variabels at the given location,
1715
    // thus calls destructors etc.
1716
58
    auto FiniCB = [this](InsertPointTy IP) {
1717
58
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
1718
58
    };
1719
1720
    // Privatization callback that performs appropriate action for
1721
    // shared/private/firstprivate/lastprivate/copyin/... variables.
1722
    //
1723
    // TODO: This defaults to shared right now.
1724
50
    auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
1725
82
                     llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
1726
      // The next line is appropriate only for variables (Val) with the
1727
      // data-sharing attribute "shared".
1728
82
      ReplVal = &Val;
1729
1730
82
      return CodeGenIP;
1731
82
    };
1732
1733
50
    const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
1734
50
    const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt();
1735
1736
50
    auto BodyGenCB = [ParallelRegionBodyStmt,
1737
50
                      this](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
1738
50
                            llvm::BasicBlock &ContinuationBB) {
1739
50
      OMPBuilderCBHelpers::OutlinedRegionBodyRAII ORB(*this, AllocaIP,
1740
50
                                                      ContinuationBB);
1741
50
      OMPBuilderCBHelpers::EmitOMPRegionBody(*this, ParallelRegionBodyStmt,
1742
50
                                             CodeGenIP, ContinuationBB);
1743
50
    };
1744
1745
50
    CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
1746
50
    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
1747
50
    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
1748
50
        AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
1749
50
    Builder.restoreIP(
1750
50
        OMPBuilder.createParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB,
1751
50
                                  IfCond, NumThreads, ProcBind, S.hasCancel()));
1752
50
    return;
1753
50
  }
1754
1755
  // Emit parallel region as a standalone region.
1756
930
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
1757
930
    Action.Enter(CGF);
1758
930
    OMPPrivateScope PrivateScope(CGF);
1759
930
    bool Copyins = CGF.EmitOMPCopyinClause(S);
1760
930
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
1761
930
    if (Copyins) {
1762
      // Emit implicit barrier to synchronize threads and avoid data races on
1763
      // propagation master's thread values of threadprivate variables to local
1764
      // instances of that variables of all other implicit threads.
1765
23
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1766
23
          CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
1767
23
          /*ForceSimpleCall=*/true);
1768
23
    }
1769
930
    CGF.EmitOMPPrivateClause(S, PrivateScope);
1770
930
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
1771
930
    (void)PrivateScope.Privatize();
1772
930
    CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
1773
930
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
1774
930
  };
1775
930
  {
1776
930
    auto LPCRegion =
1777
930
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
1778
930
    emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
1779
930
                                   emitEmptyBoundParameters);
1780
930
    emitPostUpdateForReductionClause(*this, S,
1781
930
                                     [](CodeGenFunction &) 
{ return nullptr; }4
);
1782
930
  }
1783
  // Check for outer lastprivate conditional update.
1784
930
  checkForLastprivateConditionalUpdate(*this, S);
1785
930
}
1786
1787
namespace {
1788
/// RAII to handle scopes for loop transformation directives.
1789
class OMPTransformDirectiveScopeRAII {
1790
  OMPLoopScope *Scope = nullptr;
1791
  CodeGenFunction::CGCapturedStmtInfo *CGSI = nullptr;
1792
  CodeGenFunction::CGCapturedStmtRAII *CapInfoRAII = nullptr;
1793
1794
public:
1795
8
  OMPTransformDirectiveScopeRAII(CodeGenFunction &CGF, const Stmt *S) {
1796
8
    if (const auto *Dir = dyn_cast<OMPLoopBasedDirective>(S)) {
1797
8
      Scope = new OMPLoopScope(CGF, *Dir);
1798
8
      CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP);
1799
8
      CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI);
1800
8
    }
1801
8
  }
1802
8
  ~OMPTransformDirectiveScopeRAII() {
1803
8
    if (!Scope)
1804
0
      return;
1805
8
    delete CapInfoRAII;
1806
8
    delete CGSI;
1807
8
    delete Scope;
1808
8
  }
1809
};
1810
} // namespace
1811
1812
static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop,
1813
11.4k
                     int MaxLevel, int Level = 0) {
1814
11.4k
  assert(Level < MaxLevel && "Too deep lookup during loop body codegen.");
1815
0
  const Stmt *SimplifiedS = S->IgnoreContainers();
1816
11.4k
  if (const auto *CS = dyn_cast<CompoundStmt>(SimplifiedS)) {
1817
8
    PrettyStackTraceLoc CrashInfo(
1818
8
        CGF.getContext().getSourceManager(), CS->getLBracLoc(),
1819
8
        "LLVM IR generation of compound statement ('{}')");
1820
1821
    // Keep track of the current cleanup stack depth, including debug scopes.
1822
8
    CodeGenFunction::LexicalScope Scope(CGF, S->getSourceRange());
1823
8
    for (const Stmt *CurStmt : CS->body())
1824
34
      emitBody(CGF, CurStmt, NextLoop, MaxLevel, Level);
1825
8
    return;
1826
8
  }
1827
11.4k
  if (SimplifiedS == NextLoop) {
1828
11.4k
    if (auto *Dir = dyn_cast<OMPTileDirective>(SimplifiedS))
1829
14
      SimplifiedS = Dir->getTransformedStmt();
1830
11.4k
    if (auto *Dir = dyn_cast<OMPUnrollDirective>(SimplifiedS))
1831
10
      SimplifiedS = Dir->getTransformedStmt();
1832
11.4k
    if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(SimplifiedS))
1833
8
      SimplifiedS = CanonLoop->getLoopStmt();
1834
11.4k
    if (const auto *For = dyn_cast<ForStmt>(SimplifiedS)) {
1835
11.4k
      S = For->getBody();
1836
11.4k
    } else {
1837
6
      assert(isa<CXXForRangeStmt>(SimplifiedS) &&
1838
6
             "Expected canonical for loop or range-based for loop.");
1839
0
      const auto *CXXFor = cast<CXXForRangeStmt>(SimplifiedS);
1840
6
      CGF.EmitStmt(CXXFor->getLoopVarStmt());
1841
6
      S = CXXFor->getBody();
1842
6
    }
1843
11.4k
    if (Level + 1 < MaxLevel) {
1844
491
      NextLoop = OMPLoopDirective::tryToFindNextInnerLoop(
1845
491
          S, /*TryImperfectlyNestedLoops=*/true);
1846
491
      emitBody(CGF, S, NextLoop, MaxLevel, Level + 1);
1847
491
      return;
1848
491
    }
1849
11.4k
  }
1850
10.9k
  CGF.EmitStmt(S);
1851
10.9k
}
1852
1853
void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
1854
10.9k
                                      JumpDest LoopExit) {
1855
10.9k
  RunCleanupsScope BodyScope(*this);
1856
  // Update counters values on current iteration.
1857
10.9k
  for (const Expr *UE : D.updates())
1858
11.4k
    EmitIgnoredExpr(UE);
1859
  // Update the linear variables.
1860
  // In distribute directives only loop counters may be marked as linear, no
1861
  // need to generate the code for them.
1862
10.9k
  if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
1863
4.35k
    for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1864
396
      for (const Expr *UE : C->updates())
1865
518
        EmitIgnoredExpr(UE);
1866
396
    }
1867
4.35k
  }
1868
1869
  // On a continue in the body, jump to the end.
1870
10.9k
  JumpDest Continue = getJumpDestInCurrentScope("omp.body.continue");
1871
10.9k
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1872
11.4k
  for (const Expr *E : D.finals_conditions()) {
1873
11.4k
    if (!E)
1874
11.4k
      continue;
1875
    // Check that loop counter in non-rectangular nest fits into the iteration
1876
    // space.
1877
25
    llvm::BasicBlock *NextBB = createBasicBlock("omp.body.next");
1878
25
    EmitBranchOnBoolExpr(E, NextBB, Continue.getBlock(),
1879
25
                         getProfileCount(D.getBody()));
1880
25
    EmitBlock(NextBB);
1881
25
  }
1882
1883
10.9k
  OMPPrivateScope InscanScope(*this);
1884
10.9k
  EmitOMPReductionClauseInit(D, InscanScope, /*ForInscan=*/true);
1885
10.9k
  bool IsInscanRegion = InscanScope.Privatize();
1886
10.9k
  if (IsInscanRegion) {
1887
    // Need to remember the block before and after scan directive
1888
    // to dispatch them correctly depending on the clause used in
1889
    // this directive, inclusive or exclusive. For inclusive scan the natural
1890
    // order of the blocks is used, for exclusive clause the blocks must be
1891
    // executed in reverse order.
1892
48
    OMPBeforeScanBlock = createBasicBlock("omp.before.scan.bb");
1893
48
    OMPAfterScanBlock = createBasicBlock("omp.after.scan.bb");
1894
    // No need to allocate inscan exit block, in simd mode it is selected in the
1895
    // codegen for the scan directive.
1896
48
    if (D.getDirectiveKind() != OMPD_simd && 
!getLangOpts().OpenMPSimd40
)
1897
32
      OMPScanExitBlock = createBasicBlock("omp.exit.inscan.bb");
1898
48
    OMPScanDispatch = createBasicBlock("omp.inscan.dispatch");
1899
48
    EmitBranch(OMPScanDispatch);
1900
48
    EmitBlock(OMPBeforeScanBlock);
1901
48
  }
1902
1903
  // Emit loop variables for C++ range loops.
1904
10.9k
  const Stmt *Body =
1905
10.9k
      D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
1906
  // Emit loop body.
1907
10.9k
  emitBody(*this, Body,
1908
10.9k
           OMPLoopBasedDirective::tryToFindNextInnerLoop(
1909
10.9k
               Body, /*TryImperfectlyNestedLoops=*/true),
1910
10.9k
           D.getLoopsNumber());
1911
1912
  // Jump to the dispatcher at the end of the loop body.
1913
10.9k
  if (IsInscanRegion)
1914
48
    EmitBranch(OMPScanExitBlock);
1915
1916
  // The end (updates/cleanups).
1917
10.9k
  EmitBlock(Continue.getBlock());
1918
10.9k
  BreakContinueStack.pop_back();
1919
10.9k
}
1920
1921
using EmittedClosureTy = std::pair<llvm::Function *, llvm::Value *>;
1922
1923
/// Emit a captured statement and return the function as well as its captured
1924
/// closure context.
1925
static EmittedClosureTy emitCapturedStmtFunc(CodeGenFunction &ParentCGF,
1926
46
                                             const CapturedStmt *S) {
1927
46
  LValue CapStruct = ParentCGF.InitCapturedStruct(*S);
1928
46
  CodeGenFunction CGF(ParentCGF.CGM, /*suppressNewContext=*/true);
1929
46
  std::unique_ptr<CodeGenFunction::CGCapturedStmtInfo> CSI =
1930
46
      std::make_unique<CodeGenFunction::CGCapturedStmtInfo>(*S);
1931
46
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, CSI.get());
1932
46
  llvm::Function *F = CGF.GenerateCapturedStmtFunction(*S);
1933
1934
46
  return {F, CapStruct.getPointer(ParentCGF)};
1935
46
}
1936
1937
/// Emit a call to a previously captured closure.
1938
static llvm::CallInst *
1939
emitCapturedStmtCall(CodeGenFunction &ParentCGF, EmittedClosureTy Cap,
1940
46
                     llvm::ArrayRef<llvm::Value *> Args) {
1941
  // Append the closure context to the argument.
1942
46
  SmallVector<llvm::Value *> EffectiveArgs;
1943
46
  EffectiveArgs.reserve(Args.size() + 1);
1944
46
  llvm::append_range(EffectiveArgs, Args);
1945
46
  EffectiveArgs.push_back(Cap.second);
1946
1947
46
  return ParentCGF.Builder.CreateCall(Cap.first, EffectiveArgs);
1948
46
}
1949
1950
llvm::CanonicalLoopInfo *
1951
23
CodeGenFunction::EmitOMPCollapsedCanonicalLoopNest(const Stmt *S, int Depth) {
1952
23
  assert(Depth == 1 && "Nested loops with OpenMPIRBuilder not yet implemented");
1953
1954
0
  EmitStmt(S);
1955
23
  assert(OMPLoopNestStack.size() >= (size_t)Depth && "Found too few loops");
1956
1957
  // The last added loop is the outermost one.
1958
0
  return OMPLoopNestStack.back();
1959
23
}
1960
1961
23
void CodeGenFunction::EmitOMPCanonicalLoop(const OMPCanonicalLoop *S) {
1962
23
  const Stmt *SyntacticalLoop = S->getLoopStmt();
1963
23
  if (!getLangOpts().OpenMPIRBuilder) {
1964
    // Ignore if OpenMPIRBuilder is not enabled.
1965
0
    EmitStmt(SyntacticalLoop);
1966
0
    return;
1967
0
  }
1968
1969
23
  LexicalScope ForScope(*this, S->getSourceRange());
1970
1971
  // Emit init statements. The Distance/LoopVar funcs may reference variable
1972
  // declarations they contain.
1973
23
  const Stmt *BodyStmt;
1974
23
  if (const auto *For = dyn_cast<ForStmt>(SyntacticalLoop)) {
1975
22
    if (const Stmt *InitStmt = For->getInit())
1976
22
      EmitStmt(InitStmt);
1977
22
    BodyStmt = For->getBody();
1978
22
  } else 
if (const auto *1
RangeFor1
=
1979
1
                 dyn_cast<CXXForRangeStmt>(SyntacticalLoop)) {
1980
1
    if (const DeclStmt *RangeStmt = RangeFor->getRangeStmt())
1981
1
      EmitStmt(RangeStmt);
1982
1
    if (const DeclStmt *BeginStmt = RangeFor->getBeginStmt())
1983
1
      EmitStmt(BeginStmt);
1984
1
    if (const DeclStmt *EndStmt = RangeFor->getEndStmt())
1985
1
      EmitStmt(EndStmt);
1986
1
    if (const DeclStmt *LoopVarStmt = RangeFor->getLoopVarStmt())
1987
1
      EmitStmt(LoopVarStmt);
1988
1
    BodyStmt = RangeFor->getBody();
1989
1
  } else
1990
0
    llvm_unreachable("Expected for-stmt or range-based for-stmt");
1991
1992
  // Emit closure for later use. By-value captures will be captured here.
1993
23
  const CapturedStmt *DistanceFunc = S->getDistanceFunc();
1994
23
  EmittedClosureTy DistanceClosure = emitCapturedStmtFunc(*this, DistanceFunc);
1995
23
  const CapturedStmt *LoopVarFunc = S->getLoopVarFunc();
1996
23
  EmittedClosureTy LoopVarClosure = emitCapturedStmtFunc(*this, LoopVarFunc);
1997
1998
  // Call the distance function to get the number of iterations of the loop to
1999
  // come.
2000
23
  QualType LogicalTy = DistanceFunc->getCapturedDecl()
2001
23
                           ->getParam(0)
2002
23
                           ->getType()
2003
23
                           .getNonReferenceType();
2004
23
  Address CountAddr = CreateMemTemp(LogicalTy, ".count.addr");
2005
23
  emitCapturedStmtCall(*this, DistanceClosure, {CountAddr.getPointer()});
2006
23
  llvm::Value *DistVal = Builder.CreateLoad(CountAddr, ".count");
2007
2008
  // Emit the loop structure.
2009
23
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
2010
23
  auto BodyGen = [&, this](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP,
2011
23
                           llvm::Value *IndVar) {
2012
23
    Builder.restoreIP(CodeGenIP);
2013
2014
    // Emit the loop body: Convert the logical iteration number to the loop
2015
    // variable and emit the body.
2016
23
    const DeclRefExpr *LoopVarRef = S->getLoopVarRef();
2017
23
    LValue LCVal = EmitLValue(LoopVarRef);
2018
23
    Address LoopVarAddress = LCVal.getAddress(*this);
2019
23
    emitCapturedStmtCall(*this, LoopVarClosure,
2020
23
                         {LoopVarAddress.getPointer(), IndVar});
2021
2022
23
    RunCleanupsScope BodyScope(*this);
2023
23
    EmitStmt(BodyStmt);
2024
23
  };
2025
23
  llvm::CanonicalLoopInfo *CL =
2026
23
      OMPBuilder.createCanonicalLoop(Builder, BodyGen, DistVal);
2027
2028
  // Finish up the loop.
2029
23
  Builder.restoreIP(CL->getAfterIP());
2030
23
  ForScope.ForceCleanup();
2031
2032
  // Remember the CanonicalLoopInfo for parent AST nodes consuming it.
2033
23
  OMPLoopNestStack.push_back(CL);
2034
23
}
2035
2036
void CodeGenFunction::EmitOMPInnerLoop(
2037
    const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond,
2038
    const Expr *IncExpr,
2039
    const llvm::function_ref<void(CodeGenFunction &)> BodyGen,
2040
13.7k
    const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) {
2041
13.7k
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
2042
2043
  // Start the loop with a block that tests the condition.
2044
13.7k
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
2045
13.7k
  EmitBlock(CondBlock);
2046
13.7k
  const SourceRange R = S.getSourceRange();
2047
2048
  // If attributes are attached, push to the basic block with them.
2049
13.7k
  const auto &OMPED = cast<OMPExecutableDirective>(S);
2050
13.7k
  const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt();
2051
13.7k
  const Stmt *SS = ICS->getCapturedStmt();
2052
13.7k
  const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(SS);
2053
13.7k
  OMPLoopNestStack.clear();
2054
13.7k
  if (AS)
2055
2
    LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(),
2056
2
                   AS->getAttrs(), SourceLocToDebugLoc(R.getBegin()),
2057
2
                   SourceLocToDebugLoc(R.getEnd()));
2058
13.7k
  else
2059
13.7k
    LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
2060
13.7k
                   SourceLocToDebugLoc(R.getEnd()));
2061
2062
  // If there are any cleanups between here and the loop-exit scope,
2063
  // create a block to stage a loop exit along.
2064
13.7k
  llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
2065
13.7k
  if (RequiresCleanup)
2066
845
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");
2067
2068
13.7k
  llvm::BasicBlock *LoopBody = createBasicBlock("omp.inner.for.body");
2069
2070
  // Emit condition.
2071
13.7k
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
2072
13.7k
  if (ExitBlock != LoopExit.getBlock()) {
2073
845
    EmitBlock(ExitBlock);
2074
845
    EmitBranchThroughCleanup(LoopExit);
2075
845
  }
2076
2077
13.7k
  EmitBlock(LoopBody);
2078
13.7k
  incrementProfileCounter(&S);
2079
2080
  // Create a block for the increment.
2081
13.7k
  JumpDest Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
2082
13.7k
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
2083
2084
13.7k
  BodyGen(*this);
2085
2086
  // Emit "IV = IV + 1" and a back-edge to the condition block.
2087
13.7k
  EmitBlock(Continue.getBlock());
2088
13.7k
  EmitIgnoredExpr(IncExpr);
2089
13.7k
  PostIncGen(*this);
2090
13.7k
  BreakContinueStack.pop_back();
2091
13.7k
  EmitBranch(CondBlock);
2092
13.7k
  LoopStack.pop();
2093
  // Emit the fall-through block.
2094
13.7k
  EmitBlock(LoopExit.getBlock());
2095
13.7k
}
2096
2097
8.98k
bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
2098
8.98k
  if (!HaveInsertPoint())
2099
0
    return false;
2100
  // Emit inits for the linear variables.
2101
8.98k
  bool HasLinears = false;
2102
8.98k
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2103
614
    for (const Expr *Init : C->inits()) {
2104
614
      HasLinears = true;
2105
614
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
2106
614
      if (const auto *Ref =
2107
614
              dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
2108
614
        AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
2109
614
        const auto *OrigVD = cast<VarDecl>(Ref->getDecl());
2110
614
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
2111
614
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
2112
614
                        VD->getInit()->getType(), VK_LValue,
2113
614
                        VD->getInit()->getExprLoc());
2114
614
        EmitExprAsInit(
2115
614
            &DRE, VD,
2116
614
            MakeAddrLValue(Emission.getAllocatedAddress(), VD->getType()),
2117
614
            /*capturedByInit=*/false);
2118
614
        EmitAutoVarCleanups(Emission);
2119
614
      } else {
2120
0
        EmitVarDecl(*VD);
2121
0
      }
2122
614
    }
2123
    // Emit the linear steps for the linear clauses.
2124
    // If a step is not constant, it is pre-calculated before the loop.
2125
492
    if (const auto *CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
2126
154
      if (const auto *SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
2127
154
        EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
2128
        // Emit calculation of the linear step.
2129
154
        EmitIgnoredExpr(CS);
2130
154
      }
2131
492
  }
2132
8.98k
  return HasLinears;
2133
8.98k
}
2134
2135
void CodeGenFunction::EmitOMPLinearClauseFinal(
2136
    const OMPLoopDirective &D,
2137
8.98k
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
2138
8.98k
  if (!HaveInsertPoint())
2139
0
    return;
2140
8.98k
  llvm::BasicBlock *DoneBB = nullptr;
2141
  // Emit the final values of the linear variables.
2142
8.98k
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2143
492
    auto IC = C->varlist_begin();
2144
614
    for (const Expr *F : C->finals()) {
2145
614
      if (!DoneBB) {
2146
539
        if (llvm::Value *Cond = CondGen(*this)) {
2147
          // If the first post-update expression is found, emit conditional
2148
          // block if it was requested.
2149
135
          llvm::BasicBlock *ThenBB = createBasicBlock(".omp.linear.pu");
2150
135
          DoneBB = createBasicBlock(".omp.linear.pu.done");
2151
135
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
2152
135
          EmitBlock(ThenBB);
2153
135
        }
2154
539
      }
2155
614
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
2156
614
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
2157
614
                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
2158
614
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
2159
614
      Address OrigAddr = EmitLValue(&DRE).getAddress(*this);
2160
614
      CodeGenFunction::OMPPrivateScope VarScope(*this);
2161
614
      VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; });
2162
614
      (void)VarScope.Privatize();
2163
614
      EmitIgnoredExpr(F);
2164
614
      ++IC;
2165
614
    }
2166
492
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
2167
4
      EmitIgnoredExpr(PostUpdate);
2168
492
  }
2169
8.98k
  if (DoneBB)
2170
135
    EmitBlock(DoneBB, /*IsFinished=*/true);
2171
8.98k
}
2172
2173
static void emitAlignedClause(CodeGenFunction &CGF,
2174
13.2k
                              const OMPExecutableDirective &D) {
2175
13.2k
  if (!CGF.HaveInsertPoint())
2176
0
    return;
2177
13.2k
  for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
2178
314
    llvm::APInt ClauseAlignment(64, 0);
2179
314
    if (const Expr *AlignmentExpr = Clause->getAlignment()) {
2180
108
      auto *AlignmentCI =
2181
108
          cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
2182
108
      ClauseAlignment = AlignmentCI->getValue();
2183
108
    }
2184
364
    for (const Expr *E : Clause->varlists()) {
2185
364
      llvm::APInt Alignment(ClauseAlignment);
2186
364
      if (Alignment == 0) {
2187
        // OpenMP [2.8.1, Description]
2188
        // If no optional parameter is specified, implementation-defined default
2189
        // alignments for SIMD instructions on the target platforms are assumed.
2190
248
        Alignment =
2191
248
            CGF.getContext()
2192
248
                .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
2193
248
                    E->getType()->getPointeeType()))
2194
248
                .getQuantity();
2195
248
      }
2196
364
      assert((Alignment == 0 || Alignment.isPowerOf2()) &&
2197
364
             "alignment is not power of 2");
2198
364
      if (Alignment != 0) {
2199
364
        llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
2200
364
        CGF.emitAlignmentAssumption(
2201
364
            PtrValue, E, /*No second loc needed*/ SourceLocation(),
2202
364
            llvm::ConstantInt::get(CGF.getLLVMContext(), Alignment));
2203
364
      }
2204
364
    }
2205
314
  }
2206
13.2k
}
2207
2208
void CodeGenFunction::EmitOMPPrivateLoopCounters(
2209
15.7k
    const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
2210
15.7k
  if (!HaveInsertPoint())
2211
0
    return;
2212
15.7k
  auto I = S.private_counters().begin();
2213
16.4k
  for (const Expr *E : S.counters()) {
2214
16.4k
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2215
16.4k
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
2216
    // Emit var without initialization.
2217
16.4k
    AutoVarEmission VarEmission = EmitAutoVarAlloca(*PrivateVD);
2218
16.4k
    EmitAutoVarCleanups(VarEmission);
2219
16.4k
    LocalDeclMap.erase(PrivateVD);
2220
16.4k
    (void)LoopScope.addPrivate(
2221
16.4k
        VD, [&VarEmission]() { return VarEmission.getAllocatedAddress(); });
2222
16.4k
    if (LocalDeclMap.count(VD) || 
CapturedStmtInfo->lookup(VD)15.8k
||
2223
16.4k
        
VD->hasGlobalStorage()15.8k
) {
2224
685
      (void)LoopScope.addPrivate(PrivateVD, [this, VD, E]() {
2225
685
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD),
2226
685
                        LocalDeclMap.count(VD) || 
CapturedStmtInfo->lookup(VD)31
,
2227
685
                        E->getType(), VK_LValue, E->getExprLoc());
2228
685
        return EmitLValue(&DRE).getAddress(*this);
2229
685
      });
2230
15.7k
    } else {
2231
15.7k
      (void)LoopScope.addPrivate(PrivateVD, [&VarEmission]() {
2232
15.7k
        return VarEmission.getAllocatedAddress();
2233
15.7k
      });
2234
15.7k
    }
2235
16.4k
    ++I;
2236
16.4k
  }
2237
  // Privatize extra loop counters used in loops for ordered(n) clauses.
2238
15.7k
  for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) {
2239
80
    if (!C->getNumForLoops())
2240
54
      continue;
2241
26
    for (unsigned I = S.getLoopsNumber(), E = C->getLoopNumIterations().size();
2242
32
         I < E; 
++I6
) {
2243
6
      const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I));
2244
6
      const auto *VD = cast<VarDecl>(DRE->getDecl());
2245
      // Override only those variables that can be captured to avoid re-emission
2246
      // of the variables declared within the loops.
2247
6
      if (DRE->refersToEnclosingVariableOrCapture()) {
2248
4
        (void)LoopScope.addPrivate(VD, [this, DRE, VD]() {
2249
4
          return CreateMemTemp(DRE->getType(), VD->getName());
2250
4
        });
2251
4
      }
2252
6
    }
2253
26
  }
2254
15.7k
}
2255
2256
static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
2257
                        const Expr *Cond, llvm::BasicBlock *TrueBlock,
2258
2.27k
                        llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
2259
2.27k
  if (!CGF.HaveInsertPoint())
2260
0
    return;
2261
2.27k
  {
2262
2.27k
    CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
2263
2.27k
    CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
2264
2.27k
    (void)PreCondScope.Privatize();
2265
    // Get initial values of real counters.
2266
2.46k
    for (const Expr *I : S.inits()) {
2267
2.46k
      CGF.EmitIgnoredExpr(I);
2268
2.46k
    }
2269
2.27k
  }
2270
  // Create temp loop control variables with their init values to support
2271
  // non-rectangular loops.
2272
2.27k
  CodeGenFunction::OMPMapVars PreCondVars;
2273
2.46k
  for (const Expr *E : S.dependent_counters()) {
2274
2.46k
    if (!E)
2275
2.45k
      continue;
2276
5
    assert(!E->getType().getNonReferenceType()->isRecordType() &&
2277
5
           "dependent counter must not be an iterator.");
2278
0
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2279
5
    Address CounterAddr =
2280
5
        CGF.CreateMemTemp(VD->getType().getNonReferenceType());
2281
5
    (void)PreCondVars.setVarAddr(CGF, VD, CounterAddr);
2282
5
  }
2283
2.27k
  (void)PreCondVars.apply(CGF);
2284
2.46k
  for (const Expr *E : S.dependent_inits()) {
2285
2.46k
    if (!E)
2286
2.45k
      continue;
2287
5
    CGF.EmitIgnoredExpr(E);
2288
5
  }
2289
  // Check that loop is executed at least one time.
2290
2.27k
  CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
2291
2.27k
  PreCondVars.restore(CGF);
2292
2.27k
}
2293
2294
void CodeGenFunction::EmitOMPLinearClause(
2295
8.98k
    const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
2296
8.98k
  if (!HaveInsertPoint())
2297
0
    return;
2298
8.98k
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
2299
8.98k
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
2300
6.40k
    const auto *LoopDirective = cast<OMPLoopDirective>(&D);
2301
6.68k
    for (const Expr *C : LoopDirective->counters()) {
2302
6.68k
      SIMDLCVs.insert(
2303
6.68k
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
2304
6.68k
    }
2305
6.40k
  }
2306
8.98k
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2307
492
    auto CurPrivate = C->privates().begin();
2308
614
    for (const Expr *E : C->varlists()) {
2309
614
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2310
614
      const auto *PrivateVD =
2311
614
          cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
2312
614
      if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
2313
508
        bool IsRegistered = PrivateScope.addPrivate(VD, [this, PrivateVD]() {
2314
          // Emit private VarDecl with copy init.
2315
508
          EmitVarDecl(*PrivateVD);
2316
508
          return GetAddrOfLocalVar(PrivateVD);
2317
508
        });
2318
508
        assert(IsRegistered && "linear var already registered as private");
2319
        // Silence the warning about unused variable.
2320
0
        (void)IsRegistered;
2321
508
      } else {
2322
106
        EmitVarDecl(*PrivateVD);
2323
106
      }
2324
0
      ++CurPrivate;
2325
614
    }
2326
492
  }
2327
8.98k
}
2328
2329
static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
2330
8.55k
                                     const OMPExecutableDirective &D) {
2331
8.55k
  if (!CGF.HaveInsertPoint())
2332
0
    return;
2333
8.55k
  if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
2334
248
    RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
2335
248
                                 /*ignoreResult=*/true);
2336
248
    auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
2337
248
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
2338
    // In presence of finite 'safelen', it may be unsafe to mark all
2339
    // the memory instructions parallel, because loop-carried
2340
    // dependences of 'safelen' iterations are possible.
2341
248
    CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
2342
8.31k
  } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
2343
166
    RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
2344
166
                                 /*ignoreResult=*/true);
2345
166
    auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
2346
166
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
2347
    // In presence of finite 'safelen', it may be unsafe to mark all
2348
    // the memory instructions parallel, because loop-carried
2349
    // dependences of 'safelen' iterations are possible.
2350
166
    CGF.LoopStack.setParallel(/*Enable=*/false);
2351
166
  }
2352
8.55k
}
2353
2354
8.55k
void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) {
2355
  // Walk clauses and process safelen/lastprivate.
2356
8.55k
  LoopStack.setParallel(/*Enable=*/true);
2357
8.55k
  LoopStack.setVectorizeEnable();
2358
8.55k
  emitSimdlenSafelenClause(*this, D);
2359
8.55k
  if (const auto *C = D.getSingleClause<OMPOrderClause>())
2360
0
    if (C->getKind() == OMPC_ORDER_concurrent)
2361
0
      LoopStack.setParallel(/*Enable=*/true);
2362
8.55k
  if ((D.getDirectiveKind() == OMPD_simd ||
2363
8.55k
       
(8.23k
getLangOpts().OpenMPSimd8.23k
&&
2364
8.23k
        
isOpenMPSimdDirective(D.getDirectiveKind())3.16k
)) &&
2365
8.55k
      llvm::any_of(D.getClausesOfKind<OMPReductionClause>(),
2366
3.49k
                   [](const OMPReductionClause *C) {
2367
95
                     return C->getModifier() == OMPC_REDUCTION_inscan;
2368
95
                   }))
2369
    // Disable parallel access in case of prefix sum.
2370
16
    LoopStack.setParallel(/*Enable=*/false);
2371
8.55k
}
2372
2373
void CodeGenFunction::EmitOMPSimdFinal(
2374
    const OMPLoopDirective &D,
2375
8.55k
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
2376
8.55k
  if (!HaveInsertPoint())
2377
0
    return;
2378
8.55k
  llvm::BasicBlock *DoneBB = nullptr;
2379
8.55k
  auto IC = D.counters().begin();
2380
8.55k
  auto IPC = D.private_counters().begin();
2381
8.89k
  for (const Expr *F : D.finals()) {
2382
8.89k
    const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
2383
8.89k
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
2384
8.89k
    const auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
2385
8.89k
    if (LocalDeclMap.count(OrigVD) || 
CapturedStmtInfo->lookup(OrigVD)0
||
2386
8.89k
        
OrigVD->hasGlobalStorage()0
||
CED0
) {
2387
8.89k
      if (!DoneBB) {
2388
8.74k
        if (llvm::Value *Cond = CondGen(*this)) {
2389
          // If the first post-update expression is found, emit conditional
2390
          // block if it was requested.
2391
4.47k
          llvm::BasicBlock *ThenBB = createBasicBlock(".omp.final.then");
2392
4.47k
          DoneBB = createBasicBlock(".omp.final.done");
2393
4.47k
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
2394
4.47k
          EmitBlock(ThenBB);
2395
4.47k
        }
2396
8.74k
      }
2397
8.89k
      Address OrigAddr = Address::invalid();
2398
8.89k
      if (CED) {
2399
28
        OrigAddr =
2400
28
            EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(*this);
2401
8.86k
      } else {
2402
8.86k
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD),
2403
8.86k
                        /*RefersToEnclosingVariableOrCapture=*/false,
2404
8.86k
                        (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
2405
8.86k
        OrigAddr = EmitLValue(&DRE).getAddress(*this);
2406
8.86k
      }
2407
8.89k
      OMPPrivateScope VarScope(*this);
2408
8.89k
      VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; });
2409
8.89k
      (void)VarScope.Privatize();
2410
8.89k
      EmitIgnoredExpr(F);
2411
8.89k
    }
2412
8.89k
    ++IC;
2413
8.89k
    ++IPC;
2414
8.89k
  }
2415
8.55k
  if (DoneBB)
2416
4.47k
    EmitBlock(DoneBB, /*IsFinished=*/true);
2417
8.55k
}
2418
2419
static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
2420
                                         const OMPLoopDirective &S,
2421
10.9k
                                         CodeGenFunction::JumpDest LoopExit) {
2422
10.9k
  CGF.EmitOMPLoopBody(S, LoopExit);
2423
10.9k
  CGF.EmitStopPoint(&S);
2424
10.9k
}
2425
2426
/// Emit a helper variable and return corresponding lvalue.
2427
static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
2428
42.2k
                               const DeclRefExpr *Helper) {
2429
42.2k
  auto VDecl = cast<VarDecl>(Helper->getDecl());
2430
42.2k
  CGF.EmitVarDecl(*VDecl);
2431
42.2k
  return CGF.EmitLValue(Helper);
2432
42.2k
}
2433
2434
static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S,
2435
                               const RegionCodeGenTy &SimdInitGen,
2436
13.4k
                               const RegionCodeGenTy &BodyCodeGen) {
2437
13.4k
  auto &&ThenGen = [&S, &SimdInitGen, &BodyCodeGen](CodeGenFunction &CGF,
2438
13.4k
                                                    PrePostActionTy &) {
2439
13.3k
    CGOpenMPRuntime::NontemporalDeclsRAII NontemporalsRegion(CGF.CGM, S);
2440
13.3k
    CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
2441
13.3k
    SimdInitGen(CGF);
2442
2443
13.3k
    BodyCodeGen(CGF);
2444
13.3k
  };
2445
13.4k
  auto &&ElseGen = [&BodyCodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
2446
343
    CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
2447
343
    CGF.LoopStack.setVectorizeEnable(/*Enable=*/false);
2448
2449
343
    BodyCodeGen(CGF);
2450
343
  };
2451
13.4k
  const Expr *IfCond = nullptr;
2452
13.4k
  if (isOpenMPSimdDirective(S.getDirectiveKind())) {
2453
8.67k
    for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
2454
1.77k
      if (CGF.getLangOpts().OpenMP >= 50 &&
2455
1.77k
          
(987
C->getNameModifier() == OMPD_unknown987
||
2456
987
           
C->getNameModifier() == OMPD_simd560
)) {
2457
493
        IfCond = C->getCondition();
2458
493
        break;
2459
493
      }
2460
1.77k
    }
2461
8.67k
  }
2462
13.4k
  if (IfCond) {
2463
493
    CGF.CGM.getOpenMPRuntime().emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2464
12.9k
  } else {
2465
12.9k
    RegionCodeGenTy ThenRCG(ThenGen);
2466
12.9k
    ThenRCG(CGF);
2467
12.9k
  }
2468
13.4k
}
2469
2470
static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
2471
4.16k
                              PrePostActionTy &Action) {
2472
4.16k
  Action.Enter(CGF);
2473
4.16k
  assert(isOpenMPSimdDirective(S.getDirectiveKind()) &&
2474
4.16k
         "Expected simd directive");
2475
0
  OMPLoopScope PreInitScope(CGF, S);
2476
  // if (PreCond) {
2477
  //   for (IV in 0..LastIteration) BODY;
2478
  //   <Final counter/linear vars updates>;
2479
  // }
2480
  //
2481
4.16k
  if (isOpenMPDistributeDirective(S.getDirectiveKind()) ||
2482
4.16k
      
isOpenMPWorksharingDirective(S.getDirectiveKind())2.19k
||
2483
4.16k
      
isOpenMPTaskLoopDirective(S.getDirectiveKind())1.49k
) {
2484
2.78k
    (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()));
2485
2.78k
    (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()));
2486
2.78k
  }
2487
2488
  // Emit: if (PreCond) - begin.
2489
  // If the condition constant folds and can be elided, avoid emitting the
2490
  // whole loop.
2491
4.16k
  bool CondConstant;
2492
4.16k
  llvm::BasicBlock *ContBlock = nullptr;
2493
4.16k
  if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
2494
3.54k
    if (!CondConstant)
2495
82
      return;
2496
3.54k
  } else {
2497
621
    llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("simd.if.then");
2498
621
    ContBlock = CGF.createBasicBlock("simd.if.end");
2499
621
    emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
2500
621
                CGF.getProfileCount(&S));
2501
621
    CGF.EmitBlock(ThenBlock);
2502
621
    CGF.incrementProfileCounter(&S);
2503
621
  }
2504
2505
  // Emit the loop iteration variable.
2506
4.08k
  const Expr *IVExpr = S.getIterationVariable();
2507
4.08k
  const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
2508
4.08k
  CGF.EmitVarDecl(*IVDecl);
2509
4.08k
  CGF.EmitIgnoredExpr(S.getInit());
2510
2511
  // Emit the iterations count variable.
2512
  // If it is not a variable, Sema decided to calculate iterations count on
2513
  // each iteration (e.g., it is foldable into a constant).
2514
4.08k
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
2515
0
    CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
2516
    // Emit calculation of the iterations count.
2517
0
    CGF.EmitIgnoredExpr(S.getCalcLastIteration());
2518
0
  }
2519
2520
4.08k
  emitAlignedClause(CGF, S);
2521
4.08k
  (void)CGF.EmitOMPLinearClauseInit(S);
2522
4.08k
  {
2523
4.08k
    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
2524
4.08k
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
2525
4.08k
    CGF.EmitOMPLinearClause(S, LoopScope);
2526
4.08k
    CGF.EmitOMPPrivateClause(S, LoopScope);
2527
4.08k
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
2528
4.08k
    CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
2529
4.08k
        CGF, S, CGF.EmitLValue(S.getIterationVariable()));
2530
4.08k
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
2531
4.08k
    (void)LoopScope.Privatize();
2532
4.08k
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
2533
2.67k
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
2534
2535
4.08k
    emitCommonSimdLoop(
2536
4.08k
        CGF, S,
2537
4.08k
        [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2538
4.01k
          CGF.EmitOMPSimdInit(S);
2539
4.01k
        },
2540
4.19k
        [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
2541
4.19k
          CGF.EmitOMPInnerLoop(
2542
4.19k
              S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
2543
4.19k
              [&S](CodeGenFunction &CGF) {
2544
4.19k
                emitOMPLoopBodyWithStopPoint(CGF, S,
2545
4.19k
                                             CodeGenFunction::JumpDest());
2546
4.19k
              },
2547
4.19k
              [](CodeGenFunction &) {});
2548
4.19k
        });
2549
4.26k
    CGF.EmitOMPSimdFinal(S, [](CodeGenFunction &) { return nullptr; });
2550
    // Emit final copy of the lastprivate variables at the end of loops.
2551
4.08k
    if (HasLastprivateClause)
2552
127
      CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
2553
4.08k
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
2554
4.08k
    emitPostUpdateForReductionClause(CGF, S,
2555
4.08k
                                     [](CodeGenFunction &) 
{ return nullptr; }0
);
2556
4.08k
  }
2557
4.08k
  CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) 
{ return nullptr; }404
);
2558
  // Emit: if (PreCond) - end.
2559
4.08k
  if (ContBlock) {
2560
621
    CGF.EmitBranch(ContBlock);
2561
621
    CGF.EmitBlock(ContBlock, true);
2562
621
  }
2563
4.08k
}
2564
2565
167
void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
2566
167
  ParentLoopDirectiveForScanRegion ScanRegion(*this, S);
2567
167
  OMPFirstScanLoop = true;
2568
167
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2569
167
    emitOMPSimdRegion(CGF, S, Action);
2570
167
  };
2571
167
  {
2572
167
    auto LPCRegion =
2573
167
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
2574
167
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
2575
167
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2576
167
  }
2577
  // Check for outer lastprivate conditional update.
2578
167
  checkForLastprivateConditionalUpdate(*this, S);
2579
167
}
2580
2581
8
void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective &S) {
2582
  // Emit the de-sugared statement.
2583
8
  OMPTransformDirectiveScopeRAII TileScope(*this, &S);
2584
8
  EmitStmt(S.getTransformedStmt());
2585
8
}
2586
2587
8
void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) {
2588
  // This function is only called if the unrolled loop is not consumed by any
2589
  // other loop-associated construct. Such a loop-associated construct will have
2590
  // used the transformed AST.
2591
2592
  // Set the unroll metadata for the next emitted loop.
2593
8
  LoopStack.setUnrollState(LoopAttributes::Enable);
2594
2595
8
  if (S.hasClausesOfKind<OMPFullClause>()) {
2596
2
    LoopStack.setUnrollState(LoopAttributes::Full);
2597
6
  } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
2598
4
    if (Expr *FactorExpr = PartialClause->getFactor()) {
2599
2
      uint64_t Factor =
2600
2
          FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue();
2601
2
      assert(Factor >= 1 && "Only positive factors are valid");
2602
0
      LoopStack.setUnrollCount(Factor);
2603
2
    }
2604
4
  }
2605
2606
0
  EmitStmt(S.getAssociatedStmt());
2607
8
}
2608
2609
void CodeGenFunction::EmitOMPOuterLoop(
2610
    bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
2611
    CodeGenFunction::OMPPrivateScope &LoopScope,
2612
    const CodeGenFunction::OMPLoopArguments &LoopArgs,
2613
    const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
2614
1.18k
    const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
2615
1.18k
  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
2616
2617
1.18k
  const Expr *IVExpr = S.getIterationVariable();
2618
1.18k
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2619
1.18k
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2620
2621
1.18k
  JumpDest LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");
2622
2623
  // Start the loop with a block that tests the condition.
2624
1.18k
  llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond");
2625
1.18k
  EmitBlock(CondBlock);
2626
1.18k
  const SourceRange R = S.getSourceRange();
2627
1.18k
  OMPLoopNestStack.clear();
2628
1.18k
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
2629
1.18k
                 SourceLocToDebugLoc(R.getEnd()));
2630
2631
1.18k
  llvm::Value *BoolCondVal = nullptr;
2632
1.18k
  if (!DynamicOrOrdered) {
2633
    // UB = min(UB, GlobalUB) or
2634
    // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
2635
    // 'distribute parallel for')
2636
443
    EmitIgnoredExpr(LoopArgs.EUB);
2637
    // IV = LB
2638
443
    EmitIgnoredExpr(LoopArgs.Init);
2639
    // IV < UB
2640
443
    BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
2641
744
  } else {
2642
744
    BoolCondVal =
2643
744
        RT.emitForNext(*this, S.getBeginLoc(), IVSize, IVSigned, LoopArgs.IL,
2644
744
                       LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
2645
744
  }
2646
2647
  // If there are any cleanups between here and the loop-exit scope,
2648
  // create a block to stage a loop exit along.
2649
1.18k
  llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
2650
1.18k
  if (LoopScope.requiresCleanups())
2651
26
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");
2652
2653
1.18k
  llvm::BasicBlock *LoopBody = createBasicBlock("omp.dispatch.body");
2654
1.18k
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
2655
1.18k
  if (ExitBlock != LoopExit.getBlock()) {
2656
26
    EmitBlock(ExitBlock);
2657
26
    EmitBranchThroughCleanup(LoopExit);
2658
26
  }
2659
1.18k
  EmitBlock(LoopBody);
2660
2661
  // Emit "IV = LB" (in case of static schedule, we have already calculated new
2662
  // LB for loop condition and emitted it above).
2663
1.18k
  if (DynamicOrOrdered)
2664
744
    EmitIgnoredExpr(LoopArgs.Init);
2665
2666
  // Create a block for the increment.
2667
1.18k
  JumpDest Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
2668
1.18k
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
2669
2670
1.18k
  emitCommonSimdLoop(
2671
1.18k
      *this, S,
2672
1.18k
      [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) {
2673
        // Generate !llvm.loop.parallel metadata for loads and stores for loops
2674
        // with dynamic/guided scheduling and without ordered clause.
2675
1.18k
        if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
2676
651
          CGF.LoopStack.setParallel(!IsMonotonic);
2677
651
          if (const auto *C = S.getSingleClause<OMPOrderClause>())
2678
0
            if (C->getKind() == OMPC_ORDER_concurrent)
2679
0
              CGF.LoopStack.setParallel(/*Enable=*/true);
2680
651
        } else {
2681
536
          CGF.EmitOMPSimdInit(S);
2682
536
        }
2683
1.18k
      },
2684
1.18k
      [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered,
2685
1.19k
       &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
2686
1.19k
        SourceLocation Loc = S.getBeginLoc();
2687
        // when 'distribute' is not combined with a 'for':
2688
        // while (idx <= UB) { BODY; ++idx; }
2689
        // when 'distribute' is combined with a 'for'
2690
        // (e.g. 'distribute parallel for')
2691
        // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
2692
1.19k
        CGF.EmitOMPInnerLoop(
2693
1.19k
            S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
2694
1.19k
            [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
2695
1.19k
              CodeGenLoop(CGF, S, LoopExit);
2696
1.19k
            },
2697
1.19k
            [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
2698
1.19k
              CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
2699
1.19k
            });
2700
1.19k
      });
2701
2702
1.18k
  EmitBlock(Continue.getBlock());
2703
1.18k
  BreakContinueStack.pop_back();
2704
1.18k
  if (!DynamicOrOrdered) {
2705
    // Emit "LB = LB + Stride", "UB = UB + Stride".
2706
443
    EmitIgnoredExpr(LoopArgs.NextLB);
2707
443
    EmitIgnoredExpr(LoopArgs.NextUB);
2708
443
  }
2709
2710
1.18k
  EmitBranch(CondBlock);
2711
1.18k
  OMPLoopNestStack.clear();
2712
1.18k
  LoopStack.pop();
2713
  // Emit the fall-through block.
2714
1.18k
  EmitBlock(LoopExit.getBlock());
2715
2716
  // Tell the runtime we are done.
2717
1.18k
  auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
2718
1.18k
    if (!DynamicOrOrdered)
2719
443
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
2720
443
                                                     S.getDirectiveKind());
2721
1.18k
  };
2722
1.18k
  OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
2723
1.18k
}
2724
2725
void CodeGenFunction::EmitOMPForOuterLoop(
2726
    const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
2727
    const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
2728
    const OMPLoopArguments &LoopArgs,
2729
1.02k
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
2730
1.02k
  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
2731
2732
  // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
2733
1.02k
  const bool DynamicOrOrdered = Ordered || 
RT.isDynamic(ScheduleKind.Schedule)986
;
2734
2735
1.02k
  assert((Ordered || !RT.isStaticNonchunked(ScheduleKind.Schedule,
2736
1.02k
                                            LoopArgs.Chunk != nullptr)) &&
2737
1.02k
         "static non-chunked schedule does not need outer loop");
2738
2739
  // Emit outer loop.
2740
  //
2741
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2742
  // When schedule(dynamic,chunk_size) is specified, the iterations are
2743
  // distributed to threads in the team in chunks as the threads request them.
2744
  // Each thread executes a chunk of iterations, then requests another chunk,
2745
  // until no chunks remain to be distributed. Each chunk contains chunk_size
2746
  // iterations, except for the last chunk to be distributed, which may have
2747
  // fewer iterations. When no chunk_size is specified, it defaults to 1.
2748
  //
2749
  // When schedule(guided,chunk_size) is specified, the iterations are assigned
2750
  // to threads in the team in chunks as the executing threads request them.
2751
  // Each thread executes a chunk of iterations, then requests another chunk,
2752
  // until no chunks remain to be assigned. For a chunk_size of 1, the size of
2753
  // each chunk is proportional to the number of unassigned iterations divided
2754
  // by the number of threads in the team, decreasing to 1. For a chunk_size
2755
  // with value k (greater than 1), the size of each chunk is determined in the
2756
  // same way, with the restriction that the chunks do not contain fewer than k
2757
  // iterations (except for the last chunk to be assigned, which may have fewer
2758
  // than k iterations).
2759
  //
2760
  // When schedule(auto) is specified, the decision regarding scheduling is
2761
  // delegated to the compiler and/or runtime system. The programmer gives the
2762
  // implementation the freedom to choose any possible mapping of iterations to
2763
  // threads in the team.
2764
  //
2765
  // When schedule(runtime) is specified, the decision regarding scheduling is
2766
  // deferred until run time, and the schedule and chunk size are taken from the
2767
  // run-sched-var ICV. If the ICV is set to auto, the schedule is
2768
  // implementation defined
2769
  //
2770
  // while(__kmpc_dispatch_next(&LB, &UB)) {
2771
  //   idx = LB;
2772
  //   while (idx <= UB) { BODY; ++idx;
2773
  //   __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
2774
  //   } // inner loop
2775
  // }
2776
  //
2777
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2778
  // When schedule(static, chunk_size) is specified, iterations are divided into
2779
  // chunks of size chunk_size, and the chunks are assigned to the threads in
2780
  // the team in a round-robin fashion in the order of the thread number.
2781
  //
2782
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
2783
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
2784
  //   LB = LB + ST;
2785
  //   UB = UB + ST;
2786
  // }
2787
  //
2788
2789
0
  const Expr *IVExpr = S.getIterationVariable();
2790
1.02k
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2791
1.02k
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2792
2793
1.02k
  if (DynamicOrOrdered) {
2794
744
    const std::pair<llvm::Value *, llvm::Value *> DispatchBounds =
2795
744
        CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
2796
744
    llvm::Value *LBVal = DispatchBounds.first;
2797
744
    llvm::Value *UBVal = DispatchBounds.second;
2798
744
    CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal,
2799
744
                                                             LoopArgs.Chunk};
2800
744
    RT.emitForDispatchInit(*this, S.getBeginLoc(), ScheduleKind, IVSize,
2801
744
                           IVSigned, Ordered, DipatchRTInputValues);
2802
744
  } else {
2803
279
    CGOpenMPRuntime::StaticRTInput StaticInit(
2804
279
        IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
2805
279
        LoopArgs.ST, LoopArgs.Chunk);
2806
279
    RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
2807
279
                         ScheduleKind, StaticInit);
2808
279
  }
2809
2810
1.02k
  auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
2811
1.02k
                                    const unsigned IVSize,
2812
1.02k
                                    const bool IVSigned) {
2813
1.02k
    if (Ordered) {
2814
37
      CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
2815
37
                                                            IVSigned);
2816
37
    }
2817
1.02k
  };
2818
2819
1.02k
  OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
2820
1.02k
                                 LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
2821
1.02k
  OuterLoopArgs.IncExpr = S.getInc();
2822
1.02k
  OuterLoopArgs.Init = S.getInit();
2823
1.02k
  OuterLoopArgs.Cond = S.getCond();
2824
1.02k
  OuterLoopArgs.NextLB = S.getNextLowerBound();
2825
1.02k
  OuterLoopArgs.NextUB = S.getNextUpperBound();
2826
1.02k
  EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
2827
1.02k
                   emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
2828
1.02k
}
2829
2830
static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
2831
164
                             const unsigned IVSize, const bool IVSigned) {}
2832
2833
void CodeGenFunction::EmitOMPDistributeOuterLoop(
2834
    OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
2835
    OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
2836
164
    const CodeGenLoopTy &CodeGenLoopContent) {
2837
2838
164
  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
2839
2840
  // Emit outer loop.
2841
  // Same behavior as a OMPForOuterLoop, except that schedule cannot be
2842
  // dynamic
2843
  //
2844
2845
164
  const Expr *IVExpr = S.getIterationVariable();
2846
164
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2847
164
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2848
2849
164
  CGOpenMPRuntime::StaticRTInput StaticInit(
2850
164
      IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
2851
164
      LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
2852
164
  RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit);
2853
2854
  // for combined 'distribute' and 'for' the increment expression of distribute
2855
  // is stored in DistInc. For 'distribute' alone, it is in Inc.
2856
164
  Expr *IncExpr;
2857
164
  if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
2858
0
    IncExpr = S.getDistInc();
2859
164
  else
2860
164
    IncExpr = S.getInc();
2861
2862
  // this routine is shared by 'omp distribute parallel for' and
2863
  // 'omp distribute': select the right EUB expression depending on the
2864
  // directive
2865
164
  OMPLoopArguments OuterLoopArgs;
2866
164
  OuterLoopArgs.LB = LoopArgs.LB;
2867
164
  OuterLoopArgs.UB = LoopArgs.UB;
2868
164
  OuterLoopArgs.ST = LoopArgs.ST;
2869
164
  OuterLoopArgs.IL = LoopArgs.IL;
2870
164
  OuterLoopArgs.Chunk = LoopArgs.Chunk;
2871
164
  OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2872
164
                          ? 
S.getCombinedEnsureUpperBound()0
2873
164
                          : S.getEnsureUpperBound();
2874
164
  OuterLoopArgs.IncExpr = IncExpr;
2875
164
  OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2876
164
                           ? 
S.getCombinedInit()0
2877
164
                           : S.getInit();
2878
164
  OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2879
164
                           ? 
S.getCombinedCond()0
2880
164
                           : S.getCond();
2881
164
  OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2882
164
                             ? 
S.getCombinedNextLowerBound()0
2883
164
                             : S.getNextLowerBound();
2884
164
  OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2885
164
                             ? 
S.getCombinedNextUpperBound()0
2886
164
                             : S.getNextUpperBound();
2887
2888
164
  EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
2889
164
                   LoopScope, OuterLoopArgs, CodeGenLoopContent,
2890
164
                   emitEmptyOrdered);
2891
164
}
2892
2893
static std::pair<LValue, LValue>
2894
emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
2895
2.74k
                                     const OMPExecutableDirective &S) {
2896
2.74k
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2897
2.74k
  LValue LB =
2898
2.74k
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
2899
2.74k
  LValue UB =
2900
2.74k
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
2901
2902
  // When composing 'distribute' with 'for' (e.g. as in 'distribute
2903
  // parallel for') we need to use the 'distribute'
2904
  // chunk lower and upper bounds rather than the whole loop iteration
2905
  // space. These are parameters to the outlined function for 'parallel'
2906
  // and we copy the bounds of the previous schedule into the
2907
  // the current ones.
2908
2.74k
  LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
2909
2.74k
  LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
2910
2.74k
  llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
2911
2.74k
      PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc());
2912
2.74k
  PrevLBVal = CGF.EmitScalarConversion(
2913
2.74k
      PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
2914
2.74k
      LS.getIterationVariable()->getType(),
2915
2.74k
      LS.getPrevLowerBoundVariable()->getExprLoc());
2916
2.74k
  llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
2917
2.74k
      PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc());
2918
2.74k
  PrevUBVal = CGF.EmitScalarConversion(
2919
2.74k
      PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
2920
2.74k
      LS.getIterationVariable()->getType(),
2921
2.74k
      LS.getPrevUpperBoundVariable()->getExprLoc());
2922
2923
2.74k
  CGF.EmitStoreOfScalar(PrevLBVal, LB);
2924
2.74k
  CGF.EmitStoreOfScalar(PrevUBVal, UB);
2925
2926
2.74k
  return {LB, UB};
2927
2.74k
}
2928
2929
/// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
2930
/// we need to use the LB and UB expressions generated by the worksharing
2931
/// code generation support, whereas in non combined situations we would
2932
/// just emit 0 and the LastIteration expression
2933
/// This function is necessary due to the difference of the LB and UB
2934
/// types for the RT emission routines for 'for_static_init' and
2935
/// 'for_dispatch_init'
2936
static std::pair<llvm::Value *, llvm::Value *>
2937
emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
2938
                                        const OMPExecutableDirective &S,
2939
440
                                        Address LB, Address UB) {
2940
440
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2941
440
  const Expr *IVExpr = LS.getIterationVariable();
2942
  // when implementing a dynamic schedule for a 'for' combined with a
2943
  // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
2944
  // is not normalized as each team only executes its own assigned
2945
  // distribute chunk
2946
440
  QualType IteratorTy = IVExpr->getType();
2947
440
  llvm::Value *LBVal =
2948
440
      CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
2949
440
  llvm::Value *UBVal =
2950
440
      CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
2951
440
  return {LBVal, UBVal};
2952
440
}
2953
2954
static void emitDistributeParallelForDistributeInnerBoundParams(
2955
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
2956
2.74k
    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
2957
2.74k
  const auto &Dir = cast<OMPLoopDirective>(S);
2958
2.74k
  LValue LB =
2959
2.74k
      CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
2960
2.74k
  llvm::Value *LBCast =
2961
2.74k
      CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(LB.getAddress(CGF)),
2962
2.74k
                                CGF.SizeTy, /*isSigned=*/false);
2963
2.74k
  CapturedVars.push_back(LBCast);
2964
2.74k
  LValue UB =
2965
2.74k
      CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
2966
2967
2.74k
  llvm::Value *UBCast =
2968
2.74k
      CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(UB.getAddress(CGF)),
2969
2.74k
                                CGF.SizeTy, /*isSigned=*/false);
2970
2.74k
  CapturedVars.push_back(UBCast);
2971
2.74k
}
2972
2973
static void
2974
emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
2975
                                 const OMPLoopDirective &S,
2976
2.74k
                                 CodeGenFunction::JumpDest LoopExit) {
2977
2.74k
  auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
2978
2.74k
                                         PrePostActionTy &Action) {
2979
2.74k
    Action.Enter(CGF);
2980
2.74k
    bool HasCancel = false;
2981
2.74k
    if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
2982
1.34k
      if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
2983
340
        HasCancel = D->hasCancel();
2984
1.00k
      else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S))
2985
401
        HasCancel = D->hasCancel();
2986
606
      else if (const auto *D =
2987
606
                   dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S))
2988
606
        HasCancel = D->hasCancel();
2989
1.34k
    }
2990
2.74k
    CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
2991
2.74k
                                                     HasCancel);
2992
2.74k
    CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
2993
2.74k
                               emitDistributeParallelForInnerBounds,
2994
2.74k
                               emitDistributeParallelForDispatchBounds);
2995
2.74k
  };
2996
2997
2.74k
  emitCommonOMPParallelDirective(
2998
2.74k
      CGF, S,
2999
2.74k
      isOpenMPSimdDirective(S.getDirectiveKind()) ? 
OMPD_for_simd1.39k
:
OMPD_for1.34k
,
3000
2.74k
      CGInlinedWorksharingLoop,
3001
2.74k
      emitDistributeParallelForDistributeInnerBoundParams);
3002
2.74k
}
3003
3004
void CodeGenFunction::EmitOMPDistributeParallelForDirective(
3005
401
    const OMPDistributeParallelForDirective &S) {
3006
401
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3007
401
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
3008
401
                              S.getDistInc());
3009
401
  };
3010
401
  OMPLexicalScope Scope(*this, S, OMPD_parallel);
3011
401
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
3012
401
}
3013
3014
void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
3015
308
    const OMPDistributeParallelForSimdDirective &S) {
3016
308
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3017
308
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
3018
308
                              S.getDistInc());
3019
308
  };
3020
308
  OMPLexicalScope Scope(*this, S, OMPD_parallel);
3021
308
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
3022
308
}
3023
3024
void CodeGenFunction::EmitOMPDistributeSimdDirective(
3025
150
    const OMPDistributeSimdDirective &S) {
3026
150
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3027
150
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
3028
150
  };
3029
150
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
3030
150
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
3031
150
}
3032
3033
void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
3034
193
    CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
3035
  // Emit SPMD target parallel for region as a standalone region.
3036
193
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3037
193
    emitOMPSimdRegion(CGF, S, Action);
3038
193
  };
3039
193
  llvm::Function *Fn;
3040
193
  llvm::Constant *Addr;
3041
  // Emit target region as a standalone region.
3042
193
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3043
193
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
3044
193
  assert(Fn && Addr && "Target device function emission failed.");
3045
193
}
3046
3047
void CodeGenFunction::EmitOMPTargetSimdDirective(
3048
355
    const OMPTargetSimdDirective &S) {
3049
355
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3050
355
    emitOMPSimdRegion(CGF, S, Action);
3051
355
  };
3052
355
  emitCommonOMPTargetDirective(*this, S, CodeGen);
3053
355
}
3054
3055
namespace {
3056
struct ScheduleKindModifiersTy {
3057
  OpenMPScheduleClauseKind Kind;
3058
  OpenMPScheduleClauseModifier M1;
3059
  OpenMPScheduleClauseModifier M2;
3060
  ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
3061
                          OpenMPScheduleClauseModifier M1,
3062
                          OpenMPScheduleClauseModifier M2)
3063
0
      : Kind(Kind), M1(M1), M2(M2) {}
3064
};
3065
} // namespace
3066
3067
bool CodeGenFunction::EmitOMPWorksharingLoop(
3068
    const OMPLoopDirective &S, Expr *EUB,
3069
    const CodeGenLoopBoundsTy &CodeGenLoopBounds,
3070
4.73k
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
3071
  // Emit the loop iteration variable.
3072
4.73k
  const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
3073
4.73k
  const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
3074
4.73k
  EmitVarDecl(*IVDecl);
3075
3076
  // Emit the iterations count variable.
3077
  // If it is not a variable, Sema decided to calculate iterations count on each
3078
  // iteration (e.g., it is foldable into a constant).
3079
4.73k
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
3080
0
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
3081
    // Emit calculation of the iterations count.
3082
0
    EmitIgnoredExpr(S.getCalcLastIteration());
3083
0
  }
3084
3085
4.73k
  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
3086
3087
4.73k
  bool HasLastprivateClause;
3088
  // Check pre-condition.
3089
4.73k
  {
3090
4.73k
    OMPLoopScope PreInitScope(*this, S);
3091
    // Skip the entire loop if we don't meet the precondition.
3092
    // If the condition constant folds and can be elided, avoid emitting the
3093
    // whole loop.
3094
4.73k
    bool CondConstant;
3095
4.73k
    llvm::BasicBlock *ContBlock = nullptr;
3096
4.73k
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
3097
3.92k
      if (!CondConstant)
3098
52
        return false;
3099
3.92k
    } else {
3100
804
      llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
3101
804
      ContBlock = createBasicBlock("omp.precond.end");
3102
804
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
3103
804
                  getProfileCount(&S));
3104
804
      EmitBlock(ThenBlock);
3105
804
      incrementProfileCounter(&S);
3106
804
    }
3107
3108
4.68k
    RunCleanupsScope DoacrossCleanupScope(*this);
3109
4.68k
    bool Ordered = false;
3110
4.68k
    if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
3111
53
      if (OrderedClause->getNumForLoops())
3112
16
        RT.emitDoacrossInit(*this, S, OrderedClause->getLoopNumIterations());
3113
37
      else
3114
37
        Ordered = true;
3115
53
    }
3116
3117
4.68k
    llvm::DenseSet<const Expr *> EmittedFinals;
3118
4.68k
    emitAlignedClause(*this, S);
3119
4.68k
    bool HasLinears = EmitOMPLinearClauseInit(S);
3120
    // Emit helper vars inits.
3121
3122
4.68k
    std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
3123
4.68k
    LValue LB = Bounds.first;
3124
4.68k
    LValue UB = Bounds.second;
3125
4.68k
    LValue ST =
3126
4.68k
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
3127
4.68k
    LValue IL =
3128
4.68k
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
3129
3130
    // Emit 'then' code.
3131
4.68k
    {
3132
4.68k
      OMPPrivateScope LoopScope(*this);
3133
4.68k
      if (EmitOMPFirstprivateClause(S, LoopScope) || 
HasLinears4.66k
) {
3134
        // Emit implicit barrier to synchronize threads and avoid data races on
3135
        // initialization of firstprivate variables and post-update of
3136
        // lastprivate variables.
3137
150
        CGM.getOpenMPRuntime().emitBarrierCall(
3138
150
            *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
3139
150
            /*ForceSimpleCall=*/true);
3140
150
      }
3141
4.68k
      EmitOMPPrivateClause(S, LoopScope);
3142
4.68k
      CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
3143
4.68k
          *this, S, EmitLValue(S.getIterationVariable()));
3144
4.68k
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
3145
4.68k
      EmitOMPReductionClauseInit(S, LoopScope);
3146
4.68k
      EmitOMPPrivateLoopCounters(S, LoopScope);
3147
4.68k
      EmitOMPLinearClause(S, LoopScope);
3148
4.68k
      (void)LoopScope.Privatize();
3149
4.68k
      if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
3150
2.35k
        CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);
3151
3152
      // Detect the loop schedule kind and chunk.
3153
4.68k
      const Expr *ChunkExpr = nullptr;
3154
4.68k
      OpenMPScheduleTy ScheduleKind;
3155
4.68k
      if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
3156
1.21k
        ScheduleKind.Schedule = C->getScheduleKind();
3157
1.21k
        ScheduleKind.M1 = C->getFirstScheduleModifier();
3158
1.21k
        ScheduleKind.M2 = C->getSecondScheduleModifier();
3159
1.21k
        ChunkExpr = C->getChunkSize();
3160
3.46k
      } else {
3161
        // Default behaviour for schedule clause.
3162
3.46k
        CGM.getOpenMPRuntime().getDefaultScheduleAndChunk(
3163
3.46k
            *this, S, ScheduleKind.Schedule, ChunkExpr);
3164
3.46k
      }
3165
4.68k
      bool HasChunkSizeOne = false;
3166
4.68k
      llvm::Value *Chunk = nullptr;
3167
4.68k
      if (ChunkExpr) {
3168
537
        Chunk = EmitScalarExpr(ChunkExpr);
3169
537
        Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(),
3170
537
                                     S.getIterationVariable()->getType(),
3171
537
                                     S.getBeginLoc());
3172
537
        Expr::EvalResult Result;
3173
537
        if (ChunkExpr->EvaluateAsInt(Result, getContext())) {
3174
332
          llvm::APSInt EvaluatedChunk = Result.Val.getInt();
3175
332
          HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1);
3176
332
        }
3177
537
      }
3178
4.68k
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
3179
4.68k
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
3180
      // OpenMP 4.5, 2.7.1 Loop Construct, Description.
3181
      // If the static schedule kind is specified or if the ordered clause is
3182
      // specified, and if no monotonic modifier is specified, the effect will
3183
      // be as if the monotonic modifier was specified.
3184
4.68k
      bool StaticChunkedOne =
3185
4.68k
          RT.isStaticChunked(ScheduleKind.Schedule,
3186
4.68k
                             /* Chunked */ Chunk != nullptr) &&
3187
4.68k
          
HasChunkSizeOne405
&&
3188
4.68k
          
isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())211
;
3189
4.68k
      bool IsMonotonic =
3190
4.68k
          Ordered ||
3191
4.68k
          
(4.64k
ScheduleKind.Schedule == OMPC_SCHEDULE_static4.64k
&&
3192
4.64k
           
!(592
ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic592
||
3193
592
             
ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic591
)) ||
3194
4.68k
          
ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic4.05k
||
3195
4.68k
          
ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic4.04k
;
3196
4.68k
      if ((RT.isStaticNonchunked(ScheduleKind.Schedule,
3197
4.68k
                                 /* Chunked */ Chunk != nullptr) ||
3198
4.68k
           
StaticChunkedOne1.12k
) &&
3199
4.68k
          
!Ordered3.67k
) {
3200
3.65k
        JumpDest LoopExit =
3201
3.65k
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
3202
3.65k
        emitCommonSimdLoop(
3203
3.65k
            *this, S,
3204
3.65k
            [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3205
3.63k
              if (isOpenMPSimdDirective(S.getDirectiveKind())) {
3206
1.73k
                CGF.EmitOMPSimdInit(S);
3207
1.90k
              } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
3208
5
                if (C->getKind() == OMPC_ORDER_concurrent)
3209
5
                  CGF.LoopStack.setParallel(/*Enable=*/true);
3210
5
              }
3211
3.63k
            },
3212
3.65k
            [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk,
3213
3.65k
             &S, ScheduleKind, LoopExit,
3214
3.70k
             &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
3215
              // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
3216
              // When no chunk_size is specified, the iteration space is divided
3217
              // into chunks that are approximately equal in size, and at most
3218
              // one chunk is distributed to each thread. Note that the size of
3219
              // the chunks is unspecified in this case.
3220
3.70k
              CGOpenMPRuntime::StaticRTInput StaticInit(
3221
3.70k
                  IVSize, IVSigned, Ordered, IL.getAddress(CGF),
3222
3.70k
                  LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF),
3223
3.70k
                  StaticChunkedOne ? 
Chunk125
:
nullptr3.57k
);
3224
3.70k
              CGF.CGM.getOpenMPRuntime().emitForStaticInit(
3225
3.70k
                  CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind,
3226
3.70k
                  StaticInit);
3227
              // UB = min(UB, GlobalUB);
3228
3.70k
              if (!StaticChunkedOne)
3229
3.57k
                CGF.EmitIgnoredExpr(S.getEnsureUpperBound());
3230
              // IV = LB;
3231
3.70k
              CGF.EmitIgnoredExpr(S.getInit());
3232
              // For unchunked static schedule generate:
3233
              //
3234
              // while (idx <= UB) {
3235
              //   BODY;
3236
              //   ++idx;
3237
              // }
3238
              //
3239
              // For static schedule with chunk one:
3240
              //
3241
              // while (IV <= PrevUB) {
3242
              //   BODY;
3243
              //   IV += ST;
3244
              // }
3245
3.70k
              CGF.EmitOMPInnerLoop(
3246
3.70k
                  S, LoopScope.requiresCleanups(),
3247
3.70k
                  StaticChunkedOne ? 
S.getCombinedParForInDistCond()125
3248
3.70k
                                   : 
S.getCond()3.57k
,
3249
3.70k
                  StaticChunkedOne ? 
S.getDistInc()125
:
S.getInc()3.57k
,
3250
3.70k
                  [&S, LoopExit](CodeGenFunction &CGF) {
3251
3.70k
                    emitOMPLoopBodyWithStopPoint(CGF, S, LoopExit);
3252
3.70k
                  },
3253
3.70k
                  [](CodeGenFunction &) {});
3254
3.70k
            });
3255
3.65k
        EmitBlock(LoopExit.getBlock());
3256
        // Tell the runtime we are done.
3257
3.71k
        auto &&CodeGen = [&S](CodeGenFunction &CGF) {
3258
3.71k
          CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
3259
3.71k
                                                         S.getDirectiveKind());
3260
3.71k
        };
3261
3.65k
        OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
3262
3.65k
      } else {
3263
        // Emit the outer loop, which requests its work chunk [LB..UB] from
3264
        // runtime and runs the inner loop to process it.
3265
1.02k
        const OMPLoopArguments LoopArguments(
3266
1.02k
            LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
3267
1.02k
            IL.getAddress(*this), Chunk, EUB);
3268
1.02k
        EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
3269
1.02k
                            LoopArguments, CGDispatchBounds);
3270
1.02k
      }
3271
4.68k
      if (isOpenMPSimdDirective(S.getDirectiveKind())) {
3272
2.20k
        EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
3273
2.20k
          return CGF.Builder.CreateIsNotNull(
3274
2.20k
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3275
2.20k
        });
3276
2.20k
      }
3277
4.68k
      EmitOMPReductionClauseFinal(
3278
4.68k
          S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
3279
4.68k
                 ? /*Parallel and Simd*/ 
OMPD_parallel_for_simd2.20k
3280
4.68k
                 : /*Parallel only*/ 
OMPD_parallel2.48k
);
3281
      // Emit post-update of the reduction variables if IsLastIter != 0.
3282
4.68k
      emitPostUpdateForReductionClause(
3283
4.68k
          *this, S, [IL, &S](CodeGenFunction &CGF) {
3284
0
            return CGF.Builder.CreateIsNotNull(
3285
0
                CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3286
0
          });
3287
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
3288
4.68k
      if (HasLastprivateClause)
3289
160
        EmitOMPLastprivateClauseFinal(
3290
160
            S, isOpenMPSimdDirective(S.getDirectiveKind()),
3291
160
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
3292
4.68k
    }
3293
4.68k
    EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) {
3294
134
      return CGF.Builder.CreateIsNotNull(
3295
134
          CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3296
134
    });
3297
4.68k
    DoacrossCleanupScope.ForceCleanup();
3298
    // We're now done with the loop, so jump to the continuation block.
3299
4.68k
    if (ContBlock) {
3300
804
      EmitBranch(ContBlock);
3301
804
      EmitBlock(ContBlock, /*IsFinished=*/true);
3302
804
    }
3303
4.68k
  }
3304
0
  return HasLastprivateClause;
3305
4.73k
}
3306
3307
/// The following two functions generate expressions for the loop lower
3308
/// and upper bounds in case of static and dynamic (dispatch) schedule
3309
/// of the associated 'for' or 'distribute' loop.
3310
static std::pair<LValue, LValue>
3311
1.93k
emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
3312
1.93k
  const auto &LS = cast<OMPLoopDirective>(S);
3313
1.93k
  LValue LB =
3314
1.93k
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
3315
1.93k
  LValue UB =
3316
1.93k
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
3317
1.93k
  return {LB, UB};
3318
1.93k
}
3319
3320
/// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
3321
/// consider the lower and upper bound expressions generated by the
3322
/// worksharing loop support, but we use 0 and the iteration space size as
3323
/// constants
3324
static std::pair<llvm::Value *, llvm::Value *>
3325
emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
3326
304
                          Address LB, Address UB) {
3327
304
  const auto &LS = cast<OMPLoopDirective>(S);
3328
304
  const Expr *IVExpr = LS.getIterationVariable();
3329
304
  const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
3330
304
  llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
3331
304
  llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
3332
304
  return {LBVal, UBVal};
3333
304
}
3334
3335
/// Emits internal temp array declarations for the directive with inscan
3336
/// reductions.
3337
/// The code is the following:
3338
/// \code
3339
/// size num_iters = <num_iters>;
3340
/// <type> buffer[num_iters];
3341
/// \endcode
3342
static void emitScanBasedDirectiveDecls(
3343
    CodeGenFunction &CGF, const OMPLoopDirective &S,
3344
16
    llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
3345
16
  llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
3346
16
      NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
3347
16
  SmallVector<const Expr *, 4> Shareds;
3348
16
  SmallVector<const Expr *, 4> Privates;
3349
16
  SmallVector<const Expr *, 4> ReductionOps;
3350
16
  SmallVector<const Expr *, 4> CopyArrayTemps;
3351
16
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
3352
16
    assert(C->getModifier() == OMPC_REDUCTION_inscan &&
3353
16
           "Only inscan reductions are expected.");
3354
0
    Shareds.append(C->varlist_begin(), C->varlist_end());
3355
16
    Privates.append(C->privates().begin(), C->privates().end());
3356
16
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
3357
16
    CopyArrayTemps.append(C->copy_array_temps().begin(),
3358
16
                          C->copy_array_temps().end());
3359
16
  }
3360
16
  {
3361
    // Emit buffers for each reduction variables.
3362
    // ReductionCodeGen is required to emit correctly the code for array
3363
    // reductions.
3364
16
    ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
3365
16
    unsigned Count = 0;
3366
16
    auto *ITA = CopyArrayTemps.begin();
3367
32
    for (const Expr *IRef : Privates) {
3368
32
      const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
3369
      // Emit variably modified arrays, used for arrays/array sections
3370
      // reductions.
3371
32
      if (PrivateVD->getType()->isVariablyModifiedType()) {
3372
16
        RedCG.emitSharedOrigLValue(CGF, Count);
3373
16
        RedCG.emitAggregateType(CGF, Count);
3374
16
      }
3375
32
      CodeGenFunction::OpaqueValueMapping DimMapping(
3376
32
          CGF,
3377
32
          cast<OpaqueValueExpr>(
3378
32
              cast<VariableArrayType>((*ITA)->getType()->getAsArrayTypeUnsafe())
3379
32
                  ->getSizeExpr()),
3380
32
          RValue::get(OMPScanNumIterations));
3381
      // Emit temp buffer.
3382
32
      CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(*ITA)->getDecl()));
3383
32
      ++ITA;
3384
32
      ++Count;
3385
32
    }
3386
16
  }
3387
16
}
3388
3389
/// Emits the code for the directive with inscan reductions.
3390
/// The code is the following:
3391
/// \code
3392
/// #pragma omp ...
3393
/// for (i: 0..<num_iters>) {
3394
///   <input phase>;
3395
///   buffer[i] = red;
3396
/// }
3397
/// #pragma omp master // in parallel region
3398
/// for (int k = 0; k != ceil(log2(num_iters)); ++k)
3399
/// for (size cnt = last_iter; cnt >= pow(2, k); --k)
3400
///   buffer[i] op= buffer[i-pow(2,k)];
3401
/// #pragma omp barrier // in parallel region
3402
/// #pragma omp ...
3403
/// for (0..<num_iters>) {
3404
///   red = InclusiveScan ? buffer[i] : buffer[i-1];
3405
///   <scan phase>;
3406
/// }
3407
/// \endcode
3408
static void emitScanBasedDirective(
3409
    CodeGenFunction &CGF, const OMPLoopDirective &S,
3410
    llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen,
3411
    llvm::function_ref<void(CodeGenFunction &)> FirstGen,
3412
16
    llvm::function_ref<void(CodeGenFunction &)> SecondGen) {
3413
16
  llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
3414
16
      NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
3415
16
  SmallVector<const Expr *, 4> Privates;
3416
16
  SmallVector<const Expr *, 4> ReductionOps;
3417
16
  SmallVector<const Expr *, 4> LHSs;
3418
16
  SmallVector<const Expr *, 4> RHSs;
3419
16
  SmallVector<const Expr *, 4> CopyArrayElems;
3420
16
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
3421
16
    assert(C->getModifier() == OMPC_REDUCTION_inscan &&
3422
16
           "Only inscan reductions are expected.");
3423
0
    Privates.append(C->privates().begin(), C->privates().end());
3424
16
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
3425
16
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
3426
16
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
3427
16
    CopyArrayElems.append(C->copy_array_elems().begin(),
3428
16
                          C->copy_array_elems().end());
3429
16
  }
3430
16
  CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S);
3431
16
  {
3432
    // Emit loop with input phase:
3433
    // #pragma omp ...
3434
    // for (i: 0..<num_iters>) {
3435
    //   <input phase>;
3436
    //   buffer[i] = red;
3437
    // }
3438
16
    CGF.OMPFirstScanLoop = true;
3439
16
    CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
3440
16
    FirstGen(CGF);
3441
16
  }
3442
  // #pragma omp barrier // in parallel region
3443
16
  auto &&CodeGen = [&S, OMPScanNumIterations, &LHSs, &RHSs, &CopyArrayElems,
3444
16
                    &ReductionOps,
3445
16
                    &Privates](CodeGenFunction &CGF, PrePostActionTy &Action) {
3446
16
    Action.Enter(CGF);
3447
    // Emit prefix reduction:
3448
    // #pragma omp master // in parallel region
3449
    // for (int k = 0; k <= ceil(log2(n)); ++k)
3450
16
    llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock();
3451
16
    llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.outer.log.scan.body");
3452
16
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.outer.log.scan.exit");
3453
16
    llvm::Function *F =
3454
16
        CGF.CGM.getIntrinsic(llvm::Intrinsic::log2, CGF.DoubleTy);
3455
16
    llvm::Value *Arg =
3456
16
        CGF.Builder.CreateUIToFP(OMPScanNumIterations, CGF.DoubleTy);
3457
16
    llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(F, Arg);
3458
16
    F = CGF.CGM.getIntrinsic(llvm::Intrinsic::ceil, CGF.DoubleTy);
3459
16
    LogVal = CGF.EmitNounwindRuntimeCall(F, LogVal);
3460
16
    LogVal = CGF.Builder.CreateFPToUI(LogVal, CGF.IntTy);
3461
16
    llvm::Value *NMin1 = CGF.Builder.CreateNUWSub(
3462
16
        OMPScanNumIterations, llvm::ConstantInt::get(CGF.SizeTy, 1));
3463
16
    auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getBeginLoc());
3464
16
    CGF.EmitBlock(LoopBB);
3465
16
    auto *Counter = CGF.Builder.CreatePHI(CGF.IntTy, 2);
3466
    // size pow2k = 1;
3467
16
    auto *Pow2K = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
3468
16
    Counter->addIncoming(llvm::ConstantInt::get(CGF.IntTy, 0), InputBB);
3469
16
    Pow2K->addIncoming(llvm::ConstantInt::get(CGF.SizeTy, 1), InputBB);
3470
    // for (size i = n - 1; i >= 2 ^ k; --i)
3471
    //   tmp[i] op= tmp[i-pow2k];
3472
16
    llvm::BasicBlock *InnerLoopBB =
3473
16
        CGF.createBasicBlock("omp.inner.log.scan.body");
3474
16
    llvm::BasicBlock *InnerExitBB =
3475
16
        CGF.createBasicBlock("omp.inner.log.scan.exit");
3476
16
    llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(NMin1, Pow2K);
3477
16
    CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
3478
16
    CGF.EmitBlock(InnerLoopBB);
3479
16
    auto *IVal = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
3480
16
    IVal->addIncoming(NMin1, LoopBB);
3481
16
    {
3482
16
      CodeGenFunction::OMPPrivateScope PrivScope(CGF);
3483
16
      auto *ILHS = LHSs.begin();
3484
16
      auto *IRHS = RHSs.begin();
3485
32
      for (const Expr *CopyArrayElem : CopyArrayElems) {
3486
32
        const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
3487
32
        const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
3488
32
        Address LHSAddr = Address::invalid();
3489
32
        {
3490
32
          CodeGenFunction::OpaqueValueMapping IdxMapping(
3491
32
              CGF,
3492
32
              cast<OpaqueValueExpr>(
3493
32
                  cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
3494
32
              RValue::get(IVal));
3495
32
          LHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
3496
32
        }
3497
32
        PrivScope.addPrivate(LHSVD, [LHSAddr]() { return LHSAddr; });
3498
32
        Address RHSAddr = Address::invalid();
3499
32
        {
3500
32
          llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(IVal, Pow2K);
3501
32
          CodeGenFunction::OpaqueValueMapping IdxMapping(
3502
32
              CGF,
3503
32
              cast<OpaqueValueExpr>(
3504
32
                  cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
3505
32
              RValue::get(OffsetIVal));
3506
32
          RHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
3507
32
        }
3508
32
        PrivScope.addPrivate(RHSVD, [RHSAddr]() { return RHSAddr; });
3509
32
        ++ILHS;
3510
32
        ++IRHS;
3511
32
      }
3512
16
      PrivScope.Privatize();
3513
16
      CGF.CGM.getOpenMPRuntime().emitReduction(
3514
16
          CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
3515
16
          {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown});
3516
16
    }
3517
16
    llvm::Value *NextIVal =
3518
16
        CGF.Builder.CreateNUWSub(IVal, llvm::ConstantInt::get(CGF.SizeTy, 1));
3519
16
    IVal->addIncoming(NextIVal, CGF.Builder.GetInsertBlock());
3520
16
    CmpI = CGF.Builder.CreateICmpUGE(NextIVal, Pow2K);
3521
16
    CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
3522
16
    CGF.EmitBlock(InnerExitBB);
3523
16
    llvm::Value *Next =
3524
16
        CGF.Builder.CreateNUWAdd(Counter, llvm::ConstantInt::get(CGF.IntTy, 1));
3525
16
    Counter->addIncoming(Next, CGF.Builder.GetInsertBlock());
3526
    // pow2k <<= 1;
3527
16
    llvm::Value *NextPow2K =
3528
16
        CGF.Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true);
3529
16
    Pow2K->addIncoming(NextPow2K, CGF.Builder.GetInsertBlock());
3530
16
    llvm::Value *Cmp = CGF.Builder.CreateICmpNE(Next, LogVal);
3531
16
    CGF.Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
3532
16
    auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getEndLoc());
3533
16
    CGF.EmitBlock(ExitBB);
3534
16
  };
3535
16
  if (isOpenMPParallelDirective(S.getDirectiveKind())) {
3536
8
    CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc());
3537
8
    CGF.CGM.getOpenMPRuntime().emitBarrierCall(
3538
8
        CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
3539
8
        /*ForceSimpleCall=*/true);
3540
8
  } else {
3541
8
    RegionCodeGenTy RCG(CodeGen);
3542
8
    RCG(CGF);
3543
8
  }
3544
3545
16
  CGF.OMPFirstScanLoop = false;
3546
16
  SecondGen(CGF);
3547
16
}
3548
3549
static bool emitWorksharingDirective(CodeGenFunction &CGF,
3550
                                     const OMPLoopDirective &S,
3551
937
                                     bool HasCancel) {
3552
937
  bool HasLastprivates;
3553
937
  if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
3554
937
                   [](const OMPReductionClause *C) {
3555
193
                     return C->getModifier() == OMPC_REDUCTION_inscan;
3556
193
                   })) {
3557
24
    const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
3558
24
      CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
3559
24
      OMPLoopScope LoopScope(CGF, S);
3560
24
      return CGF.EmitScalarExpr(S.getNumIterations());
3561
24
    };
3562
16
    const auto &&FirstGen = [&S, HasCancel](CodeGenFunction &CGF) {
3563
16
      CodeGenFunction::OMPCancelStackRAII CancelRegion(
3564
16
          CGF, S.getDirectiveKind(), HasCancel);
3565
16
      (void)CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
3566
16
                                       emitForLoopBounds,
3567
16
                                       emitDispatchForLoopBounds);
3568
      // Emit an implicit barrier at the end.
3569
16
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getBeginLoc(),
3570
16
                                                 OMPD_for);
3571
16
    };
3572
16
    const auto &&SecondGen = [&S, HasCancel,
3573
16
                              &HasLastprivates](CodeGenFunction &CGF) {
3574
16
      CodeGenFunction::OMPCancelStackRAII CancelRegion(
3575
16
          CGF, S.getDirectiveKind(), HasCancel);
3576
16
      HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
3577
16
                                                   emitForLoopBounds,
3578
16
                                                   emitDispatchForLoopBounds);
3579
16
    };
3580
16
    if (!isOpenMPParallelDirective(S.getDirectiveKind()))
3581
8
      emitScanBasedDirectiveDecls(CGF, S, NumIteratorsGen);
3582
16
    emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen);
3583
921
  } else {
3584
921
    CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
3585
921
                                                     HasCancel);
3586
921
    HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
3587
921
                                                 emitForLoopBounds,
3588
921
                                                 emitDispatchForLoopBounds);
3589
921
  }
3590
937
  return HasLastprivates;
3591
937
}
3592
3593
27
static bool isSupportedByOpenMPIRBuilder(const OMPForDirective &S) {
3594
27
  if (S.hasCancel())
3595
4
    return false;
3596
23
  for (OMPClause *C : S.clauses())
3597
0
    if (!isa<OMPNowaitClause>(C))
3598
0
      return false;
3599
3600
23
  return true;
3601
23
}
3602
3603
376
void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
3604
376
  bool HasLastprivates = false;
3605
376
  bool UseOMPIRBuilder =
3606
376
      CGM.getLangOpts().OpenMPIRBuilder && 
isSupportedByOpenMPIRBuilder(S)27
;
3607
376
  auto &&CodeGen = [this, &S, &HasLastprivates,
3608
376
                    UseOMPIRBuilder](CodeGenFunction &CGF, PrePostActionTy &) {
3609
    // Use the OpenMPIRBuilder if enabled.
3610
376
    if (UseOMPIRBuilder) {
3611
      // Emit the associated statement and get its loop representation.
3612
23
      const Stmt *Inner = S.getRawStmt();
3613
23
      llvm::CanonicalLoopInfo *CLI =
3614
23
          EmitOMPCollapsedCanonicalLoopNest(Inner, 1);
3615
3616
23
      bool NeedsBarrier = !S.getSingleClause<OMPNowaitClause>();
3617
23
      llvm::OpenMPIRBuilder &OMPBuilder =
3618
23
          CGM.getOpenMPRuntime().getOMPBuilder();
3619
23
      llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
3620
23
          AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
3621
23
      OMPBuilder.applyWorkshareLoop(Builder.getCurrentDebugLocation(), CLI,
3622
23
                                    AllocaIP, NeedsBarrier);
3623
23
      return;
3624
23
    }
3625
3626
353
    HasLastprivates = emitWorksharingDirective(CGF, S, S.hasCancel());
3627
353
  };
3628
376
  {
3629
376
    auto LPCRegion =
3630
376
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3631
376
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
3632
376
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
3633
376
                                                S.hasCancel());
3634
376
  }
3635
3636
376
  if (!UseOMPIRBuilder) {
3637
    // Emit an implicit barrier at the end.
3638
353
    if (!S.getSingleClause<OMPNowaitClause>() || 
HasLastprivates11
)
3639
342
      CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
3640
353
  }
3641
  // Check for outer lastprivate conditional update.
3642
376
  checkForLastprivateConditionalUpdate(*this, S);
3643
376
}
3644
3645
249
void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
3646
249
  bool HasLastprivates = false;
3647
249
  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
3648
249
                                          PrePostActionTy &) {
3649
249
    HasLastprivates = emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
3650
249
  };
3651
249
  {
3652
249
    auto LPCRegion =
3653
249
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3654
249
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
3655
249
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
3656
249
  }
3657
3658
  // Emit an implicit barrier at the end.
3659
249
  if (!S.getSingleClause<OMPNowaitClause>() || 
HasLastprivates0
)
3660
249
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
3661
  // Check for outer lastprivate conditional update.
3662
249
  checkForLastprivateConditionalUpdate(*this, S);
3663
249
}
3664
3665
static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
3666
                                const Twine &Name,
3667
400
                                llvm::Value *Init = nullptr) {
3668
400
  LValue LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
3669
400
  if (Init)
3670
320
    CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
3671
400
  return LVal;
3672
400
}
3673
3674
80
void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
3675
80
  const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
3676
80
  const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
3677
80
  bool HasLastprivates = false;
3678
80
  auto &&CodeGen = [&S, CapturedStmt, CS,
3679
80
                    &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) {
3680
80
    const ASTContext &C = CGF.getContext();
3681
80
    QualType KmpInt32Ty =
3682
80
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3683
    // Emit helper vars inits.
3684
80
    LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
3685
80
                                  CGF.Builder.getInt32(0));
3686
80
    llvm::ConstantInt *GlobalUBVal = CS != nullptr
3687
80
                                         ? CGF.Builder.getInt32(CS->size() - 1)
3688
80
                                         : 
CGF.Builder.getInt32(0)0
;
3689
80
    LValue UB =
3690
80
        createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
3691
80
    LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
3692
80
                                  CGF.Builder.getInt32(1));
3693
80
    LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
3694
80
                                  CGF.Builder.getInt32(0));
3695
    // Loop counter.
3696
80
    LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
3697
80
    OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
3698
80
    CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
3699
80
    OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
3700
80
    CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
3701
    // Generate condition for loop.
3702
80
    BinaryOperator *Cond = BinaryOperator::Create(
3703
80
        C, &IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_PRValue, OK_Ordinary,
3704
80
        S.getBeginLoc(), FPOptionsOverride());
3705
    // Increment for loop counter.
3706
80
    UnaryOperator *Inc = UnaryOperator::Create(
3707
80
        C, &IVRefExpr, UO_PreInc, KmpInt32Ty, VK_PRValue, OK_Ordinary,
3708
80
        S.getBeginLoc(), true, FPOptionsOverride());
3709
80
    auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) {
3710
      // Iterate through all sections and emit a switch construct:
3711
      // switch (IV) {
3712
      //   case 0:
3713
      //     <SectionStmt[0]>;
3714
      //     break;
3715
      // ...
3716
      //   case <NumSection> - 1:
3717
      //     <SectionStmt[<NumSection> - 1]>;
3718
      //     break;
3719
      // }
3720
      // .omp.sections.exit:
3721
80
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
3722
80
      llvm::SwitchInst *SwitchStmt =
3723
80
          CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()),
3724
80
                                   ExitBB, CS == nullptr ? 
10
: CS->size());
3725
80
      if (CS) {
3726
80
        unsigned CaseNumber = 0;
3727
126
        for (const Stmt *SubStmt : CS->children()) {
3728
126
          auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
3729
126
          CGF.EmitBlock(CaseBB);
3730
126
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
3731
126
          CGF.EmitStmt(SubStmt);
3732
126
          CGF.EmitBranch(ExitBB);
3733
126
          ++CaseNumber;
3734
126
        }
3735
80
      } else {
3736
0
        llvm::BasicBlock *CaseBB = CGF.createBasicBlock(".omp.sections.case");
3737
0
        CGF.EmitBlock(CaseBB);
3738
0
        SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
3739
0
        CGF.EmitStmt(CapturedStmt);
3740
0
        CGF.EmitBranch(ExitBB);
3741
0
      }
3742
80
      CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
3743
80
    };
3744
3745
80
    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
3746
80
    if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
3747
      // Emit implicit barrier to synchronize threads and avoid data races on
3748
      // initialization of firstprivate variables and post-update of lastprivate
3749
      // variables.
3750
0
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
3751
0
          CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
3752
0
          /*ForceSimpleCall=*/true);
3753
0
    }
3754
80
    CGF.EmitOMPPrivateClause(S, LoopScope);
3755
80
    CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(CGF, S, IV);
3756
80
    HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
3757
80
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
3758
80
    (void)LoopScope.Privatize();
3759
80
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
3760
0
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
3761
3762
    // Emit static non-chunked loop.
3763
80
    OpenMPScheduleTy ScheduleKind;
3764
80
    ScheduleKind.Schedule = OMPC_SCHEDULE_static;
3765
80
    CGOpenMPRuntime::StaticRTInput StaticInit(
3766
80
        /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(CGF),
3767
80
        LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF));
3768
80
    CGF.CGM.getOpenMPRuntime().emitForStaticInit(
3769
80
        CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, StaticInit);
3770
    // UB = min(UB, GlobalUB);
3771
80
    llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, S.getBeginLoc());
3772
80
    llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect(
3773
80
        CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
3774
80
    CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
3775
    // IV = LB;
3776
80
    CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getBeginLoc()), IV);
3777
    // while (idx <= UB) { BODY; ++idx; }
3778
80
    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, Cond, Inc, BodyGen,
3779
80
                         [](CodeGenFunction &) {});
3780
    // Tell the runtime we are done.
3781
112
    auto &&CodeGen = [&S](CodeGenFunction &CGF) {
3782
112
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
3783
112
                                                     S.getDirectiveKind());
3784
112
    };
3785
80
    CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
3786
80
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
3787
    // Emit post-update of the reduction variables if IsLastIter != 0.
3788
80
    emitPostUpdateForReductionClause(CGF, S, [IL, &S](CodeGenFunction &CGF) {
3789
0
      return CGF.Builder.CreateIsNotNull(
3790
0
          CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3791
0
    });
3792
3793
    // Emit final copy of the lastprivate variables if IsLastIter != 0.
3794
80
    if (HasLastprivates)
3795
16
      CGF.EmitOMPLastprivateClauseFinal(
3796
16
          S, /*NoFinals=*/false,
3797
16
          CGF.Builder.CreateIsNotNull(
3798
16
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc())));
3799
80
  };
3800
3801
80
  bool HasCancel = false;
3802
80
  if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
3803
54
    HasCancel = OSD->hasCancel();
3804
26
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
3805
26
    HasCancel = OPSD->hasCancel();
3806
80
  OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
3807
80
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
3808
80
                                              HasCancel);
3809
  // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
3810
  // clause. Otherwise the barrier will be generated by the codegen for the
3811
  // directive.
3812
80
  if (HasLastprivates && 
S.getSingleClause<OMPNowaitClause>()16
) {
3813
    // Emit implicit barrier to synchronize threads and avoid data races on
3814
    // initialization of firstprivate variables.
3815
0
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
3816
0
                                           OMPD_unknown);
3817
0
  }
3818
80
}
3819
3820
62
void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
3821
62
  if (CGM.getLangOpts().OpenMPIRBuilder) {
3822
8
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
3823
8
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
3824
8
    using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
3825
3826
16
    auto FiniCB = [this](InsertPointTy IP) {
3827
16
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
3828
16
    };
3829
3830
8
    const CapturedStmt *ICS = S.getInnermostCapturedStmt();
3831
8
    const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
3832
8
    const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
3833
8
    llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
3834
8
    if (CS) {
3835
12
      for (const Stmt *SubStmt : CS->children()) {
3836
12
        auto SectionCB = [this, SubStmt](InsertPointTy AllocaIP,
3837
12
                                         InsertPointTy CodeGenIP,
3838
12
                                         llvm::BasicBlock &FiniBB) {
3839
12
          OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP,
3840
12
                                                         FiniBB);
3841
12
          OMPBuilderCBHelpers::EmitOMPRegionBody(*this, SubStmt, CodeGenIP,
3842
12
                                                 FiniBB);
3843
12
        };
3844
12
        SectionCBVector.push_back(SectionCB);
3845
12
      }
3846
8
    } else {
3847
0
      auto SectionCB = [this, CapturedStmt](InsertPointTy AllocaIP,
3848
0
                                            InsertPointTy CodeGenIP,
3849
0
                                            llvm::BasicBlock &FiniBB) {
3850
0
        OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
3851
0
        OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CapturedStmt, CodeGenIP,
3852
0
                                               FiniBB);
3853
0
      };
3854
0
      SectionCBVector.push_back(SectionCB);
3855
0
    }
3856
3857
    // Privatization callback that performs appropriate action for
3858
    // shared/private/firstprivate/lastprivate/copyin/... variables.
3859
    //
3860
    // TODO: This defaults to shared right now.
3861
8
    auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
3862
8
                     llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
3863
      // The next line is appropriate only for variables (Val) with the
3864
      // data-sharing attribute "shared".
3865
0
      ReplVal = &Val;
3866
3867
0
      return CodeGenIP;
3868
0
    };
3869
3870
8
    CGCapturedStmtInfo CGSI(*ICS, CR_OpenMP);
3871
8
    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
3872
8
    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
3873
8
        AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
3874
8
    Builder.restoreIP(OMPBuilder.createSections(
3875
8
        Builder, AllocaIP, SectionCBVector, PrivCB, FiniCB, S.hasCancel(),
3876
8
        S.getSingleClause<OMPNowaitClause>()));
3877
8
    return;
3878
8
  }
3879
54
  {
3880
54
    auto LPCRegion =
3881
54
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3882
54
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
3883
54
    EmitSections(S);
3884
54
  }
3885
  // Emit an implicit barrier at the end.
3886
54
  if (!S.getSingleClause<OMPNowaitClause>()) {
3887
48
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
3888
48
                                           OMPD_sections);
3889
48
  }
3890
  // Check for outer lastprivate conditional update.
3891
54
  checkForLastprivateConditionalUpdate(*this, S);
3892
54
}
3893
3894
54
void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
3895
54
  if (CGM.getLangOpts().OpenMPIRBuilder) {
3896
8
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
3897
8
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
3898
3899
8
    const Stmt *SectionRegionBodyStmt = S.getAssociatedStmt();
3900
16
    auto FiniCB = [this](InsertPointTy IP) {
3901
16
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
3902
16
    };
3903
3904
8
    auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP,
3905
8
                                                   InsertPointTy CodeGenIP,
3906
8
                                                   llvm::BasicBlock &FiniBB) {
3907
8
      OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
3908
8
      OMPBuilderCBHelpers::EmitOMPRegionBody(*this, SectionRegionBodyStmt,
3909
8
                                             CodeGenIP, FiniBB);
3910
8
    };
3911
3912
8
    LexicalScope Scope(*this, S.getSourceRange());
3913
8
    EmitStopPoint(&S);
3914
8
    Builder.restoreIP(OMPBuilder.createSection(Builder, BodyGenCB, FiniCB));
3915
3916
8
    return;
3917
8
  }
3918
46
  LexicalScope Scope(*this, S.getSourceRange());
3919
46
  EmitStopPoint(&S);
3920
46
  EmitStmt(S.getAssociatedStmt());
3921
46
}
3922
3923
57
void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
3924
57
  llvm::SmallVector<const Expr *, 8> CopyprivateVars;
3925
57
  llvm::SmallVector<const Expr *, 8> DestExprs;
3926
57
  llvm::SmallVector<const Expr *, 8> SrcExprs;
3927
57
  llvm::SmallVector<const Expr *, 8> AssignmentOps;
3928
  // Check if there are any 'copyprivate' clauses associated with this
3929
  // 'single' construct.
3930
  // Build a list of copyprivate variables along with helper expressions
3931
  // (<source>, <destination>, <destination>=<source> expressions)
3932
57
  for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
3933
28
    CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
3934
28
    DestExprs.append(C->destination_exprs().begin(),
3935
28
                     C->destination_exprs().end());
3936
28
    SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
3937
28
    AssignmentOps.append(C->assignment_ops().begin(),
3938
28
                         C->assignment_ops().end());
3939
28
  }
3940
  // Emit code for 'single' region along with 'copyprivate' clauses
3941
57
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3942
57
    Action.Enter(CGF);
3943
57
    OMPPrivateScope SingleScope(CGF);
3944
57
    (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
3945
57
    CGF.EmitOMPPrivateClause(S, SingleScope);
3946
57
    (void)SingleScope.Privatize();
3947
57
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
3948
57
  };
3949
57
  {
3950
57
    auto LPCRegion =
3951
57
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3952
57
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
3953
57
    CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getBeginLoc(),
3954
57
                                            CopyprivateVars, DestExprs,
3955
57
                                            SrcExprs, AssignmentOps);
3956
57
  }
3957
  // Emit an implicit barrier at the end (to avoid data race on firstprivate
3958
  // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
3959
57
  if (!S.getSingleClause<OMPNowaitClause>() && 
CopyprivateVars.empty()50
) {
3960
22
    CGM.getOpenMPRuntime().emitBarrierCall(
3961
22
        *this, S.getBeginLoc(),
3962
22
        S.getSingleClause<OMPNowaitClause>() ? 
OMPD_unknown0
: OMPD_single);
3963
22
  }
3964
  // Check for outer lastprivate conditional update.
3965
57
  checkForLastprivateConditionalUpdate(*this, S);
3966
57
}
3967
3968
37
static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
3969
37
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3970
37
    Action.Enter(CGF);
3971
37
    CGF.EmitStmt(S.getRawStmt());
3972
37
  };
3973
37
  CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc());
3974
37
}
3975
3976
25
void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
3977
25
  if (CGM.getLangOpts().OpenMPIRBuilder) {
3978
10
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
3979
10
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
3980
3981
10
    const Stmt *MasterRegionBodyStmt = S.getAssociatedStmt();
3982
3983
10
    auto FiniCB = [this](InsertPointTy IP) {
3984
10
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
3985
10
    };
3986
3987
10
    auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP,
3988
10
                                                  InsertPointTy CodeGenIP,
3989
10
                                                  llvm::BasicBlock &FiniBB) {
3990
10
      OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
3991
10
      OMPBuilderCBHelpers::EmitOMPRegionBody(*this, MasterRegionBodyStmt,
3992
10
                                             CodeGenIP, FiniBB);
3993
10
    };
3994
3995
10
    LexicalScope Scope(*this, S.getSourceRange());
3996
10
    EmitStopPoint(&S);
3997
10
    Builder.restoreIP(OMPBuilder.createMaster(Builder, BodyGenCB, FiniCB));
3998
3999
10
    return;
4000
10
  }
4001
15
  LexicalScope Scope(*this, S.getSourceRange());
4002
15
  EmitStopPoint(&S);
4003
15
  emitMaster(*this, S);
4004
15
}
4005
4006
24
static void emitMasked(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
4007
24
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4008
24
    Action.Enter(CGF);
4009
24
    CGF.EmitStmt(S.getRawStmt());
4010
24
  };
4011
24
  Expr *Filter = nullptr;
4012
24
  if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
4013
18
    Filter = FilterClause->getThreadID();
4014
24
  CGF.CGM.getOpenMPRuntime().emitMaskedRegion(CGF, CodeGen, S.getBeginLoc(),
4015
24
                                              Filter);
4016
24
}
4017
4018
40
void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective &S) {
4019
40
  if (CGM.getLangOpts().OpenMPIRBuilder) {
4020
16
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4021
16
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4022
4023
16
    const Stmt *MaskedRegionBodyStmt = S.getAssociatedStmt();
4024
16
    const Expr *Filter = nullptr;
4025
16
    if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
4026
12
      Filter = FilterClause->getThreadID();
4027
16
    llvm::Value *FilterVal = Filter
4028
16
                                 ? 
EmitScalarExpr(Filter, CGM.Int32Ty)12
4029
16
                                 : 
llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0)4
;
4030
4031
16
    auto FiniCB = [this](InsertPointTy IP) {
4032
16
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
4033
16
    };
4034
4035
16
    auto BodyGenCB = [MaskedRegionBodyStmt, this](InsertPointTy AllocaIP,
4036
16
                                                  InsertPointTy CodeGenIP,
4037
16
                                                  llvm::BasicBlock &FiniBB) {
4038
16
      OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
4039
16
      OMPBuilderCBHelpers::EmitOMPRegionBody(*this, MaskedRegionBodyStmt,
4040
16
                                             CodeGenIP, FiniBB);
4041
16
    };
4042
4043
16
    LexicalScope Scope(*this, S.getSourceRange());
4044
16
    EmitStopPoint(&S);
4045
16
    Builder.restoreIP(
4046
16
        OMPBuilder.createMasked(Builder, BodyGenCB, FiniCB, FilterVal));
4047
4048
16
    return;
4049
16
  }
4050
24
  LexicalScope Scope(*this, S.getSourceRange());
4051
24
  EmitStopPoint(&S);
4052
24
  emitMasked(*this, S);
4053
24
}
4054
4055
102
void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
4056
102
  if (CGM.getLangOpts().OpenMPIRBuilder) {
4057
34
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4058
34
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4059
4060
34
    const Stmt *CriticalRegionBodyStmt = S.getAssociatedStmt();
4061
34
    const Expr *Hint = nullptr;
4062
34
    if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
4063
4
      Hint = HintClause->getHint();
4064
4065
    // TODO: This is slightly different from what's currently being done in
4066
    // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything
4067
    // about typing is final.
4068
34
    llvm::Value *HintInst = nullptr;
4069
34
    if (Hint)
4070
4
      HintInst =
4071
4
          Builder.CreateIntCast(EmitScalarExpr(Hint), CGM.Int32Ty, false);
4072
4073
34
    auto FiniCB = [this](InsertPointTy IP) {
4074
30
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
4075
30
    };
4076
4077
34
    auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP,
4078
34
                                                    InsertPointTy CodeGenIP,
4079
34
                                                    llvm::BasicBlock &FiniBB) {
4080
34
      OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
4081
34
      OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CriticalRegionBodyStmt,
4082
34
                                             CodeGenIP, FiniBB);
4083
34
    };
4084
4085
34
    LexicalScope Scope(*this, S.getSourceRange());
4086
34
    EmitStopPoint(&S);
4087
34
    Builder.restoreIP(OMPBuilder.createCritical(
4088
34
        Builder, BodyGenCB, FiniCB, S.getDirectiveName().getAsString(),
4089
34
        HintInst));
4090
4091
34
    return;
4092
34
  }
4093
4094
68
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4095
68
    Action.Enter(CGF);
4096
68
    CGF.EmitStmt(S.getAssociatedStmt());
4097
68
  };
4098
68
  const Expr *Hint = nullptr;
4099
68
  if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
4100
6
    Hint = HintClause->getHint();
4101
68
  LexicalScope Scope(*this, S.getSourceRange());
4102
68
  EmitStopPoint(&S);
4103
68
  CGM.getOpenMPRuntime().emitCriticalRegion(*this,
4104
68
                                            S.getDirectiveName().getAsString(),
4105
68
                                            CodeGen, S.getBeginLoc(), Hint);
4106
68
}
4107
4108
void CodeGenFunction::EmitOMPParallelForDirective(
4109
235
    const OMPParallelForDirective &S) {
4110
  // Emit directive as a combined directive that consists of two implicit
4111
  // directives: 'parallel' with 'for' directive.
4112
235
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4113
235
    Action.Enter(CGF);
4114
235
    (void)emitWorksharingDirective(CGF, S, S.hasCancel());
4115
235
  };
4116
235
  {
4117
235
    if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
4118
235
                     [](const OMPReductionClause *C) {
4119
66
                       return C->getModifier() == OMPC_REDUCTION_inscan;
4120
66
                     })) {
4121
4
      const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
4122
4
        CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
4123
4
        CGCapturedStmtInfo CGSI(CR_OpenMP);
4124
4
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
4125
4
        OMPLoopScope LoopScope(CGF, S);
4126
4
        return CGF.EmitScalarExpr(S.getNumIterations());
4127
4
      };
4128
4
      emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
4129
4
    }
4130
235
    auto LPCRegion =
4131
235
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4132
235
    emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
4133
235
                                   emitEmptyBoundParameters);
4134
235
  }
4135
  // Check for outer lastprivate conditional update.
4136
235
  checkForLastprivateConditionalUpdate(*this, S);
4137
235
}
4138
4139
void CodeGenFunction::EmitOMPParallelForSimdDirective(
4140
100
    const OMPParallelForSimdDirective &S) {
4141
  // Emit directive as a combined directive that consists of two implicit
4142
  // directives: 'parallel' with 'for' directive.
4143
100
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4144
100
    Action.Enter(CGF);
4145
100
    (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
4146
100
  };
4147
100
  {
4148
100
    if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
4149
100
                     [](const OMPReductionClause *C) {
4150
10
                       return C->getModifier() == OMPC_REDUCTION_inscan;
4151
10
                     })) {
4152
4
      const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
4153
4
        CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
4154
4
        CGCapturedStmtInfo CGSI(CR_OpenMP);
4155
4
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
4156
4
        OMPLoopScope LoopScope(CGF, S);
4157
4
        return CGF.EmitScalarExpr(S.getNumIterations());
4158
4
      };
4159
4
      emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
4160
4
    }
4161
100
    auto LPCRegion =
4162
100
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4163
100
    emitCommonOMPParallelDirective(*this, S, OMPD_for_simd, CodeGen,
4164
100
                                   emitEmptyBoundParameters);
4165
100
  }
4166
  // Check for outer lastprivate conditional update.
4167
100
  checkForLastprivateConditionalUpdate(*this, S);
4168
100
}
4169
4170
void CodeGenFunction::EmitOMPParallelMasterDirective(
4171
22
    const OMPParallelMasterDirective &S) {
4172
  // Emit directive as a combined directive that consists of two implicit
4173
  // directives: 'parallel' with 'master' directive.
4174
22
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4175
22
    Action.Enter(CGF);
4176
22
    OMPPrivateScope PrivateScope(CGF);
4177
22
    bool Copyins = CGF.EmitOMPCopyinClause(S);
4178
22
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
4179
22
    if (Copyins) {
4180
      // Emit implicit barrier to synchronize threads and avoid data races on
4181
      // propagation master's thread values of threadprivate variables to local
4182
      // instances of that variables of all other implicit threads.
4183
3
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
4184
3
          CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
4185
3
          /*ForceSimpleCall=*/true);
4186
3
    }
4187
22
    CGF.EmitOMPPrivateClause(S, PrivateScope);
4188
22
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4189
22
    (void)PrivateScope.Privatize();
4190
22
    emitMaster(CGF, S);
4191
22
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
4192
22
  };
4193
22
  {
4194
22
    auto LPCRegion =
4195
22
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4196
22
    emitCommonOMPParallelDirective(*this, S, OMPD_master, CodeGen,
4197
22
                                   emitEmptyBoundParameters);
4198
22
    emitPostUpdateForReductionClause(*this, S,
4199
22
                                     [](CodeGenFunction &) 
{ return nullptr; }0
);
4200
22
  }
4201
  // Check for outer lastprivate conditional update.
4202
22
  checkForLastprivateConditionalUpdate(*this, S);
4203
22
}
4204
4205
void CodeGenFunction::EmitOMPParallelSectionsDirective(
4206
26
    const OMPParallelSectionsDirective &S) {
4207
  // Emit directive as a combined directive that consists of two implicit
4208
  // directives: 'parallel' with 'sections' directive.
4209
26
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4210
26
    Action.Enter(CGF);
4211
26
    CGF.EmitSections(S);
4212
26
  };
4213
26
  {
4214
26
    auto LPCRegion =
4215
26
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4216
26
    emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
4217
26
                                   emitEmptyBoundParameters);
4218
26
  }
4219
  // Check for outer lastprivate conditional update.
4220
26
  checkForLastprivateConditionalUpdate(*this, S);
4221
26
}
4222
4223
namespace {
4224
/// Get the list of variables declared in the context of the untied tasks.
4225
class CheckVarsEscapingUntiedTaskDeclContext final
4226
    : public ConstStmtVisitor<CheckVarsEscapingUntiedTaskDeclContext> {
4227
  llvm::SmallVector<const VarDecl *, 4> PrivateDecls;
4228
4229
public:
4230
16
  explicit CheckVarsEscapingUntiedTaskDeclContext() = default;
4231
16
  virtual ~CheckVarsEscapingUntiedTaskDeclContext() = default;
4232
6
  void VisitDeclStmt(const DeclStmt *S) {
4233
6
    if (!S)
4234
0
      return;
4235
    // Need to privatize only local vars, static locals can be processed as is.
4236
10
    
for (const Decl *D : S->decls())6
{
4237
10
      if (const auto *VD = dyn_cast_or_null<VarDecl>(D))
4238
8
        if (VD->hasLocalStorage())
4239
8
          PrivateDecls.push_back(VD);
4240
10
    }
4241
6
  }
4242
16
  void VisitOMPExecutableDirective(const OMPExecutableDirective *) { return; }
4243
0
  void VisitCapturedStmt(const CapturedStmt *) { return; }
4244
0
  void VisitLambdaExpr(const LambdaExpr *) { return; }
4245
0
  void VisitBlockExpr(const BlockExpr *) { return; }
4246
108
  void VisitStmt(const Stmt *S) {
4247
108
    if (!S)
4248
0
      return;
4249
108
    for (const Stmt *Child : S->children())
4250
114
      if (Child)
4251
114
        Visit(Child);
4252
108
  }
4253
4254
  /// Swaps list of vars with the provided one.
4255
32
  ArrayRef<const VarDecl *> getPrivateDecls() const { return PrivateDecls; }
4256
};
4257
} // anonymous namespace
4258
4259
void CodeGenFunction::EmitOMPTaskBasedDirective(
4260
    const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion,
4261
    const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen,
4262
409
    OMPTaskDataTy &Data) {
4263
  // Emit outlined function for task construct.
4264
409
  const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion);
4265
409
  auto I = CS->getCapturedDecl()->param_begin();
4266
409
  auto PartId = std::next(I);
4267
409
  auto TaskT = std::next(I, 4);
4268
  // Check if the task is final
4269
409
  if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
4270
    // If the condition constant folds and can be elided, try to avoid emitting
4271
    // the condition and the dead arm of the if/else.
4272
22
    const Expr *Cond = Clause->getCondition();
4273
22
    bool CondConstant;
4274
22
    if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
4275
12
      Data.Final.setInt(CondConstant);
4276
10
    else
4277
10
      Data.Final.setPointer(EvaluateExprAsBool(Cond));
4278
387
  } else {
4279
    // By default the task is not final.
4280
387
    Data.Final.setInt(/*IntVal=*/false);
4281
387
  }
4282
  // Check if the task has 'priority' clause.
4283
409
  if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
4284
22
    const Expr *Prio = Clause->getPriority();
4285
22
    Data.Priority.setInt(/*IntVal=*/true);
4286
22
    Data.Priority.setPointer(EmitScalarConversion(
4287
22
        EmitScalarExpr(Prio), Prio->getType(),
4288
22
        getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
4289
22
        Prio->getExprLoc()));
4290
22
  }
4291
  // The first function argument for tasks is a thread id, the second one is a
4292
  // part id (0 for tied tasks, >=0 for untied task).
4293
409
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
4294
  // Get list of private variables.
4295
409
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
4296
50
    auto IRef = C->varlist_begin();
4297
226
    for (const Expr *IInit : C->private_copies()) {
4298
226
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
4299
226
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
4300
170
        Data.PrivateVars.push_back(*IRef);
4301
170
        Data.PrivateCopies.push_back(IInit);
4302
170
      }
4303
226
      ++IRef;
4304
226
    }
4305
50
  }
4306
409
  EmittedAsPrivate.clear();
4307
  // Get list of firstprivate variables.
4308
409
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
4309
115
    auto IRef = C->varlist_begin();
4310
115
    auto IElemInitRef = C->inits().begin();
4311
335
    for (const Expr *IInit : C->private_copies()) {
4312
335
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
4313
335
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
4314
261
        Data.FirstprivateVars.push_back(*IRef);
4315
261
        Data.FirstprivateCopies.push_back(IInit);
4316
261
        Data.FirstprivateInits.push_back(*IElemInitRef);
4317
261
      }
4318
335
      ++IRef;
4319
335
      ++IElemInitRef;
4320
335
    }
4321
115
  }
4322
  // Get list of lastprivate variables (for taskloops).
4323
409
  llvm::MapVector<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
4324
409
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
4325
49
    auto IRef = C->varlist_begin();
4326
49
    auto ID = C->destination_exprs().begin();
4327
199
    for (const Expr *IInit : C->private_copies()) {
4328
199
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
4329
199
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
4330
151
        Data.LastprivateVars.push_back(*IRef);
4331
151
        Data.LastprivateCopies.push_back(IInit);
4332
151
      }
4333
199
      LastprivateDstsOrigs.insert(
4334
199
          std::make_pair(cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
4335
199
                         cast<DeclRefExpr>(*IRef)));
4336
199
      ++IRef;
4337
199
      ++ID;
4338
199
    }
4339
49
  }
4340
409
  SmallVector<const Expr *, 4> LHSs;
4341
409
  SmallVector<const Expr *, 4> RHSs;
4342
409
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
4343
6
    Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
4344
6
    Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
4345
6
    Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
4346
6
    Data.ReductionOps.append(C->reduction_ops().begin(),
4347
6
                             C->reduction_ops().end());
4348
6
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
4349
6
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
4350
6
  }
4351
409
  Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
4352
409
      *this, S.getBeginLoc(), LHSs, RHSs, Data);
4353
  // Build list of dependences.
4354
409
  for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
4355
40
    OMPTaskDataTy::DependData &DD =
4356
40
        Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
4357
40
    DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
4358
40
  }
4359
  // Get list of local vars for untied tasks.
4360
409
  if (!Data.Tied) {
4361
16
    CheckVarsEscapingUntiedTaskDeclContext Checker;
4362
16
    Checker.Visit(S.getInnermostCapturedStmt()->getCapturedStmt());
4363
16
    Data.PrivateLocals.append(Checker.getPrivateDecls().begin(),
4364
16
                              Checker.getPrivateDecls().end());
4365
16
  }
4366
409
  auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
4367
409
                    CapturedRegion](CodeGenFunction &CGF,
4368
409
                                    PrePostActionTy &Action) {
4369
409
    llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
4370
409
                    std::pair<Address, Address>>
4371
409
        UntiedLocalVars;
4372
    // Set proper addresses for generated private copies.
4373
409
    OMPPrivateScope Scope(CGF);
4374
409
    llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs;
4375
409
    if (!Data.PrivateVars.empty() || 
!Data.FirstprivateVars.empty()359
||
4376
409
        
!Data.LastprivateVars.empty()246
||
!Data.PrivateLocals.empty()197
) {
4377
212
      enum { PrivatesParam = 2, CopyFnParam = 3 };
4378
212
      llvm::Value *CopyFn = CGF.Builder.CreateLoad(
4379
212
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
4380
212
      llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
4381
212
          CS->getCapturedDecl()->getParam(PrivatesParam)));
4382
      // Map privates.
4383
212
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
4384
212
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
4385
212
      llvm::SmallVector<llvm::Type *, 4> ParamTypes;
4386
212
      CallArgs.push_back(PrivatesPtr);
4387
212
      ParamTypes.push_back(PrivatesPtr->getType());
4388
212
      for (const Expr *E : Data.PrivateVars) {
4389
170
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4390
170
        Address PrivatePtr = CGF.CreateMemTemp(
4391
170
            CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
4392
170
        PrivatePtrs.emplace_back(VD, PrivatePtr);
4393
170
        CallArgs.push_back(PrivatePtr.getPointer());
4394
170
        ParamTypes.push_back(PrivatePtr.getType());
4395
170
      }
4396
261
      for (const Expr *E : Data.FirstprivateVars) {
4397
261
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4398
261
        Address PrivatePtr =
4399
261
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
4400
261
                              ".firstpriv.ptr.addr");
4401
261
        PrivatePtrs.emplace_back(VD, PrivatePtr);
4402
261
        FirstprivatePtrs.emplace_back(VD, PrivatePtr);
4403
261
        CallArgs.push_back(PrivatePtr.getPointer());
4404
261
        ParamTypes.push_back(PrivatePtr.getType());
4405
261
      }
4406
212
      for (const Expr *E : Data.LastprivateVars) {
4407
151
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4408
151
        Address PrivatePtr =
4409
151
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
4410
151
                              ".lastpriv.ptr.addr");
4411
151
        PrivatePtrs.emplace_back(VD, PrivatePtr);
4412
151
        CallArgs.push_back(PrivatePtr.getPointer());
4413
151
        ParamTypes.push_back(PrivatePtr.getType());
4414
151
      }
4415
212
      for (const VarDecl *VD : Data.PrivateLocals) {
4416
8
        QualType Ty = VD->getType().getNonReferenceType();
4417
8
        if (VD->getType()->isLValueReferenceType())
4418
0
          Ty = CGF.getContext().getPointerType(Ty);
4419
8
        if (isAllocatableDecl(VD))
4420
2
          Ty = CGF.getContext().getPointerType(Ty);
4421
8
        Address PrivatePtr = CGF.CreateMemTemp(
4422
8
            CGF.getContext().getPointerType(Ty), ".local.ptr.addr");
4423
8
        auto Result = UntiedLocalVars.insert(
4424
8
            std::make_pair(VD, std::make_pair(PrivatePtr, Address::invalid())));
4425
        // If key exists update in place.
4426
8
        if (Result.second == false)
4427
0
          *Result.first = std::make_pair(
4428
0
              VD, std::make_pair(PrivatePtr, Address::invalid()));
4429
8
        CallArgs.push_back(PrivatePtr.getPointer());
4430
8
        ParamTypes.push_back(PrivatePtr.getType());
4431
8
      }
4432
212
      auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(),
4433
212
                                               ParamTypes, /*isVarArg=*/false);
4434
212
      CopyFn = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4435
212
          CopyFn, CopyFnTy->getPointerTo());
4436
212
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
4437
212
          CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
4438
212
      for (const auto &Pair : LastprivateDstsOrigs) {
4439
199
        const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
4440
199
        DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD),
4441
                        /*RefersToEnclosingVariableOrCapture=*/
4442
199
                        CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
4443
199
                        Pair.second->getType(), VK_LValue,
4444
199
                        Pair.second->getExprLoc());
4445
199
        Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
4446
199
          return CGF.EmitLValue(&DRE).getAddress(CGF);
4447
199
        });
4448
199
      }
4449
582
      for (const auto &Pair : PrivatePtrs) {
4450
582
        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
4451
582
                            CGF.getContext().getDeclAlign(Pair.first));
4452
582
        Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
4453
582
      }
4454
      // Adjust mapping for internal locals by mapping actual memory instead of
4455
      // a pointer to this memory.
4456
212
      for (auto &Pair : UntiedLocalVars) {
4457
8
        if (isAllocatableDecl(Pair.first)) {
4458
2
          llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
4459
2
          Address Replacement(Ptr, CGF.getPointerAlign());
4460
2
          Pair.second.first = Replacement;
4461
2
          Ptr = CGF.Builder.CreateLoad(Replacement);
4462
2
          Replacement = Address(Ptr, CGF.getContext().getDeclAlign(Pair.first));
4463
2
          Pair.second.second = Replacement;
4464
6
        } else {
4465
6
          llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
4466
6
          Address Replacement(Ptr, CGF.getContext().getDeclAlign(Pair.first));
4467
6
          Pair.second.first = Replacement;
4468
6
        }
4469
8
      }
4470
212
    }
4471
409
    if (Data.Reductions) {
4472
6
      OMPPrivateScope FirstprivateScope(CGF);
4473
18
      for (const auto &Pair : FirstprivatePtrs) {
4474
18
        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
4475
18
                            CGF.getContext().getDeclAlign(Pair.first));
4476
18
        FirstprivateScope.addPrivate(Pair.first,
4477
18
                                     [Replacement]() { return Replacement; });
4478
18
      }
4479
6
      (void)FirstprivateScope.Privatize();
4480
6
      OMPLexicalScope LexScope(CGF, S, CapturedRegion);
4481
6
      ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
4482
6
                             Data.ReductionCopies, Data.ReductionOps);
4483
6
      llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
4484
6
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
4485
30
      for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; 
++Cnt24
) {
4486
24
        RedCG.emitSharedOrigLValue(CGF, Cnt);
4487
24
        RedCG.emitAggregateType(CGF, Cnt);
4488
        // FIXME: This must removed once the runtime library is fixed.
4489
        // Emit required threadprivate variables for
4490
        // initializer/combiner/finalizer.
4491
24
        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
4492
24
                                                           RedCG, Cnt);
4493
24
        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
4494
24
            CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
4495
24
        Replacement =
4496
24
            Address(CGF.EmitScalarConversion(
4497
24
                        Replacement.getPointer(), CGF.getContext().VoidPtrTy,
4498
24
                        CGF.getContext().getPointerType(
4499
24
                            Data.ReductionCopies[Cnt]->getType()),
4500
24
                        Data.ReductionCopies[Cnt]->getExprLoc()),
4501
24
                    Replacement.getAlignment());
4502
24
        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
4503
24
        Scope.addPrivate(RedCG.getBaseDecl(Cnt),
4504
24
                         [Replacement]() { return Replacement; });
4505
24
      }
4506
6
    }
4507
    // Privatize all private variables except for in_reduction items.
4508
409
    (void)Scope.Privatize();
4509
409
    SmallVector<const Expr *, 4> InRedVars;
4510
409
    SmallVector<const Expr *, 4> InRedPrivs;
4511
409
    SmallVector<const Expr *, 4> InRedOps;
4512
409
    SmallVector<const Expr *, 4> TaskgroupDescriptors;
4513
409
    for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
4514
44
      auto IPriv = C->privates().begin();
4515
44
      auto IRed = C->reduction_ops().begin();
4516
44
      auto ITD = C->taskgroup_descriptors().begin();
4517
66
      for (const Expr *Ref : C->varlists()) {
4518
66
        InRedVars.emplace_back(Ref);
4519
66
        InRedPrivs.emplace_back(*IPriv);
4520
66
        InRedOps.emplace_back(*IRed);
4521
66
        TaskgroupDescriptors.emplace_back(*ITD);
4522
66
        std::advance(IPriv, 1);
4523
66
        std::advance(IRed, 1);
4524
66
        std::advance(ITD, 1);
4525
66
      }
4526
44
    }
4527
    // Privatize in_reduction items here, because taskgroup descriptors must be
4528
    // privatized earlier.
4529
409
    OMPPrivateScope InRedScope(CGF);
4530
409
    if (!InRedVars.empty()) {
4531
34
      ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
4532
100
      for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; 
++Cnt66
) {
4533
66
        RedCG.emitSharedOrigLValue(CGF, Cnt);
4534
66
        RedCG.emitAggregateType(CGF, Cnt);
4535
        // The taskgroup descriptor variable is always implicit firstprivate and
4536
        // privatized already during processing of the firstprivates.
4537
        // FIXME: This must removed once the runtime library is fixed.
4538
        // Emit required threadprivate variables for
4539
        // initializer/combiner/finalizer.
4540
66
        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
4541
66
                                                           RedCG, Cnt);
4542
66
        llvm::Value *ReductionsPtr;
4543
66
        if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) {
4544
64
          ReductionsPtr = CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr),
4545
64
                                               TRExpr->getExprLoc());
4546
64
        } else {
4547
2
          ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4548
2
        }
4549
66
        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
4550
66
            CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
4551
66
        Replacement = Address(
4552
66
            CGF.EmitScalarConversion(
4553
66
                Replacement.getPointer(), CGF.getContext().VoidPtrTy,
4554
66
                CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
4555
66
                InRedPrivs[Cnt]->getExprLoc()),
4556
66
            Replacement.getAlignment());
4557
66
        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
4558
66
        InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
4559
66
                              [Replacement]() { return Replacement; });
4560
66
      }
4561
34
    }
4562
409
    (void)InRedScope.Privatize();
4563
4564
409
    CGOpenMPRuntime::UntiedTaskLocalDeclsRAII LocalVarsScope(CGF,
4565
409
                                                             UntiedLocalVars);
4566
409
    Action.Enter(CGF);
4567
409
    BodyGen(CGF);
4568
409
  };
4569
409
  llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
4570
409
      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
4571
409
      Data.NumberOfParts);
4572
409
  OMPLexicalScope Scope(*this, S, llvm::None,
4573
409
                        !isOpenMPParallelDirective(S.getDirectiveKind()) &&
4574
409
                            
!isOpenMPSimdDirective(S.getDirectiveKind())337
);
4575
409
  TaskGen(*this, OutlinedFn, Data);
4576
409
}
4577
4578
static ImplicitParamDecl *
4579
createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
4580
                                  QualType Ty, CapturedDecl *CD,
4581
710
                                  SourceLocation Loc) {
4582
710
  auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
4583
710
                                           ImplicitParamDecl::Other);
4584
710
  auto *OrigRef = DeclRefExpr::Create(
4585
710
      C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD,
4586
710
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
4587
710
  auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
4588
710
                                              ImplicitParamDecl::Other);
4589
710
  auto *PrivateRef = DeclRefExpr::Create(
4590
710
      C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD,
4591
710
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
4592
710
  QualType ElemType = C.getBaseElementType(Ty);
4593
710
  auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType,
4594
710
                                           ImplicitParamDecl::Other);
4595
710
  auto *InitRef = DeclRefExpr::Create(
4596
710
      C, NestedNameSpecifierLoc(), SourceLocation(), InitVD,
4597
710
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue);
4598
710
  PrivateVD->setInitStyle(VarDecl::CInit);
4599
710
  PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue,
4600
710
                                              InitRef, /*BasePath=*/nullptr,
4601
710
                                              VK_PRValue, FPOptionsOverride()));
4602
710
  Data.FirstprivateVars.emplace_back(OrigRef);
4603
710
  Data.FirstprivateCopies.emplace_back(PrivateRef);
4604
710
  Data.FirstprivateInits.emplace_back(InitRef);
4605
710
  return OrigVD;
4606
710
}
4607
4608
void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
4609
    const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen,
4610
460
    OMPTargetDataInfo &InputInfo) {
4611
  // Emit outlined function for task construct.
4612
460
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
4613
460
  Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
4614
460
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
4615
460
  auto I = CS->getCapturedDecl()->param_begin();
4616
460
  auto PartId = std::next(I);
4617
460
  auto TaskT = std::next(I, 4);
4618
460
  OMPTaskDataTy Data;
4619
  // The task is not final.
4620
460
  Data.Final.setInt(/*IntVal=*/false);
4621
  // Get list of firstprivate variables.
4622
460
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
4623
252
    auto IRef = C->varlist_begin();
4624
252
    auto IElemInitRef = C->inits().begin();
4625
392
    for (auto *IInit : C->private_copies()) {
4626
392
      Data.FirstprivateVars.push_back(*IRef);
4627
392
      Data.FirstprivateCopies.push_back(IInit);
4628
392
      Data.FirstprivateInits.push_back(*IElemInitRef);
4629
392
      ++IRef;
4630
392
      ++IElemInitRef;
4631
392
    }
4632
252
  }
4633
460
  OMPPrivateScope TargetScope(*this);
4634
460
  VarDecl *BPVD = nullptr;
4635
460
  VarDecl *PVD = nullptr;
4636
460
  VarDecl *SVD = nullptr;
4637
460
  VarDecl *MVD = nullptr;
4638
460
  if (InputInfo.NumberOfTargetItems > 0) {
4639
228
    auto *CD = CapturedDecl::Create(
4640
228
        getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0);
4641
228
    llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems);
4642
228
    QualType BaseAndPointerAndMapperType = getContext().getConstantArrayType(
4643
228
        getContext().VoidPtrTy, ArrSize, nullptr, ArrayType::Normal,
4644
228
        /*IndexTypeQuals=*/0);
4645
228
    BPVD = createImplicitFirstprivateForType(
4646
228
        getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
4647
228
    PVD = createImplicitFirstprivateForType(
4648
228
        getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
4649
228
    QualType SizesType = getContext().getConstantArrayType(
4650
228
        getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1),
4651
228
        ArrSize, nullptr, ArrayType::Normal,
4652
228
        /*IndexTypeQuals=*/0);
4653
228
    SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD,
4654
228
                                            S.getBeginLoc());
4655
228
    TargetScope.addPrivate(
4656
228
        BPVD, [&InputInfo]() { return InputInfo.BasePointersArray; });
4657
228
    TargetScope.addPrivate(PVD,
4658
228
                           [&InputInfo]() { return InputInfo.PointersArray; });
4659
228
    TargetScope.addPrivate(SVD,
4660
228
                           [&InputInfo]() { return InputInfo.SizesArray; });
4661
    // If there is no user-defined mapper, the mapper array will be nullptr. In
4662
    // this case, we don't need to privatize it.
4663
228
    if (!dyn_cast_or_null<llvm::ConstantPointerNull>(
4664
228
            InputInfo.MappersArray.getPointer())) {
4665
26
      MVD = createImplicitFirstprivateForType(
4666
26
          getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
4667
26
      TargetScope.addPrivate(MVD,
4668
26
                             [&InputInfo]() { return InputInfo.MappersArray; });
4669
26
    }
4670
228
  }
4671
460
  (void)TargetScope.Privatize();
4672
  // Build list of dependences.
4673
460
  for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
4674
376
    OMPTaskDataTy::DependData &DD =
4675
376
        Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
4676
376
    DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
4677
376
  }
4678
460
  auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, MVD,
4679
460
                    &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
4680
    // Set proper addresses for generated private copies.
4681
460
    OMPPrivateScope Scope(CGF);
4682
460
    if (!Data.FirstprivateVars.empty()) {
4683
356
      enum { PrivatesParam = 2, CopyFnParam = 3 };
4684
356
      llvm::Value *CopyFn = CGF.Builder.CreateLoad(
4685
356
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
4686
356
      llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
4687
356
          CS->getCapturedDecl()->getParam(PrivatesParam)));
4688
      // Map privates.
4689
356
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
4690
356
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
4691
356
      llvm::SmallVector<llvm::Type *, 4> ParamTypes;
4692
356
      CallArgs.push_back(PrivatesPtr);
4693
356
      ParamTypes.push_back(PrivatesPtr->getType());
4694
1.10k
      for (const Expr *E : Data.FirstprivateVars) {
4695
1.10k
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4696
1.10k
        Address PrivatePtr =
4697
1.10k
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
4698
1.10k
                              ".firstpriv.ptr.addr");
4699
1.10k
        PrivatePtrs.emplace_back(VD, PrivatePtr);
4700
1.10k
        CallArgs.push_back(PrivatePtr.getPointer());
4701
1.10k
        ParamTypes.push_back(PrivatePtr.getType());
4702
1.10k
      }
4703
356
      auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(),
4704
356
                                               ParamTypes, /*isVarArg=*/false);
4705
356
      CopyFn = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4706
356
          CopyFn, CopyFnTy->getPointerTo());
4707
356
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
4708
356
          CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
4709
1.10k
      for (const auto &Pair : PrivatePtrs) {
4710
1.10k
        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
4711
1.10k
                            CGF.getContext().getDeclAlign(Pair.first));
4712
1.10k
        Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
4713
1.10k
      }
4714
356
    }
4715
    // Privatize all private variables except for in_reduction items.
4716
460
    (void)Scope.Privatize();
4717
460
    if (InputInfo.NumberOfTargetItems > 0) {
4718
228
      InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP(
4719
228
          CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0);
4720
228
      InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP(
4721
228
          CGF.GetAddrOfLocalVar(PVD), /*Index=*/0);
4722
228
      InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
4723
228
          CGF.GetAddrOfLocalVar(SVD), /*Index=*/0);
4724
      // If MVD is nullptr, the mapper array is not privatized
4725
228
      if (MVD)
4726
26
        InputInfo.MappersArray = CGF.Builder.CreateConstArrayGEP(
4727
26
            CGF.GetAddrOfLocalVar(MVD), /*Index=*/0);
4728
228
    }
4729
4730
460
    Action.Enter(CGF);
4731
460
    OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false);
4732
460
    BodyGen(CGF);
4733
460
  };
4734
460
  llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
4735
460
      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true,
4736
460
      Data.NumberOfParts);
4737
460
  llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 
1296
:
0164
);
4738
460
  IntegerLiteral IfCond(getContext(), TrueOrFalse,
4739
460
                        getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
4740
460
                        SourceLocation());
4741
4742
460
  CGM.getOpenMPRuntime().emitTaskCall(*this, S.getBeginLoc(), S, OutlinedFn,
4743
460
                                      SharedsTy, CapturedStruct, &IfCond, Data);
4744
460
}
4745
4746
183
void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
4747
  // Emit outlined function for task construct.
4748
183
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
4749
183
  Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
4750
183
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
4751
183
  const Expr *IfCond = nullptr;
4752
183
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
4753
44
    if (C->getNameModifier() == OMPD_unknown ||
4754
44
        
C->getNameModifier() == OMPD_task12
) {
4755
44
      IfCond = C->getCondition();
4756
44
      break;
4757
44
    }
4758
44
  }
4759
4760
183
  OMPTaskDataTy Data;
4761
  // Check if we should emit tied or untied task.
4762
183
  Data.Tied = !S.getSingleClause<OMPUntiedClause>();
4763
183
  auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
4764
183
    CGF.EmitStmt(CS->getCapturedStmt());
4765
183
  };
4766
183
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
4767
183
                    IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
4768
183
                            const OMPTaskDataTy &Data) {
4769
183
    CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getBeginLoc(), S, OutlinedFn,
4770
183
                                            SharedsTy, CapturedStruct, IfCond,
4771
183
                                            Data);
4772
183
  };
4773
183
  auto LPCRegion =
4774
183
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4775
183
  EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data);
4776
183
}
4777
4778
void CodeGenFunction::EmitOMPTaskyieldDirective(
4779
16
    const OMPTaskyieldDirective &S) {
4780
16
  CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getBeginLoc());
4781
16
}
4782
4783
30
void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
4784
30
  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_barrier);
4785
30
}
4786
4787
12
void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
4788
12
  CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc());
4789
12
}
4790
4791
void CodeGenFunction::EmitOMPTaskgroupDirective(
4792
39
    const OMPTaskgroupDirective &S) {
4793
39
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4794
39
    Action.Enter(CGF);
4795
39
    if (const Expr *E = S.getReductionRef()) {
4796
26
      SmallVector<const Expr *, 4> LHSs;
4797
26
      SmallVector<const Expr *, 4> RHSs;
4798
26
      OMPTaskDataTy Data;
4799
26
      for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
4800
26
        Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
4801
26
        Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
4802
26
        Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
4803
26
        Data.ReductionOps.append(C->reduction_ops().begin(),
4804
26
                                 C->reduction_ops().end());
4805
26
        LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
4806
26
        RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
4807
26
      }
4808
26
      llvm::Value *ReductionDesc =
4809
26
          CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(),
4810
26
                                                           LHSs, RHSs, Data);
4811
26
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4812
26
      CGF.EmitVarDecl(*VD);
4813
26
      CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
4814
26
                            /*Volatile=*/false, E->getType());
4815
26
    }
4816
39
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
4817
39
  };
4818
39
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
4819
39
  CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getBeginLoc());
4820
39
}
4821
4822
40
void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
4823
40
  llvm::AtomicOrdering AO = S.getSingleClause<OMPFlushClause>()
4824
40
                                ? 
llvm::AtomicOrdering::NotAtomic8
4825
40
                                : 
llvm::AtomicOrdering::AcquireRelease32
;
4826
40
  CGM.getOpenMPRuntime().emitFlush(
4827
40
      *this,
4828
40
      [&S]() -> ArrayRef<const Expr *> {
4829
40
        if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>())
4830
8
          return llvm::makeArrayRef(FlushClause->varlist_begin(),
4831
8
                                    FlushClause->varlist_end());
4832
32
        return llvm::None;
4833
40
      }(),
4834
40
      S.getBeginLoc(), AO);
4835
40
}
4836
4837
14
void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) {
4838
14
  const auto *DO = S.getSingleClause<OMPDepobjClause>();
4839
14
  LValue DOLVal = EmitLValue(DO->getDepobj());
4840
14
  if (const auto *DC = S.getSingleClause<OMPDependClause>()) {
4841
6
    OMPTaskDataTy::DependData Dependencies(DC->getDependencyKind(),
4842
6
                                           DC->getModifier());
4843
6
    Dependencies.DepExprs.append(DC->varlist_begin(), DC->varlist_end());
4844
6
    Address DepAddr = CGM.getOpenMPRuntime().emitDepobjDependClause(
4845
6
        *this, Dependencies, DC->getBeginLoc());
4846
6
    EmitStoreOfScalar(DepAddr.getPointer(), DOLVal);
4847
6
    return;
4848
6
  }
4849
8
  if (const auto *DC = S.getSingleClause<OMPDestroyClause>()) {
4850
4
    CGM.getOpenMPRuntime().emitDestroyClause(*this, DOLVal, DC->getBeginLoc());
4851
4
    return;
4852
4
  }
4853
4
  if (const auto *UC = S.getSingleClause<OMPUpdateClause>()) {
4854
4
    CGM.getOpenMPRuntime().emitUpdateClause(
4855
4
        *this, DOLVal, UC->getDependencyKind(), UC->getBeginLoc());
4856
4
    return;
4857
4
  }
4858
4
}
4859
4860
56
void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) {
4861
56
  if (!OMPParentLoopDirectiveForScan)
4862
8
    return;
4863
48
  const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan;
4864
48
  bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>();
4865
48
  SmallVector<const Expr *, 4> Shareds;
4866
48
  SmallVector<const Expr *, 4> Privates;
4867
48
  SmallVector<const Expr *, 4> LHSs;
4868
48
  SmallVector<const Expr *, 4> RHSs;
4869
48
  SmallVector<const Expr *, 4> ReductionOps;
4870
48
  SmallVector<const Expr *, 4> CopyOps;
4871
48
  SmallVector<const Expr *, 4> CopyArrayTemps;
4872
48
  SmallVector<const Expr *, 4> CopyArrayElems;
4873
48
  for (const auto *C : ParentDir.getClausesOfKind<OMPReductionClause>()) {
4874
48
    if (C->getModifier() != OMPC_REDUCTION_inscan)
4875
0
      continue;
4876
48
    Shareds.append(C->varlist_begin(), C->varlist_end());
4877
48
    Privates.append(C->privates().begin(), C->privates().end());
4878
48
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
4879
48
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
4880
48
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
4881
48
    CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
4882
48
    CopyArrayTemps.append(C->copy_array_temps().begin(),
4883
48
                          C->copy_array_temps().end());
4884
48
    CopyArrayElems.append(C->copy_array_elems().begin(),
4885
48
                          C->copy_array_elems().end());
4886
48
  }
4887
48
  if (ParentDir.getDirectiveKind() == OMPD_simd ||
4888
48
      
(40
getLangOpts().OpenMPSimd40
&&
4889
40
       
isOpenMPSimdDirective(ParentDir.getDirectiveKind())8
)) {
4890
    // For simd directive and simd-based directives in simd only mode, use the
4891
    // following codegen:
4892
    // int x = 0;
4893
    // #pragma omp simd reduction(inscan, +: x)
4894
    // for (..) {
4895
    //   <first part>
4896
    //   #pragma omp scan inclusive(x)
4897
    //   <second part>
4898
    //  }
4899
    // is transformed to:
4900
    // int x = 0;
4901
    // for (..) {
4902
    //   int x_priv = 0;
4903
    //   <first part>
4904
    //   x = x_priv + x;
4905
    //   x_priv = x;
4906
    //   <second part>
4907
    // }
4908
    // and
4909
    // int x = 0;
4910
    // #pragma omp simd reduction(inscan, +: x)
4911
    // for (..) {
4912
    //   <first part>
4913
    //   #pragma omp scan exclusive(x)
4914
    //   <second part>
4915
    // }
4916
    // to
4917
    // int x = 0;
4918
    // for (..) {
4919
    //   int x_priv = 0;
4920
    //   <second part>
4921
    //   int temp = x;
4922
    //   x = x_priv + x;
4923
    //   x_priv = temp;
4924
    //   <first part>
4925
    // }
4926
16
    llvm::BasicBlock *OMPScanReduce = createBasicBlock("omp.inscan.reduce");
4927
16
    EmitBranch(IsInclusive
4928
16
                   ? 
OMPScanReduce8
4929
16
                   : 
BreakContinueStack.back().ContinueBlock.getBlock()8
);
4930
16
    EmitBlock(OMPScanDispatch);
4931
16
    {
4932
      // New scope for correct construction/destruction of temp variables for
4933
      // exclusive scan.
4934
16
      LexicalScope Scope(*this, S.getSourceRange());
4935
16
      EmitBranch(IsInclusive ? 
OMPBeforeScanBlock8
:
OMPAfterScanBlock8
);
4936
16
      EmitBlock(OMPScanReduce);
4937
16
      if (!IsInclusive) {
4938
        // Create temp var and copy LHS value to this temp value.
4939
        // TMP = LHS;
4940
20
        for (unsigned I = 0, E = CopyArrayElems.size(); I < E; 
++I12
) {
4941
12
          const Expr *PrivateExpr = Privates[I];
4942
12
          const Expr *TempExpr = CopyArrayTemps[I];
4943
12
          EmitAutoVarDecl(
4944
12
              *cast<VarDecl>(cast<DeclRefExpr>(TempExpr)->getDecl()));
4945
12
          LValue DestLVal = EmitLValue(TempExpr);
4946
12
          LValue SrcLVal = EmitLValue(LHSs[I]);
4947
12
          EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
4948
12
                      SrcLVal.getAddress(*this),
4949
12
                      cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
4950
12
                      cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
4951
12
                      CopyOps[I]);
4952
12
        }
4953
8
      }
4954
16
      CGM.getOpenMPRuntime().emitReduction(
4955
16
          *this, ParentDir.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
4956
16
          {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_simd});
4957
40
      for (unsigned I = 0, E = CopyArrayElems.size(); I < E; 
++I24
) {
4958
24
        const Expr *PrivateExpr = Privates[I];
4959
24
        LValue DestLVal;
4960
24
        LValue SrcLVal;
4961
24
        if (IsInclusive) {
4962
12
          DestLVal = EmitLValue(RHSs[I]);
4963
12
          SrcLVal = EmitLValue(LHSs[I]);
4964
12
        } else {
4965
12
          const Expr *TempExpr = CopyArrayTemps[I];
4966
12
          DestLVal = EmitLValue(RHSs[I]);
4967
12
          SrcLVal = EmitLValue(TempExpr);
4968
12
        }
4969
24
        EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
4970
24
                    SrcLVal.getAddress(*this),
4971
24
                    cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
4972
24
                    cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
4973
24
                    CopyOps[I]);
4974
24
      }
4975
16
    }
4976
16
    EmitBranch(IsInclusive ? 
OMPAfterScanBlock8
:
OMPBeforeScanBlock8
);
4977
16
    OMPScanExitBlock = IsInclusive
4978
16
                           ? 
BreakContinueStack.back().ContinueBlock.getBlock()8
4979
16
                           : 
OMPScanReduce8
;
4980
16
    EmitBlock(OMPAfterScanBlock);
4981
16
    return;
4982
16
  }
4983
32
  if (!IsInclusive) {
4984
16
    EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
4985
16
    EmitBlock(OMPScanExitBlock);
4986
16
  }
4987
32
  if (OMPFirstScanLoop) {
4988
    // Emit buffer[i] = red; at the end of the input phase.
4989
16
    const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
4990
16
                             .getIterationVariable()
4991
16
                             ->IgnoreParenImpCasts();
4992
16
    LValue IdxLVal = EmitLValue(IVExpr);
4993
16
    llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
4994
16
    IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
4995
48
    for (unsigned I = 0, E = CopyArrayElems.size(); I < E; 
++I32
) {
4996
32
      const Expr *PrivateExpr = Privates[I];
4997
32
      const Expr *OrigExpr = Shareds[I];
4998
32
      const Expr *CopyArrayElem = CopyArrayElems[I];
4999
32
      OpaqueValueMapping IdxMapping(
5000
32
          *this,
5001
32
          cast<OpaqueValueExpr>(
5002
32
              cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
5003
32
          RValue::get(IdxVal));
5004
32
      LValue DestLVal = EmitLValue(CopyArrayElem);
5005
32
      LValue SrcLVal = EmitLValue(OrigExpr);