Coverage Report

Created: 2022-01-18 06:27

/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/CodeGen/CGStmtOpenMP.cpp
Line
Count
Source (jump to first uncovered line)
1
//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This contains code to emit OpenMP nodes as LLVM code.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "CGCleanup.h"
14
#include "CGOpenMPRuntime.h"
15
#include "CodeGenFunction.h"
16
#include "CodeGenModule.h"
17
#include "TargetInfo.h"
18
#include "clang/AST/ASTContext.h"
19
#include "clang/AST/Attr.h"
20
#include "clang/AST/DeclOpenMP.h"
21
#include "clang/AST/OpenMPClause.h"
22
#include "clang/AST/Stmt.h"
23
#include "clang/AST/StmtOpenMP.h"
24
#include "clang/AST/StmtVisitor.h"
25
#include "clang/Basic/OpenMPKinds.h"
26
#include "clang/Basic/PrettyStackTrace.h"
27
#include "llvm/BinaryFormat/Dwarf.h"
28
#include "llvm/Frontend/OpenMP/OMPConstants.h"
29
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
30
#include "llvm/IR/Constants.h"
31
#include "llvm/IR/DebugInfoMetadata.h"
32
#include "llvm/IR/Instructions.h"
33
#include "llvm/IR/Metadata.h"
34
#include "llvm/Support/AtomicOrdering.h"
35
using namespace clang;
36
using namespace CodeGen;
37
using namespace llvm::omp;
38
39
static const VarDecl *getBaseDecl(const Expr *Ref);
40
41
namespace {
42
/// Lexical scope for OpenMP executable constructs, that handles correct codegen
43
/// for captured expressions.
44
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
45
15.7k
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
46
18.5k
    for (const auto *C : S.clauses()) {
47
18.5k
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
48
10.7k
        if (const auto *PreInit =
49
10.7k
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
50
1.14k
          for (const auto *I : PreInit->decls()) {
51
1.14k
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
52
1.12k
              CGF.EmitVarDecl(cast<VarDecl>(*I));
53
1.12k
            } else {
54
18
              CodeGenFunction::AutoVarEmission Emission =
55
18
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
56
18
              CGF.EmitAutoVarCleanups(Emission);
57
18
            }
58
1.14k
          }
59
1.06k
        }
60
10.7k
      }
61
18.5k
    }
62
15.7k
  }
63
  CodeGenFunction::OMPPrivateScope InlinedShareds;
64
65
17.0k
  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
66
17.0k
    return CGF.LambdaCaptureFields.lookup(VD) ||
67
17.0k
           
(16.5k
CGF.CapturedStmtInfo16.5k
&&
CGF.CapturedStmtInfo->lookup(VD)4.22k
) ||
68
17.0k
           
(12.9k
CGF.CurCodeDecl12.9k
&&
isa<BlockDecl>(CGF.CurCodeDecl)12.9k
&&
69
12.9k
            
cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD)8
);
70
17.0k
  }
71
72
public:
73
  OMPLexicalScope(
74
      CodeGenFunction &CGF, const OMPExecutableDirective &S,
75
      const llvm::Optional<OpenMPDirectiveKind> CapturedRegion = llvm::None,
76
      const bool EmitPreInitStmt = true)
77
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
78
25.4k
        InlinedShareds(CGF) {
79
25.4k
    if (EmitPreInitStmt)
80
15.7k
      emitPreInitStmt(CGF, S);
81
25.4k
    if (!CapturedRegion.hasValue())
82
12.6k
      return;
83
12.8k
    assert(S.hasAssociatedStmt() &&
84
12.8k
           "Expected associated statement for inlined directive.");
85
0
    const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
86
19.2k
    for (const auto &C : CS->captures()) {
87
19.2k
      if (C.capturesVariable() || 
C.capturesVariableByCopy()11.0k
) {
88
17.0k
        auto *VD = C.getCapturedVar();
89
17.0k
        assert(VD == VD->getCanonicalDecl() &&
90
17.0k
               "Canonical decl must be captured.");
91
0
        DeclRefExpr DRE(
92
17.0k
            CGF.getContext(), const_cast<VarDecl *>(VD),
93
17.0k
            isCapturedVar(CGF, VD) || 
(12.9k
CGF.CapturedStmtInfo12.9k
&&
94
12.9k
                                       
InlinedShareds.isGlobalVarCaptured(VD)668
),
95
17.0k
            VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
96
17.0k
        InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
97
17.0k
          return CGF.EmitLValue(&DRE).getAddress(CGF);
98
17.0k
        });
99
17.0k
      }
100
19.2k
    }
101
12.8k
    (void)InlinedShareds.Privatize();
102
12.8k
  }
103
};
104
105
/// Lexical scope for OpenMP parallel construct, that handles correct codegen
106
/// for captured expressions.
107
class OMPParallelScope final : public OMPLexicalScope {
108
6.18k
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
109
6.18k
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
110
6.18k
    return !(isOpenMPTargetExecutionDirective(Kind) ||
111
6.18k
             
isOpenMPLoopBoundSharingDirective(Kind)2.87k
) &&
112
6.18k
           
isOpenMPParallelDirective(Kind)1.48k
;
113
6.18k
  }
114
115
public:
116
  OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
117
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
118
6.18k
                        EmitPreInitStmt(S)) {}
119
};
120
121
/// Lexical scope for OpenMP teams construct, that handles correct codegen
122
/// for captured expressions.
123
class OMPTeamsScope final : public OMPLexicalScope {
124
5.71k
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
125
5.71k
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
126
5.71k
    return !isOpenMPTargetExecutionDirective(Kind) &&
127
5.71k
           
isOpenMPTeamsDirective(Kind)1.90k
;
128
5.71k
  }
129
130
public:
131
  OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
132
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
133
5.71k
                        EmitPreInitStmt(S)) {}
134
};
135
136
/// Private scope for OpenMP loop-based directives, that supports capturing
137
/// of used expression from loop statement.
138
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
139
17.4k
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopBasedDirective &S) {
140
17.4k
    const DeclStmt *PreInits;
141
17.4k
    CodeGenFunction::OMPMapVars PreCondVars;
142
17.4k
    if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
143
17.4k
      llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
144
18.1k
      for (const auto *E : LD->counters()) {
145
18.1k
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
146
18.1k
        EmittedAsPrivate.insert(VD->getCanonicalDecl());
147
18.1k
        (void)PreCondVars.setVarAddr(
148
18.1k
            CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
149
18.1k
      }
150
      // Mark private vars as undefs.
151
17.4k
      for (const auto *C : LD->getClausesOfKind<OMPPrivateClause>()) {
152
2.85k
        for (const Expr *IRef : C->varlists()) {
153
2.85k
          const auto *OrigVD =
154
2.85k
              cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
155
2.85k
          if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
156
2.52k
            (void)PreCondVars.setVarAddr(
157
2.52k
                CGF, OrigVD,
158
2.52k
                Address(llvm::UndefValue::get(CGF.ConvertTypeForMem(
159
2.52k
                            CGF.getContext().getPointerType(
160
2.52k
                                OrigVD->getType().getNonReferenceType()))),
161
2.52k
                        CGF.getContext().getDeclAlign(OrigVD)));
162
2.52k
          }
163
2.85k
        }
164
716
      }
165
17.4k
      (void)PreCondVars.apply(CGF);
166
      // Emit init, __range and __end variables for C++ range loops.
167
17.4k
      (void)OMPLoopBasedDirective::doForAllLoops(
168
17.4k
          LD->getInnermostCapturedStmt()->getCapturedStmt(),
169
17.4k
          /*TryImperfectlyNestedLoops=*/true, LD->getLoopsNumber(),
170
18.1k
          [&CGF](unsigned Cnt, const Stmt *CurStmt) {
171
18.1k
            if (const auto *CXXFor = dyn_cast<CXXForRangeStmt>(CurStmt)) {
172
6
              if (const Stmt *Init = CXXFor->getInit())
173
0
                CGF.EmitStmt(Init);
174
6
              CGF.EmitStmt(CXXFor->getRangeStmt());
175
6
              CGF.EmitStmt(CXXFor->getEndStmt());
176
6
            }
177
18.1k
            return false;
178
18.1k
          });
179
17.4k
      PreInits = cast_or_null<DeclStmt>(LD->getPreInits());
180
17.4k
    } else 
if (const auto *8
Tile8
= dyn_cast<OMPTileDirective>(&S)) {
181
8
      PreInits = cast_or_null<DeclStmt>(Tile->getPreInits());
182
8
    } else 
if (const auto *0
Unroll0
= dyn_cast<OMPUnrollDirective>(&S)) {
183
0
      PreInits = cast_or_null<DeclStmt>(Unroll->getPreInits());
184
0
    } else {
185
0
      llvm_unreachable("Unknown loop-based directive kind.");
186
0
    }
187
17.4k
    if (PreInits) {
188
3.09k
      for (const auto *I : PreInits->decls())
189
6.90k
        CGF.EmitVarDecl(cast<VarDecl>(*I));
190
3.09k
    }
191
17.4k
    PreCondVars.restore(CGF);
192
17.4k
  }
193
194
public:
195
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopBasedDirective &S)
196
17.4k
      : CodeGenFunction::RunCleanupsScope(CGF) {
197
17.4k
    emitPreInitStmt(CGF, S);
198
17.4k
  }
199
};
200
201
class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
202
  CodeGenFunction::OMPPrivateScope InlinedShareds;
203
204
41.8k
  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
205
41.8k
    return CGF.LambdaCaptureFields.lookup(VD) ||
206
41.8k
           
(40.8k
CGF.CapturedStmtInfo40.8k
&&
CGF.CapturedStmtInfo->lookup(VD)6.55k
) ||
207
41.8k
           
(40.8k
CGF.CurCodeDecl40.8k
&&
isa<BlockDecl>(CGF.CurCodeDecl)40.8k
&&
208
40.8k
            
cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD)78
);
209
41.8k
  }
210
211
public:
212
  OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
213
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
214
13.4k
        InlinedShareds(CGF) {
215
17.5k
    for (const auto *C : S.clauses()) {
216
17.5k
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
217
11.5k
        if (const auto *PreInit =
218
11.5k
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
219
1.36k
          for (const auto *I : PreInit->decls()) {
220
1.36k
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
221
1.34k
              CGF.EmitVarDecl(cast<VarDecl>(*I));
222
1.34k
            } else {
223
18
              CodeGenFunction::AutoVarEmission Emission =
224
18
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
225
18
              CGF.EmitAutoVarCleanups(Emission);
226
18
            }
227
1.36k
          }
228
1.29k
        }
229
11.5k
      } else 
if (const auto *5.99k
UDP5.99k
= dyn_cast<OMPUseDevicePtrClause>(C)) {
230
86
        for (const Expr *E : UDP->varlists()) {
231
86
          const Decl *D = cast<DeclRefExpr>(E)->getDecl();
232
86
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
233
20
            CGF.EmitVarDecl(*OED);
234
86
        }
235
5.92k
      } else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(C)) {
236
24
        for (const Expr *E : UDP->varlists()) {
237
24
          const Decl *D = getBaseDecl(E);
238
24
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
239
10
            CGF.EmitVarDecl(*OED);
240
24
        }
241
6
      }
242
17.5k
    }
243
13.4k
    if (!isOpenMPSimdDirective(S.getDirectiveKind()))
244
9.94k
      CGF.EmitOMPPrivateClause(S, InlinedShareds);
245
13.4k
    if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
246
37
      if (const Expr *E = TG->getReductionRef())
247
26
        CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
248
37
    }
249
    // Temp copy arrays for inscan reductions should not be emitted as they are
250
    // not used in simd only mode.
251
13.4k
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> CopyArrayTemps;
252
13.4k
    for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
253
466
      if (C->getModifier() != OMPC_REDUCTION_inscan)
254
446
        continue;
255
20
      for (const Expr *E : C->copy_array_temps())
256
36
        CopyArrayTemps.insert(cast<DeclRefExpr>(E)->getDecl());
257
20
    }
258
13.4k
    const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
259
41.3k
    while (CS) {
260
47.9k
      for (auto &C : CS->captures()) {
261
47.9k
        if (C.capturesVariable() || 
C.capturesVariableByCopy()30.9k
) {
262
41.8k
          auto *VD = C.getCapturedVar();
263
41.8k
          if (CopyArrayTemps.contains(VD))
264
16
            continue;
265
41.8k
          assert(VD == VD->getCanonicalDecl() &&
266
41.8k
                 "Canonical decl must be captured.");
267
0
          DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
268
41.8k
                          isCapturedVar(CGF, VD) ||
269
41.8k
                              
(40.7k
CGF.CapturedStmtInfo40.7k
&&
270
40.7k
                               
InlinedShareds.isGlobalVarCaptured(VD)6.53k
),
271
41.8k
                          VD->getType().getNonReferenceType(), VK_LValue,
272
41.8k
                          C.getLocation());
273
41.8k
          InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
274
41.8k
            return CGF.EmitLValue(&DRE).getAddress(CGF);
275
41.8k
          });
276
41.8k
        }
277
47.9k
      }
278
27.9k
      CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
279
27.9k
    }
280
13.4k
    (void)InlinedShareds.Privatize();
281
13.4k
  }
282
};
283
284
} // namespace
285
286
static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
287
                                         const OMPExecutableDirective &S,
288
                                         const RegionCodeGenTy &CodeGen);
289
290
12.9k
LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
291
12.9k
  if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
292
9.46k
    if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
293
9.46k
      OrigVD = OrigVD->getCanonicalDecl();
294
9.46k
      bool IsCaptured =
295
9.46k
          LambdaCaptureFields.lookup(OrigVD) ||
296
9.46k
          
(9.34k
CapturedStmtInfo9.34k
&&
CapturedStmtInfo->lookup(OrigVD)972
) ||
297
9.46k
          
(8.67k
CurCodeDecl8.67k
&&
isa<BlockDecl>(CurCodeDecl)8.61k
);
298
9.46k
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured,
299
9.46k
                      OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
300
9.46k
      return EmitLValue(&DRE);
301
9.46k
    }
302
9.46k
  }
303
3.52k
  return EmitLValue(E);
304
12.9k
}
305
306
17.6k
llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
307
17.6k
  ASTContext &C = getContext();
308
17.6k
  llvm::Value *Size = nullptr;
309
17.6k
  auto SizeInChars = C.getTypeSizeInChars(Ty);
310
17.6k
  if (SizeInChars.isZero()) {
311
    // getTypeSizeInChars() returns 0 for a VLA.
312
2.33k
    while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) {
313
1.16k
      VlaSizePair VlaSize = getVLASize(VAT);
314
1.16k
      Ty = VlaSize.Type;
315
1.16k
      Size =
316
1.16k
          Size ? 
Builder.CreateNUWMul(Size, VlaSize.NumElts)0
: VlaSize.NumElts;
317
1.16k
    }
318
1.16k
    SizeInChars = C.getTypeSizeInChars(Ty);
319
1.16k
    if (SizeInChars.isZero())
320
0
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
321
1.16k
    return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
322
1.16k
  }
323
16.4k
  return CGM.getSize(SizeInChars);
324
17.6k
}
325
326
void CodeGenFunction::GenerateOpenMPCapturedVars(
327
21.5k
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
328
21.5k
  const RecordDecl *RD = S.getCapturedRecordDecl();
329
21.5k
  auto CurField = RD->field_begin();
330
21.5k
  auto CurCap = S.captures().begin();
331
21.5k
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
332
21.5k
                                                 E = S.capture_init_end();
333
53.2k
       I != E; 
++I, ++CurField, ++CurCap31.7k
) {
334
31.7k
    if (CurField->hasCapturedVLAType()) {
335
2.63k
      const VariableArrayType *VAT = CurField->getCapturedVLAType();
336
2.63k
      llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
337
2.63k
      CapturedVars.push_back(Val);
338
29.1k
    } else if (CurCap->capturesThis()) {
339
1.68k
      CapturedVars.push_back(CXXThisValue);
340
27.4k
    } else if (CurCap->capturesVariableByCopy()) {
341
14.5k
      llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());
342
343
      // If the field is not a pointer, we need to save the actual value
344
      // and load it as a void pointer.
345
14.5k
      if (!CurField->getType()->isAnyPointerType()) {
346
12.7k
        ASTContext &Ctx = getContext();
347
12.7k
        Address DstAddr = CreateMemTemp(
348
12.7k
            Ctx.getUIntPtrType(),
349
12.7k
            Twine(CurCap->getCapturedVar()->getName(), ".casted"));
350
12.7k
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());
351
352
12.7k
        llvm::Value *SrcAddrVal = EmitScalarConversion(
353
12.7k
            DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
354
12.7k
            Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
355
12.7k
        LValue SrcLV =
356
12.7k
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());
357
358
        // Store the value using the source type pointer.
359
12.7k
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);
360
361
        // Load the value using the destination type pointer.
362
12.7k
        CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
363
12.7k
      }
364
14.5k
      CapturedVars.push_back(CV);
365
14.5k
    } else {
366
12.8k
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
367
0
      CapturedVars.push_back(EmitLValue(*I).getAddress(*this).getPointer());
368
12.8k
    }
369
31.7k
  }
370
21.5k
}
371
372
static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
373
                                    QualType DstType, StringRef Name,
374
17.8k
                                    LValue AddrLV) {
375
17.8k
  ASTContext &Ctx = CGF.getContext();
376
377
17.8k
  llvm::Value *CastedPtr = CGF.EmitScalarConversion(
378
17.8k
      AddrLV.getAddress(CGF).getPointer(), Ctx.getUIntPtrType(),
379
17.8k
      Ctx.getPointerType(DstType), Loc);
380
17.8k
  Address TmpAddr =
381
17.8k
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, DstType).getAddress(CGF);
382
17.8k
  return TmpAddr;
383
17.8k
}
384
385
7.19k
static QualType getCanonicalParamType(ASTContext &C, QualType T) {
386
7.19k
  if (T->isLValueReferenceType())
387
2.10k
    return C.getLValueReferenceType(
388
2.10k
        getCanonicalParamType(C, T.getNonReferenceType()),
389
2.10k
        /*SpelledAsLValue=*/false);
390
5.08k
  if (T->isPointerType())
391
27
    return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
392
5.06k
  if (const ArrayType *A = T->getAsArrayTypeUnsafe()) {
393
2.99k
    if (const auto *VLA = dyn_cast<VariableArrayType>(A))
394
2.93k
      return getCanonicalParamType(C, VLA->getElementType());
395
60
    if (!A->isVariablyModifiedType())
396
60
      return C.getCanonicalType(T);
397
60
  }
398
2.06k
  return C.getCanonicalParamType(T);
399
5.06k
}
400
401
namespace {
402
/// Contains required data for proper outlined function codegen.
403
struct FunctionOptions {
404
  /// Captured statement for which the function is generated.
405
  const CapturedStmt *S = nullptr;
406
  /// true if cast to/from  UIntPtr is required for variables captured by
407
  /// value.
408
  const bool UIntPtrCastRequired = true;
409
  /// true if only casted arguments must be registered as local args or VLA
410
  /// sizes.
411
  const bool RegisterCastedArgsOnly = false;
412
  /// Name of the generated function.
413
  const StringRef FunctionName;
414
  /// Location of the non-debug version of the outlined function.
415
  SourceLocation Loc;
416
  explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
417
                           bool RegisterCastedArgsOnly, StringRef FunctionName,
418
                           SourceLocation Loc)
419
      : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
420
        RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
421
23.9k
        FunctionName(FunctionName), Loc(Loc) {}
422
};
423
} // namespace
424
425
static llvm::Function *emitOutlinedFunctionPrologue(
426
    CodeGenFunction &CGF, FunctionArgList &Args,
427
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
428
        &LocalAddrs,
429
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
430
        &VLASizes,
431
23.9k
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
432
23.9k
  const CapturedDecl *CD = FO.S->getCapturedDecl();
433
23.9k
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
434
23.9k
  assert(CD->hasBody() && "missing CapturedDecl body");
435
436
0
  CXXThisValue = nullptr;
437
  // Build the argument list.
438
23.9k
  CodeGenModule &CGM = CGF.CGM;
439
23.9k
  ASTContext &Ctx = CGM.getContext();
440
23.9k
  FunctionArgList TargetArgs;
441
23.9k
  Args.append(CD->param_begin(),
442
23.9k
              std::next(CD->param_begin(), CD->getContextParamPosition()));
443
23.9k
  TargetArgs.append(
444
23.9k
      CD->param_begin(),
445
23.9k
      std::next(CD->param_begin(), CD->getContextParamPosition()));
446
23.9k
  auto I = FO.S->captures().begin();
447
23.9k
  FunctionDecl *DebugFunctionDecl = nullptr;
448
23.9k
  if (!FO.UIntPtrCastRequired) {
449
152
    FunctionProtoType::ExtProtoInfo EPI;
450
152
    QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI);
451
152
    DebugFunctionDecl = FunctionDecl::Create(
452
152
        Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(),
453
152
        SourceLocation(), DeclarationName(), FunctionTy,
454
152
        Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static,
455
152
        /*UsesFPIntrin=*/false, /*isInlineSpecified=*/false,
456
152
        /*hasWrittenPrototype=*/false);
457
152
  }
458
35.6k
  for (const FieldDecl *FD : RD->fields()) {
459
35.6k
    QualType ArgType = FD->getType();
460
35.6k
    IdentifierInfo *II = nullptr;
461
35.6k
    VarDecl *CapVar = nullptr;
462
463
    // If this is a capture by copy and the type is not a pointer, the outlined
464
    // function argument type should be uintptr and the value properly casted to
465
    // uintptr. This is necessary given that the runtime library is only able to
466
    // deal with pointers. We can pass in the same way the VLA type sizes to the
467
    // outlined function.
468
35.6k
    if (FO.UIntPtrCastRequired &&
469
35.6k
        
(35.4k
(35.4k
I->capturesVariableByCopy()35.4k
&&
!ArgType->isAnyPointerType()16.7k
) ||
470
35.4k
         
I->capturesVariableArrayType()20.6k
))
471
17.8k
      ArgType = Ctx.getUIntPtrType();
472
473
35.6k
    if (I->capturesVariable() || 
I->capturesVariableByCopy()21.6k
) {
474
30.7k
      CapVar = I->getCapturedVar();
475
30.7k
      II = CapVar->getIdentifier();
476
30.7k
    } else 
if (4.88k
I->capturesThis()4.88k
) {
477
1.84k
      II = &Ctx.Idents.get("this");
478
3.04k
    } else {
479
3.04k
      assert(I->capturesVariableArrayType());
480
0
      II = &Ctx.Idents.get("vla");
481
3.04k
    }
482
35.6k
    if (ArgType->isVariablyModifiedType())
483
2.12k
      ArgType = getCanonicalParamType(Ctx, ArgType);
484
35.6k
    VarDecl *Arg;
485
35.6k
    if (DebugFunctionDecl && 
(204
CapVar204
||
I->capturesThis()17
)) {
486
193
      Arg = ParmVarDecl::Create(
487
193
          Ctx, DebugFunctionDecl,
488
193
          CapVar ? 
CapVar->getBeginLoc()187
:
FD->getBeginLoc()6
,
489
193
          CapVar ? 
CapVar->getLocation()187
:
FD->getLocation()6
, II, ArgType,
490
193
          /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
491
35.4k
    } else {
492
35.4k
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
493
35.4k
                                      II, ArgType, ImplicitParamDecl::Other);
494
35.4k
    }
495
35.6k
    Args.emplace_back(Arg);
496
    // Do not cast arguments if we emit function with non-original types.
497
35.6k
    TargetArgs.emplace_back(
498
35.6k
        FO.UIntPtrCastRequired
499
35.6k
            ? 
Arg35.4k
500
35.6k
            : 
CGM.getOpenMPRuntime().translateParameter(FD, Arg)204
);
501
35.6k
    ++I;
502
35.6k
  }
503
23.9k
  Args.append(std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
504
23.9k
              CD->param_end());
505
23.9k
  TargetArgs.append(
506
23.9k
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
507
23.9k
      CD->param_end());
508
509
  // Create the function declaration.
510
23.9k
  const CGFunctionInfo &FuncInfo =
511
23.9k
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
512
23.9k
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
513
514
23.9k
  auto *F =
515
23.9k
      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
516
23.9k
                             FO.FunctionName, &CGM.getModule());
517
23.9k
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
518
23.9k
  if (CD->isNothrow())
519
23.9k
    F->setDoesNotThrow();
520
23.9k
  F->setDoesNotRecurse();
521
522
  // Always inline the outlined function if optimizations are enabled.
523
23.9k
  if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
524
107
    F->removeFnAttr(llvm::Attribute::NoInline);
525
107
    F->addFnAttr(llvm::Attribute::AlwaysInline);
526
107
  }
527
528
  // Generate the function.
529
23.9k
  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
530
23.9k
                    FO.UIntPtrCastRequired ? 
FO.Loc23.8k
:
FO.S->getBeginLoc()152
,
531
23.9k
                    FO.UIntPtrCastRequired ? 
FO.Loc23.8k
532
23.9k
                                           : 
CD->getBody()->getBeginLoc()152
);
533
23.9k
  unsigned Cnt = CD->getContextParamPosition();
534
23.9k
  I = FO.S->captures().begin();
535
35.6k
  for (const FieldDecl *FD : RD->fields()) {
536
    // Do not map arguments if we emit function with non-original types.
537
35.6k
    Address LocalAddr(Address::invalid());
538
35.6k
    if (!FO.UIntPtrCastRequired && 
Args[Cnt] != TargetArgs[Cnt]204
) {
539
58
      LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
540
58
                                                             TargetArgs[Cnt]);
541
35.6k
    } else {
542
35.6k
      LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
543
35.6k
    }
544
    // If we are capturing a pointer by copy we don't need to do anything, just
545
    // use the value that we get from the arguments.
546
35.6k
    if (I->capturesVariableByCopy() && 
FD->getType()->isAnyPointerType()16.7k
) {
547
1.91k
      const VarDecl *CurVD = I->getCapturedVar();
548
1.91k
      if (!FO.RegisterCastedArgsOnly)
549
1.91k
        LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
550
1.91k
      ++Cnt;
551
1.91k
      ++I;
552
1.91k
      continue;
553
1.91k
    }
554
555
33.7k
    LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
556
33.7k
                                        AlignmentSource::Decl);
557
33.7k
    if (FD->hasCapturedVLAType()) {
558
3.04k
      if (FO.UIntPtrCastRequired) {
559
3.03k
        ArgLVal = CGF.MakeAddrLValue(
560
3.03k
            castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
561
3.03k
                                 Args[Cnt]->getName(), ArgLVal),
562
3.03k
            FD->getType(), AlignmentSource::Decl);
563
3.03k
      }
564
3.04k
      llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
565
3.04k
      const VariableArrayType *VAT = FD->getCapturedVLAType();
566
3.04k
      VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg);
567
30.7k
    } else if (I->capturesVariable()) {
568
14.0k
      const VarDecl *Var = I->getCapturedVar();
569
14.0k
      QualType VarTy = Var->getType();
570
14.0k
      Address ArgAddr = ArgLVal.getAddress(CGF);
571
14.0k
      if (ArgLVal.getType()->isLValueReferenceType()) {
572
14.0k
        ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
573
14.0k
      } else 
if (0
!VarTy->isVariablyModifiedType()0
||
!VarTy->isPointerType()0
) {
574
0
        assert(ArgLVal.getType()->isPointerType());
575
0
        ArgAddr = CGF.EmitLoadOfPointer(
576
0
            ArgAddr, ArgLVal.getType()->castAs<PointerType>());
577
0
      }
578
14.0k
      if (!FO.RegisterCastedArgsOnly) {
579
13.8k
        LocalAddrs.insert(
580
13.8k
            {Args[Cnt],
581
13.8k
             {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
582
13.8k
      }
583
16.7k
    } else if (I->capturesVariableByCopy()) {
584
14.8k
      assert(!FD->getType()->isAnyPointerType() &&
585
14.8k
             "Not expecting a captured pointer.");
586
0
      const VarDecl *Var = I->getCapturedVar();
587
14.8k
      LocalAddrs.insert({Args[Cnt],
588
14.8k
                         {Var, FO.UIntPtrCastRequired
589
14.8k
                                   ? castValueFromUintptr(
590
14.8k
                                         CGF, I->getLocation(), FD->getType(),
591
14.8k
                                         Args[Cnt]->getName(), ArgLVal)
592
14.8k
                                   : 
ArgLVal.getAddress(CGF)23
}});
593
14.8k
    } else {
594
      // If 'this' is captured, load it into CXXThisValue.
595
1.84k
      assert(I->capturesThis());
596
0
      CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
597
1.84k
      LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress(CGF)}});
598
1.84k
    }
599
0
    ++Cnt;
600
33.7k
    ++I;
601
33.7k
  }
602
603
23.9k
  return F;
604
23.9k
}
605
606
llvm::Function *
607
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
608
23.8k
                                                    SourceLocation Loc) {
609
23.8k
  assert(
610
23.8k
      CapturedStmtInfo &&
611
23.8k
      "CapturedStmtInfo should be set when generating the captured function");
612
0
  const CapturedDecl *CD = S.getCapturedDecl();
613
  // Build the argument list.
614
23.8k
  bool NeedWrapperFunction =
615
23.8k
      getDebugInfo() && 
CGM.getCodeGenOpts().hasReducedDebugInfo()312
;
616
23.8k
  FunctionArgList Args;
617
23.8k
  llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
618
23.8k
  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
619
23.8k
  SmallString<256> Buffer;
620
23.8k
  llvm::raw_svector_ostream Out(Buffer);
621
23.8k
  Out << CapturedStmtInfo->getHelperName();
622
23.8k
  if (NeedWrapperFunction)
623
152
    Out << "_debug__";
624
23.8k
  FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
625
23.8k
                     Out.str(), Loc);
626
23.8k
  llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
627
23.8k
                                                   VLASizes, CXXThisValue, FO);
628
23.8k
  CodeGenFunction::OMPPrivateScope LocalScope(*this);
629
32.4k
  for (const auto &LocalAddrPair : LocalAddrs) {
630
32.4k
    if (LocalAddrPair.second.first) {
631
30.5k
      LocalScope.addPrivate(LocalAddrPair.second.first, [&LocalAddrPair]() {
632
30.5k
        return LocalAddrPair.second.second;
633
30.5k
      });
634
30.5k
    }
635
32.4k
  }
636
23.8k
  (void)LocalScope.Privatize();
637
23.8k
  for (const auto &VLASizePair : VLASizes)
638
3.03k
    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
639
23.8k
  PGO.assignRegionCounters(GlobalDecl(CD), F);
640
23.8k
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
641
23.8k
  (void)LocalScope.ForceCleanup();
642
23.8k
  FinishFunction(CD->getBodyRBrace());
643
23.8k
  if (!NeedWrapperFunction)
644
23.6k
    return F;
645
646
152
  FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
647
152
                            /*RegisterCastedArgsOnly=*/true,
648
152
                            CapturedStmtInfo->getHelperName(), Loc);
649
152
  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
650
152
  WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
651
152
  Args.clear();
652
152
  LocalAddrs.clear();
653
152
  VLASizes.clear();
654
152
  llvm::Function *WrapperF =
655
152
      emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
656
152
                                   WrapperCGF.CXXThisValue, WrapperFO);
657
152
  llvm::SmallVector<llvm::Value *, 4> CallArgs;
658
152
  auto *PI = F->arg_begin();
659
324
  for (const auto *Arg : Args) {
660
324
    llvm::Value *CallArg;
661
324
    auto I = LocalAddrs.find(Arg);
662
324
    if (I != LocalAddrs.end()) {
663
29
      LValue LV = WrapperCGF.MakeAddrLValue(
664
29
          I->second.second,
665
29
          I->second.first ? 
I->second.first->getType()23
:
Arg->getType()6
,
666
29
          AlignmentSource::Decl);
667
29
      if (LV.getType()->isAnyComplexType())
668
1
        LV.setAddress(WrapperCGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
669
1
            LV.getAddress(WrapperCGF),
670
1
            PI->getType()->getPointerTo(
671
1
                LV.getAddress(WrapperCGF).getAddressSpace())));
672
29
      CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
673
295
    } else {
674
295
      auto EI = VLASizes.find(Arg);
675
295
      if (EI != VLASizes.end()) {
676
11
        CallArg = EI->second.second;
677
284
      } else {
678
284
        LValue LV =
679
284
            WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
680
284
                                      Arg->getType(), AlignmentSource::Decl);
681
284
        CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
682
284
      }
683
295
    }
684
324
    CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
685
324
    ++PI;
686
324
  }
687
152
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs);
688
152
  WrapperCGF.FinishFunction();
689
152
  return WrapperF;
690
23.8k
}
691
692
//===----------------------------------------------------------------------===//
693
//                              OpenMP Directive Emission
694
//===----------------------------------------------------------------------===//
695
// Emits an element-by-element copy loop from SrcAddr to DestAddr for an
// array of type OriginalType. The actual per-element copy is delegated to
// CopyGen, which receives (destination element, source element) addresses.
// Used when a plain aggregate memcpy is not sufficient (e.g. elements need
// a copy constructor / assignment operator).
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  // Make the source pointer type match the destination's element type so the
  // two loops below step in lock-step over identically-typed elements.
  SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done");
  // Guard against the zero-element case: skip the body entirely when the
  // destination range is empty.
  llvm::Value *IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  // PHI over the source element pointer: incoming from the entry block now,
  // and from the loop latch after the back-edge is created below.
  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI,
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Matching PHI for the destination element pointer.
  llvm::PHINode *DestElementPHI = Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  llvm::Value *DestElementNext =
      Builder.CreateConstGEP1_32(DestAddr.getElementType(), DestElementPHI,
                                 /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *SrcElementNext =
      Builder.CreateConstGEP1_32(SrcAddr.getElementType(), SrcElementPHI,
                                 /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // Close the PHIs with the back-edge values. GetInsertBlock() is used (not
  // BodyBB) because CopyGen may have emitted additional blocks.
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}
758
759
// Emit a copy of OriginalType data from SrcAddr into DestAddr, using Copy —
// the assignment expression built by Sema over the pseudo-variables SrcVD and
// DestVD. Arrays that are copied with a plain '=' are lowered to an aggregate
// assignment; arrays needing user-defined copy semantics are copied element by
// element; everything else evaluates Copy with the pseudo-variables remapped
// to the real source/destination storage.
void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (!OriginalType->isArrayType()) {
    // Non-array: remap the pseudo source/destination variables to the actual
    // storage and evaluate the whole copy expression once.
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, [SrcAddr]() { return SrcAddr; });
    Remap.addPrivate(DestVD, [DestAddr]() { return DestAddr; });
    (void)Remap.Privatize();
    EmitIgnoredExpr(Copy);
    return;
  }

  // Array case. A plain built-in assignment can be done as one aggregate
  // copy (simple memcpy).
  const auto *Assign = dyn_cast<BinaryOperator>(Copy);
  if (Assign && Assign->getOpcode() == BO_Assign) {
    LValue DstLV = MakeAddrLValue(DestAddr, OriginalType);
    LValue SrcLV = MakeAddrLValue(SrcAddr, OriginalType);
    EmitAggregateAssign(DstLV, SrcLV, OriginalType);
    return;
  }

  // Arrays with complex element types: copy element by element, remapping the
  // pseudo-variables to the current pair of elements for each iteration.
  EmitOMPAggregateAssign(
      DestAddr, SrcAddr, OriginalType,
      [this, Copy, SrcVD, DestVD](Address DstElem, Address SrcElem) {
        CodeGenFunction::OMPPrivateScope ElemRemap(*this);
        ElemRemap.addPrivate(DestVD, [DstElem]() { return DstElem; });
        ElemRemap.addPrivate(SrcVD, [SrcElem]() { return SrcElem; });
        (void)ElemRemap.Privatize();
        EmitIgnoredExpr(Copy);
      });
}
795
796
// Emit initialization of the private copies for all 'firstprivate' clause
// variables of directive D, registering each copy in PrivateScope.
// Returns true when at least one emitted firstprivate variable is also
// lastprivate (callers then need a lastprivate final update).
bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  // On a device, constant captures in target regions may not need a copy.
  bool DeviceConstTarget =
      getLangOpts().OpenMPIsDevice &&
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  bool FirstprivateIsLastprivate = false;
  // Map from canonical decl to its lastprivate modifier, used to detect
  // variables that appear in both firstprivate and lastprivate clauses.
  llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.try_emplace(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl(),
          C->getKind());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  // Force emission of the firstprivate copy if the directive does not emit
  // outlined function, like omp for, omp simd, omp distribute etc.
  bool MustEmitFirstprivateCopy =
      CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    const auto *IRef = C->varlist_begin();
    const auto *InitsRef = C->inits().begin();
    // Walk the clause's variables together with their private copies and
    // pseudo-init variables (three parallel sequences).
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      // Skip the copy when the variable was captured by value into an
      // outlined region (the capture itself is already a private copy),
      // unless a custom allocator or lastprivate interaction forces it.
      if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
          !FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      // Do not emit copy for firstprivate constant variables in target regions,
      // captured by reference.
      if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) &&
          FD && FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      // Emit each variable only once even if it appears in several clauses.
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VDInit =
            cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        LValue OriginalLVal;
        if (!FD) {
          // Check if the firstprivate variable is just a constant value.
          ConstantEmission CE = tryEmitAsConstant(&DRE);
          if (CE && !CE.isReference()) {
            // Constant value, no need to create a copy.
            ++IRef;
            ++InitsRef;
            continue;
          }
          if (CE && CE.isReference()) {
            OriginalLVal = CE.getReferenceLValue(*this, &DRE);
          } else {
            assert(!CE && "Expected non-constant firstprivate.");
            OriginalLVal = EmitLValue(&DRE);
          }
        } else {
          OriginalLVal = EmitLValue(&DRE);
        }
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(
              OrigVD, [this, VD, Type, OriginalLVal, VDInit]() {
                AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
                const Expr *Init = VD->getInit();
                if (!isa<CXXConstructExpr>(Init) ||
                    isTrivialInitializer(Init)) {
                  // Perform simple memcpy.
                  LValue Dest =
                      MakeAddrLValue(Emission.getAllocatedAddress(), Type);
                  EmitAggregateAssign(Dest, OriginalLVal, Type);
                } else {
                  // Non-trivially-constructible elements: run the copy
                  // initializer once per array element.
                  EmitOMPAggregateAssign(
                      Emission.getAllocatedAddress(),
                      OriginalLVal.getAddress(*this), Type,
                      [this, VDInit, Init](Address DestElement,
                                           Address SrcElement) {
                        // Clean up any temporaries needed by the
                        // initialization.
                        RunCleanupsScope InitScope(*this);
                        // Emit initialization for single element.
                        setAddrOfLocalVar(VDInit, SrcElement);
                        EmitAnyExprToMem(Init, DestElement,
                                         Init->getType().getQualifiers(),
                                         /*IsInitializer*/ false);
                        LocalDeclMap.erase(VDInit);
                      });
                }
                EmitAutoVarCleanups(Emission);
                return Emission.getAllocatedAddress();
              });
        } else {
          Address OriginalAddr = OriginalLVal.getAddress(*this);
          IsRegistered =
              PrivateScope.addPrivate(OrigVD, [this, VDInit, OriginalAddr, VD,
                                               ThisFirstprivateIsLastprivate,
                                               OrigVD, &Lastprivates, IRef]() {
                // Emit private VarDecl with copy init.
                // Remap temp VDInit variable to the address of the original
                // variable (for proper handling of captured global variables).
                setAddrOfLocalVar(VDInit, OriginalAddr);
                EmitDecl(*VD);
                LocalDeclMap.erase(VDInit);
                if (ThisFirstprivateIsLastprivate &&
                    Lastprivates[OrigVD->getCanonicalDecl()] ==
                        OMPC_LASTPRIVATE_conditional) {
                  // Create/init special variable for lastprivate conditionals.
                  Address VDAddr =
                      CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                          *this, OrigVD);
                  llvm::Value *V = EmitLoadOfScalar(
                      MakeAddrLValue(GetAddrOfLocalVar(VD), (*IRef)->getType(),
                                     AlignmentSource::Decl),
                      (*IRef)->getExprLoc());
                  EmitStoreOfScalar(V,
                                    MakeAddrLValue(VDAddr, (*IRef)->getType(),
                                                   AlignmentSource::Decl));
                  // Redirect further uses of VD to the conditional's storage.
                  LocalDeclMap.erase(VD);
                  setAddrOfLocalVar(VD, VDAddr);
                  return VDAddr;
                }
                return GetAddrOfLocalVar(VD);
              });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}
952
953
void CodeGenFunction::EmitOMPPrivateClause(
954
    const OMPExecutableDirective &D,
955
33.2k
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
956
33.2k
  if (!HaveInsertPoint())
957
0
    return;
958
33.2k
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
959
33.2k
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
960
983
    auto IRef = C->varlist_begin();
961
3.14k
    for (const Expr *IInit : C->private_copies()) {
962
3.14k
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
963
3.14k
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
964
2.87k
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
965
2.87k
        bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD]() {
966
          // Emit private VarDecl with copy init.
967
2.87k
          EmitDecl(*VD);
968
2.87k
          return GetAddrOfLocalVar(VD);
969
2.87k
        });
970
2.87k
        assert(IsRegistered && "private var already registered as private");
971
        // Silence the warning about unused variable.
972
0
        (void)IsRegistered;
973
2.87k
      }
974
0
      ++IRef;
975
3.14k
    }
976
983
  }
977
33.2k
}
978
979
1.02k
// Emit copying of threadprivate variables listed in 'copyin' clauses from the
// master thread's copies into each worker thread's copies. Returns true if any
// copy code was emitted (callers then emit the required barrier).
bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // Generated code has the shape:
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  // CopyBegin/CopyEnd bracket the whole copy region; they are created lazily
  // on the first copied variable and stay null if nothing is copied.
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      // Copy each variable at most once across all copyin clauses.
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code with
        // TLS support, the address is passed from the master as field in the
        // captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true,
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress(*this);
          // Drop the temporary mapping so later references use the
          // threadprivate copy, not the master address.
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress(*this);
        if (CopiedVars.size() == 1) {
          // At first check if current thread is a master thread. If it is, no
          // need to copy data.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          // TODO: Avoid ptrtoint conversion.
          auto *MasterAddrInt =
              Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy);
          auto *PrivateAddrInt =
              Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy);
          // On the master thread the private and master addresses coincide, so
          // inequality identifies worker threads that must copy.
          Builder.CreateCondBr(
              Builder.CreateICmpNE(MasterAddrInt, PrivateAddrInt), CopyBegin,
              CopyEnd);
          EmitBlock(CopyBegin);
        }
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}
1049
1050
// Emit initial code for 'lastprivate' clause variables of directive D: record
// the original variables' addresses (for the final copy-back) and create the
// private copies, registering both in PrivateScope. Returns true if at least
// one lastprivate clause is present, so the caller knows to emit the final
// copy-back via EmitOMPLastprivateClauseFinal.
bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  // Loop control variables of a simd directive are privatized by the loop
  // emission itself; collect them so no duplicate copy is created here.
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    const auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (const Expr *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    // Taskloops do not require additional initialization, it is done in
    // runtime support library.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        !getLangOpts().OpenMPSimd)
      break;
    const auto *IRef = C->varlist_begin();
    const auto *IDestRef = C->destination_exprs().begin();
    for (const Expr *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the end
      // of the loop.
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // Handle each variable only once across all lastprivate clauses.
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Map the pseudo destination variable to the original's address so the
        // final copy-back assignment targets the real variable.
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() {
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                          /*RefersToEnclosingVariableOrCapture=*/
                          CapturedStmtInfo->lookup(OrigVD) != nullptr,
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress(*this);
        });
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in codegen
        // for 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          bool IsRegistered =
              PrivateScope.addPrivate(OrigVD, [this, VD, C, OrigVD]() {
                if (C->getKind() == OMPC_LASTPRIVATE_conditional) {
                  // Create/init special variable for lastprivate conditionals.
                  Address VDAddr =
                      CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                          *this, OrigVD);
                  setAddrOfLocalVar(VD, VDAddr);
                  return VDAddr;
                }
                // Emit private VarDecl with copy init.
                EmitDecl(*VD);
                return GetAddrOfLocalVar(VD);
              });
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}
1116
1117
// Emit the final copy-back of 'lastprivate' variables from their private
// copies into the original variables. When IsLastIterCond is non-null the
// copies are guarded so they only run on the thread that executed the last
// iteration; NoFinals suppresses the loop-counter final updates (the counters
// are then treated as already up to date).
void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    // Emit implicit barrier if at least one lastprivate conditional is found
    // and this is not a simd mode.
    if (!getLangOpts().OpenMPSimd &&
        llvm::any_of(D.getClausesOfKind<OMPLastprivateClause>(),
                     [](const OMPLastprivateClause *C) {
                       return C->getKind() == OMPC_LASTPRIVATE_conditional;
                     })) {
      CGM.getOpenMPRuntime().emitBarrierCall(*this, D.getBeginLoc(),
                                             OMPD_unknown,
                                             /*EmitChecks=*/false,
                                             /*ForceSimpleCall=*/true);
    }
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  // Maps a loop counter's canonical decl to its "final" update expression so
  // the counter can be brought to its post-loop value before copy-back.
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (const Expr *F : LoopDirective->finals()) {
      const auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        // Counters are already final; just make sure they are not copied back
        // again below.
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      // Copy each variable back only once across all lastprivate clauses.
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If lastprivate variable is a loop control variable for loop-based
        // directive, update its value before copyin back to original
        // variable.
        if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          // Reference-typed privates hold a pointer; load it to reach the
          // referenced storage.
          PrivateAddr =
              Address(Builder.CreateLoad(PrivateAddr),
                      CGM.getNaturalTypeAlignment(RefTy->getPointeeType()));
        // Store the last value to the private copy in the last iteration.
        if (C->getKind() == OMPC_LASTPRIVATE_conditional)
          CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate(
              *this, MakeAddrLValue(PrivateAddr, (*IRef)->getType()), PrivateVD,
              (*IRef)->getExprLoc());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}
1206
1207
void CodeGenFunction::EmitOMPReductionClauseInit(
1208
    const OMPExecutableDirective &D,
1209
27.8k
    CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) {
1210
27.8k
  if (!HaveInsertPoint())
1211
0
    return;
1212
27.8k
  SmallVector<const Expr *, 4> Shareds;
1213
27.8k
  SmallVector<const Expr *, 4> Privates;
1214
27.8k
  SmallVector<const Expr *, 4> ReductionOps;
1215
27.8k
  SmallVector<const Expr *, 4> LHSs;
1216
27.8k
  SmallVector<const Expr *, 4> RHSs;
1217
27.8k
  OMPTaskDataTy Data;
1218
27.8k
  SmallVector<const Expr *, 4> TaskLHSs;
1219
27.8k
  SmallVector<const Expr *, 4> TaskRHSs;
1220
27.8k
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1221
1.16k
    if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan))
1222
447
      continue;
1223
721
    Shareds.append(C->varlist_begin(), C->varlist_end());
1224
721
    Privates.append(C->privates().begin(), C->privates().end());
1225
721
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1226
721
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1227
721
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1228
721
    if (C->getModifier() == OMPC_REDUCTION_task) {
1229
27
      Data.ReductionVars.append(C->privates().begin(), C->privates().end());
1230
27
      Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
1231
27
      Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
1232
27
      Data.ReductionOps.append(C->reduction_ops().begin(),
1233
27
                               C->reduction_ops().end());
1234
27
      TaskLHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1235
27
      TaskRHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1236
27
    }
1237
721
  }
1238
27.8k
  ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
1239
27.8k
  unsigned Count = 0;
1240
27.8k
  auto *ILHS = LHSs.begin();
1241
27.8k
  auto *IRHS = RHSs.begin();
1242
27.8k
  auto *IPriv = Privates.begin();
1243
27.8k
  for (const Expr *IRef : Shareds) {
1244
813
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
1245
    // Emit private VarDecl with reduction init.
1246
813
    RedCG.emitSharedOrigLValue(*this, Count);
1247
813
    RedCG.emitAggregateType(*this, Count);
1248
813
    AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
1249
813
    RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
1250
813
                             RedCG.getSharedLValue(Count).getAddress(*this),
1251
813
                             [&Emission](CodeGenFunction &CGF) {
1252
617
                               CGF.EmitAutoVarInit(Emission);
1253
617
                               return true;
1254
617
                             });
1255
813
    EmitAutoVarCleanups(Emission);
1256
813
    Address BaseAddr = RedCG.adjustPrivateAddress(
1257
813
        *this, Count, Emission.getAllocatedAddress());
1258
813
    bool IsRegistered = PrivateScope.addPrivate(
1259
813
        RedCG.getBaseDecl(Count), [BaseAddr]() { return BaseAddr; });
1260
813
    assert(IsRegistered && "private var already registered as private");
1261
    // Silence the warning about unused variable.
1262
0
    (void)IsRegistered;
1263
1264
813
    const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
1265
813
    const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
1266
813
    QualType Type = PrivateVD->getType();
1267
813
    bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
1268
813
    if (isaOMPArraySectionExpr && 
Type->isVariablyModifiedType()157
) {
1269
      // Store the address of the original variable associated with the LHS
1270
      // implicit variable.
1271
109
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count, this]() {
1272
109
        return RedCG.getSharedLValue(Count).getAddress(*this);
1273
109
      });
1274
109
      PrivateScope.addPrivate(
1275
109
          RHSVD, [this, PrivateVD]() { return GetAddrOfLocalVar(PrivateVD); });
1276
704
    } else if ((isaOMPArraySectionExpr && 
Type->isScalarType()48
) ||
1277
704
               
isa<ArraySubscriptExpr>(IRef)702
) {
1278
      // Store the address of the original variable associated with the LHS
1279
      // implicit variable.
1280
6
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count, this]() {
1281
6
        return RedCG.getSharedLValue(Count).getAddress(*this);
1282
6
      });
1283
6
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() {
1284
6
        return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
1285
6
                                            ConvertTypeForMem(RHSVD->getType()),
1286
6
                                            "rhs.begin");
1287
6
      });
1288
698
    } else {
1289
698
      QualType Type = PrivateVD->getType();
1290
698
      bool IsArray = getContext().getAsArrayType(Type) != nullptr;
1291
698
      Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress(*this);
1292
      // Store the address of the original variable associated with the LHS
1293
      // implicit variable.
1294
698
      if (IsArray) {
1295
108
        OriginalAddr = Builder.CreateElementBitCast(
1296
108
            OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
1297
108
      }
1298
698
      PrivateScope.addPrivate(LHSVD, [OriginalAddr]() { return OriginalAddr; });
1299
698
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD, IsArray]() {
1300
698
        return IsArray ? Builder.CreateElementBitCast(
1301
108
                             GetAddrOfLocalVar(PrivateVD),
1302
108
                             ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
1303
698
                       : 
GetAddrOfLocalVar(PrivateVD)590
;
1304
698
      });
1305
698
    }
1306
813
    ++ILHS;
1307
813
    ++IRHS;
1308
813
    ++IPriv;
1309
813
    ++Count;
1310
813
  }
1311
27.8k
  if (!Data.ReductionVars.empty()) {
1312
27
    Data.IsReductionWithTaskMod = true;
1313
27
    Data.IsWorksharingReduction =
1314
27
        isOpenMPWorksharingDirective(D.getDirectiveKind());
1315
27
    llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit(
1316
27
        *this, D.getBeginLoc(), TaskLHSs, TaskRHSs, Data);
1317
27
    const Expr *TaskRedRef = nullptr;
1318
27
    switch (D.getDirectiveKind()) {
1319
2
    case OMPD_parallel:
1320
2
      TaskRedRef = cast<OMPParallelDirective>(D).getTaskReductionRefExpr();
1321
2
      break;
1322
2
    case OMPD_for:
1323
2
      TaskRedRef = cast<OMPForDirective>(D).getTaskReductionRefExpr();
1324
2
      break;
1325
2
    case OMPD_sections:
1326
2
      TaskRedRef = cast<OMPSectionsDirective>(D).getTaskReductionRefExpr();
1327
2
      break;
1328
2
    case OMPD_parallel_for:
1329
2
      TaskRedRef = cast<OMPParallelForDirective>(D).getTaskReductionRefExpr();
1330
2
      break;
1331
2
    case OMPD_parallel_master:
1332
2
      TaskRedRef =
1333
2
          cast<OMPParallelMasterDirective>(D).getTaskReductionRefExpr();
1334
2
      break;
1335
2
    case OMPD_parallel_sections:
1336
2
      TaskRedRef =
1337
2
          cast<OMPParallelSectionsDirective>(D).getTaskReductionRefExpr();
1338
2
      break;
1339
2
    case OMPD_target_parallel:
1340
2
      TaskRedRef =
1341
2
          cast<OMPTargetParallelDirective>(D).getTaskReductionRefExpr();
1342
2
      break;
1343
3
    case OMPD_target_parallel_for:
1344
3
      TaskRedRef =
1345
3
          cast<OMPTargetParallelForDirective>(D).getTaskReductionRefExpr();
1346
3
      break;
1347
2
    case OMPD_distribute_parallel_for:
1348
2
      TaskRedRef =
1349
2
          cast<OMPDistributeParallelForDirective>(D).getTaskReductionRefExpr();
1350
2
      break;
1351
4
    case OMPD_teams_distribute_parallel_for:
1352
4
      TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(D)
1353
4
                       .getTaskReductionRefExpr();
1354
4
      break;
1355
4
    case OMPD_target_teams_distribute_parallel_for:
1356
4
      TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(D)
1357
4
                       .getTaskReductionRefExpr();
1358
4
      break;
1359
0
    case OMPD_simd:
1360
0
    case OMPD_for_simd:
1361
0
    case OMPD_section:
1362
0
    case OMPD_single:
1363
0
    case OMPD_master:
1364
0
    case OMPD_critical:
1365
0
    case OMPD_parallel_for_simd:
1366
0
    case OMPD_task:
1367
0
    case OMPD_taskyield:
1368
0
    case OMPD_barrier:
1369
0
    case OMPD_taskwait:
1370
0
    case OMPD_taskgroup:
1371
0
    case OMPD_flush:
1372
0
    case OMPD_depobj:
1373
0
    case OMPD_scan:
1374
0
    case OMPD_ordered:
1375
0
    case OMPD_atomic:
1376
0
    case OMPD_teams:
1377
0
    case OMPD_target:
1378
0
    case OMPD_cancellation_point:
1379
0
    case OMPD_cancel:
1380
0
    case OMPD_target_data:
1381
0
    case OMPD_target_enter_data:
1382
0
    case OMPD_target_exit_data:
1383
0
    case OMPD_taskloop:
1384
0
    case OMPD_taskloop_simd:
1385
0
    case OMPD_master_taskloop:
1386
0
    case OMPD_master_taskloop_simd:
1387
0
    case OMPD_parallel_master_taskloop:
1388
0
    case OMPD_parallel_master_taskloop_simd:
1389
0
    case OMPD_distribute:
1390
0
    case OMPD_target_update:
1391
0
    case OMPD_distribute_parallel_for_simd:
1392
0
    case OMPD_distribute_simd:
1393
0
    case OMPD_target_parallel_for_simd:
1394
0
    case OMPD_target_simd:
1395
0
    case OMPD_teams_distribute:
1396
0
    case OMPD_teams_distribute_simd:
1397
0
    case OMPD_teams_distribute_parallel_for_simd:
1398
0
    case OMPD_target_teams:
1399
0
    case OMPD_target_teams_distribute:
1400
0
    case OMPD_target_teams_distribute_parallel_for_simd:
1401
0
    case OMPD_target_teams_distribute_simd:
1402
0
    case OMPD_declare_target:
1403
0
    case OMPD_end_declare_target:
1404
0
    case OMPD_threadprivate:
1405
0
    case OMPD_allocate:
1406
0
    case OMPD_declare_reduction:
1407
0
    case OMPD_declare_mapper:
1408
0
    case OMPD_declare_simd:
1409
0
    case OMPD_requires:
1410
0
    case OMPD_declare_variant:
1411
0
    case OMPD_begin_declare_variant:
1412
0
    case OMPD_end_declare_variant:
1413
0
    case OMPD_unknown:
1414
0
    default:
1415
0
      llvm_unreachable("Enexpected directive with task reductions.");
1416
27
    }
1417
1418
27
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(TaskRedRef)->getDecl());
1419
27
    EmitVarDecl(*VD);
1420
27
    EmitStoreOfScalar(ReductionDesc, GetAddrOfLocalVar(VD),
1421
27
                      /*Volatile=*/false, TaskRedRef->getType());
1422
27
  }
1423
27.8k
}
1424
1425
void CodeGenFunction::EmitOMPReductionClauseFinal(
1426
16.7k
    const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
1427
16.7k
  if (!HaveInsertPoint())
1428
2
    return;
1429
16.7k
  llvm::SmallVector<const Expr *, 8> Privates;
1430
16.7k
  llvm::SmallVector<const Expr *, 8> LHSExprs;
1431
16.7k
  llvm::SmallVector<const Expr *, 8> RHSExprs;
1432
16.7k
  llvm::SmallVector<const Expr *, 8> ReductionOps;
1433
16.7k
  bool HasAtLeastOneReduction = false;
1434
16.7k
  bool IsReductionWithTaskMod = false;
1435
16.7k
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1436
    // Do not emit for inscan reductions.
1437
713
    if (C->getModifier() == OMPC_REDUCTION_inscan)
1438
48
      continue;
1439
665
    HasAtLeastOneReduction = true;
1440
665
    Privates.append(C->privates().begin(), C->privates().end());
1441
665
    LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1442
665
    RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1443
665
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1444
665
    IsReductionWithTaskMod =
1445
665
        IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task;
1446
665
  }
1447
16.7k
  if (HasAtLeastOneReduction) {
1448
574
    if (IsReductionWithTaskMod) {
1449
27
      CGM.getOpenMPRuntime().emitTaskReductionFini(
1450
27
          *this, D.getBeginLoc(),
1451
27
          isOpenMPWorksharingDirective(D.getDirectiveKind()));
1452
27
    }
1453
574
    bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
1454
574
                      
isOpenMPParallelDirective(D.getDirectiveKind())568
||
1455
574
                      
ReductionKind == OMPD_simd268
;
1456
574
    bool SimpleReduction = ReductionKind == OMPD_simd;
1457
    // Emit nowait reduction if nowait clause is present or directive is a
1458
    // parallel directive (it always has implicit barrier).
1459
574
    CGM.getOpenMPRuntime().emitReduction(
1460
574
        *this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps,
1461
574
        {WithNowait, SimpleReduction, ReductionKind});
1462
574
  }
1463
16.7k
}
1464
1465
static void emitPostUpdateForReductionClause(
1466
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
1467
16.7k
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
1468
16.7k
  if (!CGF.HaveInsertPoint())
1469
0
    return;
1470
16.7k
  llvm::BasicBlock *DoneBB = nullptr;
1471
16.7k
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1472
721
    if (const Expr *PostUpdate = C->getPostUpdateExpr()) {
1473
4
      if (!DoneBB) {
1474
4
        if (llvm::Value *Cond = CondGen(CGF)) {
1475
          // If the first post-update expression is found, emit conditional
1476
          // block if it was requested.
1477
0
          llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
1478
0
          DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
1479
0
          CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1480
0
          CGF.EmitBlock(ThenBB);
1481
0
        }
1482
4
      }
1483
4
      CGF.EmitIgnoredExpr(PostUpdate);
1484
4
    }
1485
721
  }
1486
16.7k
  if (DoneBB)
1487
0
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
1488
16.7k
}
1489
1490
namespace {
1491
/// Codegen lambda for appending distribute lower and upper bounds to outlined
1492
/// parallel function. This is necessary for combined constructs such as
1493
/// 'distribute parallel for'
1494
typedef llvm::function_ref<void(CodeGenFunction &,
1495
                                const OMPExecutableDirective &,
1496
                                llvm::SmallVectorImpl<llvm::Value *> &)>
1497
    CodeGenBoundParametersTy;
1498
} // anonymous namespace
1499
1500
static void
1501
checkForLastprivateConditionalUpdate(CodeGenFunction &CGF,
1502
16.4k
                                     const OMPExecutableDirective &S) {
1503
16.4k
  if (CGF.getLangOpts().OpenMP < 50)
1504
4.15k
    return;
1505
12.3k
  llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls;
1506
12.3k
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
1507
902
    for (const Expr *Ref : C->varlists()) {
1508
902
      if (!Ref->getType()->isScalarType())
1509
473
        continue;
1510
429
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1511
429
      if (!DRE)
1512
0
        continue;
1513
429
      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1514
429
      CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1515
429
    }
1516
744
  }
1517
12.3k
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
1518
1.22k
    for (const Expr *Ref : C->varlists()) {
1519
1.22k
      if (!Ref->getType()->isScalarType())
1520
658
        continue;
1521
567
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1522
567
      if (!DRE)
1523
0
        continue;
1524
567
      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1525
567
      CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1526
567
    }
1527
295
  }
1528
12.3k
  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
1529
350
    for (const Expr *Ref : C->varlists()) {
1530
350
      if (!Ref->getType()->isScalarType())
1531
0
        continue;
1532
350
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1533
350
      if (!DRE)
1534
0
        continue;
1535
350
      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1536
350
      CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1537
350
    }
1538
279
  }
1539
  // Privates should ne analyzed since they are not captured at all.
1540
  // Task reductions may be skipped - tasks are ignored.
1541
  // Firstprivates do not return value but may be passed by reference - no need
1542
  // to check for updated lastprivate conditional.
1543
12.3k
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
1544
6.43k
    for (const Expr *Ref : C->varlists()) {
1545
6.43k
      if (!Ref->getType()->isScalarType())
1546
855
        continue;
1547
5.57k
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1548
5.57k
      if (!DRE)
1549
0
        continue;
1550
5.57k
      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1551
5.57k
    }
1552
3.96k
  }
1553
12.3k
  CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional(
1554
12.3k
      CGF, S, PrivateDecls);
1555
12.3k
}
1556
1557
static void emitCommonOMPParallelDirective(
1558
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
1559
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1560
6.18k
    const CodeGenBoundParametersTy &CodeGenBoundParameters) {
1561
6.18k
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
1562
6.18k
  llvm::Value *NumThreads = nullptr;
1563
6.18k
  llvm::Function *OutlinedFn =
1564
6.18k
      CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
1565
6.18k
          S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
1566
6.18k
  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
1567
351
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
1568
351
    NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
1569
351
                                    /*IgnoreResultAssign=*/true);
1570
351
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
1571
351
        CGF, NumThreads, NumThreadsClause->getBeginLoc());
1572
351
  }
1573
6.18k
  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
1574
87
    CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
1575
87
    CGF.CGM.getOpenMPRuntime().emitProcBindClause(
1576
87
        CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getBeginLoc());
1577
87
  }
1578
6.18k
  const Expr *IfCond = nullptr;
1579
6.18k
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
1580
1.17k
    if (C->getNameModifier() == OMPD_unknown ||
1581
1.17k
        
C->getNameModifier() == OMPD_parallel769
) {
1582
590
      IfCond = C->getCondition();
1583
590
      break;
1584
590
    }
1585
1.17k
  }
1586
1587
6.18k
  OMPParallelScope Scope(CGF, S);
1588
6.18k
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
1589
  // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
1590
  // lower and upper bounds with the pragma 'for' chunking mechanism.
1591
  // The following lambda takes care of appending the lower and upper bound
1592
  // parameters when necessary
1593
6.18k
  CodeGenBoundParameters(CGF, S, CapturedVars);
1594
6.18k
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
1595
6.18k
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn,
1596
6.18k
                                              CapturedVars, IfCond, NumThreads);
1597
6.18k
}
1598
1599
652
static bool isAllocatableDecl(const VarDecl *VD) {
1600
652
  const VarDecl *CVD = VD->getCanonicalDecl();
1601
652
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
1602
648
    return false;
1603
4
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
1604
  // Use the default allocation.
1605
4
  return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
1606
4
            AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
1607
4
           
!AA->getAllocator()0
);
1608
652
}
1609
1610
static void emitEmptyBoundParameters(CodeGenFunction &,
1611
                                     const OMPExecutableDirective &,
1612
3.43k
                                     llvm::SmallVectorImpl<llvm::Value *> &) {}
1613
1614
Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable(
1615
636
    CodeGenFunction &CGF, const VarDecl *VD) {
1616
636
  CodeGenModule &CGM = CGF.CGM;
1617
636
  auto &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1618
1619
636
  if (!VD)
1620
0
    return Address::invalid();
1621
636
  const VarDecl *CVD = VD->getCanonicalDecl();
1622
636
  if (!isAllocatableDecl(CVD))
1623
636
    return Address::invalid();
1624
0
  llvm::Value *Size;
1625
0
  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
1626
0
  if (CVD->getType()->isVariablyModifiedType()) {
1627
0
    Size = CGF.getTypeSize(CVD->getType());
1628
    // Align the size: ((size + align - 1) / align) * align
1629
0
    Size = CGF.Builder.CreateNUWAdd(
1630
0
        Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
1631
0
    Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
1632
0
    Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
1633
0
  } else {
1634
0
    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
1635
0
    Size = CGM.getSize(Sz.alignTo(Align));
1636
0
  }
1637
1638
0
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
1639
0
  assert(AA->getAllocator() &&
1640
0
         "Expected allocator expression for non-default allocator.");
1641
0
  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
1642
  // According to the standard, the original allocator type is a enum (integer).
1643
  // Convert to pointer type, if required.
1644
0
  if (Allocator->getType()->isIntegerTy())
1645
0
    Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
1646
0
  else if (Allocator->getType()->isPointerTy())
1647
0
    Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
1648
0
                                                                CGM.VoidPtrTy);
1649
1650
0
  llvm::Value *Addr = OMPBuilder.createOMPAlloc(
1651
0
      CGF.Builder, Size, Allocator,
1652
0
      getNameWithSeparators({CVD->getName(), ".void.addr"}, ".", "."));
1653
0
  llvm::CallInst *FreeCI =
1654
0
      OMPBuilder.createOMPFree(CGF.Builder, Addr, Allocator);
1655
1656
0
  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FreeCI);
1657
0
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1658
0
      Addr,
1659
0
      CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
1660
0
      getNameWithSeparators({CVD->getName(), ".addr"}, ".", "."));
1661
0
  return Address(Addr, Align);
1662
636
}
1663
1664
Address CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate(
1665
    CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr,
1666
0
    SourceLocation Loc) {
1667
0
  CodeGenModule &CGM = CGF.CGM;
1668
0
  if (CGM.getLangOpts().OpenMPUseTLS &&
1669
0
      CGM.getContext().getTargetInfo().isTLSSupported())
1670
0
    return VDAddr;
1671
1672
0
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1673
1674
0
  llvm::Type *VarTy = VDAddr.getElementType();
1675
0
  llvm::Value *Data =
1676
0
      CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy);
1677
0
  llvm::ConstantInt *Size = CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy));
1678
0
  std::string Suffix = getNameWithSeparators({"cache", ""});
1679
0
  llvm::Twine CacheName = Twine(CGM.getMangledName(VD)).concat(Suffix);
1680
1681
0
  llvm::CallInst *ThreadPrivateCacheCall =
1682
0
      OMPBuilder.createCachedThreadPrivate(CGF.Builder, Data, Size, CacheName);
1683
1684
0
  return Address(ThreadPrivateCacheCall, VDAddr.getAlignment());
1685
0
}
1686
1687
std::string CodeGenFunction::OMPBuilderCBHelpers::getNameWithSeparators(
1688
0
    ArrayRef<StringRef> Parts, StringRef FirstSeparator, StringRef Separator) {
1689
0
  SmallString<128> Buffer;
1690
0
  llvm::raw_svector_ostream OS(Buffer);
1691
0
  StringRef Sep = FirstSeparator;
1692
0
  for (StringRef Part : Parts) {
1693
0
    OS << Sep << Part;
1694
0
    Sep = Separator;
1695
0
  }
1696
0
  return OS.str().str();
1697
0
}
1698
1.04k
void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
1699
1.04k
  if (CGM.getLangOpts().OpenMPIRBuilder) {
1700
50
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1701
    // Check if we have any if clause associated with the directive.
1702
50
    llvm::Value *IfCond = nullptr;
1703
50
    if (const auto *C = S.getSingleClause<OMPIfClause>())
1704
0
      IfCond = EmitScalarExpr(C->getCondition(),
1705
0
                              /*IgnoreResultAssign=*/true);
1706
1707
50
    llvm::Value *NumThreads = nullptr;
1708
50
    if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>())
1709
0
      NumThreads = EmitScalarExpr(NumThreadsClause->getNumThreads(),
1710
0
                                  /*IgnoreResultAssign=*/true);
1711
1712
50
    ProcBindKind ProcBind = OMP_PROC_BIND_default;
1713
50
    if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>())
1714
0
      ProcBind = ProcBindClause->getProcBindKind();
1715
1716
50
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1717
1718
    // The cleanup callback that finalizes all variabels at the given location,
1719
    // thus calls destructors etc.
1720
58
    auto FiniCB = [this](InsertPointTy IP) {
1721
58
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
1722
58
    };
1723
1724
    // Privatization callback that performs appropriate action for
1725
    // shared/private/firstprivate/lastprivate/copyin/... variables.
1726
    //
1727
    // TODO: This defaults to shared right now.
1728
50
    auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
1729
82
                     llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
1730
      // The next line is appropriate only for variables (Val) with the
1731
      // data-sharing attribute "shared".
1732
82
      ReplVal = &Val;
1733
1734
82
      return CodeGenIP;
1735
82
    };
1736
1737
50
    const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
1738
50
    const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt();
1739
1740
50
    auto BodyGenCB = [ParallelRegionBodyStmt,
1741
50
                      this](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
1742
50
                            llvm::BasicBlock &ContinuationBB) {
1743
50
      OMPBuilderCBHelpers::OutlinedRegionBodyRAII ORB(*this, AllocaIP,
1744
50
                                                      ContinuationBB);
1745
50
      OMPBuilderCBHelpers::EmitOMPRegionBody(*this, ParallelRegionBodyStmt,
1746
50
                                             CodeGenIP, ContinuationBB);
1747
50
    };
1748
1749
50
    CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
1750
50
    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
1751
50
    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
1752
50
        AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
1753
50
    Builder.restoreIP(
1754
50
        OMPBuilder.createParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB,
1755
50
                                  IfCond, NumThreads, ProcBind, S.hasCancel()));
1756
50
    return;
1757
50
  }
1758
1759
  // Emit parallel region as a standalone region.
1760
998
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
1761
998
    Action.Enter(CGF);
1762
998
    OMPPrivateScope PrivateScope(CGF);
1763
998
    bool Copyins = CGF.EmitOMPCopyinClause(S);
1764
998
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
1765
998
    if (Copyins) {
1766
      // Emit implicit barrier to synchronize threads and avoid data races on
1767
      // propagation master's thread values of threadprivate variables to local
1768
      // instances of that variables of all other implicit threads.
1769
23
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1770
23
          CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
1771
23
          /*ForceSimpleCall=*/true);
1772
23
    }
1773
998
    CGF.EmitOMPPrivateClause(S, PrivateScope);
1774
998
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
1775
998
    (void)PrivateScope.Privatize();
1776
998
    CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
1777
998
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
1778
998
  };
1779
998
  {
1780
998
    auto LPCRegion =
1781
998
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
1782
998
    emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
1783
998
                                   emitEmptyBoundParameters);
1784
998
    emitPostUpdateForReductionClause(*this, S,
1785
998
                                     [](CodeGenFunction &) 
{ return nullptr; }4
);
1786
998
  }
1787
  // Check for outer lastprivate conditional update.
1788
998
  checkForLastprivateConditionalUpdate(*this, S);
1789
998
}
1790
1791
0
void CodeGenFunction::EmitOMPMetaDirective(const OMPMetaDirective &S) {
1792
0
  EmitStmt(S.getIfStmt());
1793
0
}
1794
1795
namespace {
1796
/// RAII to handle scopes for loop transformation directives.
1797
class OMPTransformDirectiveScopeRAII {
1798
  OMPLoopScope *Scope = nullptr;
1799
  CodeGenFunction::CGCapturedStmtInfo *CGSI = nullptr;
1800
  CodeGenFunction::CGCapturedStmtRAII *CapInfoRAII = nullptr;
1801
1802
public:
1803
8
  OMPTransformDirectiveScopeRAII(CodeGenFunction &CGF, const Stmt *S) {
1804
8
    if (const auto *Dir = dyn_cast<OMPLoopBasedDirective>(S)) {
1805
8
      Scope = new OMPLoopScope(CGF, *Dir);
1806
8
      CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP);
1807
8
      CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI);
1808
8
    }
1809
8
  }
1810
8
  ~OMPTransformDirectiveScopeRAII() {
1811
8
    if (!Scope)
1812
0
      return;
1813
8
    delete CapInfoRAII;
1814
8
    delete CGSI;
1815
8
    delete Scope;
1816
8
  }
1817
};
1818
} // namespace
1819
1820
static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop,
1821
11.6k
                     int MaxLevel, int Level = 0) {
1822
11.6k
  assert(Level < MaxLevel && "Too deep lookup during loop body codegen.");
1823
0
  const Stmt *SimplifiedS = S->IgnoreContainers();
1824
11.6k
  if (const auto *CS = dyn_cast<CompoundStmt>(SimplifiedS)) {
1825
8
    PrettyStackTraceLoc CrashInfo(
1826
8
        CGF.getContext().getSourceManager(), CS->getLBracLoc(),
1827
8
        "LLVM IR generation of compound statement ('{}')");
1828
1829
    // Keep track of the current cleanup stack depth, including debug scopes.
1830
8
    CodeGenFunction::LexicalScope Scope(CGF, S->getSourceRange());
1831
8
    for (const Stmt *CurStmt : CS->body())
1832
34
      emitBody(CGF, CurStmt, NextLoop, MaxLevel, Level);
1833
8
    return;
1834
8
  }
1835
11.6k
  if (SimplifiedS == NextLoop) {
1836
11.6k
    if (auto *Dir = dyn_cast<OMPLoopTransformationDirective>(SimplifiedS))
1837
26
      SimplifiedS = Dir->getTransformedStmt();
1838
11.6k
    if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(SimplifiedS))
1839
46
      SimplifiedS = CanonLoop->getLoopStmt();
1840
11.6k
    if (const auto *For = dyn_cast<ForStmt>(SimplifiedS)) {
1841
11.6k
      S = For->getBody();
1842
11.6k
    } else {
1843
6
      assert(isa<CXXForRangeStmt>(SimplifiedS) &&
1844
6
             "Expected canonical for loop or range-based for loop.");
1845
0
      const auto *CXXFor = cast<CXXForRangeStmt>(SimplifiedS);
1846
6
      CGF.EmitStmt(CXXFor->getLoopVarStmt());
1847
6
      S = CXXFor->getBody();
1848
6
    }
1849
11.6k
    if (Level + 1 < MaxLevel) {
1850
543
      NextLoop = OMPLoopDirective::tryToFindNextInnerLoop(
1851
543
          S, /*TryImperfectlyNestedLoops=*/true);
1852
543
      emitBody(CGF, S, NextLoop, MaxLevel, Level + 1);
1853
543
      return;
1854
543
    }
1855
11.6k
  }
1856
11.1k
  CGF.EmitStmt(S);
1857
11.1k
}
1858
1859
/// Emit the body of a loop-associated OpenMP directive: per-iteration counter
/// and linear-variable updates, non-rectangular bounds checks, inscan
/// reduction block bookkeeping, and finally the user loop body itself.
void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
                                      JumpDest LoopExit) {
  RunCleanupsScope BodyScope(*this);
  // Update counters values on current iteration.
  for (const Expr *UE : D.updates())
    EmitIgnoredExpr(UE);
  // Update the linear variables.
  // In distribute directives only loop counters may be marked as linear, no
  // need to generate the code for them.
  if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
    for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
      for (const Expr *UE : C->updates())
        EmitIgnoredExpr(UE);
    }
  }

  // On a continue in the body, jump to the end.
  JumpDest Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
  for (const Expr *E : D.finals_conditions()) {
    if (!E)
      continue;
    // Check that loop counter in non-rectangular nest fits into the iteration
    // space.
    llvm::BasicBlock *NextBB = createBasicBlock("omp.body.next");
    EmitBranchOnBoolExpr(E, NextBB, Continue.getBlock(),
                         getProfileCount(D.getBody()));
    EmitBlock(NextBB);
  }

  OMPPrivateScope InscanScope(*this);
  EmitOMPReductionClauseInit(D, InscanScope, /*ForInscan=*/true);
  bool IsInscanRegion = InscanScope.Privatize();
  if (IsInscanRegion) {
    // Need to remember the block before and after scan directive
    // to dispatch them correctly depending on the clause used in
    // this directive, inclusive or exclusive. For inclusive scan the natural
    // order of the blocks is used, for exclusive clause the blocks must be
    // executed in reverse order.
    OMPBeforeScanBlock = createBasicBlock("omp.before.scan.bb");
    OMPAfterScanBlock = createBasicBlock("omp.after.scan.bb");
    // No need to allocate inscan exit block, in simd mode it is selected in the
    // codegen for the scan directive.
    if (D.getDirectiveKind() != OMPD_simd && !getLangOpts().OpenMPSimd)
      OMPScanExitBlock = createBasicBlock("omp.exit.inscan.bb");
    OMPScanDispatch = createBasicBlock("omp.inscan.dispatch");
    EmitBranch(OMPScanDispatch);
    EmitBlock(OMPBeforeScanBlock);
  }

  // Emit loop variables for C++ range loops.
  const Stmt *Body =
      D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
  // Emit loop body.
  emitBody(*this, Body,
           OMPLoopBasedDirective::tryToFindNextInnerLoop(
               Body, /*TryImperfectlyNestedLoops=*/true),
           D.getLoopsNumber());

  // Jump to the dispatcher at the end of the loop body.
  if (IsInscanRegion)
    EmitBranch(OMPScanExitBlock);

  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
}
1926
1927
// Pair of (outlined function, pointer to its captured-variables struct).
using EmittedClosureTy = std::pair<llvm::Function *, llvm::Value *>;

/// Emit a captured statement and return the function as well as its captured
/// closure context.
static EmittedClosureTy emitCapturedStmtFunc(CodeGenFunction &ParentCGF,
                                             const CapturedStmt *S) {
  // Materialize the capture record in the parent function's frame.
  LValue CapStruct = ParentCGF.InitCapturedStruct(*S);
  // Outline the captured statement into its own function using a fresh
  // CodeGenFunction (suppressNewContext keeps the parent's context).
  CodeGenFunction CGF(ParentCGF.CGM, /*suppressNewContext=*/true);
  std::unique_ptr<CodeGenFunction::CGCapturedStmtInfo> CSI =
      std::make_unique<CodeGenFunction::CGCapturedStmtInfo>(*S);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, CSI.get());
  llvm::Function *F = CGF.GenerateCapturedStmtFunction(*S);

  return {F, CapStruct.getPointer(ParentCGF)};
}
1942
1943
/// Emit a call to a previously captured closure.
1944
/// Emit a call to a previously captured closure.
static llvm::CallInst *
emitCapturedStmtCall(CodeGenFunction &ParentCGF, EmittedClosureTy Cap,
                     llvm::ArrayRef<llvm::Value *> Args) {
  // The outlined function expects the closure context as its trailing
  // parameter, so extend the caller-provided arguments with it.
  SmallVector<llvm::Value *> CallArgs;
  CallArgs.reserve(Args.size() + 1);
  CallArgs.assign(Args.begin(), Args.end());
  CallArgs.push_back(Cap.second);

  return ParentCGF.Builder.CreateCall(Cap.first, CallArgs);
}
1955
1956
/// Emit \p Depth nested canonical loops rooted at \p S via the
/// OpenMPIRBuilder and return the outermost generated CanonicalLoopInfo.
llvm::CanonicalLoopInfo *
CodeGenFunction::EmitOMPCollapsedCanonicalLoopNest(const Stmt *S, int Depth) {
  assert(Depth == 1 && "Nested loops with OpenMPIRBuilder not yet implemented");

  // The caller is processing the loop-associated directive processing the \p
  // Depth loops nested in \p S. Put the previous pending loop-associated
  // directive to the stack. If the current loop-associated directive is a loop
  // transformation directive, it will push its generated loops onto the stack
  // such that together with the loops left here they form the combined loop
  // nest for the parent loop-associated directive.
  int ParentExpectedOMPLoopDepth = ExpectedOMPLoopDepth;
  ExpectedOMPLoopDepth = Depth;

  // Emitting S populates OMPLoopNestStack (see EmitOMPCanonicalLoop).
  EmitStmt(S);
  assert(OMPLoopNestStack.size() >= (size_t)Depth && "Found too few loops");

  // The last added loop is the outermost one.
  llvm::CanonicalLoopInfo *Result = OMPLoopNestStack.back();

  // Pop the \p Depth loops requested by the call from that stack and restore
  // the previous context.
  OMPLoopNestStack.pop_back_n(Depth);
  ExpectedOMPLoopDepth = ParentExpectedOMPLoopDepth;

  return Result;
}
1982
1983
32
/// Emit an OMPCanonicalLoop node through the OpenMPIRBuilder: evaluate the
/// iteration count via the captured distance function, build the canonical
/// loop structure, and map the logical induction variable back to the user's
/// loop variable inside the body.
void CodeGenFunction::EmitOMPCanonicalLoop(const OMPCanonicalLoop *S) {
  const Stmt *SyntacticalLoop = S->getLoopStmt();
  if (!getLangOpts().OpenMPIRBuilder) {
    // Ignore if OpenMPIRBuilder is not enabled.
    EmitStmt(SyntacticalLoop);
    return;
  }

  LexicalScope ForScope(*this, S->getSourceRange());

  // Emit init statements. The Distance/LoopVar funcs may reference variable
  // declarations they contain.
  const Stmt *BodyStmt;
  if (const auto *For = dyn_cast<ForStmt>(SyntacticalLoop)) {
    if (const Stmt *InitStmt = For->getInit())
      EmitStmt(InitStmt);
    BodyStmt = For->getBody();
  } else if (const auto *RangeFor =
                 dyn_cast<CXXForRangeStmt>(SyntacticalLoop)) {
    // Range-based for: emit range/begin/end/loop-variable declarations so the
    // closures below can reference them.
    if (const DeclStmt *RangeStmt = RangeFor->getRangeStmt())
      EmitStmt(RangeStmt);
    if (const DeclStmt *BeginStmt = RangeFor->getBeginStmt())
      EmitStmt(BeginStmt);
    if (const DeclStmt *EndStmt = RangeFor->getEndStmt())
      EmitStmt(EndStmt);
    if (const DeclStmt *LoopVarStmt = RangeFor->getLoopVarStmt())
      EmitStmt(LoopVarStmt);
    BodyStmt = RangeFor->getBody();
  } else
    llvm_unreachable("Expected for-stmt or range-based for-stmt");

  // Emit closure for later use. By-value captures will be captured here.
  const CapturedStmt *DistanceFunc = S->getDistanceFunc();
  EmittedClosureTy DistanceClosure = emitCapturedStmtFunc(*this, DistanceFunc);
  const CapturedStmt *LoopVarFunc = S->getLoopVarFunc();
  EmittedClosureTy LoopVarClosure = emitCapturedStmtFunc(*this, LoopVarFunc);

  // Call the distance function to get the number of iterations of the loop to
  // come.
  QualType LogicalTy = DistanceFunc->getCapturedDecl()
                           ->getParam(0)
                           ->getType()
                           .getNonReferenceType();
  Address CountAddr = CreateMemTemp(LogicalTy, ".count.addr");
  emitCapturedStmtCall(*this, DistanceClosure, {CountAddr.getPointer()});
  llvm::Value *DistVal = Builder.CreateLoad(CountAddr, ".count");

  // Emit the loop structure.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  auto BodyGen = [&, this](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP,
                           llvm::Value *IndVar) {
    Builder.restoreIP(CodeGenIP);

    // Emit the loop body: Convert the logical iteration number to the loop
    // variable and emit the body.
    const DeclRefExpr *LoopVarRef = S->getLoopVarRef();
    LValue LCVal = EmitLValue(LoopVarRef);
    Address LoopVarAddress = LCVal.getAddress(*this);
    emitCapturedStmtCall(*this, LoopVarClosure,
                         {LoopVarAddress.getPointer(), IndVar});

    RunCleanupsScope BodyScope(*this);
    EmitStmt(BodyStmt);
  };
  llvm::CanonicalLoopInfo *CL =
      OMPBuilder.createCanonicalLoop(Builder, BodyGen, DistVal);

  // Finish up the loop.
  Builder.restoreIP(CL->getAfterIP());
  ForScope.ForceCleanup();

  // Remember the CanonicalLoopInfo for parent AST nodes consuming it.
  OMPLoopNestStack.push_back(CL);
}
2057
2058
/// Emit the inner loop of a worksharing/simd construct: a cond/body/inc/end
/// basic-block skeleton driving \p BodyGen, with \p LoopCond tested each
/// iteration and \p IncExpr (plus \p PostIncGen) emitted on the back edge.
void CodeGenFunction::EmitOMPInnerLoop(
    const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> BodyGen,
    const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  const SourceRange R = S.getSourceRange();

  // If attributes are attached, push to the basic block with them.
  const auto &OMPED = cast<OMPExecutableDirective>(S);
  const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt();
  const Stmt *SS = ICS->getCapturedStmt();
  const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(SS);
  OMPLoopNestStack.clear();
  if (AS)
    LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(),
                   AS->getAttrs(), SourceLocToDebugLoc(R.getBegin()),
                   SourceLocToDebugLoc(R.getEnd()));
  else
    LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                   SourceLocToDebugLoc(R.getEnd()));

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  llvm::BasicBlock *LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
  if (ExitBlock != LoopExit.getBlock()) {
    // Route the exit through the staged cleanup block.
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  incrementProfileCounter(&S);

  // Create a block for the increment.
  JumpDest Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  BodyGen(*this);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  PostIncGen(*this);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}
2118
2119
9.12k
/// Emit initializers for the privatized variables of all linear clauses on
/// \p D, plus pre-computed linear steps where the step is not a constant.
/// Returns true if any linear variable was emitted.
bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // Emit inits for the linear variables.
  bool HasLinears = false;
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Init : C->inits()) {
      HasLinears = true;
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
      if (const auto *Ref =
              dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
        // The init is a direct reference to the original variable: build a
        // DeclRefExpr to it (possibly captured) and copy-initialize the
        // private copy from it.
        AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
        const auto *OrigVD = cast<VarDecl>(Ref->getDecl());
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        VD->getInit()->getType(), VK_LValue,
                        VD->getInit()->getExprLoc());
        EmitExprAsInit(
            &DRE, VD,
            MakeAddrLValue(Emission.getAllocatedAddress(), VD->getType()),
            /*capturedByInit=*/false);
        EmitAutoVarCleanups(Emission);
      } else {
        // General initializer: emit the variable normally.
        EmitVarDecl(*VD);
      }
    }
    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    if (const auto *CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
      if (const auto *SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
        EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
        // Emit calculation of the linear step.
        EmitIgnoredExpr(CS);
      }
  }
  return HasLinears;
}
2156
2157
/// Emit final copy-out of linear variables after the loop. \p CondGen may
/// return a condition guarding the updates (e.g. "only on the last
/// iteration"); a null result means the updates are emitted unconditionally.
void CodeGenFunction::EmitOMPLinearClauseFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  // Emit the final values of the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto IC = C->varlist_begin();
    for (const Expr *F : C->finals()) {
      if (!DoneBB) {
        if (llvm::Value *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          llvm::BasicBlock *ThenBB = createBasicBlock(".omp.linear.pu");
          DoneBB = createBasicBlock(".omp.linear.pu.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      // Redirect the original variable to its address, then emit the final
      // assignment expression against that mapping.
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = EmitLValue(&DRE).getAddress(*this);
      CodeGenFunction::OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
      ++IC;
    }
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}
2194
2195
/// Emit llvm.assume alignment assumptions for every pointer listed in the
/// directive's 'aligned' clauses. A clause without an explicit alignment uses
/// the target's default SIMD alignment for the pointee type.
static void emitAlignedClause(CodeGenFunction &CGF,
                              const OMPExecutableDirective &D) {
  if (!CGF.HaveInsertPoint())
    return;
  for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
    llvm::APInt ClauseAlignment(64, 0);
    if (const Expr *AlignmentExpr = Clause->getAlignment()) {
      // Sema guarantees the alignment folds to an integer constant.
      auto *AlignmentCI =
          cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
      ClauseAlignment = AlignmentCI->getValue();
    }
    for (const Expr *E : Clause->varlists()) {
      llvm::APInt Alignment(ClauseAlignment);
      if (Alignment == 0) {
        // OpenMP [2.8.1, Description]
        // If no optional parameter is specified, implementation-defined default
        // alignments for SIMD instructions on the target platforms are assumed.
        Alignment =
            CGF.getContext()
                .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
                    E->getType()->getPointeeType()))
                .getQuantity();
      }
      assert((Alignment == 0 || Alignment.isPowerOf2()) &&
             "alignment is not power of 2");
      if (Alignment != 0) {
        llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
        CGF.emitAlignmentAssumption(
            PtrValue, E, /*No second loc needed*/ SourceLocation(),
            llvm::ConstantInt::get(CGF.getLLVMContext(), Alignment));
      }
    }
  }
}
2229
2230
/// Privatize the loop counters of \p S in \p LoopScope: allocate the private
/// counter copies and register address mappings for both the original and the
/// private VarDecls, plus any extra counters required by ordered(n) clauses.
void CodeGenFunction::EmitOMPPrivateLoopCounters(
    const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
  if (!HaveInsertPoint())
    return;
  auto I = S.private_counters().begin();
  for (const Expr *E : S.counters()) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
    // Emit var without initialization.
    AutoVarEmission VarEmission = EmitAutoVarAlloca(*PrivateVD);
    EmitAutoVarCleanups(VarEmission);
    LocalDeclMap.erase(PrivateVD);
    (void)LoopScope.addPrivate(
        VD, [&VarEmission]() { return VarEmission.getAllocatedAddress(); });
    if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
        VD->hasGlobalStorage()) {
      // The original counter is addressable here (local, captured or global):
      // map the private decl onto the original's storage.
      (void)LoopScope.addPrivate(PrivateVD, [this, VD, E]() {
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD),
                        LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
                        E->getType(), VK_LValue, E->getExprLoc());
        return EmitLValue(&DRE).getAddress(*this);
      });
    } else {
      (void)LoopScope.addPrivate(PrivateVD, [&VarEmission]() {
        return VarEmission.getAllocatedAddress();
      });
    }
    ++I;
  }
  // Privatize extra loop counters used in loops for ordered(n) clauses.
  for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) {
    if (!C->getNumForLoops())
      continue;
    for (unsigned I = S.getLoopsNumber(), E = C->getLoopNumIterations().size();
         I < E; ++I) {
      const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I));
      const auto *VD = cast<VarDecl>(DRE->getDecl());
      // Override only those variables that can be captured to avoid re-emission
      // of the variables declared within the loops.
      if (DRE->refersToEnclosingVariableOrCapture()) {
        (void)LoopScope.addPrivate(VD, [this, DRE, VD]() {
          return CreateMemTemp(DRE->getType(), VD->getName());
        });
      }
    }
  }
}
2277
2278
/// Emit the precondition check of a loop directive: initialize (temporarily
/// privatized) counters, materialize dependent counters of non-rectangular
/// nests, then branch on \p Cond to \p TrueBlock / \p FalseBlock.
static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
                        const Expr *Cond, llvm::BasicBlock *TrueBlock,
                        llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
  if (!CGF.HaveInsertPoint())
    return;
  {
    // Scoped so the counter privatization is undone before the branch.
    CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
    CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
    (void)PreCondScope.Privatize();
    // Get initial values of real counters.
    for (const Expr *I : S.inits()) {
      CGF.EmitIgnoredExpr(I);
    }
  }
  // Create temp loop control variables with their init values to support
  // non-rectangular loops.
  CodeGenFunction::OMPMapVars PreCondVars;
  for (const Expr *E : S.dependent_counters()) {
    if (!E)
      continue;
    assert(!E->getType().getNonReferenceType()->isRecordType() &&
           "dependent counter must not be an iterator.");
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Address CounterAddr =
        CGF.CreateMemTemp(VD->getType().getNonReferenceType());
    (void)PreCondVars.setVarAddr(CGF, VD, CounterAddr);
  }
  (void)PreCondVars.apply(CGF);
  for (const Expr *E : S.dependent_inits()) {
    if (!E)
      continue;
    CGF.EmitIgnoredExpr(E);
  }
  // Check that loop is executed at least one time.
  CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
  PreCondVars.restore(CGF);
}
2315
2316
/// Privatize the variables of all linear clauses on \p D. Loop counters of a
/// simd directive are skipped (they are privatized by the counter machinery);
/// their private copies are still emitted for use by updates/finals.
void CodeGenFunction::EmitOMPLinearClause(
    const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    // Collect the simd loop-control variables so they are not re-privatized.
    const auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (const Expr *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto CurPrivate = C->privates().begin();
    for (const Expr *E : C->varlists()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      const auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
      if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
        bool IsRegistered = PrivateScope.addPrivate(VD, [this, PrivateVD]() {
          // Emit private VarDecl with copy init.
          EmitVarDecl(*PrivateVD);
          return GetAddrOfLocalVar(PrivateVD);
        });
        assert(IsRegistered && "linear var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      } else {
        EmitVarDecl(*PrivateVD);
      }
      ++CurPrivate;
    }
  }
}
2350
2351
/// Apply 'simdlen'/'safelen' clause values to the loop metadata stack:
/// both set the vectorize width; a finite 'safelen' additionally forbids
/// marking memory accesses as parallel.
static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D) {
  if (!CGF.HaveInsertPoint())
    return;
  if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In presence of finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
  } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In presence of finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    CGF.LoopStack.setParallel(/*Enable=*/false);
  }
}
2375
2376
8.62k
/// Set up loop metadata for a simd region: enable vectorization, apply
/// simdlen/safelen, honor order(concurrent), and disable parallel-access
/// metadata when an inscan reduction (prefix sum) is present.
void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) {
  // Walk clauses and process safelen/lastprivate.
  LoopStack.setParallel(/*Enable=*/true);
  LoopStack.setVectorizeEnable();
  emitSimdlenSafelenClause(*this, D);
  if (const auto *C = D.getSingleClause<OMPOrderClause>())
    if (C->getKind() == OMPC_ORDER_concurrent)
      LoopStack.setParallel(/*Enable=*/true);
  if ((D.getDirectiveKind() == OMPD_simd ||
       (getLangOpts().OpenMPSimd &&
        isOpenMPSimdDirective(D.getDirectiveKind()))) &&
      llvm::any_of(D.getClausesOfKind<OMPReductionClause>(),
                   [](const OMPReductionClause *C) {
                     return C->getModifier() == OMPC_REDUCTION_inscan;
                   }))
    // Disable parallel access in case of prefix sum.
    LoopStack.setParallel(/*Enable=*/false);
}
2394
2395
/// Emit final value copy-out for the loop counters of a simd directive,
/// optionally guarded by a condition produced by \p CondGen.
void CodeGenFunction::EmitOMPSimdFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  auto IC = D.counters().begin();
  auto IPC = D.private_counters().begin();
  for (const Expr *F : D.finals()) {
    const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
    const auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
    // Only counters that are addressable here (or captured expressions) get a
    // final copy-out.
    if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
        OrigVD->hasGlobalStorage() || CED) {
      if (!DoneBB) {
        if (llvm::Value *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          llvm::BasicBlock *ThenBB = createBasicBlock(".omp.final.then");
          DoneBB = createBasicBlock(".omp.final.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      Address OrigAddr = Address::invalid();
      if (CED) {
        OrigAddr =
            EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(*this);
      } else {
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD),
                        /*RefersToEnclosingVariableOrCapture=*/false,
                        (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
        OrigAddr = EmitLValue(&DRE).getAddress(*this);
      }
      // Map the original counter onto that address and run the final expr.
      OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
    }
    ++IC;
    ++IPC;
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}
2440
2441
/// Emit the loop body of \p S followed by a debug stop point at the
/// directive's location.
static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
                                         const OMPLoopDirective &S,
                                         CodeGenFunction::JumpDest LoopExit) {
  CGF.EmitOMPLoopBody(S, LoopExit);
  CGF.EmitStopPoint(&S);
}
2447
2448
/// Emit a helper variable and return corresponding lvalue.
2449
static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
2450
42.7k
                               const DeclRefExpr *Helper) {
2451
42.7k
  auto VDecl = cast<VarDecl>(Helper->getDecl());
2452
42.7k
  CGF.EmitVarDecl(*VDecl);
2453
42.7k
  return CGF.EmitLValue(Helper);
2454
42.7k
}
2455
2456
/// Emit a simd-capable loop region, honoring an applicable if(simd:) clause:
/// the "then" path runs \p SimdInitGen before the body (vectorized), the
/// "else" path disables vectorization and runs the body alone.
static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S,
                               const RegionCodeGenTy &SimdInitGen,
                               const RegionCodeGenTy &BodyCodeGen) {
  auto &&ThenGen = [&S, &SimdInitGen, &BodyCodeGen](CodeGenFunction &CGF,
                                                    PrePostActionTy &) {
    CGOpenMPRuntime::NontemporalDeclsRAII NontemporalsRegion(CGF.CGM, S);
    CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
    SimdInitGen(CGF);

    BodyCodeGen(CGF);
  };
  auto &&ElseGen = [&BodyCodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
    CGF.LoopStack.setVectorizeEnable(/*Enable=*/false);

    BodyCodeGen(CGF);
  };
  const Expr *IfCond = nullptr;
  if (isOpenMPSimdDirective(S.getDirectiveKind())) {
    // OpenMP >= 5.0 allows if clauses (optionally simd-qualified) on simd.
    for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
      if (CGF.getLangOpts().OpenMP >= 50 &&
          (C->getNameModifier() == OMPD_unknown ||
           C->getNameModifier() == OMPD_simd)) {
        IfCond = C->getCondition();
        break;
      }
    }
  }
  if (IfCond) {
    CGF.CGM.getOpenMPRuntime().emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2491
2492
/// Emit the complete region for a simd directive: helper bound variables
/// (for composite worksharing/distribute/taskloop forms), the precondition
/// check, private/linear/reduction/lastprivate clause handling, the inner
/// loop itself, and the final clause updates.
static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
                              PrePostActionTy &Action) {
  Action.Enter(CGF);
  assert(isOpenMPSimdDirective(S.getDirectiveKind()) &&
         "Expected simd directive");
  OMPLoopScope PreInitScope(CGF, S);
  // if (PreCond) {
  //   for (IV in 0..LastIteration) BODY;
  //   <Final counter/linear vars updates>;
  // }
  //
  // Composite forms carry explicit lower/upper bound variables that must be
  // materialized before the loop.
  if (isOpenMPDistributeDirective(S.getDirectiveKind()) ||
      isOpenMPWorksharingDirective(S.getDirectiveKind()) ||
      isOpenMPTaskLoopDirective(S.getDirectiveKind())) {
    (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()));
    (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()));
  }

  // Emit: if (PreCond) - begin.
  // If the condition constant folds and can be elided, avoid emitting the
  // whole loop.
  bool CondConstant;
  llvm::BasicBlock *ContBlock = nullptr;
  if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
    if (!CondConstant)
      return; // Precondition statically false: the loop never runs.
  } else {
    llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("simd.if.then");
    ContBlock = CGF.createBasicBlock("simd.if.end");
    emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                CGF.getProfileCount(&S));
    CGF.EmitBlock(ThenBlock);
    CGF.incrementProfileCounter(&S);
  }

  // Emit the loop iteration variable.
  const Expr *IVExpr = S.getIterationVariable();
  const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
  CGF.EmitVarDecl(*IVDecl);
  CGF.EmitIgnoredExpr(S.getInit());

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on
  // each iteration (e.g., it is foldable into a constant).
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    CGF.EmitIgnoredExpr(S.getCalcLastIteration());
  }

  emitAlignedClause(CGF, S);
  (void)CGF.EmitOMPLinearClauseInit(S);
  {
    // Privatization scope: loop counters and privatized clause variables live
    // only for the duration of the loop emission below.
    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    CGF.EmitOMPLinearClause(S, LoopScope);
    CGF.EmitOMPPrivateClause(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
        CGF, S, CGF.EmitLValue(S.getIterationVariable()));
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);

    emitCommonSimdLoop(
        CGF, S,
        [&S](CodeGenFunction &CGF, PrePostActionTy &) {
          CGF.EmitOMPSimdInit(S);
        },
        [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
          CGF.EmitOMPInnerLoop(
              S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
              [&S](CodeGenFunction &CGF) {
                emitOMPLoopBodyWithStopPoint(CGF, S,
                                             CodeGenFunction::JumpDest());
              },
              [](CodeGenFunction &) {});
        });
    CGF.EmitOMPSimdFinal(S, [](CodeGenFunction &) { return nullptr; });
    // Emit final copy of the lastprivate variables at the end of loops.
    if (HasLastprivateClause)
      CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
    emitPostUpdateForReductionClause(CGF, S,
                                     [](CodeGenFunction &) { return nullptr; });
  }
  CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; });
  // Emit: if (PreCond) - end.
  if (ContBlock) {
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
}
2586
2587
181
/// Emit code for a standalone '#pragma omp simd' directive by inlining the
/// simd region in the current function.
void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  // Mark this directive as the parent loop for any scan regions inside it.
  ParentLoopDirectiveForScanRegion ScanRegion(*this, S);
  OMPFirstScanLoop = true;
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  {
    // Lastprivate-conditional handling is disabled while emitting the inlined
    // region; the update check runs after the scope is torn down.
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}
2602
2603
8
/// Emit code for '#pragma omp tile'. The tiling itself was performed by Sema;
/// we only emit the pre-transformed (de-sugared) statement.
void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective &S) {
  // Emit the de-sugared statement.
  OMPTransformDirectiveScopeRAII TileScope(*this, &S);
  EmitStmt(S.getTransformedStmt());
}
2608
2609
18
/// Emit code for '#pragma omp unroll'. Two strategies exist: with the
/// OpenMPIRBuilder the loop is consumed and unrolled directly at the IR
/// level; otherwise unroll metadata is attached to the next emitted loop and
/// the associated statement is emitted normally.
void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) {
  bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder;

  if (UseOMPIRBuilder) {
    auto DL = SourceLocToDebugLoc(S.getBeginLoc());
    const Stmt *Inner = S.getRawStmt();

    // Consume nested loop. Clear the entire remaining loop stack because a
    // fully unrolled loop is non-transformable. For partial unrolling the
    // generated outer loop is pushed back to the stack.
    llvm::CanonicalLoopInfo *CLI = EmitOMPCollapsedCanonicalLoopNest(Inner, 1);
    OMPLoopNestStack.clear();

    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();

    // If an enclosing loop-associated construct still expects a loop, partial
    // unrolling must hand back a CanonicalLoopInfo for the generated loop.
    bool NeedsUnrolledCLI = ExpectedOMPLoopDepth >= 1;
    llvm::CanonicalLoopInfo *UnrolledCLI = nullptr;

    if (S.hasClausesOfKind<OMPFullClause>()) {
      assert(ExpectedOMPLoopDepth == 0);
      OMPBuilder.unrollLoopFull(DL, CLI);
    } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
      // Factor 0 lets the IR builder pick the unroll factor itself.
      uint64_t Factor = 0;
      if (Expr *FactorExpr = PartialClause->getFactor()) {
        Factor = FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue();
        assert(Factor >= 1 && "Only positive factors are valid");
      }
      OMPBuilder.unrollLoopPartial(DL, CLI, Factor,
                                   NeedsUnrolledCLI ? &UnrolledCLI : nullptr);
    } else {
      OMPBuilder.unrollLoopHeuristic(DL, CLI);
    }

    assert((!NeedsUnrolledCLI || UnrolledCLI) &&
           "NeedsUnrolledCLI implies UnrolledCLI to be set");
    if (UnrolledCLI)
      OMPLoopNestStack.push_back(UnrolledCLI);

    return;
  }

  // This function is only called if the unrolled loop is not consumed by any
  // other loop-associated construct. Such a loop-associated construct will have
  // used the transformed AST.

  // Set the unroll metadata for the next emitted loop.
  LoopStack.setUnrollState(LoopAttributes::Enable);

  if (S.hasClausesOfKind<OMPFullClause>()) {
    LoopStack.setUnrollState(LoopAttributes::Full);
  } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
    if (Expr *FactorExpr = PartialClause->getFactor()) {
      uint64_t Factor =
          FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue();
      assert(Factor >= 1 && "Only positive factors are valid");
      LoopStack.setUnrollCount(Factor);
    }
  }

  EmitStmt(S.getAssociatedStmt());
}
2670
2671
/// Emit the outer dispatch loop shared by worksharing ('for') and
/// 'distribute' constructs whose schedule requires re-fetching chunk bounds:
/// a condition block that either recomputes the static bounds or calls the
/// runtime's "next chunk" entry point, the inner loop over the chunk, and the
/// static-finish runtime call on exit.
void CodeGenFunction::EmitOMPOuterLoop(
    bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
    CodeGenFunction::OMPPrivateScope &LoopScope,
    const CodeGenFunction::OMPLoopArguments &LoopArgs,
    const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
    const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  JumpDest LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");

  // Start the loop with a block that tests the condition.
  llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  const SourceRange R = S.getSourceRange();
  OMPLoopNestStack.clear();
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));

  llvm::Value *BoolCondVal = nullptr;
  if (!DynamicOrOrdered) {
    // UB = min(UB, GlobalUB) or
    // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
    // 'distribute parallel for')
    EmitIgnoredExpr(LoopArgs.EUB);
    // IV = LB
    EmitIgnoredExpr(LoopArgs.Init);
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
  } else {
    // Dynamic/ordered schedules: ask the runtime whether another chunk exists
    // and receive its bounds.
    BoolCondVal =
        RT.emitForNext(*this, S.getBeginLoc(), IVSize, IVSigned, LoopArgs.IL,
                       LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
  }

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");

  llvm::BasicBlock *LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);

  // Emit "IV = LB" (in case of static schedule, we have already calculated new
  // LB for loop condition and emitted it above).
  if (DynamicOrOrdered)
    EmitIgnoredExpr(LoopArgs.Init);

  // Create a block for the increment.
  JumpDest Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  emitCommonSimdLoop(
      *this, S,
      [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) {
        // Generate !llvm.loop.parallel metadata for loads and stores for loops
        // with dynamic/guided scheduling and without ordered clause.
        if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
          CGF.LoopStack.setParallel(!IsMonotonic);
          if (const auto *C = S.getSingleClause<OMPOrderClause>())
            if (C->getKind() == OMPC_ORDER_concurrent)
              CGF.LoopStack.setParallel(/*Enable=*/true);
        } else {
          CGF.EmitOMPSimdInit(S);
        }
      },
      [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered,
       &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
        SourceLocation Loc = S.getBeginLoc();
        // when 'distribute' is not combined with a 'for':
        // while (idx <= UB) { BODY; ++idx; }
        // when 'distribute' is combined with a 'for'
        // (e.g. 'distribute parallel for')
        // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
        CGF.EmitOMPInnerLoop(
            S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
            [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
              CodeGenLoop(CGF, S, LoopExit);
            },
            [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
              CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
            });
      });

  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (!DynamicOrOrdered) {
    // Emit "LB = LB + Stride", "UB = UB + Stride".
    EmitIgnoredExpr(LoopArgs.NextLB);
    EmitIgnoredExpr(LoopArgs.NextUB);
  }

  EmitBranch(CondBlock);
  OMPLoopNestStack.clear();
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());

  // Tell the runtime we are done.
  auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
    if (!DynamicOrOrdered)
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
                                                     S.getDirectiveKind());
  };
  OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
}
2786
2787
void CodeGenFunction::EmitOMPForOuterLoop(
2788
    const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
2789
    const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
2790
    const OMPLoopArguments &LoopArgs,
2791
1.05k
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
2792
1.05k
  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
2793
2794
  // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
2795
1.05k
  const bool DynamicOrOrdered = Ordered || 
RT.isDynamic(ScheduleKind.Schedule)998
;
2796
2797
1.05k
  assert((Ordered || !RT.isStaticNonchunked(ScheduleKind.Schedule,
2798
1.05k
                                            LoopArgs.Chunk != nullptr)) &&
2799
1.05k
         "static non-chunked schedule does not need outer loop");
2800
2801
  // Emit outer loop.
2802
  //
2803
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2804
  // When schedule(dynamic,chunk_size) is specified, the iterations are
2805
  // distributed to threads in the team in chunks as the threads request them.
2806
  // Each thread executes a chunk of iterations, then requests another chunk,
2807
  // until no chunks remain to be distributed. Each chunk contains chunk_size
2808
  // iterations, except for the last chunk to be distributed, which may have
2809
  // fewer iterations. When no chunk_size is specified, it defaults to 1.
2810
  //
2811
  // When schedule(guided,chunk_size) is specified, the iterations are assigned
2812
  // to threads in the team in chunks as the executing threads request them.
2813
  // Each thread executes a chunk of iterations, then requests another chunk,
2814
  // until no chunks remain to be assigned. For a chunk_size of 1, the size of
2815
  // each chunk is proportional to the number of unassigned iterations divided
2816
  // by the number of threads in the team, decreasing to 1. For a chunk_size
2817
  // with value k (greater than 1), the size of each chunk is determined in the
2818
  // same way, with the restriction that the chunks do not contain fewer than k
2819
  // iterations (except for the last chunk to be assigned, which may have fewer
2820
  // than k iterations).
2821
  //
2822
  // When schedule(auto) is specified, the decision regarding scheduling is
2823
  // delegated to the compiler and/or runtime system. The programmer gives the
2824
  // implementation the freedom to choose any possible mapping of iterations to
2825
  // threads in the team.
2826
  //
2827
  // When schedule(runtime) is specified, the decision regarding scheduling is
2828
  // deferred until run time, and the schedule and chunk size are taken from the
2829
  // run-sched-var ICV. If the ICV is set to auto, the schedule is
2830
  // implementation defined
2831
  //
2832
  // while(__kmpc_dispatch_next(&LB, &UB)) {
2833
  //   idx = LB;
2834
  //   while (idx <= UB) { BODY; ++idx;
2835
  //   __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
2836
  //   } // inner loop
2837
  // }
2838
  //
2839
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2840
  // When schedule(static, chunk_size) is specified, iterations are divided into
2841
  // chunks of size chunk_size, and the chunks are assigned to the threads in
2842
  // the team in a round-robin fashion in the order of the thread number.
2843
  //
2844
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
2845
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
2846
  //   LB = LB + ST;
2847
  //   UB = UB + ST;
2848
  // }
2849
  //
2850
2851
0
  const Expr *IVExpr = S.getIterationVariable();
2852
1.05k
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2853
1.05k
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2854
2855
1.05k
  if (DynamicOrOrdered) {
2856
764
    const std::pair<llvm::Value *, llvm::Value *> DispatchBounds =
2857
764
        CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
2858
764
    llvm::Value *LBVal = DispatchBounds.first;
2859
764
    llvm::Value *UBVal = DispatchBounds.second;
2860
764
    CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal,
2861
764
                                                             LoopArgs.Chunk};
2862
764
    RT.emitForDispatchInit(*this, S.getBeginLoc(), ScheduleKind, IVSize,
2863
764
                           IVSigned, Ordered, DipatchRTInputValues);
2864
764
  } else {
2865
291
    CGOpenMPRuntime::StaticRTInput StaticInit(
2866
291
        IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
2867
291
        LoopArgs.ST, LoopArgs.Chunk);
2868
291
    RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
2869
291
                         ScheduleKind, StaticInit);
2870
291
  }
2871
2872
1.05k
  auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
2873
1.05k
                                    const unsigned IVSize,
2874
1.05k
                                    const bool IVSigned) {
2875
1.05k
    if (Ordered) {
2876
57
      CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
2877
57
                                                            IVSigned);
2878
57
    }
2879
1.05k
  };
2880
2881
1.05k
  OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
2882
1.05k
                                 LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
2883
1.05k
  OuterLoopArgs.IncExpr = S.getInc();
2884
1.05k
  OuterLoopArgs.Init = S.getInit();
2885
1.05k
  OuterLoopArgs.Cond = S.getCond();
2886
1.05k
  OuterLoopArgs.NextLB = S.getNextLowerBound();
2887
1.05k
  OuterLoopArgs.NextUB = S.getNextUpperBound();
2888
1.05k
  EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
2889
1.05k
                   emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
2890
1.05k
}
2891
2892
/// No-op per-iteration "ordered" callback, used for loops (e.g. 'distribute')
/// where nothing must be emitted at the end of an iteration.
static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
                             const unsigned IVSize, const bool IVSigned) {}
2894
2895
void CodeGenFunction::EmitOMPDistributeOuterLoop(
2896
    OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
2897
    OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
2898
164
    const CodeGenLoopTy &CodeGenLoopContent) {
2899
2900
164
  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
2901
2902
  // Emit outer loop.
2903
  // Same behavior as a OMPForOuterLoop, except that schedule cannot be
2904
  // dynamic
2905
  //
2906
2907
164
  const Expr *IVExpr = S.getIterationVariable();
2908
164
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2909
164
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2910
2911
164
  CGOpenMPRuntime::StaticRTInput StaticInit(
2912
164
      IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
2913
164
      LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
2914
164
  RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit);
2915
2916
  // for combined 'distribute' and 'for' the increment expression of distribute
2917
  // is stored in DistInc. For 'distribute' alone, it is in Inc.
2918
164
  Expr *IncExpr;
2919
164
  if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
2920
0
    IncExpr = S.getDistInc();
2921
164
  else
2922
164
    IncExpr = S.getInc();
2923
2924
  // this routine is shared by 'omp distribute parallel for' and
2925
  // 'omp distribute': select the right EUB expression depending on the
2926
  // directive
2927
164
  OMPLoopArguments OuterLoopArgs;
2928
164
  OuterLoopArgs.LB = LoopArgs.LB;
2929
164
  OuterLoopArgs.UB = LoopArgs.UB;
2930
164
  OuterLoopArgs.ST = LoopArgs.ST;
2931
164
  OuterLoopArgs.IL = LoopArgs.IL;
2932
164
  OuterLoopArgs.Chunk = LoopArgs.Chunk;
2933
164
  OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2934
164
                          ? 
S.getCombinedEnsureUpperBound()0
2935
164
                          : S.getEnsureUpperBound();
2936
164
  OuterLoopArgs.IncExpr = IncExpr;
2937
164
  OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2938
164
                           ? 
S.getCombinedInit()0
2939
164
                           : S.getInit();
2940
164
  OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2941
164
                           ? 
S.getCombinedCond()0
2942
164
                           : S.getCond();
2943
164
  OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2944
164
                             ? 
S.getCombinedNextLowerBound()0
2945
164
                             : S.getNextLowerBound();
2946
164
  OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2947
164
                             ? 
S.getCombinedNextUpperBound()0
2948
164
                             : S.getNextUpperBound();
2949
2950
164
  EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
2951
164
                   LoopScope, OuterLoopArgs, CodeGenLoopContent,
2952
164
                   emitEmptyOrdered);
2953
164
}
2954
2955
/// Materialize the inner 'for' loop bounds for a combined
/// 'distribute parallel for': emits the LB/UB helper variables and
/// initializes them from the enclosing distribute chunk's previous bounds
/// (converted to the iteration variable's type).
static std::pair<LValue, LValue>
emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &S) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  LValue LB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
  LValue UB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));

  // When composing 'distribute' with 'for' (e.g. as in 'distribute
  // parallel for') we need to use the 'distribute'
  // chunk lower and upper bounds rather than the whole loop iteration
  // space. These are parameters to the outlined function for 'parallel'
  // and we copy the bounds of the previous schedule into the
  // current ones.
  LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
  LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
  llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
      PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc());
  // The previous bounds may have a different integer type than the iteration
  // variable; convert before storing.
  PrevLBVal = CGF.EmitScalarConversion(
      PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
      LS.getIterationVariable()->getType(),
      LS.getPrevLowerBoundVariable()->getExprLoc());
  llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
      PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc());
  PrevUBVal = CGF.EmitScalarConversion(
      PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
      LS.getIterationVariable()->getType(),
      LS.getPrevUpperBoundVariable()->getExprLoc());

  CGF.EmitStoreOfScalar(PrevLBVal, LB);
  CGF.EmitStoreOfScalar(PrevUBVal, UB);

  return {LB, UB};
}
2990
2991
/// If the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
/// we need to use the LB and UB expressions generated by the worksharing
/// code generation support, whereas in non-combined situations we would
/// just emit 0 and the LastIteration expression.
/// This function is necessary due to the difference of the LB and UB
/// types for the RT emission routines for 'for_static_init' and
/// 'for_dispatch_init'.
static std::pair<llvm::Value *, llvm::Value *>
emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        Address LB, Address UB) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  const Expr *IVExpr = LS.getIterationVariable();
  // when implementing a dynamic schedule for a 'for' combined with a
  // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
  // is not normalized as each team only executes its own assigned
  // distribute chunk
  QualType IteratorTy = IVExpr->getType();
  llvm::Value *LBVal =
      CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
  llvm::Value *UBVal =
      CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
  return {LBVal, UBVal};
}
3015
3016
/// Append the combined distribute chunk bounds (lower then upper), widened to
/// size_t, to the captured-variable list passed to the outlined 'parallel'
/// function of a combined 'distribute parallel for'.
static void emitDistributeParallelForDistributeInnerBoundParams(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const auto &Dir = cast<OMPLoopDirective>(S);
  LValue LB =
      CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
  // Bounds are passed as size_t; the cast is unsigned because the combined
  // loop bounds are normalized.
  llvm::Value *LBCast =
      CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(LB.getAddress(CGF)),
                                CGF.SizeTy, /*isSigned=*/false);
  CapturedVars.push_back(LBCast);
  LValue UB =
      CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));

  llvm::Value *UBCast =
      CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(UB.getAddress(CGF)),
                                CGF.SizeTy, /*isSigned=*/false);
  CapturedVars.push_back(UBCast);
}
3034
3035
/// Emit the inner 'parallel for[ simd]' region of a combined
/// 'distribute parallel for[ simd]' directive: outlines a parallel region
/// whose body is a worksharing loop bounded by the distribute chunk.
static void
emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
                                 const OMPLoopDirective &S,
                                 CodeGenFunction::JumpDest LoopExit) {
  auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
                                         PrePostActionTy &Action) {
    Action.Enter(CGF);
    // Determine whether a 'cancel' may target this worksharing region; only
    // the non-simd combined forms can carry cancellation.
    bool HasCancel = false;
    if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
      if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
      else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
      else if (const auto *D =
                   dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
    }
    CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
                                                     HasCancel);
    // The inner loop iterates over the distribute chunk: bounds come from the
    // enclosing distribute schedule (see the two helper callbacks).
    CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
                               emitDistributeParallelForInnerBounds,
                               emitDistributeParallelForDispatchBounds);
  };

  emitCommonOMPParallelDirective(
      CGF, S,
      isOpenMPSimdDirective(S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for,
      CGInlinedWorksharingLoop,
      emitDistributeParallelForDistributeInnerBoundParams);
}
3065
3066
/// Emit '#pragma omp distribute parallel for': an inlined 'distribute' whose
/// chunk body is the combined inner 'parallel for' region.
void CodeGenFunction::EmitOMPDistributeParallelForDirective(
    const OMPDistributeParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_parallel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}
3075
3076
/// Emit '#pragma omp distribute parallel for simd': same structure as the
/// non-simd form; the simd-ness is handled inside the combined inner region.
void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
    const OMPDistributeParallelForSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_parallel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}
3085
3086
void CodeGenFunction::EmitOMPDistributeSimdDirective(
3087
150
    const OMPDistributeSimdDirective &S) {
3088
150
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3089
150
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
3090
150
  };
3091
150
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
3092
150
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
3093
150
}
3094
3095
/// Emit the device-side outlined function for '#pragma omp target simd'.
void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
  // Emit target simd region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region; this registers the offload
  // entry so the host side can launch it.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}
3108
3109
void CodeGenFunction::EmitOMPTargetSimdDirective(
3110
354
    const OMPTargetSimdDirective &S) {
3111
354
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3112
354
    emitOMPSimdRegion(CGF, S, Action);
3113
354
  };
3114
354
  emitCommonOMPTargetDirective(*this, S, CodeGen);
3115
354
}
3116
3117
namespace {
/// Bundles an OpenMP schedule() clause kind with its (up to two) schedule
/// modifiers.
struct ScheduleKindModifiersTy {
  OpenMPScheduleClauseKind Kind;
  OpenMPScheduleClauseModifier M1;
  OpenMPScheduleClauseModifier M2;
  ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
                          OpenMPScheduleClauseModifier M1,
                          OpenMPScheduleClauseModifier M2)
      : Kind(Kind), M1(M1), M2(M2) {}
};
} // namespace
3128
3129
bool CodeGenFunction::EmitOMPWorksharingLoop(
3130
    const OMPLoopDirective &S, Expr *EUB,
3131
    const CodeGenLoopBoundsTy &CodeGenLoopBounds,
3132
4.82k
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
3133
  // Emit the loop iteration variable.
3134
4.82k
  const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
3135
4.82k
  const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
3136
4.82k
  EmitVarDecl(*IVDecl);
3137
3138
  // Emit the iterations count variable.
3139
  // If it is not a variable, Sema decided to calculate iterations count on each
3140
  // iteration (e.g., it is foldable into a constant).
3141
4.82k
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
3142
0
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
3143
    // Emit calculation of the iterations count.
3144
0
    EmitIgnoredExpr(S.getCalcLastIteration());
3145
0
  }
3146
3147
4.82k
  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
3148
3149
4.82k
  bool HasLastprivateClause;
3150
  // Check pre-condition.
3151
4.82k
  {
3152
4.82k
    OMPLoopScope PreInitScope(*this, S);
3153
    // Skip the entire loop if we don't meet the precondition.
3154
    // If the condition constant folds and can be elided, avoid emitting the
3155
    // whole loop.
3156
4.82k
    bool CondConstant;
3157
4.82k
    llvm::BasicBlock *ContBlock = nullptr;
3158
4.82k
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
3159
3.97k
      if (!CondConstant)
3160
52
        return false;
3161
3.97k
    } else {
3162
848
      llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
3163
848
      ContBlock = createBasicBlock("omp.precond.end");
3164
848
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
3165
848
                  getProfileCount(&S));
3166
848
      EmitBlock(ThenBlock);
3167
848
      incrementProfileCounter(&S);
3168
848
    }
3169
3170
4.77k
    RunCleanupsScope DoacrossCleanupScope(*this);
3171
4.77k
    bool Ordered = false;
3172
4.77k
    if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
3173
85
      if (OrderedClause->getNumForLoops())
3174
28
        RT.emitDoacrossInit(*this, S, OrderedClause->getLoopNumIterations());
3175
57
      else
3176
57
        Ordered = true;
3177
85
    }
3178
3179
4.77k
    llvm::DenseSet<const Expr *> EmittedFinals;
3180
4.77k
    emitAlignedClause(*this, S);
3181
4.77k
    bool HasLinears = EmitOMPLinearClauseInit(S);
3182
    // Emit helper vars inits.
3183
3184
4.77k
    std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
3185
4.77k
    LValue LB = Bounds.first;
3186
4.77k
    LValue UB = Bounds.second;
3187
4.77k
    LValue ST =
3188
4.77k
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
3189
4.77k
    LValue IL =
3190
4.77k
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
3191
3192
    // Emit 'then' code.
3193
4.77k
    {
3194
4.77k
      OMPPrivateScope LoopScope(*this);
3195
4.77k
      if (EmitOMPFirstprivateClause(S, LoopScope) || 
HasLinears4.75k
) {
3196
        // Emit implicit barrier to synchronize threads and avoid data races on
3197
        // initialization of firstprivate variables and post-update of
3198
        // lastprivate variables.
3199
150
        CGM.getOpenMPRuntime().emitBarrierCall(
3200
150
            *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
3201
150
            /*ForceSimpleCall=*/true);
3202
150
      }
3203
4.77k
      EmitOMPPrivateClause(S, LoopScope);
3204
4.77k
      CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
3205
4.77k
          *this, S, EmitLValue(S.getIterationVariable()));
3206
4.77k
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
3207
4.77k
      EmitOMPReductionClauseInit(S, LoopScope);
3208
4.77k
      EmitOMPPrivateLoopCounters(S, LoopScope);
3209
4.77k
      EmitOMPLinearClause(S, LoopScope);
3210
4.77k
      (void)LoopScope.Privatize();
3211
4.77k
      if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
3212
2.36k
        CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);
3213
3214
      // Detect the loop schedule kind and chunk.
3215
4.77k
      const Expr *ChunkExpr = nullptr;
3216
4.77k
      OpenMPScheduleTy ScheduleKind;
3217
4.77k
      if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
3218
1.22k
        ScheduleKind.Schedule = C->getScheduleKind();
3219
1.22k
        ScheduleKind.M1 = C->getFirstScheduleModifier();
3220
1.22k
        ScheduleKind.M2 = C->getSecondScheduleModifier();
3221
1.22k
        ChunkExpr = C->getChunkSize();
3222
3.54k
      } else {
3223
        // Default behaviour for schedule clause.
3224
3.54k
        CGM.getOpenMPRuntime().getDefaultScheduleAndChunk(
3225
3.54k
            *this, S, ScheduleKind.Schedule, ChunkExpr);
3226
3.54k
      }
3227
4.77k
      bool HasChunkSizeOne = false;
3228
4.77k
      llvm::Value *Chunk = nullptr;
3229
4.77k
      if (ChunkExpr) {
3230
549
        Chunk = EmitScalarExpr(ChunkExpr);
3231
549
        Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(),
3232
549
                                     S.getIterationVariable()->getType(),
3233
549
                                     S.getBeginLoc());
3234
549
        Expr::EvalResult Result;
3235
549
        if (ChunkExpr->EvaluateAsInt(Result, getContext())) {
3236
344
          llvm::APSInt EvaluatedChunk = Result.Val.getInt();
3237
344
          HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1);
3238
344
        }
3239
549
      }
3240
4.77k
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
3241
4.77k
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
3242
      // OpenMP 4.5, 2.7.1 Loop Construct, Description.
3243
      // If the static schedule kind is specified or if the ordered clause is
3244
      // specified, and if no monotonic modifier is specified, the effect will
3245
      // be as if the monotonic modifier was specified.
3246
4.77k
      bool StaticChunkedOne =
3247
4.77k
          RT.isStaticChunked(ScheduleKind.Schedule,
3248
4.77k
                             /* Chunked */ Chunk != nullptr) &&
3249
4.77k
          
HasChunkSizeOne417
&&
3250
4.77k
          
isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())223
;
3251
4.77k
      bool IsMonotonic =
3252
4.77k
          Ordered ||
3253
4.77k
          
(4.71k
ScheduleKind.Schedule == OMPC_SCHEDULE_static4.71k
&&
3254
4.71k
           
!(604
ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic604
||
3255
604
             
ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic603
)) ||
3256
4.77k
          
ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic4.11k
||
3257
4.77k
          
ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic4.10k
;
3258
4.77k
      if ((RT.isStaticNonchunked(ScheduleKind.Schedule,
3259
4.77k
                                 /* Chunked */ Chunk != nullptr) ||
3260
4.77k
           
StaticChunkedOne1.15k
) &&
3261
4.77k
          
!Ordered3.74k
) {
3262
3.72k
        JumpDest LoopExit =
3263
3.72k
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
3264
3.72k
        emitCommonSimdLoop(
3265
3.72k
            *this, S,
3266
3.72k
            [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3267
3.69k
              if (isOpenMPSimdDirective(S.getDirectiveKind())) {
3268
1.74k
                CGF.EmitOMPSimdInit(S);
3269
1.94k
              } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
3270
3
                if (C->getKind() == OMPC_ORDER_concurrent)
3271
3
                  CGF.LoopStack.setParallel(/*Enable=*/true);
3272
3
              }
3273
3.69k
            },
3274
3.72k
            [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk,
3275
3.72k
             &S, ScheduleKind, LoopExit,
3276
3.76k
             &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
3277
              // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
3278
              // When no chunk_size is specified, the iteration space is divided
3279
              // into chunks that are approximately equal in size, and at most
3280
              // one chunk is distributed to each thread. Note that the size of
3281
              // the chunks is unspecified in this case.
3282
3.76k
              CGOpenMPRuntime::StaticRTInput StaticInit(
3283
3.76k
                  IVSize, IVSigned, Ordered, IL.getAddress(CGF),
3284
3.76k
                  LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF),
3285
3.76k
                  StaticChunkedOne ? 
Chunk125
:
nullptr3.64k
);
3286
3.76k
              CGF.CGM.getOpenMPRuntime().emitForStaticInit(
3287
3.76k
                  CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind,
3288
3.76k
                  StaticInit);
3289
              // UB = min(UB, GlobalUB);
3290
3.76k
              if (!StaticChunkedOne)
3291
3.64k
                CGF.EmitIgnoredExpr(S.getEnsureUpperBound());
3292
              // IV = LB;
3293
3.76k
              CGF.EmitIgnoredExpr(S.getInit());
3294
              // For unchunked static schedule generate:
3295
              //
3296
              // while (idx <= UB) {
3297
              //   BODY;
3298
              //   ++idx;
3299
              // }
3300
              //
3301
              // For static schedule with chunk one:
3302
              //
3303
              // while (IV <= PrevUB) {
3304
              //   BODY;
3305
              //   IV += ST;
3306
              // }
3307
3.76k
              CGF.EmitOMPInnerLoop(
3308
3.76k
                  S, LoopScope.requiresCleanups(),
3309
3.76k
                  StaticChunkedOne ? 
S.getCombinedParForInDistCond()125
3310
3.76k
                                   : 
S.getCond()3.64k
,
3311
3.76k
                  StaticChunkedOne ? 
S.getDistInc()125
:
S.getInc()3.64k
,
3312
3.76k
                  [&S, LoopExit](CodeGenFunction &CGF) {
3313
3.76k
                    emitOMPLoopBodyWithStopPoint(CGF, S, LoopExit);
3314
3.76k
                  },
3315
3.76k
                  [](CodeGenFunction &) {});
3316
3.76k
            });
3317
3.72k
        EmitBlock(LoopExit.getBlock());
3318
        // Tell the runtime we are done.
3319
3.78k
        auto &&CodeGen = [&S](CodeGenFunction &CGF) {
3320
3.78k
          CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
3321
3.78k
                                                         S.getDirectiveKind());
3322
3.78k
        };
3323
3.72k
        OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
3324
3.72k
      } else {
3325
        // Emit the outer loop, which requests its work chunk [LB..UB] from
3326
        // runtime and runs the inner loop to process it.
3327
1.05k
        const OMPLoopArguments LoopArguments(
3328
1.05k
            LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
3329
1.05k
            IL.getAddress(*this), Chunk, EUB);
3330
1.05k
        EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
3331
1.05k
                            LoopArguments, CGDispatchBounds);
3332
1.05k
      }
3333
4.77k
      if (isOpenMPSimdDirective(S.getDirectiveKind())) {
3334
2.21k
        EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
3335
2.21k
          return CGF.Builder.CreateIsNotNull(
3336
2.21k
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3337
2.21k
        });
3338
2.21k
      }
3339
4.77k
      EmitOMPReductionClauseFinal(
3340
4.77k
          S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
3341
4.77k
                 ? /*Parallel and Simd*/ 
OMPD_parallel_for_simd2.21k
3342
4.77k
                 : /*Parallel only*/ 
OMPD_parallel2.55k
);
3343
      // Emit post-update of the reduction variables if IsLastIter != 0.
3344
4.77k
      emitPostUpdateForReductionClause(
3345
4.77k
          *this, S, [IL, &S](CodeGenFunction &CGF) {
3346
0
            return CGF.Builder.CreateIsNotNull(
3347
0
                CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3348
0
          });
3349
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
3350
4.77k
      if (HasLastprivateClause)
3351
160
        EmitOMPLastprivateClauseFinal(
3352
160
            S, isOpenMPSimdDirective(S.getDirectiveKind()),
3353
160
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
3354
4.77k
    }
3355
4.77k
    EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) {
3356
134
      return CGF.Builder.CreateIsNotNull(
3357
134
          CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3358
134
    });
3359
4.77k
    DoacrossCleanupScope.ForceCleanup();
3360
    // We're now done with the loop, so jump to the continuation block.
3361
4.77k
    if (ContBlock) {
3362
848
      EmitBranch(ContBlock);
3363
848
      EmitBlock(ContBlock, /*IsFinished=*/true);
3364
848
    }
3365
4.77k
  }
3366
0
  return HasLastprivateClause;
3367
4.82k
}
3368
3369
/// The following two functions generate expressions for the loop lower
3370
/// and upper bounds in case of static and dynamic (dispatch) schedule
3371
/// of the associated 'for' or 'distribute' loop.
3372
static std::pair<LValue, LValue>
3373
2.01k
emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
3374
2.01k
  const auto &LS = cast<OMPLoopDirective>(S);
3375
2.01k
  LValue LB =
3376
2.01k
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
3377
2.01k
  LValue UB =
3378
2.01k
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
3379
2.01k
  return {LB, UB};
3380
2.01k
}
3381
3382
/// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
3383
/// consider the lower and upper bound expressions generated by the
3384
/// worksharing loop support, but we use 0 and the iteration space size as
3385
/// constants
3386
static std::pair<llvm::Value *, llvm::Value *>
3387
emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
3388
324
                          Address LB, Address UB) {
3389
324
  const auto &LS = cast<OMPLoopDirective>(S);
3390
324
  const Expr *IVExpr = LS.getIterationVariable();
3391
324
  const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
3392
324
  llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
3393
324
  llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
3394
324
  return {LBVal, UBVal};
3395
324
}
3396
3397
/// Emits internal temp array declarations for the directive with inscan
3398
/// reductions.
3399
/// The code is the following:
3400
/// \code
3401
/// size num_iters = <num_iters>;
3402
/// <type> buffer[num_iters];
3403
/// \endcode
3404
static void emitScanBasedDirectiveDecls(
3405
    CodeGenFunction &CGF, const OMPLoopDirective &S,
3406
16
    llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
3407
16
  llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
3408
16
      NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
3409
16
  SmallVector<const Expr *, 4> Shareds;
3410
16
  SmallVector<const Expr *, 4> Privates;
3411
16
  SmallVector<const Expr *, 4> ReductionOps;
3412
16
  SmallVector<const Expr *, 4> CopyArrayTemps;
3413
16
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
3414
16
    assert(C->getModifier() == OMPC_REDUCTION_inscan &&
3415
16
           "Only inscan reductions are expected.");
3416
0
    Shareds.append(C->varlist_begin(), C->varlist_end());
3417
16
    Privates.append(C->privates().begin(), C->privates().end());
3418
16
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
3419
16
    CopyArrayTemps.append(C->copy_array_temps().begin(),
3420
16
                          C->copy_array_temps().end());
3421
16
  }
3422
16
  {
3423
    // Emit buffers for each reduction variables.
3424
    // ReductionCodeGen is required to emit correctly the code for array
3425
    // reductions.
3426
16
    ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
3427
16
    unsigned Count = 0;
3428
16
    auto *ITA = CopyArrayTemps.begin();
3429
32
    for (const Expr *IRef : Privates) {
3430
32
      const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
3431
      // Emit variably modified arrays, used for arrays/array sections
3432
      // reductions.
3433
32
      if (PrivateVD->getType()->isVariablyModifiedType()) {
3434
16
        RedCG.emitSharedOrigLValue(CGF, Count);
3435
16
        RedCG.emitAggregateType(CGF, Count);
3436
16
      }
3437
32
      CodeGenFunction::OpaqueValueMapping DimMapping(
3438
32
          CGF,
3439
32
          cast<OpaqueValueExpr>(
3440
32
              cast<VariableArrayType>((*ITA)->getType()->getAsArrayTypeUnsafe())
3441
32
                  ->getSizeExpr()),
3442
32
          RValue::get(OMPScanNumIterations));
3443
      // Emit temp buffer.
3444
32
      CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(*ITA)->getDecl()));
3445
32
      ++ITA;
3446
32
      ++Count;
3447
32
    }
3448
16
  }
3449
16
}
3450
3451
/// Emits the code for the directive with inscan reductions.
3452
/// The code is the following:
3453
/// \code
3454
/// #pragma omp ...
3455
/// for (i: 0..<num_iters>) {
3456
///   <input phase>;
3457
///   buffer[i] = red;
3458
/// }
3459
/// #pragma omp master // in parallel region
3460
/// for (int k = 0; k != ceil(log2(num_iters)); ++k)
3461
/// for (size cnt = last_iter; cnt >= pow(2, k); --k)
3462
///   buffer[i] op= buffer[i-pow(2,k)];
3463
/// #pragma omp barrier // in parallel region
3464
/// #pragma omp ...
3465
/// for (0..<num_iters>) {
3466
///   red = InclusiveScan ? buffer[i] : buffer[i-1];
3467
///   <scan phase>;
3468
/// }
3469
/// \endcode
3470
static void emitScanBasedDirective(
3471
    CodeGenFunction &CGF, const OMPLoopDirective &S,
3472
    llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen,
3473
    llvm::function_ref<void(CodeGenFunction &)> FirstGen,
3474
16
    llvm::function_ref<void(CodeGenFunction &)> SecondGen) {
3475
16
  llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
3476
16
      NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
3477
16
  SmallVector<const Expr *, 4> Privates;
3478
16
  SmallVector<const Expr *, 4> ReductionOps;
3479
16
  SmallVector<const Expr *, 4> LHSs;
3480
16
  SmallVector<const Expr *, 4> RHSs;
3481
16
  SmallVector<const Expr *, 4> CopyArrayElems;
3482
16
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
3483
16
    assert(C->getModifier() == OMPC_REDUCTION_inscan &&
3484
16
           "Only inscan reductions are expected.");
3485
0
    Privates.append(C->privates().begin(), C->privates().end());
3486
16
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
3487
16
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
3488
16
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
3489
16
    CopyArrayElems.append(C->copy_array_elems().begin(),
3490
16
                          C->copy_array_elems().end());
3491
16
  }
3492
16
  CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S);
3493
16
  {
3494
    // Emit loop with input phase:
3495
    // #pragma omp ...
3496
    // for (i: 0..<num_iters>) {
3497
    //   <input phase>;
3498
    //   buffer[i] = red;
3499
    // }
3500
16
    CGF.OMPFirstScanLoop = true;
3501
16
    CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
3502
16
    FirstGen(CGF);
3503
16
  }
3504
  // #pragma omp barrier // in parallel region
3505
16
  auto &&CodeGen = [&S, OMPScanNumIterations, &LHSs, &RHSs, &CopyArrayElems,
3506
16
                    &ReductionOps,
3507
16
                    &Privates](CodeGenFunction &CGF, PrePostActionTy &Action) {
3508
16
    Action.Enter(CGF);
3509
    // Emit prefix reduction:
3510
    // #pragma omp master // in parallel region
3511
    // for (int k = 0; k <= ceil(log2(n)); ++k)
3512
16
    llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock();
3513
16
    llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.outer.log.scan.body");
3514
16
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.outer.log.scan.exit");
3515
16
    llvm::Function *F =
3516
16
        CGF.CGM.getIntrinsic(llvm::Intrinsic::log2, CGF.DoubleTy);
3517
16
    llvm::Value *Arg =
3518
16
        CGF.Builder.CreateUIToFP(OMPScanNumIterations, CGF.DoubleTy);
3519
16
    llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(F, Arg);
3520
16
    F = CGF.CGM.getIntrinsic(llvm::Intrinsic::ceil, CGF.DoubleTy);
3521
16
    LogVal = CGF.EmitNounwindRuntimeCall(F, LogVal);
3522
16
    LogVal = CGF.Builder.CreateFPToUI(LogVal, CGF.IntTy);
3523
16
    llvm::Value *NMin1 = CGF.Builder.CreateNUWSub(
3524
16
        OMPScanNumIterations, llvm::ConstantInt::get(CGF.SizeTy, 1));
3525
16
    auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getBeginLoc());
3526
16
    CGF.EmitBlock(LoopBB);
3527
16
    auto *Counter = CGF.Builder.CreatePHI(CGF.IntTy, 2);
3528
    // size pow2k = 1;
3529
16
    auto *Pow2K = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
3530
16
    Counter->addIncoming(llvm::ConstantInt::get(CGF.IntTy, 0), InputBB);
3531
16
    Pow2K->addIncoming(llvm::ConstantInt::get(CGF.SizeTy, 1), InputBB);
3532
    // for (size i = n - 1; i >= 2 ^ k; --i)
3533
    //   tmp[i] op= tmp[i-pow2k];
3534
16
    llvm::BasicBlock *InnerLoopBB =
3535
16
        CGF.createBasicBlock("omp.inner.log.scan.body");
3536
16
    llvm::BasicBlock *InnerExitBB =
3537
16
        CGF.createBasicBlock("omp.inner.log.scan.exit");
3538
16
    llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(NMin1, Pow2K);
3539
16
    CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
3540
16
    CGF.EmitBlock(InnerLoopBB);
3541
16
    auto *IVal = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
3542
16
    IVal->addIncoming(NMin1, LoopBB);
3543
16
    {
3544
16
      CodeGenFunction::OMPPrivateScope PrivScope(CGF);
3545
16
      auto *ILHS = LHSs.begin();
3546
16
      auto *IRHS = RHSs.begin();
3547
32
      for (const Expr *CopyArrayElem : CopyArrayElems) {
3548
32
        const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
3549
32
        const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
3550
32
        Address LHSAddr = Address::invalid();
3551
32
        {
3552
32
          CodeGenFunction::OpaqueValueMapping IdxMapping(
3553
32
              CGF,
3554
32
              cast<OpaqueValueExpr>(
3555
32
                  cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
3556
32
              RValue::get(IVal));
3557
32
          LHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
3558
32
        }
3559
32
        PrivScope.addPrivate(LHSVD, [LHSAddr]() { return LHSAddr; });
3560
32
        Address RHSAddr = Address::invalid();
3561
32
        {
3562
32
          llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(IVal, Pow2K);
3563
32
          CodeGenFunction::OpaqueValueMapping IdxMapping(
3564
32
              CGF,
3565
32
              cast<OpaqueValueExpr>(
3566
32
                  cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
3567
32
              RValue::get(OffsetIVal));
3568
32
          RHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
3569
32
        }
3570
32
        PrivScope.addPrivate(RHSVD, [RHSAddr]() { return RHSAddr; });
3571
32
        ++ILHS;
3572
32
        ++IRHS;
3573
32
      }
3574
16
      PrivScope.Privatize();
3575
16
      CGF.CGM.getOpenMPRuntime().emitReduction(
3576
16
          CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
3577
16
          {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown});
3578
16
    }
3579
16
    llvm::Value *NextIVal =
3580
16
        CGF.Builder.CreateNUWSub(IVal, llvm::ConstantInt::get(CGF.SizeTy, 1));
3581
16
    IVal->addIncoming(NextIVal, CGF.Builder.GetInsertBlock());
3582
16
    CmpI = CGF.Builder.CreateICmpUGE(NextIVal, Pow2K);
3583
16
    CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
3584
16
    CGF.EmitBlock(InnerExitBB);
3585
16
    llvm::Value *Next =
3586
16
        CGF.Builder.CreateNUWAdd(Counter, llvm::ConstantInt::get(CGF.IntTy, 1));
3587
16
    Counter->addIncoming(Next, CGF.Builder.GetInsertBlock());
3588
    // pow2k <<= 1;
3589
16
    llvm::Value *NextPow2K =
3590
16
        CGF.Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true);
3591
16
    Pow2K->addIncoming(NextPow2K, CGF.Builder.GetInsertBlock());
3592
16
    llvm::Value *Cmp = CGF.Builder.CreateICmpNE(Next, LogVal);
3593
16
    CGF.Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
3594
16
    auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getEndLoc());
3595
16
    CGF.EmitBlock(ExitBB);
3596
16
  };
3597
16
  if (isOpenMPParallelDirective(S.getDirectiveKind())) {
3598
8
    CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc());
3599
8
    CGF.CGM.getOpenMPRuntime().emitBarrierCall(
3600
8
        CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
3601
8
        /*ForceSimpleCall=*/true);
3602
8
  } else {
3603
8
    RegionCodeGenTy RCG(CodeGen);
3604
8
    RCG(CGF);
3605
8
  }
3606
3607
16
  CGF.OMPFirstScanLoop = false;
3608
16
  SecondGen(CGF);
3609
16
}
3610
3611
static bool emitWorksharingDirective(CodeGenFunction &CGF,
3612
                                     const OMPLoopDirective &S,
3613
1.02k
                                     bool HasCancel) {
3614
1.02k
  bool HasLastprivates;
3615
1.02k
  if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
3616
1.02k
                   [](const OMPReductionClause *C) {
3617
193
                     return C->getModifier() == OMPC_REDUCTION_inscan;
3618
193
                   })) {
3619
24
    const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
3620
24
      CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
3621
24
      OMPLoopScope LoopScope(CGF, S);
3622
24
      return CGF.EmitScalarExpr(S.getNumIterations());
3623
24
    };
3624
16
    const auto &&FirstGen = [&S, HasCancel](CodeGenFunction &CGF) {
3625
16
      CodeGenFunction::OMPCancelStackRAII CancelRegion(
3626
16
          CGF, S.getDirectiveKind(), HasCancel);
3627
16
      (void)CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
3628
16
                                       emitForLoopBounds,
3629
16
                                       emitDispatchForLoopBounds);
3630
      // Emit an implicit barrier at the end.
3631
16
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getBeginLoc(),
3632
16
                                                 OMPD_for);
3633
16
    };
3634
16
    const auto &&SecondGen = [&S, HasCancel,
3635
16
                              &HasLastprivates](CodeGenFunction &CGF) {
3636
16
      CodeGenFunction::OMPCancelStackRAII CancelRegion(
3637
16
          CGF, S.getDirectiveKind(), HasCancel);
3638
16
      HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
3639
16
                                                   emitForLoopBounds,
3640
16
                                                   emitDispatchForLoopBounds);
3641
16
    };
3642
16
    if (!isOpenMPParallelDirective(S.getDirectiveKind()))
3643
8
      emitScanBasedDirectiveDecls(CGF, S, NumIteratorsGen);
3644
16
    emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen);
3645
1.00k
  } else {
3646
1.00k
    CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
3647
1.00k
                                                     HasCancel);
3648
1.00k
    HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
3649
1.00k
                                                 emitForLoopBounds,
3650
1.00k
                                                 emitDispatchForLoopBounds);
3651
1.00k
  }
3652
1.02k
  return HasLastprivates;
3653
1.02k
}
3654
3655
59
static bool isSupportedByOpenMPIRBuilder(const OMPForDirective &S) {
3656
59
  if (S.hasCancel())
3657
4
    return false;
3658
55
  for (OMPClause *C : S.clauses())
3659
28
    if (!isa<OMPNowaitClause>(C))
3660
28
      return false;
3661
3662
27
  return true;
3663
55
}
3664
3665
423
void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
3666
423
  bool HasLastprivates = false;
3667
423
  bool UseOMPIRBuilder =
3668
423
      CGM.getLangOpts().OpenMPIRBuilder && 
isSupportedByOpenMPIRBuilder(S)59
;
3669
423
  auto &&CodeGen = [this, &S, &HasLastprivates,
3670
423
                    UseOMPIRBuilder](CodeGenFunction &CGF, PrePostActionTy &) {
3671
    // Use the OpenMPIRBuilder if enabled.
3672
423
    if (UseOMPIRBuilder) {
3673
      // Emit the associated statement and get its loop representation.
3674
27
      const Stmt *Inner = S.getRawStmt();
3675
27
      llvm::CanonicalLoopInfo *CLI =
3676
27
          EmitOMPCollapsedCanonicalLoopNest(Inner, 1);
3677
3678
27
      bool NeedsBarrier = !S.getSingleClause<OMPNowaitClause>();
3679
27
      llvm::OpenMPIRBuilder &OMPBuilder =
3680
27
          CGM.getOpenMPRuntime().getOMPBuilder();
3681
27
      llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
3682
27
          AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
3683
27
      OMPBuilder.applyWorkshareLoop(Builder.getCurrentDebugLocation(), CLI,
3684
27
                                    AllocaIP, NeedsBarrier);
3685
27
      return;
3686
27
    }
3687
3688
396
    HasLastprivates = emitWorksharingDirective(CGF, S, S.hasCancel());
3689
396
  };
3690
423
  {
3691
423
    auto LPCRegion =
3692
423
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3693
423
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
3694
423
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
3695
423
                                                S.hasCancel());
3696
423
  }
3697
3698
423
  if (!UseOMPIRBuilder) {
3699
    // Emit an implicit barrier at the end.
3700
396
    if (!S.getSingleClause<OMPNowaitClause>() || 
HasLastprivates11
)
3701
385
      CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
3702
396
  }
3703
  // Check for outer lastprivate conditional update.
3704
423
  checkForLastprivateConditionalUpdate(*this, S);
3705
423
}
3706
3707
253
void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
3708
253
  bool HasLastprivates = false;
3709
253
  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
3710
253
                                          PrePostActionTy &) {
3711
253
    HasLastprivates = emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
3712
253
  };
3713
253
  {
3714
253
    auto LPCRegion =
3715
253
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3716
253
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
3717
253
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
3718
253
  }
3719
3720
  // Emit an implicit barrier at the end.
3721
253
  if (!S.getSingleClause<OMPNowaitClause>() || 
HasLastprivates0
)
3722
253
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
3723
  // Check for outer lastprivate conditional update.
3724
253
  checkForLastprivateConditionalUpdate(*this, S);
3725
253
}
3726
3727
static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
3728
                                const Twine &Name,
3729
400
                                llvm::Value *Init = nullptr) {
3730
400
  LValue LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
3731
400
  if (Init)
3732
320
    CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
3733
400
  return LVal;
3734
400
}
3735
3736
80
void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
3737
80
  const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
3738
80
  const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
3739
80
  bool HasLastprivates = false;
3740
80
  auto &&CodeGen = [&S, CapturedStmt, CS,
3741
80
                    &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) {
3742
80
    const ASTContext &C = CGF.getContext();
3743
80
    QualType KmpInt32Ty =
3744
80
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3745
    // Emit helper vars inits.
3746
80
    LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
3747
80
                                  CGF.Builder.getInt32(0));
3748
80
    llvm::ConstantInt *GlobalUBVal = CS != nullptr
3749
80
                                         ? CGF.Builder.getInt32(CS->size() - 1)
3750
80
                                         : 
CGF.Builder.getInt32(0)0
;
3751
80
    LValue UB =
3752
80
        createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
3753
80
    LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
3754
80
                                  CGF.Builder.getInt32(1));
3755
80
    LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
3756
80
                                  CGF.Builder.getInt32(0));
3757
    // Loop counter.
3758
80
    LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
3759
80
    OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
3760
80
    CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
3761
80
    OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
3762
80
    CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
3763
    // Generate condition for loop.
3764
80
    BinaryOperator *Cond = BinaryOperator::Create(
3765
80
        C, &IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_PRValue, OK_Ordinary,
3766
80
        S.getBeginLoc(), FPOptionsOverride());
3767
    // Increment for loop counter.
3768
80
    UnaryOperator *Inc = UnaryOperator::Create(
3769
80
        C, &IVRefExpr, UO_PreInc, KmpInt32Ty, VK_PRValue, OK_Ordinary,
3770
80
        S.getBeginLoc(), true, FPOptionsOverride());
3771
80
    auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) {
3772
      // Iterate through all sections and emit a switch construct:
3773
      // switch (IV) {
3774
      //   case 0:
3775
      //     <SectionStmt[0]>;
3776
      //     break;
3777
      // ...
3778
      //   case <NumSection> - 1:
3779
      //     <SectionStmt[<NumSection> - 1]>;
3780
      //     break;
3781
      // }
3782
      // .omp.sections.exit:
3783
80
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
3784
80
      llvm::SwitchInst *SwitchStmt =
3785
80
          CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()),
3786
80
                                   ExitBB, CS == nullptr ? 
10
: CS->size());
3787
80
      if (CS) {
3788
80
        unsigned CaseNumber = 0;
3789
126
        for (const Stmt *SubStmt : CS->children()) {
3790
126
          auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
3791
126
          CGF.EmitBlock(CaseBB);
3792
126
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
3793
126
          CGF.EmitStmt(SubStmt);
3794
126
          CGF.EmitBranch(ExitBB);
3795
126
          ++CaseNumber;
3796
126
        }
3797
80
      } else {
3798
0
        llvm::BasicBlock *CaseBB = CGF.createBasicBlock(".omp.sections.case");
3799
0
        CGF.EmitBlock(CaseBB);
3800
0
        SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
3801
0
        CGF.EmitStmt(CapturedStmt);
3802
0
        CGF.EmitBranch(ExitBB);
3803
0
      }
3804
80
      CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
3805
80
    };
3806
3807
80
    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
3808
80
    if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
3809
      // Emit implicit barrier to synchronize threads and avoid data races on
3810
      // initialization of firstprivate variables and post-update of lastprivate
3811
      // variables.
3812
0
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
3813
0
          CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
3814
0
          /*ForceSimpleCall=*/true);
3815
0
    }
3816
80
    CGF.EmitOMPPrivateClause(S, LoopScope);
3817
80
    CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(CGF, S, IV);
3818
80
    HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
3819
80
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
3820
80
    (void)LoopScope.Privatize();
3821
80
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
3822
0
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
3823
3824
    // Emit static non-chunked loop.
3825
80
    OpenMPScheduleTy ScheduleKind;
3826
80
    ScheduleKind.Schedule = OMPC_SCHEDULE_static;
3827
80
    CGOpenMPRuntime::StaticRTInput StaticInit(
3828
80
        /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(CGF),
3829
80
        LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF));
3830
80
    CGF.CGM.getOpenMPRuntime().emitForStaticInit(
3831
80
        CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, StaticInit);
3832
    // UB = min(UB, GlobalUB);
3833
80
    llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, S.getBeginLoc());
3834
80
    llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect(
3835
80
        CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
3836
80
    CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
3837
    // IV = LB;
3838
80
    CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getBeginLoc()), IV);
3839
    // while (idx <= UB) { BODY; ++idx; }
3840
80
    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, Cond, Inc, BodyGen,
3841
80
                         [](CodeGenFunction &) {});
3842
    // Tell the runtime we are done.
3843
112
    auto &&CodeGen = [&S](CodeGenFunction &CGF) {
3844
112
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
3845
112
                                                     S.getDirectiveKind());
3846
112
    };
3847
80
    CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
3848
80
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
3849
    // Emit post-update of the reduction variables if IsLastIter != 0.
3850
80
    emitPostUpdateForReductionClause(CGF, S, [IL, &S](CodeGenFunction &CGF) {
3851
0
      return CGF.Builder.CreateIsNotNull(
3852
0
          CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3853
0
    });
3854
3855
    // Emit final copy of the lastprivate variables if IsLastIter != 0.
3856
80
    if (HasLastprivates)
3857
16
      CGF.EmitOMPLastprivateClauseFinal(
3858
16
          S, /*NoFinals=*/false,
3859
16
          CGF.Builder.CreateIsNotNull(
3860
16
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc())));
3861
80
  };
3862
3863
80
  bool HasCancel = false;
3864
80
  if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
3865
54
    HasCancel = OSD->hasCancel();
3866
26
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
3867
26
    HasCancel = OPSD->hasCancel();
3868
80
  OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
3869
80
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
3870
80
                                              HasCancel);
3871
  // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
3872
  // clause. Otherwise the barrier will be generated by the codegen for the
3873
  // directive.
3874
80
  if (HasLastprivates && 
S.getSingleClause<OMPNowaitClause>()16
) {
3875
    // Emit implicit barrier to synchronize threads and avoid data races on
3876
    // initialization of firstprivate variables.
3877
0
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
3878
0
                                           OMPD_unknown);
3879
0
  }
3880
80
}
3881
3882
62
void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
3883
62
  if (CGM.getLangOpts().OpenMPIRBuilder) {
3884
8
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
3885
8
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
3886
8
    using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
3887
3888
16
    auto FiniCB = [this](InsertPointTy IP) {
3889
16
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
3890
16
    };
3891
3892
8
    const CapturedStmt *ICS = S.getInnermostCapturedStmt();
3893
8
    const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
3894
8
    const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
3895
8
    llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
3896
8
    if (CS) {
3897
12
      for (const Stmt *SubStmt : CS->children()) {
3898
12
        auto SectionCB = [this, SubStmt](InsertPointTy AllocaIP,
3899
12
                                         InsertPointTy CodeGenIP,
3900
12
                                         llvm::BasicBlock &FiniBB) {
3901
12
          OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP,
3902
12
                                                         FiniBB);
3903
12
          OMPBuilderCBHelpers::EmitOMPRegionBody(*this, SubStmt, CodeGenIP,
3904
12
                                                 FiniBB);
3905
12
        };
3906
12
        SectionCBVector.push_back(SectionCB);
3907
12
      }
3908
8
    } else {
3909
0
      auto SectionCB = [this, CapturedStmt](InsertPointTy AllocaIP,
3910
0
                                            InsertPointTy CodeGenIP,
3911
0
                                            llvm::BasicBlock &FiniBB) {
3912
0
        OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
3913
0
        OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CapturedStmt, CodeGenIP,
3914
0
                                               FiniBB);
3915
0
      };
3916
0
      SectionCBVector.push_back(SectionCB);
3917
0
    }
3918
3919
    // Privatization callback that performs appropriate action for
3920
    // shared/private/firstprivate/lastprivate/copyin/... variables.
3921
    //
3922
    // TODO: This defaults to shared right now.
3923
8
    auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
3924
8
                     llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
3925
      // The next line is appropriate only for variables (Val) with the
3926
      // data-sharing attribute "shared".
3927
0
      ReplVal = &Val;
3928
3929
0
      return CodeGenIP;
3930
0
    };
3931
3932
8
    CGCapturedStmtInfo CGSI(*ICS, CR_OpenMP);
3933
8
    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
3934
8
    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
3935
8
        AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
3936
8
    Builder.restoreIP(OMPBuilder.createSections(
3937
8
        Builder, AllocaIP, SectionCBVector, PrivCB, FiniCB, S.hasCancel(),
3938
8
        S.getSingleClause<OMPNowaitClause>()));
3939
8
    return;
3940
8
  }
3941
54
  {
3942
54
    auto LPCRegion =
3943
54
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3944
54
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
3945
54
    EmitSections(S);
3946
54
  }
3947
  // Emit an implicit barrier at the end.
3948
54
  if (!S.getSingleClause<OMPNowaitClause>()) {
3949
48
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
3950
48
                                           OMPD_sections);
3951
48
  }
3952
  // Check for outer lastprivate conditional update.
3953
54
  checkForLastprivateConditionalUpdate(*this, S);
3954
54
}
3955
3956
54
void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
3957
54
  if (CGM.getLangOpts().OpenMPIRBuilder) {
3958
8
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
3959
8
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
3960
3961
8
    const Stmt *SectionRegionBodyStmt = S.getAssociatedStmt();
3962
16
    auto FiniCB = [this](InsertPointTy IP) {
3963
16
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
3964
16
    };
3965
3966
8
    auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP,
3967
8
                                                   InsertPointTy CodeGenIP,
3968
8
                                                   llvm::BasicBlock &FiniBB) {
3969
8
      OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
3970
8
      OMPBuilderCBHelpers::EmitOMPRegionBody(*this, SectionRegionBodyStmt,
3971
8
                                             CodeGenIP, FiniBB);
3972
8
    };
3973
3974
8
    LexicalScope Scope(*this, S.getSourceRange());
3975
8
    EmitStopPoint(&S);
3976
8
    Builder.restoreIP(OMPBuilder.createSection(Builder, BodyGenCB, FiniCB));
3977
3978
8
    return;
3979
8
  }
3980
46
  LexicalScope Scope(*this, S.getSourceRange());
3981
46
  EmitStopPoint(&S);
3982
46
  EmitStmt(S.getAssociatedStmt());
3983
46
}
3984
3985
58
void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
3986
58
  llvm::SmallVector<const Expr *, 8> CopyprivateVars;
3987
58
  llvm::SmallVector<const Expr *, 8> DestExprs;
3988
58
  llvm::SmallVector<const Expr *, 8> SrcExprs;
3989
58
  llvm::SmallVector<const Expr *, 8> AssignmentOps;
3990
  // Check if there are any 'copyprivate' clauses associated with this
3991
  // 'single' construct.
3992
  // Build a list of copyprivate variables along with helper expressions
3993
  // (<source>, <destination>, <destination>=<source> expressions)
3994
58
  for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
3995
28
    CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
3996
28
    DestExprs.append(C->destination_exprs().begin(),
3997
28
                     C->destination_exprs().end());
3998
28
    SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
3999
28
    AssignmentOps.append(C->assignment_ops().begin(),
4000
28
                         C->assignment_ops().end());
4001
28
  }
4002
  // Emit code for 'single' region along with 'copyprivate' clauses
4003
58
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4004
58
    Action.Enter(CGF);
4005
58
    OMPPrivateScope SingleScope(CGF);
4006
58
    (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
4007
58
    CGF.EmitOMPPrivateClause(S, SingleScope);
4008
58
    (void)SingleScope.Privatize();
4009
58
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
4010
58
  };
4011
58
  {
4012
58
    auto LPCRegion =
4013
58
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4014
58
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
4015
58
    CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getBeginLoc(),
4016
58
                                            CopyprivateVars, DestExprs,
4017
58
                                            SrcExprs, AssignmentOps);
4018
58
  }
4019
  // Emit an implicit barrier at the end (to avoid data race on firstprivate
4020
  // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
4021
58
  if (!S.getSingleClause<OMPNowaitClause>() && 
CopyprivateVars.empty()51
) {
4022
23
    CGM.getOpenMPRuntime().emitBarrierCall(
4023
23
        *this, S.getBeginLoc(),
4024
23
        S.getSingleClause<OMPNowaitClause>() ? 
OMPD_unknown0
: OMPD_single);
4025
23
  }
4026
  // Check for outer lastprivate conditional update.
4027
58
  checkForLastprivateConditionalUpdate(*this, S);
4028
58
}
4029
4030
37
static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
4031
37
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4032
37
    Action.Enter(CGF);
4033
37
    CGF.EmitStmt(S.getRawStmt());
4034
37
  };
4035
37
  CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc());
4036
37
}
4037
4038
25
void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
4039
25
  if (CGM.getLangOpts().OpenMPIRBuilder) {
4040
10
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4041
10
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4042
4043
10
    const Stmt *MasterRegionBodyStmt = S.getAssociatedStmt();
4044
4045
10
    auto FiniCB = [this](InsertPointTy IP) {
4046
10
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
4047
10
    };
4048
4049
10
    auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP,
4050
10
                                                  InsertPointTy CodeGenIP,
4051
10
                                                  llvm::BasicBlock &FiniBB) {
4052
10
      OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
4053
10
      OMPBuilderCBHelpers::EmitOMPRegionBody(*this, MasterRegionBodyStmt,
4054
10
                                             CodeGenIP, FiniBB);
4055
10
    };
4056
4057
10
    LexicalScope Scope(*this, S.getSourceRange());
4058
10
    EmitStopPoint(&S);
4059
10
    Builder.restoreIP(OMPBuilder.createMaster(Builder, BodyGenCB, FiniCB));
4060
4061
10
    return;
4062
10
  }
4063
15
  LexicalScope Scope(*this, S.getSourceRange());
4064
15
  EmitStopPoint(&S);
4065
15
  emitMaster(*this, S);
4066
15
}
4067
4068
24
static void emitMasked(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
4069
24
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4070
24
    Action.Enter(CGF);
4071
24
    CGF.EmitStmt(S.getRawStmt());
4072
24
  };
4073
24
  Expr *Filter = nullptr;
4074
24
  if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
4075
18
    Filter = FilterClause->getThreadID();
4076
24
  CGF.CGM.getOpenMPRuntime().emitMaskedRegion(CGF, CodeGen, S.getBeginLoc(),
4077
24
                                              Filter);
4078
24
}
4079
4080
40
void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective &S) {
4081
40
  if (CGM.getLangOpts().OpenMPIRBuilder) {
4082
16
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4083
16
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4084
4085
16
    const Stmt *MaskedRegionBodyStmt = S.getAssociatedStmt();
4086
16
    const Expr *Filter = nullptr;
4087
16
    if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
4088
12
      Filter = FilterClause->getThreadID();
4089
16
    llvm::Value *FilterVal = Filter
4090
16
                                 ? 
EmitScalarExpr(Filter, CGM.Int32Ty)12
4091
16
                                 : 
llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0)4
;
4092
4093
16
    auto FiniCB = [this](InsertPointTy IP) {
4094
16
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
4095
16
    };
4096
4097
16
    auto BodyGenCB = [MaskedRegionBodyStmt, this](InsertPointTy AllocaIP,
4098
16
                                                  InsertPointTy CodeGenIP,
4099
16
                                                  llvm::BasicBlock &FiniBB) {
4100
16
      OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
4101
16
      OMPBuilderCBHelpers::EmitOMPRegionBody(*this, MaskedRegionBodyStmt,
4102
16
                                             CodeGenIP, FiniBB);
4103
16
    };
4104
4105
16
    LexicalScope Scope(*this, S.getSourceRange());
4106
16
    EmitStopPoint(&S);
4107
16
    Builder.restoreIP(
4108
16
        OMPBuilder.createMasked(Builder, BodyGenCB, FiniCB, FilterVal));
4109
4110
16
    return;
4111
16
  }
4112
24
  LexicalScope Scope(*this, S.getSourceRange());
4113
24
  EmitStopPoint(&S);
4114
24
  emitMasked(*this, S);
4115
24
}
4116
4117
102
void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
4118
102
  if (CGM.getLangOpts().OpenMPIRBuilder) {
4119
34
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4120
34
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4121
4122
34
    const Stmt *CriticalRegionBodyStmt = S.getAssociatedStmt();
4123
34
    const Expr *Hint = nullptr;
4124
34
    if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
4125
4
      Hint = HintClause->getHint();
4126
4127
    // TODO: This is slightly different from what's currently being done in
4128
    // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything
4129
    // about typing is final.
4130
34
    llvm::Value *HintInst = nullptr;
4131
34
    if (Hint)
4132
4
      HintInst =
4133
4
          Builder.CreateIntCast(EmitScalarExpr(Hint), CGM.Int32Ty, false);
4134
4135
34
    auto FiniCB = [this](InsertPointTy IP) {
4136
30
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
4137
30
    };
4138
4139
34
    auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP,
4140
34
                                                    InsertPointTy CodeGenIP,
4141
34
                                                    llvm::BasicBlock &FiniBB) {
4142
34
      OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
4143
34
      OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CriticalRegionBodyStmt,
4144
34
                                             CodeGenIP, FiniBB);
4145
34
    };
4146
4147
34
    LexicalScope Scope(*this, S.getSourceRange());
4148
34
    EmitStopPoint(&S);
4149
34
    Builder.restoreIP(OMPBuilder.createCritical(
4150
34
        Builder, BodyGenCB, FiniCB, S.getDirectiveName().getAsString(),
4151
34
        HintInst));
4152
4153
34
    return;
4154
34
  }
4155
4156
68
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4157
68
    Action.Enter(CGF);
4158
68
    CGF.EmitStmt(S.getAssociatedStmt());
4159
68
  };
4160
68
  const Expr *Hint = nullptr;
4161
68
  if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
4162
6
    Hint = HintClause->getHint();
4163
68
  LexicalScope Scope(*this, S.getSourceRange());
4164
68
  EmitStopPoint(&S);
4165
68
  CGM.getOpenMPRuntime().emitCriticalRegion(*this,
4166
68
                                            S.getDirectiveName().getAsString(),
4167
68
                                            CodeGen, S.getBeginLoc(), Hint);
4168
68
}
4169
4170
void CodeGenFunction::EmitOMPParallelForDirective(
4171
266
    const OMPParallelForDirective &S) {
4172
  // Emit directive as a combined directive that consists of two implicit
4173
  // directives: 'parallel' with 'for' directive.
4174
266
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4175
266
    Action.Enter(CGF);
4176
266
    (void)emitWorksharingDirective(CGF, S, S.hasCancel());
4177
266
  };
4178
266
  {
4179
266
    if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
4180
266
                     [](const OMPReductionClause *C) {
4181
66
                       return C->getModifier() == OMPC_REDUCTION_inscan;
4182
66
                     })) {
4183
4
      const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
4184
4
        CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
4185
4
        CGCapturedStmtInfo CGSI(CR_OpenMP);
4186
4
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
4187
4
        OMPLoopScope LoopScope(CGF, S);
4188
4
        return CGF.EmitScalarExpr(S.getNumIterations());
4189
4
      };
4190
4
      emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
4191
4
    }
4192
266
    auto LPCRegion =
4193
266
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4194
266
    emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
4195
266
                                   emitEmptyBoundParameters);
4196
266
  }
4197
  // Check for outer lastprivate conditional update.
4198
266
  checkForLastprivateConditionalUpdate(*this, S);
4199
266
}
4200
4201
void CodeGenFunction::EmitOMPParallelForSimdDirective(
4202
105
    const OMPParallelForSimdDirective &S) {
4203
  // Emit directive as a combined directive that consists of two implicit
4204
  // directives: 'parallel' with 'for' directive.
4205
105
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4206
105
    Action.Enter(CGF);
4207
105
    (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
4208
105
  };
4209
105
  {
4210
105
    if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
4211
105
                     [](const OMPReductionClause *C) {
4212
10
                       return C->getModifier() == OMPC_REDUCTION_inscan;
4213
10
                     })) {
4214
4
      const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
4215
4
        CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
4216
4
        CGCapturedStmtInfo CGSI(CR_OpenMP);
4217
4
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
4218
4
        OMPLoopScope LoopScope(CGF, S);
4219
4
        return CGF.EmitScalarExpr(S.getNumIterations());
4220
4
      };
4221
4
      emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
4222
4
    }
4223
105
    auto LPCRegion =
4224
105
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4225
105
    emitCommonOMPParallelDirective(*this, S, OMPD_for_simd, CodeGen,
4226
105
                                   emitEmptyBoundParameters);
4227
105
  }
4228
  // Check for outer lastprivate conditional update.
4229
105
  checkForLastprivateConditionalUpdate(*this, S);
4230
105
}
4231
4232
void CodeGenFunction::EmitOMPParallelMasterDirective(
4233
22
    const OMPParallelMasterDirective &S) {
4234
  // Emit directive as a combined directive that consists of two implicit
4235
  // directives: 'parallel' with 'master' directive.
4236
22
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4237
22
    Action.Enter(CGF);
4238
22
    OMPPrivateScope PrivateScope(CGF);
4239
22
    bool Copyins = CGF.EmitOMPCopyinClause(S);
4240
22
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
4241
22
    if (Copyins) {
4242
      // Emit implicit barrier to synchronize threads and avoid data races on
4243
      // propagation master's thread values of threadprivate variables to local
4244
      // instances of that variables of all other implicit threads.
4245
3
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
4246
3
          CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
4247
3
          /*ForceSimpleCall=*/true);
4248
3
    }
4249
22
    CGF.EmitOMPPrivateClause(S, PrivateScope);
4250
22
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4251
22
    (void)PrivateScope.Privatize();
4252
22
    emitMaster(CGF, S);
4253
22
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
4254
22
  };
4255
22
  {
4256
22
    auto LPCRegion =
4257
22
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4258
22
    emitCommonOMPParallelDirective(*this, S, OMPD_master, CodeGen,
4259
22
                                   emitEmptyBoundParameters);
4260
22
    emitPostUpdateForReductionClause(*this, S,
4261
22
                                     [](CodeGenFunction &) 
{ return nullptr; }0
);
4262
22
  }
4263
  // Check for outer lastprivate conditional update.
4264
22
  checkForLastprivateConditionalUpdate(*this, S);
4265
22
}
4266
4267
void CodeGenFunction::EmitOMPParallelSectionsDirective(
4268
26
    const OMPParallelSectionsDirective &S) {
4269
  // Emit directive as a combined directive that consists of two implicit
4270
  // directives: 'parallel' with 'sections' directive.
4271
26
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4272
26
    Action.Enter(CGF);
4273
26
    CGF.EmitSections(S);
4274
26
  };
4275
26
  {
4276
26
    auto LPCRegion =
4277
26
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4278
26
    emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
4279
26
                                   emitEmptyBoundParameters);
4280
26
  }
4281
  // Check for outer lastprivate conditional update.
4282
26
  checkForLastprivateConditionalUpdate(*this, S);
4283
26
}
4284
4285
namespace {
4286
/// Get the list of variables declared in the context of the untied tasks.
4287
class CheckVarsEscapingUntiedTaskDeclContext final
4288
    : public ConstStmtVisitor<CheckVarsEscapingUntiedTaskDeclContext> {
4289
  llvm::SmallVector<const VarDecl *, 4> PrivateDecls;
4290
4291
public:
4292
16
  explicit CheckVarsEscapingUntiedTaskDeclContext() = default;
4293
16
  virtual ~CheckVarsEscapingUntiedTaskDeclContext() = default;
4294
6
  void VisitDeclStmt(const DeclStmt *S) {
4295
6
    if (!S)
4296
0
      return;
4297
    // Need to privatize only local vars, static locals can be processed as is.
4298
10
    
for (const Decl *D : S->decls())6
{
4299
10
      if (const auto *VD = dyn_cast_or_null<VarDecl>(D))
4300
8
        if (VD->hasLocalStorage())
4301
8
          PrivateDecls.push_back(VD);
4302
10
    }
4303
6
  }
4304
16
  void VisitOMPExecutableDirective(const OMPExecutableDirective *) {}
4305
0
  void VisitCapturedStmt(const CapturedStmt *) {}
4306
0
  void VisitLambdaExpr(const LambdaExpr *) {}
4307
0
  void VisitBlockExpr(const BlockExpr *) {}
4308
108
  void VisitStmt(const Stmt *S) {
4309
108
    if (!S)
4310
0
      return;
4311
108
    for (const Stmt *Child : S->children())
4312
114
      if (Child)
4313
114
        Visit(Child);
4314
108
  }
4315
4316
  /// Swaps list of vars with the provided one.
4317
32
  ArrayRef<const VarDecl *> getPrivateDecls() const { return PrivateDecls; }
4318
};
4319
} // anonymous namespace
4320
4321
/// Common codegen for task-based directives: gathers clause information
/// (final, priority, private/firstprivate/lastprivate, reductions,
/// dependences, untied-task locals) into \p Data, builds the outlined task
/// function around \p BodyGen, and invokes \p TaskGen to emit the actual
/// runtime task call.
void CodeGenFunction::EmitOMPTaskBasedDirective(
    const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion,
    const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen,
    OMPTaskDataTy &Data) {
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion);
  // Iterators over the captured function's parameters; *PartId and *TaskT are
  // forwarded to emitTaskOutlinedFunction below.
  auto I = CS->getCapturedDecl()->param_begin();
  auto PartId = std::next(I);
  auto TaskT = std::next(I, 4);
  // Check if the task is final
  if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
    // If the condition constant folds and can be elided, try to avoid emitting
    // the condition and the dead arm of the if/else.
    const Expr *Cond = Clause->getCondition();
    bool CondConstant;
    if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
      Data.Final.setInt(CondConstant);
    else
      Data.Final.setPointer(EvaluateExprAsBool(Cond));
  } else {
    // By default the task is not final.
    Data.Final.setInt(/*IntVal=*/false);
  }
  // Check if the task has 'priority' clause.
  if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
    const Expr *Prio = Clause->getPriority();
    Data.Priority.setInt(/*IntVal=*/true);
    // The runtime expects a 32-bit signed priority value.
    Data.Priority.setPointer(EmitScalarConversion(
        EmitScalarExpr(Prio), Prio->getType(),
        getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
        Prio->getExprLoc()));
  }
  // The first function argument for tasks is a thread id, the second one is a
  // part id (0 for tied tasks, >=0 for untied task).
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  // Get list of private variables. EmittedAsPrivate de-duplicates variables
  // that appear in multiple clauses.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.PrivateVars.push_back(*IRef);
        Data.PrivateCopies.push_back(IInit);
      }
      ++IRef;
    }
  }
  EmittedAsPrivate.clear();
  // Get list of firstprivate variables.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.FirstprivateVars.push_back(*IRef);
        Data.FirstprivateCopies.push_back(IInit);
        Data.FirstprivateInits.push_back(*IElemInitRef);
      }
      ++IRef;
      ++IElemInitRef;
    }
  }
  // Get list of lastprivate variables (for taskloops).
  llvm::MapVector<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ID = C->destination_exprs().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.LastprivateVars.push_back(*IRef);
        Data.LastprivateCopies.push_back(IInit);
      }
      // Destination expressions are recorded even for duplicates so the final
      // copy-back targets every destination.
      LastprivateDstsOrigs.insert(
          std::make_pair(cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
                         cast<DeclRefExpr>(*IRef)));
      ++IRef;
      ++ID;
    }
  }
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
    Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
    Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
    Data.ReductionOps.append(C->reduction_ops().begin(),
                             C->reduction_ops().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
  }
  Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
      *this, S.getBeginLoc(), LHSs, RHSs, Data);
  // Build list of dependences.
  for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
    OMPTaskDataTy::DependData &DD =
        Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
    DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
  }
  // Get list of local vars for untied tasks.
  if (!Data.Tied) {
    CheckVarsEscapingUntiedTaskDeclContext Checker;
    Checker.Visit(S.getInnermostCapturedStmt()->getCapturedStmt());
    Data.PrivateLocals.append(Checker.getPrivateDecls().begin(),
                              Checker.getPrivateDecls().end());
  }
  // Callback emitted into the outlined task function: remaps every privatized
  // variable to the storage provided by the runtime, then emits the task body.
  auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
                    CapturedRegion](CodeGenFunction &CGF,
                                    PrePostActionTy &Action) {
    llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
                    std::pair<Address, Address>>
        UntiedLocalVars;
    // Set proper addresses for generated private copies.
    OMPPrivateScope Scope(CGF);
    // Generate debug info for variables present in shared clause.
    if (auto *DI = CGF.getDebugInfo()) {
      llvm::SmallDenseMap<const VarDecl *, FieldDecl *> CaptureFields =
          CGF.CapturedStmtInfo->getCaptureFields();
      llvm::Value *ContextValue = CGF.CapturedStmtInfo->getContextValue();
      if (CaptureFields.size() && ContextValue) {
        unsigned CharWidth = CGF.getContext().getCharWidth();
        // The shared variables are packed together as members of structure.
        // So the address of each shared variable can be computed by adding
        // offset of it (within record) to the base address of record. For each
        // shared variable, debug intrinsic llvm.dbg.declare is generated with
        // appropriate expressions (DIExpression).
        // Ex:
        //  %12 = load %struct.anon*, %struct.anon** %__context.addr.i
        //  call void @llvm.dbg.declare(metadata %struct.anon* %12,
        //            metadata !svar1,
        //            metadata !DIExpression(DW_OP_deref))
        //  call void @llvm.dbg.declare(metadata %struct.anon* %12,
        //            metadata !svar2,
        //            metadata !DIExpression(DW_OP_plus_uconst, 8, DW_OP_deref))
        for (auto It = CaptureFields.begin(); It != CaptureFields.end(); ++It) {
          const VarDecl *SharedVar = It->first;
          RecordDecl *CaptureRecord = It->second->getParent();
          const ASTRecordLayout &Layout =
              CGF.getContext().getASTRecordLayout(CaptureRecord);
          unsigned Offset =
              Layout.getFieldOffset(It->second->getFieldIndex()) / CharWidth;
          if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo())
            (void)DI->EmitDeclareOfAutoVariable(SharedVar, ContextValue,
                                                CGF.Builder, false);
          llvm::Instruction &Last = CGF.Builder.GetInsertBlock()->back();
          // Get the call dbg.declare instruction we just created and update
          // its DIExpression to add offset to base address.
          if (auto DDI = dyn_cast<llvm::DbgVariableIntrinsic>(&Last)) {
            SmallVector<uint64_t, 8> Ops;
            // Add offset to the base address if non zero.
            if (Offset) {
              Ops.push_back(llvm::dwarf::DW_OP_plus_uconst);
              Ops.push_back(Offset);
            }
            Ops.push_back(llvm::dwarf::DW_OP_deref);
            auto &Ctx = DDI->getContext();
            llvm::DIExpression *DIExpr = llvm::DIExpression::get(Ctx, Ops);
            Last.setOperand(2, llvm::MetadataAsValue::get(Ctx, DIExpr));
          }
        }
      }
    }
    llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs;
    if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
        !Data.LastprivateVars.empty() || !Data.PrivateLocals.empty()) {
      enum { PrivatesParam = 2, CopyFnParam = 3 };
      llvm::Value *CopyFn = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
      llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
          CS->getCapturedDecl()->getParam(PrivatesParam)));
      // Map privates.
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      llvm::SmallVector<llvm::Type *, 4> ParamTypes;
      CallArgs.push_back(PrivatesPtr);
      ParamTypes.push_back(PrivatesPtr->getType());
      for (const Expr *E : Data.PrivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr = CGF.CreateMemTemp(
            CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
        ParamTypes.push_back(PrivatePtr.getType());
      }
      for (const Expr *E : Data.FirstprivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".firstpriv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        // Also remembered separately: reductions below re-privatize the
        // firstprivates before emitting the reduction init.
        FirstprivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
        ParamTypes.push_back(PrivatePtr.getType());
      }
      for (const Expr *E : Data.LastprivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".lastpriv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
        ParamTypes.push_back(PrivatePtr.getType());
      }
      for (const VarDecl *VD : Data.PrivateLocals) {
        QualType Ty = VD->getType().getNonReferenceType();
        if (VD->getType()->isLValueReferenceType())
          Ty = CGF.getContext().getPointerType(Ty);
        if (isAllocatableDecl(VD))
          Ty = CGF.getContext().getPointerType(Ty);
        Address PrivatePtr = CGF.CreateMemTemp(
            CGF.getContext().getPointerType(Ty), ".local.ptr.addr");
        auto Result = UntiedLocalVars.insert(
            std::make_pair(VD, std::make_pair(PrivatePtr, Address::invalid())));
        // If key exists update in place.
        if (Result.second == false)
          *Result.first = std::make_pair(
              VD, std::make_pair(PrivatePtr, Address::invalid()));
        CallArgs.push_back(PrivatePtr.getPointer());
        ParamTypes.push_back(PrivatePtr.getType());
      }
      auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(),
                                               ParamTypes, /*isVarArg=*/false);
      CopyFn = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CopyFn, CopyFnTy->getPointerTo());
      // The copy function fills each *.ptr.addr temp with the address of the
      // corresponding private copy inside the task frame.
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
          CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
      for (const auto &Pair : LastprivateDstsOrigs) {
        const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
        DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/
                        CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        Pair.second->getType(), VK_LValue,
                        Pair.second->getExprLoc());
        Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
          return CGF.EmitLValue(&DRE).getAddress(CGF);
        });
      }
      for (const auto &Pair : PrivatePtrs) {
        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
                            CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
        if (auto *DI = CGF.getDebugInfo())
          if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo())
            (void)DI->EmitDeclareOfAutoVariable(
                Pair.first, Pair.second.getPointer(), CGF.Builder,
                /*UsePointerValue*/ true);
      }
      // Adjust mapping for internal locals by mapping actual memory instead of
      // a pointer to this memory.
      for (auto &Pair : UntiedLocalVars) {
        if (isAllocatableDecl(Pair.first)) {
          // Allocatable decls carry an extra level of indirection: load the
          // pointer first, then the actual storage.
          llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
          Address Replacement(Ptr, CGF.getPointerAlign());
          Pair.second.first = Replacement;
          Ptr = CGF.Builder.CreateLoad(Replacement);
          Replacement = Address(Ptr, CGF.getContext().getDeclAlign(Pair.first));
          Pair.second.second = Replacement;
        } else {
          llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
          Address Replacement(Ptr, CGF.getContext().getDeclAlign(Pair.first));
          Pair.second.first = Replacement;
        }
      }
    }
    if (Data.Reductions) {
      OMPPrivateScope FirstprivateScope(CGF);
      for (const auto &Pair : FirstprivatePtrs) {
        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
                            CGF.getContext().getDeclAlign(Pair.first));
        FirstprivateScope.addPrivate(Pair.first,
                                     [Replacement]() { return Replacement; });
      }
      (void)FirstprivateScope.Privatize();
      OMPLexicalScope LexScope(CGF, S, CapturedRegion);
      ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
                             Data.ReductionCopies, Data.ReductionOps);
      // Load the pointer to the task reduction data (captured-function
      // parameter 9).
      llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
      for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
        RedCG.emitSharedOrigLValue(CGF, Cnt);
        RedCG.emitAggregateType(CGF, Cnt);
        // FIXME: This must removed once the runtime library is fixed.
        // Emit required threadprivate variables for
        // initializer/combiner/finalizer.
        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
                                                           RedCG, Cnt);
        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
            CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
        // The runtime returns a void*; convert it to a pointer to the private
        // copy's type before recording it.
        Replacement =
            Address(CGF.EmitScalarConversion(
                        Replacement.getPointer(), CGF.getContext().VoidPtrTy,
                        CGF.getContext().getPointerType(
                            Data.ReductionCopies[Cnt]->getType()),
                        Data.ReductionCopies[Cnt]->getExprLoc()),
                    Replacement.getAlignment());
        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
        Scope.addPrivate(RedCG.getBaseDecl(Cnt),
                         [Replacement]() { return Replacement; });
      }
    }
    // Privatize all private variables except for in_reduction items.
    (void)Scope.Privatize();
    SmallVector<const Expr *, 4> InRedVars;
    SmallVector<const Expr *, 4> InRedPrivs;
    SmallVector<const Expr *, 4> InRedOps;
    SmallVector<const Expr *, 4> TaskgroupDescriptors;
    for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
      auto IPriv = C->privates().begin();
      auto IRed = C->reduction_ops().begin();
      auto ITD = C->taskgroup_descriptors().begin();
      for (const Expr *Ref : C->varlists()) {
        InRedVars.emplace_back(Ref);
        InRedPrivs.emplace_back(*IPriv);
        InRedOps.emplace_back(*IRed);
        TaskgroupDescriptors.emplace_back(*ITD);
        std::advance(IPriv, 1);
        std::advance(IRed, 1);
        std::advance(ITD, 1);
      }
    }
    // Privatize in_reduction items here, because taskgroup descriptors must be
    // privatized earlier.
    OMPPrivateScope InRedScope(CGF);
    if (!InRedVars.empty()) {
      ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
      for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
        RedCG.emitSharedOrigLValue(CGF, Cnt);
        RedCG.emitAggregateType(CGF, Cnt);
        // The taskgroup descriptor variable is always implicit firstprivate and
        // privatized already during processing of the firstprivates.
        // FIXME: This must removed once the runtime library is fixed.
        // Emit required threadprivate variables for
        // initializer/combiner/finalizer.
        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
                                                           RedCG, Cnt);
        llvm::Value *ReductionsPtr;
        if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) {
          ReductionsPtr = CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr),
                                               TRExpr->getExprLoc());
        } else {
          ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
        }
        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
            CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
        Replacement = Address(
            CGF.EmitScalarConversion(
                Replacement.getPointer(), CGF.getContext().VoidPtrTy,
                CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
                InRedPrivs[Cnt]->getExprLoc()),
            Replacement.getAlignment());
        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
        InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
                              [Replacement]() { return Replacement; });
      }
    }
    (void)InRedScope.Privatize();

    CGOpenMPRuntime::UntiedTaskLocalDeclsRAII LocalVarsScope(CGF,
                                                             UntiedLocalVars);
    Action.Enter(CGF);
    BodyGen(CGF);
  };
  llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
      Data.NumberOfParts);
  OMPLexicalScope Scope(*this, S, llvm::None,
                        !isOpenMPParallelDirective(S.getDirectiveKind()) &&
                            !isOpenMPSimdDirective(S.getDirectiveKind()));
  TaskGen(*this, OutlinedFn, Data);
}
4692
4693
/// Creates an implicit firstprivate variable of type \p Ty for a task-based
/// target directive: registers an original variable, a private copy, and an
/// init expression in \p Data, and returns the original declaration.
static ImplicitParamDecl *
createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
                                  QualType Ty, CapturedDecl *CD,
                                  SourceLocation Loc) {
  // Builds an unnamed implicit parameter of the given type.
  auto CreateDecl = [&C, CD, Loc](QualType T) {
    return ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, T,
                                     ImplicitParamDecl::Other);
  };
  // Builds an lvalue reference to such a declaration.
  auto CreateRef = [&C, Loc](ImplicitParamDecl *VD, QualType T) {
    return DeclRefExpr::Create(
        C, NestedNameSpecifierLoc(), SourceLocation(), VD,
        /*RefersToEnclosingVariableOrCapture=*/false, Loc, T, VK_LValue);
  };
  ImplicitParamDecl *OrigVD = CreateDecl(Ty);
  DeclRefExpr *OrigRef = CreateRef(OrigVD, Ty);
  ImplicitParamDecl *PrivateVD = CreateDecl(Ty);
  DeclRefExpr *PrivateRef = CreateRef(PrivateVD, Ty);
  QualType ElemType = C.getBaseElementType(Ty);
  ImplicitParamDecl *InitVD = CreateDecl(ElemType);
  DeclRefExpr *InitRef = CreateRef(InitVD, ElemType);
  // The private copy is C-initialized from the init variable.
  PrivateVD->setInitStyle(VarDecl::CInit);
  PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue,
                                              InitRef, /*BasePath=*/nullptr,
                                              VK_PRValue, FPOptionsOverride()));
  Data.FirstprivateVars.emplace_back(OrigRef);
  Data.FirstprivateCopies.emplace_back(PrivateRef);
  Data.FirstprivateInits.emplace_back(InitRef);
  return OrigVD;
}
4722
4723
/// Emits a target-related directive wrapped in an implicit task. The
/// base-pointer, pointer, size (and, when present, mapper) arrays from
/// \p InputInfo are turned into implicit firstprivates so the task body can
/// use them, and the task call is emitted with an 'if' condition derived from
/// the presence of a 'nowait' clause.
void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
    const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen,
    OMPTargetDataInfo &InputInfo) {
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
  Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  // Iterators over the captured function's parameters; *PartId and *TaskT are
  // forwarded to emitTaskOutlinedFunction below.
  auto I = CS->getCapturedDecl()->param_begin();
  auto PartId = std::next(I);
  auto TaskT = std::next(I, 4);
  OMPTaskDataTy Data;
  // The task is not final.
  Data.Final.setInt(/*IntVal=*/false);
  // Get list of firstprivate variables.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (auto *IInit : C->private_copies()) {
      Data.FirstprivateVars.push_back(*IRef);
      Data.FirstprivateCopies.push_back(IInit);
      Data.FirstprivateInits.push_back(*IElemInitRef);
      ++IRef;
      ++IElemInitRef;
    }
  }
  OMPPrivateScope TargetScope(*this);
  // Implicit firstprivates for the base-pointers, pointers, sizes and mappers
  // arrays (created below only if there are target items).
  VarDecl *BPVD = nullptr;
  VarDecl *PVD = nullptr;
  VarDecl *SVD = nullptr;
  VarDecl *MVD = nullptr;
  if (InputInfo.NumberOfTargetItems > 0) {
    auto *CD = CapturedDecl::Create(
        getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0);
    llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems);
    // Base-pointer, pointer and mapper arrays share the type void*[N].
    QualType BaseAndPointerAndMapperType = getContext().getConstantArrayType(
        getContext().VoidPtrTy, ArrSize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    BPVD = createImplicitFirstprivateForType(
        getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
    PVD = createImplicitFirstprivateForType(
        getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
    // Sizes are 64-bit signed integers.
    QualType SizesType = getContext().getConstantArrayType(
        getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1),
        ArrSize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD,
                                            S.getBeginLoc());
    TargetScope.addPrivate(
        BPVD, [&InputInfo]() { return InputInfo.BasePointersArray; });
    TargetScope.addPrivate(PVD,
                           [&InputInfo]() { return InputInfo.PointersArray; });
    TargetScope.addPrivate(SVD,
                           [&InputInfo]() { return InputInfo.SizesArray; });
    // If there is no user-defined mapper, the mapper array will be nullptr. In
    // this case, we don't need to privatize it.
    if (!isa_and_nonnull<llvm::ConstantPointerNull>(
            InputInfo.MappersArray.getPointer())) {
      MVD = createImplicitFirstprivateForType(
          getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
      TargetScope.addPrivate(MVD,
                             [&InputInfo]() { return InputInfo.MappersArray; });
    }
  }
  (void)TargetScope.Privatize();
  // Build list of dependences.
  for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
    OMPTaskDataTy::DependData &DD =
        Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
    DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
  }
  // Callback emitted into the outlined task function: remaps the firstprivate
  // copies, redirects InputInfo to the task-local arrays, and emits the body.
  auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, MVD,
                    &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Set proper addresses for generated private copies.
    OMPPrivateScope Scope(CGF);
    if (!Data.FirstprivateVars.empty()) {
      enum { PrivatesParam = 2, CopyFnParam = 3 };
      llvm::Value *CopyFn = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
      llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
          CS->getCapturedDecl()->getParam(PrivatesParam)));
      // Map privates.
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      llvm::SmallVector<llvm::Type *, 4> ParamTypes;
      CallArgs.push_back(PrivatesPtr);
      ParamTypes.push_back(PrivatesPtr->getType());
      for (const Expr *E : Data.FirstprivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".firstpriv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
        ParamTypes.push_back(PrivatePtr.getType());
      }
      auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(),
                                               ParamTypes, /*isVarArg=*/false);
      CopyFn = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CopyFn, CopyFnTy->getPointerTo());
      // The copy function fills each .firstpriv.ptr.addr temp with the address
      // of the corresponding private copy inside the task frame.
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
          CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
      for (const auto &Pair : PrivatePtrs) {
        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
                            CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
      }
    }
    // Privatize all private variables except for in_reduction items.
    (void)Scope.Privatize();
    // Point InputInfo at the privatized (task-local) copies of the arrays.
    if (InputInfo.NumberOfTargetItems > 0) {
      InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP(
          CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0);
      InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP(
          CGF.GetAddrOfLocalVar(PVD), /*Index=*/0);
      InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
          CGF.GetAddrOfLocalVar(SVD), /*Index=*/0);
      // If MVD is nullptr, the mapper array is not privatized
      if (MVD)
        InputInfo.MappersArray = CGF.Builder.CreateConstArrayGEP(
            CGF.GetAddrOfLocalVar(MVD), /*Index=*/0);
    }

    Action.Enter(CGF);
    OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false);
    BodyGen(CGF);
  };
  llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true,
      Data.NumberOfParts);
  // The 'if' condition for the task call is 1 when 'nowait' is present and 0
  // otherwise.
  llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0);
  IntegerLiteral IfCond(getContext(), TrueOrFalse,
                        getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
                        SourceLocation());

  CGM.getOpenMPRuntime().emitTaskCall(*this, S.getBeginLoc(), S, OutlinedFn,
                                      SharedsTy, CapturedStruct, &IfCond, Data);
}
4860
4861
193
void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
4862
  // Emit outlined function for task construct.
4863
193
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
4864
193
  Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
4865
193
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
4866
193
  const Expr *IfCond = nullptr;
4867
193
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
4868
44
    if (C->getNameModifier() == OMPD_unknown ||
4869
44
        
C->getNameModifier() == OMPD_task12
) {
4870
44
      IfCond = C->getCondition();
4871
44
      break;
4872
44
    }
4873
44
  }
4874
4875
193
  OMPTaskDataTy Data;
4876
  // Check if we should emit tied or untied task.
4877
193
  Data.Tied = !S.getSingleClause<OMPUntiedClause>();
4878
193
  auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
4879
193
    CGF.EmitStmt(CS->getCapturedStmt());
4880
193
  };
4881
193
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
4882
193
                    IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
4883
193
                            const OMPTaskDataTy &Data) {
4884
193
    CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getBeginLoc(), S, OutlinedFn,
4885
193
                                            SharedsTy, CapturedStruct, IfCond,
4886
193
                                            Data);
4887
193
  };
4888
193
  auto LPCRegion =
4889
193
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4890
193
  EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data);
4891
193
}
4892
4893
void CodeGenFunction::EmitOMPTaskyieldDirective(
4894
16
    const OMPTaskyieldDirective &S) {
4895
16
  CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getBeginLoc());
4896
16
}
4897
4898
30
void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
4899
30
  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_barrier);
4900
30
}
4901
4902
14
void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
4903
14
  OMPTaskDataTy Data;
4904
  // Build list of dependences
4905
14
  for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
4906
2
    OMPTaskDataTy::DependData &DD =
4907
2
        Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
4908
2
    DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
4909
2
  }
4910
14
  CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc(), Data);
4911
14
}
4912
4913
void CodeGenFunction::EmitOMPTaskgroupDirective(
4914
39
    const OMPTaskgroupDirective &S) {
4915
39
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4916
39
    Action.Enter(CGF);
4917
39
    if (const Expr *E = S.getReductionRef()) {
4918
26
      SmallVector<const Expr *, 4> LHSs;
4919
26
      SmallVector<const Expr *, 4> RHSs;
4920
26
      OMPTaskDataTy Data;
4921
26
      for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
4922
26
        Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
4923
26
        Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
4924
26
        Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
4925
26
        Data.ReductionOps.append(C->reduction_ops().begin(),
4926
26
                                 C->reduction_ops().end());
4927
26
        LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
4928
26
        RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
4929
26
      }
4930
26
      llvm::Value *ReductionDesc =
4931
26
          CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(),
4932
26
                                                           LHSs, RHSs, Data);
4933
26
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4934
26
      CGF.EmitVarDecl(*VD);
4935
26
      CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
4936
26
                            /*Volatile=*/false, E->getType());
4937
26
    }
4938
39
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
4939
39
  };
4940
39
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
4941
39
  CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getBeginLoc());
4942
39
}
4943
4944
40
void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
4945
40
  llvm::AtomicOrdering AO = S.getSingleClause<OMPFlushClause>()
4946
40
                                ? 
llvm::AtomicOrdering::NotAtomic8
4947
40
                                : 
llvm::AtomicOrdering::AcquireRelease32
;
4948
40
  CGM.getOpenMPRuntime().emitFlush(
4949
40
      *this,
4950
40
      [&S]() -> ArrayRef<const Expr *> {
4951
40
        if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>())
4952
8
          return llvm::makeArrayRef(FlushClause->varlist_begin(),
4953
8
                                    FlushClause->varlist_end());
4954
32
        return llvm::None;
4955
40
      }(),
4956
40
      S.getBeginLoc(), AO);
4957
40
}
4958
4959
14
void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) {
4960
14
  const auto *DO = S.getSingleClause<OMPDepobjClause>();
4961
14
  LValue DOLVal = EmitLValue(DO->getDepobj());
4962
14
  if (const auto *DC = S.getSingleClause<OMPDependClause>()) {
4963
6
    OMPTaskDataTy::DependData Dependencies(DC->getDependencyKind(),
4964
6
                                           DC->getModifier());
4965
6
    Dependencies.DepExprs.append(DC->varlist_begin(), DC->varlist_end());
4966
6
    Address DepAddr = CGM.getOpenMPRuntime().emitDepobjDependClause(
4967
6
        *this, Dependencies, DC->getBeginLoc());
4968
6
    EmitStoreOfScalar(DepAddr.getPointer(), DOLVal);
4969
6
    return;
4970
6
  }
4971
8
  if (const auto *DC = S.getSingleClause<OMPDestroyClause>()) {
4972
4
    CGM.getOpenMPRuntime().emitDestroyClause(*this, DOLVal, DC->getBeginLoc());
4973
4
    return;
4974
4
  }
4975
4
  if (const auto *UC = S.getSingleClause<OMPUpdateClause>()) {
4976
4
    CGM.getOpenMPRuntime().emitUpdateClause(
4977
4
        *this, DOLVal, UC->getDependencyKind(), UC->getBeginLoc());
4978
4
    return;
4979
4
  }
4980
4
}
4981
4982
56
void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) {
4983
56
  if (!OMPParentLoopDirectiveForScan)
4984
8
    return;
4985
48
  const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan;
4986
48
  bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>();
4987
48
  SmallVector<const Expr *, 4> Shareds;
4988
48
  SmallVector<const Expr *, 4> Privates;
4989
48
  SmallVector<const Expr *, 4> LHSs;
4990
48
  SmallVector<const Expr *, 4> RHSs;
4991
48
  SmallVector<const Expr *, 4> ReductionOps;
4992
48
  SmallVector<const Expr *, 4> CopyOps;
4993
48
  SmallVector<const Expr *, 4> CopyArrayTemps;
4994
48
  SmallVector<const Expr *, 4> CopyArrayElems;
4995
48
  for (const auto *C : ParentDir.getClausesOfKind<OMPReductionClause>()) {
4996
48
    if (C->getModifier() != OMPC_REDUCTION_inscan)
4997
0
      continue;
4998
48
    Shareds.append(C->varlist_begin(), C->varlist_end());
4999
48
    Privates.append(C->privates().begin(), C->privates().end());
5000
48
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
5001
48
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
5002
48
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
5003
48
    CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
5004
48
    CopyArrayTemps.append(C->copy_array_temps().begin(),