Coverage Report

Created: 2022-07-16 07:03

/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/CodeGen/CGStmtOpenMP.cpp
<
Line
Count
Source (jump to first uncovered line)
1
//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This contains code to emit OpenMP nodes as LLVM code.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "CGCleanup.h"
14
#include "CGOpenMPRuntime.h"
15
#include "CodeGenFunction.h"
16
#include "CodeGenModule.h"
17
#include "TargetInfo.h"
18
#include "clang/AST/ASTContext.h"
19
#include "clang/AST/Attr.h"
20
#include "clang/AST/DeclOpenMP.h"
21
#include "clang/AST/OpenMPClause.h"
22
#include "clang/AST/Stmt.h"
23
#include "clang/AST/StmtOpenMP.h"
24
#include "clang/AST/StmtVisitor.h"
25
#include "clang/Basic/OpenMPKinds.h"
26
#include "clang/Basic/PrettyStackTrace.h"
27
#include "llvm/ADT/SmallSet.h"
28
#include "llvm/BinaryFormat/Dwarf.h"
29
#include "llvm/Frontend/OpenMP/OMPConstants.h"
30
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
31
#include "llvm/IR/Constants.h"
32
#include "llvm/IR/DebugInfoMetadata.h"
33
#include "llvm/IR/Instructions.h"
34
#include "llvm/IR/IntrinsicInst.h"
35
#include "llvm/IR/Metadata.h"
36
#include "llvm/Support/AtomicOrdering.h"
37
using namespace clang;
38
using namespace CodeGen;
39
using namespace llvm::omp;
40
41
static const VarDecl *getBaseDecl(const Expr *Ref);
42
43
namespace {
44
/// Lexical scope for OpenMP executable constructs, that handles correct codegen
45
/// for captured expressions.
46
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
47
15.8k
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
48
18.7k
    for (const auto *C : S.clauses()) {
49
18.7k
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
50
10.8k
        if (const auto *PreInit =
51
10.8k
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
52
1.14k
          for (const auto *I : PreInit->decls()) {
53
1.14k
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
54
1.13k
              CGF.EmitVarDecl(cast<VarDecl>(*I));
55
1.13k
            } else {
56
18
              CodeGenFunction::AutoVarEmission Emission =
57
18
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
58
18
              CGF.EmitAutoVarCleanups(Emission);
59
18
            }
60
1.14k
          }
61
1.07k
        }
62
10.8k
      }
63
18.7k
    }
64
15.8k
  }
65
  CodeGenFunction::OMPPrivateScope InlinedShareds;
66
67
17.0k
  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
68
17.0k
    return CGF.LambdaCaptureFields.lookup(VD) ||
69
17.0k
           
(16.6k
CGF.CapturedStmtInfo16.6k
&&
CGF.CapturedStmtInfo->lookup(VD)4.24k
) ||
70
17.0k
           
(13.0k
CGF.CurCodeDecl13.0k
&&
isa<BlockDecl>(CGF.CurCodeDecl)13.0k
&&
71
13.0k
            
cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD)8
);
72
17.0k
  }
73
74
public:
75
  OMPLexicalScope(
76
      CodeGenFunction &CGF, const OMPExecutableDirective &S,
77
      const llvm::Optional<OpenMPDirectiveKind> CapturedRegion = llvm::None,
78
      const bool EmitPreInitStmt = true)
79
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
80
25.5k
        InlinedShareds(CGF) {
81
25.5k
    if (EmitPreInitStmt)
82
15.8k
      emitPreInitStmt(CGF, S);
83
25.5k
    if (!CapturedRegion)
84
12.6k
      return;
85
12.8k
    assert(S.hasAssociatedStmt() &&
86
12.8k
           "Expected associated statement for inlined directive.");
87
0
    const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
88
19.3k
    for (const auto &C : CS->captures()) {
89
19.3k
      if (C.capturesVariable() || 
C.capturesVariableByCopy()11.0k
) {
90
17.0k
        auto *VD = C.getCapturedVar();
91
17.0k
        assert(VD == VD->getCanonicalDecl() &&
92
17.0k
               "Canonical decl must be captured.");
93
0
        DeclRefExpr DRE(
94
17.0k
            CGF.getContext(), const_cast<VarDecl *>(VD),
95
17.0k
            isCapturedVar(CGF, VD) || 
(13.0k
CGF.CapturedStmtInfo13.0k
&&
96
13.0k
                                       
InlinedShareds.isGlobalVarCaptured(VD)676
),
97
17.0k
            VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
98
17.0k
        InlinedShareds.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
99
17.0k
      }
100
19.3k
    }
101
12.8k
    (void)InlinedShareds.Privatize();
102
12.8k
  }
103
};
104
105
/// Lexical scope for OpenMP parallel construct, that handles correct codegen
106
/// for captured expressions.
107
class OMPParallelScope final : public OMPLexicalScope {
108
6.18k
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
109
6.18k
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
110
6.18k
    return !(isOpenMPTargetExecutionDirective(Kind) ||
111
6.18k
             
isOpenMPLoopBoundSharingDirective(Kind)2.88k
) &&
112
6.18k
           
isOpenMPParallelDirective(Kind)1.49k
;
113
6.18k
  }
114
115
public:
116
  OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
117
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
118
6.18k
                        EmitPreInitStmt(S)) {}
119
};
120
121
/// Lexical scope for OpenMP teams construct, that handles correct codegen
122
/// for captured expressions.
123
class OMPTeamsScope final : public OMPLexicalScope {
124
5.71k
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
125
5.71k
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
126
5.71k
    return !isOpenMPTargetExecutionDirective(Kind) &&
127
5.71k
           
isOpenMPTeamsDirective(Kind)1.90k
;
128
5.71k
  }
129
130
public:
131
  OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
132
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
133
5.71k
                        EmitPreInitStmt(S)) {}
134
};
135
136
/// Private scope for OpenMP loop-based directives, that supports capturing
137
/// of used expression from loop statement.
138
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
139
17.4k
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopBasedDirective &S) {
140
17.4k
    const DeclStmt *PreInits;
141
17.4k
    CodeGenFunction::OMPMapVars PreCondVars;
142
17.4k
    if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
143
17.4k
      llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
144
18.1k
      for (const auto *E : LD->counters()) {
145
18.1k
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
146
18.1k
        EmittedAsPrivate.insert(VD->getCanonicalDecl());
147
18.1k
        (void)PreCondVars.setVarAddr(
148
18.1k
            CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
149
18.1k
      }
150
      // Mark private vars as undefs.
151
17.4k
      for (const auto *C : LD->getClausesOfKind<OMPPrivateClause>()) {
152
2.85k
        for (const Expr *IRef : C->varlists()) {
153
2.85k
          const auto *OrigVD =
154
2.85k
              cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
155
2.85k
          if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
156
2.52k
            QualType OrigVDTy = OrigVD->getType().getNonReferenceType();
157
2.52k
            (void)PreCondVars.setVarAddr(
158
2.52k
                CGF, OrigVD,
159
2.52k
                Address(llvm::UndefValue::get(CGF.ConvertTypeForMem(
160
2.52k
                            CGF.getContext().getPointerType(OrigVDTy))),
161
2.52k
                        CGF.ConvertTypeForMem(OrigVDTy),
162
2.52k
                        CGF.getContext().getDeclAlign(OrigVD)));
163
2.52k
          }
164
2.85k
        }
165
716
      }
166
17.4k
      (void)PreCondVars.apply(CGF);
167
      // Emit init, __range and __end variables for C++ range loops.
168
17.4k
      (void)OMPLoopBasedDirective::doForAllLoops(
169
17.4k
          LD->getInnermostCapturedStmt()->getCapturedStmt(),
170
17.4k
          /*TryImperfectlyNestedLoops=*/true, LD->getLoopsNumber(),
171
18.1k
          [&CGF](unsigned Cnt, const Stmt *CurStmt) {
172
18.1k
            if (const auto *CXXFor = dyn_cast<CXXForRangeStmt>(CurStmt)) {
173
6
              if (const Stmt *Init = CXXFor->getInit())
174
0
                CGF.EmitStmt(Init);
175
6
              CGF.EmitStmt(CXXFor->getRangeStmt());
176
6
              CGF.EmitStmt(CXXFor->getEndStmt());
177
6
            }
178
18.1k
            return false;
179
18.1k
          });
180
17.4k
      PreInits = cast_or_null<DeclStmt>(LD->getPreInits());
181
17.4k
    } else 
if (const auto *8
Tile8
= dyn_cast<OMPTileDirective>(&S)) {
182
8
      PreInits = cast_or_null<DeclStmt>(Tile->getPreInits());
183
8
    } else 
if (const auto *0
Unroll0
= dyn_cast<OMPUnrollDirective>(&S)) {
184
0
      PreInits = cast_or_null<DeclStmt>(Unroll->getPreInits());
185
0
    } else {
186
0
      llvm_unreachable("Unknown loop-based directive kind.");
187
0
    }
188
17.4k
    if (PreInits) {
189
3.09k
      for (const auto *I : PreInits->decls())
190
6.90k
        CGF.EmitVarDecl(cast<VarDecl>(*I));
191
3.09k
    }
192
17.4k
    PreCondVars.restore(CGF);
193
17.4k
  }
194
195
public:
196
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopBasedDirective &S)
197
17.4k
      : CodeGenFunction::RunCleanupsScope(CGF) {
198
17.4k
    emitPreInitStmt(CGF, S);
199
17.4k
  }
200
};
201
202
class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
203
  CodeGenFunction::OMPPrivateScope InlinedShareds;
204
205
41.8k
  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
206
41.8k
    return CGF.LambdaCaptureFields.lookup(VD) ||
207
41.8k
           
(40.8k
CGF.CapturedStmtInfo40.8k
&&
CGF.CapturedStmtInfo->lookup(VD)6.55k
) ||
208
41.8k
           
(40.8k
CGF.CurCodeDecl40.8k
&&
isa<BlockDecl>(CGF.CurCodeDecl)40.8k
&&
209
40.8k
            
cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD)78
);
210
41.8k
  }
211
212
public:
213
  OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
214
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
215
13.4k
        InlinedShareds(CGF) {
216
17.5k
    for (const auto *C : S.clauses()) {
217
17.5k
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
218
11.5k
        if (const auto *PreInit =
219
11.5k
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
220
1.36k
          for (const auto *I : PreInit->decls()) {
221
1.36k
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
222
1.34k
              CGF.EmitVarDecl(cast<VarDecl>(*I));
223
1.34k
            } else {
224
18
              CodeGenFunction::AutoVarEmission Emission =
225
18
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
226
18
              CGF.EmitAutoVarCleanups(Emission);
227
18
            }
228
1.36k
          }
229
1.29k
        }
230
11.5k
      } else 
if (const auto *6.04k
UDP6.04k
= dyn_cast<OMPUseDevicePtrClause>(C)) {
231
86
        for (const Expr *E : UDP->varlists()) {
232
86
          const Decl *D = cast<DeclRefExpr>(E)->getDecl();
233
86
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
234
20
            CGF.EmitVarDecl(*OED);
235
86
        }
236
5.96k
      } else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(C)) {
237
24
        for (const Expr *E : UDP->varlists()) {
238
24
          const Decl *D = getBaseDecl(E);
239
24
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
240
10
            CGF.EmitVarDecl(*OED);
241
24
        }
242
6
      }
243
17.5k
    }
244
13.4k
    if (!isOpenMPSimdDirective(S.getDirectiveKind()))
245
9.98k
      CGF.EmitOMPPrivateClause(S, InlinedShareds);
246
13.4k
    if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
247
37
      if (const Expr *E = TG->getReductionRef())
248
26
        CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
249
37
    }
250
    // Temp copy arrays for inscan reductions should not be emitted as they are
251
    // not used in simd only mode.
252
13.4k
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> CopyArrayTemps;
253
13.4k
    for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
254
466
      if (C->getModifier() != OMPC_REDUCTION_inscan)
255
446
        continue;
256
20
      for (const Expr *E : C->copy_array_temps())
257
36
        CopyArrayTemps.insert(cast<DeclRefExpr>(E)->getDecl());
258
20
    }
259
13.4k
    const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
260
41.4k
    while (CS) {
261
47.9k
      for (auto &C : CS->captures()) {
262
47.9k
        if (C.capturesVariable() || 
C.capturesVariableByCopy()30.9k
) {
263
41.8k
          auto *VD = C.getCapturedVar();
264
41.8k
          if (CopyArrayTemps.contains(VD))
265
16
            continue;
266
41.8k
          assert(VD == VD->getCanonicalDecl() &&
267
41.8k
                 "Canonical decl must be captured.");
268
0
          DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
269
41.8k
                          isCapturedVar(CGF, VD) ||
270
41.8k
                              
(40.7k
CGF.CapturedStmtInfo40.7k
&&
271
40.7k
                               
InlinedShareds.isGlobalVarCaptured(VD)6.53k
),
272
41.8k
                          VD->getType().getNonReferenceType(), VK_LValue,
273
41.8k
                          C.getLocation());
274
41.8k
          InlinedShareds.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
275
41.8k
        }
276
47.9k
      }
277
28.0k
      CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
278
28.0k
    }
279
13.4k
    (void)InlinedShareds.Privatize();
280
13.4k
  }
281
};
282
283
} // namespace
284
285
static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
286
                                         const OMPExecutableDirective &S,
287
                                         const RegionCodeGenTy &CodeGen);
288
289
13.0k
LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
290
13.0k
  if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
291
9.49k
    if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
292
9.49k
      OrigVD = OrigVD->getCanonicalDecl();
293
9.49k
      bool IsCaptured =
294
9.49k
          LambdaCaptureFields.lookup(OrigVD) ||
295
9.49k
          
(9.36k
CapturedStmtInfo9.36k
&&
CapturedStmtInfo->lookup(OrigVD)984
) ||
296
9.49k
          
(8.70k
CurCodeDecl8.70k
&&
isa<BlockDecl>(CurCodeDecl)8.64k
);
297
9.49k
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured,
298
9.49k
                      OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
299
9.49k
      return EmitLValue(&DRE);
300
9.49k
    }
301
9.49k
  }
302
3.52k
  return EmitLValue(E);
303
13.0k
}
304
305
18.9k
llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
306
18.9k
  ASTContext &C = getContext();
307
18.9k
  llvm::Value *Size = nullptr;
308
18.9k
  auto SizeInChars = C.getTypeSizeInChars(Ty);
309
18.9k
  if (SizeInChars.isZero()) {
310
    // getTypeSizeInChars() returns 0 for a VLA.
311
2.33k
    while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) {
312
1.16k
      VlaSizePair VlaSize = getVLASize(VAT);
313
1.16k
      Ty = VlaSize.Type;
314
1.16k
      Size =
315
1.16k
          Size ? 
Builder.CreateNUWMul(Size, VlaSize.NumElts)0
: VlaSize.NumElts;
316
1.16k
    }
317
1.16k
    SizeInChars = C.getTypeSizeInChars(Ty);
318
1.16k
    if (SizeInChars.isZero())
319
0
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
320
1.16k
    return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
321
1.16k
  }
322
17.7k
  return CGM.getSize(SizeInChars);
323
18.9k
}
324
325
void CodeGenFunction::GenerateOpenMPCapturedVars(
326
21.5k
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
327
21.5k
  const RecordDecl *RD = S.getCapturedRecordDecl();
328
21.5k
  auto CurField = RD->field_begin();
329
21.5k
  auto CurCap = S.captures().begin();
330
21.5k
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
331
21.5k
                                                 E = S.capture_init_end();
332
53.3k
       I != E; 
++I, ++CurField, ++CurCap31.7k
) {
333
31.7k
    if (CurField->hasCapturedVLAType()) {
334
2.63k
      const VariableArrayType *VAT = CurField->getCapturedVLAType();
335
2.63k
      llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
336
2.63k
      CapturedVars.push_back(Val);
337
29.1k
    } else if (CurCap->capturesThis()) {
338
1.68k
      CapturedVars.push_back(CXXThisValue);
339
27.4k
    } else if (CurCap->capturesVariableByCopy()) {
340
14.6k
      llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());
341
342
      // If the field is not a pointer, we need to save the actual value
343
      // and load it as a void pointer.
344
14.6k
      if (!CurField->getType()->isAnyPointerType()) {
345
12.7k
        ASTContext &Ctx = getContext();
346
12.7k
        Address DstAddr = CreateMemTemp(
347
12.7k
            Ctx.getUIntPtrType(),
348
12.7k
            Twine(CurCap->getCapturedVar()->getName(), ".casted"));
349
12.7k
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());
350
351
12.7k
        llvm::Value *SrcAddrVal = EmitScalarConversion(
352
12.7k
            DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
353
12.7k
            Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
354
12.7k
        LValue SrcLV =
355
12.7k
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());
356
357
        // Store the value using the source type pointer.
358
12.7k
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);
359
360
        // Load the value using the destination type pointer.
361
12.7k
        CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
362
12.7k
      }
363
14.6k
      CapturedVars.push_back(CV);
364
14.6k
    } else {
365
12.8k
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
366
0
      CapturedVars.push_back(EmitLValue(*I).getAddress(*this).getPointer());
367
12.8k
    }
368
31.7k
  }
369
21.5k
}
370
371
static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
372
                                    QualType DstType, StringRef Name,
373
17.8k
                                    LValue AddrLV) {
374
17.8k
  ASTContext &Ctx = CGF.getContext();
375
376
17.8k
  llvm::Value *CastedPtr = CGF.EmitScalarConversion(
377
17.8k
      AddrLV.getAddress(CGF).getPointer(), Ctx.getUIntPtrType(),
378
17.8k
      Ctx.getPointerType(DstType), Loc);
379
17.8k
  Address TmpAddr =
380
17.8k
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, DstType).getAddress(CGF);
381
17.8k
  return TmpAddr;
382
17.8k
}
383
384
7.20k
static QualType getCanonicalParamType(ASTContext &C, QualType T) {
385
7.20k
  if (T->isLValueReferenceType())
386
2.10k
    return C.getLValueReferenceType(
387
2.10k
        getCanonicalParamType(C, T.getNonReferenceType()),
388
2.10k
        /*SpelledAsLValue=*/false);
389
5.09k
  if (T->isPointerType())
390
27
    return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
391
5.06k
  if (const ArrayType *A = T->getAsArrayTypeUnsafe()) {
392
3.00k
    if (const auto *VLA = dyn_cast<VariableArrayType>(A))
393
2.94k
      return getCanonicalParamType(C, VLA->getElementType());
394
60
    if (!A->isVariablyModifiedType())
395
60
      return C.getCanonicalType(T);
396
60
  }
397
2.06k
  return C.getCanonicalParamType(T);
398
5.06k
}
399
400
namespace {
401
/// Contains required data for proper outlined function codegen.
402
struct FunctionOptions {
403
  /// Captured statement for which the function is generated.
404
  const CapturedStmt *S = nullptr;
405
  /// true if cast to/from  UIntPtr is required for variables captured by
406
  /// value.
407
  const bool UIntPtrCastRequired = true;
408
  /// true if only casted arguments must be registered as local args or VLA
409
  /// sizes.
410
  const bool RegisterCastedArgsOnly = false;
411
  /// Name of the generated function.
412
  const StringRef FunctionName;
413
  /// Location of the non-debug version of the outlined function.
414
  SourceLocation Loc;
415
  explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
416
                           bool RegisterCastedArgsOnly, StringRef FunctionName,
417
                           SourceLocation Loc)
418
      : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
419
        RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
420
24.0k
        FunctionName(FunctionName), Loc(Loc) {}
421
};
422
} // namespace
423
424
static llvm::Function *emitOutlinedFunctionPrologue(
425
    CodeGenFunction &CGF, FunctionArgList &Args,
426
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
427
        &LocalAddrs,
428
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
429
        &VLASizes,
430
24.0k
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
431
24.0k
  const CapturedDecl *CD = FO.S->getCapturedDecl();
432
24.0k
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
433
24.0k
  assert(CD->hasBody() && "missing CapturedDecl body");
434
435
0
  CXXThisValue = nullptr;
436
  // Build the argument list.
437
24.0k
  CodeGenModule &CGM = CGF.CGM;
438
24.0k
  ASTContext &Ctx = CGM.getContext();
439
24.0k
  FunctionArgList TargetArgs;
440
24.0k
  Args.append(CD->param_begin(),
441
24.0k
              std::next(CD->param_begin(), CD->getContextParamPosition()));
442
24.0k
  TargetArgs.append(
443
24.0k
      CD->param_begin(),
444
24.0k
      std::next(CD->param_begin(), CD->getContextParamPosition()));
445
24.0k
  auto I = FO.S->captures().begin();
446
24.0k
  FunctionDecl *DebugFunctionDecl = nullptr;
447
24.0k
  if (!FO.UIntPtrCastRequired) {
448
152
    FunctionProtoType::ExtProtoInfo EPI;
449
152
    QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI);
450
152
    DebugFunctionDecl = FunctionDecl::Create(
451
152
        Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(),
452
152
        SourceLocation(), DeclarationName(), FunctionTy,
453
152
        Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static,
454
152
        /*UsesFPIntrin=*/false, /*isInlineSpecified=*/false,
455
152
        /*hasWrittenPrototype=*/false);
456
152
  }
457
35.7k
  for (const FieldDecl *FD : RD->fields()) {
458
35.7k
    QualType ArgType = FD->getType();
459
35.7k
    IdentifierInfo *II = nullptr;
460
35.7k
    VarDecl *CapVar = nullptr;
461
462
    // If this is a capture by copy and the type is not a pointer, the outlined
463
    // function argument type should be uintptr and the value properly casted to
464
    // uintptr. This is necessary given that the runtime library is only able to
465
    // deal with pointers. We can pass in the same way the VLA type sizes to the
466
    // outlined function.
467
35.7k
    if (FO.UIntPtrCastRequired &&
468
35.7k
        
(35.5k
(35.5k
I->capturesVariableByCopy()35.5k
&&
!ArgType->isAnyPointerType()16.7k
) ||
469
35.5k
         
I->capturesVariableArrayType()20.6k
))
470
17.8k
      ArgType = Ctx.getUIntPtrType();
471
472
35.7k
    if (I->capturesVariable() || 
I->capturesVariableByCopy()21.6k
) {
473
30.8k
      CapVar = I->getCapturedVar();
474
30.8k
      II = CapVar->getIdentifier();
475
30.8k
    } else 
if (4.89k
I->capturesThis()4.89k
) {
476
1.84k
      II = &Ctx.Idents.get("this");
477
3.04k
    } else {
478
3.04k
      assert(I->capturesVariableArrayType());
479
0
      II = &Ctx.Idents.get("vla");
480
3.04k
    }
481
35.7k
    if (ArgType->isVariablyModifiedType())
482
2.12k
      ArgType = getCanonicalParamType(Ctx, ArgType);
483
35.7k
    VarDecl *Arg;
484
35.7k
    if (CapVar && 
(CapVar->getTLSKind() != clang::VarDecl::TLS_None)30.8k
) {
485
56
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
486
56
                                      II, ArgType,
487
56
                                      ImplicitParamDecl::ThreadPrivateVar);
488
35.6k
    } else if (DebugFunctionDecl && 
(204
CapVar204
||
I->capturesThis()17
)) {
489
193
      Arg = ParmVarDecl::Create(
490
193
          Ctx, DebugFunctionDecl,
491
193
          CapVar ? 
CapVar->getBeginLoc()187
:
FD->getBeginLoc()6
,
492
193
          CapVar ? 
CapVar->getLocation()187
:
FD->getLocation()6
, II, ArgType,
493
193
          /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
494
35.4k
    } else {
495
35.4k
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
496
35.4k
                                      II, ArgType, ImplicitParamDecl::Other);
497
35.4k
    }
498
35.7k
    Args.emplace_back(Arg);
499
    // Do not cast arguments if we emit function with non-original types.
500
35.7k
    TargetArgs.emplace_back(
501
35.7k
        FO.UIntPtrCastRequired
502
35.7k
            ? 
Arg35.5k
503
35.7k
            : 
CGM.getOpenMPRuntime().translateParameter(FD, Arg)204
);
504
35.7k
    ++I;
505
35.7k
  }
506
24.0k
  Args.append(std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
507
24.0k
              CD->param_end());
508
24.0k
  TargetArgs.append(
509
24.0k
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
510
24.0k
      CD->param_end());
511
512
  // Create the function declaration.
513
24.0k
  const CGFunctionInfo &FuncInfo =
514
24.0k
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
515
24.0k
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
516
517
24.0k
  auto *F =
518
24.0k
      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
519
24.0k
                             FO.FunctionName, &CGM.getModule());
520
24.0k
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
521
24.0k
  if (CD->isNothrow())
522
23.9k
    F->setDoesNotThrow();
523
24.0k
  F->setDoesNotRecurse();
524
525
  // Always inline the outlined function if optimizations are enabled.
526
24.0k
  if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
527
96
    F->removeFnAttr(llvm::Attribute::NoInline);
528
96
    F->addFnAttr(llvm::Attribute::AlwaysInline);
529
96
  }
530
531
  // Generate the function.
532
24.0k
  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
533
24.0k
                    FO.UIntPtrCastRequired ? 
FO.Loc23.8k
:
FO.S->getBeginLoc()152
,
534
24.0k
                    FO.UIntPtrCastRequired ? 
FO.Loc23.8k
535
24.0k
                                           : 
CD->getBody()->getBeginLoc()152
);
536
24.0k
  unsigned Cnt = CD->getContextParamPosition();
537
24.0k
  I = FO.S->captures().begin();
538
35.7k
  for (const FieldDecl *FD : RD->fields()) {
539
    // Do not map arguments if we emit function with non-original types.
540
35.7k
    Address LocalAddr(Address::invalid());
541
35.7k
    if (!FO.UIntPtrCastRequired && 
Args[Cnt] != TargetArgs[Cnt]204
) {
542
58
      LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
543
58
                                                             TargetArgs[Cnt]);
544
35.6k
    } else {
545
35.6k
      LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
546
35.6k
    }
547
    // If we are capturing a pointer by copy we don't need to do anything, just
548
    // use the value that we get from the arguments.
549
35.7k
    if (I->capturesVariableByCopy() && 
FD->getType()->isAnyPointerType()16.7k
) {
550
1.91k
      const VarDecl *CurVD = I->getCapturedVar();
551
1.91k
      if (!FO.RegisterCastedArgsOnly)
552
1.91k
        LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
553
1.91k
      ++Cnt;
554
1.91k
      ++I;
555
1.91k
      continue;
556
1.91k
    }
557
558
33.7k
    LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
559
33.7k
                                        AlignmentSource::Decl);
560
33.7k
    if (FD->hasCapturedVLAType()) {
561
3.04k
      if (FO.UIntPtrCastRequired) {
562
3.03k
        ArgLVal = CGF.MakeAddrLValue(
563
3.03k
            castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
564
3.03k
                                 Args[Cnt]->getName(), ArgLVal),
565
3.03k
            FD->getType(), AlignmentSource::Decl);
566
3.03k
      }
567
3.04k
      llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
568
3.04k
      const VariableArrayType *VAT = FD->getCapturedVLAType();
569
3.04k
      VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg);
570
30.7k
    } else if (I->capturesVariable()) {
571
14.0k
      const VarDecl *Var = I->getCapturedVar();
572
14.0k
      QualType VarTy = Var->getType();
573
14.0k
      Address ArgAddr = ArgLVal.getAddress(CGF);
574
14.0k
      if (ArgLVal.getType()->isLValueReferenceType()) {
575
14.0k
        ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
576
14.0k
      } else 
if (0
!VarTy->isVariablyModifiedType()0
||
!VarTy->isPointerType()0
) {
577
0
        assert(ArgLVal.getType()->isPointerType());
578
0
        ArgAddr = CGF.EmitLoadOfPointer(
579
0
            ArgAddr, ArgLVal.getType()->castAs<PointerType>());
580
0
      }
581
14.0k
      if (!FO.RegisterCastedArgsOnly) {
582
13.8k
        LocalAddrs.insert(
583
13.8k
            {Args[Cnt], {Var, ArgAddr.withAlignment(Ctx.getDeclAlign(Var))}});
584
13.8k
      }
585
16.7k
    } else if (I->capturesVariableByCopy()) {
586
14.8k
      assert(!FD->getType()->isAnyPointerType() &&
587
14.8k
             "Not expecting a captured pointer.");
588
0
      const VarDecl *Var = I->getCapturedVar();
589
14.8k
      LocalAddrs.insert({Args[Cnt],
590
14.8k
                         {Var, FO.UIntPtrCastRequired
591
14.8k
                                   ? castValueFromUintptr(
592
14.8k
                                         CGF, I->getLocation(), FD->getType(),
593
14.8k
                                         Args[Cnt]->getName(), ArgLVal)
594
14.8k
                                   : 
ArgLVal.getAddress(CGF)23
}});
595
14.8k
    } else {
596
      // If 'this' is captured, load it into CXXThisValue.
597
1.84k
      assert(I->capturesThis());
598
0
      CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
599
1.84k
      LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress(CGF)}});
600
1.84k
    }
601
0
    ++Cnt;
602
33.7k
    ++I;
603
33.7k
  }
604
605
24.0k
  return F;
606
24.0k
}
607
608
llvm::Function *
609
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
610
23.8k
                                                    SourceLocation Loc) {
611
23.8k
  assert(
612
23.8k
      CapturedStmtInfo &&
613
23.8k
      "CapturedStmtInfo should be set when generating the captured function");
614
0
  const CapturedDecl *CD = S.getCapturedDecl();
615
  // Build the argument list.
616
23.8k
  bool NeedWrapperFunction =
617
23.8k
      getDebugInfo() && 
CGM.getCodeGenOpts().hasReducedDebugInfo()301
;
618
23.8k
  FunctionArgList Args;
619
23.8k
  llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
620
23.8k
  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
621
23.8k
  SmallString<256> Buffer;
622
23.8k
  llvm::raw_svector_ostream Out(Buffer);
623
23.8k
  Out << CapturedStmtInfo->getHelperName();
624
23.8k
  if (NeedWrapperFunction)
625
152
    Out << "_debug__";
626
23.8k
  FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
627
23.8k
                     Out.str(), Loc);
628
23.8k
  llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
629
23.8k
                                                   VLASizes, CXXThisValue, FO);
630
23.8k
  CodeGenFunction::OMPPrivateScope LocalScope(*this);
631
32.4k
  for (const auto &LocalAddrPair : LocalAddrs) {
632
32.4k
    if (LocalAddrPair.second.first) {
633
30.6k
      LocalScope.addPrivate(LocalAddrPair.second.first,
634
30.6k
                            LocalAddrPair.second.second);
635
30.6k
    }
636
32.4k
  }
637
23.8k
  (void)LocalScope.Privatize();
638
23.8k
  for (const auto &VLASizePair : VLASizes)
639
3.03k
    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
640
23.8k
  PGO.assignRegionCounters(GlobalDecl(CD), F);
641
23.8k
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
642
23.8k
  (void)LocalScope.ForceCleanup();
643
23.8k
  FinishFunction(CD->getBodyRBrace());
644
23.8k
  if (!NeedWrapperFunction)
645
23.7k
    return F;
646
647
152
  FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
648
152
                            /*RegisterCastedArgsOnly=*/true,
649
152
                            CapturedStmtInfo->getHelperName(), Loc);
650
152
  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
651
152
  WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
652
152
  Args.clear();
653
152
  LocalAddrs.clear();
654
152
  VLASizes.clear();
655
152
  llvm::Function *WrapperF =
656
152
      emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
657
152
                                   WrapperCGF.CXXThisValue, WrapperFO);
658
152
  llvm::SmallVector<llvm::Value *, 4> CallArgs;
659
152
  auto *PI = F->arg_begin();
660
324
  for (const auto *Arg : Args) {
661
324
    llvm::Value *CallArg;
662
324
    auto I = LocalAddrs.find(Arg);
663
324
    if (I != LocalAddrs.end()) {
664
29
      LValue LV = WrapperCGF.MakeAddrLValue(
665
29
          I->second.second,
666
29
          I->second.first ? 
I->second.first->getType()23
:
Arg->getType()6
,
667
29
          AlignmentSource::Decl);
668
29
      if (LV.getType()->isAnyComplexType())
669
1
        LV.setAddress(WrapperCGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
670
1
            LV.getAddress(WrapperCGF),
671
1
            PI->getType()->getPointerTo(
672
1
                LV.getAddress(WrapperCGF).getAddressSpace()),
673
1
            PI->getType()));
674
29
      CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
675
295
    } else {
676
295
      auto EI = VLASizes.find(Arg);
677
295
      if (EI != VLASizes.end()) {
678
11
        CallArg = EI->second.second;
679
284
      } else {
680
284
        LValue LV =
681
284
            WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
682
284
                                      Arg->getType(), AlignmentSource::Decl);
683
284
        CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
684
284
      }
685
295
    }
686
324
    CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
687
324
    ++PI;
688
324
  }
689
152
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs);
690
152
  WrapperCGF.FinishFunction();
691
152
  return WrapperF;
692
23.8k
}
693
694
//===----------------------------------------------------------------------===//
695
//                              OpenMP Directive Emission
696
//===----------------------------------------------------------------------===//
697
/// Emit an element-by-element copy loop from \p SrcAddr to \p DestAddr for an
/// array of type \p OriginalType, calling \p CopyGen once per element pair to
/// emit the per-element copy/initialization. Used when a flat memcpy is not
/// sufficient (e.g. elements with user-defined copy semantics).
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely when the array is empty (begin == end).
  llvm::Value *IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes carry the current source/destination element pointers across
  // loop iterations; the first incoming value is the array begin.
  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI, SrcAddr.getElementType(),
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI = Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  llvm::Value *DestElementNext =
      Builder.CreateConstGEP1_32(DestAddr.getElementType(), DestElementPHI,
                                 /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *SrcElementNext =
      Builder.CreateConstGEP1_32(SrcAddr.getElementType(), SrcElementPHI,
                                 /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // Latch the incremented pointers back into the PHIs. Note the incoming
  // block is queried *after* CopyGen ran: CopyGen may have emitted new basic
  // blocks, so the back-edge source is not necessarily BodyBB.
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}
760
761
/// Emit a copy of a variable of type \p OriginalType from \p SrcAddr to
/// \p DestAddr using the copy expression \p Copy. \p DestVD and \p SrcVD are
/// the pseudo destination/source variables referenced by \p Copy; they are
/// temporarily remapped to the concrete addresses before the expression is
/// emitted. Arrays with a plain assignment copy are lowered to a memcpy;
/// arrays with non-trivial element copies are copied element by element.
void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  // Non-array case: remap the pseudo source/destination variables to the
  // given addresses and emit the copy expression over the whole variable.
  if (!OriginalType->isArrayType()) {
    CodeGenFunction::OMPPrivateScope VarRemap(*this);
    VarRemap.addPrivate(SrcVD, SrcAddr);
    VarRemap.addPrivate(DestVD, DestAddr);
    (void)VarRemap.Privatize();
    EmitIgnoredExpr(Copy);
    return;
  }

  // Array case: a simple assignment means the elements are trivially
  // copyable, so a single aggregate memcpy suffices.
  const auto *AssignBO = dyn_cast<BinaryOperator>(Copy);
  if (AssignBO && AssignBO->getOpcode() == BO_Assign) {
    LValue DstLV = MakeAddrLValue(DestAddr, OriginalType);
    LValue SrcLV = MakeAddrLValue(SrcAddr, OriginalType);
    EmitAggregateAssign(DstLV, SrcLV, OriginalType);
    return;
  }

  // Arrays with complex element types: copy element by element, remapping
  // the pseudo variables to each element address in turn.
  EmitOMPAggregateAssign(
      DestAddr, SrcAddr, OriginalType,
      [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
        CodeGenFunction::OMPPrivateScope ElemRemap(*this);
        ElemRemap.addPrivate(DestVD, DestElement);
        ElemRemap.addPrivate(SrcVD, SrcElement);
        (void)ElemRemap.Privatize();
        EmitIgnoredExpr(Copy);
      });
}
797
798
/// Emit private copies for all variables in the directive's 'firstprivate'
/// clauses, registering them in \p PrivateScope. Copies are skipped when the
/// captured field already provides by-value semantics, or when a constant
/// firstprivate is captured by reference in a device target region.
/// \returns true if at least one emitted firstprivate is also lastprivate
/// (callers then need a lastprivate barrier), false otherwise.
bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  // True when compiling the device side of a target execution directive.
  bool DeviceConstTarget =
      getLangOpts().OpenMPIsDevice &&
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  bool FirstprivateIsLastprivate = false;
  // Map each lastprivate variable to its modifier so we can detect variables
  // that are both firstprivate and lastprivate (and conditional ones).
  llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.try_emplace(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl(),
          C->getKind());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  // Force emission of the firstprivate copy if the directive does not emit
  // outlined function, like omp for, omp simd, omp distribute etc.
  bool MustEmitFirstprivateCopy =
      CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    const auto *IRef = C->varlist_begin();
    const auto *InitsRef = C->inits().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      // If the variable is captured by value (non-reference field) and no
      // extra copy is forced, the capture itself already acts as the
      // firstprivate copy — skip emission.
      if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
          !FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      // Do not emit copy for firstprivate constant variables in target regions,
      // captured by reference.
      if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) &&
          FD && FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      // Each variable is emitted at most once even if it appears in several
      // firstprivate clauses.
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VDInit =
            cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        LValue OriginalLVal;
        if (!FD) {
          // Check if the firstprivate variable is just a constant value.
          ConstantEmission CE = tryEmitAsConstant(&DRE);
          if (CE && !CE.isReference()) {
            // Constant value, no need to create a copy.
            ++IRef;
            ++InitsRef;
            continue;
          }
          if (CE && CE.isReference()) {
            OriginalLVal = CE.getReferenceLValue(*this, &DRE);
          } else {
            assert(!CE && "Expected non-constant firstprivate.");
            OriginalLVal = EmitLValue(&DRE);
          }
        } else {
          OriginalLVal = EmitLValue(&DRE);
        }
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
          const Expr *Init = VD->getInit();
          if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            LValue Dest = MakeAddrLValue(Emission.getAllocatedAddress(), Type);
            EmitAggregateAssign(Dest, OriginalLVal, Type);
          } else {
            // Non-trivial element copy: run the init expression once per
            // element via the aggregate-assign loop.
            EmitOMPAggregateAssign(
                Emission.getAllocatedAddress(), OriginalLVal.getAddress(*this),
                Type,
                [this, VDInit, Init](Address DestElement, Address SrcElement) {
                  // Clean up any temporaries needed by the
                  // initialization.
                  RunCleanupsScope InitScope(*this);
                  // Emit initialization for single element.
                  setAddrOfLocalVar(VDInit, SrcElement);
                  EmitAnyExprToMem(Init, DestElement,
                                   Init->getType().getQualifiers(),
                                   /*IsInitializer*/ false);
                  LocalDeclMap.erase(VDInit);
                });
          }
          EmitAutoVarCleanups(Emission);
          IsRegistered =
              PrivateScope.addPrivate(OrigVD, Emission.getAllocatedAddress());
        } else {
          Address OriginalAddr = OriginalLVal.getAddress(*this);
          // Emit private VarDecl with copy init.
          // Remap temp VDInit variable to the address of the original
          // variable (for proper handling of captured global variables).
          setAddrOfLocalVar(VDInit, OriginalAddr);
          EmitDecl(*VD);
          LocalDeclMap.erase(VDInit);
          Address VDAddr = GetAddrOfLocalVar(VD);
          if (ThisFirstprivateIsLastprivate &&
              Lastprivates[OrigVD->getCanonicalDecl()] ==
                  OMPC_LASTPRIVATE_conditional) {
            // Create/init special variable for lastprivate conditionals.
            // Load the firstprivate value, switch the variable to the
            // runtime-provided conditional storage, then store the value
            // there.
            llvm::Value *V =
                EmitLoadOfScalar(MakeAddrLValue(VDAddr, (*IRef)->getType(),
                                                AlignmentSource::Decl),
                                 (*IRef)->getExprLoc());
            VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                *this, OrigVD);
            EmitStoreOfScalar(V, MakeAddrLValue(VDAddr, (*IRef)->getType(),
                                                AlignmentSource::Decl));
            LocalDeclMap.erase(VD);
            setAddrOfLocalVar(VD, VDAddr);
          }
          IsRegistered = PrivateScope.addPrivate(OrigVD, VDAddr);
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}
942
943
void CodeGenFunction::EmitOMPPrivateClause(
944
    const OMPExecutableDirective &D,
945
33.2k
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
946
33.2k
  if (!HaveInsertPoint())
947
0
    return;
948
33.2k
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
949
33.2k
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
950
983
    auto IRef = C->varlist_begin();
951
3.14k
    for (const Expr *IInit : C->private_copies()) {
952
3.14k
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
953
3.14k
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
954
2.87k
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
955
2.87k
        EmitDecl(*VD);
956
        // Emit private VarDecl with copy init.
957
2.87k
        bool IsRegistered =
958
2.87k
            PrivateScope.addPrivate(OrigVD, GetAddrOfLocalVar(VD));
959
2.87k
        assert(IsRegistered && "private var already registered as private");
960
        // Silence the warning about unused variable.
961
0
        (void)IsRegistered;
962
2.87k
      }
963
0
      ++IRef;
964
3.14k
    }
965
983
  }
966
33.2k
}
967
968
1.02k
/// Emit copy-in of threadprivate variables listed in the directive's 'copyin'
/// clauses: each thread's threadprivate copy is assigned from the master
/// thread's copy, guarded so the master thread does not copy onto itself.
/// \returns true if any copy was emitted (the caller must then emit the
/// trailing barrier), false otherwise.
bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      // Copy each variable only once even across multiple copyin clauses.
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code with
        // TLS support, the address is passed from the master as field in the
        // captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true,
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress(*this);
          // Drop the local mapping so later references resolve to the
          // thread-local copy, not the captured master address.
          LocalDeclMap.erase(VD);
        } else {
          // No TLS: the master copy is the global/static storage itself.
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      CGM.getTypes().ConvertTypeForMem(VD->getType()),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress(*this);
        // The guard block is created once, on the first copied variable.
        if (CopiedVars.size() == 1) {
          // At first check if current thread is a master thread. If it is, no
          // need to copy data.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          // TODO: Avoid ptrtoint conversion.
          auto *MasterAddrInt =
              Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy);
          auto *PrivateAddrInt =
              Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy);
          Builder.CreateCondBr(
              Builder.CreateICmpNE(MasterAddrInt, PrivateAddrInt), CopyBegin,
              CopyEnd);
          EmitBlock(CopyBegin);
        }
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}
1039
1040
/// Emit initialization for variables in the directive's 'lastprivate'
/// clauses: record the original variables' addresses (needed for the final
/// copy-back) and create private copies where required. SIMD loop control
/// variables and taskloop lastprivates get no extra copy here.
/// \returns true if the directive has at least one lastprivate clause.
bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  // Loop control variables of simd directives already have private copies;
  // collect them so they are not privatized twice below.
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    const auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (const Expr *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    // Taskloops do not require additional initialization, it is done in
    // runtime support library.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        !getLangOpts().OpenMPSimd)
      break;
    const auto *IRef = C->varlist_begin();
    const auto *IDestRef = C->destination_exprs().begin();
    for (const Expr *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the end
      // of the loop.
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // Each variable is handled once even if repeated across clauses.
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        // Map the pseudo destination variable to the original's address so
        // EmitOMPLastprivateClauseFinal can copy back into it.
        PrivateScope.addPrivate(DestVD, EmitLValue(&DRE).getAddress(*this));
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in codegen
        // for 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          Address VDAddr = Address::invalid();
          if (C->getKind() == OMPC_LASTPRIVATE_conditional) {
            // Conditional lastprivates use runtime-managed storage instead
            // of a plain local alloca.
            VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                *this, OrigVD);
            setAddrOfLocalVar(VD, VDAddr);
          } else {
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            VDAddr = GetAddrOfLocalVar(VD);
          }
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, VDAddr);
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          // Silence the warning about unused variable.
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}
1102
1103
/// Emit the final copy-back for 'lastprivate' variables: on the thread that
/// executed the last iteration (guarded by \p IsLastIterCond when non-null),
/// copy each private value into the original variable. \p NoFinals suppresses
/// the loop-counter final updates (the counters are then treated as already
/// emitted and skipped).
void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    // Emit implicit barrier if at least one lastprivate conditional is found
    // and this is not a simd mode.
    if (!getLangOpts().OpenMPSimd &&
        llvm::any_of(D.getClausesOfKind<OMPLastprivateClause>(),
                     [](const OMPLastprivateClause *C) {
                       return C->getKind() == OMPC_LASTPRIVATE_conditional;
                     })) {
      CGM.getOpenMPRuntime().emitBarrierCall(*this, D.getBeginLoc(),
                                             OMPD_unknown,
                                             /*EmitChecks=*/false,
                                             /*ForceSimpleCall=*/true);
    }
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  // For loop directives, either suppress the counters entirely (NoFinals) or
  // remember each counter's final-update expression to run before copy-back.
  if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (const Expr *F : LoopDirective->finals()) {
      const auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      // Copy each variable back at most once across all clauses.
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If lastprivate variable is a loop control variable for loop-based
        // directive, update its value before copyin back to original
        // variable.
        if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        // Reference-typed privates: chase the reference to the referenced
        // storage before copying.
        if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr = Address(
              Builder.CreateLoad(PrivateAddr),
              CGM.getTypes().ConvertTypeForMem(RefTy->getPointeeType()),
              CGM.getNaturalTypeAlignment(RefTy->getPointeeType()));
        // Store the last value to the private copy in the last iteration.
        if (C->getKind() == OMPC_LASTPRIVATE_conditional)
          CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate(
              *this, MakeAddrLValue(PrivateAddr, (*IRef)->getType()), PrivateVD,
              (*IRef)->getExprLoc());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}
1193
1194
void CodeGenFunction::EmitOMPReductionClauseInit(
1195
    const OMPExecutableDirective &D,
1196
27.8k
    CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) {
1197
27.8k
  if (!HaveInsertPoint())
1198
0
    return;
1199
27.8k
  SmallVector<const Expr *, 4> Shareds;
1200
27.8k
  SmallVector<const Expr *, 4> Privates;
1201
27.8k
  SmallVector<const Expr *, 4> ReductionOps;
1202
27.8k
  SmallVector<const Expr *, 4> LHSs;
1203
27.8k
  SmallVector<const Expr *, 4> RHSs;
1204
27.8k
  OMPTaskDataTy Data;
1205
27.8k
  SmallVector<const Expr *, 4> TaskLHSs;
1206
27.8k
  SmallVector<const Expr *, 4> TaskRHSs;
1207
27.8k
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1208
1.16k
    if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan))
1209
447
      continue;
1210
721
    Shareds.append(C->varlist_begin(), C->varlist_end());
1211
721
    Privates.append(C->privates().begin(), C->privates().end());
1212
721
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1213
721
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1214
721
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1215
721
    if (C->getModifier() == OMPC_REDUCTION_task) {
1216
27
      Data.ReductionVars.append(C->privates().begin(), C->privates().end());
1217
27
      Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
1218
27
      Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
1219
27
      Data.ReductionOps.append(C->reduction_ops().begin(),
1220
27
                               C->reduction_ops().end());
1221
27
      TaskLHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1222
27
      TaskRHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1223
27
    }
1224
721
  }
1225
27.8k
  ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
1226
27.8k
  unsigned Count = 0;
1227
27.8k
  auto *ILHS = LHSs.begin();
1228
27.8k
  auto *IRHS = RHSs.begin();
1229
27.8k
  auto *IPriv = Privates.begin();
1230
27.8k
  for (const Expr *IRef : Shareds) {
1231
813
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
1232
    // Emit private VarDecl with reduction init.
1233
813
    RedCG.emitSharedOrigLValue(*this, Count);
1234
813
    RedCG.emitAggregateType(*this, Count);
1235
813
    AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
1236
813
    RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
1237
813
                             RedCG.getSharedLValue(Count).getAddress(*this),
1238
813
                             [&Emission](CodeGenFunction &CGF) {
1239
617
                               CGF.EmitAutoVarInit(Emission);
1240
617
                               return true;
1241
617
                             });
1242
813
    EmitAutoVarCleanups(Emission);
1243
813
    Address BaseAddr = RedCG.adjustPrivateAddress(
1244
813
        *this, Count, Emission.getAllocatedAddress());
1245
813
    bool IsRegistered =
1246
813
        PrivateScope.addPrivate(RedCG.getBaseDecl(Count), BaseAddr);
1247
813
    assert(IsRegistered && "private var already registered as private");
1248
    // Silence the warning about unused variable.
1249
0
    (void)IsRegistered;
1250
1251
813
    const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
1252
813
    const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
1253
813
    QualType Type = PrivateVD->getType();
1254
813
    bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
1255
813
    if (isaOMPArraySectionExpr && 
Type->isVariablyModifiedType()157
) {
1256
      // Store the address of the original variable associated with the LHS
1257
      // implicit variable.
1258
109
      PrivateScope.addPrivate(LHSVD,
1259
109
                              RedCG.getSharedLValue(Count).getAddress(*this));
1260
109
      PrivateScope.addPrivate(RHSVD, GetAddrOfLocalVar(PrivateVD));
1261
704
    } else if ((isaOMPArraySectionExpr && 
Type->isScalarType()48
) ||
1262
704
               
isa<ArraySubscriptExpr>(IRef)702
) {
1263
      // Store the address of the original variable associated with the LHS
1264
      // implicit variable.
1265
6
      PrivateScope.addPrivate(LHSVD,
1266
6
                              RedCG.getSharedLValue(Count).getAddress(*this));
1267
6
      PrivateScope.addPrivate(RHSVD, Builder.CreateElementBitCast(
1268
6
                                         GetAddrOfLocalVar(PrivateVD),
1269
6
                                         ConvertTypeForMem(RHSVD->getType()),
1270
6
                                         "rhs.begin"));
1271
698
    } else {
1272
698
      QualType Type = PrivateVD->getType();
1273
698
      bool IsArray = getContext().getAsArrayType(Type) != nullptr;
1274
698
      Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress(*this);
1275
      // Store the address of the original variable associated with the LHS
1276
      // implicit variable.
1277
698
      if (IsArray) {
1278
108
        OriginalAddr = Builder.CreateElementBitCast(
1279
108
            OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
1280
108
      }
1281
698
      PrivateScope.addPrivate(LHSVD, OriginalAddr);
1282
698
      PrivateScope.addPrivate(
1283
698
          RHSVD, IsArray ? Builder.CreateElementBitCast(
1284
108
                               GetAddrOfLocalVar(PrivateVD),
1285
108
                               ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
1286
698
                         : 
GetAddrOfLocalVar(PrivateVD)590
);
1287
698
    }
1288
813
    ++ILHS;
1289
813
    ++IRHS;
1290
813
    ++IPriv;
1291
813
    ++Count;
1292
813
  }
1293
27.8k
  if (!Data.ReductionVars.empty()) {
1294
27
    Data.IsReductionWithTaskMod = true;
1295
27
    Data.IsWorksharingReduction =
1296
27
        isOpenMPWorksharingDirective(D.getDirectiveKind());
1297
27
    llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit(
1298
27
        *this, D.getBeginLoc(), TaskLHSs, TaskRHSs, Data);
1299
27
    const Expr *TaskRedRef = nullptr;
1300
27
    switch (D.getDirectiveKind()) {
1301
2
    case OMPD_parallel:
1302
2
      TaskRedRef = cast<OMPParallelDirective>(D).getTaskReductionRefExpr();
1303
2
      break;
1304
2
    case OMPD_for:
1305
2
      TaskRedRef = cast<OMPForDirective>(D).getTaskReductionRefExpr();
1306
2
      break;
1307
2
    case OMPD_sections:
1308
2
      TaskRedRef = cast<OMPSectionsDirective>(D).getTaskReductionRefExpr();
1309
2
      break;
1310
2
    case OMPD_parallel_for:
1311
2
      TaskRedRef = cast<OMPParallelForDirective>(D).getTaskReductionRefExpr();
1312
2
      break;
1313
2
    case OMPD_parallel_master:
1314
2
      TaskRedRef =
1315
2
          cast<OMPParallelMasterDirective>(D).getTaskReductionRefExpr();
1316
2
      break;
1317
2
    case OMPD_parallel_sections:
1318
2
      TaskRedRef =
1319
2
          cast<OMPParallelSectionsDirective>(D).getTaskReductionRefExpr();
1320
2
      break;
1321
2
    case OMPD_target_parallel:
1322
2
      TaskRedRef =
1323
2
          cast<OMPTargetParallelDirective>(D).getTaskReductionRefExpr();
1324
2
      break;
1325
3
    case OMPD_target_parallel_for:
1326
3
      TaskRedRef =
1327
3
          cast<OMPTargetParallelForDirective>(D).getTaskReductionRefExpr();
1328
3
      break;
1329
2
    case OMPD_distribute_parallel_for:
1330
2
      TaskRedRef =
1331
2
          cast<OMPDistributeParallelForDirective>(D).getTaskReductionRefExpr();
1332
2
      break;
1333
4
    case OMPD_teams_distribute_parallel_for:
1334
4
      TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(D)
1335
4
                       .getTaskReductionRefExpr();
1336
4
      break;
1337
4
    case OMPD_target_teams_distribute_parallel_for:
1338
4
      TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(D)
1339
4
                       .getTaskReductionRefExpr();
1340
4
      break;
1341
0
    case OMPD_simd:
1342
0
    case OMPD_for_simd:
1343
0
    case OMPD_section:
1344
0
    case OMPD_single:
1345
0
    case OMPD_master:
1346
0
    case OMPD_critical:
1347
0
    case OMPD_parallel_for_simd:
1348
0
    case OMPD_task:
1349
0
    case OMPD_taskyield:
1350
0
    case OMPD_barrier:
1351
0
    case OMPD_taskwait:
1352
0
    case OMPD_taskgroup:
1353
0
    case OMPD_flush:
1354
0
    case OMPD_depobj:
1355
0
    case OMPD_scan:
1356
0
    case OMPD_ordered:
1357
0
    case OMPD_atomic:
1358
0
    case OMPD_teams:
1359
0
    case OMPD_target:
1360
0
    case OMPD_cancellation_point:
1361
0
    case OMPD_cancel:
1362
0
    case OMPD_target_data:
1363
0
    case OMPD_target_enter_data:
1364
0
    case OMPD_target_exit_data:
1365
0
    case OMPD_taskloop:
1366
0
    case OMPD_taskloop_simd:
1367
0
    case OMPD_master_taskloop:
1368
0
    case OMPD_master_taskloop_simd:
1369
0
    case OMPD_parallel_master_taskloop:
1370
0
    case OMPD_parallel_master_taskloop_simd:
1371
0
    case OMPD_distribute:
1372
0
    case OMPD_target_update:
1373
0
    case OMPD_distribute_parallel_for_simd:
1374
0
    case OMPD_distribute_simd:
1375
0
    case OMPD_target_parallel_for_simd:
1376
0
    case OMPD_target_simd:
1377
0
    case OMPD_teams_distribute:
1378
0
    case OMPD_teams_distribute_simd:
1379
0
    case OMPD_teams_distribute_parallel_for_simd:
1380
0
    case OMPD_target_teams:
1381
0
    case OMPD_target_teams_distribute:
1382
0
    case OMPD_target_teams_distribute_parallel_for_simd:
1383
0
    case OMPD_target_teams_distribute_simd:
1384
0
    case OMPD_declare_target:
1385
0
    case OMPD_end_declare_target:
1386
0
    case OMPD_threadprivate:
1387
0
    case OMPD_allocate:
1388
0
    case OMPD_declare_reduction:
1389
0
    case OMPD_declare_mapper:
1390
0
    case OMPD_declare_simd:
1391
0
    case OMPD_requires:
1392
0
    case OMPD_declare_variant:
1393
0
    case OMPD_begin_declare_variant:
1394
0
    case OMPD_end_declare_variant:
1395
0
    case OMPD_unknown:
1396
0
    default:
1397
0
      llvm_unreachable("Enexpected directive with task reductions.");
1398
27
    }
1399
1400
27
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(TaskRedRef)->getDecl());
1401
27
    EmitVarDecl(*VD);
1402
27
    EmitStoreOfScalar(ReductionDesc, GetAddrOfLocalVar(VD),
1403
27
                      /*Volatile=*/false, TaskRedRef->getType());
1404
27
  }
1405
27.8k
}
1406
1407
/// Emit the final reduction of the privatized reduction variables of \p D
/// back into the original variables via the OpenMP runtime. Inscan-modified
/// reductions are skipped (they are finalized by the scan codegen); for
/// task-modified reductions the task-reduction region is finished first.
/// \param ReductionKind the directive kind used to select nowait/simd
///        reduction emission.
void CodeGenFunction::EmitOMPReductionClauseFinal(
    const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
  if (!HaveInsertPoint())
    return;
  llvm::SmallVector<const Expr *, 8> Privates;
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  bool HasAtLeastOneReduction = false;
  bool IsReductionWithTaskMod = false;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    // Do not emit for inscan reductions.
    if (C->getModifier() == OMPC_REDUCTION_inscan)
      continue;
    HasAtLeastOneReduction = true;
    Privates.append(C->privates().begin(), C->privates().end());
    LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    IsReductionWithTaskMod =
        IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task;
  }
  if (HasAtLeastOneReduction) {
    // Task-modified reductions must close their task reduction region
    // before the ordinary reduction is emitted.
    if (IsReductionWithTaskMod) {
      CGM.getOpenMPRuntime().emitTaskReductionFini(
          *this, D.getBeginLoc(),
          isOpenMPWorksharingDirective(D.getDirectiveKind()));
    }
    bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
                      isOpenMPParallelDirective(D.getDirectiveKind()) ||
                      ReductionKind == OMPD_simd;
    bool SimpleReduction = ReductionKind == OMPD_simd;
    // Emit nowait reduction if nowait clause is present or directive is a
    // parallel directive (it always has implicit barrier).
    CGM.getOpenMPRuntime().emitReduction(
        *this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps,
        {WithNowait, SimpleReduction, ReductionKind});
  }
}
1446
1447
/// Emit the post-update expressions attached to the reduction clauses of
/// \p D. If \p CondGen produces a non-null condition when the first
/// post-update is encountered, all post-updates are emitted inside a
/// conditional block guarded by that condition.
static void emitPostUpdateForReductionClause(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  if (!CGF.HaveInsertPoint())
    return;
  // Created lazily on the first post-update that needs a guard; non-null
  // means we are inside the conditional ".omp.reduction.pu" block.
  llvm::BasicBlock *DoneBB = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (const Expr *PostUpdate = C->getPostUpdateExpr()) {
      if (!DoneBB) {
        if (llvm::Value *Cond = CondGen(CGF)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
          DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
          CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          CGF.EmitBlock(ThenBB);
        }
      }
      CGF.EmitIgnoredExpr(PostUpdate);
    }
  }
  if (DoneBB)
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
1471
1472
namespace {
/// Codegen lambda for appending distribute lower and upper bounds to outlined
/// parallel function. This is necessary for combined constructs such as
/// 'distribute parallel for'. Callers pass emitEmptyBoundParameters when no
/// extra bound arguments are required.
typedef llvm::function_ref<void(CodeGenFunction &,
                                const OMPExecutableDirective &,
                                llvm::SmallVectorImpl<llvm::Value *> &)>
    CodeGenBoundParametersTy;
} // anonymous namespace
1481
1482
static void
1483
checkForLastprivateConditionalUpdate(CodeGenFunction &CGF,
1484
20.7k
                                     const OMPExecutableDirective &S) {
1485
20.7k
  if (CGF.getLangOpts().OpenMP < 50)
1486
4.15k
    return;
1487
16.6k
  llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls;
1488
16.6k
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
1489
902
    for (const Expr *Ref : C->varlists()) {
1490
902
      if (!Ref->getType()->isScalarType())
1491
473
        continue;
1492
429
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1493
429
      if (!DRE)
1494
0
        continue;
1495
429
      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1496
429
      CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1497
429
    }
1498
744
  }
1499
16.6k
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
1500
1.22k
    for (const Expr *Ref : C->varlists()) {
1501
1.22k
      if (!Ref->getType()->isScalarType())
1502
658
        continue;
1503
567
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1504
567
      if (!DRE)
1505
0
        continue;
1506
567
      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1507
567
      CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1508
567
    }
1509
295
  }
1510
16.6k
  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
1511
350
    for (const Expr *Ref : C->varlists()) {
1512
350
      if (!Ref->getType()->isScalarType())
1513
0
        continue;
1514
350
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1515
350
      if (!DRE)
1516
0
        continue;
1517
350
      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1518
350
      CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1519
350
    }
1520
279
  }
1521
  // Privates should ne analyzed since they are not captured at all.
1522
  // Task reductions may be skipped - tasks are ignored.
1523
  // Firstprivates do not return value but may be passed by reference - no need
1524
  // to check for updated lastprivate conditional.
1525
16.6k
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
1526
6.46k
    for (const Expr *Ref : C->varlists()) {
1527
6.46k
      if (!Ref->getType()->isScalarType())
1528
859
        continue;
1529
5.60k
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1530
5.60k
      if (!DRE)
1531
0
        continue;
1532
5.60k
      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1533
5.60k
    }
1534
3.99k
  }
1535
16.6k
  CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional(
1536
16.6k
      CGF, S, PrivateDecls);
1537
16.6k
}
1538
1539
/// Shared emission path for all 'parallel'-based directives: outlines the
/// parallel region into \p OutlinedFn, evaluates num_threads/proc_bind/if
/// clauses, captures the region's variables and emits the runtime parallel
/// call. \p CodeGenBoundParameters appends extra bound arguments for
/// combined 'distribute parallel for' constructs (or does nothing).
static void emitCommonOMPParallelDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    const CodeGenBoundParametersTy &CodeGenBoundParameters) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  llvm::Value *NumThreads = nullptr;
  llvm::Function *OutlinedFn =
      CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
          S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                    /*IgnoreResultAssign=*/true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getBeginLoc());
  }
  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
    CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
    CGF.CGM.getOpenMPRuntime().emitProcBindClause(
        CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getBeginLoc());
  }
  const Expr *IfCond = nullptr;
  // Pick the first 'if' clause that applies to 'parallel' (unmodified or
  // explicitly parallel-modified).
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_parallel) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPParallelScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
  // lower and upper bounds with the pragma 'for' chunking mechanism.
  // The following lambda takes care of appending the lower and upper bound
  // parameters when necessary
  CodeGenBoundParameters(CGF, S, CapturedVars);
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn,
                                              CapturedVars, IfCond, NumThreads);
}
1580
1581
729
static bool isAllocatableDecl(const VarDecl *VD) {
1582
729
  const VarDecl *CVD = VD->getCanonicalDecl();
1583
729
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
1584
723
    return false;
1585
6
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
1586
  // Use the default allocation.
1587
6
  return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
1588
6
            AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
1589
6
           
!AA->getAllocator()0
);
1590
729
}
1591
1592
/// No-op CodeGenBoundParametersTy callback for parallel constructs that do
/// not need distribute lower/upper bounds appended to the outlined call.
static void emitEmptyBoundParameters(CodeGenFunction &,
                                     const OMPExecutableDirective &,
                                     llvm::SmallVectorImpl<llvm::Value *> &) {}
1595
1596
/// OpenMPIRBuilder path for local variables with an 'omp allocate'
/// attribute: emits __kmpc_alloc/__kmpc_free through the builder, registers
/// the free as an EH cleanup, and returns the typed address of the
/// allocation. Returns Address::invalid() when the variable does not need
/// allocator-based storage.
Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable(
    CodeGenFunction &CGF, const VarDecl *VD) {
  CodeGenModule &CGM = CGF.CGM;
  auto &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();

  if (!VD)
    return Address::invalid();
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!isAllocatableDecl(CVD))
    return Address::invalid();
  llvm::Value *Size;
  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
  if (CVD->getType()->isVariablyModifiedType()) {
    // VLA-like types: size is a runtime value that must be rounded up.
    Size = CGF.getTypeSize(CVD->getType());
    // Align the size: ((size + align - 1) / align) * align
    Size = CGF.Builder.CreateNUWAdd(
        Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
    Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
    Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
  } else {
    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
    Size = CGM.getSize(Sz.alignTo(Align));
  }

  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  assert(AA->getAllocator() &&
         "Expected allocator expression for non-default allocator.");
  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
  // According to the standard, the original allocator type is a enum (integer).
  // Convert to pointer type, if required.
  if (Allocator->getType()->isIntegerTy())
    Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
  else if (Allocator->getType()->isPointerTy())
    Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
                                                                CGM.VoidPtrTy);

  llvm::Value *Addr = OMPBuilder.createOMPAlloc(
      CGF.Builder, Size, Allocator,
      getNameWithSeparators({CVD->getName(), ".void.addr"}, ".", "."));
  llvm::CallInst *FreeCI =
      OMPBuilder.createOMPFree(CGF.Builder, Addr, Allocator);

  // Ensure the allocation is released on both normal and exceptional exits.
  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FreeCI);
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr,
      CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
      getNameWithSeparators({CVD->getName(), ".addr"}, ".", "."));
  return Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
}
1645
1646
/// OpenMPIRBuilder path for threadprivate variables: when native TLS is
/// usable the original address is returned unchanged; otherwise a
/// runtime-managed per-thread cache is created via
/// createCachedThreadPrivate and its result address is returned.
Address CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate(
    CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr,
    SourceLocation Loc) {
  CodeGenModule &CGM = CGF.CGM;
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();

  llvm::Type *VarTy = VDAddr.getElementType();
  llvm::Value *Data =
      CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy);
  llvm::ConstantInt *Size = CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy));
  // Cache symbol is named "<mangled-var>.cache.".
  std::string Suffix = getNameWithSeparators({"cache", ""});
  llvm::Twine CacheName = Twine(CGM.getMangledName(VD)).concat(Suffix);

  llvm::CallInst *ThreadPrivateCacheCall =
      OMPBuilder.createCachedThreadPrivate(CGF.Builder, Data, Size, CacheName);

  return Address(ThreadPrivateCacheCall, CGM.Int8Ty, VDAddr.getAlignment());
}
1668
1669
/// Joins \p Parts into one string, prefixing the first part with
/// \p FirstSeparator and every following part with \p Separator.
std::string CodeGenFunction::OMPBuilderCBHelpers::getNameWithSeparators(
    ArrayRef<StringRef> Parts, StringRef FirstSeparator, StringRef Separator) {
  std::string Result;
  bool IsFirstPart = true;
  for (StringRef Piece : Parts) {
    // The very first piece gets FirstSeparator; all others get Separator.
    Result.append((IsFirstPart ? FirstSeparator : Separator).str());
    Result.append(Piece.str());
    IsFirstPart = false;
  }
  return Result;
}
1680
1681
/// Emits \p RegionBodyStmt as the body of an inlined OpenMP region at
/// \p CodeGenIP. A ".<RegionName>.after" continuation block is split off
/// first and branched to when the body leaves the builder's insertion
/// point set.
void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
    CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP,
    InsertPointTy CodeGenIP, Twine RegionName) {
  CGBuilderTy &Builder = CGF.Builder;
  Builder.restoreIP(CodeGenIP);
  llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false,
                                               "." + RegionName + ".after");

  {
    // Scope the RAII so finalization runs before the trailing branch.
    OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB);
    CGF.EmitStmt(RegionBodyStmt);
  }

  if (Builder.saveIP().isSet())
    Builder.CreateBr(FiniBB);
}
1697
1698
void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody(
1699
    CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP,
1700
50
    InsertPointTy CodeGenIP, Twine RegionName) {
1701
50
  CGBuilderTy &Builder = CGF.Builder;
1702
50
  Builder.restoreIP(CodeGenIP);
1703
50
  llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false,
1704
50
                                               "." + RegionName + ".after");
1705
1706
50
  {
1707
50
    OMPBuilderCBHelpers::OutlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB);
1708
50
    CGF.EmitStmt(RegionBodyStmt);
1709
50
  }
1710
1711
50
  if (Builder.saveIP().isSet())
1712
50
    Builder.CreateBr(FiniBB);
1713
50
}
1714
1715
1.05k
/// Lowers '#pragma omp parallel'. With -fopenmp-enable-irbuilder the region
/// is built through OpenMPIRBuilder::createParallel; otherwise the classic
/// path outlines the region (privatization, copyin, reductions) and emits a
/// runtime parallel call via emitCommonOMPParallelDirective.
void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    // Check if we have any if clause associated with the directive.
    llvm::Value *IfCond = nullptr;
    if (const auto *C = S.getSingleClause<OMPIfClause>())
      IfCond = EmitScalarExpr(C->getCondition(),
                              /*IgnoreResultAssign=*/true);

    llvm::Value *NumThreads = nullptr;
    if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>())
      NumThreads = EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                  /*IgnoreResultAssign=*/true);

    ProcBindKind ProcBind = OMP_PROC_BIND_default;
    if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>())
      ProcBind = ProcBindClause->getProcBindKind();

    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    // The cleanup callback that finalizes all variabels at the given location,
    // thus calls destructors etc.
    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    // Privatization callback that performs appropriate action for
    // shared/private/firstprivate/lastprivate/copyin/... variables.
    //
    // TODO: This defaults to shared right now.
    auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                     llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
      // The next line is appropriate only for variables (Val) with the
      // data-sharing attribute "shared".
      ReplVal = &Val;

      return CodeGenIP;
    };

    const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
    const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt();

    auto BodyGenCB = [&, this](InsertPointTy AllocaIP,
                               InsertPointTy CodeGenIP) {
      OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody(
          *this, ParallelRegionBodyStmt, AllocaIP, CodeGenIP, "parallel");
    };

    CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
        AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
    Builder.restoreIP(
        OMPBuilder.createParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB,
                                  IfCond, NumThreads, ProcBind, S.hasCancel()));
    return;
  }

  // Emit parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    bool Copyins = CGF.EmitOMPCopyinClause(S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    if (Copyins) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // propagation master's thread values of threadprivate variables to local
      // instances of that variables of all other implicit threads.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
                                   emitEmptyBoundParameters);
    emitPostUpdateForReductionClause(*this, S,
                                     [](CodeGenFunction &) { return nullptr; });
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}
1804
1805
0
/// Lowers '#pragma omp metadirective' by emitting the statement the
/// directive exposes via getIfStmt() (the variant selection itself happens
/// before codegen — presumably in Sema; confirm against the AST builder).
void CodeGenFunction::EmitOMPMetaDirective(const OMPMetaDirective &S) {
  EmitStmt(S.getIfStmt());
}
1808
1809
namespace {
/// RAII to handle scopes for loop transformation directives.
/// For loop-based directives it opens an OMPLoopScope and installs a
/// CGCapturedStmtInfo for the duration of its lifetime; for any other
/// statement kind it does nothing.
/// NOTE(review): the three manually new/delete'd members could be
/// std::unique_ptr with the same destruction order (reverse declaration
/// order matches the explicit deletes) — consider modernizing.
class OMPTransformDirectiveScopeRAII {
  OMPLoopScope *Scope = nullptr;
  CodeGenFunction::CGCapturedStmtInfo *CGSI = nullptr;
  CodeGenFunction::CGCapturedStmtRAII *CapInfoRAII = nullptr;

public:
  OMPTransformDirectiveScopeRAII(CodeGenFunction &CGF, const Stmt *S) {
    if (const auto *Dir = dyn_cast<OMPLoopBasedDirective>(S)) {
      Scope = new OMPLoopScope(CGF, *Dir);
      CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP);
      CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI);
    }
  }
  ~OMPTransformDirectiveScopeRAII() {
    if (!Scope)
      return;
    // Tear down in reverse construction order.
    delete CapInfoRAII;
    delete CGSI;
    delete Scope;
  }
};
} // namespace
1833
1834
/// Recursively emit the body of a (possibly collapsed) OpenMP loop nest.
///
/// Starting at \p S, descends through up to \p MaxLevel associated loops,
/// emitting the statements of compound blocks and the bodies of the loops at
/// each level; \p NextLoop is the loop statement expected at the current
/// nesting level. Statements that are not the expected loop (imperfect
/// nesting) are emitted as-is via EmitStmt.
static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop,
                     int MaxLevel, int Level = 0) {
  assert(Level < MaxLevel && "Too deep lookup during loop body codegen.");
  const Stmt *SimplifiedS = S->IgnoreContainers();
  if (const auto *CS = dyn_cast<CompoundStmt>(SimplifiedS)) {
    PrettyStackTraceLoc CrashInfo(
        CGF.getContext().getSourceManager(), CS->getLBracLoc(),
        "LLVM IR generation of compound statement ('{}')");

    // Keep track of the current cleanup stack depth, including debug scopes.
    CodeGenFunction::LexicalScope Scope(CGF, S->getSourceRange());
    for (const Stmt *CurStmt : CS->body())
      emitBody(CGF, CurStmt, NextLoop, MaxLevel, Level);
    return;
  }
  if (SimplifiedS == NextLoop) {
    // A loop-transformation directive stands in for the loop it produced;
    // descend into the transformed statement instead.
    if (auto *Dir = dyn_cast<OMPLoopTransformationDirective>(SimplifiedS))
      SimplifiedS = Dir->getTransformedStmt();
    if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(SimplifiedS))
      SimplifiedS = CanonLoop->getLoopStmt();
    if (const auto *For = dyn_cast<ForStmt>(SimplifiedS)) {
      S = For->getBody();
    } else {
      assert(isa<CXXForRangeStmt>(SimplifiedS) &&
             "Expected canonical for loop or range-based for loop.");
      const auto *CXXFor = cast<CXXForRangeStmt>(SimplifiedS);
      // For range-based loops the loop variable still has to be initialized
      // explicitly before the body runs.
      CGF.EmitStmt(CXXFor->getLoopVarStmt());
      S = CXXFor->getBody();
    }
    if (Level + 1 < MaxLevel) {
      NextLoop = OMPLoopDirective::tryToFindNextInnerLoop(
          S, /*TryImperfectlyNestedLoops=*/true);
      emitBody(CGF, S, NextLoop, MaxLevel, Level + 1);
      return;
    }
  }
  CGF.EmitStmt(S);
}
1872
1873
/// Emit the body of a loop-associated OpenMP directive: update the loop
/// counters and linear variables from the iteration variable, guard
/// non-rectangular iteration spaces, set up inscan-reduction dispatch blocks
/// when needed, and emit the (possibly collapsed) loop body itself.
void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
                                      JumpDest LoopExit) {
  RunCleanupsScope BodyScope(*this);
  // Update counters values on current iteration.
  for (const Expr *UE : D.updates())
    EmitIgnoredExpr(UE);
  // Update the linear variables.
  // In distribute directives only loop counters may be marked as linear, no
  // need to generate the code for them.
  if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
    for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
      for (const Expr *UE : C->updates())
        EmitIgnoredExpr(UE);
    }
  }

  // On a continue in the body, jump to the end.
  JumpDest Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
  for (const Expr *E : D.finals_conditions()) {
    if (!E)
      continue;
    // Check that loop counter in non-rectangular nest fits into the iteration
    // space.
    llvm::BasicBlock *NextBB = createBasicBlock("omp.body.next");
    EmitBranchOnBoolExpr(E, NextBB, Continue.getBlock(),
                         getProfileCount(D.getBody()));
    EmitBlock(NextBB);
  }

  OMPPrivateScope InscanScope(*this);
  EmitOMPReductionClauseInit(D, InscanScope, /*ForInscan=*/true);
  bool IsInscanRegion = InscanScope.Privatize();
  if (IsInscanRegion) {
    // Need to remember the block before and after scan directive
    // to dispatch them correctly depending on the clause used in
    // this directive, inclusive or exclusive. For inclusive scan the natural
    // order of the blocks is used, for exclusive clause the blocks must be
    // executed in reverse order.
    OMPBeforeScanBlock = createBasicBlock("omp.before.scan.bb");
    OMPAfterScanBlock = createBasicBlock("omp.after.scan.bb");
    // No need to allocate inscan exit block, in simd mode it is selected in the
    // codegen for the scan directive.
    if (D.getDirectiveKind() != OMPD_simd && !getLangOpts().OpenMPSimd)
      OMPScanExitBlock = createBasicBlock("omp.exit.inscan.bb");
    OMPScanDispatch = createBasicBlock("omp.inscan.dispatch");
    EmitBranch(OMPScanDispatch);
    EmitBlock(OMPBeforeScanBlock);
  }

  // Emit loop variables for C++ range loops.
  const Stmt *Body =
      D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
  // Emit loop body.
  emitBody(*this, Body,
           OMPLoopBasedDirective::tryToFindNextInnerLoop(
               Body, /*TryImperfectlyNestedLoops=*/true),
           D.getLoopsNumber());

  // Jump to the dispatcher at the end of the loop body.
  if (IsInscanRegion)
    EmitBranch(OMPScanExitBlock);

  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
}
1940
1941
// A captured-statement closure: the outlined function and the pointer to its
// capture struct (passed as the trailing context argument on calls).
using EmittedClosureTy = std::pair<llvm::Function *, llvm::Value *>;
1942
1943
/// Emit a captured statement and return the function as well as its captured
/// closure context.
static EmittedClosureTy emitCapturedStmtFunc(CodeGenFunction &ParentCGF,
                                             const CapturedStmt *S) {
  // Materialize the capture struct in the parent function; the outlined
  // function itself is generated in a fresh CodeGenFunction.
  LValue CapStruct = ParentCGF.InitCapturedStruct(*S);
  CodeGenFunction CGF(ParentCGF.CGM, /*suppressNewContext=*/true);
  std::unique_ptr<CodeGenFunction::CGCapturedStmtInfo> CSI =
      std::make_unique<CodeGenFunction::CGCapturedStmtInfo>(*S);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, CSI.get());
  llvm::Function *F = CGF.GenerateCapturedStmtFunction(*S);

  return {F, CapStruct.getPointer(ParentCGF)};
}
1956
1957
/// Emit a call to a previously captured closure.
1958
static llvm::CallInst *
1959
emitCapturedStmtCall(CodeGenFunction &ParentCGF, EmittedClosureTy Cap,
1960
84
                     llvm::ArrayRef<llvm::Value *> Args) {
1961
  // Append the closure context to the argument.
1962
84
  SmallVector<llvm::Value *> EffectiveArgs;
1963
84
  EffectiveArgs.reserve(Args.size() + 1);
1964
84
  llvm::append_range(EffectiveArgs, Args);
1965
84
  EffectiveArgs.push_back(Cap.second);
1966
1967
84
  return ParentCGF.Builder.CreateCall(Cap.first, EffectiveArgs);
1968
84
}
1969
1970
/// Emit \p Depth canonical loops nested in \p S via the OpenMPIRBuilder path
/// and return the CanonicalLoopInfo of the outermost emitted loop.
llvm::CanonicalLoopInfo *
CodeGenFunction::EmitOMPCollapsedCanonicalLoopNest(const Stmt *S, int Depth) {
  assert(Depth == 1 && "Nested loops with OpenMPIRBuilder not yet implemented");

  // The caller is processing the loop-associated directive processing the \p
  // Depth loops nested in \p S. Put the previous pending loop-associated
  // directive to the stack. If the current loop-associated directive is a loop
  // transformation directive, it will push its generated loops onto the stack
  // such that together with the loops left here they form the combined loop
  // nest for the parent loop-associated directive.
  int ParentExpectedOMPLoopDepth = ExpectedOMPLoopDepth;
  ExpectedOMPLoopDepth = Depth;

  EmitStmt(S);
  assert(OMPLoopNestStack.size() >= (size_t)Depth && "Found too few loops");

  // The last added loop is the outermost one.
  llvm::CanonicalLoopInfo *Result = OMPLoopNestStack.back();

  // Pop the \p Depth loops requested by the call from that stack and restore
  // the previous context.
  OMPLoopNestStack.pop_back_n(Depth);
  ExpectedOMPLoopDepth = ParentExpectedOMPLoopDepth;

  return Result;
}
1996
1997
42
/// Emit an OMPCanonicalLoop using the OpenMPIRBuilder: run the loop's init
/// statements, outline the distance and loop-variable closures, and let the
/// builder create the canonical loop skeleton around the emitted body.
void CodeGenFunction::EmitOMPCanonicalLoop(const OMPCanonicalLoop *S) {
  const Stmt *SyntacticalLoop = S->getLoopStmt();
  if (!getLangOpts().OpenMPIRBuilder) {
    // Ignore if OpenMPIRBuilder is not enabled.
    EmitStmt(SyntacticalLoop);
    return;
  }

  LexicalScope ForScope(*this, S->getSourceRange());

  // Emit init statements. The Distance/LoopVar funcs may reference variable
  // declarations they contain.
  const Stmt *BodyStmt;
  if (const auto *For = dyn_cast<ForStmt>(SyntacticalLoop)) {
    if (const Stmt *InitStmt = For->getInit())
      EmitStmt(InitStmt);
    BodyStmt = For->getBody();
  } else if (const auto *RangeFor =
                 dyn_cast<CXXForRangeStmt>(SyntacticalLoop)) {
    // Range-based for: emit range/begin/end/loop-var declarations in order.
    if (const DeclStmt *RangeStmt = RangeFor->getRangeStmt())
      EmitStmt(RangeStmt);
    if (const DeclStmt *BeginStmt = RangeFor->getBeginStmt())
      EmitStmt(BeginStmt);
    if (const DeclStmt *EndStmt = RangeFor->getEndStmt())
      EmitStmt(EndStmt);
    if (const DeclStmt *LoopVarStmt = RangeFor->getLoopVarStmt())
      EmitStmt(LoopVarStmt);
    BodyStmt = RangeFor->getBody();
  } else
    llvm_unreachable("Expected for-stmt or range-based for-stmt");

  // Emit closure for later use. By-value captures will be captured here.
  const CapturedStmt *DistanceFunc = S->getDistanceFunc();
  EmittedClosureTy DistanceClosure = emitCapturedStmtFunc(*this, DistanceFunc);
  const CapturedStmt *LoopVarFunc = S->getLoopVarFunc();
  EmittedClosureTy LoopVarClosure = emitCapturedStmtFunc(*this, LoopVarFunc);

  // Call the distance function to get the number of iterations of the loop to
  // come.
  QualType LogicalTy = DistanceFunc->getCapturedDecl()
                           ->getParam(0)
                           ->getType()
                           .getNonReferenceType();
  Address CountAddr = CreateMemTemp(LogicalTy, ".count.addr");
  emitCapturedStmtCall(*this, DistanceClosure, {CountAddr.getPointer()});
  llvm::Value *DistVal = Builder.CreateLoad(CountAddr, ".count");

  // Emit the loop structure.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  auto BodyGen = [&, this](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP,
                           llvm::Value *IndVar) {
    Builder.restoreIP(CodeGenIP);

    // Emit the loop body: Convert the logical iteration number to the loop
    // variable and emit the body.
    const DeclRefExpr *LoopVarRef = S->getLoopVarRef();
    LValue LCVal = EmitLValue(LoopVarRef);
    Address LoopVarAddress = LCVal.getAddress(*this);
    emitCapturedStmtCall(*this, LoopVarClosure,
                         {LoopVarAddress.getPointer(), IndVar});

    RunCleanupsScope BodyScope(*this);
    EmitStmt(BodyStmt);
  };
  llvm::CanonicalLoopInfo *CL =
      OMPBuilder.createCanonicalLoop(Builder, BodyGen, DistVal);

  // Finish up the loop.
  Builder.restoreIP(CL->getAfterIP());
  ForScope.ForceCleanup();

  // Remember the CanonicalLoopInfo for parent AST nodes consuming it.
  OMPLoopNestStack.push_back(CL);
}
2071
2072
/// Emit the inner loop of a loop directive as a cond/body/inc basic-block
/// structure driven by \p LoopCond and \p IncExpr. \p BodyGen emits the loop
/// body; \p PostIncGen runs after each increment; \p RequiresCleanup inserts
/// an extra exit block that branches through pending cleanups.
void CodeGenFunction::EmitOMPInnerLoop(
    const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> BodyGen,
    const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  const SourceRange R = S.getSourceRange();

  // If attributes are attached, push to the basic block with them.
  const auto &OMPED = cast<OMPExecutableDirective>(S);
  const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt();
  const Stmt *SS = ICS->getCapturedStmt();
  const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(SS);
  OMPLoopNestStack.clear();
  if (AS)
    LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(),
                   AS->getAttrs(), SourceLocToDebugLoc(R.getBegin()),
                   SourceLocToDebugLoc(R.getEnd()));
  else
    LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                   SourceLocToDebugLoc(R.getEnd()));

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");

  llvm::BasicBlock *LoopBody = createBasicBlock("omp.inner.for.body");

  // Emit condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  incrementProfileCounter(&S);

  // Create a block for the increment.
  JumpDest Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  BodyGen(*this);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  PostIncGen(*this);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}
2132
2133
9.12k
/// Emit initializers for the variables of 'linear' clauses of \p D plus any
/// pre-calculated linear steps. Returns true if at least one linear variable
/// was emitted.
bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // Emit inits for the linear variables.
  bool HasLinears = false;
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Init : C->inits()) {
      HasLinears = true;
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
      if (const auto *Ref =
              dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
        // Initializer references the original variable: build a DeclRefExpr
        // that resolves through the captured statement when captured.
        AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
        const auto *OrigVD = cast<VarDecl>(Ref->getDecl());
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        VD->getInit()->getType(), VK_LValue,
                        VD->getInit()->getExprLoc());
        EmitExprAsInit(
            &DRE, VD,
            MakeAddrLValue(Emission.getAllocatedAddress(), VD->getType()),
            /*capturedByInit=*/false);
        EmitAutoVarCleanups(Emission);
      } else {
        EmitVarDecl(*VD);
      }
    }
    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    if (const auto *CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
      if (const auto *SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
        EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
        // Emit calculation of the linear step.
        EmitIgnoredExpr(CS);
      }
  }
  return HasLinears;
}
2170
2171
/// Emit the final values of 'linear' variables after the loop. If \p CondGen
/// produces a non-null condition for the first final expression, all finals
/// are emitted under that condition.
void CodeGenFunction::EmitOMPLinearClauseFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  // Emit the final values of the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto IC = C->varlist_begin();
    for (const Expr *F : C->finals()) {
      if (!DoneBB) {
        if (llvm::Value *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          llvm::BasicBlock *ThenBB = createBasicBlock(".omp.linear.pu");
          DoneBB = createBasicBlock(".omp.linear.pu.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = EmitLValue(&DRE).getAddress(*this);
      // Map the original variable to its real address so the final expression
      // stores through to it.
      CodeGenFunction::OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD, OrigAddr);
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
      ++IC;
    }
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}
2208
2209
/// Emit alignment assumptions for every pointer listed in 'aligned' clauses
/// of \p D, using the clause's alignment or the target's default SIMD
/// alignment when none is given.
static void emitAlignedClause(CodeGenFunction &CGF,
                              const OMPExecutableDirective &D) {
  if (!CGF.HaveInsertPoint())
    return;
  for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
    llvm::APInt ClauseAlignment(64, 0);
    if (const Expr *AlignmentExpr = Clause->getAlignment()) {
      // The alignment expression must fold to a constant; the cast to
      // ConstantInt asserts that.
      auto *AlignmentCI =
          cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
      ClauseAlignment = AlignmentCI->getValue();
    }
    for (const Expr *E : Clause->varlists()) {
      llvm::APInt Alignment(ClauseAlignment);
      if (Alignment == 0) {
        // OpenMP [2.8.1, Description]
        // If no optional parameter is specified, implementation-defined default
        // alignments for SIMD instructions on the target platforms are assumed.
        Alignment =
            CGF.getContext()
                .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
                    E->getType()->getPointeeType()))
                .getQuantity();
      }
      assert((Alignment == 0 || Alignment.isPowerOf2()) &&
             "alignment is not power of 2");
      if (Alignment != 0) {
        llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
        CGF.emitAlignmentAssumption(
            PtrValue, E, /*No second loc needed*/ SourceLocation(),
            llvm::ConstantInt::get(CGF.getLLVMContext(), Alignment));
      }
    }
  }
}
2243
2244
/// Create private copies of the loop counters of \p S — plus the extra
/// counters introduced by ordered(n) clauses — and register them in
/// \p LoopScope.
void CodeGenFunction::EmitOMPPrivateLoopCounters(
    const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
  if (!HaveInsertPoint())
    return;
  auto I = S.private_counters().begin();
  for (const Expr *E : S.counters()) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
    // Emit var without initialization.
    AutoVarEmission VarEmission = EmitAutoVarAlloca(*PrivateVD);
    EmitAutoVarCleanups(VarEmission);
    LocalDeclMap.erase(PrivateVD);
    (void)LoopScope.addPrivate(VD, VarEmission.getAllocatedAddress());
    if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
        VD->hasGlobalStorage()) {
      // The original counter is already materialized (local, captured, or
      // global); map the private counter to that existing address instead of
      // the fresh alloca.
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD),
                      LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
                      E->getType(), VK_LValue, E->getExprLoc());
      (void)LoopScope.addPrivate(PrivateVD, EmitLValue(&DRE).getAddress(*this));
    } else {
      (void)LoopScope.addPrivate(PrivateVD, VarEmission.getAllocatedAddress());
    }
    ++I;
  }
  // Privatize extra loop counters used in loops for ordered(n) clauses.
  for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) {
    if (!C->getNumForLoops())
      continue;
    for (unsigned I = S.getLoopsNumber(), E = C->getLoopNumIterations().size();
         I < E; ++I) {
      const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I));
      const auto *VD = cast<VarDecl>(DRE->getDecl());
      // Override only those variables that can be captured to avoid re-emission
      // of the variables declared within the loops.
      if (DRE->refersToEnclosingVariableOrCapture()) {
        (void)LoopScope.addPrivate(
            VD, CreateMemTemp(DRE->getType(), VD->getName()));
      }
    }
  }
}
2285
2286
/// Emit the pre-condition of loop directive \p S: initialize (privatized)
/// loop counters, then branch to \p TrueBlock if \p Cond holds (the loop
/// executes at least once) and to \p FalseBlock otherwise.
static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
                        const Expr *Cond, llvm::BasicBlock *TrueBlock,
                        llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
  if (!CGF.HaveInsertPoint())
    return;
  {
    CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
    CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
    (void)PreCondScope.Privatize();
    // Get initial values of real counters.
    for (const Expr *I : S.inits()) {
      CGF.EmitIgnoredExpr(I);
    }
  }
  // Create temp loop control variables with their init values to support
  // non-rectangular loops.
  CodeGenFunction::OMPMapVars PreCondVars;
  for (const Expr *E : S.dependent_counters()) {
    if (!E)
      continue;
    assert(!E->getType().getNonReferenceType()->isRecordType() &&
           "dependent counter must not be an iterator.");
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Address CounterAddr =
        CGF.CreateMemTemp(VD->getType().getNonReferenceType());
    (void)PreCondVars.setVarAddr(CGF, VD, CounterAddr);
  }
  (void)PreCondVars.apply(CGF);
  for (const Expr *E : S.dependent_inits()) {
    if (!E)
      continue;
    CGF.EmitIgnoredExpr(E);
  }
  // Check that loop is executed at least one time.
  CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
  PreCondVars.restore(CGF);
}
2323
2324
/// Emit private copies for variables in 'linear' clauses of \p D and register
/// them in \p PrivateScope. Variables that are also SIMD loop counters are
/// emitted but not re-registered (they are privatized as counters).
void CodeGenFunction::EmitOMPLinearClause(
    const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    // Collect the canonical decls of the loop counters for the check below.
    const auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (const Expr *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto CurPrivate = C->privates().begin();
    for (const Expr *E : C->varlists()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      const auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
      if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
        // Emit private VarDecl with copy init.
        EmitVarDecl(*PrivateVD);
        bool IsRegistered =
            PrivateScope.addPrivate(VD, GetAddrOfLocalVar(PrivateVD));
        assert(IsRegistered && "linear var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      } else {
        EmitVarDecl(*PrivateVD);
      }
      ++CurPrivate;
    }
  }
}
2357
2358
/// Apply 'simdlen'/'safelen' clauses of \p D to the loop metadata: set the
/// vectorize width and, when a finite 'safelen' is present, drop the
/// parallel-access annotation.
static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D) {
  if (!CGF.HaveInsertPoint())
    return;
  if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In presence of finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
  } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
    // 'safelen' without 'simdlen': use safelen as the vectorize width.
    RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In presence of finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    CGF.LoopStack.setParallel(/*Enable=*/false);
  }
}
2382
2383
8.62k
/// Set up loop metadata for a simd directive: enable vectorization, apply
/// simdlen/safelen, honor order(concurrent), and disable parallel-access
/// annotations for inscan (prefix-sum) reductions.
void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) {
  // Walk clauses and process safelen/lastprivate.
  LoopStack.setParallel(/*Enable=*/true);
  LoopStack.setVectorizeEnable();
  emitSimdlenSafelenClause(*this, D);
  if (const auto *C = D.getSingleClause<OMPOrderClause>())
    if (C->getKind() == OMPC_ORDER_concurrent)
      LoopStack.setParallel(/*Enable=*/true);
  if ((D.getDirectiveKind() == OMPD_simd ||
       (getLangOpts().OpenMPSimd &&
        isOpenMPSimdDirective(D.getDirectiveKind()))) &&
      llvm::any_of(D.getClausesOfKind<OMPReductionClause>(),
                   [](const OMPReductionClause *C) {
                     return C->getModifier() == OMPC_REDUCTION_inscan;
                   }))
    // Disable parallel access in case of prefix sum.
    LoopStack.setParallel(/*Enable=*/false);
}
2401
2402
/// Emit the final values of the loop counters of simd directive \p D,
/// optionally guarded by a condition produced by \p CondGen for the first
/// final expression.
void CodeGenFunction::EmitOMPSimdFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  auto IC = D.counters().begin();
  auto IPC = D.private_counters().begin();
  for (const Expr *F : D.finals()) {
    const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
    const auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
    // Only emit the final value when the original counter is visible here:
    // local, captured, global, or a captured expression decl.
    if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
        OrigVD->hasGlobalStorage() || CED) {
      if (!DoneBB) {
        if (llvm::Value *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          llvm::BasicBlock *ThenBB = createBasicBlock(".omp.final.then");
          DoneBB = createBasicBlock(".omp.final.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      Address OrigAddr = Address::invalid();
      if (CED) {
        OrigAddr =
            EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(*this);
      } else {
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD),
                        /*RefersToEnclosingVariableOrCapture=*/false,
                        (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
        OrigAddr = EmitLValue(&DRE).getAddress(*this);
      }
      // Map the original counter onto the chosen address so the final
      // expression stores through to it.
      OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD, OrigAddr);
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
    }
    ++IC;
    ++IPC;
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}
2447
2448
/// Emit the loop body of \p S followed by a debug stop point at the
/// directive's location.
static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
                                         const OMPLoopDirective &S,
                                         CodeGenFunction::JumpDest LoopExit) {
  CGF.EmitOMPLoopBody(S, LoopExit);
  CGF.EmitStopPoint(&S);
}
2454
2455
/// Emit a helper variable and return corresponding lvalue.
2456
static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
2457
42.6k
                               const DeclRefExpr *Helper) {
2458
42.6k
  auto VDecl = cast<VarDecl>(Helper->getDecl());
2459
42.6k
  CGF.EmitVarDecl(*VDecl);
2460
42.6k
  return CGF.EmitLValue(Helper);
2461
42.6k
}
2462
2463
/// Emit a simd loop body, possibly versioned on an applicable if-clause: the
/// "then" version runs \p SimdInitGen before \p BodyCodeGen, while the "else"
/// version emits the body with vectorization disabled.
static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S,
                               const RegionCodeGenTy &SimdInitGen,
                               const RegionCodeGenTy &BodyCodeGen) {
  auto &&ThenGen = [&S, &SimdInitGen, &BodyCodeGen](CodeGenFunction &CGF,
                                                    PrePostActionTy &) {
    CGOpenMPRuntime::NontemporalDeclsRAII NontemporalsRegion(CGF.CGM, S);
    CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
    SimdInitGen(CGF);

    BodyCodeGen(CGF);
  };
  auto &&ElseGen = [&BodyCodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
    CGF.LoopStack.setVectorizeEnable(/*Enable=*/false);

    BodyCodeGen(CGF);
  };
  const Expr *IfCond = nullptr;
  if (isOpenMPSimdDirective(S.getDirectiveKind())) {
    for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
      // Only an if-clause with no directive-name modifier or with the 'simd'
      // modifier applies here (OpenMP >= 5.0).
      if (CGF.getLangOpts().OpenMP >= 50 &&
          (C->getNameModifier() == OMPD_unknown ||
           C->getNameModifier() == OMPD_simd)) {
        IfCond = C->getCondition();
        break;
      }
    }
  }
  if (IfCond) {
    CGF.CGM.getOpenMPRuntime().emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2498
2499
/// Emit the body of a simd (or simd-combined) directive: the precondition
/// check, iteration variable, clause privatization, the inner loop itself,
/// and the final lastprivate/linear/reduction updates.
static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
                              PrePostActionTy &Action) {
  Action.Enter(CGF);
  assert(isOpenMPSimdDirective(S.getDirectiveKind()) &&
         "Expected simd directive");
  OMPLoopScope PreInitScope(CGF, S);
  // if (PreCond) {
  //   for (IV in 0..LastIteration) BODY;
  //   <Final counter/linear vars updates>;
  // }
  //
  // Worksharing-, distribute- and taskloop-combined simd forms carry explicit
  // lower/upper bound helper variables that must be emitted up front.
  if (isOpenMPDistributeDirective(S.getDirectiveKind()) ||
      isOpenMPWorksharingDirective(S.getDirectiveKind()) ||
      isOpenMPTaskLoopDirective(S.getDirectiveKind())) {
    (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()));
    (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()));
  }

  // Emit: if (PreCond) - begin.
  // If the condition constant folds and can be elided, avoid emitting the
  // whole loop.
  bool CondConstant;
  llvm::BasicBlock *ContBlock = nullptr;
  if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
    // Constant-false precondition: the loop never runs, emit nothing.
    if (!CondConstant)
      return;
  } else {
    llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("simd.if.then");
    ContBlock = CGF.createBasicBlock("simd.if.end");
    emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                CGF.getProfileCount(&S));
    CGF.EmitBlock(ThenBlock);
    CGF.incrementProfileCounter(&S);
  }

  // Emit the loop iteration variable.
  const Expr *IVExpr = S.getIterationVariable();
  const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
  CGF.EmitVarDecl(*IVDecl);
  CGF.EmitIgnoredExpr(S.getInit());

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on
  // each iteration (e.g., it is foldable into a constant).
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    CGF.EmitIgnoredExpr(S.getCalcLastIteration());
  }

  emitAlignedClause(CGF, S);
  (void)CGF.EmitOMPLinearClauseInit(S);
  {
    // Privatize loop counters and clause variables for the loop body.
    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    CGF.EmitOMPLinearClause(S, LoopScope);
    CGF.EmitOMPPrivateClause(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
        CGF, S, CGF.EmitLValue(S.getIterationVariable()));
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);

    // Emit the loop itself; the first callback installs simd loop metadata,
    // the second emits the actual inner loop with the directive's body.
    emitCommonSimdLoop(
        CGF, S,
        [&S](CodeGenFunction &CGF, PrePostActionTy &) {
          CGF.EmitOMPSimdInit(S);
        },
        [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
          CGF.EmitOMPInnerLoop(
              S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
              [&S](CodeGenFunction &CGF) {
                emitOMPLoopBodyWithStopPoint(CGF, S,
                                             CodeGenFunction::JumpDest());
              },
              [](CodeGenFunction &) {});
        });
    CGF.EmitOMPSimdFinal(S, [](CodeGenFunction &) { return nullptr; });
    // Emit final copy of the lastprivate variables at the end of loops.
    if (HasLastprivateClause)
      CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
    emitPostUpdateForReductionClause(CGF, S,
                                     [](CodeGenFunction &) { return nullptr; });
  }
  CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; });
  // Emit: if (PreCond) - end.
  if (ContBlock) {
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
}
2593
2594
8
static bool isSupportedByOpenMPIRBuilder(const OMPSimdDirective &S) {
2595
  // Check for unsupported clauses
2596
8
  for (OMPClause *C : S.clauses()) {
2597
    // Currently only simdlen clause is supported
2598
1
    if (!isa<OMPSimdlenClause>(C))
2599
0
      return false;
2600
1
  }
2601
2602
  // Check if we have a statement with the ordered directive.
2603
  // Visit the statement hierarchy to find a compound statement
2604
  // with a ordered directive in it.
2605
8
  if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(S.getRawStmt())) {
2606
8
    if (const Stmt *SyntacticalLoop = CanonLoop->getLoopStmt()) {
2607
40
      for (const Stmt *SubStmt : SyntacticalLoop->children()) {
2608
40
        if (!SubStmt)
2609
8
          continue;
2610
32
        if (const CompoundStmt *CS = dyn_cast<CompoundStmt>(SubStmt)) {
2611
12
          for (const Stmt *CSSubStmt : CS->children()) {
2612
12
            if (!CSSubStmt)
2613
0
              continue;
2614
12
            if (isa<OMPOrderedDirective>(CSSubStmt)) {
2615
4
              return false;
2616
4
            }
2617
12
          }
2618
8
        }
2619
32
      }
2620
8
    }
2621
8
  }
2622
4
  return true;
2623
8
}
2624
2625
185
/// Emit a '#pragma omp simd' directive, either through the OpenMPIRBuilder
/// (when enabled and the directive is supported) or through the classic
/// clause-by-clause codegen path.
void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  bool UseOMPIRBuilder =
      CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S);
  if (UseOMPIRBuilder) {
    auto &&CodeGenIRBuilder = [this, &S, UseOMPIRBuilder](CodeGenFunction &CGF,
                                                          PrePostActionTy &) {
      // Use the OpenMPIRBuilder if enabled.
      if (UseOMPIRBuilder) {
        // Emit the associated statement and get its loop representation.
        const Stmt *Inner = S.getRawStmt();
        llvm::CanonicalLoopInfo *CLI =
            EmitOMPCollapsedCanonicalLoopNest(Inner, 1);

        llvm::OpenMPIRBuilder &OMPBuilder =
            CGM.getOpenMPRuntime().getOMPBuilder();
        // Add SIMD specific metadata
        llvm::ConstantInt *Simdlen = nullptr;
        if (const auto *C = S.getSingleClause<OMPSimdlenClause>()) {
          // The simdlen expression is an integral constant; evaluate it only
          // for its constant value.
          RValue Len =
              this->EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
                                /*ignoreResult=*/true);
          auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
          Simdlen = Val;
        }
        OMPBuilder.applySimd(CLI, Simdlen);
        return;
      }
    };
    {
      auto LPCRegion =
          CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
      OMPLexicalScope Scope(*this, S, OMPD_unknown);
      CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd,
                                                  CodeGenIRBuilder);
    }
    return;
  }

  // Classic path: emit the simd region inline within a lexical scope, with
  // scan-region bookkeeping for a possible nested 'scan' directive.
  ParentLoopDirectiveForScanRegion ScanRegion(*this, S);
  OMPFirstScanLoop = true;
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}
2677
2678
8
void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective &S) {
2679
  // Emit the de-sugared statement.
2680
8
  OMPTransformDirectiveScopeRAII TileScope(*this, &S);
2681
8
  EmitStmt(S.getTransformedStmt());
2682
8
}
2683
2684
18
/// Emit a '#pragma omp unroll' directive, either via the OpenMPIRBuilder's
/// unroll transformations or by attaching unroll metadata to the next loop.
void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) {
  bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder;

  if (UseOMPIRBuilder) {
    auto DL = SourceLocToDebugLoc(S.getBeginLoc());
    const Stmt *Inner = S.getRawStmt();

    // Consume nested loop. Clear the entire remaining loop stack because a
    // fully unrolled loop is non-transformable. For partial unrolling the
    // generated outer loop is pushed back to the stack.
    llvm::CanonicalLoopInfo *CLI = EmitOMPCollapsedCanonicalLoopNest(Inner, 1);
    OMPLoopNestStack.clear();

    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();

    // An enclosing loop-associated construct still expects a loop from us, so
    // partial unrolling must hand back the generated outer loop.
    bool NeedsUnrolledCLI = ExpectedOMPLoopDepth >= 1;
    llvm::CanonicalLoopInfo *UnrolledCLI = nullptr;

    if (S.hasClausesOfKind<OMPFullClause>()) {
      assert(ExpectedOMPLoopDepth == 0);
      OMPBuilder.unrollLoopFull(DL, CLI);
    } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
      // Factor 0 lets the IR builder choose the unroll factor itself.
      uint64_t Factor = 0;
      if (Expr *FactorExpr = PartialClause->getFactor()) {
        Factor = FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue();
        assert(Factor >= 1 && "Only positive factors are valid");
      }
      OMPBuilder.unrollLoopPartial(DL, CLI, Factor,
                                   NeedsUnrolledCLI ? &UnrolledCLI : nullptr);
    } else {
      OMPBuilder.unrollLoopHeuristic(DL, CLI);
    }

    assert((!NeedsUnrolledCLI || UnrolledCLI) &&
           "NeedsUnrolledCLI implies UnrolledCLI to be set");
    if (UnrolledCLI)
      OMPLoopNestStack.push_back(UnrolledCLI);

    return;
  }

  // This function is only called if the unrolled loop is not consumed by any
  // other loop-associated construct. Such a loop-associated construct will have
  // used the transformed AST.

  // Set the unroll metadata for the next emitted loop.
  LoopStack.setUnrollState(LoopAttributes::Enable);

  if (S.hasClausesOfKind<OMPFullClause>()) {
    LoopStack.setUnrollState(LoopAttributes::Full);
  } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
    if (Expr *FactorExpr = PartialClause->getFactor()) {
      uint64_t Factor =
          FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue();
      assert(Factor >= 1 && "Only positive factors are valid");
      LoopStack.setUnrollCount(Factor);
    }
  }

  EmitStmt(S.getAssociatedStmt());
}
2745
2746
/// Emit the outer dispatch loop for a worksharing or distribute schedule:
/// repeatedly obtain a chunk (statically or via __kmpc_dispatch_next) and run
/// the inner loop over it until no chunks remain.
void CodeGenFunction::EmitOMPOuterLoop(
    bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
    CodeGenFunction::OMPPrivateScope &LoopScope,
    const CodeGenFunction::OMPLoopArguments &LoopArgs,
    const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
    const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  JumpDest LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");

  // Start the loop with a block that tests the condition.
  llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  const SourceRange R = S.getSourceRange();
  OMPLoopNestStack.clear();
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));

  llvm::Value *BoolCondVal = nullptr;
  if (!DynamicOrOrdered) {
    // UB = min(UB, GlobalUB) or
    // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
    // 'distribute parallel for')
    EmitIgnoredExpr(LoopArgs.EUB);
    // IV = LB
    EmitIgnoredExpr(LoopArgs.Init);
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
  } else {
    // Dynamic/ordered schedules ask the runtime for the next chunk; the
    // returned flag tells whether another chunk is available.
    BoolCondVal =
        RT.emitForNext(*this, S.getBeginLoc(), IVSize, IVSigned, LoopArgs.IL,
                       LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
  }

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");

  llvm::BasicBlock *LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);

  // Emit "IV = LB" (in case of static schedule, we have already calculated new
  // LB for loop condition and emitted it above).
  if (DynamicOrOrdered)
    EmitIgnoredExpr(LoopArgs.Init);

  // Create a block for the increment.
  JumpDest Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  emitCommonSimdLoop(
      *this, S,
      [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) {
        // Generate !llvm.loop.parallel metadata for loads and stores for loops
        // with dynamic/guided scheduling and without ordered clause.
        if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
          CGF.LoopStack.setParallel(!IsMonotonic);
          if (const auto *C = S.getSingleClause<OMPOrderClause>())
            if (C->getKind() == OMPC_ORDER_concurrent)
              CGF.LoopStack.setParallel(/*Enable=*/true);
        } else {
          CGF.EmitOMPSimdInit(S);
        }
      },
      [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered,
       &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
        SourceLocation Loc = S.getBeginLoc();
        // when 'distribute' is not combined with a 'for':
        // while (idx <= UB) { BODY; ++idx; }
        // when 'distribute' is combined with a 'for'
        // (e.g. 'distribute parallel for')
        // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
        CGF.EmitOMPInnerLoop(
            S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
            [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
              CodeGenLoop(CGF, S, LoopExit);
            },
            [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
              CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
            });
      });

  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (!DynamicOrOrdered) {
    // Emit "LB = LB + Stride", "UB = UB + Stride".
    EmitIgnoredExpr(LoopArgs.NextLB);
    EmitIgnoredExpr(LoopArgs.NextUB);
  }

  EmitBranch(CondBlock);
  OMPLoopNestStack.clear();
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());

  // Tell the runtime we are done.
  auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
    if (!DynamicOrOrdered)
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
                                                     S.getDirectiveKind());
  };
  OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
}
2861
2862
/// Emit the outer loop for a worksharing ('for') construct whose schedule
/// requires one (dynamic/guided/auto/runtime, ordered, or chunked static),
/// initializing the runtime accordingly before delegating to EmitOMPOuterLoop.
void CodeGenFunction::EmitOMPForOuterLoop(
    const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
    const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
    const OMPLoopArguments &LoopArgs,
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
  const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind.Schedule);

  assert((Ordered || !RT.isStaticNonchunked(ScheduleKind.Schedule,
                                            LoopArgs.Chunk != nullptr)) &&
         "static non-chunked schedule does not need outer loop");

  // Emit outer loop.
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(dynamic,chunk_size) is specified, the iterations are
  // distributed to threads in the team in chunks as the threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be distributed. Each chunk contains chunk_size
  // iterations, except for the last chunk to be distributed, which may have
  // fewer iterations. When no chunk_size is specified, it defaults to 1.
  //
  // When schedule(guided,chunk_size) is specified, the iterations are assigned
  // to threads in the team in chunks as the executing threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be assigned. For a chunk_size of 1, the size of
  // each chunk is proportional to the number of unassigned iterations divided
  // by the number of threads in the team, decreasing to 1. For a chunk_size
  // with value k (greater than 1), the size of each chunk is determined in the
  // same way, with the restriction that the chunks do not contain fewer than k
  // iterations (except for the last chunk to be assigned, which may have fewer
  // than k iterations).
  //
  // When schedule(auto) is specified, the decision regarding scheduling is
  // delegated to the compiler and/or runtime system. The programmer gives the
  // implementation the freedom to choose any possible mapping of iterations to
  // threads in the team.
  //
  // When schedule(runtime) is specified, the decision regarding scheduling is
  // deferred until run time, and the schedule and chunk size are taken from the
  // run-sched-var ICV. If the ICV is set to auto, the schedule is
  // implementation defined
  //
  // while(__kmpc_dispatch_next(&LB, &UB)) {
  //   idx = LB;
  //   while (idx <= UB) { BODY; ++idx;
  //   __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
  //   } // inner loop
  // }
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(static, chunk_size) is specified, iterations are divided into
  // chunks of size chunk_size, and the chunks are assigned to the threads in
  // the team in a round-robin fashion in the order of the thread number.
  //
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  //   LB = LB + ST;
  //   UB = UB + ST;
  // }
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  if (DynamicOrOrdered) {
    // Dispatch schedules take the raw (possibly non-normalized) bounds,
    // computed by the directive-specific callback.
    const std::pair<llvm::Value *, llvm::Value *> DispatchBounds =
        CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
    llvm::Value *LBVal = DispatchBounds.first;
    llvm::Value *UBVal = DispatchBounds.second;
    CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal,
                                                             LoopArgs.Chunk};
    RT.emitForDispatchInit(*this, S.getBeginLoc(), ScheduleKind, IVSize,
                           IVSigned, Ordered, DipatchRTInputValues);
  } else {
    CGOpenMPRuntime::StaticRTInput StaticInit(
        IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
        LoopArgs.ST, LoopArgs.Chunk);
    RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
                         ScheduleKind, StaticInit);
  }

  // Ordered loops must notify the runtime at the end of each iteration.
  auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
                                    const unsigned IVSize,
                                    const bool IVSigned) {
    if (Ordered) {
      CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
                                                            IVSigned);
    }
  };

  OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
                                 LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
  OuterLoopArgs.IncExpr = S.getInc();
  OuterLoopArgs.Init = S.getInit();
  OuterLoopArgs.Cond = S.getCond();
  OuterLoopArgs.NextLB = S.getNextLowerBound();
  OuterLoopArgs.NextUB = S.getNextUpperBound();
  EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
                   emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
}
2966
2967
/// No-op 'ordered' callback for outer loops whose schedule never requires a
/// per-iteration ordered-finish runtime call (e.g. 'distribute').
static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
                             const unsigned IVSize, const bool IVSigned) {}
2969
2970
void CodeGenFunction::EmitOMPDistributeOuterLoop(
2971
    OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
2972
    OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
2973
164
    const CodeGenLoopTy &CodeGenLoopContent) {
2974
2975
164
  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
2976
2977
  // Emit outer loop.
2978
  // Same behavior as a OMPForOuterLoop, except that schedule cannot be
2979
  // dynamic
2980
  //
2981
2982
164
  const Expr *IVExpr = S.getIterationVariable();
2983
164
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2984
164
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2985
2986
164
  CGOpenMPRuntime::StaticRTInput StaticInit(
2987
164
      IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
2988
164
      LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
2989
164
  RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit);
2990
2991
  // for combined 'distribute' and 'for' the increment expression of distribute
2992
  // is stored in DistInc. For 'distribute' alone, it is in Inc.
2993
164
  Expr *IncExpr;
2994
164
  if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
2995
0
    IncExpr = S.getDistInc();
2996
164
  else
2997
164
    IncExpr = S.getInc();
2998
2999
  // this routine is shared by 'omp distribute parallel for' and
3000
  // 'omp distribute': select the right EUB expression depending on the
3001
  // directive
3002
164
  OMPLoopArguments OuterLoopArgs;
3003
164
  OuterLoopArgs.LB = LoopArgs.LB;
3004
164
  OuterLoopArgs.UB = LoopArgs.UB;
3005
164
  OuterLoopArgs.ST = LoopArgs.ST;
3006
164
  OuterLoopArgs.IL = LoopArgs.IL;
3007
164
  OuterLoopArgs.Chunk = LoopArgs.Chunk;
3008
164
  OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3009
164
                          ? 
S.getCombinedEnsureUpperBound()0
3010
164
                          : S.getEnsureUpperBound();
3011
164
  OuterLoopArgs.IncExpr = IncExpr;
3012
164
  OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3013
164
                           ? 
S.getCombinedInit()0
3014
164
                           : S.getInit();
3015
164
  OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3016
164
                           ? 
S.getCombinedCond()0
3017
164
                           : S.getCond();
3018
164
  OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3019
164
                             ? 
S.getCombinedNextLowerBound()0
3020
164
                             : S.getNextLowerBound();
3021
164
  OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3022
164
                             ? 
S.getCombinedNextUpperBound()0
3023
164
                             : S.getNextUpperBound();
3024
3025
164
  EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
3026
164
                   LoopScope, OuterLoopArgs, CodeGenLoopContent,
3027
164
                   emitEmptyOrdered);
3028
164
}
3029
3030
/// Materialize the inner (worksharing) lower/upper bound variables of a
/// combined 'distribute parallel for' construct by copying in the chunk
/// bounds produced by the enclosing 'distribute' schedule.
static std::pair<LValue, LValue>
emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &S) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  LValue LB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
  LValue UB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));

  // The 'for' part must iterate over the chunk the 'distribute' schedule
  // assigned to this team rather than the whole iteration space, so the
  // previous schedule's bounds (outlined-function parameters) are loaded and
  // converted to the iteration-variable type.
  QualType IVType = LS.getIterationVariable()->getType();
  auto LoadPrevBound = [&CGF, IVType](const Expr *PrevRef) -> llvm::Value * {
    LValue PrevLV = CGF.EmitLValue(PrevRef);
    llvm::Value *PrevVal = CGF.EmitLoadOfScalar(PrevLV, PrevRef->getExprLoc());
    return CGF.EmitScalarConversion(PrevVal, PrevRef->getType(), IVType,
                                    PrevRef->getExprLoc());
  };
  llvm::Value *PrevLBVal = LoadPrevBound(LS.getPrevLowerBoundVariable());
  llvm::Value *PrevUBVal = LoadPrevBound(LS.getPrevUpperBoundVariable());

  CGF.EmitStoreOfScalar(PrevLBVal, LB);
  CGF.EmitStoreOfScalar(PrevUBVal, UB);

  return {LB, UB};
}
3065
3066
/// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
3067
/// we need to use the LB and UB expressions generated by the worksharing
3068
/// code generation support, whereas in non combined situations we would
3069
/// just emit 0 and the LastIteration expression
3070
/// This function is necessary due to the difference of the LB and UB
3071
/// types for the RT emission routines for 'for_static_init' and
3072
/// 'for_dispatch_init'
3073
static std::pair<llvm::Value *, llvm::Value *>
3074
emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
3075
                                        const OMPExecutableDirective &S,
3076
440
                                        Address LB, Address UB) {
3077
440
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
3078
440
  const Expr *IVExpr = LS.getIterationVariable();
3079
  // when implementing a dynamic schedule for a 'for' combined with a
3080
  // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
3081
  // is not normalized as each team only executes its own assigned
3082
  // distribute chunk
3083
440
  QualType IteratorTy = IVExpr->getType();
3084
440
  llvm::Value *LBVal =
3085
440
      CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
3086
440
  llvm::Value *UBVal =
3087
440
      CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
3088
440
  return {LBVal, UBVal};
3089
440
}
3090
3091
static void emitDistributeParallelForDistributeInnerBoundParams(
3092
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
3093
2.75k
    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
3094
2.75k
  const auto &Dir = cast<OMPLoopDirective>(S);
3095
2.75k
  LValue LB =
3096
2.75k
      CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
3097
2.75k
  llvm::Value *LBCast =
3098
2.75k
      CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(LB.getAddress(CGF)),
3099
2.75k
                                CGF.SizeTy, /*isSigned=*/false);
3100
2.75k
  CapturedVars.push_back(LBCast);
3101
2.75k
  LValue UB =
3102
2.75k
      CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
3103
3104
2.75k
  llvm::Value *UBCast =
3105
2.75k
      CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(UB.getAddress(CGF)),
3106
2.75k
                                CGF.SizeTy, /*isSigned=*/false);
3107
2.75k
  CapturedVars.push_back(UBCast);
3108
2.75k
}
3109
3110
/// Emit the inner 'parallel for[ simd]' part of a combined
/// 'distribute parallel for[ simd]' construct, wiring the distribute chunk
/// bounds into the worksharing loop.
static void
emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
                                 const OMPLoopDirective &S,
                                 CodeGenFunction::JumpDest LoopExit) {
  auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
                                         PrePostActionTy &Action) {
    Action.Enter(CGF);
    // Only the non-simd combined forms can carry a 'cancel' region; pick up
    // the flag from whichever combined directive this actually is.
    bool HasCancel = false;
    if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
      if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
      else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
      else if (const auto *D =
                   dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
    }
    CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
                                                     HasCancel);
    // Emit the worksharing loop over the distribute chunk, using the
    // combined-construct callbacks for the inner and dispatch bounds.
    CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
                               emitDistributeParallelForInnerBounds,
                               emitDistributeParallelForDispatchBounds);
  };

  emitCommonOMPParallelDirective(
      CGF, S,
      isOpenMPSimdDirective(S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for,
      CGInlinedWorksharingLoop,
      emitDistributeParallelForDistributeInnerBoundParams);
}
3140
3141
void CodeGenFunction::EmitOMPDistributeParallelForDirective(
3142
401
    const OMPDistributeParallelForDirective &S) {
3143
401
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3144
401
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
3145
401
                              S.getDistInc());
3146
401
  };
3147
401
  OMPLexicalScope Scope(*this, S, OMPD_parallel);
3148
401
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
3149
401
}
3150
3151
void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
3152
313
    const OMPDistributeParallelForSimdDirective &S) {
3153
313
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3154
313
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
3155
313
                              S.getDistInc());
3156
313
  };
3157
313
  OMPLexicalScope Scope(*this, S, OMPD_parallel);
3158
313
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
3159
313
}
3160
3161
void CodeGenFunction::EmitOMPDistributeSimdDirective(
3162
150
    const OMPDistributeSimdDirective &S) {
3163
150
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3164
150
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
3165
150
  };
3166
150
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
3167
150
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
3168
150
}
3169
3170
void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
3171
193
    CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
3172
  // Emit SPMD target parallel for region as a standalone region.
3173
193
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3174
193
    emitOMPSimdRegion(CGF, S, Action);
3175
193
  };
3176
193
  llvm::Function *Fn;
3177
193
  llvm::Constant *Addr;
3178
  // Emit target region as a standalone region.
3179
193
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3180
193
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
3181
193
  assert(Fn && Addr && "Target device function emission failed.");
3182
193
}
3183
3184
void CodeGenFunction::EmitOMPTargetSimdDirective(
3185
355
    const OMPTargetSimdDirective &S) {
3186
355
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3187
355
    emitOMPSimdRegion(CGF, S, Action);
3188
355
  };
3189
355
  emitCommonOMPTargetDirective(*this, S, CodeGen);
3190
355
}
3191
3192
namespace {
/// Bundles an OpenMP 'schedule' clause kind with its two optional modifiers
/// so the triple can be passed around as one value.
struct ScheduleKindModifiersTy {
  // Base schedule kind (e.g. static, dynamic, guided).
  OpenMPScheduleClauseKind Kind;
  // First schedule modifier from the clause, if any.
  OpenMPScheduleClauseModifier M1;
  // Second schedule modifier from the clause, if any.
  OpenMPScheduleClauseModifier M2;
  ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
                          OpenMPScheduleClauseModifier M1,
                          OpenMPScheduleClauseModifier M2)
      : Kind(Kind), M1(M1), M2(M2) {}
};
} // namespace
3203
3204
/// Emits code for an OpenMP worksharing loop ('for'-family directives):
/// iteration-variable setup, precondition check, clause initialization
/// (firstprivate/private/lastprivate/linear/reduction), schedule detection,
/// and either the static-schedule inner loop or the dynamic outer loop.
///
/// \param S   The loop directive to emit.
/// \param EUB Upper-bound clamp expression forwarded to the dynamic
///            outer-loop path (via OMPLoopArguments).
/// \param CodeGenLoopBounds  Callback producing the LB/UB helper lvalues.
/// \param CGDispatchBounds   Callback producing dispatch-schedule bounds.
/// \returns true if a lastprivate clause's final copy was emitted; false
///          otherwise, including when the precondition constant-folds to
///          false and the entire loop is skipped.
bool CodeGenFunction::EmitOMPWorksharingLoop(
    const OMPLoopDirective &S, Expr *EUB,
    const CodeGenLoopBoundsTy &CodeGenLoopBounds,
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
  // Emit the loop iteration variable.
  const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on each
  // iteration (e.g., it is foldable into a constant).
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return false;
    } else {
      // Precondition is dynamic: branch around the loop at runtime.
      llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    RunCleanupsScope DoacrossCleanupScope(*this);
    bool Ordered = false;
    // An 'ordered' clause with a parameter triggers doacross-dependency
    // initialization; a bare 'ordered' forces ordered (dynamic) scheduling.
    if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
      if (OrderedClause->getNumForLoops())
        RT.emitDoacrossInit(*this, S, OrderedClause->getLoopNumIterations());
      else
        Ordered = true;
    }

    llvm::DenseSet<const Expr *> EmittedFinals;
    emitAlignedClause(*this, S);
    bool HasLinears = EmitOMPLinearClauseInit(S);
    // Emit helper vars inits.

    std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
    LValue LB = Bounds.first;
    LValue UB = Bounds.second;
    LValue ST =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
    LValue IL =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

    // Emit 'then' code.
    {
      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
        // Emit implicit barrier to synchronize threads and avoid data races on
        // initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
          *this, S, EmitLValue(S.getIterationVariable()));
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPReductionClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      EmitOMPLinearClause(S, LoopScope);
      (void)LoopScope.Privatize();
      if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
        CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);

      // Detect the loop schedule kind and chunk.
      const Expr *ChunkExpr = nullptr;
      OpenMPScheduleTy ScheduleKind;
      if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
        ScheduleKind.Schedule = C->getScheduleKind();
        ScheduleKind.M1 = C->getFirstScheduleModifier();
        ScheduleKind.M2 = C->getSecondScheduleModifier();
        ChunkExpr = C->getChunkSize();
      } else {
        // Default behaviour for schedule clause.
        CGM.getOpenMPRuntime().getDefaultScheduleAndChunk(
            *this, S, ScheduleKind.Schedule, ChunkExpr);
      }
      bool HasChunkSizeOne = false;
      llvm::Value *Chunk = nullptr;
      if (ChunkExpr) {
        Chunk = EmitScalarExpr(ChunkExpr);
        Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(),
                                     S.getIterationVariable()->getType(),
                                     S.getBeginLoc());
        // Constant-fold the chunk to recognize the special chunk==1 case.
        Expr::EvalResult Result;
        if (ChunkExpr->EvaluateAsInt(Result, getContext())) {
          llvm::APSInt EvaluatedChunk = Result.Val.getInt();
          HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1);
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
      // OpenMP 4.5, 2.7.1 Loop Construct, Description.
      // If the static schedule kind is specified or if the ordered clause is
      // specified, and if no monotonic modifier is specified, the effect will
      // be as if the monotonic modifier was specified.
      bool StaticChunkedOne =
          RT.isStaticChunked(ScheduleKind.Schedule,
                             /* Chunked */ Chunk != nullptr) &&
          HasChunkSizeOne &&
          isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
      bool IsMonotonic =
          Ordered ||
          (ScheduleKind.Schedule == OMPC_SCHEDULE_static &&
           !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic ||
             ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) ||
          ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
          ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
      // Static (non-chunked, or chunk==1 on a bound-sharing directive) and
      // not ordered: emit the simple static inner loop. Otherwise fall
      // through to the dynamic/dispatch outer loop below.
      if ((RT.isStaticNonchunked(ScheduleKind.Schedule,
                                 /* Chunked */ Chunk != nullptr) ||
           StaticChunkedOne) &&
          !Ordered) {
        JumpDest LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        emitCommonSimdLoop(
            *this, S,
            [&S](CodeGenFunction &CGF, PrePostActionTy &) {
              if (isOpenMPSimdDirective(S.getDirectiveKind())) {
                CGF.EmitOMPSimdInit(S);
              } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
                if (C->getKind() == OMPC_ORDER_concurrent)
                  CGF.LoopStack.setParallel(/*Enable=*/true);
              }
            },
            [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk,
             &S, ScheduleKind, LoopExit,
             &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
              // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
              // When no chunk_size is specified, the iteration space is divided
              // into chunks that are approximately equal in size, and at most
              // one chunk is distributed to each thread. Note that the size of
              // the chunks is unspecified in this case.
              CGOpenMPRuntime::StaticRTInput StaticInit(
                  IVSize, IVSigned, Ordered, IL.getAddress(CGF),
                  LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF),
                  StaticChunkedOne ? Chunk : nullptr);
              CGF.CGM.getOpenMPRuntime().emitForStaticInit(
                  CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind,
                  StaticInit);
              // UB = min(UB, GlobalUB);
              if (!StaticChunkedOne)
                CGF.EmitIgnoredExpr(S.getEnsureUpperBound());
              // IV = LB;
              CGF.EmitIgnoredExpr(S.getInit());
              // For unchunked static schedule generate:
              //
              // while (idx <= UB) {
              //   BODY;
              //   ++idx;
              // }
              //
              // For static schedule with chunk one:
              //
              // while (IV <= PrevUB) {
              //   BODY;
              //   IV += ST;
              // }
              CGF.EmitOMPInnerLoop(
                  S, LoopScope.requiresCleanups(),
                  StaticChunkedOne ? S.getCombinedParForInDistCond()
                                   : S.getCond(),
                  StaticChunkedOne ? S.getDistInc() : S.getInc(),
                  [&S, LoopExit](CodeGenFunction &CGF) {
                    emitOMPLoopBodyWithStopPoint(CGF, S, LoopExit);
                  },
                  [](CodeGenFunction &) {});
            });
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        auto &&CodeGen = [&S](CodeGenFunction &CGF) {
          CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
                                                         S.getDirectiveKind());
        };
        OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        const OMPLoopArguments LoopArguments(
            LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
            IL.getAddress(*this), Chunk, EUB);
        EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
                            LoopArguments, CGDispatchBounds);
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind())) {
        EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
        });
      }
      EmitOMPReductionClauseFinal(
          S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
                 ? /*Parallel and Simd*/ OMPD_parallel_for_simd
                 : /*Parallel only*/ OMPD_parallel);
      // Emit post-update of the reduction variables if IsLastIter != 0.
      emitPostUpdateForReductionClause(
          *this, S, [IL, &S](CodeGenFunction &CGF) {
            return CGF.Builder.CreateIsNotNull(
                CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
          });
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause)
        EmitOMPLastprivateClauseFinal(
            S, isOpenMPSimdDirective(S.getDirectiveKind()),
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
    }
    EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
    });
    DoacrossCleanupScope.ForceCleanup();
    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, /*IsFinished=*/true);
    }
  }
  return HasLastprivateClause;
}
3443
3444
/// The following two functions generate expressions for the loop lower
3445
/// and upper bounds in case of static and dynamic (dispatch) schedule
3446
/// of the associated 'for' or 'distribute' loop.
3447
/// Produce lvalues for the lower/upper bound helper variables of a 'for' or
/// 'distribute' loop with a static schedule.
static std::pair<LValue, LValue>
emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  const auto &LoopDir = cast<OMPLoopDirective>(S);
  // Emit LB first, then UB, matching the order Sema declared them.
  LValue LowerBound =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LoopDir.getLowerBoundVariable()));
  LValue UpperBound =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LoopDir.getUpperBoundVariable()));
  return std::make_pair(LowerBound, UpperBound);
}
3456
3457
/// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
3458
/// consider the lower and upper bound expressions generated by the
3459
/// worksharing loop support, but we use 0 and the iteration space size as
3460
/// constants
3461
/// For dispatch schedules (dynamic, guided, ...) the bounds are the constants
/// 0 and the iteration-space size rather than the LB/UB helper variables.
static std::pair<llvm::Value *, llvm::Value *>
emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
                          Address LB, Address UB) {
  const auto &LoopDir = cast<OMPLoopDirective>(S);
  const Expr *IVExpr = LoopDir.getIterationVariable();
  // Bounds are sized like the iteration variable.
  const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
  llvm::Value *LowerBoundVal = CGF.Builder.getIntN(IVSize, 0);
  llvm::Value *UpperBoundVal = CGF.EmitScalarExpr(LoopDir.getLastIteration());
  return std::make_pair(LowerBoundVal, UpperBoundVal);
}
3471
3472
/// Emits internal temp array declarations for the directive with inscan
3473
/// reductions.
3474
/// The code is the following:
3475
/// \code
3476
/// size num_iters = <num_iters>;
3477
/// <type> buffer[num_iters];
3478
/// \endcode
3479
/// Emits the per-reduction temporary buffer declarations needed by a
/// directive with inscan reductions:
/// \code
/// size num_iters = <num_iters>;
/// <type> buffer[num_iters];
/// \endcode
/// \param NumIteratorsGen Callback computing the iteration count; its result
///        is zero-extended to size_t and bound to each buffer's VLA size.
static void emitScanBasedDirectiveDecls(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
  llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
      NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
  // Collect the relevant expression lists from all inscan reduction clauses.
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> CopyArrayTemps;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    assert(C->getModifier() == OMPC_REDUCTION_inscan &&
           "Only inscan reductions are expected.");
    Shareds.append(C->varlist_begin(), C->varlist_end());
    Privates.append(C->privates().begin(), C->privates().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    CopyArrayTemps.append(C->copy_array_temps().begin(),
                          C->copy_array_temps().end());
  }
  {
    // Emit buffers for each reduction variables.
    // ReductionCodeGen is required to emit correctly the code for array
    // reductions.
    ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
    unsigned Count = 0;
    auto *ITA = CopyArrayTemps.begin();
    for (const Expr *IRef : Privates) {
      const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
      // Emit variably modified arrays, used for arrays/array sections
      // reductions.
      if (PrivateVD->getType()->isVariablyModifiedType()) {
        RedCG.emitSharedOrigLValue(CGF, Count);
        RedCG.emitAggregateType(CGF, Count);
      }
      // Bind the buffer's VLA size expression to the iteration count so that
      // EmitVarDecl below allocates a buffer of num_iters elements.
      CodeGenFunction::OpaqueValueMapping DimMapping(
          CGF,
          cast<OpaqueValueExpr>(
              cast<VariableArrayType>((*ITA)->getType()->getAsArrayTypeUnsafe())
                  ->getSizeExpr()),
          RValue::get(OMPScanNumIterations));
      // Emit temp buffer.
      CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(*ITA)->getDecl()));
      ++ITA;
      ++Count;
    }
  }
}
3525
3526
/// Copies final inscan reductions values to the original variables.
3527
/// The code is the following:
3528
/// \code
3529
/// <orig_var> = buffer[num_iters-1];
3530
/// \endcode
3531
/// Copies final inscan reduction values back to the original variables:
/// \code
/// <orig_var> = buffer[num_iters-1];
/// \endcode
/// \param NumIteratorsGen Callback computing the iteration count used to
///        index the last buffer element.
static void emitScanBasedDirectiveFinals(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
  llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
      NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
  // Collect the relevant expression lists from all inscan reduction clauses.
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> CopyOps;
  SmallVector<const Expr *, 4> CopyArrayElems;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    assert(C->getModifier() == OMPC_REDUCTION_inscan &&
           "Only inscan reductions are expected.");
    Shareds.append(C->varlist_begin(), C->varlist_end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    Privates.append(C->privates().begin(), C->privates().end());
    CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
    CopyArrayElems.append(C->copy_array_elems().begin(),
                          C->copy_array_elems().end());
  }
  // Create temp var and copy LHS value to this temp value.
  // LHS = TMP[LastIter];
  llvm::Value *OMPLast = CGF.Builder.CreateNSWSub(
      OMPScanNumIterations,
      llvm::ConstantInt::get(CGF.SizeTy, 1, /*isSigned=*/false));
  for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
    const Expr *PrivateExpr = Privates[I];
    const Expr *OrigExpr = Shareds[I];
    const Expr *CopyArrayElem = CopyArrayElems[I];
    // Bind the subscript of buffer[i] to num_iters-1 for this copy.
    CodeGenFunction::OpaqueValueMapping IdxMapping(
        CGF,
        cast<OpaqueValueExpr>(
            cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
        RValue::get(OMPLast));
    LValue DestLVal = CGF.EmitLValue(OrigExpr);
    LValue SrcLVal = CGF.EmitLValue(CopyArrayElem);
    // Perform the element copy using the clause's copy operation.
    CGF.EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(CGF),
                    SrcLVal.getAddress(CGF),
                    cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
                    cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
                    CopyOps[I]);
  }
}
3576
3577
/// Emits the code for the directive with inscan reductions.
3578
/// The code is the following:
3579
/// \code
3580
/// #pragma omp ...
3581
/// for (i: 0..<num_iters>) {
3582
///   <input phase>;
3583
///   buffer[i] = red;
3584
/// }
3585
/// #pragma omp master // in parallel region
3586
/// for (int k = 0; k != ceil(log2(num_iters)); ++k)
3587
/// for (size cnt = last_iter; cnt >= pow(2, k); --k)
3588
///   buffer[i] op= buffer[i-pow(2,k)];
3589
/// #pragma omp barrier // in parallel region
3590
/// #pragma omp ...
3591
/// for (0..<num_iters>) {
3592
///   red = InclusiveScan ? buffer[i] : buffer[i-1];
3593
///   <scan phase>;
3594
/// }
3595
/// \endcode
3596
/// Emits the two-pass code for a directive with inscan reductions: the input
/// phase loop (FirstGen), a logarithmic prefix-sum combine over the temp
/// buffers, then the scan phase loop (SecondGen).
/// \param NumIteratorsGen Callback computing the iteration count.
/// \param FirstGen  Emits the first (input-phase) loop.
/// \param SecondGen Emits the second (scan-phase) loop.
static void emitScanBasedDirective(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen,
    llvm::function_ref<void(CodeGenFunction &)> FirstGen,
    llvm::function_ref<void(CodeGenFunction &)> SecondGen) {
  llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
      NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
  // Collect the relevant expression lists from all inscan reduction clauses.
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  SmallVector<const Expr *, 4> CopyArrayElems;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    assert(C->getModifier() == OMPC_REDUCTION_inscan &&
           "Only inscan reductions are expected.");
    Privates.append(C->privates().begin(), C->privates().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    CopyArrayElems.append(C->copy_array_elems().begin(),
                          C->copy_array_elems().end());
  }
  CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S);
  {
    // Emit loop with input phase:
    // #pragma omp ...
    // for (i: 0..<num_iters>) {
    //   <input phase>;
    //   buffer[i] = red;
    // }
    CGF.OMPFirstScanLoop = true;
    CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
    FirstGen(CGF);
  }
  // #pragma omp barrier // in parallel region
  auto &&CodeGen = [&S, OMPScanNumIterations, &LHSs, &RHSs, &CopyArrayElems,
                    &ReductionOps,
                    &Privates](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    // Emit prefix reduction:
    // #pragma omp master // in parallel region
    // for (int k = 0; k <= ceil(log2(n)); ++k)
    llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock();
    llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.outer.log.scan.body");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.outer.log.scan.exit");
    // Compute the outer trip count ceil(log2(num_iters)) via the llvm.log2
    // and llvm.ceil intrinsics on a double, truncated back to an int.
    llvm::Function *F =
        CGF.CGM.getIntrinsic(llvm::Intrinsic::log2, CGF.DoubleTy);
    llvm::Value *Arg =
        CGF.Builder.CreateUIToFP(OMPScanNumIterations, CGF.DoubleTy);
    llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(F, Arg);
    F = CGF.CGM.getIntrinsic(llvm::Intrinsic::ceil, CGF.DoubleTy);
    LogVal = CGF.EmitNounwindRuntimeCall(F, LogVal);
    LogVal = CGF.Builder.CreateFPToUI(LogVal, CGF.IntTy);
    llvm::Value *NMin1 = CGF.Builder.CreateNUWSub(
        OMPScanNumIterations, llvm::ConstantInt::get(CGF.SizeTy, 1));
    auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getBeginLoc());
    CGF.EmitBlock(LoopBB);
    // Outer-loop PHIs: k (Counter) and 2^k (Pow2K).
    auto *Counter = CGF.Builder.CreatePHI(CGF.IntTy, 2);
    // size pow2k = 1;
    auto *Pow2K = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
    Counter->addIncoming(llvm::ConstantInt::get(CGF.IntTy, 0), InputBB);
    Pow2K->addIncoming(llvm::ConstantInt::get(CGF.SizeTy, 1), InputBB);
    // for (size i = n - 1; i >= 2 ^ k; --i)
    //   tmp[i] op= tmp[i-pow2k];
    llvm::BasicBlock *InnerLoopBB =
        CGF.createBasicBlock("omp.inner.log.scan.body");
    llvm::BasicBlock *InnerExitBB =
        CGF.createBasicBlock("omp.inner.log.scan.exit");
    llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(NMin1, Pow2K);
    CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
    CGF.EmitBlock(InnerLoopBB);
    auto *IVal = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
    IVal->addIncoming(NMin1, LoopBB);
    {
      // Map the reduction LHS/RHS variables onto buffer[i] and
      // buffer[i-pow2k] respectively, then emit the combine as a simple
      // (nowait) reduction.
      CodeGenFunction::OMPPrivateScope PrivScope(CGF);
      auto *ILHS = LHSs.begin();
      auto *IRHS = RHSs.begin();
      for (const Expr *CopyArrayElem : CopyArrayElems) {
        const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
        Address LHSAddr = Address::invalid();
        {
          CodeGenFunction::OpaqueValueMapping IdxMapping(
              CGF,
              cast<OpaqueValueExpr>(
                  cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
              RValue::get(IVal));
          LHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
        }
        PrivScope.addPrivate(LHSVD, LHSAddr);
        Address RHSAddr = Address::invalid();
        {
          llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(IVal, Pow2K);
          CodeGenFunction::OpaqueValueMapping IdxMapping(
              CGF,
              cast<OpaqueValueExpr>(
                  cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
              RValue::get(OffsetIVal));
          RHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
        }
        PrivScope.addPrivate(RHSVD, RHSAddr);
        ++ILHS;
        ++IRHS;
      }
      PrivScope.Privatize();
      CGF.CGM.getOpenMPRuntime().emitReduction(
          CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
          {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown});
    }
    // --i and inner-loop back edge.
    llvm::Value *NextIVal =
        CGF.Builder.CreateNUWSub(IVal, llvm::ConstantInt::get(CGF.SizeTy, 1));
    IVal->addIncoming(NextIVal, CGF.Builder.GetInsertBlock());
    CmpI = CGF.Builder.CreateICmpUGE(NextIVal, Pow2K);
    CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
    CGF.EmitBlock(InnerExitBB);
    // ++k and outer-loop back edge.
    llvm::Value *Next =
        CGF.Builder.CreateNUWAdd(Counter, llvm::ConstantInt::get(CGF.IntTy, 1));
    Counter->addIncoming(Next, CGF.Builder.GetInsertBlock());
    // pow2k <<= 1;
    llvm::Value *NextPow2K =
        CGF.Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true);
    Pow2K->addIncoming(NextPow2K, CGF.Builder.GetInsertBlock());
    llvm::Value *Cmp = CGF.Builder.CreateICmpNE(Next, LogVal);
    CGF.Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
    auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getEndLoc());
    CGF.EmitBlock(ExitBB);
  };
  // Inside a parallel region the combine runs in 'master' followed by a
  // barrier; otherwise it is emitted inline.
  if (isOpenMPParallelDirective(S.getDirectiveKind())) {
    CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc());
    CGF.CGM.getOpenMPRuntime().emitBarrierCall(
        CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
        /*ForceSimpleCall=*/true);
  } else {
    RegionCodeGenTy RCG(CodeGen);
    RCG(CGF);
  }

  CGF.OMPFirstScanLoop = false;
  SecondGen(CGF);
}
3736
3737
/// Emit the worksharing loop shared by 'for', 'for simd', 'parallel for',
/// etc.
///
/// \param CGF       The function being emitted into.
/// \param S         The loop directive.
/// \param HasCancel Whether the region may be cancelled ('cancel' inside).
/// \returns true if the directive had lastprivate clauses whose init code was
///          emitted; callers use this to decide whether an implicit barrier
///          is still required even under 'nowait'.
static bool emitWorksharingDirective(CodeGenFunction &CGF,
                                     const OMPLoopDirective &S,
                                     bool HasCancel) {
  bool HasLastprivates;
  // Inscan reductions need the loop emitted twice (an input phase and a scan
  // phase); detect that case by scanning the reduction clause modifiers.
  if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
                   [](const OMPReductionClause *C) {
                     return C->getModifier() == OMPC_REDUCTION_inscan;
                   })) {
    // Computes the loop's iteration count; used by the scan machinery to
    // size its temporary buffers.
    const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
      CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
      OMPLoopScope LoopScope(CGF, S);
      return CGF.EmitScalarExpr(S.getNumIterations());
    };
    // First pass over the loop: emit it and follow with an implicit barrier
    // so all threads finish the input phase before the scan phase starts.
    const auto &&FirstGen = [&S, HasCancel](CodeGenFunction &CGF) {
      CodeGenFunction::OMPCancelStackRAII CancelRegion(
          CGF, S.getDirectiveKind(), HasCancel);
      (void)CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
                                       emitForLoopBounds,
                                       emitDispatchForLoopBounds);
      // Emit an implicit barrier at the end.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getBeginLoc(),
                                                 OMPD_for);
    };
    // Second pass: emit the loop again; this pass determines whether
    // lastprivate handling was emitted.
    const auto &&SecondGen = [&S, HasCancel,
                              &HasLastprivates](CodeGenFunction &CGF) {
      CodeGenFunction::OMPCancelStackRAII CancelRegion(
          CGF, S.getDirectiveKind(), HasCancel);
      HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
                                                   emitForLoopBounds,
                                                   emitDispatchForLoopBounds);
    };
    // For combined parallel directives the decls/finals are emitted by the
    // enclosing 'parallel' codegen instead (see EmitOMPParallelForDirective).
    if (!isOpenMPParallelDirective(S.getDirectiveKind()))
      emitScanBasedDirectiveDecls(CGF, S, NumIteratorsGen);
    emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen);
    if (!isOpenMPParallelDirective(S.getDirectiveKind()))
      emitScanBasedDirectiveFinals(CGF, S, NumIteratorsGen);
  } else {
    // No inscan reduction: a single, ordinary worksharing loop emission.
    CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
                                                     HasCancel);
    HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
                                                 emitForLoopBounds,
                                                 emitDispatchForLoopBounds);
  }
  return HasLastprivates;
}
3782
3783
65
static bool isSupportedByOpenMPIRBuilder(const OMPForDirective &S) {
3784
65
  if (S.hasCancel())
3785
4
    return false;
3786
61
  for (OMPClause *C : S.clauses()) {
3787
45
    if (isa<OMPNowaitClause>(C))
3788
0
      continue;
3789
3790
45
    if (auto *SC = dyn_cast<OMPScheduleClause>(C)) {
3791
17
      if (SC->getFirstScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown)
3792
0
        return false;
3793
17
      if (SC->getSecondScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown)
3794
0
        return false;
3795
17
      switch (SC->getScheduleKind()) {
3796
5
      case OMPC_SCHEDULE_auto:
3797
11
      case OMPC_SCHEDULE_dynamic:
3798
12
      case OMPC_SCHEDULE_runtime:
3799
12
      case OMPC_SCHEDULE_guided:
3800
17
      case OMPC_SCHEDULE_static:
3801
17
        continue;
3802
0
      case OMPC_SCHEDULE_unknown:
3803
0
        return false;
3804
17
      }
3805
17
    }
3806
3807
28
    return false;
3808
45
  }
3809
3810
33
  return true;
3811
61
}
3812
3813
/// Map a clause-level OpenMP schedule kind onto the OpenMPIRBuilder's
/// schedule enumeration; an unknown clause kind maps to the default schedule.
static llvm::omp::ScheduleKind
convertClauseKindToSchedKind(OpenMPScheduleClauseKind ScheduleClauseKind) {
  switch (ScheduleClauseKind) {
  case OMPC_SCHEDULE_static:
    return llvm::omp::OMP_SCHEDULE_Static;
  case OMPC_SCHEDULE_dynamic:
    return llvm::omp::OMP_SCHEDULE_Dynamic;
  case OMPC_SCHEDULE_guided:
    return llvm::omp::OMP_SCHEDULE_Guided;
  case OMPC_SCHEDULE_auto:
    return llvm::omp::OMP_SCHEDULE_Auto;
  case OMPC_SCHEDULE_runtime:
    return llvm::omp::OMP_SCHEDULE_Runtime;
  case OMPC_SCHEDULE_unknown:
    return llvm::omp::OMP_SCHEDULE_Default;
  }
  llvm_unreachable("Unhandled schedule kind");
}
3831
3832
429
/// Emit code for a '#pragma omp for' directive. Uses the OpenMPIRBuilder
/// lowering when it is enabled and the directive's clause set is supported
/// by it (see isSupportedByOpenMPIRBuilder); otherwise falls back to the
/// classic clang codegen via emitWorksharingDirective.
void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
  bool HasLastprivates = false;
  bool UseOMPIRBuilder =
      CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S);
  auto &&CodeGen = [this, &S, &HasLastprivates,
                    UseOMPIRBuilder](CodeGenFunction &CGF, PrePostActionTy &) {
    // Use the OpenMPIRBuilder if enabled.
    if (UseOMPIRBuilder) {
      // Without a 'nowait' clause the builder must emit the implicit barrier.
      bool NeedsBarrier = !S.getSingleClause<OMPNowaitClause>();

      // Translate an optional 'schedule' clause (kind + chunk size) into the
      // builder's representation.
      llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default;
      llvm::Value *ChunkSize = nullptr;
      if (auto *SchedClause = S.getSingleClause<OMPScheduleClause>()) {
        SchedKind =
            convertClauseKindToSchedKind(SchedClause->getScheduleKind());
        if (const Expr *ChunkSizeExpr = SchedClause->getChunkSize())
          ChunkSize = EmitScalarExpr(ChunkSizeExpr);
      }

      // Emit the associated statement and get its loop representation.
      const Stmt *Inner = S.getRawStmt();
      llvm::CanonicalLoopInfo *CLI =
          EmitOMPCollapsedCanonicalLoopNest(Inner, 1);

      // Turn the canonical loop into a workshared loop.
      llvm::OpenMPIRBuilder &OMPBuilder =
          CGM.getOpenMPRuntime().getOMPBuilder();
      llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
          AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
      OMPBuilder.applyWorkshareLoop(
          Builder.getCurrentDebugLocation(), CLI, AllocaIP, NeedsBarrier,
          SchedKind, ChunkSize, /*HasSimdModifier=*/false,
          /*HasMonotonicModifier=*/false, /*HasNonmonotonicModifier=*/false,
          /*HasOrderedClause=*/false);
      return;
    }

    // Classic codegen path.
    HasLastprivates = emitWorksharingDirective(CGF, S, S.hasCancel());
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
                                                S.hasCancel());
  }

  if (!UseOMPIRBuilder) {
    // Emit an implicit barrier at the end. Lastprivates force the barrier
    // even when 'nowait' was specified.
    if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
      CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}
3886
3887
253
void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
3888
253
  bool HasLastprivates = false;
3889
253
  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
3890
253
                                          PrePostActionTy &) {
3891
253
    HasLastprivates = emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
3892
253
  };
3893
253
  {
3894
253
    auto LPCRegion =
3895
253
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3896
253
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
3897
253
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
3898
253
  }
3899
3900
  // Emit an implicit barrier at the end.
3901
253
  if (!S.getSingleClause<OMPNowaitClause>() || 
HasLastprivates0
)
3902
253
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
3903
  // Check for outer lastprivate conditional update.
3904
253
  checkForLastprivateConditionalUpdate(*this, S);
3905
253
}
3906
3907
/// Create a named stack temporary of type \p Ty used as a 'sections' helper
/// variable (lower bound, upper bound, stride, last-iteration flag, or loop
/// counter), optionally storing \p Init into it, and return it as an lvalue.
static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
                                const Twine &Name,
                                llvm::Value *Init = nullptr) {
  Address Tmp = CGF.CreateMemTemp(Ty, Name);
  LValue Result = CGF.MakeAddrLValue(Tmp, Ty);
  if (Init)
    CGF.EmitStoreThroughLValue(RValue::get(Init), Result, /*isInit*/ true);
  return Result;
}
3915
3916
80
/// Emit the body of a 'sections' (or 'parallel sections') directive by
/// lowering it to a statically-scheduled worksharing loop over the section
/// indices, with a switch inside the loop body dispatching to each section.
void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
  const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
  // CS is non-null when the region is a compound statement whose children
  // are the individual sections; otherwise there is a single section.
  const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, CapturedStmt, CS,
                    &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) {
    const ASTContext &C = CGF.getContext();
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    // Emit helper vars inits.
    LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
                                  CGF.Builder.getInt32(0));
    // Global upper bound = number of sections - 1 (0 for a single section).
    llvm::ConstantInt *GlobalUBVal = CS != nullptr
                                         ? CGF.Builder.getInt32(CS->size() - 1)
                                         : CGF.Builder.getInt32(0);
    LValue UB =
        createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
    LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
                                  CGF.Builder.getInt32(1));
    LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
                                  CGF.Builder.getInt32(0));
    // Loop counter.
    LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
    // Wrap IV and UB in opaque value expressions so the synthesized AST
    // condition/increment below can reference them.
    OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
    OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
    // Generate condition for loop.
    BinaryOperator *Cond = BinaryOperator::Create(
        C, &IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_PRValue, OK_Ordinary,
        S.getBeginLoc(), FPOptionsOverride());
    // Increment for loop counter.
    UnaryOperator *Inc = UnaryOperator::Create(
        C, &IVRefExpr, UO_PreInc, KmpInt32Ty, VK_PRValue, OK_Ordinary,
        S.getBeginLoc(), true, FPOptionsOverride());
    auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) {
      // Iterate through all sections and emit a switch construct:
      // switch (IV) {
      //   case 0:
      //     <SectionStmt[0]>;
      //     break;
      // ...
      //   case <NumSection> - 1:
      //     <SectionStmt[<NumSection> - 1]>;
      //     break;
      // }
      // .omp.sections.exit:
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
      llvm::SwitchInst *SwitchStmt =
          CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()),
                                   ExitBB, CS == nullptr ? 1 : CS->size());
      if (CS) {
        unsigned CaseNumber = 0;
        for (const Stmt *SubStmt : CS->children()) {
          auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
          CGF.EmitBlock(CaseBB);
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
          CGF.EmitStmt(SubStmt);
          CGF.EmitBranch(ExitBB);
          ++CaseNumber;
        }
      } else {
        // Single section: emit the whole captured statement as case 0.
        llvm::BasicBlock *CaseBB = CGF.createBasicBlock(".omp.sections.case");
        CGF.EmitBlock(CaseBB);
        SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
        CGF.EmitStmt(CapturedStmt);
        CGF.EmitBranch(ExitBB);
      }
      CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
    };

    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // initialization of firstprivate variables and post-update of lastprivate
      // variables.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, LoopScope);
    CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(CGF, S, IV);
    HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);

    // Emit static non-chunked loop.
    OpenMPScheduleTy ScheduleKind;
    ScheduleKind.Schedule = OMPC_SCHEDULE_static;
    CGOpenMPRuntime::StaticRTInput StaticInit(
        /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(CGF),
        LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF));
    CGF.CGM.getOpenMPRuntime().emitForStaticInit(
        CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, StaticInit);
    // UB = min(UB, GlobalUB);
    llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, S.getBeginLoc());
    llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
    CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
    // IV = LB;
    CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getBeginLoc()), IV);
    // while (idx <= UB) { BODY; ++idx; }
    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, Cond, Inc, BodyGen,
                         [](CodeGenFunction &) {});
    // Tell the runtime we are done.
    auto &&CodeGen = [&S](CodeGenFunction &CGF) {
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
                                                     S.getDirectiveKind());
    };
    CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
    // Emit post-update of the reduction variables if IsLastIter != 0.
    emitPostUpdateForReductionClause(CGF, S, [IL, &S](CodeGenFunction &CGF) {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
    });

    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivates)
      CGF.EmitOMPLastprivateClauseFinal(
          S, /*NoFinals=*/false,
          CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc())));
  };

  // 'sections' and 'parallel sections' are the only callers that can carry a
  // 'cancel'; other combined forms pass through with HasCancel == false.
  bool HasCancel = false;
  if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
    HasCancel = OSD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
    HasCancel = OPSD->hasCancel();
  OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
                                              HasCancel);
  // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
  // clause. Otherwise the barrier will be generated by the codegen for the
  // directive.
  if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
    // Emit implicit barrier to synchronize threads and avoid data races on
    // initialization of firstprivate variables.
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
                                           OMPD_unknown);
  }
}
4061
4062
62
/// Emit code for a '#pragma omp sections' directive, either through the
/// OpenMPIRBuilder (when enabled) or via the classic EmitSections lowering
/// followed by the implicit end-of-construct barrier.
void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
    using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;

    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    const CapturedStmt *ICS = S.getInnermostCapturedStmt();
    const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
    const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
    // One body-generation callback per section; a non-compound region is
    // treated as a single section.
    llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
    if (CS) {
      for (const Stmt *SubStmt : CS->children()) {
        auto SectionCB = [this, SubStmt](InsertPointTy AllocaIP,
                                         InsertPointTy CodeGenIP) {
          OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
              *this, SubStmt, AllocaIP, CodeGenIP, "section");
        };
        SectionCBVector.push_back(SectionCB);
      }
    } else {
      auto SectionCB = [this, CapturedStmt](InsertPointTy AllocaIP,
                                            InsertPointTy CodeGenIP) {
        OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
            *this, CapturedStmt, AllocaIP, CodeGenIP, "section");
      };
      SectionCBVector.push_back(SectionCB);
    }

    // Privatization callback that performs appropriate action for
    // shared/private/firstprivate/lastprivate/copyin/... variables.
    //
    // TODO: This defaults to shared right now.
    auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                     llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
      // The next line is appropriate only for variables (Val) with the
      // data-sharing attribute "shared".
      ReplVal = &Val;

      return CodeGenIP;
    };

    CGCapturedStmtInfo CGSI(*ICS, CR_OpenMP);
    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
        AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
    Builder.restoreIP(OMPBuilder.createSections(
        Builder, AllocaIP, SectionCBVector, PrivCB, FiniCB, S.hasCancel(),
        S.getSingleClause<OMPNowaitClause>()));
    return;
  }
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    EmitSections(S);
  }
  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>()) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
                                           OMPD_sections);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}
4130
4131
54
/// Emit code for a single '#pragma omp section' inside a 'sections' region.
/// With the OpenMPIRBuilder enabled, the builder's createSection is used;
/// otherwise the section's statement is emitted directly (the enclosing
/// EmitSections switch handles dispatch).
void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    const Stmt *SectionRegionBodyStmt = S.getAssociatedStmt();
    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP,
                                                   InsertPointTy CodeGenIP) {
      OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
          *this, SectionRegionBodyStmt, AllocaIP, CodeGenIP, "section");
    };

    LexicalScope Scope(*this, S.getSourceRange());
    EmitStopPoint(&S);
    Builder.restoreIP(OMPBuilder.createSection(Builder, BodyGenCB, FiniCB));

    return;
  }
  LexicalScope Scope(*this, S.getSourceRange());
  EmitStopPoint(&S);
  EmitStmt(S.getAssociatedStmt());
}
4157
4158
61
/// Emit code for a '#pragma omp single' directive, including 'copyprivate'
/// broadcasting of values from the executing thread to the team and the
/// implicit end-of-construct barrier.
void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
  llvm::SmallVector<const Expr *, 8> CopyprivateVars;
  llvm::SmallVector<const Expr *, 8> DestExprs;
  llvm::SmallVector<const Expr *, 8> SrcExprs;
  llvm::SmallVector<const Expr *, 8> AssignmentOps;
  // Check if there are any 'copyprivate' clauses associated with this
  // 'single' construct.
  // Build a list of copyprivate variables along with helper expressions
  // (<source>, <destination>, <destination>=<source> expressions)
  for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
    CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
    DestExprs.append(C->destination_exprs().begin(),
                     C->destination_exprs().end());
    SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
    AssignmentOps.append(C->assignment_ops().begin(),
                         C->assignment_ops().end());
  }
  // Emit code for 'single' region along with 'copyprivate' clauses
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope SingleScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
    CGF.EmitOMPPrivateClause(S, SingleScope);
    (void)SingleScope.Privatize();
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getBeginLoc(),
                                            CopyprivateVars, DestExprs,
                                            SrcExprs, AssignmentOps);
  }
  // Emit an implicit barrier at the end (to avoid data race on firstprivate
  // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
  // When copyprivate vars exist, the runtime's single-region emission handles
  // the synchronization itself, so no extra barrier is emitted here.
  if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
    CGM.getOpenMPRuntime().emitBarrierCall(
        *this, S.getBeginLoc(),
        S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}
4202
4203
37
/// Emit the raw statement of \p S as an OpenMP 'master' region through the
/// runtime's emitMasterRegion helper.
static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  auto &&BodyGen = [&S](CodeGenFunction &InnerCGF, PrePostActionTy &Action) {
    Action.Enter(InnerCGF);
    InnerCGF.EmitStmt(S.getRawStmt());
  };
  CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
  RT.emitMasterRegion(CGF, BodyGen, S.getBeginLoc());
}
4210
4211
25
/// Emit code for a '#pragma omp master' directive, via the OpenMPIRBuilder's
/// createMaster when enabled, otherwise via the classic emitMaster helper.
void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    const Stmt *MasterRegionBodyStmt = S.getAssociatedStmt();

    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP,
                                                  InsertPointTy CodeGenIP) {
      OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
          *this, MasterRegionBodyStmt, AllocaIP, CodeGenIP, "master");
    };

    LexicalScope Scope(*this, S.getSourceRange());
    EmitStopPoint(&S);
    Builder.restoreIP(OMPBuilder.createMaster(Builder, BodyGenCB, FiniCB));

    return;
  }
  LexicalScope Scope(*this, S.getSourceRange());
  EmitStopPoint(&S);
  emitMaster(*this, S);
}
4238
4239
24
/// Emit the raw statement of \p S as an OpenMP 'masked' region, passing the
/// optional 'filter' clause thread-id expression (or nullptr when absent) to
/// the runtime's emitMaskedRegion helper.
static void emitMasked(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  auto &&BodyGen = [&S](CodeGenFunction &InnerCGF, PrePostActionTy &Action) {
    Action.Enter(InnerCGF);
    InnerCGF.EmitStmt(S.getRawStmt());
  };
  // A 'filter' clause selects which thread executes the region.
  Expr *Filter = nullptr;
  if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
    Filter = FilterClause->getThreadID();
  CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
  RT.emitMaskedRegion(CGF, BodyGen, S.getBeginLoc(), Filter);
}
4250
4251
40
/// Emit code for a '#pragma omp masked' directive, via the OpenMPIRBuilder's
/// createMasked when enabled, otherwise via the classic emitMasked helper.
void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    const Stmt *MaskedRegionBodyStmt = S.getAssociatedStmt();
    // 'filter' thread id; without the clause the region defaults to thread 0.
    const Expr *Filter = nullptr;
    if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
      Filter = FilterClause->getThreadID();
    llvm::Value *FilterVal = Filter
                                 ? EmitScalarExpr(Filter, CGM.Int32Ty)
                                 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);

    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    auto BodyGenCB = [MaskedRegionBodyStmt, this](InsertPointTy AllocaIP,
                                                  InsertPointTy CodeGenIP) {
      OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
          *this, MaskedRegionBodyStmt, AllocaIP, CodeGenIP, "masked");
    };

    LexicalScope Scope(*this, S.getSourceRange());
    EmitStopPoint(&S);
    Builder.restoreIP(
        OMPBuilder.createMasked(Builder, BodyGenCB, FiniCB, FilterVal));

    return;
  }
  LexicalScope Scope(*this, S.getSourceRange());
  EmitStopPoint(&S);
  emitMasked(*this, S);
}
4285
4286
105
/// Emit code for a '#pragma omp critical' directive (a named mutual-exclusion
/// region with an optional 'hint' clause), via the OpenMPIRBuilder's
/// createCritical when enabled or the runtime's emitCriticalRegion otherwise.
void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    const Stmt *CriticalRegionBodyStmt = S.getAssociatedStmt();
    const Expr *Hint = nullptr;
    if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
      Hint = HintClause->getHint();

    // TODO: This is slightly different from what's currently being done in
    // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything
    // about typing is final.
    llvm::Value *HintInst = nullptr;
    if (Hint)
      HintInst =
          Builder.CreateIntCast(EmitScalarExpr(Hint), CGM.Int32Ty, false);

    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP,
                                                    InsertPointTy CodeGenIP) {
      OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
          *this, CriticalRegionBodyStmt, AllocaIP, CodeGenIP, "critical");
    };

    LexicalScope Scope(*this, S.getSourceRange());
    EmitStopPoint(&S);
    Builder.restoreIP(OMPBuilder.createCritical(
        Builder, BodyGenCB, FiniCB, S.getDirectiveName().getAsString(),
        HintInst));

    return;
  }

  // Classic codegen path: wrap the associated statement in a runtime-managed
  // critical region keyed by the directive's name.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(S.getAssociatedStmt());
  };
  const Expr *Hint = nullptr;
  if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
    Hint = HintClause->getHint();
  LexicalScope Scope(*this, S.getSourceRange());
  EmitStopPoint(&S);
  CGM.getOpenMPRuntime().emitCriticalRegion(*this,
                                            S.getDirectiveName().getAsString(),
                                            CodeGen, S.getBeginLoc(), Hint);
}
4336
4337
/// Emit code for a combined '#pragma omp parallel for' directive.
void CodeGenFunction::EmitOMPParallelForDirective(
    const OMPParallelForDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    (void)emitWorksharingDirective(CGF, S, S.hasCancel());
  };
  {
    // Computes the loop's iteration count for the inscan-reduction scan
    // buffers; only used when an inscan reduction is present.
    const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
      CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
      CGCapturedStmtInfo CGSI(CR_OpenMP);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
      OMPLoopScope LoopScope(CGF, S);
      return CGF.EmitScalarExpr(S.getNumIterations());
    };
    // Inscan reductions need their declarations emitted before, and their
    // finalization after, the parallel region (the combined-directive case
    // skipped inside emitWorksharingDirective).
    bool IsInscan = llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
                     [](const OMPReductionClause *C) {
                       return C->getModifier() == OMPC_REDUCTION_inscan;
                     });
    if (IsInscan)
      emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
                                   emitEmptyBoundParameters);
    if (IsInscan)
      emitScanBasedDirectiveFinals(*this, S, NumIteratorsGen);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}
4369
4370
// Lowers '#pragma omp parallel for simd'; same shape as
// EmitOMPParallelForDirective but uses OMPD_for_simd and never has cancel.
void CodeGenFunction::EmitOMPParallelForSimdDirective(
4371
105
    const OMPParallelForSimdDirective &S) {
4372
  // Emit directive as a combined directive that consists of two implicit
4373
  // directives: 'parallel' with 'for' directive.
4374
105
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4375
105
    Action.Enter(CGF);
4376
105
    (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
4377
105
  };
4378
105
  {
4379
105
    // Lazily evaluates the loop trip count inside a throwaway
    // captured-statement/loop scope; only invoked for inscan reductions.
    const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
4380
8
      CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
4381
8
      CGCapturedStmtInfo CGSI(CR_OpenMP);
4382
8
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
4383
8
      OMPLoopScope LoopScope(CGF, S);
4384
8
      return CGF.EmitScalarExpr(S.getNumIterations());
4385
8
    };
4386
105
    // reduction(inscan, ...) requires scan declarations emitted before the
    // parallel region and finalization code emitted after it.
    bool IsInscan = llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
4387
105
                     [](const OMPReductionClause *C) {
4388
10
                       return C->getModifier() == OMPC_REDUCTION_inscan;
4389
10
                     });
4390
105
    if (IsInscan)
4391
4
      emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
4392
105
    auto LPCRegion =
4393
105
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4394
105
    emitCommonOMPParallelDirective(*this, S, OMPD_for_simd, CodeGen,
4395
105
                                   emitEmptyBoundParameters);
4396
105
    if (IsInscan)
4397
4
      emitScanBasedDirectiveFinals(*this, S, NumIteratorsGen);
4398
105
  }
4399
  // Check for outer lastprivate conditional update.
4400
105
  checkForLastprivateConditionalUpdate(*this, S);
4401
105
}
4402
4403
// Lowers '#pragma omp parallel master': an outer 'parallel' region whose body
// initializes clause privatization and then runs the 'master' region.
void CodeGenFunction::EmitOMPParallelMasterDirective(
4404
22
    const OMPParallelMasterDirective &S) {
4405
  // Emit directive as a combined directive that consists of two implicit
4406
  // directives: 'parallel' with 'master' directive.
4407
22
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4408
22
    Action.Enter(CGF);
4409
22
    OMPPrivateScope PrivateScope(CGF);
4410
22
    // Clause ordering matters: copyin/firstprivate first, then the barrier
    // (if copyin was present), then private/reduction, then Privatize().
    bool Copyins = CGF.EmitOMPCopyinClause(S);
4411
22
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
4412
22
    if (Copyins) {
4413
      // Emit implicit barrier to synchronize threads and avoid data races on
4414
      // propagation master's thread values of threadprivate variables to local
4415
      // instances of that variables of all other implicit threads.
4416
3
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
4417
3
          CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
4418
3
          /*ForceSimpleCall=*/true);
4419
3
    }
4420
22
    CGF.EmitOMPPrivateClause(S, PrivateScope);
4421
22
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4422
22
    (void)PrivateScope.Privatize();
4423
22
    emitMaster(CGF, S);
4424
22
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
4425
22
  };
4426
22
  {
4427
22
    auto LPCRegion =
4428
22
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4429
22
    emitCommonOMPParallelDirective(*this, S, OMPD_master, CodeGen,
4430
22
                                   emitEmptyBoundParameters);
4431
22
    // No post-update expression is needed here, hence the nullptr generator.
    emitPostUpdateForReductionClause(*this, S,
4432
22
                                     [](CodeGenFunction &) 
{ return nullptr; }0
);
4433
22
  }
4434
  // Check for outer lastprivate conditional update.
4435
22
  checkForLastprivateConditionalUpdate(*this, S);
4436
22
}
4437
4438
// Lowers '#pragma omp parallel sections': an outer 'parallel' region whose
// body emits the 'sections' worksharing construct (EmitSections).
void CodeGenFunction::EmitOMPParallelSectionsDirective(
4439
26
    const OMPParallelSectionsDirective &S) {
4440
  // Emit directive as a combined directive that consists of two implicit
4441
  // directives: 'parallel' with 'sections' directive.
4442
26
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4443
26
    Action.Enter(CGF);
4444
26
    CGF.EmitSections(S);
4445
26
  };
4446
26
  {
4447
26
    auto LPCRegion =
4448
26
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4449
26
    emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
4450
26
                                   emitEmptyBoundParameters);
4451
26
  }
4452
  // Check for outer lastprivate conditional update.
4453
26
  checkForLastprivateConditionalUpdate(*this, S);
4454
26
}
4455
4456
namespace {
4457
/// Get the list of variables declared in the context of the untied tasks.
4458
class CheckVarsEscapingUntiedTaskDeclContext final
4459
    : public ConstStmtVisitor<CheckVarsEscapingUntiedTaskDeclContext> {
4460
  // Local variables found in the untied task body that need privatization.
  llvm::SmallVector<const VarDecl *, 4> PrivateDecls;
4461
4462
public:
4463
28
  explicit CheckVarsEscapingUntiedTaskDeclContext() = default;
4464
28
  virtual ~CheckVarsEscapingUntiedTaskDeclContext() = default;
4465
10
  // Collect VarDecls with local storage from declaration statements.
  void VisitDeclStmt(const DeclStmt *S) {
4466
10
    if (!S)
4467
0
      return;
4468
    // Need to privatize only local vars, static locals can be processed as is.
4469
17
    
for (const Decl *D : S->decls())10
{
4470
17
      if (const auto *VD = dyn_cast_or_null<VarDecl>(D))
4471
14
        if (VD->hasLocalStorage())
4472
14
          PrivateDecls.push_back(VD);
4473
17
    }
4474
10
  }
4475
28
  // Do not descend into nested regions that introduce their own contexts.
  void VisitOMPExecutableDirective(const OMPExecutableDirective *) {}
4476
0
  void VisitCapturedStmt(const CapturedStmt *) {}
4477
0
  void VisitLambdaExpr(const LambdaExpr *) {}
4478
0
  void VisitBlockExpr(const BlockExpr *) {}
4479
189
  // Default case: recurse into all children.
  void VisitStmt(const Stmt *S) {
4480
189
    if (!S)
4481
0
      return;
4482
189
    for (const Stmt *Child : S->children())
4483
199
      if (Child)
4484
199
        Visit(Child);
4485
189
  }
4486
4487
  /// Returns the list of local variables collected so far.
4488
56
  ArrayRef<const VarDecl *> getPrivateDecls() const { return PrivateDecls; }
4489
};
4490
} // anonymous namespace
4491
4492
// Populates Data.Dependences from the directive's 'depend' clauses.
// 'omp_all_memory' (out/inout) is canonicalized to a single
// OMPC_DEPEND_outallmemory entry and subsumes plain out/inout dependences.
static void buildDependences(const OMPExecutableDirective &S,
4493
947
                             OMPTaskDataTy &Data) {
4494
4495
  // First look for 'omp_all_memory' and add this first.
4496
947
  bool OmpAllMemory = false;
4497
947
  if (llvm::any_of(
4498
947
          S.getClausesOfKind<OMPDependClause>(), [](const OMPDependClause *C) {
4499
459
            return C->getDependencyKind() == OMPC_DEPEND_outallmemory ||
4500
459
                   
C->getDependencyKind() == OMPC_DEPEND_inoutallmemory455
;
4501
459
          })) {
4502
10
    OmpAllMemory = true;
4503
    // Since both OMPC_DEPEND_outallmemory and OMPC_DEPEND_inoutallmemory are
4504
    // equivalent to the runtime, always use OMPC_DEPEND_outallmemory to
4505
    // simplify.
4506
10
    OMPTaskDataTy::DependData &DD =
4507
10
        Data.Dependences.emplace_back(OMPC_DEPEND_outallmemory,
4508
10
                                      /*IteratorExpr=*/nullptr);
4509
    // Add a nullptr Expr to simplify the codegen in emitDependData.
4510
10
    DD.DepExprs.push_back(nullptr);
4511
10
  }
4512
  // Add remaining dependences skipping any 'out' or 'inout' if they are
4513
  // overridden by 'omp_all_memory'.
4514
947
  for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
4515
459
    OpenMPDependClauseKind Kind = C->getDependencyKind();
4516
459
    if (Kind == OMPC_DEPEND_outallmemory || 
Kind == OMPC_DEPEND_inoutallmemory455
)
4517
10
      continue;
4518
449
    if (OmpAllMemory && 
(12
Kind == OMPC_DEPEND_out12
||
Kind == OMPC_DEPEND_inout12
))
4519
4
      continue;
4520
445
    OMPTaskDataTy::DependData &DD =
4521
445
        Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
4522
445
    DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
4523
445
  }
4524
947
}
4525
4526
// Common lowering for all task-based directives (task, taskloop, ...):
// gathers clause data (final/priority/private/firstprivate/lastprivate/
// reduction/in_reduction/depend) into Data, builds the body-generation
// lambda that rebinds privatized variables inside the outlined task
// function, then emits the outlined function and hands it to TaskGen.
void CodeGenFunction::EmitOMPTaskBasedDirective(
4527
    const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion,
4528
    const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen,
4529
468
    OMPTaskDataTy &Data) {
4530
  // Emit outlined function for task construct.
4531
468
  const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion);
4532
468
  auto I = CS->getCapturedDecl()->param_begin();
4533
468
  auto PartId = std::next(I);
4534
468
  auto TaskT = std::next(I, 4);
4535
  // Check if the task is final
4536
468
  if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
4537
    // If the condition constant folds and can be elided, try to avoid emitting
4538
    // the condition and the dead arm of the if/else.
4539
36
    const Expr *Cond = Clause->getCondition();
4540
36
    bool CondConstant;
4541
36
    if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
4542
23
      Data.Final.setInt(CondConstant);
4543
13
    else
4544
13
      Data.Final.setPointer(EvaluateExprAsBool(Cond));
4545
432
  } else {
4546
    // By default the task is not final.
4547
432
    Data.Final.setInt(/*IntVal=*/false);
4548
432
  }
4549
  // Check if the task has 'priority' clause.
4550
468
  if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
4551
25
    const Expr *Prio = Clause->getPriority();
4552
25
    Data.Priority.setInt(/*IntVal=*/true);
4553
25
    // The runtime expects a signed 32-bit priority value.
    Data.Priority.setPointer(EmitScalarConversion(
4554
25
        EmitScalarExpr(Prio), Prio->getType(),
4555
25
        getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
4556
25
        Prio->getExprLoc()));
4557
25
  }
4558
  // The first function argument for tasks is a thread id, the second one is a
4559
  // part id (0 for tied tasks, >=0 for untied task).
4560
468
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
4561
  // Get list of private variables.
4562
468
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
4563
50
    auto IRef = C->varlist_begin();
4564
226
    for (const Expr *IInit : C->private_copies()) {
4565
226
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
4566
226
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
4567
170
        Data.PrivateVars.push_back(*IRef);
4568
170
        Data.PrivateCopies.push_back(IInit);
4569
170
      }
4570
226
      ++IRef;
4571
226
    }
4572
50
  }
4573
468
  EmittedAsPrivate.clear();
4574
  // Get list of firstprivate variables.
4575
468
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
4576
138
    auto IRef = C->varlist_begin();
4577
138
    auto IElemInitRef = C->inits().begin();
4578
358
    for (const Expr *IInit : C->private_copies()) {
4579
358
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
4580
358
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
4581
284
        Data.FirstprivateVars.push_back(*IRef);
4582
284
        Data.FirstprivateCopies.push_back(IInit);
4583
284
        Data.FirstprivateInits.push_back(*IElemInitRef);
4584
284
      }
4585
358
      ++IRef;
4586
358
      ++IElemInitRef;
4587
358
    }
4588
138
  }
4589
  // Get list of lastprivate variables (for taskloops).
4590
468
  llvm::MapVector<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
4591
468
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
4592
49
    auto IRef = C->varlist_begin();
4593
49
    auto ID = C->destination_exprs().begin();
4594
199
    for (const Expr *IInit : C->private_copies()) {
4595
199
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
4596
199
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
4597
151
        Data.LastprivateVars.push_back(*IRef);
4598
151
        Data.LastprivateCopies.push_back(IInit);
4599
151
      }
4600
199
      LastprivateDstsOrigs.insert(
4601
199
          std::make_pair(cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
4602
199
                         cast<DeclRefExpr>(*IRef)));
4603
199
      ++IRef;
4604
199
      ++ID;
4605
199
    }
4606
49
  }
4607
468
  SmallVector<const Expr *, 4> LHSs;
4608
468
  SmallVector<const Expr *, 4> RHSs;
4609
468
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
4610
6
    Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
4611
6
    Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
4612
6
    Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
4613
6
    Data.ReductionOps.append(C->reduction_ops().begin(),
4614
6
                             C->reduction_ops().end());
4615
6
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
4616
6
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
4617
6
  }
4618
468
  Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
4619
468
      *this, S.getBeginLoc(), LHSs, RHSs, Data);
4620
  // Build list of dependences.
4621
468
  buildDependences(S, Data);
4622
  // Get list of local vars for untied tasks.
4623
468
  if (!Data.Tied) {
4624
28
    CheckVarsEscapingUntiedTaskDeclContext Checker;
4625
28
    Checker.Visit(S.getInnermostCapturedStmt()->getCapturedStmt());
4626
28
    Data.PrivateLocals.append(Checker.getPrivateDecls().begin(),
4627
28
                              Checker.getPrivateDecls().end());
4628
28
  }
4629
468
  // Body generator: runs inside the outlined task function and rebinds all
  // privatized variables to their task-local copies before calling BodyGen.
  auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
4630
468
                    CapturedRegion](CodeGenFunction &CGF,
4631
468
                                    PrePostActionTy &Action) {
4632
468
    llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
4633
468
                    std::pair<Address, Address>>
4634
468
        UntiedLocalVars;
4635
    // Set proper addresses for generated private copies.
4636
468
    OMPPrivateScope Scope(CGF);
4637
    // Generate debug info for variables present in shared clause.
4638
468
    if (auto *DI = CGF.getDebugInfo()) {
4639
6
      llvm::SmallDenseMap<const VarDecl *, FieldDecl *> CaptureFields =
4640
6
          CGF.CapturedStmtInfo->getCaptureFields();
4641
6
      llvm::Value *ContextValue = CGF.CapturedStmtInfo->getContextValue();
4642
6
      if (CaptureFields.size() && ContextValue) {
4643
6
        unsigned CharWidth = CGF.getContext().getCharWidth();
4644
        // The shared variables are packed together as members of structure.
4645
        // So the address of each shared variable can be computed by adding
4646
        // offset of it (within record) to the base address of record. For each
4647
        // shared variable, debug intrinsic llvm.dbg.declare is generated with
4648
        // appropriate expressions (DIExpression).
4649
        // Ex:
4650
        //  %12 = load %struct.anon*, %struct.anon** %__context.addr.i
4651
        //  call void @llvm.dbg.declare(metadata %struct.anon* %12,
4652
        //            metadata !svar1,
4653
        //            metadata !DIExpression(DW_OP_deref))
4654
        //  call void @llvm.dbg.declare(metadata %struct.anon* %12,
4655
        //            metadata !svar2,
4656
        //            metadata !DIExpression(DW_OP_plus_uconst, 8, DW_OP_deref))
4657
30
        for (auto It = CaptureFields.begin(); It != CaptureFields.end(); 
++It24
) {
4658
24
          const VarDecl *SharedVar = It->first;
4659
24
          RecordDecl *CaptureRecord = It->second->getParent();
4660
24
          const ASTRecordLayout &Layout =
4661
24
              CGF.getContext().getASTRecordLayout(CaptureRecord);
4662
24
          unsigned Offset =
4663
24
              Layout.getFieldOffset(It->second->getFieldIndex()) / CharWidth;
4664
24
          if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo())
4665
24
            (void)DI->EmitDeclareOfAutoVariable(SharedVar, ContextValue,
4666
24
                                                CGF.Builder, false);
4667
24
          llvm::Instruction &Last = CGF.Builder.GetInsertBlock()->back();
4668
          // Get the call dbg.declare instruction we just created and update
4669
          // its DIExpression to add offset to base address.
4670
24
          if (auto DDI = dyn_cast<llvm::DbgVariableIntrinsic>(&Last)) {
4671
24
            SmallVector<uint64_t, 8> Ops;
4672
            // Add offset to the base address if non zero.
4673
24
            if (Offset) {
4674
18
              Ops.push_back(llvm::dwarf::DW_OP_plus_uconst);
4675
18
              Ops.push_back(Offset);
4676
18
            }
4677
24
            Ops.push_back(llvm::dwarf::DW_OP_deref);
4678
24
            auto &Ctx = DDI->getContext();
4679
24
            llvm::DIExpression *DIExpr = llvm::DIExpression::get(Ctx, Ops);
4680
24
            Last.setOperand(2, llvm::MetadataAsValue::get(Ctx, DIExpr));
4681
24
          }
4682
24
        }
4683
6
      }
4684
6
    }
4685
468
    llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs;
4686
468
    if (!Data.PrivateVars.empty() || 
!Data.FirstprivateVars.empty()418
 ||
4687
468
        
!Data.LastprivateVars.empty()282
 ||
!Data.PrivateLocals.empty()233
) {
4688
235
      enum { PrivatesParam = 2, CopyFnParam = 3 };
4689
235
      // Call the runtime-generated copy function to fill in the addresses of
      // all privatized copies; CallArgs collects one out-pointer per copy.
      llvm::Value *CopyFn = CGF.Builder.CreateLoad(
4690
235
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
4691
235
      llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
4692
235
          CS->getCapturedDecl()->getParam(PrivatesParam)));
4693
      // Map privates.
4694
235
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
4695
235
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
4696
235
      llvm::SmallVector<llvm::Type *, 4> ParamTypes;
4697
235
      CallArgs.push_back(PrivatesPtr);
4698
235
      ParamTypes.push_back(PrivatesPtr->getType());
4699
235
      for (const Expr *E : Data.PrivateVars) {
4700
170
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4701
170
        Address PrivatePtr = CGF.CreateMemTemp(
4702
170
            CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
4703
170
        PrivatePtrs.emplace_back(VD, PrivatePtr);
4704
170
        CallArgs.push_back(PrivatePtr.getPointer());
4705
170
        ParamTypes.push_back(PrivatePtr.getType());
4706
170
      }
4707
284
      for (const Expr *E : Data.FirstprivateVars) {
4708
284
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4709
284
        Address PrivatePtr =
4710
284
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
4711
284
                              ".firstpriv.ptr.addr");
4712
284
        PrivatePtrs.emplace_back(VD, PrivatePtr);
4713
284
        FirstprivatePtrs.emplace_back(VD, PrivatePtr);
4714
284
        CallArgs.push_back(PrivatePtr.getPointer());
4715
284
        ParamTypes.push_back(PrivatePtr.getType());
4716
284
      }
4717
235
      for (const Expr *E : Data.LastprivateVars) {
4718
151
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4719
151
        Address PrivatePtr =
4720
151
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
4721
151
                              ".lastpriv.ptr.addr");
4722
151
        PrivatePtrs.emplace_back(VD, PrivatePtr);
4723
151
        CallArgs.push_back(PrivatePtr.getPointer());
4724
151
        ParamTypes.push_back(PrivatePtr.getType());
4725
151
      }
4726
235
      for (const VarDecl *VD : Data.PrivateLocals) {
4727
14
        QualType Ty = VD->getType().getNonReferenceType();
4728
14
        if (VD->getType()->isLValueReferenceType())
4729
0
          Ty = CGF.getContext().getPointerType(Ty);
4730
14
        if (isAllocatableDecl(VD))
4731
3
          Ty = CGF.getContext().getPointerType(Ty);
4732
14
        Address PrivatePtr = CGF.CreateMemTemp(
4733
14
            CGF.getContext().getPointerType(Ty), ".local.ptr.addr");
4734
14
        auto Result = UntiedLocalVars.insert(
4735
14
            std::make_pair(VD, std::make_pair(PrivatePtr, Address::invalid())));
4736
        // If key exists update in place.
4737
14
        if (Result.second == false)
4738
0
          *Result.first = std::make_pair(
4739
0
              VD, std::make_pair(PrivatePtr, Address::invalid()));
4740
14
        CallArgs.push_back(PrivatePtr.getPointer());
4741
14
        ParamTypes.push_back(PrivatePtr.getType());
4742
14
      }
4743
235
      auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(),
4744
235
                                               ParamTypes, /*isVarArg=*/false);
4745
235
      CopyFn = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4746
235
          CopyFn, CopyFnTy->getPointerTo());
4747
235
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
4748
235
          CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
4749
235
      // Map lastprivate destination decls to the addresses of their originals.
      for (const auto &Pair : LastprivateDstsOrigs) {
4750
199
        const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
4751
199
        DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD),
4752
                        /*RefersToEnclosingVariableOrCapture=*/
4753
199
                        CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
4754
199
                        Pair.second->getType(), VK_LValue,
4755
199
                        Pair.second->getExprLoc());
4756
199
        Scope.addPrivate(Pair.first, CGF.EmitLValue(&DRE).getAddress(CGF));
4757
199
      }
4758
605
      for (const auto &Pair : PrivatePtrs) {
4759
605
        Address Replacement = Address(
4760
605
            CGF.Builder.CreateLoad(Pair.second),
4761
605
            CGF.ConvertTypeForMem(Pair.first->getType().getNonReferenceType()),
4762
605
            CGF.getContext().getDeclAlign(Pair.first));
4763
605
        Scope.addPrivate(Pair.first, Replacement);
4764
605
        if (auto *DI = CGF.getDebugInfo())
4765
18
          if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo())
4766
18
            (void)DI->EmitDeclareOfAutoVariable(
4767
18
                Pair.first, Pair.second.getPointer(), CGF.Builder,
4768
18
                /*UsePointerValue*/ true);
4769
605
      }
4770
      // Adjust mapping for internal locals by mapping actual memory instead of
4771
      // a pointer to this memory.
4772
235
      for (auto &Pair : UntiedLocalVars) {
4773
14
        QualType VDType = Pair.first->getType().getNonReferenceType();
4774
14
        if (isAllocatableDecl(Pair.first)) {
4775
3
          llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
4776
3
          Address Replacement(
4777
3
              Ptr,
4778
3
              CGF.ConvertTypeForMem(CGF.getContext().getPointerType(VDType)),
4779
3
              CGF.getPointerAlign());
4780
3
          Pair.second.first = Replacement;
4781
3
          Ptr = CGF.Builder.CreateLoad(Replacement);
4782
3
          Replacement = Address(Ptr, CGF.ConvertTypeForMem(VDType),
4783
3
                                CGF.getContext().getDeclAlign(Pair.first));
4784
3
          Pair.second.second = Replacement;
4785
11
        } else {
4786
11
          llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
4787
11
          Address Replacement(Ptr, CGF.ConvertTypeForMem(VDType),
4788
11
                              CGF.getContext().getDeclAlign(Pair.first));
4789
11
          Pair.second.first = Replacement;
4790
11
        }
4791
14
      }
4792
235
    }
4793
468
    if (Data.Reductions) {
4794
6
      OMPPrivateScope FirstprivateScope(CGF);
4795
18
      for (const auto &Pair : FirstprivatePtrs) {
4796
18
        Address Replacement(
4797
18
            CGF.Builder.CreateLoad(Pair.second),
4798
18
            CGF.ConvertTypeForMem(Pair.first->getType().getNonReferenceType()),
4799
18
            CGF.getContext().getDeclAlign(Pair.first));
4800
18
        FirstprivateScope.addPrivate(Pair.first, Replacement);
4801
18
      }
4802
6
      (void)FirstprivateScope.Privatize();
4803
6
      OMPLexicalScope LexScope(CGF, S, CapturedRegion);
4804
6
      ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
4805
6
                             Data.ReductionCopies, Data.ReductionOps);
4806
6
      llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
4807
6
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
4808
30
      for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; 
++Cnt24
) {
4809
24
        RedCG.emitSharedOrigLValue(CGF, Cnt);
4810
24
        RedCG.emitAggregateType(CGF, Cnt);
4811
        // FIXME: This must removed once the runtime library is fixed.
4812
        // Emit required threadprivate variables for
4813
        // initializer/combiner/finalizer.
4814
24
        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
4815
24
                                                           RedCG, Cnt);
4816
24
        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
4817
24
            CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
4818
24
        Replacement =
4819
24
            Address(CGF.EmitScalarConversion(
4820
24
                        Replacement.getPointer(), CGF.getContext().VoidPtrTy,
4821
24
                        CGF.getContext().getPointerType(
4822
24
                            Data.ReductionCopies[Cnt]->getType()),
4823
24
                        Data.ReductionCopies[Cnt]->getExprLoc()),
4824
24
                    CGF.ConvertTypeForMem(Data.ReductionCopies[Cnt]->getType()),
4825
24
                    Replacement.getAlignment());
4826
24
        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
4827
24
        Scope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement);
4828
24
      }
4829
6
    }
4830
    // Privatize all private variables except for in_reduction items.
4831
468
    (void)Scope.Privatize();
4832
468
    SmallVector<const Expr *, 4> InRedVars;
4833
468
    SmallVector<const Expr *, 4> InRedPrivs;
4834
468
    SmallVector<const Expr *, 4> InRedOps;
4835
468
    SmallVector<const Expr *, 4> TaskgroupDescriptors;
4836
468
    for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
4837
44
      auto IPriv = C->privates().begin();
4838
44
      auto IRed = C->reduction_ops().begin();
4839
44
      auto ITD = C->taskgroup_descriptors().begin();
4840
66
      for (const Expr *Ref : C->varlists()) {
4841
66
        InRedVars.emplace_back(Ref);
4842
66
        InRedPrivs.emplace_back(*IPriv);
4843
66
        InRedOps.emplace_back(*IRed);
4844
66
        TaskgroupDescriptors.emplace_back(*ITD);
4845
66
        std::advance(IPriv, 1);
4846
66
        std::advance(IRed, 1);
4847
66
        std::advance(ITD, 1);
4848
66
      }
4849
44
    }
4850
    // Privatize in_reduction items here, because taskgroup descriptors must be
4851
    // privatized earlier.
4852
468
    OMPPrivateScope InRedScope(CGF);
4853
468
    if (!InRedVars.empty()) {
4854
34
      ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
4855
100
      for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; 
++Cnt66
) {
4856
66
        RedCG.emitSharedOrigLValue(CGF, Cnt);
4857
66
        RedCG.emitAggregateType(CGF, Cnt);
4858
        // The taskgroup descriptor variable is always implicit firstprivate and
4859
        // privatized already during processing of the firstprivates.
4860
        // FIXME: This must removed once the runtime library is fixed.
4861
        // Emit required threadprivate variables for
4862
        // initializer/combiner/finalizer.
4863
66
        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
4864
66
                                                           RedCG, Cnt);
4865
66
        llvm::Value *ReductionsPtr;
4866
66
        if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) {
4867
64
          ReductionsPtr = CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr),
4868
64
                                               TRExpr->getExprLoc());
4869
64
        } else {
4870
2
          ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4871
2
        }
4872
66
        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
4873
66
            CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
4874
66
        Replacement = Address(
4875
66
            CGF.EmitScalarConversion(
4876
66
                Replacement.getPointer(), CGF.getContext().VoidPtrTy,
4877
66
                CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
4878
66
                InRedPrivs[Cnt]->getExprLoc()),
4879
66
            CGF.ConvertTypeForMem(InRedPrivs[Cnt]->getType()),
4880
66
            Replacement.getAlignment());
4881
66
        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
4882
66
        InRedScope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement);
4883
66
      }
4884
34
    }
4885
468
    (void)InRedScope.Privatize();
4886
4887
468
    CGOpenMPRuntime::UntiedTaskLocalDeclsRAII LocalVarsScope(CGF,
4888
468
                                                             UntiedLocalVars);
4889
468
    Action.Enter(CGF);
4890
468
    BodyGen(CGF);
4891
468
  };
4892
468
  llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
4893
468
      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
4894
468
      Data.NumberOfParts);
4895
468
  OMPLexicalScope Scope(*this, S, llvm::None,
4896
468
                        !isOpenMPParallelDirective(S.getDirectiveKind()) &&
4897
468
                            
!isOpenMPSimdDirective(S.getDirectiveKind())396
);
4898
468
  TaskGen(*this, OutlinedFn, Data);
4899
468
}
4900
4901
// Synthesizes an implicit firstprivate variable of type \p Ty for the
// captured declaration \p CD: creates the original/private/init decls plus
// DeclRefExprs, wires the private copy's initializer to the init decl, and
// registers the triple in Data's Firstprivate* lists. Returns the decl for
// the original variable so the caller can reference it.
static ImplicitParamDecl *
4902
createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
4903
                                  QualType Ty, CapturedDecl *CD,
4904
710
                                  SourceLocation Loc) {
4905
710
  auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
4906
710
                                           ImplicitParamDecl::Other);
4907
710
  auto *OrigRef = DeclRefExpr::Create(
4908
710
      C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD,
4909
710
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
4910
710
  auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
4911
710
                                              ImplicitParamDecl::Other);
4912
710
  auto *PrivateRef = DeclRefExpr::Create(
4913
710
      C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD,
4914
710
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
4915
710
  // The initializer decl uses the element type so arrays are initialized
  // element-wise.
  QualType ElemType = C.getBaseElementType(Ty);
4916
710
  auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType,
4917
710
                                           ImplicitParamDecl::Other);
4918
710
  auto *InitRef = DeclRefExpr::Create(
4919
710
      C, NestedNameSpecifierLoc(), SourceLocation(), InitVD,
4920
710
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue);
4921
710
  PrivateVD->setInitStyle(VarDecl::CInit);
4922
710
  PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue,
4923
710
                                              InitRef, /*BasePath=*/nullptr,
4924
710
                                              VK_PRValue, FPOptionsOverride()));
4925
710
  Data.FirstprivateVars.emplace_back(OrigRef);
4926
710
  Data.FirstprivateCopies.emplace_back(PrivateRef);
4927
710
  Data.FirstprivateInits.emplace_back(InitRef);
4928
710
  return OrigVD;
4929
710
}
4930
4931
void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
4932
    const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen,
4933
462
    OMPTargetDataInfo &InputInfo) {
4934
  // Emit outlined function for task construct.
4935
462
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
4936
462
  Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
4937
462
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
4938
462
  auto I = CS->getCapturedDecl()->param_begin();
4939
462
  auto PartId = std::next(I);
4940
462
  auto TaskT = std::next(I, 4);
4941
462
  OMPTaskDataTy Data;
4942
  // The task is not final.
4943
462
  Data.Final.setInt(/*IntVal=*/false);
4944
  // Get list of firstprivate variables.
4945
462
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
4946
254
    auto IRef = C->varlist_begin();
4947
254
    auto IElemInitRef = C->inits().begin();
4948
394
    for (auto *IInit : C->private_copies()) {
4949
394
      Data.FirstprivateVars.push_back(*IRef);
4950
394
      Data.FirstprivateCopies.push_back(IInit);
4951
394
      Data.FirstprivateInits.push_back(*IElemInitRef);
4952
394
      ++IRef;
4953
394
      ++IElemInitRef;
4954
394
    }
4955
254
  }
4956
462
  SmallVector<const Expr *, 4> LHSs;
4957
462
  SmallVector<const Expr *, 4> RHSs;
4958
462
  for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
4959
2
    Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
4960
2
    Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
4961
2
    Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
4962
2
    Data.ReductionOps.append(C->reduction_ops().begin(),
4963
2
                             C->reduction_ops().end());
4964
2
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
4965
2
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
4966
2
  }
4967
462
  OMPPrivateScope TargetScope(*this);
4968
462
  VarDecl *BPVD = nullptr;
4969
462
  VarDecl *PVD = nullptr;
4970
462
  VarDecl *SVD = nullptr;
4971
462
  VarDecl *MVD = nullptr;
4972
462
  if (InputInfo.NumberOfTargetItems > 0) {
4973
228
    auto *CD = CapturedDecl::Create(
4974
228
        getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0);
4975
228
    llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems);
4976
228
    QualType BaseAndPointerAndMapperType = getContext().getConstantArrayType(
4977
228
        getContext().VoidPtrTy, ArrSize, nullptr, ArrayType::Normal,
4978
228
        /*IndexTypeQuals=*/0);
4979
228
    BPVD = createImplicitFirstprivateForType(
4980
228
        getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
4981
228
    PVD = createImplicitFirstprivateForType(
4982
228
        getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
4983
228
    QualType SizesType = getContext().getConstantArrayType(
4984
228
        getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1),
4985
228
        ArrSize, nullptr, ArrayType::Normal,
4986
228
        /*IndexTypeQuals=*/0);
4987
228
    SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD,
4988
228
                                            S.getBeginLoc());
4989
228
    TargetScope.addPrivate(BPVD, InputInfo.BasePointersArray);
4990
228
    TargetScope.addPrivate(PVD, InputInfo.PointersArray);
4991
228
    TargetScope.addPrivate(SVD, InputInfo.SizesArray);
4992
    // If there is no user-defined mapper, the mapper array will be nullptr. In
4993
    // this case, we don't need to privatize it.
4994
228
    if (!isa_and_nonnull<llvm::ConstantPointerNull>(
4995
228
            InputInfo.MappersArray.getPointer())) {
4996
26
      MVD = createImplicitFirstprivateForType(
4997
26
          getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
4998
26
      TargetScope.addPrivate(MVD, InputInfo.MappersArray);
4999
26
    }
5000
228
  }
5001
462
  (void)TargetScope.Privatize();
5002
462
  buildDependences(S, Data);
5003
462
  auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, MVD,
5004
462
                    &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
5005
    // Set proper addresses for generated private copies.
5006
462
    OMPPrivateScope Scope(CGF);
5007
462
    if (!Data.FirstprivateVars.empty()) {
5008
358
      enum { PrivatesParam = 2, CopyFnParam = 3 };
5009
358
      llvm::Value *CopyFn = CGF.Builder.CreateLoad(
5010
358
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
5011
358
      llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
5012
358
          CS->getCapturedDecl()->getParam(PrivatesParam)));
5013
      // Map privates.
5014
358
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;