/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/CodeGen/CGStmtOpenMP.cpp
Line | Count | Source |
1 | | //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This contains code to emit OpenMP nodes as LLVM code. |
10 | | // |
11 | | //===----------------------------------------------------------------------===// |
12 | | |
13 | | #include "CGCleanup.h" |
14 | | #include "CGOpenMPRuntime.h" |
15 | | #include "CodeGenFunction.h" |
16 | | #include "CodeGenModule.h" |
17 | | #include "TargetInfo.h" |
18 | | #include "clang/AST/ASTContext.h" |
19 | | #include "clang/AST/Attr.h" |
20 | | #include "clang/AST/DeclOpenMP.h" |
21 | | #include "clang/AST/OpenMPClause.h" |
22 | | #include "clang/AST/Stmt.h" |
23 | | #include "clang/AST/StmtOpenMP.h" |
24 | | #include "clang/AST/StmtVisitor.h" |
25 | | #include "clang/Basic/OpenMPKinds.h" |
26 | | #include "clang/Basic/PrettyStackTrace.h" |
27 | | #include "llvm/Frontend/OpenMP/OMPConstants.h" |
28 | | #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" |
29 | | #include "llvm/IR/Constants.h" |
30 | | #include "llvm/IR/Instructions.h" |
31 | | #include "llvm/Support/AtomicOrdering.h" |
32 | | using namespace clang; |
33 | | using namespace CodeGen; |
34 | | using namespace llvm::omp; |
35 | | |
36 | | static const VarDecl *getBaseDecl(const Expr *Ref); |
37 | | |
38 | | namespace { |
39 | | /// Lexical scope for OpenMP executable constructs that handles correct codegen
40 | | /// for captured expressions. |
41 | | class OMPLexicalScope : public CodeGenFunction::LexicalScope { |
42 | 14.9k | void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) { |
43 | 18.0k | for (const auto *C : S.clauses()) { |
44 | 18.0k | if (const auto *CPI = OMPClauseWithPreInit::get(C)) { |
45 | 10.5k | if (const auto *PreInit = |
46 | 1.06k | cast_or_null<DeclStmt>(CPI->getPreInitStmt())) { |
47 | 1.14k | for (const auto *I : PreInit->decls()) { |
48 | 1.14k | if (!I->hasAttr<OMPCaptureNoInitAttr>()) { |
49 | 1.12k | CGF.EmitVarDecl(cast<VarDecl>(*I)); |
50 | 18 | } else { |
51 | 18 | CodeGenFunction::AutoVarEmission Emission = |
52 | 18 | CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); |
53 | 18 | CGF.EmitAutoVarCleanups(Emission); |
54 | 18 | } |
55 | 1.14k | } |
56 | 1.06k | } |
57 | 10.5k | } |
58 | 18.0k | } |
59 | 14.9k | } |
60 | | CodeGenFunction::OMPPrivateScope InlinedShareds; |
61 | | |
62 | 16.5k | static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) { |
63 | 16.5k | return CGF.LambdaCaptureFields.lookup(VD) || |
64 | 16.1k | (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
65 | 12.5k | (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) && |
66 | 8 | cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD)); |
67 | 16.5k | } |
68 | | |
69 | | public: |
70 | | OMPLexicalScope( |
71 | | CodeGenFunction &CGF, const OMPExecutableDirective &S, |
72 | | const llvm::Optional<OpenMPDirectiveKind> CapturedRegion = llvm::None, |
73 | | const bool EmitPreInitStmt = true) |
74 | | : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()), |
75 | 24.5k | InlinedShareds(CGF) { |
76 | 24.5k | if (EmitPreInitStmt) |
77 | 14.9k | emitPreInitStmt(CGF, S); |
78 | 24.5k | if (!CapturedRegion.hasValue()) |
79 | 12.2k | return; |
80 | 12.2k | assert(S.hasAssociatedStmt() && |
81 | 12.2k | "Expected associated statement for inlined directive."); |
82 | 12.2k | const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion); |
83 | 18.8k | for (const auto &C : CS->captures()) { |
84 | 18.8k | if (C.capturesVariable() || C.capturesVariableByCopy()) {
85 | 16.5k | auto *VD = C.getCapturedVar(); |
86 | 16.5k | assert(VD == VD->getCanonicalDecl() && |
87 | 16.5k | "Canonical decl must be captured."); |
88 | 16.5k | DeclRefExpr DRE( |
89 | 16.5k | CGF.getContext(), const_cast<VarDecl *>(VD), |
90 | 16.5k | isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
91 | 564 | InlinedShareds.isGlobalVarCaptured(VD)), |
92 | 16.5k | VD->getType().getNonReferenceType(), VK_LValue, C.getLocation()); |
93 | 16.5k | InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address { |
94 | 16.5k | return CGF.EmitLValue(&DRE).getAddress(CGF); |
95 | 16.5k | }); |
96 | 16.5k | } |
97 | 18.8k | } |
98 | 12.2k | (void)InlinedShareds.Privatize(); |
99 | 12.2k | } |
100 | | }; |
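// [Editorial sketch, not part of the coverage listing] A hypothetical
// source-level example of what OMPLexicalScope handles: the non-constant
// clause argument N * 2 is evaluated once into a clause pre-init temporary
// (emitPreInitStmt), and the captured use of N inside the region is remapped
// through InlinedShareds so it refers to the correct storage.
void pre_init_demo(int N) { // hypothetical helper name
#pragma omp parallel num_threads(N * 2) // N * 2 emitted via the pre-init stmt
  {
    int Local = N; // N is a captured variable remapped by the scope
    (void)Local;
  }
}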
101 | | |
102 | | /// Lexical scope for OpenMP parallel construct that handles correct codegen
103 | | /// for captured expressions. |
104 | | class OMPParallelScope final : public OMPLexicalScope { |
105 | 6.05k | bool EmitPreInitStmt(const OMPExecutableDirective &S) { |
106 | 6.05k | OpenMPDirectiveKind Kind = S.getDirectiveKind(); |
107 | 6.05k | return !(isOpenMPTargetExecutionDirective(Kind) || |
108 | 2.74k | isOpenMPLoopBoundSharingDirective(Kind)) && |
109 | 1.34k | isOpenMPParallelDirective(Kind); |
110 | 6.05k | } |
111 | | |
112 | | public: |
113 | | OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S) |
114 | | : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None, |
115 | 6.05k | EmitPreInitStmt(S)) {} |
116 | | }; |
117 | | |
118 | | /// Lexical scope for OpenMP teams construct that handles correct codegen
119 | | /// for captured expressions. |
120 | | class OMPTeamsScope final : public OMPLexicalScope { |
121 | 5.56k | bool EmitPreInitStmt(const OMPExecutableDirective &S) { |
122 | 5.56k | OpenMPDirectiveKind Kind = S.getDirectiveKind(); |
123 | 5.56k | return !isOpenMPTargetExecutionDirective(Kind) && |
124 | 1.92k | isOpenMPTeamsDirective(Kind); |
125 | 5.56k | } |
126 | | |
127 | | public: |
128 | | OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S) |
129 | | : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None, |
130 | 5.56k | EmitPreInitStmt(S)) {} |
131 | | }; |
132 | | |
133 | | /// Private scope for OpenMP loop-based directives that supports capturing
134 | | /// of used expressions from the loop statement.
135 | | class OMPLoopScope : public CodeGenFunction::RunCleanupsScope { |
136 | 17.0k | void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) { |
137 | 17.0k | CodeGenFunction::OMPMapVars PreCondVars; |
138 | 17.0k | llvm::DenseSet<const VarDecl *> EmittedAsPrivate; |
139 | 17.7k | for (const auto *E : S.counters()) { |
140 | 17.7k | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
141 | 17.7k | EmittedAsPrivate.insert(VD->getCanonicalDecl()); |
142 | 17.7k | (void)PreCondVars.setVarAddr( |
143 | 17.7k | CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType())); |
144 | 17.7k | } |
145 | | // Mark private vars as undefs. |
146 | 770 | for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { |
147 | 2.90k | for (const Expr *IRef : C->varlists()) { |
148 | 2.90k | const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl()); |
149 | 2.90k | if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { |
150 | 2.57k | (void)PreCondVars.setVarAddr( |
151 | 2.57k | CGF, OrigVD, |
152 | 2.57k | Address(llvm::UndefValue::get( |
153 | 2.57k | CGF.ConvertTypeForMem(CGF.getContext().getPointerType( |
154 | 2.57k | OrigVD->getType().getNonReferenceType()))), |
155 | 2.57k | CGF.getContext().getDeclAlign(OrigVD))); |
156 | 2.57k | } |
157 | 2.90k | } |
158 | 770 | } |
159 | 17.0k | (void)PreCondVars.apply(CGF); |
160 | | // Emit init, __range and __end variables for C++ range loops. |
161 | 17.0k | const Stmt *Body = |
162 | 17.0k | S.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(); |
163 | 34.8k | for (unsigned Cnt = 0; Cnt < S.getCollapsedNumber(); ++Cnt) {
164 | 17.7k | Body = OMPLoopDirective::tryToFindNextInnerLoop( |
165 | 17.7k | Body, /*TryImperfectlyNestedLoops=*/true); |
166 | 17.7k | if (auto *For = dyn_cast<ForStmt>(Body)) { |
167 | 17.7k | Body = For->getBody(); |
168 | 6 | } else { |
169 | 6 | assert(isa<CXXForRangeStmt>(Body) && |
170 | 6 | "Expected canonical for loop or range-based for loop."); |
171 | 6 | auto *CXXFor = cast<CXXForRangeStmt>(Body); |
172 | 6 | if (const Stmt *Init = CXXFor->getInit()) |
173 | 0 | CGF.EmitStmt(Init); |
174 | 6 | CGF.EmitStmt(CXXFor->getRangeStmt()); |
175 | 6 | CGF.EmitStmt(CXXFor->getEndStmt()); |
176 | 6 | Body = CXXFor->getBody(); |
177 | 6 | } |
178 | 17.7k | } |
179 | 17.0k | if (const auto *PreInits = cast_or_null<DeclStmt>(S.getPreInits())) { |
180 | 3.14k | for (const auto *I : PreInits->decls()) |
181 | 6.83k | CGF.EmitVarDecl(cast<VarDecl>(*I)); |
182 | 3.14k | } |
183 | 17.0k | PreCondVars.restore(CGF); |
184 | 17.0k | } |
185 | | |
186 | | public: |
187 | | OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S) |
188 | 17.0k | : CodeGenFunction::RunCleanupsScope(CGF) { |
189 | 17.0k | emitPreInitStmt(CGF, S); |
190 | 17.0k | } |
191 | | }; |
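// [Editorial sketch] A hypothetical loop nest exercising both paths of
// OMPLoopScope::emitPreInitStmt above: the canonical for loop's counter gets
// private storage, and the (OpenMP 5.0) range-based for loop needs its
// __range and __end helper variables emitted up front.
void loop_scope_demo(int (&Arr)[8]) {
#pragma omp for collapse(2)
  for (int i = 0; i < 8; ++i) // canonical loop: counter i privatized
    for (int X : Arr)         // range-based loop: __range/__end pre-emitted
      (void)(i + X);
}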
192 | | |
193 | | class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope { |
194 | | CodeGenFunction::OMPPrivateScope InlinedShareds; |
195 | | |
196 | 41.2k | static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) { |
198 | 40.2k | (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
198 | 40.2k | (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)6.51k ) || |
199 | 40.2k | (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) && |
200 | 78 | cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD)); |
201 | 41.2k | } |
202 | | |
203 | | public: |
204 | | OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S) |
205 | | : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()), |
206 | 13.2k | InlinedShareds(CGF) { |
207 | 17.4k | for (const auto *C : S.clauses()) { |
208 | 17.4k | if (const auto *CPI = OMPClauseWithPreInit::get(C)) { |
209 | 11.5k | if (const auto *PreInit = |
210 | 1.29k | cast_or_null<DeclStmt>(CPI->getPreInitStmt())) { |
211 | 1.35k | for (const auto *I : PreInit->decls()) { |
212 | 1.35k | if (!I->hasAttr<OMPCaptureNoInitAttr>()) { |
213 | 1.34k | CGF.EmitVarDecl(cast<VarDecl>(*I)); |
214 | 18 | } else { |
215 | 18 | CodeGenFunction::AutoVarEmission Emission = |
216 | 18 | CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); |
217 | 18 | CGF.EmitAutoVarCleanups(Emission); |
218 | 18 | } |
219 | 1.35k | } |
220 | 1.29k | } |
221 | 5.92k | } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) { |
222 | 86 | for (const Expr *E : UDP->varlists()) { |
223 | 86 | const Decl *D = cast<DeclRefExpr>(E)->getDecl(); |
224 | 86 | if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D)) |
225 | 20 | CGF.EmitVarDecl(*OED); |
226 | 86 | } |
227 | 5.84k | } else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(C)) { |
228 | 24 | for (const Expr *E : UDP->varlists()) { |
229 | 24 | const Decl *D = getBaseDecl(E); |
230 | 24 | if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D)) |
231 | 10 | CGF.EmitVarDecl(*OED); |
232 | 24 | } |
233 | 6 | } |
234 | 17.4k | } |
235 | 13.2k | if (!isOpenMPSimdDirective(S.getDirectiveKind())) |
236 | 9.81k | CGF.EmitOMPPrivateClause(S, InlinedShareds); |
237 | 13.2k | if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) { |
238 | 37 | if (const Expr *E = TG->getReductionRef()) |
239 | 26 | CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl())); |
240 | 37 | } |
241 | 13.2k | const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt()); |
242 | 41.0k | while (CS) { |
243 | 47.3k | for (auto &C : CS->captures()) { |
244 | 47.3k | if (C.capturesVariable() || C.capturesVariableByCopy()) {
245 | 41.2k | auto *VD = C.getCapturedVar(); |
246 | 41.2k | assert(VD == VD->getCanonicalDecl() && |
247 | 41.2k | "Canonical decl must be captured."); |
248 | 41.2k | DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), |
249 | 41.2k | isCapturedVar(CGF, VD) || |
250 | 40.2k | (CGF.CapturedStmtInfo && |
251 | 6.50k | InlinedShareds.isGlobalVarCaptured(VD)), |
252 | 41.2k | VD->getType().getNonReferenceType(), VK_LValue, |
253 | 41.2k | C.getLocation()); |
254 | 41.2k | InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address { |
255 | 41.2k | return CGF.EmitLValue(&DRE).getAddress(CGF); |
256 | 41.2k | }); |
257 | 41.2k | } |
258 | 47.3k | } |
259 | 27.7k | CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt()); |
260 | 27.7k | } |
261 | 13.2k | (void)InlinedShareds.Privatize(); |
262 | 13.2k | } |
263 | | }; |
264 | | |
265 | | } // namespace |
266 | | |
267 | | static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, |
268 | | const OMPExecutableDirective &S, |
269 | | const RegionCodeGenTy &CodeGen); |
270 | | |
271 | 12.5k | LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) { |
272 | 12.5k | if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) { |
273 | 9.19k | if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) { |
274 | 9.19k | OrigVD = OrigVD->getCanonicalDecl(); |
275 | 9.19k | bool IsCaptured = |
276 | 9.19k | LambdaCaptureFields.lookup(OrigVD) || |
277 | 9.06k | (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
278 | 8.40k | (CurCodeDecl && isa<BlockDecl>(CurCodeDecl));
279 | 9.19k | DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured, |
280 | 9.19k | OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc()); |
281 | 9.19k | return EmitLValue(&DRE); |
282 | 9.19k | } |
283 | 3.35k | } |
284 | 3.35k | return EmitLValue(E); |
285 | 3.35k | } |
286 | | |
287 | 17.1k | llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) { |
288 | 17.1k | ASTContext &C = getContext(); |
289 | 17.1k | llvm::Value *Size = nullptr; |
290 | 17.1k | auto SizeInChars = C.getTypeSizeInChars(Ty); |
291 | 17.1k | if (SizeInChars.isZero()) { |
292 | | // getTypeSizeInChars() returns 0 for a VLA. |
293 | 2.32k | while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) { |
294 | 1.16k | VlaSizePair VlaSize = getVLASize(VAT); |
295 | 1.16k | Ty = VlaSize.Type; |
296 | 0 | Size = Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts) |
297 | 1.16k | : VlaSize.NumElts; |
298 | 1.16k | } |
299 | 1.16k | SizeInChars = C.getTypeSizeInChars(Ty); |
300 | 1.16k | if (SizeInChars.isZero()) |
301 | 0 | return llvm::ConstantInt::get(SizeTy, /*V=*/0); |
302 | 1.16k | return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars)); |
303 | 1.16k | } |
304 | 15.9k | return CGM.getSize(SizeInChars); |
305 | 15.9k | } |
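// [Editorial sketch] A worked example of the VLA branch above, assuming a
// 64-bit target: for a captured `double A[n][m]`, getTypeSize chains
// no-unsigned-wrap multiplies over the VLA dimensions and then scales by the
// element size, roughly:
//   %0 = mul nuw i64 %n, %m   ; VlaSize.NumElts folded per dimension
//   %1 = mul nuw i64 %0, 8    ; * sizeof(double) via CGM.getSize()
// For complete types the early `return CGM.getSize(SizeInChars)` yields a
// plain constant instead.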
306 | | |
307 | | void CodeGenFunction::GenerateOpenMPCapturedVars( |
308 | 20.8k | const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) { |
309 | 20.8k | const RecordDecl *RD = S.getCapturedRecordDecl(); |
310 | 20.8k | auto CurField = RD->field_begin(); |
311 | 20.8k | auto CurCap = S.captures().begin(); |
312 | 20.8k | for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(), |
313 | 20.8k | E = S.capture_init_end(); |
314 | 52.8k | I != E; ++I, ++CurField, ++CurCap) {
315 | 31.9k | if (CurField->hasCapturedVLAType()) { |
316 | 2.60k | const VariableArrayType *VAT = CurField->getCapturedVLAType(); |
317 | 2.60k | llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()]; |
318 | 2.60k | CapturedVars.push_back(Val); |
319 | 29.3k | } else if (CurCap->capturesThis()) { |
320 | 1.69k | CapturedVars.push_back(CXXThisValue); |
321 | 27.7k | } else if (CurCap->capturesVariableByCopy()) { |
322 | 14.8k | llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation()); |
323 | | |
324 | | // If the field is not a pointer, we need to save the actual value |
325 | | // and load it as a void pointer. |
326 | 14.8k | if (!CurField->getType()->isAnyPointerType()) { |
327 | 13.0k | ASTContext &Ctx = getContext(); |
328 | 13.0k | Address DstAddr = CreateMemTemp( |
329 | 13.0k | Ctx.getUIntPtrType(), |
330 | 13.0k | Twine(CurCap->getCapturedVar()->getName(), ".casted")); |
331 | 13.0k | LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType()); |
332 | | |
333 | 13.0k | llvm::Value *SrcAddrVal = EmitScalarConversion( |
334 | 13.0k | DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()), |
335 | 13.0k | Ctx.getPointerType(CurField->getType()), CurCap->getLocation()); |
336 | 13.0k | LValue SrcLV = |
337 | 13.0k | MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType()); |
338 | | |
339 | | // Store the value using the source type pointer. |
340 | 13.0k | EmitStoreThroughLValue(RValue::get(CV), SrcLV); |
341 | | |
342 | | // Load the value using the destination type pointer. |
343 | 13.0k | CV = EmitLoadOfScalar(DstLV, CurCap->getLocation()); |
344 | 13.0k | } |
345 | 14.8k | CapturedVars.push_back(CV); |
346 | 12.8k | } else { |
347 | 12.8k | assert(CurCap->capturesVariable() && "Expected capture by reference."); |
348 | 12.8k | CapturedVars.push_back(EmitLValue(*I).getAddress(*this).getPointer()); |
349 | 12.8k | } |
350 | 31.9k | } |
351 | 20.8k | } |
352 | | |
353 | | static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc, |
354 | | QualType DstType, StringRef Name, |
355 | 18.2k | LValue AddrLV) { |
356 | 18.2k | ASTContext &Ctx = CGF.getContext(); |
357 | | |
358 | 18.2k | llvm::Value *CastedPtr = CGF.EmitScalarConversion( |
359 | 18.2k | AddrLV.getAddress(CGF).getPointer(), Ctx.getUIntPtrType(), |
360 | 18.2k | Ctx.getPointerType(DstType), Loc); |
361 | 18.2k | Address TmpAddr = |
362 | 18.2k | CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType)) |
363 | 18.2k | .getAddress(CGF); |
364 | 18.2k | return TmpAddr; |
365 | 18.2k | } |
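// [Editorial sketch] What castValueFromUintptr undoes, in hypothetical C
// terms: GenerateOpenMPCapturedVars stored a small by-copy capture into a
// uintptr_t slot, and here the slot's address is reinterpreted as a pointer
// to the original type so the outlined body can use it unchanged.
//   uintptr_t Slot;              // the ".casted" temporary / argument
//   *(float *)&Slot = 3.0f;      // store through the source-typed pointer
//   float V = *(float *)&Slot;   // outlined code reads it back the same way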
366 | | |
367 | 7.12k | static QualType getCanonicalParamType(ASTContext &C, QualType T) { |
368 | 7.12k | if (T->isLValueReferenceType()) |
369 | 2.08k | return C.getLValueReferenceType( |
370 | 2.08k | getCanonicalParamType(C, T.getNonReferenceType()), |
371 | 2.08k | /*SpelledAsLValue=*/false); |
372 | 5.04k | if (T->isPointerType()) |
373 | 27 | return C.getPointerType(getCanonicalParamType(C, T->getPointeeType())); |
374 | 5.01k | if (const ArrayType *A = T->getAsArrayTypeUnsafe()) { |
375 | 2.97k | if (const auto *VLA = dyn_cast<VariableArrayType>(A)) |
376 | 2.91k | return getCanonicalParamType(C, VLA->getElementType()); |
377 | 60 | if (!A->isVariablyModifiedType()) |
378 | 60 | return C.getCanonicalType(T); |
379 | 2.04k | } |
380 | 2.04k | return C.getCanonicalParamType(T); |
381 | 2.04k | } |
382 | | |
383 | | namespace { |
384 | | /// Contains required data for proper outlined function codegen. |
385 | | struct FunctionOptions { |
386 | | /// Captured statement for which the function is generated. |
387 | | const CapturedStmt *S = nullptr; |
388 | | /// true if cast to/from UIntPtr is required for variables captured by |
389 | | /// value. |
390 | | const bool UIntPtrCastRequired = true; |
391 | | /// true if only casted arguments must be registered as local args or VLA |
392 | | /// sizes. |
393 | | const bool RegisterCastedArgsOnly = false; |
394 | | /// Name of the generated function. |
395 | | const StringRef FunctionName; |
396 | | /// Location of the non-debug version of the outlined function. |
397 | | SourceLocation Loc; |
398 | | explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired, |
399 | | bool RegisterCastedArgsOnly, StringRef FunctionName, |
400 | | SourceLocation Loc) |
401 | | : S(S), UIntPtrCastRequired(UIntPtrCastRequired), |
402 | | RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly), |
403 | 23.3k | FunctionName(FunctionName), Loc(Loc) {} |
404 | | }; |
405 | | } // namespace |
406 | | |
407 | | static llvm::Function *emitOutlinedFunctionPrologue( |
408 | | CodeGenFunction &CGF, FunctionArgList &Args, |
409 | | llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> |
410 | | &LocalAddrs, |
411 | | llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> |
412 | | &VLASizes, |
413 | 23.3k | llvm::Value *&CXXThisValue, const FunctionOptions &FO) { |
414 | 23.3k | const CapturedDecl *CD = FO.S->getCapturedDecl(); |
415 | 23.3k | const RecordDecl *RD = FO.S->getCapturedRecordDecl(); |
416 | 23.3k | assert(CD->hasBody() && "missing CapturedDecl body"); |
417 | | |
418 | 23.3k | CXXThisValue = nullptr; |
419 | | // Build the argument list. |
420 | 23.3k | CodeGenModule &CGM = CGF.CGM; |
421 | 23.3k | ASTContext &Ctx = CGM.getContext(); |
422 | 23.3k | FunctionArgList TargetArgs; |
423 | 23.3k | Args.append(CD->param_begin(), |
424 | 23.3k | std::next(CD->param_begin(), CD->getContextParamPosition())); |
425 | 23.3k | TargetArgs.append( |
426 | 23.3k | CD->param_begin(), |
427 | 23.3k | std::next(CD->param_begin(), CD->getContextParamPosition())); |
428 | 23.3k | auto I = FO.S->captures().begin(); |
429 | 23.3k | FunctionDecl *DebugFunctionDecl = nullptr; |
430 | 23.3k | if (!FO.UIntPtrCastRequired) { |
431 | 109 | FunctionProtoType::ExtProtoInfo EPI; |
432 | 109 | QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI); |
433 | 109 | DebugFunctionDecl = FunctionDecl::Create( |
434 | 109 | Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(), |
435 | 109 | SourceLocation(), DeclarationName(), FunctionTy, |
436 | 109 | Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static, |
437 | 109 | /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false); |
438 | 109 | } |
439 | 36.3k | for (const FieldDecl *FD : RD->fields()) { |
440 | 36.3k | QualType ArgType = FD->getType(); |
441 | 36.3k | IdentifierInfo *II = nullptr; |
442 | 36.3k | VarDecl *CapVar = nullptr; |
443 | | |
444 | | // If this is a capture by copy and the type is not a pointer, the outlined |
445 | | // function argument type should be uintptr and the value properly cast to
446 | | // uintptr. This is necessary given that the runtime library is only able to
447 | | // deal with pointers. The VLA type sizes can be passed to the outlined
448 | | // function in the same way.
449 | 36.1k | ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
450 | 36.1k | ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()17.1k ) || |
451 | 20.8k | I->capturesVariableArrayType())) |
452 | 18.2k | ArgType = Ctx.getUIntPtrType(); |
453 | | |
454 | 36.3k | if (I->capturesVariable() || I->capturesVariableByCopy()) {
455 | 31.4k | CapVar = I->getCapturedVar(); |
456 | 31.4k | II = CapVar->getIdentifier(); |
457 | 4.87k | } else if (I->capturesThis()) { |
458 | 1.85k | II = &Ctx.Idents.get("this"); |
459 | 3.01k | } else { |
460 | 3.01k | assert(I->capturesVariableArrayType()); |
461 | 3.01k | II = &Ctx.Idents.get("vla"); |
462 | 3.01k | } |
463 | 36.3k | if (ArgType->isVariablyModifiedType()) |
464 | 2.10k | ArgType = getCanonicalParamType(Ctx, ArgType); |
465 | 36.3k | VarDecl *Arg; |
466 | 36.3k | if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
467 | 181 | Arg = ParmVarDecl::Create( |
468 | 181 | Ctx, DebugFunctionDecl, |
469 | 175 | CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc()6 , |
470 | 175 | CapVar ? CapVar->getLocation() : FD->getLocation()6 , II, ArgType, |
471 | 181 | /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr); |
472 | 36.1k | } else { |
473 | 36.1k | Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(), |
474 | 36.1k | II, ArgType, ImplicitParamDecl::Other); |
475 | 36.1k | } |
476 | 36.3k | Args.emplace_back(Arg); |
477 | | // Do not cast arguments if we emit function with non-original types. |
478 | 36.3k | TargetArgs.emplace_back( |
479 | 36.3k | FO.UIntPtrCastRequired |
480 | 36.1k | ? Arg |
481 | 191 | : CGM.getOpenMPRuntime().translateParameter(FD, Arg)); |
482 | 36.3k | ++I; |
483 | 36.3k | } |
484 | 23.3k | Args.append( |
485 | 23.3k | std::next(CD->param_begin(), CD->getContextParamPosition() + 1), |
486 | 23.3k | CD->param_end()); |
487 | 23.3k | TargetArgs.append( |
488 | 23.3k | std::next(CD->param_begin(), CD->getContextParamPosition() + 1), |
489 | 23.3k | CD->param_end()); |
490 | | |
491 | | // Create the function declaration. |
492 | 23.3k | const CGFunctionInfo &FuncInfo = |
493 | 23.3k | CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs); |
494 | 23.3k | llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo); |
495 | | |
496 | 23.3k | auto *F = |
497 | 23.3k | llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage, |
498 | 23.3k | FO.FunctionName, &CGM.getModule()); |
499 | 23.3k | CGM.SetInternalFunctionAttributes(CD, F, FuncInfo); |
500 | 23.3k | if (CD->isNothrow()) |
501 | 23.3k | F->setDoesNotThrow(); |
502 | 23.3k | F->setDoesNotRecurse(); |
503 | | |
504 | | // Generate the function. |
505 | 23.3k | CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs, |
506 | 23.2k | FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(),
507 | 23.2k | FO.UIntPtrCastRequired ? FO.Loc |
508 | 109 | : CD->getBody()->getBeginLoc()); |
509 | 23.3k | unsigned Cnt = CD->getContextParamPosition(); |
510 | 23.3k | I = FO.S->captures().begin(); |
511 | 36.3k | for (const FieldDecl *FD : RD->fields()) { |
512 | | // Do not map arguments if we emit function with non-original types. |
513 | 36.3k | Address LocalAddr(Address::invalid()); |
514 | 36.3k | if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
515 | 54 | LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt], |
516 | 54 | TargetArgs[Cnt]); |
517 | 36.2k | } else { |
518 | 36.2k | LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]); |
519 | 36.2k | } |
520 | | // If we are capturing a pointer by copy, we don't need to do anything; just
521 | | // use the value that we get from the arguments. |
522 | 36.3k | if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
523 | 1.92k | const VarDecl *CurVD = I->getCapturedVar(); |
524 | 1.92k | if (!FO.RegisterCastedArgsOnly) |
525 | 1.92k | LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}}); |
526 | 1.92k | ++Cnt; |
527 | 1.92k | ++I; |
528 | 1.92k | continue; |
529 | 1.92k | } |
530 | | |
531 | 34.3k | LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(), |
532 | 34.3k | AlignmentSource::Decl); |
533 | 34.3k | if (FD->hasCapturedVLAType()) { |
534 | 3.01k | if (FO.UIntPtrCastRequired) { |
535 | 3.00k | ArgLVal = CGF.MakeAddrLValue( |
536 | 3.00k | castValueFromUintptr(CGF, I->getLocation(), FD->getType(), |
537 | 3.00k | Args[Cnt]->getName(), ArgLVal), |
538 | 3.00k | FD->getType(), AlignmentSource::Decl); |
539 | 3.00k | } |
540 | 3.01k | llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation()); |
541 | 3.01k | const VariableArrayType *VAT = FD->getCapturedVLAType(); |
542 | 3.01k | VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg); |
543 | 31.3k | } else if (I->capturesVariable()) { |
544 | 14.2k | const VarDecl *Var = I->getCapturedVar(); |
545 | 14.2k | QualType VarTy = Var->getType(); |
546 | 14.2k | Address ArgAddr = ArgLVal.getAddress(CGF); |
547 | 14.2k | if (ArgLVal.getType()->isLValueReferenceType()) { |
548 | 14.2k | ArgAddr = CGF.EmitLoadOfReference(ArgLVal); |
549 | 0 | } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) { |
550 | 0 | assert(ArgLVal.getType()->isPointerType()); |
551 | 0 | ArgAddr = CGF.EmitLoadOfPointer( |
552 | 0 | ArgAddr, ArgLVal.getType()->castAs<PointerType>()); |
553 | 0 | } |
554 | 14.2k | if (!FO.RegisterCastedArgsOnly) { |
555 | 14.0k | LocalAddrs.insert( |
556 | 14.0k | {Args[Cnt], |
557 | 14.0k | {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}}); |
558 | 14.0k | } |
559 | 17.1k | } else if (I->capturesVariableByCopy()) { |
560 | 15.2k | assert(!FD->getType()->isAnyPointerType() && |
561 | 15.2k | "Not expecting a captured pointer."); |
562 | 15.2k | const VarDecl *Var = I->getCapturedVar(); |
563 | 15.2k | LocalAddrs.insert({Args[Cnt], |
564 | 15.2k | {Var, FO.UIntPtrCastRequired |
565 | 15.2k | ? castValueFromUintptr( |
566 | 15.2k | CGF, I->getLocation(), FD->getType(), |
567 | 15.2k | Args[Cnt]->getName(), ArgLVal) |
568 | 22 | : ArgLVal.getAddress(CGF)}}); |
569 | 1.85k | } else { |
570 | | // If 'this' is captured, load it into CXXThisValue. |
571 | 1.85k | assert(I->capturesThis()); |
572 | 1.85k | CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation()); |
573 | 1.85k | LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress(CGF)}}); |
574 | 1.85k | } |
575 | 34.3k | ++Cnt; |
576 | 34.3k | ++I; |
577 | 34.3k | } |
578 | | |
579 | 23.3k | return F; |
580 | 23.3k | } |
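// [Editorial sketch] The net effect of the prologue on a hypothetical capture
// set. For
//   int X; double D; int *P;
//   #pragma omp parallel firstprivate(X, D, P)
// the by-copy non-pointer captures are widened to uintptr (the runtime only
// traffics in pointer-sized values), while pointers pass through unchanged,
// giving an outlined signature along the lines of:
//   void outlined(int *GlobalTid, int *BoundTid,
//                 uintptr_t X, uintptr_t D, int *P);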
581 | | |
582 | | llvm::Function * |
583 | | CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, |
584 | 23.2k | SourceLocation Loc) { |
585 | 23.2k | assert( |
586 | 23.2k | CapturedStmtInfo && |
587 | 23.2k | "CapturedStmtInfo should be set when generating the captured function"); |
588 | 23.2k | const CapturedDecl *CD = S.getCapturedDecl(); |
589 | | // Build the argument list. |
590 | 23.2k | bool NeedWrapperFunction = |
591 | 23.2k | getDebugInfo() && CGM.getCodeGenOpts().hasReducedDebugInfo();
592 | 23.2k | FunctionArgList Args; |
593 | 23.2k | llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs; |
594 | 23.2k | llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes; |
595 | 23.2k | SmallString<256> Buffer; |
596 | 23.2k | llvm::raw_svector_ostream Out(Buffer); |
597 | 23.2k | Out << CapturedStmtInfo->getHelperName(); |
598 | 23.2k | if (NeedWrapperFunction) |
599 | 109 | Out << "_debug__"; |
600 | 23.2k | FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false, |
601 | 23.2k | Out.str(), Loc); |
602 | 23.2k | llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs, |
603 | 23.2k | VLASizes, CXXThisValue, FO); |
604 | 23.2k | CodeGenFunction::OMPPrivateScope LocalScope(*this); |
605 | 33.1k | for (const auto &LocalAddrPair : LocalAddrs) { |
606 | 33.1k | if (LocalAddrPair.second.first) { |
607 | 31.2k | LocalScope.addPrivate(LocalAddrPair.second.first, [&LocalAddrPair]() { |
608 | 31.2k | return LocalAddrPair.second.second; |
609 | 31.2k | }); |
610 | 31.2k | } |
611 | 33.1k | } |
612 | 23.2k | (void)LocalScope.Privatize(); |
613 | 23.2k | for (const auto &VLASizePair : VLASizes) |
614 | 3.00k | VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second; |
615 | 23.2k | PGO.assignRegionCounters(GlobalDecl(CD), F); |
616 | 23.2k | CapturedStmtInfo->EmitBody(*this, CD->getBody()); |
617 | 23.2k | (void)LocalScope.ForceCleanup(); |
618 | 23.2k | FinishFunction(CD->getBodyRBrace()); |
619 | 23.2k | if (!NeedWrapperFunction) |
620 | 23.1k | return F; |
621 | | |
622 | 109 | FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true, |
623 | 109 | /*RegisterCastedArgsOnly=*/true, |
624 | 109 | CapturedStmtInfo->getHelperName(), Loc); |
625 | 109 | CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true); |
626 | 109 | WrapperCGF.CapturedStmtInfo = CapturedStmtInfo; |
627 | 109 | Args.clear(); |
628 | 109 | LocalAddrs.clear(); |
629 | 109 | VLASizes.clear(); |
630 | 109 | llvm::Function *WrapperF = |
631 | 109 | emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes, |
632 | 109 | WrapperCGF.CXXThisValue, WrapperFO); |
633 | 109 | llvm::SmallVector<llvm::Value *, 4> CallArgs; |
634 | 305 | for (const auto *Arg : Args) { |
635 | 305 | llvm::Value *CallArg; |
636 | 305 | auto I = LocalAddrs.find(Arg); |
637 | 305 | if (I != LocalAddrs.end()) { |
638 | 28 | LValue LV = WrapperCGF.MakeAddrLValue( |
639 | 28 | I->second.second, |
640 | 22 | I->second.first ? I->second.first->getType() : Arg->getType(),
641 | 28 | AlignmentSource::Decl); |
642 | 28 | CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc()); |
643 | 277 | } else { |
644 | 277 | auto EI = VLASizes.find(Arg); |
645 | 277 | if (EI != VLASizes.end()) { |
646 | 10 | CallArg = EI->second.second; |
647 | 267 | } else { |
648 | 267 | LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg), |
649 | 267 | Arg->getType(), |
650 | 267 | AlignmentSource::Decl); |
651 | 267 | CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc()); |
652 | 267 | } |
653 | 277 | } |
654 | 305 | CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType())); |
655 | 305 | } |
656 | 109 | CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs); |
657 | 109 | WrapperCGF.FinishFunction(); |
658 | 109 | return WrapperF; |
659 | 109 | } |
660 | | |
661 | | //===----------------------------------------------------------------------===// |
662 | | // OpenMP Directive Emission |
663 | | //===----------------------------------------------------------------------===// |
664 | | void CodeGenFunction::EmitOMPAggregateAssign( |
665 | | Address DestAddr, Address SrcAddr, QualType OriginalType, |
666 | 539 | const llvm::function_ref<void(Address, Address)> CopyGen) { |
667 | | // Perform element-by-element initialization. |
668 | 539 | QualType ElementTy; |
669 | | |
670 | | // Drill down to the base element type on both arrays. |
671 | 539 | const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe(); |
672 | 539 | llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr); |
673 | 539 | SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); |
674 | | |
675 | 539 | llvm::Value *SrcBegin = SrcAddr.getPointer(); |
676 | 539 | llvm::Value *DestBegin = DestAddr.getPointer(); |
677 | | // Cast from pointer to array type to pointer to single element. |
678 | 539 | llvm::Value *DestEnd = Builder.CreateGEP(DestBegin, NumElements); |
679 | | // The basic structure here is a while-do loop. |
680 | 539 | llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body"); |
681 | 539 | llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done"); |
682 | 539 | llvm::Value *IsEmpty = |
683 | 539 | Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty"); |
684 | 539 | Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); |
685 | | |
686 | | // Enter the loop body, making that address the current address. |
687 | 539 | llvm::BasicBlock *EntryBB = Builder.GetInsertBlock(); |
688 | 539 | EmitBlock(BodyBB); |
689 | | |
690 | 539 | CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy); |
691 | | |
692 | 539 | llvm::PHINode *SrcElementPHI = |
693 | 539 | Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast"); |
694 | 539 | SrcElementPHI->addIncoming(SrcBegin, EntryBB); |
695 | 539 | Address SrcElementCurrent = |
696 | 539 | Address(SrcElementPHI, |
697 | 539 | SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); |
698 | | |
699 | 539 | llvm::PHINode *DestElementPHI = |
700 | 539 | Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); |
701 | 539 | DestElementPHI->addIncoming(DestBegin, EntryBB); |
702 | 539 | Address DestElementCurrent = |
703 | 539 | Address(DestElementPHI, |
704 | 539 | DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); |
705 | | |
706 | | // Emit copy. |
707 | 539 | CopyGen(DestElementCurrent, SrcElementCurrent); |
708 | | |
709 | | // Shift the address forward by one element. |
710 | 539 | llvm::Value *DestElementNext = Builder.CreateConstGEP1_32( |
711 | 539 | DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); |
712 | 539 | llvm::Value *SrcElementNext = Builder.CreateConstGEP1_32( |
713 | 539 | SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); |
714 | | // Check whether we've reached the end. |
715 | 539 | llvm::Value *Done = |
716 | 539 | Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); |
717 | 539 | Builder.CreateCondBr(Done, DoneBB, BodyBB); |
718 | 539 | DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock()); |
719 | 539 | SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock()); |
720 | | |
721 | | // Done. |
722 | 539 | EmitBlock(DoneBB, /*IsFinished=*/true); |
723 | 539 | } |
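// [Editorial sketch] The block structure built above corresponds to this
// guarded element-by-element copy (hypothetical C equivalent, for element
// type T and element count N):
//   T *D = DestBegin, *S = SrcBegin, *End = DestBegin + N;
//   if (D != End) {          // omp.arraycpy.isempty branch
//     do {
//       CopyGen(D, S);       // per-element copy emitted by the callback
//       ++D; ++S;            // omp.arraycpy.{dest,src}.element GEPs
//     } while (D != End);    // omp.arraycpy.done comparison
//   }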
724 | | |
725 | | void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr, |
726 | | Address SrcAddr, const VarDecl *DestVD, |
727 | 2.03k | const VarDecl *SrcVD, const Expr *Copy) { |
728 | 2.03k | if (OriginalType->isArrayType()) { |
729 | 602 | const auto *BO = dyn_cast<BinaryOperator>(Copy); |
730 | 602 | if (BO && BO->getOpcode() == BO_Assign) {
731 | | // Perform simple memcpy for simple copying. |
732 | 335 | LValue Dest = MakeAddrLValue(DestAddr, OriginalType); |
733 | 335 | LValue Src = MakeAddrLValue(SrcAddr, OriginalType); |
734 | 335 | EmitAggregateAssign(Dest, Src, OriginalType); |
735 | 267 | } else { |
736 | | // For arrays with complex element types perform element by element |
737 | | // copying. |
738 | 267 | EmitOMPAggregateAssign( |
739 | 267 | DestAddr, SrcAddr, OriginalType, |
740 | 267 | [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) { |
741 | | // Working with the single array element, so have to remap |
742 | | // destination and source variables to corresponding array |
743 | | // elements. |
744 | 267 | CodeGenFunction::OMPPrivateScope Remap(*this); |
745 | 267 | Remap.addPrivate(DestVD, [DestElement]() { return DestElement; }); |
746 | 267 | Remap.addPrivate(SrcVD, [SrcElement]() { return SrcElement; }); |
747 | 267 | (void)Remap.Privatize(); |
748 | 267 | EmitIgnoredExpr(Copy); |
749 | 267 | }); |
750 | 267 | } |
751 | 1.42k | } else { |
752 | | // Remap pseudo source variable to private copy. |
753 | 1.42k | CodeGenFunction::OMPPrivateScope Remap(*this); |
754 | 1.42k | Remap.addPrivate(SrcVD, [SrcAddr]() { return SrcAddr; }); |
755 | 1.42k | Remap.addPrivate(DestVD, [DestAddr]() { return DestAddr; }); |
756 | 1.42k | (void)Remap.Privatize(); |
757 | | // Emit copying of the whole variable. |
758 | 1.42k | EmitIgnoredExpr(Copy); |
759 | 1.42k | } |
760 | 2.03k | } |
761 | | |
762 | | bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D, |
763 | 18.7k | OMPPrivateScope &PrivateScope) { |
764 | 18.7k | if (!HaveInsertPoint()) |
765 | 0 | return false; |
766 | 18.7k | bool DeviceConstTarget = |
767 | 18.7k | getLangOpts().OpenMPIsDevice && |
768 | 3.82k | isOpenMPTargetExecutionDirective(D.getDirectiveKind()); |
769 | 18.7k | bool FirstprivateIsLastprivate = false; |
770 | 18.7k | llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates; |
771 | 436 | for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { |
772 | 436 | for (const auto *D : C->varlists()) |
773 | 1.55k | Lastprivates.try_emplace( |
774 | 1.55k | cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl(), |
775 | 1.55k | C->getKind()); |
776 | 436 | } |
777 | 18.7k | llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate; |
778 | 18.7k | llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; |
779 | 18.7k | getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind()); |
780 | | // Force emission of the firstprivate copy if the directive does not emit |
781 | | // an outlined function, like omp for, omp simd, omp distribute, etc.
782 | 18.7k | bool MustEmitFirstprivateCopy = |
783 | 18.7k | CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
784 | 6.83k | for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) { |
785 | 6.83k | const auto *IRef = C->varlist_begin(); |
786 | 6.83k | const auto *InitsRef = C->inits().begin(); |
787 | 10.4k | for (const Expr *IInit : C->private_copies()) { |
788 | 10.4k | const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); |
789 | 10.4k | bool ThisFirstprivateIsLastprivate = |
790 | 10.4k | Lastprivates.count(OrigVD->getCanonicalDecl()) > 0; |
791 | 10.4k | const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD); |
792 | 10.4k | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); |
793 | 10.4k | if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
794 | 10.2k | !FD->getType()->isReferenceType() && |
795 | 9.08k | (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) { |
796 | 9.08k | EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()); |
797 | 9.08k | ++IRef; |
798 | 9.08k | ++InitsRef; |
799 | 9.08k | continue; |
800 | 9.08k | } |
801 | | // Do not emit copy for firstprivate constant variables in target regions, |
802 | | // captured by reference. |
803 | 1.36k | if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) &&
804 | 6 | FD && FD->getType()->isReferenceType() && |
805 | 6 | (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) { |
806 | 6 | (void)CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(*this, |
807 | 6 | OrigVD); |
808 | 6 | ++IRef; |
809 | 6 | ++InitsRef; |
810 | 6 | continue; |
811 | 6 | } |
812 | 1.35k | FirstprivateIsLastprivate = |
813 | 1.35k | FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate; |
814 | 1.35k | if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) { |
815 | 1.26k | const auto *VDInit = |
816 | 1.26k | cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl()); |
817 | 1.26k | bool IsRegistered; |
818 | 1.26k | DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), |
819 | 1.26k | /*RefersToEnclosingVariableOrCapture=*/FD != nullptr, |
820 | 1.26k | (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); |
821 | 1.26k | LValue OriginalLVal; |
822 | 1.26k | if (!FD) { |
823 | | // Check if the firstprivate variable is just a constant value. |
824 | 58 | ConstantEmission CE = tryEmitAsConstant(&DRE); |
825 | 58 | if (CE && !CE.isReference()6 ) { |
826 | | // Constant value, no need to create a copy. |
827 | 4 | ++IRef; |
828 | 4 | ++InitsRef; |
829 | 4 | continue; |
830 | 4 | } |
831 | 54 | if (CE && CE.isReference()2 ) { |
832 | 2 | OriginalLVal = CE.getReferenceLValue(*this, &DRE); |
833 | 52 | } else { |
834 | 52 | assert(!CE && "Expected non-constant firstprivate."); |
835 | 52 | OriginalLVal = EmitLValue(&DRE); |
836 | 52 | } |
837 | 1.20k | } else { |
838 | 1.20k | OriginalLVal = EmitLValue(&DRE); |
839 | 1.20k | } |
840 | 1.25k | QualType Type = VD->getType(); |
841 | 1.25k | if (Type->isArrayType()) { |
842 | | // Emit VarDecl with copy init for arrays. |
843 | | // Get the address of the original variable captured in current |
844 | | // captured region. |
845 | 625 | IsRegistered = PrivateScope.addPrivate( |
846 | 625 | OrigVD, [this, VD, Type, OriginalLVal, VDInit]() { |
847 | 625 | AutoVarEmission Emission = EmitAutoVarAlloca(*VD); |
848 | 625 | const Expr *Init = VD->getInit(); |
849 | 625 | if (!isa<CXXConstructExpr>(Init) || |
850 | 405 | isTrivialInitializer(Init)) {
851 | | // Perform simple memcpy. |
852 | 405 | LValue Dest = |
853 | 405 | MakeAddrLValue(Emission.getAllocatedAddress(), Type); |
854 | 405 | EmitAggregateAssign(Dest, OriginalLVal, Type); |
855 | 220 | } else { |
856 | 220 | EmitOMPAggregateAssign( |
857 | 220 | Emission.getAllocatedAddress(), |
858 | 220 | OriginalLVal.getAddress(*this), Type, |
859 | 220 | [this, VDInit, Init](Address DestElement, |
860 | 220 | Address SrcElement) { |
861 | | // Clean up any temporaries needed by the |
862 | | // initialization. |
863 | 220 | RunCleanupsScope InitScope(*this); |
864 | | // Emit initialization for single element. |
865 | 220 | setAddrOfLocalVar(VDInit, SrcElement); |
866 | 220 | EmitAnyExprToMem(Init, DestElement, |
867 | 220 | Init->getType().getQualifiers(), |
868 | 220 | /*IsInitializer*/ false); |
869 | 220 | LocalDeclMap.erase(VDInit); |
870 | 220 | }); |
871 | 220 | } |
872 | 625 | EmitAutoVarCleanups(Emission); |
873 | 625 | return Emission.getAllocatedAddress(); |
874 | 625 | }); |
875 | 634 | } else { |
876 | 634 | Address OriginalAddr = OriginalLVal.getAddress(*this); |
877 | 634 | IsRegistered = |
878 | 634 | PrivateScope.addPrivate(OrigVD, [this, VDInit, OriginalAddr, VD, |
879 | 634 | ThisFirstprivateIsLastprivate, |
880 | 634 | OrigVD, &Lastprivates, IRef]() { |
881 | | // Emit private VarDecl with copy init. |
882 | | // Remap temp VDInit variable to the address of the original |
883 | | // variable (for proper handling of captured global variables). |
884 | 634 | setAddrOfLocalVar(VDInit, OriginalAddr); |
885 | 634 | EmitDecl(*VD); |
886 | 634 | LocalDeclMap.erase(VDInit); |
887 | 634 | if (ThisFirstprivateIsLastprivate && |
888 | 8 | Lastprivates[OrigVD->getCanonicalDecl()] == |
889 | 0 | OMPC_LASTPRIVATE_conditional) { |
890 | | // Create/init special variable for lastprivate conditionals. |
891 | 0 | Address VDAddr = |
892 | 0 | CGM.getOpenMPRuntime().emitLastprivateConditionalInit( |
893 | 0 | *this, OrigVD); |
894 | 0 | llvm::Value *V = EmitLoadOfScalar( |
895 | 0 | MakeAddrLValue(GetAddrOfLocalVar(VD), (*IRef)->getType(), |
896 | 0 | AlignmentSource::Decl), |
897 | 0 | (*IRef)->getExprLoc()); |
898 | 0 | EmitStoreOfScalar(V, |
899 | 0 | MakeAddrLValue(VDAddr, (*IRef)->getType(), |
900 | 0 | AlignmentSource::Decl)); |
901 | 0 | LocalDeclMap.erase(VD); |
902 | 0 | setAddrOfLocalVar(VD, VDAddr); |
903 | 0 | return VDAddr; |
904 | 0 | } |
905 | 634 | return GetAddrOfLocalVar(VD); |
906 | 634 | }); |
907 | 634 | } |
908 | 1.25k | assert(IsRegistered && |
909 | 1.25k | "firstprivate var already registered as private"); |
910 | | // Silence the warning about unused variable. |
911 | 1.25k | (void)IsRegistered; |
912 | 1.25k | } |
913 | 1.35k | ++IRef; |
914 | 1.35k | ++InitsRef; |
915 | 1.35k | } |
916 | 6.83k | } |
917 | 18.7k | return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
918 | 18.7k | } |
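// [Editorial sketch] A hypothetical directive exercising both array branches
// above: a POD array takes the plain EmitAggregateAssign (memcpy) path, while
// an array of a class with a user-provided copy constructor takes the
// element-by-element EmitOMPAggregateAssign path.
struct Tracked {
  Tracked() = default;
  Tracked(const Tracked &O) : V(O.V) {} // non-trivial copy
  int V = 0;
};
void firstprivate_demo(int (&Ints)[4], Tracked (&Objs)[4]) {
#pragma omp parallel firstprivate(Ints, Objs)
  { /* each thread starts from its own copies of Ints and Objs */ }
}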
919 | | |
920 | | void CodeGenFunction::EmitOMPPrivateClause( |
921 | | const OMPExecutableDirective &D, |
922 | 32.6k | CodeGenFunction::OMPPrivateScope &PrivateScope) { |
923 | 32.6k | if (!HaveInsertPoint()) |
924 | 0 | return; |
925 | 32.6k | llvm::DenseSet<const VarDecl *> EmittedAsPrivate; |
926 | 1.03k | for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) { |
927 | 1.03k | auto IRef = C->varlist_begin(); |
928 | 3.19k | for (const Expr *IInit : C->private_copies()) { |
929 | 3.19k | const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); |
930 | 3.19k | if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { |
931 | 2.92k | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); |
932 | 2.92k | bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD]() { |
933 | | // Emit private VarDecl with copy init. |
934 | 2.92k | EmitDecl(*VD); |
935 | 2.92k | return GetAddrOfLocalVar(VD); |
936 | 2.92k | }); |
937 | 2.92k | assert(IsRegistered && "private var already registered as private"); |
938 | | // Silence the warning about unused variable. |
939 | 2.92k | (void)IsRegistered; |
940 | 2.92k | } |
941 | 3.19k | ++IRef; |
942 | 3.19k | } |
943 | 1.03k | } |
944 | 32.6k | } |
945 | | |
946 | 943 | bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) { |
947 | 943 | if (!HaveInsertPoint()) |
948 | 0 | return false; |
949 | | // threadprivate_var1 = master_threadprivate_var1; |
950 | | // operator=(threadprivate_var2, master_threadprivate_var2); |
951 | | // ... |
952 | | // __kmpc_barrier(&loc, global_tid); |
953 | 943 | llvm::DenseSet<const VarDecl *> CopiedVars; |
954 | 943 | llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr; |
955 | 27 | for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) { |
956 | 27 | auto IRef = C->varlist_begin(); |
957 | 27 | auto ISrcRef = C->source_exprs().begin(); |
958 | 27 | auto IDestRef = C->destination_exprs().begin(); |
959 | 53 | for (const Expr *AssignOp : C->assignment_ops()) { |
960 | 53 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); |
961 | 53 | QualType Type = VD->getType(); |
962 | 53 | if (CopiedVars.insert(VD->getCanonicalDecl()).second) { |
963 | | // Get the address of the master variable. If we are emitting code with |
964 | | // TLS support, the address is passed from the master as a field in the
965 | | // captured declaration. |
966 | 53 | Address MasterAddr = Address::invalid(); |
967 | 53 | if (getLangOpts().OpenMPUseTLS && |
968 | 27 | getContext().getTargetInfo().isTLSSupported()) { |
969 | 27 | assert(CapturedStmtInfo->lookup(VD) && |
970 | 27 | "Copyin threadprivates should have been captured!"); |
971 | 27 | DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true, |
972 | 27 | (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); |
973 | 27 | MasterAddr = EmitLValue(&DRE).getAddress(*this); |
974 | 27 | LocalDeclMap.erase(VD); |
975 | 26 | } else { |
976 | 26 | MasterAddr = |
977 | 22 | Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD) |
978 | 4 | : CGM.GetAddrOfGlobal(VD), |
979 | 26 | getContext().getDeclAlign(VD)); |
980 | 26 | } |
981 | | // Get the address of the threadprivate variable. |
982 | 53 | Address PrivateAddr = EmitLValue(*IRef).getAddress(*this); |
983 | 53 | if (CopiedVars.size() == 1) { |
984 | | // First, check if the current thread is the master thread. If it is,
985 | | // there is no need to copy data.
986 | 27 | CopyBegin = createBasicBlock("copyin.not.master"); |
987 | 27 | CopyEnd = createBasicBlock("copyin.not.master.end"); |
988 | 27 | Builder.CreateCondBr( |
989 | 27 | Builder.CreateICmpNE( |
990 | 27 | Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy), |
991 | 27 | Builder.CreatePtrToInt(PrivateAddr.getPointer(), |
992 | 27 | CGM.IntPtrTy)), |
993 | 27 | CopyBegin, CopyEnd); |
994 | 27 | EmitBlock(CopyBegin); |
995 | 27 | } |
996 | 53 | const auto *SrcVD = |
997 | 53 | cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl()); |
998 | 53 | const auto *DestVD = |
999 | 53 | cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); |
1000 | 53 | EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp); |
1001 | 53 | } |
1002 | 53 | ++IRef; |
1003 | 53 | ++ISrcRef; |
1004 | 53 | ++IDestRef; |
1005 | 53 | } |
1006 | 27 | } |
1007 | 943 | if (CopyEnd) { |
1008 | | // Exit out of copying procedure for non-master thread. |
1009 | 27 | EmitBlock(CopyEnd, /*IsFinished=*/true); |
1010 | 27 | return true; |
1011 | 27 | } |
1012 | 916 | return false; |
1013 | 916 | } |
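// [Editorial sketch] The source pattern the copyin emission above serves: on
// entry to the parallel region every non-master thread copies the master's
// threadprivate value, and the caller emits the closing __kmpc_barrier once
// this function returns true.
int TP;
#pragma omp threadprivate(TP)
void copyin_demo() {
  TP = 42; // master's copy, set before the region
#pragma omp parallel copyin(TP)
  { /* all threads now observe TP == 42 */ }
}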
1014 | | |
1015 | | bool CodeGenFunction::EmitOMPLastprivateClauseInit( |
1016 | 13.4k | const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) { |
1017 | 13.4k | if (!HaveInsertPoint()) |
1018 | 0 | return false; |
1019 | 13.4k | bool HasAtLeastOneLastprivate = false; |
1020 | 13.4k | llvm::DenseSet<const VarDecl *> SIMDLCVs; |
1021 | 13.4k | if (isOpenMPSimdDirective(D.getDirectiveKind())) { |
1022 | 8.73k | const auto *LoopDirective = cast<OMPLoopDirective>(&D); |
1023 | 9.10k | for (const Expr *C : LoopDirective->counters()) { |
1024 | 9.10k | SIMDLCVs.insert( |
1025 | 9.10k | cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl()); |
1026 | 9.10k | } |
1027 | 8.73k | } |
1028 | 13.4k | llvm::DenseSet<const VarDecl *> AlreadyEmittedVars; |
1029 | 612 | for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { |
1030 | 612 | HasAtLeastOneLastprivate = true; |
1031 | 612 | if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && |
1032 | 74 | !getLangOpts().OpenMPSimd) |
1033 | 49 | break; |
1034 | 563 | const auto *IRef = C->varlist_begin(); |
1035 | 563 | const auto *IDestRef = C->destination_exprs().begin(); |
1036 | 2.09k | for (const Expr *IInit : C->private_copies()) { |
1037 | | // Keep the address of the original variable for future update at the end |
1038 | | // of the loop. |
1039 | 2.09k | const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); |
1040 | | // Taskloops do not require additional initialization; it is done in the
1041 | | // runtime support library. |
1042 | 2.09k | if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) { |
1043 | 1.67k | const auto *DestVD = |
1044 | 1.67k | cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); |
1045 | 1.67k | PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() { |
1046 | 1.67k | DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), |
1047 | | /*RefersToEnclosingVariableOrCapture=*/ |
1048 | 1.67k | CapturedStmtInfo->lookup(OrigVD) != nullptr, |
1049 | 1.67k | (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); |
1050 | 1.67k | return EmitLValue(&DRE).getAddress(*this); |
1051 | 1.67k | }); |
1052 | | // Check if the variable is also a firstprivate: in this case IInit is |
1053 | | // not generated. Initialization of this variable will happen in codegen |
1054 | | // for the 'firstprivate' clause.
1055 | 1.67k | if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
1056 | 1.58k | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); |
1057 | 1.58k | bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD, C, |
1058 | 1.58k | OrigVD]() { |
1059 | 1.58k | if (C->getKind() == OMPC_LASTPRIVATE_conditional) { |
1060 | 10 | Address VDAddr = |
1061 | 10 | CGM.getOpenMPRuntime().emitLastprivateConditionalInit(*this, |
1062 | 10 | OrigVD); |
1063 | 10 | setAddrOfLocalVar(VD, VDAddr); |
1064 | 10 | return VDAddr; |
1065 | 10 | } |
1066 | | // Emit private VarDecl with copy init. |
1067 | 1.57k | EmitDecl(*VD); |
1068 | 1.57k | return GetAddrOfLocalVar(VD); |
1069 | 1.57k | }); |
1070 | 1.58k | assert(IsRegistered && |
1071 | 1.58k | "lastprivate var already registered as private"); |
1072 | 1.58k | (void)IsRegistered; |
1073 | 1.58k | } |
1074 | 1.67k | } |
1075 | 2.09k | ++IRef; |
1076 | 2.09k | ++IDestRef; |
1077 | 2.09k | } |
1078 | 563 | } |
1079 | 13.4k | return HasAtLeastOneLastprivate; |
1080 | 13.4k | } |
1081 | | |
1082 | | void CodeGenFunction::EmitOMPLastprivateClauseFinal( |
1083 | | const OMPExecutableDirective &D, bool NoFinals, |
1084 | 604 | llvm::Value *IsLastIterCond) { |
1085 | 604 | if (!HaveInsertPoint()) |
1086 | 0 | return; |
1087 | | // Emit following code: |
1088 | | // if (<IsLastIterCond>) { |
1089 | | // orig_var1 = private_orig_var1; |
1090 | | // ... |
1091 | | // orig_varn = private_orig_varn; |
1092 | | // } |
1093 | 604 | llvm::BasicBlock *ThenBB = nullptr; |
1094 | 604 | llvm::BasicBlock *DoneBB = nullptr; |
1095 | 604 | if (IsLastIterCond) { |
1096 | | // Emit an implicit barrier if at least one lastprivate conditional is
1097 | | // found and this is not simd mode.
1098 | 477 | if (!getLangOpts().OpenMPSimd && |
1099 | 477 | llvm::any_of(D.getClausesOfKind<OMPLastprivateClause>(), |
1100 | 485 | [](const OMPLastprivateClause *C) { |
1101 | 485 | return C->getKind() == OMPC_LASTPRIVATE_conditional; |
1102 | 6 | })) { |
1103 | 6 | CGM.getOpenMPRuntime().emitBarrierCall(*this, D.getBeginLoc(), |
1104 | 6 | OMPD_unknown, |
1105 | 6 | /*EmitChecks=*/false, |
1106 | 6 | /*ForceSimpleCall=*/true); |
1107 | 6 | } |
1108 | 477 | ThenBB = createBasicBlock(".omp.lastprivate.then"); |
1109 | 477 | DoneBB = createBasicBlock(".omp.lastprivate.done"); |
1110 | 477 | Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB); |
1111 | 477 | EmitBlock(ThenBB); |
1112 | 477 | } |
1113 | 604 | llvm::DenseSet<const VarDecl *> AlreadyEmittedVars; |
1114 | 604 | llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates; |
1115 | 604 | if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) { |
1116 | 588 | auto IC = LoopDirective->counters().begin(); |
1117 | 608 | for (const Expr *F : LoopDirective->finals()) { |
1118 | 608 | const auto *D = |
1119 | 608 | cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl(); |
1120 | 608 | if (NoFinals) |
1121 | 232 | AlreadyEmittedVars.insert(D); |
1122 | 376 | else |
1123 | 376 | LoopCountersAndUpdates[D] = F; |
1124 | 608 | ++IC; |
1125 | 608 | } |
1126 | 588 | } |
1127 | 612 | for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { |
1128 | 612 | auto IRef = C->varlist_begin(); |
1129 | 612 | auto ISrcRef = C->source_exprs().begin(); |
1130 | 612 | auto IDestRef = C->destination_exprs().begin(); |
1131 | 2.28k | for (const Expr *AssignOp : C->assignment_ops()) { |
1132 | 2.28k | const auto *PrivateVD = |
1133 | 2.28k | cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); |
1134 | 2.28k | QualType Type = PrivateVD->getType(); |
1135 | 2.28k | const auto *CanonicalVD = PrivateVD->getCanonicalDecl(); |
1136 | 2.28k | if (AlreadyEmittedVars.insert(CanonicalVD).second) { |
1137 | | // If a lastprivate variable is a loop control variable of a loop-based
1138 | | // directive, update its value before copying it back to the original
1139 | | // variable. |
1140 | 1.80k | if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD)) |
1141 | 13 | EmitIgnoredExpr(FinalExpr); |
1142 | 1.80k | const auto *SrcVD = |
1143 | 1.80k | cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl()); |
1144 | 1.80k | const auto *DestVD = |
1145 | 1.80k | cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); |
1146 | | // Get the address of the private variable. |
1147 | 1.80k | Address PrivateAddr = GetAddrOfLocalVar(PrivateVD); |
1148 | 1.80k | if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>()) |
1149 | 334 | PrivateAddr = |
1150 | 334 | Address(Builder.CreateLoad(PrivateAddr), |
1151 | 334 | CGM.getNaturalTypeAlignment(RefTy->getPointeeType())); |
1152 | | // Store the last value to the private copy in the last iteration. |
1153 | 1.80k | if (C->getKind() == OMPC_LASTPRIVATE_conditional) |
1154 | 10 | CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate( |
1155 | 10 | *this, MakeAddrLValue(PrivateAddr, (*IRef)->getType()), PrivateVD, |
1156 | 10 | (*IRef)->getExprLoc()); |
1157 | | // Get the address of the original variable. |
1158 | 1.80k | Address OriginalAddr = GetAddrOfLocalVar(DestVD); |
1159 | 1.80k | EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp); |
1160 | 1.80k | } |
1161 | 2.28k | ++IRef; |
1162 | 2.28k | ++ISrcRef; |
1163 | 2.28k | ++IDestRef; |
1164 | 2.28k | } |
1165 | 612 | if (const Expr *PostUpdate = C->getPostUpdateExpr()) |
1166 | 10 | EmitIgnoredExpr(PostUpdate); |
1167 | 612 | } |
1168 | 604 | if (IsLastIterCond) |
1169 | 477 | EmitBlock(DoneBB, /*IsFinished=*/true); |
1170 | 604 | } |
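As a minimal illustration of the path above (a sketch, not part of this file; compute and n are placeholders):

    int x = 0;
    #pragma omp parallel for lastprivate(x)
    for (int i = 0; i < n; ++i)
      x = compute(i);
    // Only the thread that executes the sequentially last iteration takes
    // the .omp.lastprivate.then branch and copies its private x back to the
    // original variable, so x == compute(n - 1) after the loop.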
1171 | | |
1172 | | void CodeGenFunction::EmitOMPReductionClauseInit( |
1173 | | const OMPExecutableDirective &D, |
1174 | 27.2k | CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) { |
1175 | 27.2k | if (!HaveInsertPoint()) |
1176 | 0 | return; |
1177 | 27.2k | SmallVector<const Expr *, 4> Shareds; |
1178 | 27.2k | SmallVector<const Expr *, 4> Privates; |
1179 | 27.2k | SmallVector<const Expr *, 4> ReductionOps; |
1180 | 27.2k | SmallVector<const Expr *, 4> LHSs; |
1181 | 27.2k | SmallVector<const Expr *, 4> RHSs; |
1182 | 27.2k | OMPTaskDataTy Data; |
1183 | 27.2k | SmallVector<const Expr *, 4> TaskLHSs; |
1184 | 27.2k | SmallVector<const Expr *, 4> TaskRHSs; |
1185 | 1.12k | for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { |
1186 | 1.12k | if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan)) |
1187 | 417 | continue; |
1188 | 707 | Shareds.append(C->varlist_begin(), C->varlist_end()); |
1189 | 707 | Privates.append(C->privates().begin(), C->privates().end()); |
1190 | 707 | ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); |
1191 | 707 | LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); |
1192 | 707 | RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); |
1193 | 707 | if (C->getModifier() == OMPC_REDUCTION_task) { |
1194 | 26 | Data.ReductionVars.append(C->privates().begin(), C->privates().end()); |
1195 | 26 | Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end()); |
1196 | 26 | Data.ReductionCopies.append(C->privates().begin(), C->privates().end()); |
1197 | 26 | Data.ReductionOps.append(C->reduction_ops().begin(), |
1198 | 26 | C->reduction_ops().end()); |
1199 | 26 | TaskLHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); |
1200 | 26 | TaskRHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); |
1201 | 26 | } |
1202 | 707 | } |
1203 | 27.2k | ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps); |
1204 | 27.2k | unsigned Count = 0; |
1205 | 27.2k | auto *ILHS = LHSs.begin(); |
1206 | 27.2k | auto *IRHS = RHSs.begin(); |
1207 | 27.2k | auto *IPriv = Privates.begin(); |
1208 | 796 | for (const Expr *IRef : Shareds) { |
1209 | 796 | const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl()); |
1210 | | // Emit private VarDecl with reduction init. |
1211 | 796 | RedCG.emitSharedOrigLValue(*this, Count); |
1212 | 796 | RedCG.emitAggregateType(*this, Count); |
1213 | 796 | AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD); |
1214 | 796 | RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(), |
1215 | 796 | RedCG.getSharedLValue(Count), |
1216 | 604 | [&Emission](CodeGenFunction &CGF) { |
1217 | 604 | CGF.EmitAutoVarInit(Emission); |
1218 | 604 | return true; |
1219 | 604 | }); |
1220 | 796 | EmitAutoVarCleanups(Emission); |
1221 | 796 | Address BaseAddr = RedCG.adjustPrivateAddress( |
1222 | 796 | *this, Count, Emission.getAllocatedAddress()); |
1223 | 796 | bool IsRegistered = PrivateScope.addPrivate( |
1224 | 796 | RedCG.getBaseDecl(Count), [BaseAddr]() { return BaseAddr; }); |
1225 | 796 | assert(IsRegistered && "private var already registered as private"); |
1226 | | // Silence the warning about unused variable. |
1227 | 796 | (void)IsRegistered; |
1228 | | |
1229 | 796 | const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); |
1230 | 796 | const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); |
1231 | 796 | QualType Type = PrivateVD->getType(); |
1232 | 796 | bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef); |
1233 | 796 | if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()151 ) { |
1234 | | // Store the address of the original variable associated with the LHS |
1235 | | // implicit variable. |
1236 | 108 | PrivateScope.addPrivate(LHSVD, [&RedCG, Count, this]() { |
1237 | 108 | return RedCG.getSharedLValue(Count).getAddress(*this); |
1238 | 108 | }); |
1239 | 108 | PrivateScope.addPrivate( |
1240 | 108 | RHSVD, [this, PrivateVD]() { return GetAddrOfLocalVar(PrivateVD); }); |
1241 | 688 | } else if ((isaOMPArraySectionExpr && Type->isScalarType()43 ) || |
1242 | 688 | isa<ArraySubscriptExpr>(IRef)) { |
1243 | | // Store the address of the original variable associated with the LHS |
1244 | | // implicit variable. |
1245 | 0 | PrivateScope.addPrivate(LHSVD, [&RedCG, Count, this]() { |
1246 | 0 | return RedCG.getSharedLValue(Count).getAddress(*this); |
1247 | 0 | }); |
1248 | 0 | PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() { |
1249 | 0 | return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD), |
1250 | 0 | ConvertTypeForMem(RHSVD->getType()), |
1251 | 0 | "rhs.begin"); |
1252 | 0 | }); |
1253 | 688 | } else { |
1254 | 688 | QualType Type = PrivateVD->getType(); |
1255 | 688 | bool IsArray = getContext().getAsArrayType(Type) != nullptr; |
1256 | 688 | Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress(*this); |
1257 | | // Store the address of the original variable associated with the LHS |
1258 | | // implicit variable. |
1259 | 688 | if (IsArray) { |
1260 | 105 | OriginalAddr = Builder.CreateElementBitCast( |
1261 | 105 | OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin"); |
1262 | 105 | } |
1263 | 688 | PrivateScope.addPrivate(LHSVD, [OriginalAddr]() { return OriginalAddr; }); |
1264 | 688 | PrivateScope.addPrivate( |
1265 | 688 | RHSVD, [this, PrivateVD, RHSVD, IsArray]() { |
1266 | 688 | return IsArray |
1267 | 105 | ? Builder.CreateElementBitCast( |
1268 | 105 | GetAddrOfLocalVar(PrivateVD), |
1269 | 105 | ConvertTypeForMem(RHSVD->getType()), "rhs.begin") |
1270 | 583 | : GetAddrOfLocalVar(PrivateVD); |
1271 | 688 | }); |
1272 | 688 | } |
1273 | 796 | ++ILHS; |
1274 | 796 | ++IRHS; |
1275 | 796 | ++IPriv; |
1276 | 796 | ++Count; |
1277 | 796 | } |
1278 | 27.2k | if (!Data.ReductionVars.empty()) { |
1279 | 26 | Data.IsReductionWithTaskMod = true; |
1280 | 26 | Data.IsWorksharingReduction = |
1281 | 26 | isOpenMPWorksharingDirective(D.getDirectiveKind()); |
1282 | 26 | llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit( |
1283 | 26 | *this, D.getBeginLoc(), TaskLHSs, TaskRHSs, Data); |
1284 | 26 | const Expr *TaskRedRef = nullptr; |
1285 | 26 | switch (D.getDirectiveKind()) { |
1286 | 2 | case OMPD_parallel: |
1287 | 2 | TaskRedRef = cast<OMPParallelDirective>(D).getTaskReductionRefExpr(); |
1288 | 2 | break; |
1289 | 2 | case OMPD_for: |
1290 | 2 | TaskRedRef = cast<OMPForDirective>(D).getTaskReductionRefExpr(); |
1291 | 2 | break; |
1292 | 2 | case OMPD_sections: |
1293 | 2 | TaskRedRef = cast<OMPSectionsDirective>(D).getTaskReductionRefExpr(); |
1294 | 2 | break; |
1295 | 2 | case OMPD_parallel_for: |
1296 | 2 | TaskRedRef = cast<OMPParallelForDirective>(D).getTaskReductionRefExpr(); |
1297 | 2 | break; |
1298 | 2 | case OMPD_parallel_master: |
1299 | 2 | TaskRedRef = |
1300 | 2 | cast<OMPParallelMasterDirective>(D).getTaskReductionRefExpr(); |
1301 | 2 | break; |
1302 | 2 | case OMPD_parallel_sections: |
1303 | 2 | TaskRedRef = |
1304 | 2 | cast<OMPParallelSectionsDirective>(D).getTaskReductionRefExpr(); |
1305 | 2 | break; |
1306 | 2 | case OMPD_target_parallel: |
1307 | 2 | TaskRedRef = |
1308 | 2 | cast<OMPTargetParallelDirective>(D).getTaskReductionRefExpr(); |
1309 | 2 | break; |
1310 | 2 | case OMPD_target_parallel_for: |
1311 | 2 | TaskRedRef = |
1312 | 2 | cast<OMPTargetParallelForDirective>(D).getTaskReductionRefExpr(); |
1313 | 2 | break; |
1314 | 2 | case OMPD_distribute_parallel_for: |
1315 | 2 | TaskRedRef = |
1316 | 2 | cast<OMPDistributeParallelForDirective>(D).getTaskReductionRefExpr(); |
1317 | 2 | break; |
1318 | 4 | case OMPD_teams_distribute_parallel_for: |
1319 | 4 | TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(D) |
1320 | 4 | .getTaskReductionRefExpr(); |
1321 | 4 | break; |
1322 | 4 | case OMPD_target_teams_distribute_parallel_for: |
1323 | 4 | TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(D) |
1324 | 4 | .getTaskReductionRefExpr(); |
1325 | 4 | break; |
1326 | 0 | case OMPD_simd: |
1327 | 0 | case OMPD_for_simd: |
1328 | 0 | case OMPD_section: |
1329 | 0 | case OMPD_single: |
1330 | 0 | case OMPD_master: |
1331 | 0 | case OMPD_critical: |
1332 | 0 | case OMPD_parallel_for_simd: |
1333 | 0 | case OMPD_task: |
1334 | 0 | case OMPD_taskyield: |
1335 | 0 | case OMPD_barrier: |
1336 | 0 | case OMPD_taskwait: |
1337 | 0 | case OMPD_taskgroup: |
1338 | 0 | case OMPD_flush: |
1339 | 0 | case OMPD_depobj: |
1340 | 0 | case OMPD_scan: |
1341 | 0 | case OMPD_ordered: |
1342 | 0 | case OMPD_atomic: |
1343 | 0 | case OMPD_teams: |
1344 | 0 | case OMPD_target: |
1345 | 0 | case OMPD_cancellation_point: |
1346 | 0 | case OMPD_cancel: |
1347 | 0 | case OMPD_target_data: |
1348 | 0 | case OMPD_target_enter_data: |
1349 | 0 | case OMPD_target_exit_data: |
1350 | 0 | case OMPD_taskloop: |
1351 | 0 | case OMPD_taskloop_simd: |
1352 | 0 | case OMPD_master_taskloop: |
1353 | 0 | case OMPD_master_taskloop_simd: |
1354 | 0 | case OMPD_parallel_master_taskloop: |
1355 | 0 | case OMPD_parallel_master_taskloop_simd: |
1356 | 0 | case OMPD_distribute: |
1357 | 0 | case OMPD_target_update: |
1358 | 0 | case OMPD_distribute_parallel_for_simd: |
1359 | 0 | case OMPD_distribute_simd: |
1360 | 0 | case OMPD_target_parallel_for_simd: |
1361 | 0 | case OMPD_target_simd: |
1362 | 0 | case OMPD_teams_distribute: |
1363 | 0 | case OMPD_teams_distribute_simd: |
1364 | 0 | case OMPD_teams_distribute_parallel_for_simd: |
1365 | 0 | case OMPD_target_teams: |
1366 | 0 | case OMPD_target_teams_distribute: |
1367 | 0 | case OMPD_target_teams_distribute_parallel_for_simd: |
1368 | 0 | case OMPD_target_teams_distribute_simd: |
1369 | 0 | case OMPD_declare_target: |
1370 | 0 | case OMPD_end_declare_target: |
1371 | 0 | case OMPD_threadprivate: |
1372 | 0 | case OMPD_allocate: |
1373 | 0 | case OMPD_declare_reduction: |
1374 | 0 | case OMPD_declare_mapper: |
1375 | 0 | case OMPD_declare_simd: |
1376 | 0 | case OMPD_requires: |
1377 | 0 | case OMPD_declare_variant: |
1378 | 0 | case OMPD_begin_declare_variant: |
1379 | 0 | case OMPD_end_declare_variant: |
1380 | 0 | case OMPD_unknown: |
1381 | 0 | default: |
1382 | 0 | llvm_unreachable("Unexpected directive with task reductions.");
1383 | 26 | } |
1384 | | |
1385 | 26 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(TaskRedRef)->getDecl()); |
1386 | 26 | EmitVarDecl(*VD); |
1387 | 26 | EmitStoreOfScalar(ReductionDesc, GetAddrOfLocalVar(VD), |
1388 | 26 | /*Volatile=*/false, TaskRedRef->getType()); |
1389 | 26 | } |
1390 | 27.2k | } |
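The task-reduction branch above corresponds to the OpenMP 5.0 'task' reduction modifier. A sketch that would reach it (a and n are placeholders):

    int sum = 0;
    #pragma omp parallel for reduction(task, +: sum)
    for (int i = 0; i < n; ++i) {
      #pragma omp task in_reduction(+: sum)
      sum += a[i];
    }
    // The task modifier populates Data.ReductionVars, so
    // emitTaskReductionInit runs and its descriptor is stored in the
    // variable named by getTaskReductionRefExpr() (OMPD_parallel_for case).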
1391 | | |
1392 | | void CodeGenFunction::EmitOMPReductionClauseFinal( |
1393 | 16.3k | const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) { |
1394 | 16.3k | if (!HaveInsertPoint()) |
1395 | 2 | return; |
1396 | 16.3k | llvm::SmallVector<const Expr *, 8> Privates; |
1397 | 16.3k | llvm::SmallVector<const Expr *, 8> LHSExprs; |
1398 | 16.3k | llvm::SmallVector<const Expr *, 8> RHSExprs; |
1399 | 16.3k | llvm::SmallVector<const Expr *, 8> ReductionOps; |
1400 | 16.3k | bool HasAtLeastOneReduction = false; |
1401 | 16.3k | bool IsReductionWithTaskMod = false; |
1402 | 699 | for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { |
1403 | | // Do not emit for inscan reductions. |
1404 | 699 | if (C->getModifier() == OMPC_REDUCTION_inscan) |
1405 | 48 | continue; |
1406 | 651 | HasAtLeastOneReduction = true; |
1407 | 651 | Privates.append(C->privates().begin(), C->privates().end()); |
1408 | 651 | LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); |
1409 | 651 | RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); |
1410 | 651 | ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); |
1411 | 651 | IsReductionWithTaskMod = |
1412 | 651 | IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task; |
1413 | 651 | } |
1414 | 16.3k | if (HasAtLeastOneReduction) { |
1415 | 551 | if (IsReductionWithTaskMod) { |
1416 | 26 | CGM.getOpenMPRuntime().emitTaskReductionFini( |
1417 | 26 | *this, D.getBeginLoc(), |
1418 | 26 | isOpenMPWorksharingDirective(D.getDirectiveKind())); |
1419 | 26 | } |
1420 | 551 | bool WithNowait = D.getSingleClause<OMPNowaitClause>() || |
1421 | 545 | isOpenMPParallelDirective(D.getDirectiveKind()) || |
1422 | 274 | ReductionKind == OMPD_simd; |
1423 | 551 | bool SimpleReduction = ReductionKind == OMPD_simd; |
1424 | | // Emit a nowait reduction if the nowait clause is present or the directive
1425 | | // is a parallel directive (it always has an implicit barrier).
1426 | 551 | CGM.getOpenMPRuntime().emitReduction( |
1427 | 551 | *this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps, |
1428 | 551 | {WithNowait, SimpleReduction, ReductionKind}); |
1429 | 551 | } |
1430 | 16.3k | } |
1431 | | |
1432 | | static void emitPostUpdateForReductionClause( |
1433 | | CodeGenFunction &CGF, const OMPExecutableDirective &D, |
1434 | 16.3k | const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) { |
1435 | 16.3k | if (!CGF.HaveInsertPoint()) |
1436 | 0 | return; |
1437 | 16.3k | llvm::BasicBlock *DoneBB = nullptr; |
1438 | 707 | for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { |
1439 | 707 | if (const Expr *PostUpdate = C->getPostUpdateExpr()) { |
1440 | 4 | if (!DoneBB) { |
1441 | 4 | if (llvm::Value *Cond = CondGen(CGF)) { |
1442 | | // If the first post-update expression is found, emit the conditional
1443 | | // block if it was requested.
1444 | 0 | llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.pu"); |
1445 | 0 | DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done"); |
1446 | 0 | CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB); |
1447 | 0 | CGF.EmitBlock(ThenBB); |
1448 | 0 | } |
1449 | 4 | } |
1450 | 4 | CGF.EmitIgnoredExpr(PostUpdate); |
1451 | 4 | } |
1452 | 707 | } |
1453 | 16.3k | if (DoneBB) |
1454 | 0 | CGF.EmitBlock(DoneBB, /*IsFinished=*/true); |
1455 | 16.3k | } |
1456 | | |
1457 | | namespace { |
1458 | | /// Codegen lambda for appending distribute lower and upper bounds to the
1459 | | /// outlined parallel function. This is necessary for combined constructs
1460 | | /// such as 'distribute parallel for'.
1461 | | typedef llvm::function_ref<void(CodeGenFunction &, |
1462 | | const OMPExecutableDirective &, |
1463 | | llvm::SmallVectorImpl<llvm::Value *> &)> |
1464 | | CodeGenBoundParametersTy; |
1465 | | } // anonymous namespace |
1466 | | |
1467 | | static void |
1468 | | checkForLastprivateConditionalUpdate(CodeGenFunction &CGF, |
1469 | 15.9k | const OMPExecutableDirective &S) { |
1470 | 15.9k | if (CGF.getLangOpts().OpenMP < 50) |
1471 | 4.11k | return; |
1472 | 11.8k | llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls; |
1473 | 702 | for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { |
1474 | 856 | for (const Expr *Ref : C->varlists()) { |
1475 | 856 | if (!Ref->getType()->isScalarType()) |
1476 | 421 | continue; |
1477 | 435 | const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); |
1478 | 435 | if (!DRE) |
1479 | 0 | continue; |
1480 | 435 | PrivateDecls.insert(cast<VarDecl>(DRE->getDecl())); |
1481 | 435 | CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref); |
1482 | 435 | } |
1483 | 702 | } |
1484 | 295 | for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { |
1485 | 1.22k | for (const Expr *Ref : C->varlists()) { |
1486 | 1.22k | if (!Ref->getType()->isScalarType()) |
1487 | 658 | continue; |
1488 | 567 | const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); |
1489 | 567 | if (!DRE) |
1490 | 0 | continue; |
1491 | 567 | PrivateDecls.insert(cast<VarDecl>(DRE->getDecl())); |
1492 | 567 | CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref); |
1493 | 567 | } |
1494 | 295 | } |
1495 | 279 | for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) { |
1496 | 350 | for (const Expr *Ref : C->varlists()) { |
1497 | 350 | if (!Ref->getType()->isScalarType()) |
1498 | 0 | continue; |
1499 | 350 | const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); |
1500 | 350 | if (!DRE) |
1501 | 0 | continue; |
1502 | 350 | PrivateDecls.insert(cast<VarDecl>(DRE->getDecl())); |
1503 | 350 | CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref); |
1504 | 350 | } |
1505 | 279 | } |
1506 | | // Privates should not be analyzed since they are not captured at all.
1507 | | // Task reductions may be skipped - tasks are ignored.
1508 | | // Firstprivates do not return a value but may be passed by reference - no
1509 | | // need to check for an updated lastprivate conditional.
1510 | 4.00k | for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { |
1511 | 6.44k | for (const Expr *Ref : C->varlists()) { |
1512 | 6.44k | if (!Ref->getType()->isScalarType()) |
1513 | 853 | continue; |
1514 | 5.58k | const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); |
1515 | 5.58k | if (!DRE) |
1516 | 0 | continue; |
1517 | 5.58k | PrivateDecls.insert(cast<VarDecl>(DRE->getDecl())); |
1518 | 5.58k | } |
1519 | 4.00k | } |
1520 | 11.8k | CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional( |
1521 | 11.8k | CGF, S, PrivateDecls); |
1522 | 11.8k | } |
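For reference, a sketch of the OpenMP 5.0 construct this analysis serves (a, n, and t are placeholders):

    int x = 0;
    #pragma omp parallel for lastprivate(conditional: x)
    for (int i = 0; i < n; ++i)
      if (a[i] > t)
        x = a[i];
    // x receives the value from the last iteration that actually assigned
    // it; checkAndEmitLastprivateConditional registers the listed scalars
    // so candidate stores can be tracked.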
1523 | | |
1524 | | static void emitCommonOMPParallelDirective( |
1525 | | CodeGenFunction &CGF, const OMPExecutableDirective &S, |
1526 | | OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, |
1527 | 6.05k | const CodeGenBoundParametersTy &CodeGenBoundParameters) { |
1528 | 6.05k | const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); |
1529 | 6.05k | llvm::Function *OutlinedFn = |
1530 | 6.05k | CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction( |
1531 | 6.05k | S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); |
1532 | 6.05k | if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) { |
1533 | 307 | CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); |
1534 | 307 | llvm::Value *NumThreads = |
1535 | 307 | CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(), |
1536 | 307 | /*IgnoreResultAssign=*/true); |
1537 | 307 | CGF.CGM.getOpenMPRuntime().emitNumThreadsClause( |
1538 | 307 | CGF, NumThreads, NumThreadsClause->getBeginLoc()); |
1539 | 307 | } |
1540 | 6.05k | if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) { |
1541 | 114 | CodeGenFunction::RunCleanupsScope ProcBindScope(CGF); |
1542 | 114 | CGF.CGM.getOpenMPRuntime().emitProcBindClause( |
1543 | 114 | CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getBeginLoc()); |
1544 | 114 | } |
1545 | 6.05k | const Expr *IfCond = nullptr; |
1546 | 1.22k | for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { |
1547 | 1.22k | if (C->getNameModifier() == OMPD_unknown || |
1548 | 796 | C->getNameModifier() == OMPD_parallel) { |
1549 | 613 | IfCond = C->getCondition(); |
1550 | 613 | break; |
1551 | 613 | } |
1552 | 1.22k | } |
1553 | | |
1554 | 6.05k | OMPParallelScope Scope(CGF, S); |
1555 | 6.05k | llvm::SmallVector<llvm::Value *, 16> CapturedVars; |
1556 | | // Combining 'distribute' with 'for' requires sharing each 'distribute'
1557 | | // chunk's lower and upper bounds with the pragma 'for' chunking mechanism.
1558 | | // The following lambda takes care of appending the lower and upper bound
1559 | | // parameters when necessary.
1560 | 6.05k | CodeGenBoundParameters(CGF, S, CapturedVars); |
1561 | 6.05k | CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); |
1562 | 6.05k | CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn, |
1563 | 6.05k | CapturedVars, IfCond); |
1564 | 6.05k | } |
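The clauses handled above would all appear on a directive such as this sketch (n and work are placeholders):

    #pragma omp parallel num_threads(4) proc_bind(close) if(parallel: n > 64)
    {
      work(); // outlined into OutlinedFn and launched via emitParallelCall
    }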
1565 | | |
1566 | 244 | static bool isAllocatableDecl(const VarDecl *VD) { |
1567 | 244 | const VarDecl *CVD = VD->getCanonicalDecl(); |
1568 | 244 | if (!CVD->hasAttr<OMPAllocateDeclAttr>()) |
1569 | 240 | return false; |
1570 | 4 | const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); |
1571 | | // Use the default allocation. |
1572 | 4 | return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc || |
1573 | 4 | AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) && |
1574 | 0 | !AA->getAllocator()); |
1575 | 4 | } |
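isAllocatableDecl() only fires for declarations that name a non-default allocator, as in this OpenMP 5.0 sketch (buf is a placeholder):

    void f() {
      int buf[64];
    #pragma omp allocate(buf) allocator(omp_high_bw_mem_alloc)
      // With omp_default_mem_alloc, or no allocate directive at all, the
      // ordinary alloca-based storage is used instead.
    }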
1576 | | |
1577 | | static void emitEmptyBoundParameters(CodeGenFunction &, |
1578 | | const OMPExecutableDirective &, |
1579 | 3.28k | llvm::SmallVectorImpl<llvm::Value *> &) {} |
1580 | | |
1581 | | Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable( |
1582 | 228 | CodeGenFunction &CGF, const VarDecl *VD) { |
1583 | 228 | CodeGenModule &CGM = CGF.CGM; |
1584 | 228 | auto &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
1585 | | |
1586 | 228 | if (!VD) |
1587 | 0 | return Address::invalid(); |
1588 | 228 | const VarDecl *CVD = VD->getCanonicalDecl(); |
1589 | 228 | if (!isAllocatableDecl(CVD)) |
1590 | 228 | return Address::invalid(); |
1591 | 0 | llvm::Value *Size; |
1592 | 0 | CharUnits Align = CGM.getContext().getDeclAlign(CVD); |
1593 | 0 | if (CVD->getType()->isVariablyModifiedType()) { |
1594 | 0 | Size = CGF.getTypeSize(CVD->getType()); |
1595 | | // Align the size: ((size + align - 1) / align) * align |
1596 | 0 | Size = CGF.Builder.CreateNUWAdd( |
1597 | 0 | Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); |
1598 | 0 | Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); |
1599 | 0 | Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); |
1600 | 0 | } else { |
1601 | 0 | CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); |
1602 | 0 | Size = CGM.getSize(Sz.alignTo(Align)); |
1603 | 0 | } |
1604 | | 
1605 | 0 | const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); |
1606 | 0 | assert(AA->getAllocator() && |
1607 | 0 | "Expected allocator expression for non-default allocator."); |
1608 | 0 | llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); |
1609 | | // According to the standard, the original allocator type is an enum
1610 | | // (integer). Convert to a pointer type, if required.
1611 | 0 | if (Allocator->getType()->isIntegerTy()) |
1612 | 0 | Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy); |
1613 | 0 | else if (Allocator->getType()->isPointerTy()) |
1614 | 0 | Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator, |
1615 | 0 | CGM.VoidPtrTy); |
1616 | | 
1617 | 0 | llvm::Value *Addr = OMPBuilder.createOMPAlloc( |
1618 | 0 | CGF.Builder, Size, Allocator, |
1619 | 0 | getNameWithSeparators({CVD->getName(), ".void.addr"}, ".", ".")); |
1620 | 0 | llvm::CallInst *FreeCI = |
1621 | 0 | OMPBuilder.createOMPFree(CGF.Builder, Addr, Allocator); |
1622 | | 
1623 | 0 | CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FreeCI); |
1624 | 0 | Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
1625 | 0 | Addr, |
1626 | 0 | CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())), |
1627 | 0 | getNameWithSeparators({CVD->getName(), ".addr"}, ".", ".")); |
1628 | 0 | return Address(Addr, Align); |
1629 | 0 | } |
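As a worked instance of the rounding above: for a variably modified type with a dynamic size of 10 bytes and an alignment of 8, ((10 + 8 - 1) udiv 8) * 8 = 2 * 8 = 16, so 16 bytes are requested from the allocator.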
1630 | | |
1631 | | Address CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate( |
1632 | | CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, |
1633 | 0 | SourceLocation Loc) { |
1634 | 0 | CodeGenModule &CGM = CGF.CGM; |
1635 | 0 | if (CGM.getLangOpts().OpenMPUseTLS && |
1636 | 0 | CGM.getContext().getTargetInfo().isTLSSupported()) |
1637 | 0 | return VDAddr; |
1638 | | |
1639 | 0 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
1640 | | 
1641 | 0 | llvm::Type *VarTy = VDAddr.getElementType(); |
1642 | 0 | llvm::Value *Data = |
1643 | 0 | CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy); |
1644 | 0 | llvm::ConstantInt *Size = CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)); |
1645 | 0 | std::string Suffix = getNameWithSeparators({"cache", ""}); |
1646 | 0 | llvm::Twine CacheName = Twine(CGM.getMangledName(VD)).concat(Suffix); |
1647 | | 
1648 | 0 | llvm::CallInst *ThreadPrivateCacheCall = |
1649 | 0 | OMPBuilder.createCachedThreadPrivate(CGF.Builder, Data, Size, CacheName); |
1650 | | 
1651 | 0 | return Address(ThreadPrivateCacheCall, VDAddr.getAlignment()); |
1652 | 0 | } |
1653 | | |
1654 | | std::string CodeGenFunction::OMPBuilderCBHelpers::getNameWithSeparators( |
1655 | 0 | ArrayRef<StringRef> Parts, StringRef FirstSeparator, StringRef Separator) { |
1656 | 0 | SmallString<128> Buffer; |
1657 | 0 | llvm::raw_svector_ostream OS(Buffer); |
1658 | 0 | StringRef Sep = FirstSeparator; |
1659 | 0 | for (StringRef Part : Parts) { |
1660 | 0 | OS << Sep << Part; |
1661 | 0 | Sep = Separator; |
1662 | 0 | } |
1663 | 0 | return OS.str().str(); |
1664 | 0 | } |
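For example, getNameWithSeparators({"a", "b", "c"}, "$", ".") yields "$a.b.c": the first part is prefixed with FirstSeparator and each subsequent part with Separator.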
1665 | 947 | void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { |
1666 | 947 | if (CGM.getLangOpts().OpenMPIRBuilder) { |
1667 | 34 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
1668 | | // Check if we have any if clause associated with the directive. |
1669 | 34 | llvm::Value *IfCond = nullptr; |
1670 | 34 | if (const auto *C = S.getSingleClause<OMPIfClause>()) |
1671 | 0 | IfCond = EmitScalarExpr(C->getCondition(), |
1672 | 0 | /*IgnoreResultAssign=*/true); |
1673 | | |
1674 | 34 | llvm::Value *NumThreads = nullptr; |
1675 | 34 | if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) |
1676 | 0 | NumThreads = EmitScalarExpr(NumThreadsClause->getNumThreads(), |
1677 | 0 | /*IgnoreResultAssign=*/true); |
1678 | | |
1679 | 34 | ProcBindKind ProcBind = OMP_PROC_BIND_default; |
1680 | 34 | if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) |
1681 | 0 | ProcBind = ProcBindClause->getProcBindKind(); |
1682 | | |
1683 | 34 | using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
1684 | | |
1685 | | // The cleanup callback that finalizes all variables at the given location
1686 | | // and thus calls destructors etc.
1687 | 42 | auto FiniCB = [this](InsertPointTy IP) { |
1688 | 42 | OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); |
1689 | 42 | }; |
1690 | | |
1691 | | // Privatization callback that performs appropriate action for |
1692 | | // shared/private/firstprivate/lastprivate/copyin/... variables. |
1693 | | // |
1694 | | // TODO: This defaults to shared right now. |
1695 | 34 | auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, |
1696 | 68 | llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) { |
1697 | | // The next line is appropriate only for variables (Val) with the |
1698 | | // data-sharing attribute "shared". |
1699 | 68 | ReplVal = &Val; |
1700 | | |
1701 | 68 | return CodeGenIP; |
1702 | 68 | }; |
1703 | | |
1704 | 34 | const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); |
1705 | 34 | const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt(); |
1706 | | |
1707 | 34 | auto BodyGenCB = [ParallelRegionBodyStmt, |
1708 | 34 | this](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, |
1709 | 34 | llvm::BasicBlock &ContinuationBB) { |
1710 | 34 | OMPBuilderCBHelpers::OutlinedRegionBodyRAII ORB(*this, AllocaIP, |
1711 | 34 | ContinuationBB); |
1712 | 34 | OMPBuilderCBHelpers::EmitOMPRegionBody(*this, ParallelRegionBodyStmt, |
1713 | 34 | CodeGenIP, ContinuationBB); |
1714 | 34 | }; |
1715 | | |
1716 | 34 | CGCapturedStmtInfo CGSI(*CS, CR_OpenMP); |
1717 | 34 | CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); |
1718 | 34 | llvm::OpenMPIRBuilder::InsertPointTy AllocaIP( |
1719 | 34 | AllocaInsertPt->getParent(), AllocaInsertPt->getIterator()); |
1720 | 34 | Builder.restoreIP( |
1721 | 34 | OMPBuilder.createParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB, |
1722 | 34 | IfCond, NumThreads, ProcBind, S.hasCancel())); |
1723 | 34 | return; |
1724 | 34 | } |
1725 | | |
1726 | | // Emit parallel region as a standalone region. |
1727 | 913 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
1728 | 913 | Action.Enter(CGF); |
1729 | 913 | OMPPrivateScope PrivateScope(CGF); |
1730 | 913 | bool Copyins = CGF.EmitOMPCopyinClause(S); |
1731 | 913 | (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); |
1732 | 913 | if (Copyins) { |
1733 | | // Emit an implicit barrier to synchronize threads and avoid data races on
1734 | | // propagation of the master thread's values of threadprivate variables to
1735 | | // the local instances of those variables in all other implicit threads.
1736 | 23 | CGF.CGM.getOpenMPRuntime().emitBarrierCall( |
1737 | 23 | CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, |
1738 | 23 | /*ForceSimpleCall=*/true); |
1739 | 23 | } |
1740 | 913 | CGF.EmitOMPPrivateClause(S, PrivateScope); |
1741 | 913 | CGF.EmitOMPReductionClauseInit(S, PrivateScope); |
1742 | 913 | (void)PrivateScope.Privatize(); |
1743 | 913 | CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt()); |
1744 | 913 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); |
1745 | 913 | }; |
1746 | 913 | { |
1747 | 913 | auto LPCRegion = |
1748 | 913 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
1749 | 913 | emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen, |
1750 | 913 | emitEmptyBoundParameters); |
1751 | 913 | emitPostUpdateForReductionClause(*this, S, |
1752 | 4 | [](CodeGenFunction &) { return nullptr; }); |
1753 | 913 | } |
1754 | | // Check for outer lastprivate conditional update. |
1755 | 913 | checkForLastprivateConditionalUpdate(*this, S); |
1756 | 913 | } |
1757 | | |
1758 | | static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop, |
1759 | 11.3k | int MaxLevel, int Level = 0) { |
1760 | 11.3k | assert(Level < MaxLevel && "Too deep lookup during loop body codegen."); |
1761 | 11.3k | const Stmt *SimplifiedS = S->IgnoreContainers(); |
1762 | 11.3k | if (const auto *CS = dyn_cast<CompoundStmt>(SimplifiedS)) { |
1763 | 6 | PrettyStackTraceLoc CrashInfo( |
1764 | 6 | CGF.getContext().getSourceManager(), CS->getLBracLoc(), |
1765 | 6 | "LLVM IR generation of compound statement ('{}')"); |
1766 | | |
1767 | | // Keep track of the current cleanup stack depth, including debug scopes. |
1768 | 6 | CodeGenFunction::LexicalScope Scope(CGF, S->getSourceRange()); |
1769 | 6 | for (const Stmt *CurStmt : CS->body()) |
1770 | 30 | emitBody(CGF, CurStmt, NextLoop, MaxLevel, Level); |
1771 | 6 | return; |
1772 | 6 | } |
1773 | 11.3k | if (SimplifiedS == NextLoop) { |
1774 | 11.3k | if (const auto *For = dyn_cast<ForStmt>(SimplifiedS)) { |
1775 | 11.3k | S = For->getBody(); |
1776 | 6 | } else { |
1777 | 6 | assert(isa<CXXForRangeStmt>(SimplifiedS) && |
1778 | 6 | "Expected canonical for loop or range-based for loop."); |
1779 | 6 | const auto *CXXFor = cast<CXXForRangeStmt>(SimplifiedS); |
1780 | 6 | CGF.EmitStmt(CXXFor->getLoopVarStmt()); |
1781 | 6 | S = CXXFor->getBody(); |
1782 | 6 | } |
1783 | 11.3k | if (Level + 1 < MaxLevel) { |
1784 | 507 | NextLoop = OMPLoopDirective::tryToFindNextInnerLoop( |
1785 | 507 | S, /*TryImperfectlyNestedLoops=*/true); |
1786 | 507 | emitBody(CGF, S, NextLoop, MaxLevel, Level + 1); |
1787 | 507 | return; |
1788 | 507 | } |
1789 | 10.8k | } |
1790 | 10.8k | CGF.EmitStmt(S); |
1791 | 10.8k | } |
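A sketch of a nest this walker must handle (OpenMP 5.0 permits imperfectly nested statements; prologue, body, n, and m are placeholders):

    #pragma omp for collapse(2)
    for (int i = 0; i < n; ++i) {
      prologue(i);                 // imperfectly nested statement
      for (int j = 0; j < m; ++j)
        body(i, j);
    }
    // With MaxLevel == 2, emitBody emits prologue(i) in place, then uses
    // tryToFindNextInnerLoop to descend into the j loop, whose body is
    // emitted once Level + 1 == MaxLevel.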
1792 | | |
1793 | | void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D, |
1794 | 10.8k | JumpDest LoopExit) { |
1795 | 10.8k | RunCleanupsScope BodyScope(*this); |
1796 | | // Update counters values on current iteration. |
1797 | 10.8k | for (const Expr *UE : D.updates()) |
1798 | 11.3k | EmitIgnoredExpr(UE); |
1799 | | // Update the linear variables. |
1800 | | // In distribute directives only loop counters may be marked as linear; no
1801 | | // need to generate the code for them.
1802 | 10.8k | if (!isOpenMPDistributeDirective(D.getDirectiveKind())) { |
1803 | 396 | for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { |
1804 | 396 | for (const Expr *UE : C->updates()) |
1805 | 518 | EmitIgnoredExpr(UE); |
1806 | 396 | } |
1807 | 4.28k | } |
1808 | | |
1809 | | // On a continue in the body, jump to the end. |
1810 | 10.8k | JumpDest Continue = getJumpDestInCurrentScope("omp.body.continue"); |
1811 | 10.8k | BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); |
1812 | 11.3k | for (const Expr *E : D.finals_conditions()) { |
1813 | 11.3k | if (!E) |
1814 | 11.3k | continue; |
1815 | | // Check that the loop counter in a non-rectangular nest fits into the
1816 | | // iteration space.
1817 | 20 | llvm::BasicBlock *NextBB = createBasicBlock("omp.body.next"); |
1818 | 20 | EmitBranchOnBoolExpr(E, NextBB, Continue.getBlock(), |
1819 | 20 | getProfileCount(D.getBody())); |
1820 | 20 | EmitBlock(NextBB); |
1821 | 20 | } |
1822 | | |
1823 | 10.8k | OMPPrivateScope InscanScope(*this); |
1824 | 10.8k | EmitOMPReductionClauseInit(D, InscanScope, /*ForInscan=*/true); |
1825 | 10.8k | bool IsInscanRegion = InscanScope.Privatize(); |
1826 | 10.8k | if (IsInscanRegion) { |
1827 | | // Need to remember the blocks before and after the scan directive
1828 | | // to dispatch them correctly depending on the clause used in
1829 | | // this directive, inclusive or exclusive. For an inclusive scan the
1830 | | // natural order of the blocks is used; for an exclusive clause the
1831 | | // blocks must be executed in reverse order.
1832 | 48 | OMPBeforeScanBlock = createBasicBlock("omp.before.scan.bb"); |
1833 | 48 | OMPAfterScanBlock = createBasicBlock("omp.after.scan.bb"); |
1834 | | // No need to allocate the inscan exit block; in simd mode it is selected
1835 | | // in the codegen for the scan directive.
1836 | 48 | if (D.getDirectiveKind() != OMPD_simd && !getLangOpts().OpenMPSimd40 ) |
1837 | 32 | OMPScanExitBlock = createBasicBlock("omp.exit.inscan.bb"); |
1838 | 48 | OMPScanDispatch = createBasicBlock("omp.inscan.dispatch"); |
1839 | 48 | EmitBranch(OMPScanDispatch); |
1840 | 48 | EmitBlock(OMPBeforeScanBlock); |
1841 | 48 | } |
1842 | | |
1843 | | // Emit loop variables for C++ range loops. |
1844 | 10.8k | const Stmt *Body = |
1845 | 10.8k | D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(); |
1846 | | // Emit loop body. |
1847 | 10.8k | emitBody(*this, Body, |
1848 | 10.8k | OMPLoopDirective::tryToFindNextInnerLoop( |
1849 | 10.8k | Body, /*TryImperfectlyNestedLoops=*/true), |
1850 | 10.8k | D.getCollapsedNumber()); |
1851 | | |
1852 | | // Jump to the dispatcher at the end of the loop body. |
1853 | 10.8k | if (IsInscanRegion) |
1854 | 48 | EmitBranch(OMPScanExitBlock); |
1855 | | |
1856 | | // The end (updates/cleanups). |
1857 | 10.8k | EmitBlock(Continue.getBlock()); |
1858 | 10.8k | BreakContinueStack.pop_back(); |
1859 | 10.8k | } |
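The scan blocks above correspond to an OpenMP 5.0 inclusive scan such as this sketch (a, b, and n are placeholders):

    int sum = 0;
    #pragma omp simd reduction(inscan, +: sum)
    for (int i = 0; i < n; ++i) {
      sum += a[i];                 // input phase: omp.before.scan.bb
    #pragma omp scan inclusive(sum)
      b[i] = sum;                  // scan phase: omp.after.scan.bb
    }
    // For an exclusive(...) clause the two blocks are dispatched in the
    // reverse order.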
1860 | | |
1861 | | void CodeGenFunction::EmitOMPInnerLoop( |
1862 | | const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond, |
1863 | | const Expr *IncExpr, |
1864 | | const llvm::function_ref<void(CodeGenFunction &)> BodyGen, |
1865 | 13.7k | const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) { |
1866 | 13.7k | auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end"); |
1867 | | |
1868 | | // Start the loop with a block that tests the condition. |
1869 | 13.7k | auto CondBlock = createBasicBlock("omp.inner.for.cond"); |
1870 | 13.7k | EmitBlock(CondBlock); |
1871 | 13.7k | const SourceRange R = S.getSourceRange(); |
1872 | | |
1873 | | // If attributes are attached, push to the basic block with them. |
1874 | 13.7k | const auto &OMPED = cast<OMPExecutableDirective>(S); |
1875 | 13.7k | const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt(); |
1876 | 13.7k | const Stmt *SS = ICS->getCapturedStmt(); |
1877 | 13.7k | const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(SS); |
1878 | 13.7k | if (AS) |
1879 | 1 | LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(), |
1880 | 1 | AS->getAttrs(), SourceLocToDebugLoc(R.getBegin()), |
1881 | 1 | SourceLocToDebugLoc(R.getEnd())); |
1882 | 13.7k | else |
1883 | 13.7k | LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), |
1884 | 13.7k | SourceLocToDebugLoc(R.getEnd())); |
1885 | | |
1886 | | // If there are any cleanups between here and the loop-exit scope, |
1887 | | // create a block to stage a loop exit along. |
1888 | 13.7k | llvm::BasicBlock *ExitBlock = LoopExit.getBlock(); |
1889 | 13.7k | if (RequiresCleanup) |
1890 | 850 | ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup"); |
1891 | | |
1892 | 13.7k | llvm::BasicBlock *LoopBody = createBasicBlock("omp.inner.for.body"); |
1893 | | |
1894 | | // Emit condition. |
1895 | 13.7k | EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S)); |
1896 | 13.7k | if (ExitBlock != LoopExit.getBlock()) { |
1897 | 850 | EmitBlock(ExitBlock); |
1898 | 850 | EmitBranchThroughCleanup(LoopExit); |
1899 | 850 | } |
1900 | | |
1901 | 13.7k | EmitBlock(LoopBody); |
1902 | 13.7k | incrementProfileCounter(&S); |
1903 | | |
1904 | | // Create a block for the increment. |
1905 | 13.7k | JumpDest Continue = getJumpDestInCurrentScope("omp.inner.for.inc"); |
1906 | 13.7k | BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); |
1907 | | |
1908 | 13.7k | BodyGen(*this); |
1909 | | |
1910 | | // Emit "IV = IV + 1" and a back-edge to the condition block. |
1911 | 13.7k | EmitBlock(Continue.getBlock()); |
1912 | 13.7k | EmitIgnoredExpr(IncExpr); |
1913 | 13.7k | PostIncGen(*this); |
1914 | 13.7k | BreakContinueStack.pop_back(); |
1915 | 13.7k | EmitBranch(CondBlock); |
1916 | 13.7k | LoopStack.pop(); |
1917 | | // Emit the fall-through block. |
1918 | 13.7k | EmitBlock(LoopExit.getBlock()); |
1919 | 13.7k | } |
1920 | | |
1921 | 8.94k | bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) { |
1922 | 8.94k | if (!HaveInsertPoint()) |
1923 | 0 | return false; |
1924 | | // Emit inits for the linear variables. |
1925 | 8.94k | bool HasLinears = false; |
1926 | 492 | for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { |
1927 | 614 | for (const Expr *Init : C->inits()) { |
1928 | 614 | HasLinears = true; |
1929 | 614 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl()); |
1930 | 614 | if (const auto *Ref = |
1931 | 614 | dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) { |
1932 | 614 | AutoVarEmission Emission = EmitAutoVarAlloca(*VD); |
1933 | 614 | const auto *OrigVD = cast<VarDecl>(Ref->getDecl()); |
1934 | 614 | DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), |
1935 | 614 | CapturedStmtInfo->lookup(OrigVD) != nullptr, |
1936 | 614 | VD->getInit()->getType(), VK_LValue, |
1937 | 614 | VD->getInit()->getExprLoc()); |
1938 | 614 | EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(), |
1939 | 614 | VD->getType()), |
1940 | 614 | /*capturedByInit=*/false); |
1941 | 614 | EmitAutoVarCleanups(Emission); |
1942 | 0 | } else { |
1943 | 0 | EmitVarDecl(*VD); |
1944 | 0 | } |
1945 | 614 | } |
1946 | | // Emit the linear steps for the linear clauses. |
1947 | | // If a step is not constant, it is pre-calculated before the loop. |
1948 | 492 | if (const auto *CS = cast_or_null<BinaryOperator>(C->getCalcStep())) |
1949 | 154 | if (const auto *SaveRef = cast<DeclRefExpr>(CS->getLHS())) { |
1950 | 154 | EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl())); |
1951 | | // Emit calculation of the linear step. |
1952 | 154 | EmitIgnoredExpr(CS); |
1953 | 154 | } |
1954 | 492 | } |
1955 | 8.94k | return HasLinears; |
1956 | 8.94k | } |
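A sketch of the clause these inits serve (a, b, and n are placeholders):

    int j = 0;
    #pragma omp simd linear(j: 2)
    for (int i = 0; i < n; ++i)
      b[j] = a[i];                 // j advances by 2 each iteration
    // The private copy of j is initialized from the original variable, a
    // non-constant step would be computed once before the loop, and
    // EmitOMPLinearClauseFinal writes the final value of j back.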
1957 | | |
1958 | | void CodeGenFunction::EmitOMPLinearClauseFinal( |
1959 | | const OMPLoopDirective &D, |
1960 | 8.94k | const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) { |
1961 | 8.94k | if (!HaveInsertPoint()) |
1962 | 0 | return; |
1963 | 8.94k | llvm::BasicBlock *DoneBB = nullptr; |
1964 | | // Emit the final values of the linear variables. |
1965 | 492 | for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { |
1966 | 492 | auto IC = C->varlist_begin(); |
1967 | 614 | for (const Expr *F : C->finals()) { |
1968 | 614 | if (!DoneBB) { |
1969 | 539 | if (llvm::Value *Cond = CondGen(*this)) { |
1970 | | // If the first post-update expression is found, emit the conditional
1971 | | // block if it was requested.
1972 | 135 | llvm::BasicBlock *ThenBB = createBasicBlock(".omp.linear.pu"); |
1973 | 135 | DoneBB = createBasicBlock(".omp.linear.pu.done"); |
1974 | 135 | Builder.CreateCondBr(Cond, ThenBB, DoneBB); |
1975 | 135 | EmitBlock(ThenBB); |
1976 | 135 | } |
1977 | 539 | } |
1978 | 614 | const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl()); |
1979 | 614 | DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), |
1980 | 614 | CapturedStmtInfo->lookup(OrigVD) != nullptr, |
1981 | 614 | (*IC)->getType(), VK_LValue, (*IC)->getExprLoc()); |
1982 | 614 | Address OrigAddr = EmitLValue(&DRE).getAddress(*this); |
1983 | 614 | CodeGenFunction::OMPPrivateScope VarScope(*this); |
1984 | 614 | VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; }); |
1985 | 614 | (void)VarScope.Privatize(); |
1986 | 614 | EmitIgnoredExpr(F); |
1987 | 614 | ++IC; |
1988 | 614 | } |
1989 | 492 | if (const Expr *PostUpdate = C->getPostUpdateExpr()) |
1990 | 4 | EmitIgnoredExpr(PostUpdate); |
1991 | 492 | } |
1992 | 8.94k | if (DoneBB) |
1993 | 135 | EmitBlock(DoneBB, /*IsFinished=*/true); |
1994 | 8.94k | } |
1995 | | |
1996 | | static void emitAlignedClause(CodeGenFunction &CGF, |
1997 | 13.1k | const OMPExecutableDirective &D) { |
1998 | 13.1k | if (!CGF.HaveInsertPoint()) |
1999 | 0 | return; |
2000 | 13.1k | for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) { |
2001 | 306 | llvm::APInt ClauseAlignment(64, 0); |
2002 | 306 | if (const Expr *AlignmentExpr = Clause->getAlignment()) { |
2003 | 108 | auto *AlignmentCI = |
2004 | 108 | cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr)); |
2005 | 108 | ClauseAlignment = AlignmentCI->getValue(); |
2006 | 108 | } |
2007 | 356 | for (const Expr *E : Clause->varlists()) { |
2008 | 356 | llvm::APInt Alignment(ClauseAlignment); |
2009 | 356 | if (Alignment == 0) { |
2010 | | // OpenMP [2.8.1, Description] |
2011 | | // If no optional parameter is specified, implementation-defined default |
2012 | | // alignments for SIMD instructions on the target platforms are assumed. |
2013 | 240 | Alignment = |
2014 | 240 | CGF.getContext() |
2015 | 240 | .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign( |
2016 | 240 | E->getType()->getPointeeType())) |
2017 | 240 | .getQuantity(); |
2018 | 240 | } |
2019 | 356 | assert((Alignment == 0 || Alignment.isPowerOf2()) && |
2020 | 356 | "alignment is not power of 2"); |
2021 | 356 | if (Alignment != 0) { |
2022 | 356 | llvm::Value *PtrValue = CGF.EmitScalarExpr(E); |
2023 | 356 | CGF.emitAlignmentAssumption( |
2024 | 356 | PtrValue, E, /*No second loc needed*/ SourceLocation(), |
2025 | 356 | llvm::ConstantInt::get(CGF.getLLVMContext(), Alignment)); |
2026 | 356 | } |
2027 | 356 | } |
2028 | 306 | } |
2029 | 13.1k | } |
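The assumption emitted above corresponds to, e.g., this sketch (p and n are placeholders):

    void zero(float *p, int n) {
    #pragma omp simd aligned(p: 32)
      for (int i = 0; i < n; ++i)
        p[i] = 0.0f;
    }
    // A 32-byte alignment assumption is attached to p; had no value been
    // given, the target's default simd alignment for float* would be used.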
2030 | | |
2031 | | void CodeGenFunction::EmitOMPPrivateLoopCounters( |
2032 | 15.8k | const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) { |
2033 | 15.8k | if (!HaveInsertPoint()) |
2034 | 0 | return; |
2035 | 15.8k | auto I = S.private_counters().begin(); |
2036 | 16.6k | for (const Expr *E : S.counters()) { |
2037 | 16.6k | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
2038 | 16.6k | const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()); |
2039 | | // Emit var without initialization. |
2040 | 16.6k | AutoVarEmission VarEmission = EmitAutoVarAlloca(*PrivateVD); |
2041 | 16.6k | EmitAutoVarCleanups(VarEmission); |
2042 | 16.6k | LocalDeclMap.erase(PrivateVD); |
2043 | 16.6k | (void)LoopScope.addPrivate(VD, [&VarEmission]() { |
2044 | 16.6k | return VarEmission.getAllocatedAddress(); |
2045 | 16.6k | }); |
2046 | 16.6k | if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD)15.9k || |
2047 | 15.9k | VD->hasGlobalStorage()) { |
2048 | 685 | (void)LoopScope.addPrivate(PrivateVD, [this, VD, E]() { |
2049 | 685 | DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), |
2050 | 685 | LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD)31 , |
2051 | 685 | E->getType(), VK_LValue, E->getExprLoc()); |
2052 | 685 | return EmitLValue(&DRE).getAddress(*this); |
2053 | 685 | }); |
2054 | 15.9k | } else { |
2055 | 15.9k | (void)LoopScope.addPrivate(PrivateVD, [&VarEmission]() { |
2056 | 15.9k | return VarEmission.getAllocatedAddress(); |
2057 | 15.9k | }); |
2058 | 15.9k | } |
2059 | 16.6k | ++I; |
2060 | 16.6k | } |
2061 | | // Privatize extra loop counters used in loops for ordered(n) clauses. |
2062 | 80 | for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) { |
2063 | 80 | if (!C->getNumForLoops()) |
2064 | 54 | continue; |
2065 | 26 | for (unsigned I = S.getCollapsedNumber(), |
2066 | 26 | E = C->getLoopNumIterations().size(); |
2067 | 32 | I < E; ++I6 ) { |
2068 | 6 | const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I)); |
2069 | 6 | const auto *VD = cast<VarDecl>(DRE->getDecl()); |
2070 | | // Override only those variables that can be captured to avoid re-emission |
2071 | | // of the variables declared within the loops. |
2072 | 6 | if (DRE->refersToEnclosingVariableOrCapture()) { |
2073 | 4 | (void)LoopScope.addPrivate(VD, [this, DRE, VD]() { |
2074 | 4 | return CreateMemTemp(DRE->getType(), VD->getName()); |
2075 | 4 | }); |
2076 | 4 | } |
2077 | 6 | } |
2078 | 26 | } |
2079 | 15.8k | } |
2080 | | |
2081 | | static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S, |
2082 | | const Expr *Cond, llvm::BasicBlock *TrueBlock, |
2083 | 2.40k | llvm::BasicBlock *FalseBlock, uint64_t TrueCount) { |
2084 | 2.40k | if (!CGF.HaveInsertPoint()) |
2085 | 0 | return; |
2086 | 2.40k | { |
2087 | 2.40k | CodeGenFunction::OMPPrivateScope PreCondScope(CGF); |
2088 | 2.40k | CGF.EmitOMPPrivateLoopCounters(S, PreCondScope); |
2089 | 2.40k | (void)PreCondScope.Privatize(); |
2090 | | // Get initial values of real counters. |
2091 | 2.61k | for (const Expr *I : S.inits()) { |
2092 | 2.61k | CGF.EmitIgnoredExpr(I); |
2093 | 2.61k | } |
2094 | 2.40k | } |
2095 | | // Create temp loop control variables with their init values to support |
2096 | | // non-rectangular loops. |
2097 | 2.40k | CodeGenFunction::OMPMapVars PreCondVars; |
2098 | 2.61k | for (const Expr *E : S.dependent_counters()) {
2099 | 2.61k | if (!E) |
2100 | 2.61k | continue; |
2101 | 5 | assert(!E->getType().getNonReferenceType()->isRecordType() && |
2102 | 5 | "dependent counter must not be an iterator."); |
2103 | 5 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
2104 | 5 | Address CounterAddr = |
2105 | 5 | CGF.CreateMemTemp(VD->getType().getNonReferenceType()); |
2106 | 5 | (void)PreCondVars.setVarAddr(CGF, VD, CounterAddr); |
2107 | 5 | } |
2108 | 2.40k | (void)PreCondVars.apply(CGF); |
2109 | 2.61k | for (const Expr *E : S.dependent_inits()) { |
2110 | 2.61k | if (!E) |
2111 | 2.61k | continue; |
2112 | 5 | CGF.EmitIgnoredExpr(E); |
2113 | 5 | } |
2114 | | // Check that loop is executed at least one time. |
2115 | 2.40k | CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount); |
2116 | 2.40k | PreCondVars.restore(CGF); |
2117 | 2.40k | } |
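The temporaries above exist for non-rectangular nests such as this sketch (work and n are placeholders):

    #pragma omp for collapse(2)
    for (int i = 0; i < n; ++i)
      for (int j = i; j < n; ++j)  // inner bounds depend on i
        work(i, j);
    // dependent_counters()/dependent_inits() supply a temporary copy of i
    // so the "loop runs at least once" precondition can be evaluated
    // before the real counters are materialized.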
2118 | | |
2119 | | void CodeGenFunction::EmitOMPLinearClause( |
2120 | 8.94k | const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) { |
2121 | 8.94k | if (!HaveInsertPoint()) |
2122 | 0 | return; |
2123 | 8.94k | llvm::DenseSet<const VarDecl *> SIMDLCVs; |
2124 | 8.94k | if (isOpenMPSimdDirective(D.getDirectiveKind())) { |
2125 | 6.41k | const auto *LoopDirective = cast<OMPLoopDirective>(&D); |
2126 | 6.70k | for (const Expr *C : LoopDirective->counters()) { |
2127 | 6.70k | SIMDLCVs.insert( |
2128 | 6.70k | cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl()); |
2129 | 6.70k | } |
2130 | 6.41k | } |
2131 | 492 | for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { |
2132 | 492 | auto CurPrivate = C->privates().begin(); |
2133 | 614 | for (const Expr *E : C->varlists()) { |
2134 | 614 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
2135 | 614 | const auto *PrivateVD = |
2136 | 614 | cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl()); |
2137 | 614 | if (!SIMDLCVs.count(VD->getCanonicalDecl())) { |
2138 | 508 | bool IsRegistered = PrivateScope.addPrivate(VD, [this, PrivateVD]() { |
2139 | | // Emit private VarDecl with copy init. |
2140 | 508 | EmitVarDecl(*PrivateVD); |
2141 | 508 | return GetAddrOfLocalVar(PrivateVD); |
2142 | 508 | }); |
2143 | 508 | assert(IsRegistered && "linear var already registered as private"); |
2144 | | // Silence the warning about unused variable. |
2145 | 508 | (void)IsRegistered; |
2146 | 106 | } else { |
2147 | 106 | EmitVarDecl(*PrivateVD); |
2148 | 106 | } |
2149 | 614 | ++CurPrivate; |
2150 | 614 | } |
2151 | 492 | } |
2152 | 8.94k | } |
2153 | | |
2154 | | static void emitSimdlenSafelenClause(CodeGenFunction &CGF, |
2155 | | const OMPExecutableDirective &D, |
2156 | 8.61k | bool IsMonotonic) { |
2157 | 8.61k | if (!CGF.HaveInsertPoint()) |
2158 | 0 | return; |
2159 | 8.61k | if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) { |
2160 | 248 | RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(), |
2161 | 248 | /*ignoreResult=*/true); |
2162 | 248 | auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); |
2163 | 248 | CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); |
2164 | | // In the presence of a finite 'safelen', it may be unsafe to mark all
2165 | | // the memory instructions parallel, because loop-carried
2166 | | // dependences within 'safelen' iterations are possible.
2167 | 248 | if (!IsMonotonic) |
2168 | 140 | CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>()); |
2169 | 8.36k | } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) { |
2170 | 166 | RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(), |
2171 | 166 | /*ignoreResult=*/true); |
2172 | 166 | auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); |
2173 | 166 | CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); |
2174 | | // In the presence of a finite 'safelen', it may be unsafe to mark all
2175 | | // the memory instructions parallel, because loop-carried
2176 | | // dependences within 'safelen' iterations are possible.
2177 | 166 | CGF.LoopStack.setParallel(/*Enable=*/false); |
2178 | 166 | } |
2179 | 8.61k | } |
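Both clauses handled above would appear as in this sketch (a and n are placeholders):

    #pragma omp simd simdlen(8) safelen(16)
    for (int i = 0; i < n; ++i)
      a[i] = a[i + 16] * 2;
    // simdlen(8) sets the preferred vector width; because safelen(16)
    // bounds the distance of safe loop-carried dependences, the memory
    // accesses are not marked parallel.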
2180 | | |
2181 | | void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D, |
2182 | 8.61k | bool IsMonotonic) { |
2183 | | // Walk clauses and process simdlen/safelen.
2184 | 8.61k | LoopStack.setParallel(!IsMonotonic); |
2185 | 8.61k | LoopStack.setVectorizeEnable(); |
2186 | 8.61k | emitSimdlenSafelenClause(*this, D, IsMonotonic); |
2187 | 8.61k | if (const auto *C = D.getSingleClause<OMPOrderClause>()) |
2188 | 0 | if (C->getKind() == OMPC_ORDER_concurrent) |
2189 | 0 | LoopStack.setParallel(/*Enable=*/true); |
2190 | 8.61k | if ((D.getDirectiveKind() == OMPD_simd || |
2191 | 8.28k | (getLangOpts().OpenMPSimd && |
2192 | 3.16k | isOpenMPSimdDirective(D.getDirectiveKind()))) && |
2193 | 3.48k | llvm::any_of(D.getClausesOfKind<OMPReductionClause>(), |
2194 | 95 | [](const OMPReductionClause *C) { |
2195 | 95 | return C->getModifier() == OMPC_REDUCTION_inscan; |
2196 | 95 | })) |
2197 | | // Disable parallel access in case of prefix sum. |
2198 | 16 | LoopStack.setParallel(/*Enable=*/false); |
2199 | 8.61k | } |
2200 | | |
2201 | | void CodeGenFunction::EmitOMPSimdFinal( |
2202 | | const OMPLoopDirective &D, |
2203 | 8.61k | const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) { |
2204 | 8.61k | if (!HaveInsertPoint()) |
2205 | 0 | return; |
2206 | 8.61k | llvm::BasicBlock *DoneBB = nullptr; |
2207 | 8.61k | auto IC = D.counters().begin(); |
2208 | 8.61k | auto IPC = D.private_counters().begin(); |
2209 | 8.97k | for (const Expr *F : D.finals()) { |
2210 | 8.97k | const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl()); |
2211 | 8.97k | const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl()); |
2212 | 8.97k | const auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD); |
2213 | 8.97k | if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD)0 || |
2214 | 8.97k | OrigVD->hasGlobalStorage()0 || CED0 ) { |
2215 | 8.97k | if (!DoneBB) { |
2216 | 8.79k | if (llvm::Value *Cond = CondGen(*this)) { |
2217 | | // If the first post-update expression is found, emit the conditional
2218 | | // block if it was requested.
2219 | 4.56k | llvm::BasicBlock *ThenBB = createBasicBlock(".omp.final.then"); |
2220 | 4.56k | DoneBB = createBasicBlock(".omp.final.done"); |
2221 | 4.56k | Builder.CreateCondBr(Cond, ThenBB, DoneBB); |
2222 | 4.56k | EmitBlock(ThenBB); |
2223 | 4.56k | } |
2224 | 8.79k | } |
2225 | 8.97k | Address OrigAddr = Address::invalid(); |
2226 | 8.97k | if (CED) { |
2227 | 28 | OrigAddr = |
2228 | 28 | EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(*this); |
2229 | 8.94k | } else { |
2230 | 8.94k | DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD), |
2231 | 8.94k | /*RefersToEnclosingVariableOrCapture=*/false, |
2232 | 8.94k | (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc()); |
2233 | 8.94k | OrigAddr = EmitLValue(&DRE).getAddress(*this); |
2234 | 8.94k | } |
2235 | 8.97k | OMPPrivateScope VarScope(*this); |
2236 | 8.97k | VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; }); |
2237 | 8.97k | (void)VarScope.Privatize(); |
2238 | 8.97k | EmitIgnoredExpr(F); |
2239 | 8.97k | } |
2240 | 8.97k | ++IC; |
2241 | 8.97k | ++IPC; |
2242 | 8.97k | } |
2243 | 8.61k | if (DoneBB) |
2244 | 4.56k | EmitBlock(DoneBB, /*IsFinished=*/true); |
2245 | 8.61k | } |
2246 | | |
2247 | | static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF, |
2248 | | const OMPLoopDirective &S, |
2249 | 10.8k | CodeGenFunction::JumpDest LoopExit) { |
2250 | 10.8k | CGF.EmitOMPLoopBody(S, LoopExit); |
2251 | 10.8k | CGF.EmitStopPoint(&S); |
2252 | 10.8k | } |
2253 | | |
2254 | | /// Emit a helper variable and return corresponding lvalue. |
2255 | | static LValue EmitOMPHelperVar(CodeGenFunction &CGF, |
2256 | 42.0k | const DeclRefExpr *Helper) { |
2257 | 42.0k | auto VDecl = cast<VarDecl>(Helper->getDecl()); |
2258 | 42.0k | CGF.EmitVarDecl(*VDecl); |
2259 | 42.0k | return CGF.EmitLValue(Helper); |
2260 | 42.0k | } |
2261 | | |
2262 | | static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S, |
2263 | | const RegionCodeGenTy &SimdInitGen, |
2264 | 13.3k | const RegionCodeGenTy &BodyCodeGen) { |
2265 | 13.3k | auto &&ThenGen = [&S, &SimdInitGen, &BodyCodeGen](CodeGenFunction &CGF, |
2266 | 13.2k | PrePostActionTy &) { |
2267 | 13.2k | CGOpenMPRuntime::NontemporalDeclsRAII NontemporalsRegion(CGF.CGM, S); |
2268 | 13.2k | CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); |
2269 | 13.2k | SimdInitGen(CGF); |
2270 | | |
2271 | 13.2k | BodyCodeGen(CGF); |
2272 | 13.2k | }; |
2273 | 343 | auto &&ElseGen = [&BodyCodeGen](CodeGenFunction &CGF, PrePostActionTy &) { |
2274 | 343 | CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); |
2275 | 343 | CGF.LoopStack.setVectorizeEnable(/*Enable=*/false); |
2276 | | |
2277 | 343 | BodyCodeGen(CGF); |
2278 | 343 | }; |
2279 | 13.3k | const Expr *IfCond = nullptr; |
2280 | 13.3k | if (isOpenMPSimdDirective(S.getDirectiveKind())) { |
2281 | 1.79k | for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { |
2282 | 1.79k | if (CGF.getLangOpts().OpenMP >= 50 && |
2283 | 993 | (C->getNameModifier() == OMPD_unknown || |
2284 | 566 | C->getNameModifier() == OMPD_simd)) { |
2285 | 493 | IfCond = C->getCondition(); |
2286 | 493 | break; |
2287 | 493 | } |
2288 | 1.79k | } |
2289 | 8.73k | } |
2290 | 13.3k | if (IfCond) { |
2291 | 493 | CGF.CGM.getOpenMPRuntime().emitIfClause(CGF, IfCond, ThenGen, ElseGen); |
2292 | 12.9k | } else { |
2293 | 12.9k | RegionCodeGenTy ThenRCG(ThenGen); |
2294 | 12.9k | ThenRCG(CGF); |
2295 | 12.9k | } |
2296 | 13.3k | } |
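The versioning above serves OpenMP 5.0 'if' clauses on simd constructs, as in this sketch (a, b, and n are placeholders):

    #pragma omp simd if(simd: n > 32)
    for (int i = 0; i < n; ++i)
      a[i] += b[i];
    // When the condition holds, ThenGen emits the loop with vectorization
    // enabled; otherwise ElseGen emits the same body with
    // LoopStack.setVectorizeEnable(false).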
2297 | | |
2298 | | static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S, |
2299 | 4.13k | PrePostActionTy &Action) { |
2300 | 4.13k | Action.Enter(CGF); |
2301 | 4.13k | assert(isOpenMPSimdDirective(S.getDirectiveKind()) && |
2302 | 4.13k | "Expected simd directive"); |
2303 | 4.13k | OMPLoopScope PreInitScope(CGF, S); |
2304 | | // if (PreCond) { |
2305 | | // for (IV in 0..LastIteration) BODY; |
2306 | | // <Final counter/linear vars updates>; |
2307 | | // } |
2308 | | // |
2309 | 4.13k | if (isOpenMPDistributeDirective(S.getDirectiveKind()) || |
2310 | 2.16k | isOpenMPWorksharingDirective(S.getDirectiveKind()) || |
2311 | 2.78k | isOpenMPTaskLoopDirective(S.getDirectiveKind())) {
2312 | 2.78k | (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable())); |
2313 | 2.78k | (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable())); |
2314 | 2.78k | } |
2315 | | |
2316 | | // Emit: if (PreCond) - begin. |
2317 | | // If the condition constant folds and can be elided, avoid emitting the |
2318 | | // whole loop. |
2319 | 4.13k | bool CondConstant; |
2320 | 4.13k | llvm::BasicBlock *ContBlock = nullptr; |
2321 | 4.13k | if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { |
2322 | 3.51k | if (!CondConstant) |
2323 | 82 | return; |
2324 | 621 | } else { |
2325 | 621 | llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("simd.if.then"); |
2326 | 621 | ContBlock = CGF.createBasicBlock("simd.if.end"); |
2327 | 621 | emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, |
2328 | 621 | CGF.getProfileCount(&S)); |
2329 | 621 | CGF.EmitBlock(ThenBlock); |
2330 | 621 | CGF.incrementProfileCounter(&S); |
2331 | 621 | } |
2332 | | |
2333 | | // Emit the loop iteration variable. |
2334 | 4.05k | const Expr *IVExpr = S.getIterationVariable(); |
2335 | 4.05k | const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); |
2336 | 4.05k | CGF.EmitVarDecl(*IVDecl); |
2337 | 4.05k | CGF.EmitIgnoredExpr(S.getInit()); |
2338 | | |
2339 | | // Emit the iterations count variable. |
2340 | | // If it is not a variable, Sema decided to calculate the iterations count
2341 | | // on each iteration (e.g., it is foldable into a constant).
2342 | 4.05k | if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { |
2343 | 0 | CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); |
2344 | | // Emit calculation of the iterations count. |
2345 | 0 | CGF.EmitIgnoredExpr(S.getCalcLastIteration()); |
2346 | 0 | } |
2347 | | |
2348 | 4.05k | emitAlignedClause(CGF, S); |
2349 | 4.05k | (void)CGF.EmitOMPLinearClauseInit(S); |
2350 | 4.05k | { |
2351 | 4.05k | CodeGenFunction::OMPPrivateScope LoopScope(CGF); |
2352 | 4.05k | CGF.EmitOMPPrivateLoopCounters(S, LoopScope); |
2353 | 4.05k | CGF.EmitOMPLinearClause(S, LoopScope); |
2354 | 4.05k | CGF.EmitOMPPrivateClause(S, LoopScope); |
2355 | 4.05k | CGF.EmitOMPReductionClauseInit(S, LoopScope); |
2356 | 4.05k | CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion( |
2357 | 4.05k | CGF, S, CGF.EmitLValue(S.getIterationVariable())); |
2358 | 4.05k | bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); |
2359 | 4.05k | (void)LoopScope.Privatize(); |
2360 | 4.05k | if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) |
2361 | 2.64k | CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); |
2362 | | |
2363 | 4.05k | emitCommonSimdLoop( |
2364 | 4.05k | CGF, S, |
2365 | 3.98k | [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
2366 | 3.98k | CGF.EmitOMPSimdInit(S); |
2367 | 3.98k | }, |
2368 | 4.16k | [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { |
2369 | 4.16k | CGF.EmitOMPInnerLoop( |
2370 | 4.16k | S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), |
2371 | 4.16k | [&S](CodeGenFunction &CGF) { |
2372 | 4.16k | emitOMPLoopBodyWithStopPoint(CGF, S, |
2373 | 4.16k | CodeGenFunction::JumpDest()); |
2374 | 4.16k | }, |
2375 | 4.16k | [](CodeGenFunction &) {}); |
2376 | 4.16k | }); |
2377 | 4.23k | CGF.EmitOMPSimdFinal(S, [](CodeGenFunction &) { return nullptr; }); |
2378 | | // Emit final copy of the lastprivate variables at the end of loops. |
2379 | 4.05k | if (HasLastprivateClause) |
2380 | 127 | CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true); |
2381 | 4.05k | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd); |
2382 | 4.05k | emitPostUpdateForReductionClause(CGF, S, |
2383 | 0 | [](CodeGenFunction &) { return nullptr; }); |
2384 | 4.05k | } |
2385 | 404 | CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; }); |
2386 | | // Emit: if (PreCond) - end. |
2387 | 4.05k | if (ContBlock) { |
2388 | 621 | CGF.EmitBranch(ContBlock); |
2389 | 621 | CGF.EmitBlock(ContBlock, true); |
2390 | 621 | } |
2391 | 4.05k | } |
2392 | | |
2393 | 167 | void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { |
2394 | 167 | ParentLoopDirectiveForScanRegion ScanRegion(*this, S); |
2395 | 167 | OMPFirstScanLoop = true; |
2396 | 167 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
2397 | 167 | emitOMPSimdRegion(CGF, S, Action); |
2398 | 167 | }; |
2399 | 167 | { |
2400 | 167 | auto LPCRegion = |
2401 | 167 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
2402 | 167 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
2403 | 167 | CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); |
2404 | 167 | } |
2405 | | // Check for outer lastprivate conditional update. |
2406 | 167 | checkForLastprivateConditionalUpdate(*this, S); |
2407 | 167 | } |
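A standalone 'simd' directive is lowered inline in the current function via emitInlinedDirective; there is no outlining, and privatization, linear/aligned handling, and reductions are all resolved by emitOMPSimdRegion above. An illustrative construct that reaches this entry point (hypothetical, not from this file):

    float sum(const float *a, int n) {
      float s = 0.0f;
    #pragma omp simd reduction(+ : s)
      for (int i = 0; i < n; ++i)
        s += a[i];
      return s;
    }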
2408 | | |
2409 | | void CodeGenFunction::EmitOMPOuterLoop( |
2410 | | bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S, |
2411 | | CodeGenFunction::OMPPrivateScope &LoopScope, |
2412 | | const CodeGenFunction::OMPLoopArguments &LoopArgs, |
2413 | | const CodeGenFunction::CodeGenLoopTy &CodeGenLoop, |
2414 | 1.21k | const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) { |
2415 | 1.21k | CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); |
2416 | | |
2417 | 1.21k | const Expr *IVExpr = S.getIterationVariable(); |
2418 | 1.21k | const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); |
2419 | 1.21k | const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); |
2420 | | |
2421 | 1.21k | JumpDest LoopExit = getJumpDestInCurrentScope("omp.dispatch.end"); |
2422 | | |
2423 | | // Start the loop with a block that tests the condition. |
2424 | 1.21k | llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond"); |
2425 | 1.21k | EmitBlock(CondBlock); |
2426 | 1.21k | const SourceRange R = S.getSourceRange(); |
2427 | 1.21k | LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), |
2428 | 1.21k | SourceLocToDebugLoc(R.getEnd())); |
2429 | | |
2430 | 1.21k | llvm::Value *BoolCondVal = nullptr; |
2431 | 1.21k | if (!DynamicOrOrdered) { |
2432 | | // UB = min(UB, GlobalUB) or |
2433 | | // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g. |
2434 | | // 'distribute parallel for') |
2435 | 469 | EmitIgnoredExpr(LoopArgs.EUB); |
2436 | | // IV = LB |
2437 | 469 | EmitIgnoredExpr(LoopArgs.Init); |
2438 | | // IV < UB |
2439 | 469 | BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond); |
2440 | 744 | } else { |
2441 | 744 | BoolCondVal = |
2442 | 744 | RT.emitForNext(*this, S.getBeginLoc(), IVSize, IVSigned, LoopArgs.IL, |
2443 | 744 | LoopArgs.LB, LoopArgs.UB, LoopArgs.ST); |
2444 | 744 | } |
2445 | | |
2446 | | // If there are any cleanups between here and the loop-exit scope, |
2447 | | // create a block to stage the loop exit.
2448 | 1.21k | llvm::BasicBlock *ExitBlock = LoopExit.getBlock(); |
2449 | 1.21k | if (LoopScope.requiresCleanups()) |
2450 | 32 | ExitBlock = createBasicBlock("omp.dispatch.cleanup"); |
2451 | | |
2452 | 1.21k | llvm::BasicBlock *LoopBody = createBasicBlock("omp.dispatch.body"); |
2453 | 1.21k | Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock); |
2454 | 1.21k | if (ExitBlock != LoopExit.getBlock()) { |
2455 | 32 | EmitBlock(ExitBlock); |
2456 | 32 | EmitBranchThroughCleanup(LoopExit); |
2457 | 32 | } |
2458 | 1.21k | EmitBlock(LoopBody); |
2459 | | |
2460 | | // Emit "IV = LB" (in case of static schedule, we have already calculated new |
2461 | | // LB for loop condition and emitted it above). |
2462 | 1.21k | if (DynamicOrOrdered) |
2463 | 744 | EmitIgnoredExpr(LoopArgs.Init); |
2464 | | |
2465 | | // Create a block for the increment. |
2466 | 1.21k | JumpDest Continue = getJumpDestInCurrentScope("omp.dispatch.inc"); |
2467 | 1.21k | BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); |
2468 | | |
2469 | 1.21k | emitCommonSimdLoop( |
2470 | 1.21k | *this, S, |
2471 | 1.21k | [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) { |
2472 | | // Generate !llvm.loop.parallel metadata for loads and stores for loops |
2473 | | // with dynamic/guided scheduling and without ordered clause. |
2474 | 1.21k | if (!isOpenMPSimdDirective(S.getDirectiveKind())) { |
2475 | 666 | CGF.LoopStack.setParallel(!IsMonotonic); |
2476 | 666 | if (const auto *C = S.getSingleClause<OMPOrderClause>()) |
2477 | 0 | if (C->getKind() == OMPC_ORDER_concurrent) |
2478 | 0 | CGF.LoopStack.setParallel(/*Enable=*/true); |
2479 | 547 | } else { |
2480 | 547 | CGF.EmitOMPSimdInit(S, IsMonotonic); |
2481 | 547 | } |
2482 | 1.21k | }, |
2483 | 1.21k | [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered, |
2484 | 1.21k | &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { |
2485 | 1.21k | SourceLocation Loc = S.getBeginLoc(); |
2486 | | // When 'distribute' is not combined with a 'for':
2487 | | // while (idx <= UB) { BODY; ++idx; }
2488 | | // When 'distribute' is combined with a 'for'
2489 | | // (e.g. 'distribute parallel for'):
2490 | | // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
2491 | 1.21k | CGF.EmitOMPInnerLoop( |
2492 | 1.21k | S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr, |
2493 | 1.21k | [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { |
2494 | 1.21k | CodeGenLoop(CGF, S, LoopExit); |
2495 | 1.21k | }, |
2496 | 1.21k | [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) { |
2497 | 1.21k | CodeGenOrdered(CGF, Loc, IVSize, IVSigned); |
2498 | 1.21k | }); |
2499 | 1.21k | }); |
2500 | | |
2501 | 1.21k | EmitBlock(Continue.getBlock()); |
2502 | 1.21k | BreakContinueStack.pop_back(); |
2503 | 1.21k | if (!DynamicOrOrdered) { |
2504 | | // Emit "LB = LB + Stride", "UB = UB + Stride". |
2505 | 469 | EmitIgnoredExpr(LoopArgs.NextLB); |
2506 | 469 | EmitIgnoredExpr(LoopArgs.NextUB); |
2507 | 469 | } |
2508 | | |
2509 | 1.21k | EmitBranch(CondBlock); |
2510 | 1.21k | LoopStack.pop(); |
2511 | | // Emit the fall-through block. |
2512 | 1.21k | EmitBlock(LoopExit.getBlock()); |
2513 | | |
2514 | | // Tell the runtime we are done. |
2515 | 1.21k | auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) { |
2516 | 1.21k | if (!DynamicOrOrdered) |
2517 | 469 | CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(), |
2518 | 469 | S.getDirectiveKind()); |
2519 | 1.21k | }; |
2520 | 1.21k | OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); |
2521 | 1.21k | } |
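Schematically, the blocks created above (omp.dispatch.cond, omp.dispatch.body, omp.dispatch.inc, omp.dispatch.end) give the following shape for the dynamic/ordered case; runtime_dispatch_next and body are placeholders standing in for whatever emitForNext and CodeGenLoop produce, shown only as an illustration:

    int lb, ub, st, last;
    while (runtime_dispatch_next(&last, &lb, &ub, &st)) { // omp.dispatch.cond
      for (int iv = lb; iv <= ub; ++iv)                   // inner loop
        body(iv);                                         // CodeGenLoop
    }                                                     // omp.dispatch.end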
2522 | | |
2523 | | void CodeGenFunction::EmitOMPForOuterLoop( |
2524 | | const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic, |
2525 | | const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, |
2526 | | const OMPLoopArguments &LoopArgs, |
2527 | 1.04k | const CodeGenDispatchBoundsTy &CGDispatchBounds) { |
2528 | 1.04k | CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); |
2529 | | |
2530 | | // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). |
2531 | 1.04k | const bool DynamicOrOrdered = |
2532 | 1.04k | Ordered || RT.isDynamic(ScheduleKind.Schedule);
2533 | | |
2534 | 1.04k | assert((Ordered || |
2535 | 1.04k | !RT.isStaticNonchunked(ScheduleKind.Schedule, |
2536 | 1.04k | LoopArgs.Chunk != nullptr)) && |
2537 | 1.04k | "static non-chunked schedule does not need outer loop"); |
2538 | | |
2539 | | // Emit outer loop. |
2540 | | // |
2541 | | // OpenMP [2.7.1, Loop Construct, Description, table 2-1] |
2542 | | // When schedule(dynamic,chunk_size) is specified, the iterations are |
2543 | | // distributed to threads in the team in chunks as the threads request them. |
2544 | | // Each thread executes a chunk of iterations, then requests another chunk, |
2545 | | // until no chunks remain to be distributed. Each chunk contains chunk_size |
2546 | | // iterations, except for the last chunk to be distributed, which may have |
2547 | | // fewer iterations. When no chunk_size is specified, it defaults to 1. |
2548 | | // |
2549 | | // When schedule(guided,chunk_size) is specified, the iterations are assigned |
2550 | | // to threads in the team in chunks as the executing threads request them. |
2551 | | // Each thread executes a chunk of iterations, then requests another chunk, |
2552 | | // until no chunks remain to be assigned. For a chunk_size of 1, the size of |
2553 | | // each chunk is proportional to the number of unassigned iterations divided |
2554 | | // by the number of threads in the team, decreasing to 1. For a chunk_size |
2555 | | // with value k (greater than 1), the size of each chunk is determined in the |
2556 | | // same way, with the restriction that the chunks do not contain fewer than k |
2557 | | // iterations (except for the last chunk to be assigned, which may have fewer |
2558 | | // than k iterations). |
2559 | | // |
2560 | | // When schedule(auto) is specified, the decision regarding scheduling is |
2561 | | // delegated to the compiler and/or runtime system. The programmer gives the |
2562 | | // implementation the freedom to choose any possible mapping of iterations to |
2563 | | // threads in the team. |
2564 | | // |
2565 | | // When schedule(runtime) is specified, the decision regarding scheduling is |
2566 | | // deferred until run time, and the schedule and chunk size are taken from the |
2567 | | // run-sched-var ICV. If the ICV is set to auto, the schedule is |
2568 | | // implementation defined.
2569 | | // |
2570 | | // while(__kmpc_dispatch_next(&LB, &UB)) { |
2571 | | // idx = LB; |
2572 | | // while (idx <= UB) { BODY; ++idx; |
2573 | | // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only. |
2574 | | // } // inner loop |
2575 | | // } |
2576 | | // |
2577 | | // OpenMP [2.7.1, Loop Construct, Description, table 2-1] |
2578 | | // When schedule(static, chunk_size) is specified, iterations are divided into |
2579 | | // chunks of size chunk_size, and the chunks are assigned to the threads in |
2580 | | // the team in a round-robin fashion in the order of the thread number. |
2581 | | // |
2582 | | // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) { |
2583 | | // while (idx <= UB) { BODY; ++idx; } // inner loop |
2584 | | // LB = LB + ST; |
2585 | | // UB = UB + ST; |
2586 | | // } |
2587 | | // |
2588 | | |
2589 | 1.04k | const Expr *IVExpr = S.getIterationVariable(); |
2590 | 1.04k | const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); |
2591 | 1.04k | const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); |
2592 | | |
2593 | 1.04k | if (DynamicOrOrdered) { |
2594 | 744 | const std::pair<llvm::Value *, llvm::Value *> DispatchBounds = |
2595 | 744 | CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB); |
2596 | 744 | llvm::Value *LBVal = DispatchBounds.first; |
2597 | 744 | llvm::Value *UBVal = DispatchBounds.second; |
2598 | 744 | CGOpenMPRuntime::DispatchRTInput DispatchRTInputValues = {LBVal, UBVal,
2599 | 744 | LoopArgs.Chunk}; |
2600 | 744 | RT.emitForDispatchInit(*this, S.getBeginLoc(), ScheduleKind, IVSize, |
2601 | 744 | IVSigned, Ordered, DispatchRTInputValues);
2602 | 305 | } else { |
2603 | 305 | CGOpenMPRuntime::StaticRTInput StaticInit( |
2604 | 305 | IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB, |
2605 | 305 | LoopArgs.ST, LoopArgs.Chunk); |
2606 | 305 | RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(), |
2607 | 305 | ScheduleKind, StaticInit); |
2608 | 305 | } |
2609 | | |
2610 | 1.04k | auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc, |
2611 | 1.04k | const unsigned IVSize, |
2612 | 1.05k | const bool IVSigned) { |
2613 | 1.05k | if (Ordered) { |
2614 | 37 | CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize, |
2615 | 37 | IVSigned); |
2616 | 37 | } |
2617 | 1.05k | }; |
2618 | | |
2619 | 1.04k | OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST, |
2620 | 1.04k | LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB); |
2621 | 1.04k | OuterLoopArgs.IncExpr = S.getInc(); |
2622 | 1.04k | OuterLoopArgs.Init = S.getInit(); |
2623 | 1.04k | OuterLoopArgs.Cond = S.getCond(); |
2624 | 1.04k | OuterLoopArgs.NextLB = S.getNextLowerBound(); |
2625 | 1.04k | OuterLoopArgs.NextUB = S.getNextUpperBound(); |
2626 | 1.04k | EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs, |
2627 | 1.04k | emitOMPLoopBodyWithStopPoint, CodeGenOrdered); |
2628 | 1.04k | } |
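A user-level loop that takes this outer-loop path because its schedule is a dispatch one (hypothetical example; 'work' is a placeholder):

    #pragma omp parallel for schedule(guided, 8)
    for (int i = 0; i < n; ++i)
      work(i);

A plain schedule(static) without a chunk never reaches this function, which is what the assert at the top enforces.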
2629 | | |
2630 | | static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc, |
2631 | 164 | const unsigned IVSize, const bool IVSigned) {} |
2632 | | |
2633 | | void CodeGenFunction::EmitOMPDistributeOuterLoop( |
2634 | | OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S, |
2635 | | OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs, |
2636 | 164 | const CodeGenLoopTy &CodeGenLoopContent) { |
2637 | | |
2638 | 164 | CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); |
2639 | | |
2640 | | // Emit outer loop. |
2641 | | // Same behavior as an OMPForOuterLoop, except that the schedule cannot
2642 | | // be dynamic.
2643 | | // |
2644 | | |
2645 | 164 | const Expr *IVExpr = S.getIterationVariable(); |
2646 | 164 | const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); |
2647 | 164 | const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); |
2648 | | |
2649 | 164 | CGOpenMPRuntime::StaticRTInput StaticInit( |
2650 | 164 | IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB, |
2651 | 164 | LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk); |
2652 | 164 | RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit); |
2653 | | |
2654 | | // For combined 'distribute' and 'for', the increment expression of
2655 | | // 'distribute' is stored in DistInc. For 'distribute' alone, it is in Inc.
2656 | 164 | Expr *IncExpr; |
2657 | 164 | if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())) |
2658 | 0 | IncExpr = S.getDistInc(); |
2659 | 164 | else |
2660 | 164 | IncExpr = S.getInc(); |
2661 | | |
2662 | | // This routine is shared by 'omp distribute parallel for' and
2663 | | // 'omp distribute': select the right EUB expression depending on the
2664 | | // directive.
2665 | 164 | OMPLoopArguments OuterLoopArgs; |
2666 | 164 | OuterLoopArgs.LB = LoopArgs.LB; |
2667 | 164 | OuterLoopArgs.UB = LoopArgs.UB; |
2668 | 164 | OuterLoopArgs.ST = LoopArgs.ST; |
2669 | 164 | OuterLoopArgs.IL = LoopArgs.IL; |
2670 | 164 | OuterLoopArgs.Chunk = LoopArgs.Chunk; |
2671 | 164 | OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) |
2672 | 0 | ? S.getCombinedEnsureUpperBound() |
2673 | 164 | : S.getEnsureUpperBound(); |
2674 | 164 | OuterLoopArgs.IncExpr = IncExpr; |
2675 | 164 | OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) |
2676 | 0 | ? S.getCombinedInit() |
2677 | 164 | : S.getInit(); |
2678 | 164 | OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) |
2679 | 0 | ? S.getCombinedCond() |
2680 | 164 | : S.getCond(); |
2681 | 164 | OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) |
2682 | 0 | ? S.getCombinedNextLowerBound() |
2683 | 164 | : S.getNextLowerBound(); |
2684 | 164 | OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) |
2685 | 0 | ? S.getCombinedNextUpperBound() |
2686 | 164 | : S.getNextUpperBound(); |
2687 | | |
2688 | 164 | EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S, |
2689 | 164 | LoopScope, OuterLoopArgs, CodeGenLoopContent, |
2690 | 164 | emitEmptyOrdered); |
2691 | 164 | } |
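One way to reach this path is a chunked dist_schedule, which forces 'distribute' through the static-chunked outer loop (hypothetical example; 'f' is a placeholder):

    #pragma omp target teams distribute dist_schedule(static, 128)
    for (int i = 0; i < n; ++i)
      a[i] = f(i);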
2692 | | |
2693 | | static std::pair<LValue, LValue> |
2694 | | emitDistributeParallelForInnerBounds(CodeGenFunction &CGF, |
2695 | 2.76k | const OMPExecutableDirective &S) { |
2696 | 2.76k | const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); |
2697 | 2.76k | LValue LB = |
2698 | 2.76k | EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable())); |
2699 | 2.76k | LValue UB = |
2700 | 2.76k | EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable())); |
2701 | | |
2702 | | // When composing 'distribute' with 'for' (e.g. as in 'distribute |
2703 | | // parallel for') we need to use the 'distribute' |
2704 | | // chunk lower and upper bounds rather than the whole loop iteration |
2705 | | // space. These are parameters to the outlined function for 'parallel' |
2706 | | // and we copy the bounds of the previous schedule into the |
2707 | | // current ones.
2708 | 2.76k | LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable()); |
2709 | 2.76k | LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable()); |
2710 | 2.76k | llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar( |
2711 | 2.76k | PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc()); |
2712 | 2.76k | PrevLBVal = CGF.EmitScalarConversion( |
2713 | 2.76k | PrevLBVal, LS.getPrevLowerBoundVariable()->getType(), |
2714 | 2.76k | LS.getIterationVariable()->getType(), |
2715 | 2.76k | LS.getPrevLowerBoundVariable()->getExprLoc()); |
2716 | 2.76k | llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar( |
2717 | 2.76k | PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc()); |
2718 | 2.76k | PrevUBVal = CGF.EmitScalarConversion( |
2719 | 2.76k | PrevUBVal, LS.getPrevUpperBoundVariable()->getType(), |
2720 | 2.76k | LS.getIterationVariable()->getType(), |
2721 | 2.76k | LS.getPrevUpperBoundVariable()->getExprLoc()); |
2722 | | |
2723 | 2.76k | CGF.EmitStoreOfScalar(PrevLBVal, LB); |
2724 | 2.76k | CGF.EmitStoreOfScalar(PrevUBVal, UB); |
2725 | | |
2726 | 2.76k | return {LB, UB}; |
2727 | 2.76k | } |
2728 | | |
2729 | | /// If the 'for' loop has a dispatch schedule (e.g. dynamic, guided), then
2730 | | /// we need to use the LB and UB expressions generated by the worksharing
2731 | | /// code generation support, whereas in non-combined situations we would
2732 | | /// just emit 0 and the LastIteration expression.
2733 | | /// This function is necessary due to the difference of the LB and UB
2734 | | /// types for the RT emission routines 'for_static_init' and
2735 | | /// 'for_dispatch_init'.
2736 | | static std::pair<llvm::Value *, llvm::Value *> |
2737 | | emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF, |
2738 | | const OMPExecutableDirective &S, |
2739 | 440 | Address LB, Address UB) { |
2740 | 440 | const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); |
2741 | 440 | const Expr *IVExpr = LS.getIterationVariable(); |
2742 | | // When implementing a dynamic schedule for a 'for' combined with a
2743 | | // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
2744 | | // is not normalized, as each team only executes its own assigned
2745 | | // distribute chunk.
2746 | 440 | QualType IteratorTy = IVExpr->getType(); |
2747 | 440 | llvm::Value *LBVal = |
2748 | 440 | CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getBeginLoc()); |
2749 | 440 | llvm::Value *UBVal = |
2750 | 440 | CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getBeginLoc()); |
2751 | 440 | return {LBVal, UBVal}; |
2752 | 440 | } |
2753 | | |
2754 | | static void emitDistributeParallelForDistributeInnerBoundParams( |
2755 | | CodeGenFunction &CGF, const OMPExecutableDirective &S, |
2756 | 2.76k | llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) { |
2757 | 2.76k | const auto &Dir = cast<OMPLoopDirective>(S); |
2758 | 2.76k | LValue LB = |
2759 | 2.76k | CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable())); |
2760 | 2.76k | llvm::Value *LBCast = |
2761 | 2.76k | CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(LB.getAddress(CGF)), |
2762 | 2.76k | CGF.SizeTy, /*isSigned=*/false); |
2763 | 2.76k | CapturedVars.push_back(LBCast); |
2764 | 2.76k | LValue UB = |
2765 | 2.76k | CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable())); |
2766 | | |
2767 | 2.76k | llvm::Value *UBCast = |
2768 | 2.76k | CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(UB.getAddress(CGF)), |
2769 | 2.76k | CGF.SizeTy, /*isSigned=*/false); |
2770 | 2.76k | CapturedVars.push_back(UBCast); |
2771 | 2.76k | } |
2772 | | |
2773 | | static void |
2774 | | emitInnerParallelForWhenCombined(CodeGenFunction &CGF, |
2775 | | const OMPLoopDirective &S, |
2776 | 2.76k | CodeGenFunction::JumpDest LoopExit) { |
2777 | 2.76k | auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF, |
2778 | 2.76k | PrePostActionTy &Action) { |
2779 | 2.76k | Action.Enter(CGF); |
2780 | 2.76k | bool HasCancel = false; |
2781 | 2.76k | if (!isOpenMPSimdDirective(S.getDirectiveKind())) { |
2782 | 1.32k | if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S)) |
2783 | 340 | HasCancel = D->hasCancel(); |
2784 | 988 | else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S)) |
2785 | 412 | HasCancel = D->hasCancel(); |
2786 | 576 | else if (const auto *D = |
2787 | 576 | dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S)) |
2788 | 576 | HasCancel = D->hasCancel(); |
2789 | 1.32k | } |
2790 | 2.76k | CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(), |
2791 | 2.76k | HasCancel); |
2792 | 2.76k | CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(), |
2793 | 2.76k | emitDistributeParallelForInnerBounds, |
2794 | 2.76k | emitDistributeParallelForDispatchBounds); |
2795 | 2.76k | }; |
2796 | | |
2797 | 2.76k | emitCommonOMPParallelDirective( |
2798 | 2.76k | CGF, S, |
2799 | 1.44k | isOpenMPSimdDirective(S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for,
2800 | 2.76k | CGInlinedWorksharingLoop, |
2801 | 2.76k | emitDistributeParallelForDistributeInnerBoundParams); |
2802 | 2.76k | } |
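Tying the last few helpers together: for a composite construct such as the one below, the 'distribute' chunk bounds are captured as extra outlined-function arguments (emitDistributeParallelForDistributeInnerBoundParams), copied into the inner worksharing loop's LB/UB (emitDistributeParallelForInnerBounds), and, for dispatch schedules, forwarded unnormalized (emitDistributeParallelForDispatchBounds). Hypothetical example:

    #pragma omp target teams distribute parallel for
    for (int i = 0; i < n; ++i)
      c[i] = a[i] + b[i];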
2803 | | |
2804 | | void CodeGenFunction::EmitOMPDistributeParallelForDirective( |
2805 | 412 | const OMPDistributeParallelForDirective &S) { |
2806 | 412 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
2807 | 412 | CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, |
2808 | 412 | S.getDistInc()); |
2809 | 412 | }; |
2810 | 412 | OMPLexicalScope Scope(*this, S, OMPD_parallel); |
2811 | 412 | CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen); |
2812 | 412 | } |
2813 | | |
2814 | | void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective( |
2815 | 308 | const OMPDistributeParallelForSimdDirective &S) { |
2816 | 308 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
2817 | 308 | CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, |
2818 | 308 | S.getDistInc()); |
2819 | 308 | }; |
2820 | 308 | OMPLexicalScope Scope(*this, S, OMPD_parallel); |
2821 | 308 | CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen); |
2822 | 308 | } |
2823 | | |
2824 | | void CodeGenFunction::EmitOMPDistributeSimdDirective( |
2825 | 150 | const OMPDistributeSimdDirective &S) { |
2826 | 150 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
2827 | 150 | CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); |
2828 | 150 | }; |
2829 | 150 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
2830 | 150 | CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); |
2831 | 150 | } |
2832 | | |
2833 | | void CodeGenFunction::EmitOMPTargetSimdDeviceFunction( |
2834 | 193 | CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) { |
2835 | | // Emit SPMD target simd region as a standalone region.
2836 | 193 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
2837 | 193 | emitOMPSimdRegion(CGF, S, Action); |
2838 | 193 | }; |
2839 | 193 | llvm::Function *Fn; |
2840 | 193 | llvm::Constant *Addr; |
2841 | | // Emit target region as a standalone region. |
2842 | 193 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
2843 | 193 | S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); |
2844 | 193 | assert(Fn && Addr && "Target device function emission failed."); |
2845 | 193 | } |
2846 | | |
2847 | | void CodeGenFunction::EmitOMPTargetSimdDirective( |
2848 | 327 | const OMPTargetSimdDirective &S) { |
2849 | 327 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
2850 | 327 | emitOMPSimdRegion(CGF, S, Action); |
2851 | 327 | }; |
2852 | 327 | emitCommonOMPTargetDirective(*this, S, CodeGen); |
2853 | 327 | } |
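Both entry points above lower the same construct: the device-function variant pre-emits the offload kernel under the parent function's name, while the plain variant handles the directive when it is reached during normal emission. Illustrative source (hypothetical):

    #pragma omp target simd map(tofrom : a[0 : n])
    for (int i = 0; i < n; ++i)
      a[i] += 1.0f;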
2854 | | |
2855 | | namespace { |
2856 | | struct ScheduleKindModifiersTy { |
2857 | | OpenMPScheduleClauseKind Kind; |
2858 | | OpenMPScheduleClauseModifier M1; |
2859 | | OpenMPScheduleClauseModifier M2; |
2860 | | ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind, |
2861 | | OpenMPScheduleClauseModifier M1, |
2862 | | OpenMPScheduleClauseModifier M2) |
2863 | 0 | : Kind(Kind), M1(M1), M2(M2) {} |
2864 | | }; |
2865 | | } // namespace |
2866 | | |
2867 | | bool CodeGenFunction::EmitOMPWorksharingLoop( |
2868 | | const OMPLoopDirective &S, Expr *EUB, |
2869 | | const CodeGenLoopBoundsTy &CodeGenLoopBounds, |
2870 | 4.71k | const CodeGenDispatchBoundsTy &CGDispatchBounds) { |
2871 | | // Emit the loop iteration variable. |
2872 | 4.71k | const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); |
2873 | 4.71k | const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl()); |
2874 | 4.71k | EmitVarDecl(*IVDecl); |
2875 | | |
2876 | | // Emit the iterations count variable. |
2877 | | // If it is not a variable, Sema decided to calculate the iterations count
2878 | | // on each iteration (e.g., it is foldable into a constant).
2879 | 4.71k | if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { |
2880 | 0 | EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); |
2881 | | // Emit calculation of the iterations count. |
2882 | 0 | EmitIgnoredExpr(S.getCalcLastIteration()); |
2883 | 0 | } |
2884 | | |
2885 | 4.71k | CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); |
2886 | | |
2887 | 4.71k | bool HasLastprivateClause; |
2888 | | // Check pre-condition. |
2889 | 4.71k | { |
2890 | 4.71k | OMPLoopScope PreInitScope(*this, S); |
2891 | | // Skip the entire loop if we don't meet the precondition. |
2892 | | // If the condition constant folds and can be elided, avoid emitting the |
2893 | | // whole loop. |
2894 | 4.71k | bool CondConstant; |
2895 | 4.71k | llvm::BasicBlock *ContBlock = nullptr; |
2896 | 4.71k | if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { |
2897 | 3.86k | if (!CondConstant) |
2898 | 52 | return false; |
2899 | 848 | } else { |
2900 | 848 | llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then"); |
2901 | 848 | ContBlock = createBasicBlock("omp.precond.end"); |
2902 | 848 | emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, |
2903 | 848 | getProfileCount(&S)); |
2904 | 848 | EmitBlock(ThenBlock); |
2905 | 848 | incrementProfileCounter(&S); |
2906 | 848 | } |
2907 | | |
2908 | 4.66k | RunCleanupsScope DoacrossCleanupScope(*this); |
2909 | 4.66k | bool Ordered = false; |
2910 | 4.66k | if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) { |
2911 | 53 | if (OrderedClause->getNumForLoops()) |
2912 | 16 | RT.emitDoacrossInit(*this, S, OrderedClause->getLoopNumIterations()); |
2913 | 37 | else |
2914 | 37 | Ordered = true; |
2915 | 53 | } |
2916 | | |
2917 | 4.66k | llvm::DenseSet<const Expr *> EmittedFinals; |
2918 | 4.66k | emitAlignedClause(*this, S); |
2919 | 4.66k | bool HasLinears = EmitOMPLinearClauseInit(S); |
2920 | | // Emit helper vars inits. |
2921 | | |
2922 | 4.66k | std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S); |
2923 | 4.66k | LValue LB = Bounds.first; |
2924 | 4.66k | LValue UB = Bounds.second; |
2925 | 4.66k | LValue ST = |
2926 | 4.66k | EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); |
2927 | 4.66k | LValue IL = |
2928 | 4.66k | EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); |
2929 | | |
2930 | | // Emit 'then' code. |
2931 | 4.66k | { |
2932 | 4.66k | OMPPrivateScope LoopScope(*this); |
2933 | 4.66k | if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
2934 | | // Emit implicit barrier to synchronize threads and avoid data races on |
2935 | | // initialization of firstprivate variables and post-update of |
2936 | | // lastprivate variables. |
2937 | 150 | CGM.getOpenMPRuntime().emitBarrierCall( |
2938 | 150 | *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, |
2939 | 150 | /*ForceSimpleCall=*/true); |
2940 | 150 | } |
2941 | 4.66k | EmitOMPPrivateClause(S, LoopScope); |
2942 | 4.66k | CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion( |
2943 | 4.66k | *this, S, EmitLValue(S.getIterationVariable())); |
2944 | 4.66k | HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); |
2945 | 4.66k | EmitOMPReductionClauseInit(S, LoopScope); |
2946 | 4.66k | EmitOMPPrivateLoopCounters(S, LoopScope); |
2947 | 4.66k | EmitOMPLinearClause(S, LoopScope); |
2948 | 4.66k | (void)LoopScope.Privatize(); |
2949 | 4.66k | if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) |
2950 | 2.36k | CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S); |
2951 | | |
2952 | | // Detect the loop schedule kind and chunk. |
2953 | 4.66k | const Expr *ChunkExpr = nullptr; |
2954 | 4.66k | OpenMPScheduleTy ScheduleKind; |
2955 | 4.66k | if (const auto *C = S.getSingleClause<OMPScheduleClause>()) { |
2956 | 1.23k | ScheduleKind.Schedule = C->getScheduleKind(); |
2957 | 1.23k | ScheduleKind.M1 = C->getFirstScheduleModifier(); |
2958 | 1.23k | ScheduleKind.M2 = C->getSecondScheduleModifier(); |
2959 | 1.23k | ChunkExpr = C->getChunkSize(); |
2960 | 3.42k | } else { |
2961 | | // Default behaviour for schedule clause. |
2962 | 3.42k | CGM.getOpenMPRuntime().getDefaultScheduleAndChunk( |
2963 | 3.42k | *this, S, ScheduleKind.Schedule, ChunkExpr); |
2964 | 3.42k | } |
2965 | 4.66k | bool HasChunkSizeOne = false; |
2966 | 4.66k | llvm::Value *Chunk = nullptr; |
2967 | 4.66k | if (ChunkExpr) { |
2968 | 659 | Chunk = EmitScalarExpr(ChunkExpr); |
2969 | 659 | Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(), |
2970 | 659 | S.getIterationVariable()->getType(), |
2971 | 659 | S.getBeginLoc()); |
2972 | 659 | Expr::EvalResult Result; |
2973 | 659 | if (ChunkExpr->EvaluateAsInt(Result, getContext())) { |
2974 | 454 | llvm::APSInt EvaluatedChunk = Result.Val.getInt(); |
2975 | 454 | HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1); |
2976 | 454 | } |
2977 | 659 | } |
2978 | 4.66k | const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); |
2979 | 4.66k | const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); |
2980 | | // OpenMP 4.5, 2.7.1 Loop Construct, Description. |
2981 | | // If the static schedule kind is specified or if the ordered clause is |
2982 | | // specified, and if no monotonic modifier is specified, the effect will |
2983 | | // be as if the monotonic modifier was specified. |
2984 | 4.66k | bool StaticChunkedOne = RT.isStaticChunked(ScheduleKind.Schedule, |
2985 | 527 | /* Chunked */ Chunk != nullptr) && HasChunkSizeOne && |
2986 | 308 | isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()); |
2987 | 4.66k | bool IsMonotonic = |
2988 | 4.66k | Ordered || |
2989 | 4.62k | ((ScheduleKind.Schedule == OMPC_SCHEDULE_static || |
2990 | 3.91k | ScheduleKind.Schedule == OMPC_SCHEDULE_unknown) && |
2991 | 3.92k | !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic || |
2992 | 3.91k | ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) || |
2993 | 708 | ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic || |
2994 | 700 | ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic; |
2995 | 4.66k | if ((RT.isStaticNonchunked(ScheduleKind.Schedule, |
2996 | 4.66k | /* Chunked */ Chunk != nullptr) || |
2997 | 1.25k | StaticChunkedOne) && |
2998 | 3.62k | !Ordered) { |
2999 | 3.61k | JumpDest LoopExit = |
3000 | 3.61k | getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); |
3001 | 3.61k | emitCommonSimdLoop( |
3002 | 3.61k | *this, S, |
3003 | 3.59k | [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) { |
3004 | 3.59k | if (isOpenMPSimdDirective(S.getDirectiveKind())) { |
3005 | 1.76k | CGF.EmitOMPSimdInit(S, IsMonotonic); |
3006 | 1.83k | } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) { |
3007 | 5 | if (C->getKind() == OMPC_ORDER_concurrent) |
3008 | 5 | CGF.LoopStack.setParallel(/*Enable=*/true); |
3009 | 5 | } |
3010 | 3.59k | }, |
3011 | 3.61k | [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk, |
3012 | 3.61k | &S, ScheduleKind, LoopExit, |
3013 | 3.66k | &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { |
3014 | | // OpenMP [2.7.1, Loop Construct, Description, table 2-1] |
3015 | | // When no chunk_size is specified, the iteration space is divided |
3016 | | // into chunks that are approximately equal in size, and at most |
3017 | | // one chunk is distributed to each thread. Note that the size of |
3018 | | // the chunks is unspecified in this case. |
3019 | 3.66k | CGOpenMPRuntime::StaticRTInput StaticInit( |
3020 | 3.66k | IVSize, IVSigned, Ordered, IL.getAddress(CGF), |
3021 | 3.66k | LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF), |
3022 | 3.44k | StaticChunkedOne ? Chunk : nullptr);
3023 | 3.66k | CGF.CGM.getOpenMPRuntime().emitForStaticInit( |
3024 | 3.66k | CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, |
3025 | 3.66k | StaticInit); |
3026 | | // UB = min(UB, GlobalUB); |
3027 | 3.66k | if (!StaticChunkedOne) |
3028 | 3.44k | CGF.EmitIgnoredExpr(S.getEnsureUpperBound()); |
3029 | | // IV = LB; |
3030 | 3.66k | CGF.EmitIgnoredExpr(S.getInit()); |
3031 | | // For unchunked static schedule generate: |
3032 | | // |
3033 | | // while (idx <= UB) { |
3034 | | // BODY; |
3035 | | // ++idx; |
3036 | | // } |
3037 | | // |
3038 | | // For static schedule with chunk one: |
3039 | | // |
3040 | | // while (IV <= PrevUB) { |
3041 | | // BODY; |
3042 | | // IV += ST; |
3043 | | // } |
3044 | 3.66k | CGF.EmitOMPInnerLoop( |
3045 | 3.66k | S, LoopScope.requiresCleanups(), |
3046 | 221 | StaticChunkedOne ? S.getCombinedParForInDistCond() |
3047 | 3.44k | : S.getCond(), |
3048 | 3.44k | StaticChunkedOne ? S.getDistInc() : S.getInc(),
3049 | 3.66k | [&S, LoopExit](CodeGenFunction &CGF) { |
3050 | 3.66k | emitOMPLoopBodyWithStopPoint(CGF, S, LoopExit); |
3051 | 3.66k | }, |
3052 | 3.66k | [](CodeGenFunction &) {}); |
3053 | 3.66k | }); |
3054 | 3.61k | EmitBlock(LoopExit.getBlock()); |
3055 | | // Tell the runtime we are done. |
3056 | 3.67k | auto &&CodeGen = [&S](CodeGenFunction &CGF) { |
3057 | 3.67k | CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(), |
3058 | 3.67k | S.getDirectiveKind()); |
3059 | 3.67k | }; |
3060 | 3.61k | OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); |
3061 | 1.04k | } else { |
3062 | | // Emit the outer loop, which requests its work chunk [LB..UB] from |
3063 | | // runtime and runs the inner loop to process it. |
3064 | 1.04k | const OMPLoopArguments LoopArguments( |
3065 | 1.04k | LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this), |
3066 | 1.04k | IL.getAddress(*this), Chunk, EUB); |
3067 | 1.04k | EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered, |
3068 | 1.04k | LoopArguments, CGDispatchBounds); |
3069 | 1.04k | } |
3070 | 4.66k | if (isOpenMPSimdDirective(S.getDirectiveKind())) { |
3071 | 2.24k | EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) { |
3072 | 2.24k | return CGF.Builder.CreateIsNotNull( |
3073 | 2.24k | CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); |
3074 | 2.24k | }); |
3075 | 2.24k | } |
3076 | 4.66k | EmitOMPReductionClauseFinal( |
3077 | 4.66k | S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind()) |
3078 | 2.24k | ? /*Parallel and Simd*/ OMPD_parallel_for_simd |
3079 | 2.42k | : /*Parallel only*/ OMPD_parallel); |
3080 | | // Emit post-update of the reduction variables if IsLastIter != 0. |
3081 | 4.66k | emitPostUpdateForReductionClause( |
3082 | 0 | *this, S, [IL, &S](CodeGenFunction &CGF) { |
3083 | 0 | return CGF.Builder.CreateIsNotNull( |
3084 | 0 | CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); |
3085 | 0 | }); |
3086 | | // Emit final copy of the lastprivate variables if IsLastIter != 0. |
3087 | 4.66k | if (HasLastprivateClause) |
3088 | 196 | EmitOMPLastprivateClauseFinal( |
3089 | 196 | S, isOpenMPSimdDirective(S.getDirectiveKind()), |
3090 | 196 | Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc()))); |
3091 | 4.66k | } |
3092 | 134 | EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) { |
3093 | 134 | return CGF.Builder.CreateIsNotNull( |
3094 | 134 | CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); |
3095 | 134 | }); |
3096 | 4.66k | DoacrossCleanupScope.ForceCleanup(); |
3097 | | // We're now done with the loop, so jump to the continuation block. |
3098 | 4.66k | if (ContBlock) { |
3099 | 848 | EmitBranch(ContBlock); |
3100 | 848 | EmitBlock(ContBlock, /*IsFinished=*/true); |
3101 | 848 | } |
3102 | 4.66k | } |
3103 | 4.66k | return HasLastprivateClause; |
3104 | 4.71k | } |
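The IsMonotonic computation above encodes the OpenMP 4.5 default quoted in the comment: static or unspecified schedules and ordered loops behave as monotonic unless a nonmonotonic modifier is written explicitly. A loop that opts out, and so may be marked parallel in the outer-loop path (hypothetical example; 'work' is a placeholder):

    #pragma omp parallel
    #pragma omp for schedule(nonmonotonic : dynamic, 2)
    for (int i = 0; i < n; ++i)
      work(i);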
3105 | | |
3106 | | /// The following two functions generate expressions for the loop lower |
3107 | | /// and upper bounds in case of a static or dynamic (dispatch) schedule
3108 | | /// of the associated 'for' or 'distribute' loop. |
3109 | | static std::pair<LValue, LValue> |
3110 | 1.89k | emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) { |
3111 | 1.89k | const auto &LS = cast<OMPLoopDirective>(S); |
3112 | 1.89k | LValue LB = |
3113 | 1.89k | EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable())); |
3114 | 1.89k | LValue UB = |
3115 | 1.89k | EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable())); |
3116 | 1.89k | return {LB, UB}; |
3117 | 1.89k | } |
3118 | | |
3119 | | /// When dealing with dispatch schedules (e.g. dynamic, guided), we do not
3120 | | /// consider the lower and upper bound expressions generated by the
3121 | | /// worksharing loop support; instead we use 0 and the iteration space size
3122 | | /// as constants.
3123 | | static std::pair<llvm::Value *, llvm::Value *> |
3124 | | emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S, |
3125 | 304 | Address LB, Address UB) { |
3126 | 304 | const auto &LS = cast<OMPLoopDirective>(S); |
3127 | 304 | const Expr *IVExpr = LS.getIterationVariable(); |
3128 | 304 | const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType()); |
3129 | 304 | llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0); |
3130 | 304 | llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration()); |
3131 | 304 | return {LBVal, UBVal}; |
3132 | 304 | } |
3133 | | |
3134 | | /// Emits the code for the directive with inscan reductions. |
3135 | | /// The code is the following: |
3136 | | /// \code |
3137 | | /// size num_iters = <num_iters>; |
3138 | | /// <type> buffer[num_iters]; |
3139 | | /// #pragma omp ... |
3140 | | /// for (i: 0..<num_iters>) { |
3141 | | /// <input phase>; |
3142 | | /// buffer[i] = red; |
3143 | | /// } |
3144 | | /// for (int k = 0; k != ceil(log2(num_iters)); ++k) |
3145 | | /// for (size cnt = last_iter; cnt >= pow(2, k); --cnt)
3146 | | /// buffer[cnt] op= buffer[cnt-pow(2,k)];
3147 | | /// #pragma omp ... |
3148 | | /// for (0..<num_iters>) { |
3149 | | /// red = InclusiveScan ? buffer[i] : buffer[i-1]; |
3150 | | /// <scan phase>; |
3151 | | /// } |
3152 | | /// \endcode |
3153 | | static void emitScanBasedDirective( |
3154 | | CodeGenFunction &CGF, const OMPLoopDirective &S, |
3155 | | llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen, |
3156 | | llvm::function_ref<void(CodeGenFunction &)> FirstGen, |
3157 | 16 | llvm::function_ref<void(CodeGenFunction &)> SecondGen) { |
3158 | 16 | llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast( |
3159 | 16 | NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false); |
3160 | 16 | SmallVector<const Expr *, 4> Shareds; |
3161 | 16 | SmallVector<const Expr *, 4> Privates; |
3162 | 16 | SmallVector<const Expr *, 4> ReductionOps; |
3163 | 16 | SmallVector<const Expr *, 4> LHSs; |
3164 | 16 | SmallVector<const Expr *, 4> RHSs; |
3165 | 16 | SmallVector<const Expr *, 4> CopyOps; |
3166 | 16 | SmallVector<const Expr *, 4> CopyArrayTemps; |
3167 | 16 | SmallVector<const Expr *, 4> CopyArrayElems; |
3168 | 16 | for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { |
3169 | 16 | assert(C->getModifier() == OMPC_REDUCTION_inscan && |
3170 | 16 | "Only inscan reductions are expected."); |
3171 | 16 | Shareds.append(C->varlist_begin(), C->varlist_end()); |
3172 | 16 | Privates.append(C->privates().begin(), C->privates().end()); |
3173 | 16 | ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); |
3174 | 16 | LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); |
3175 | 16 | RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); |
3176 | 16 | CopyOps.append(C->copy_ops().begin(), C->copy_ops().end()); |
3177 | 16 | CopyArrayTemps.append(C->copy_array_temps().begin(), |
3178 | 16 | C->copy_array_temps().end()); |
3179 | 16 | CopyArrayElems.append(C->copy_array_elems().begin(), |
3180 | 16 | C->copy_array_elems().end()); |
3181 | 16 | } |
3182 | 16 | { |
3183 | | // Emit buffers for each reduction variable.
3184 | | // ReductionCodeGen is required to emit the code for array reductions
3185 | | // correctly.
3186 | 16 | ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps); |
3187 | 16 | unsigned Count = 0; |
3188 | 16 | auto *ITA = CopyArrayTemps.begin(); |
3189 | 32 | for (const Expr *IRef : Privates) { |
3190 | 32 | const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl()); |
3191 | | // Emit variably modified arrays, used for array/array-section
3192 | | // reductions.
3193 | 32 | if (PrivateVD->getType()->isVariablyModifiedType()) { |
3194 | 16 | RedCG.emitSharedOrigLValue(CGF, Count); |
3195 | 16 | RedCG.emitAggregateType(CGF, Count); |
3196 | 16 | } |
3197 | 32 | CodeGenFunction::OpaqueValueMapping DimMapping( |
3198 | 32 | CGF, |
3199 | 32 | cast<OpaqueValueExpr>( |
3200 | 32 | cast<VariableArrayType>((*ITA)->getType()->getAsArrayTypeUnsafe()) |
3201 | 32 | ->getSizeExpr()), |
3202 | 32 | RValue::get(OMPScanNumIterations)); |
3203 | | // Emit temp buffer. |
3204 | 32 | CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(*ITA)->getDecl())); |
3205 | 32 | ++ITA; |
3206 | 32 | ++Count; |
3207 | 32 | } |
3208 | 16 | } |
3209 | 16 | CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S); |
3210 | 16 | { |
3211 | | // Emit loop with input phase: |
3212 | | // #pragma omp ... |
3213 | | // for (i: 0..<num_iters>) { |
3214 | | // <input phase>; |
3215 | | // buffer[i] = red; |
3216 | | // } |
3217 | 16 | CGF.OMPFirstScanLoop = true; |
3218 | 16 | CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); |
3219 | 16 | FirstGen(CGF); |
3220 | 16 | } |
3221 | | // Emit prefix reduction: |
3222 | | // for (int k = 0; k != ceil(log2(n)); ++k)
3223 | 16 | llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock(); |
3224 | 16 | llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.outer.log.scan.body"); |
3225 | 16 | llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.outer.log.scan.exit"); |
3226 | 16 | llvm::Function *F = CGF.CGM.getIntrinsic(llvm::Intrinsic::log2, CGF.DoubleTy); |
3227 | 16 | llvm::Value *Arg = |
3228 | 16 | CGF.Builder.CreateUIToFP(OMPScanNumIterations, CGF.DoubleTy); |
3229 | 16 | llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(F, Arg); |
3230 | 16 | F = CGF.CGM.getIntrinsic(llvm::Intrinsic::ceil, CGF.DoubleTy); |
3231 | 16 | LogVal = CGF.EmitNounwindRuntimeCall(F, LogVal); |
3232 | 16 | LogVal = CGF.Builder.CreateFPToUI(LogVal, CGF.IntTy); |
3233 | 16 | llvm::Value *NMin1 = CGF.Builder.CreateNUWSub( |
3234 | 16 | OMPScanNumIterations, llvm::ConstantInt::get(CGF.SizeTy, 1)); |
3235 | 16 | auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getBeginLoc()); |
3236 | 16 | CGF.EmitBlock(LoopBB); |
3237 | 16 | auto *Counter = CGF.Builder.CreatePHI(CGF.IntTy, 2); |
3238 | | // size pow2k = 1; |
3239 | 16 | auto *Pow2K = CGF.Builder.CreatePHI(CGF.SizeTy, 2); |
3240 | 16 | Counter->addIncoming(llvm::ConstantInt::get(CGF.IntTy, 0), InputBB); |
3241 | 16 | Pow2K->addIncoming(llvm::ConstantInt::get(CGF.SizeTy, 1), InputBB); |
3242 | | // for (size i = n - 1; i >= pow2k; --i)
3243 | | // tmp[i] op= tmp[i-pow2k]; |
3244 | 16 | llvm::BasicBlock *InnerLoopBB = |
3245 | 16 | CGF.createBasicBlock("omp.inner.log.scan.body"); |
3246 | 16 | llvm::BasicBlock *InnerExitBB = |
3247 | 16 | CGF.createBasicBlock("omp.inner.log.scan.exit"); |
3248 | 16 | llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(NMin1, Pow2K); |
3249 | 16 | CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB); |
3250 | 16 | CGF.EmitBlock(InnerLoopBB); |
3251 | 16 | auto *IVal = CGF.Builder.CreatePHI(CGF.SizeTy, 2); |
3252 | 16 | IVal->addIncoming(NMin1, LoopBB); |
3253 | 16 | { |
3254 | 16 | CodeGenFunction::OMPPrivateScope PrivScope(CGF); |
3255 | 16 | auto *ILHS = LHSs.begin(); |
3256 | 16 | auto *IRHS = RHSs.begin(); |
3257 | 32 | for (const Expr *CopyArrayElem : CopyArrayElems) { |
3258 | 32 | const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); |
3259 | 32 | const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); |
3260 | 32 | Address LHSAddr = Address::invalid(); |
3261 | 32 | { |
3262 | 32 | CodeGenFunction::OpaqueValueMapping IdxMapping( |
3263 | 32 | CGF, |
3264 | 32 | cast<OpaqueValueExpr>( |
3265 | 32 | cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()), |
3266 | 32 | RValue::get(IVal)); |
3267 | 32 | LHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF); |
3268 | 32 | } |
3269 | 32 | PrivScope.addPrivate(LHSVD, [LHSAddr]() { return LHSAddr; }); |
3270 | 32 | Address RHSAddr = Address::invalid(); |
3271 | 32 | { |
3272 | 32 | llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(IVal, Pow2K); |
3273 | 32 | CodeGenFunction::OpaqueValueMapping IdxMapping( |
3274 | 32 | CGF, |
3275 | 32 | cast<OpaqueValueExpr>( |
3276 | 32 | cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()), |
3277 | 32 | RValue::get(OffsetIVal)); |
3278 | 32 | RHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF); |
3279 | 32 | } |
3280 | 32 | PrivScope.addPrivate(RHSVD, [RHSAddr]() { return RHSAddr; }); |
3281 | 32 | ++ILHS; |
3282 | 32 | ++IRHS; |
3283 | 32 | } |
3284 | 16 | PrivScope.Privatize(); |
3285 | 16 | CGF.CGM.getOpenMPRuntime().emitReduction( |
3286 | 16 | CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps, |
3287 | 16 | {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown}); |
3288 | 16 | } |
3289 | 16 | llvm::Value *NextIVal = |
3290 | 16 | CGF.Builder.CreateNUWSub(IVal, llvm::ConstantInt::get(CGF.SizeTy, 1)); |
3291 | 16 | IVal->addIncoming(NextIVal, CGF.Builder.GetInsertBlock()); |
3292 | 16 | CmpI = CGF.Builder.CreateICmpUGE(NextIVal, Pow2K); |
3293 | 16 | CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB); |
3294 | 16 | CGF.EmitBlock(InnerExitBB); |
3295 | 16 | llvm::Value *Next = |
3296 | 16 | CGF.Builder.CreateNUWAdd(Counter, llvm::ConstantInt::get(CGF.IntTy, 1)); |
3297 | 16 | Counter->addIncoming(Next, CGF.Builder.GetInsertBlock()); |
3298 | | // pow2k <<= 1; |
3299 | 16 | llvm::Value *NextPow2K = CGF.Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true); |
3300 | 16 | Pow2K->addIncoming(NextPow2K, CGF.Builder.GetInsertBlock()); |
3301 | 16 | llvm::Value *Cmp = CGF.Builder.CreateICmpNE(Next, LogVal); |
3302 | 16 | CGF.Builder.CreateCondBr(Cmp, LoopBB, ExitBB); |
3303 | 16 | auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getEndLoc()); |
3304 | 16 | CGF.EmitBlock(ExitBB); |
3305 | | |
3306 | 16 | CGF.OMPFirstScanLoop = false; |
3307 | 16 | SecondGen(CGF); |
3308 | 16 | } |
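A source-level construct that drives this lowering is sketched below (hypothetical); the statements before the scan directive form the input phase and those after it the scan phase, matching the FirstGen/SecondGen passes above:

    int s = 0;
    #pragma omp parallel
    #pragma omp for reduction(inscan, + : s)
    for (int i = 0; i < n; ++i) {
      s += a[i];                  // <input phase>
    #pragma omp scan inclusive(s)
      out[i] = s;                 // <scan phase>
    }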
3309 | | |
3310 | | static bool emitWorksharingDirective(CodeGenFunction &CGF, |
3311 | | const OMPLoopDirective &S, |
3312 | 897 | bool HasCancel) { |
3313 | 897 | bool HasLastprivates; |
3314 | 897 | if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(), |
3315 | 166 | [](const OMPReductionClause *C) { |
3316 | 166 | return C->getModifier() == OMPC_REDUCTION_inscan; |
3317 | 16 | })) { |
3318 | 16 | const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) { |
3319 | 16 | CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); |
3320 | 16 | OMPLoopScope LoopScope(CGF, S); |
3321 | 16 | return CGF.EmitScalarExpr(S.getNumIterations()); |
3322 | 16 | }; |
3323 | 16 | const auto &&FirstGen = [&S, HasCancel](CodeGenFunction &CGF) { |
3324 | 16 | CodeGenFunction::OMPCancelStackRAII CancelRegion( |
3325 | 16 | CGF, S.getDirectiveKind(), HasCancel); |
3326 | 16 | (void)CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), |
3327 | 16 | emitForLoopBounds, |
3328 | 16 | emitDispatchForLoopBounds); |
3329 | | // Emit an implicit barrier at the end. |
3330 | 16 | CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getBeginLoc(), |
3331 | 16 | OMPD_for); |
3332 | 16 | }; |
3333 | 16 | const auto &&SecondGen = [&S, HasCancel, |
3334 | 16 | &HasLastprivates](CodeGenFunction &CGF) { |
3335 | 16 | CodeGenFunction::OMPCancelStackRAII CancelRegion( |
3336 | 16 | CGF, S.getDirectiveKind(), HasCancel); |
3337 | 16 | HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), |
3338 | 16 | emitForLoopBounds, |
3339 | 16 | emitDispatchForLoopBounds); |
3340 | 16 | }; |
3341 | 16 | emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen); |
3342 | 881 | } else { |
3343 | 881 | CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(), |
3344 | 881 | HasCancel); |
3345 | 881 | HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), |
3346 | 881 | emitForLoopBounds, |
3347 | 881 | emitDispatchForLoopBounds); |
3348 | 881 | } |
3349 | 897 | return HasLastprivates; |
3350 | 897 | } |
3351 | | |
3352 | 345 | void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { |
3353 | 345 | bool HasLastprivates = false; |
3354 | 345 | auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, |
3355 | 345 | PrePostActionTy &) { |
3356 | 345 | HasLastprivates = emitWorksharingDirective(CGF, S, S.hasCancel()); |
3357 | 345 | }; |
3358 | 345 | { |
3359 | 345 | auto LPCRegion = |
3360 | 345 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
3361 | 345 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
3362 | 345 | CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen, |
3363 | 345 | S.hasCancel()); |
3364 | 345 | } |
3365 | | |
3366 | | // Emit an implicit barrier at the end. |
3367 | 345 | if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
3368 | 334 | CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for); |
3369 | | // Check for outer lastprivate conditional update. |
3370 | 345 | checkForLastprivateConditionalUpdate(*this, S); |
3371 | 345 | } |
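The implicit barrier at the end is skipped only when a nowait clause is present and there are no lastprivates to flush, e.g. (hypothetical; 'partial' and 'g' are placeholders):

    #pragma omp parallel
    #pragma omp for nowait
    for (int i = 0; i < n; ++i)
      partial[i] = g(i);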
3372 | | |
3373 | 249 | void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { |
3374 | 249 | bool HasLastprivates = false; |
3375 | 249 | auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, |
3376 | 249 | PrePostActionTy &) { |
3377 | 249 | HasLastprivates = emitWorksharingDirective(CGF, S, /*HasCancel=*/false); |
3378 | 249 | }; |
3379 | 249 | { |
3380 | 249 | auto LPCRegion = |
3381 | 249 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
3382 | 249 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
3383 | 249 | CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); |
3384 | 249 | } |
3385 | | |
3386 | | // Emit an implicit barrier at the end. |
3387 | 249 | if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
3388 | 249 | CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for); |
3389 | | // Check for outer lastprivate conditional update. |
3390 | 249 | checkForLastprivateConditionalUpdate(*this, S); |
3391 | 249 | } |
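Same shape for the combined 'for simd' form; note that a lastprivate keeps the trailing barrier even under nowait, per the condition above (hypothetical example):

    #pragma omp parallel
    #pragma omp for simd nowait lastprivate(last)
    for (int i = 0; i < n; ++i)
      last = a[i];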
3392 | | |
3393 | | static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty, |
3394 | | const Twine &Name, |
3395 | 440 | llvm::Value *Init = nullptr) { |
3396 | 440 | LValue LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty); |
3397 | 440 | if (Init) |
3398 | 352 | CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true); |
3399 | 440 | return LVal; |
3400 | 440 | } |
3401 | | |
3402 | 88 | void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { |
3403 | 88 | const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt(); |
3404 | 88 | const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt); |
3405 | 88 | bool HasLastprivates = false; |
3406 | 88 | auto &&CodeGen = [&S, CapturedStmt, CS, |
3407 | 88 | &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) { |
3408 | 88 | const ASTContext &C = CGF.getContext(); |
3409 | 88 | QualType KmpInt32Ty = |
3410 | 88 | C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); |
3411 | | // Emit helper vars inits. |
3412 | 88 | LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.", |
3413 | 88 | CGF.Builder.getInt32(0)); |
3414 | 88 | llvm::ConstantInt *GlobalUBVal = CS != nullptr |
3415 | 88 | ? CGF.Builder.getInt32(CS->size() - 1) |
3416 | 0 | : CGF.Builder.getInt32(0); |
3417 | 88 | LValue UB = |
3418 | 88 | createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal); |
3419 | 88 | LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.", |
3420 | 88 | CGF.Builder.getInt32(1)); |
3421 | 88 | LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.", |
3422 | 88 | CGF.Builder.getInt32(0)); |
3423 | | // Loop counter. |
3424 | 88 | LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv."); |
3425 | 88 | OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue); |
3426 | 88 | CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV); |
3427 | 88 | OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue); |
3428 | 88 | CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB); |
3429 | | // Generate condition for loop. |
3430 | 88 | BinaryOperator *Cond = BinaryOperator::Create( |
3431 | 88 | C, &IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, OK_Ordinary, |
3432 | 88 | S.getBeginLoc(), FPOptionsOverride()); |
3433 | | // Increment for loop counter. |
3434 | 88 | UnaryOperator *Inc = UnaryOperator::Create( |
3435 | 88 | C, &IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary, |
3436 | 88 | S.getBeginLoc(), true, FPOptionsOverride()); |
3437 | 88 | auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) { |
3438 | | // Iterate through all sections and emit a switch construct: |
3439 | | // switch (IV) { |
3440 | | // case 0: |
3441 | | // <SectionStmt[0]>; |
3442 | | // break; |
3443 | | // ... |
3444 | | // case <NumSection> - 1: |
3445 | | // <SectionStmt[<NumSection> - 1]>; |
3446 | | // break; |
3447 | | // } |
3448 | | // .omp.sections.exit: |
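| | // Illustrative example (editor's note, not part of the source file): for a
| | // directive such as
| | //   #pragma omp sections
| | //   {
| | //     #pragma omp section
| | //     foo();
| | //     #pragma omp section
| | //     bar();
| | //   }
| | // the loop below emits "case 0: foo(); break;" and "case 1: bar(); break;",
| | // and the static worksharing runtime hands each case index to one thread.
| | // 'foo' and 'bar' are placeholders.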
3449 | 88 | llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit"); |
3450 | 88 | llvm::SwitchInst *SwitchStmt = |
3451 | 88 | CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()), |
3452 | 88 | ExitBB, CS == nullptr ? 1 : CS->size());
3453 | 88 | if (CS) { |
3454 | 88 | unsigned CaseNumber = 0; |
3455 | 138 | for (const Stmt *SubStmt : CS->children()) { |
3456 | 138 | auto CaseBB = CGF.createBasicBlock(".omp.sections.case"); |
3457 | 138 | CGF.EmitBlock(CaseBB); |
3458 | 138 | SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB); |
3459 | 138 | CGF.EmitStmt(SubStmt); |
3460 | 138 | CGF.EmitBranch(ExitBB); |
3461 | 138 | ++CaseNumber; |
3462 | 138 | } |
3463 | 0 | } else { |
3464 | 0 | llvm::BasicBlock *CaseBB = CGF.createBasicBlock(".omp.sections.case"); |
3465 | 0 | CGF.EmitBlock(CaseBB); |
3466 | 0 | SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB); |
3467 | 0 | CGF.EmitStmt(CapturedStmt); |
3468 | 0 | CGF.EmitBranch(ExitBB); |
3469 | 0 | } |
3470 | 88 | CGF.EmitBlock(ExitBB, /*IsFinished=*/true); |
3471 | 88 | }; |
3472 | | |
3473 | 88 | CodeGenFunction::OMPPrivateScope LoopScope(CGF); |
3474 | 88 | if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) { |
3475 | | // Emit implicit barrier to synchronize threads and avoid data races on |
3476 | | // initialization of firstprivate variables and post-update of lastprivate |
3477 | | // variables. |
3478 | 0 | CGF.CGM.getOpenMPRuntime().emitBarrierCall( |
3479 | 0 | CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, |
3480 | 0 | /*ForceSimpleCall=*/true); |
3481 | 0 | } |
3482 | 88 | CGF.EmitOMPPrivateClause(S, LoopScope); |
3483 | 88 | CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(CGF, S, IV); |
3484 | 88 | HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); |
3485 | 88 | CGF.EmitOMPReductionClauseInit(S, LoopScope); |
3486 | 88 | (void)LoopScope.Privatize(); |
3487 | 88 | if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) |
3488 | 0 | CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); |
3489 | | |
3490 | | // Emit static non-chunked loop. |
3491 | 88 | OpenMPScheduleTy ScheduleKind; |
3492 | 88 | ScheduleKind.Schedule = OMPC_SCHEDULE_static; |
3493 | 88 | CGOpenMPRuntime::StaticRTInput StaticInit( |
3494 | 88 | /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(CGF), |
3495 | 88 | LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF)); |
3496 | 88 | CGF.CGM.getOpenMPRuntime().emitForStaticInit( |
3497 | 88 | CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, StaticInit); |
3498 | | // UB = min(UB, GlobalUB); |
3499 | 88 | llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, S.getBeginLoc()); |
3500 | 88 | llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect( |
3501 | 88 | CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal); |
3502 | 88 | CGF.EmitStoreOfScalar(MinUBGlobalUB, UB); |
3503 | | // IV = LB; |
3504 | 88 | CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getBeginLoc()), IV); |
3505 | | // while (idx <= UB) { BODY; ++idx; } |
3506 | 88 | CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, Cond, Inc, BodyGen, |
3507 | 88 | [](CodeGenFunction &) {}); |
3508 | | // Tell the runtime we are done. |
3509 | 128 | auto &&CodeGen = [&S](CodeGenFunction &CGF) { |
3510 | 128 | CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(), |
3511 | 128 | S.getDirectiveKind()); |
3512 | 128 | }; |
3513 | 88 | CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen); |
3514 | 88 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); |
3515 | | // Emit post-update of the reduction variables if IsLastIter != 0. |
3516 | 0 | emitPostUpdateForReductionClause(CGF, S, [IL, &S](CodeGenFunction &CGF) { |
3517 | 0 | return CGF.Builder.CreateIsNotNull( |
3518 | 0 | CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); |
3519 | 0 | }); |
3520 | | |
3521 | | // Emit final copy of the lastprivate variables if IsLastIter != 0. |
3522 | 88 | if (HasLastprivates) |
3523 | 16 | CGF.EmitOMPLastprivateClauseFinal( |
3524 | 16 | S, /*NoFinals=*/false, |
3525 | 16 | CGF.Builder.CreateIsNotNull( |
3526 | 16 | CGF.EmitLoadOfScalar(IL, S.getBeginLoc()))); |
3527 | 88 | }; |
3528 | | |
3529 | 88 | bool HasCancel = false; |
3530 | 88 | if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S)) |
3531 | 62 | HasCancel = OSD->hasCancel(); |
3532 | 26 | else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S)) |
3533 | 26 | HasCancel = OPSD->hasCancel(); |
3534 | 88 | OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel); |
3535 | 88 | CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen, |
3536 | 88 | HasCancel); |
3537 | | // Emit barrier for lastprivates only if 'sections' directive has 'nowait' |
3538 | | // clause. Otherwise the barrier will be generated by the codegen for the |
3539 | | // directive. |
3540 | 88 | if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
3541 | | // Emit implicit barrier to synchronize threads and avoid data races on |
3542 | | // initialization of firstprivate variables. |
3543 | 0 | CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), |
3544 | 0 | OMPD_unknown); |
3545 | 0 | } |
3546 | 88 | } |
3547 | | |
3548 | 62 | void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { |
3549 | 62 | { |
3550 | 62 | auto LPCRegion = |
3551 | 62 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
3552 | 62 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
3553 | 62 | EmitSections(S); |
3554 | 62 | } |
3555 | | // Emit an implicit barrier at the end. |
3556 | 62 | if (!S.getSingleClause<OMPNowaitClause>()) { |
3557 | 56 | CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), |
3558 | 56 | OMPD_sections); |
3559 | 56 | } |
3560 | | // Check for outer lastprivate conditional update. |
3561 | 62 | checkForLastprivateConditionalUpdate(*this, S); |
3562 | 62 | } |
3563 | | |
3564 | 54 | void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { |
3565 | 54 | LexicalScope Scope(*this, S.getSourceRange()); |
3566 | 54 | EmitStopPoint(&S); |
3567 | 54 | EmitStmt(S.getAssociatedStmt()); |
3568 | 54 | } |
3569 | | |
3570 | 57 | void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { |
3571 | 57 | llvm::SmallVector<const Expr *, 8> CopyprivateVars; |
3572 | 57 | llvm::SmallVector<const Expr *, 8> DestExprs; |
3573 | 57 | llvm::SmallVector<const Expr *, 8> SrcExprs; |
3574 | 57 | llvm::SmallVector<const Expr *, 8> AssignmentOps; |
3575 | | // Check if there are any 'copyprivate' clauses associated with this |
3576 | | // 'single' construct. |
3577 | | // Build a list of copyprivate variables along with helper expressions |
3578 | | // (<source>, <destination>, <destination>=<source> expressions) |
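| | // Illustrative example (editor's note, not part of the source file): given
| | //   #pragma omp single copyprivate(x)
| | //   x = compute();
| | // the value of 'x' produced by the one executing thread is broadcast to the
| | // other threads via the <destination>=<source> helpers collected below.
| | // 'x' and 'compute' are placeholders.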
3579 | 28 | for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) { |
3580 | 28 | CopyprivateVars.append(C->varlists().begin(), C->varlists().end()); |
3581 | 28 | DestExprs.append(C->destination_exprs().begin(), |
3582 | 28 | C->destination_exprs().end()); |
3583 | 28 | SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end()); |
3584 | 28 | AssignmentOps.append(C->assignment_ops().begin(), |
3585 | 28 | C->assignment_ops().end()); |
3586 | 28 | } |
3587 | | // Emit code for 'single' region along with 'copyprivate' clauses |
3588 | 57 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
3589 | 57 | Action.Enter(CGF); |
3590 | 57 | OMPPrivateScope SingleScope(CGF); |
3591 | 57 | (void)CGF.EmitOMPFirstprivateClause(S, SingleScope); |
3592 | 57 | CGF.EmitOMPPrivateClause(S, SingleScope); |
3593 | 57 | (void)SingleScope.Privatize(); |
3594 | 57 | CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); |
3595 | 57 | }; |
3596 | 57 | { |
3597 | 57 | auto LPCRegion = |
3598 | 57 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
3599 | 57 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
3600 | 57 | CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getBeginLoc(), |
3601 | 57 | CopyprivateVars, DestExprs, |
3602 | 57 | SrcExprs, AssignmentOps); |
3603 | 57 | } |
3604 | | // Emit an implicit barrier at the end (to avoid data race on firstprivate |
3605 | | // init or if no 'nowait' clause was specified and no 'copyprivate' clause). |
3606 | 57 | if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
3607 | 22 | CGM.getOpenMPRuntime().emitBarrierCall( |
3608 | 22 | *this, S.getBeginLoc(), |
3609 | 22 | S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
3610 | 22 | } |
3611 | | // Check for outer lastprivate conditional update. |
3612 | 57 | checkForLastprivateConditionalUpdate(*this, S); |
3613 | 57 | } |
3614 | | |
3615 | 39 | static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) { |
3616 | 39 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
3617 | 39 | Action.Enter(CGF); |
3618 | 39 | CGF.EmitStmt(S.getRawStmt()); |
3619 | 39 | }; |
3620 | 39 | CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc()); |
3621 | 39 | } |
3622 | | |
3623 | 15 | void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { |
3624 | 15 | if (CGM.getLangOpts().OpenMPIRBuilder) { |
3625 | 6 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
3626 | 6 | using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
3627 | | |
3628 | 6 | const Stmt *MasterRegionBodyStmt = S.getAssociatedStmt(); |
3629 | | |
3630 | 6 | auto FiniCB = [this](InsertPointTy IP) { |
3631 | 6 | OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); |
3632 | 6 | }; |
3633 | | |
3634 | 6 | auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP, |
3635 | 6 | InsertPointTy CodeGenIP, |
3636 | 6 | llvm::BasicBlock &FiniBB) { |
3637 | 6 | OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB); |
3638 | 6 | OMPBuilderCBHelpers::EmitOMPRegionBody(*this, MasterRegionBodyStmt, |
3639 | 6 | CodeGenIP, FiniBB); |
3640 | 6 | }; |
3641 | | |
3642 | 6 | LexicalScope Scope(*this, S.getSourceRange()); |
3643 | 6 | EmitStopPoint(&S); |
3644 | 6 | Builder.restoreIP(OMPBuilder.createMaster(Builder, BodyGenCB, FiniCB)); |
3645 | | |
3646 | 6 | return; |
3647 | 6 | } |
3648 | 9 | LexicalScope Scope(*this, S.getSourceRange()); |
3649 | 9 | EmitStopPoint(&S); |
3650 | 9 | emitMaster(*this, S); |
3651 | 9 | } |
3652 | | |
3653 | 56 | void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { |
3654 | 56 | if (CGM.getLangOpts().OpenMPIRBuilder) { |
3655 | 14 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
3656 | 14 | using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
3657 | | |
3658 | 14 | const Stmt *CriticalRegionBodyStmt = S.getAssociatedStmt(); |
3659 | 14 | const Expr *Hint = nullptr; |
3660 | 14 | if (const auto *HintClause = S.getSingleClause<OMPHintClause>()) |
3661 | 2 | Hint = HintClause->getHint(); |
3662 | | |
3663 | | // TODO: This is slightly different from what's currently being done in |
3664 | | // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything |
3665 | | // about typing is final. |
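| | // Illustrative example (editor's note, not part of the source file): a hint
| | // written as
| | //   #pragma omp critical(name) hint(omp_sync_hint_contended)
| | // arrives here as an integer expression; it is cast to i32 below and passed
| | // to the runtime so it can pick a suitable lock implementation.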
3666 | 14 | llvm::Value *HintInst = nullptr; |
3667 | 14 | if (Hint) |
3668 | 2 | HintInst = |
3669 | 2 | Builder.CreateIntCast(EmitScalarExpr(Hint), CGM.Int32Ty, false); |
3670 | | |
3671 | 12 | auto FiniCB = [this](InsertPointTy IP) { |
3672 | 12 | OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); |
3673 | 12 | }; |
3674 | | |
3675 | 14 | auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP, |
3676 | 14 | InsertPointTy CodeGenIP, |
3677 | 14 | llvm::BasicBlock &FiniBB) { |
3678 | 14 | OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB); |
3679 | 14 | OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CriticalRegionBodyStmt, |
3680 | 14 | CodeGenIP, FiniBB); |
3681 | 14 | }; |
3682 | | |
3683 | 14 | LexicalScope Scope(*this, S.getSourceRange()); |
3684 | 14 | EmitStopPoint(&S); |
3685 | 14 | Builder.restoreIP(OMPBuilder.createCritical( |
3686 | 14 | Builder, BodyGenCB, FiniCB, S.getDirectiveName().getAsString(), |
3687 | 14 | HintInst)); |
3688 | | |
3689 | 14 | return; |
3690 | 14 | } |
3691 | | |
3692 | 42 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
3693 | 42 | Action.Enter(CGF); |
3694 | 42 | CGF.EmitStmt(S.getAssociatedStmt()); |
3695 | 42 | }; |
3696 | 42 | const Expr *Hint = nullptr; |
3697 | 42 | if (const auto *HintClause = S.getSingleClause<OMPHintClause>()) |
3698 | 3 | Hint = HintClause->getHint(); |
3699 | 42 | LexicalScope Scope(*this, S.getSourceRange()); |
3700 | 42 | EmitStopPoint(&S); |
3701 | 42 | CGM.getOpenMPRuntime().emitCriticalRegion(*this, |
3702 | 42 | S.getDirectiveName().getAsString(), |
3703 | 42 | CodeGen, S.getBeginLoc(), Hint); |
3704 | 42 | } |
3705 | | |
3706 | | void CodeGenFunction::EmitOMPParallelForDirective( |
3707 | 207 | const OMPParallelForDirective &S) { |
3708 | | // Emit directive as a combined directive that consists of two implicit |
3709 | | // directives: 'parallel' with 'for' directive. |
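| | // Illustrative example (editor's note, not part of the source file):
| | //   #pragma omp parallel for
| | //   for (int i = 0; i < n; ++i) a[i] = f(i);
| | // is emitted as a '#pragma omp parallel' region whose body is the
| | // '#pragma omp for' worksharing loop; 'n', 'a', and 'f' are placeholders.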
3710 | 207 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
3711 | 207 | Action.Enter(CGF); |
3712 | 207 | (void)emitWorksharingDirective(CGF, S, S.hasCancel()); |
3713 | 207 | }; |
3714 | 207 | { |
3715 | 207 | auto LPCRegion = |
3716 | 207 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
3717 | 207 | emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen, |
3718 | 207 | emitEmptyBoundParameters); |
3719 | 207 | } |
3720 | | // Check for outer lastprivate conditional update. |
3721 | 207 | checkForLastprivateConditionalUpdate(*this, S); |
3722 | 207 | } |
3723 | | |
3724 | | void CodeGenFunction::EmitOMPParallelForSimdDirective( |
3725 | 96 | const OMPParallelForSimdDirective &S) { |
3726 | | // Emit directive as a combined directive that consists of two implicit |
3727 | | // directives: 'parallel' with 'for' directive. |
3728 | 96 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
3729 | 96 | Action.Enter(CGF); |
3730 | 96 | (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false); |
3731 | 96 | }; |
3732 | 96 | { |
3733 | 96 | auto LPCRegion = |
3734 | 96 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
3735 | 96 | emitCommonOMPParallelDirective(*this, S, OMPD_for_simd, CodeGen, |
3736 | 96 | emitEmptyBoundParameters); |
3737 | 96 | } |
3738 | | // Check for outer lastprivate conditional update. |
3739 | 96 | checkForLastprivateConditionalUpdate(*this, S); |
3740 | 96 | } |
3741 | | |
3742 | | void CodeGenFunction::EmitOMPParallelMasterDirective( |
3743 | 30 | const OMPParallelMasterDirective &S) { |
3744 | | // Emit directive as a combined directive that consists of two implicit |
3745 | | // directives: 'parallel' with 'master' directive. |
3746 | 30 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
3747 | 30 | Action.Enter(CGF); |
3748 | 30 | OMPPrivateScope PrivateScope(CGF); |
3749 | 30 | bool Copyins = CGF.EmitOMPCopyinClause(S); |
3750 | 30 | (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); |
3751 | 30 | if (Copyins) { |
3752 | | // Emit implicit barrier to synchronize threads and avoid data races on |
3753 | | // propagation of the master thread's values of threadprivate variables to
3754 | | // the local instances of those variables in all other implicit threads.
3755 | 4 | CGF.CGM.getOpenMPRuntime().emitBarrierCall( |
3756 | 4 | CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, |
3757 | 4 | /*ForceSimpleCall=*/true); |
3758 | 4 | } |
3759 | 30 | CGF.EmitOMPPrivateClause(S, PrivateScope); |
3760 | 30 | CGF.EmitOMPReductionClauseInit(S, PrivateScope); |
3761 | 30 | (void)PrivateScope.Privatize(); |
3762 | 30 | emitMaster(CGF, S); |
3763 | 30 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); |
3764 | 30 | }; |
3765 | 30 | { |
3766 | 30 | auto LPCRegion = |
3767 | 30 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
3768 | 30 | emitCommonOMPParallelDirective(*this, S, OMPD_master, CodeGen, |
3769 | 30 | emitEmptyBoundParameters); |
3770 | 30 | emitPostUpdateForReductionClause(*this, S, |
3771 | 0 | [](CodeGenFunction &) { return nullptr; }); |
3772 | 30 | } |
3773 | | // Check for outer lastprivate conditional update. |
3774 | 30 | checkForLastprivateConditionalUpdate(*this, S); |
3775 | 30 | } |
3776 | | |
3777 | | void CodeGenFunction::EmitOMPParallelSectionsDirective( |
3778 | 26 | const OMPParallelSectionsDirective &S) { |
3779 | | // Emit directive as a combined directive that consists of two implicit |
3780 | | // directives: 'parallel' with 'sections' directive. |
3781 | 26 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
3782 | 26 | Action.Enter(CGF); |
3783 | 26 | CGF.EmitSections(S); |
3784 | 26 | }; |
3785 | 26 | { |
3786 | 26 | auto LPCRegion = |
3787 | 26 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
3788 | 26 | emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen, |
3789 | 26 | emitEmptyBoundParameters); |
3790 | 26 | } |
3791 | | // Check for outer lastprivate conditional update. |
3792 | 26 | checkForLastprivateConditionalUpdate(*this, S); |
3793 | 26 | } |
3794 | | |
3795 | | namespace { |
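| | // Editor's note (not part of the source file): an untied task may resume on
| | // a different thread after a task scheduling point, so stack locals declared
| | // in its body cannot live in a single thread's frame. The visitor below
| | // collects such locals so they can be moved into the task's private storage.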
3796 | | /// Get the list of variables declared in the context of the untied tasks. |
3797 | | class CheckVarsEscapingUntiedTaskDeclContext final |
3798 | | : public ConstStmtVisitor<CheckVarsEscapingUntiedTaskDeclContext> { |
3799 | | llvm::SmallVector<const VarDecl *, 4> PrivateDecls; |
3800 | | |
3801 | | public: |
3802 | 16 | explicit CheckVarsEscapingUntiedTaskDeclContext() = default; |
3803 | 16 | virtual ~CheckVarsEscapingUntiedTaskDeclContext() = default; |
3804 | 6 | void VisitDeclStmt(const DeclStmt *S) { |
3805 | 6 | if (!S) |
3806 | 0 | return; |
3807 | | // Need to privatize only local vars; static locals can be processed as is.
3808 | 10 | for (const Decl *D : S->decls()) {
3809 | 10 | if (const auto *VD = dyn_cast_or_null<VarDecl>(D)) |
3810 | 8 | if (VD->hasLocalStorage()) |
3811 | 8 | PrivateDecls.push_back(VD); |
3812 | 10 | } |
3813 | 6 | } |
3814 | 16 | void VisitOMPExecutableDirective(const OMPExecutableDirective *) { return; } |
3815 | 0 | void VisitCapturedStmt(const CapturedStmt *) { return; } |
3816 | 0 | void VisitLambdaExpr(const LambdaExpr *) { return; } |
3817 | 0 | void VisitBlockExpr(const BlockExpr *) { return; } |
3818 | 108 | void VisitStmt(const Stmt *S) { |
3819 | 108 | if (!S) |
3820 | 0 | return; |
3821 | 108 | for (const Stmt *Child : S->children()) |
3822 | 114 | if (Child) |
3823 | 114 | Visit(Child); |
3824 | 108 | } |
3825 | | |
3826 | | /// Returns the list of collected private (local) variables.
3827 | 32 | ArrayRef<const VarDecl *> getPrivateDecls() const { return PrivateDecls; } |
3828 | | }; |
3829 | | } // anonymous namespace |
3830 | | |
3831 | | void CodeGenFunction::EmitOMPTaskBasedDirective( |
3832 | | const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion, |
3833 | | const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen, |
3834 | 407 | OMPTaskDataTy &Data) { |
3835 | | // Emit outlined function for task construct. |
3836 | 407 | const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion); |
3837 | 407 | auto I = CS->getCapturedDecl()->param_begin(); |
3838 | 407 | auto PartId = std::next(I); |
3839 | 407 | auto TaskT = std::next(I, 4); |
3840 | | // Check if the task is final |
3841 | 407 | if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) { |
3842 | | // If the condition constant folds and can be elided, try to avoid emitting |
3843 | | // the condition and the dead arm of the if/else. |
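| | // Illustrative example (editor's note, not part of the source file):
| | //   #pragma omp task final(1)      // folds; Data.Final is set to 'true'
| | //   #pragma omp task final(n > 4)  // no fold; an i1 is evaluated at runtime
| | // where 'n' is a placeholder variable.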
3844 | 22 | const Expr *Cond = Clause->getCondition(); |
3845 | 22 | bool CondConstant; |
3846 | 22 | if (ConstantFoldsToSimpleInteger(Cond, CondConstant)) |
3847 | 12 | Data.Final.setInt(CondConstant); |
3848 | 10 | else |
3849 | 10 | Data.Final.setPointer(EvaluateExprAsBool(Cond)); |
3850 | 385 | } else { |
3851 | | // By default the task is not final. |
3852 | 385 | Data.Final.setInt(/*IntVal=*/false); |
3853 | 385 | } |
3854 | | // Check if the task has 'priority' clause. |
3855 | 407 | if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) { |
3856 | 22 | const Expr *Prio = Clause->getPriority(); |
3857 | 22 | Data.Priority.setInt(/*IntVal=*/true); |
3858 | 22 | Data.Priority.setPointer(EmitScalarConversion( |
3859 | 22 | EmitScalarExpr(Prio), Prio->getType(), |
3860 | 22 | getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1), |
3861 | 22 | Prio->getExprLoc())); |
3862 | 22 | } |
3863 | | // The first function argument for tasks is a thread id, the second one is a |
3864 | | // part id (0 for tied tasks, >=0 for untied task). |
3865 | 407 | llvm::DenseSet<const VarDecl *> EmittedAsPrivate; |
3866 | | // Get list of private variables. |
3867 | 50 | for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { |
3868 | 50 | auto IRef = C->varlist_begin(); |
3869 | 226 | for (const Expr *IInit : C->private_copies()) { |
3870 | 226 | const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); |
3871 | 226 | if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { |
3872 | 170 | Data.PrivateVars.push_back(*IRef); |
3873 | 170 | Data.PrivateCopies.push_back(IInit); |
3874 | 170 | } |
3875 | 226 | ++IRef; |
3876 | 226 | } |
3877 | 50 | } |
3878 | 407 | EmittedAsPrivate.clear(); |
3879 | | // Get list of firstprivate variables. |
3880 | 113 | for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { |
3881 | 113 | auto IRef = C->varlist_begin(); |
3882 | 113 | auto IElemInitRef = C->inits().begin(); |
3883 | 329 | for (const Expr *IInit : C->private_copies()) { |
3884 | 329 | const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); |
3885 | 329 | if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { |
3886 | 255 | Data.FirstprivateVars.push_back(*IRef); |
3887 | 255 | Data.FirstprivateCopies.push_back(IInit); |
3888 | 255 | Data.FirstprivateInits.push_back(*IElemInitRef); |
3889 | 255 | } |
3890 | 329 | ++IRef; |
3891 | 329 | ++IElemInitRef; |
3892 | 329 | } |
3893 | 113 | } |
3894 | | // Get list of lastprivate variables (for taskloops). |
3895 | 407 | llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs; |
3896 | 49 | for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { |
3897 | 49 | auto IRef = C->varlist_begin(); |
3898 | 49 | auto ID = C->destination_exprs().begin(); |
3899 | 199 | for (const Expr *IInit : C->private_copies()) { |
3900 | 199 | const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); |
3901 | 199 | if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { |
3902 | 151 | Data.LastprivateVars.push_back(*IRef); |
3903 | 151 | Data.LastprivateCopies.push_back(IInit); |
3904 | 151 | } |
3905 | 199 | LastprivateDstsOrigs.insert( |
3906 | 199 | {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()), |
3907 | 199 | cast<DeclRefExpr>(*IRef)}); |
3908 | 199 | ++IRef; |
3909 | 199 | ++ID; |
3910 | 199 | } |
3911 | 49 | } |
3912 | 407 | SmallVector<const Expr *, 4> LHSs; |
3913 | 407 | SmallVector<const Expr *, 4> RHSs; |
3914 | 6 | for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { |
3915 | 6 | Data.ReductionVars.append(C->varlist_begin(), C->varlist_end()); |
3916 | 6 | Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end()); |
3917 | 6 | Data.ReductionCopies.append(C->privates().begin(), C->privates().end()); |
3918 | 6 | Data.ReductionOps.append(C->reduction_ops().begin(), |
3919 | 6 | C->reduction_ops().end()); |
3920 | 6 | LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); |
3921 | 6 | RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); |
3922 | 6 | } |
3923 | 407 | Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit( |
3924 | 407 | *this, S.getBeginLoc(), LHSs, RHSs, Data); |
3925 | | // Build list of dependences. |
3926 | 38 | for (const auto *C : S.getClausesOfKind<OMPDependClause>()) { |
3927 | 38 | OMPTaskDataTy::DependData &DD = |
3928 | 38 | Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier()); |
3929 | 38 | DD.DepExprs.append(C->varlist_begin(), C->varlist_end()); |
3930 | 38 | } |
3931 | | // Get list of local vars for untied tasks. |
3932 | 407 | if (!Data.Tied) { |
3933 | 16 | CheckVarsEscapingUntiedTaskDeclContext Checker; |
3934 | 16 | Checker.Visit(S.getInnermostCapturedStmt()->getCapturedStmt()); |
3935 | 16 | Data.PrivateLocals.append(Checker.getPrivateDecls().begin(), |
3936 | 16 | Checker.getPrivateDecls().end()); |
3937 | 16 | } |
3938 | 407 | auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs, |
3939 | 407 | CapturedRegion](CodeGenFunction &CGF, |
3940 | 407 | PrePostActionTy &Action) { |
3941 | 407 | llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, std::pair<Address, Address>> |
3942 | 407 | UntiedLocalVars; |
3943 | | // Set proper addresses for generated private copies. |
3944 | 407 | OMPPrivateScope Scope(CGF); |
3945 | 407 | llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs; |
3946 | 407 | if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
3947 | 246 | !Data.LastprivateVars.empty() || !Data.PrivateLocals.empty()) {
3948 | 210 | llvm::FunctionType *CopyFnTy = llvm::FunctionType::get( |
3949 | 210 | CGF.Builder.getVoidTy(), {CGF.Builder.getInt8PtrTy()}, true); |
3950 | 210 | enum { PrivatesParam = 2, CopyFnParam = 3 }; |
3951 | 210 | llvm::Value *CopyFn = CGF.Builder.CreateLoad( |
3952 | 210 | CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam))); |
3953 | 210 | llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar( |
3954 | 210 | CS->getCapturedDecl()->getParam(PrivatesParam))); |
3955 | | // Map privates. |
3956 | 210 | llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; |
3957 | 210 | llvm::SmallVector<llvm::Value *, 16> CallArgs; |
3958 | 210 | CallArgs.push_back(PrivatesPtr); |
3959 | 170 | for (const Expr *E : Data.PrivateVars) { |
3960 | 170 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
3961 | 170 | Address PrivatePtr = CGF.CreateMemTemp( |
3962 | 170 | CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr"); |
3963 | 170 | PrivatePtrs.emplace_back(VD, PrivatePtr); |
3964 | 170 | CallArgs.push_back(PrivatePtr.getPointer()); |
3965 | 170 | } |
3966 | 255 | for (const Expr *E : Data.FirstprivateVars) { |
3967 | 255 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
3968 | 255 | Address PrivatePtr = |
3969 | 255 | CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), |
3970 | 255 | ".firstpriv.ptr.addr"); |
3971 | 255 | PrivatePtrs.emplace_back(VD, PrivatePtr); |
3972 | 255 | FirstprivatePtrs.emplace_back(VD, PrivatePtr); |
3973 | 255 | CallArgs.push_back(PrivatePtr.getPointer()); |
3974 | 255 | } |
3975 | 151 | for (const Expr *E : Data.LastprivateVars) { |
3976 | 151 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
3977 | 151 | Address PrivatePtr = |
3978 | 151 | CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), |
3979 | 151 | ".lastpriv.ptr.addr"); |
3980 | 151 | PrivatePtrs.emplace_back(VD, PrivatePtr); |
3981 | 151 | CallArgs.push_back(PrivatePtr.getPointer()); |
3982 | 151 | } |
3983 | 8 | for (const VarDecl *VD : Data.PrivateLocals) { |
3984 | 8 | QualType Ty = VD->getType().getNonReferenceType(); |
3985 | 8 | if (VD->getType()->isLValueReferenceType()) |
3986 | 0 | Ty = CGF.getContext().getPointerType(Ty); |
3987 | 8 | if (isAllocatableDecl(VD)) |
3988 | 2 | Ty = CGF.getContext().getPointerType(Ty); |
3989 | 8 | Address PrivatePtr = CGF.CreateMemTemp( |
3990 | 8 | CGF.getContext().getPointerType(Ty), ".local.ptr.addr"); |
3991 | 8 | UntiedLocalVars.try_emplace(VD, PrivatePtr, Address::invalid()); |
3992 | 8 | CallArgs.push_back(PrivatePtr.getPointer()); |
3993 | 8 | } |
3994 | 210 | CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall( |
3995 | 210 | CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs); |
3996 | 199 | for (const auto &Pair : LastprivateDstsOrigs) { |
3997 | 199 | const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl()); |
3998 | 199 | DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD), |
3999 | | /*RefersToEnclosingVariableOrCapture=*/ |
4000 | 199 | CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr, |
4001 | 199 | Pair.second->getType(), VK_LValue, |
4002 | 199 | Pair.second->getExprLoc()); |
4003 | 199 | Scope.addPrivate(Pair.first, [&CGF, &DRE]() { |
4004 | 199 | return CGF.EmitLValue(&DRE).getAddress(CGF); |
4005 | 199 | }); |
4006 | 199 | } |
4007 | 576 | for (const auto &Pair : PrivatePtrs) { |
4008 | 576 | Address Replacement(CGF.Builder.CreateLoad(Pair.second), |
4009 | 576 | CGF.getContext().getDeclAlign(Pair.first)); |
4010 | 576 | Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); |
4011 | 576 | } |
4012 | | // Adjust mapping for internal locals by mapping actual memory instead of |
4013 | | // a pointer to this memory. |
4014 | 8 | for (auto &Pair : UntiedLocalVars) { |
4015 | 8 | if (isAllocatableDecl(Pair.first)) { |
4016 | 2 | llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first); |
4017 | 2 | Address Replacement(Ptr, CGF.getPointerAlign()); |
4018 | 2 | Pair.getSecond().first = Replacement; |
4019 | 2 | Ptr = CGF.Builder.CreateLoad(Replacement); |
4020 | 2 | Replacement = Address(Ptr, CGF.getContext().getDeclAlign(Pair.first)); |
4021 | 2 | Pair.getSecond().second = Replacement; |
4022 | 6 | } else { |
4023 | 6 | llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first); |
4024 | 6 | Address Replacement(Ptr, CGF.getContext().getDeclAlign(Pair.first)); |
4025 | 6 | Pair.getSecond().first = Replacement; |
4026 | 6 | } |
4027 | 8 | } |
4028 | 210 | } |
4029 | 407 | if (Data.Reductions) { |
4030 | 6 | OMPPrivateScope FirstprivateScope(CGF); |
4031 | 18 | for (const auto &Pair : FirstprivatePtrs) { |
4032 | 18 | Address Replacement(CGF.Builder.CreateLoad(Pair.second), |
4033 | 18 | CGF.getContext().getDeclAlign(Pair.first)); |
4034 | 18 | FirstprivateScope.addPrivate(Pair.first, |
4035 | 18 | [Replacement]() { return Replacement; }); |
4036 | 18 | } |
4037 | 6 | (void)FirstprivateScope.Privatize(); |
4038 | 6 | OMPLexicalScope LexScope(CGF, S, CapturedRegion); |
4039 | 6 | ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars, |
4040 | 6 | Data.ReductionCopies, Data.ReductionOps); |
4041 | 6 | llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad( |
4042 | 6 | CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9))); |
4043 | 30 | for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
4044 | 24 | RedCG.emitSharedOrigLValue(CGF, Cnt); |
4045 | 24 | RedCG.emitAggregateType(CGF, Cnt); |
4046 | | // FIXME: This must be removed once the runtime library is fixed.
4047 | | // Emit required threadprivate variables for |
4048 | | // initializer/combiner/finalizer. |
4049 | 24 | CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(), |
4050 | 24 | RedCG, Cnt); |
4051 | 24 | Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( |
4052 | 24 | CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt)); |
4053 | 24 | Replacement = |
4054 | 24 | Address(CGF.EmitScalarConversion( |
4055 | 24 | Replacement.getPointer(), CGF.getContext().VoidPtrTy, |
4056 | 24 | CGF.getContext().getPointerType( |
4057 | 24 | Data.ReductionCopies[Cnt]->getType()), |
4058 | 24 | Data.ReductionCopies[Cnt]->getExprLoc()), |
4059 | 24 | Replacement.getAlignment()); |
4060 | 24 | Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement); |
4061 | 24 | Scope.addPrivate(RedCG.getBaseDecl(Cnt), |
4062 | 24 | [Replacement]() { return Replacement; }); |
4063 | 24 | } |
4064 | 6 | } |
4065 | | // Privatize all private variables except for in_reduction items. |
4066 | 407 | (void)Scope.Privatize(); |
4067 | 407 | SmallVector<const Expr *, 4> InRedVars; |
4068 | 407 | SmallVector<const Expr *, 4> InRedPrivs; |
4069 | 407 | SmallVector<const Expr *, 4> InRedOps; |
4070 | 407 | SmallVector<const Expr *, 4> TaskgroupDescriptors; |
4071 | 44 | for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) { |
4072 | 44 | auto IPriv = C->privates().begin(); |
4073 | 44 | auto IRed = C->reduction_ops().begin(); |
4074 | 44 | auto ITD = C->taskgroup_descriptors().begin(); |
4075 | 66 | for (const Expr *Ref : C->varlists()) { |
4076 | 66 | InRedVars.emplace_back(Ref); |
4077 | 66 | InRedPrivs.emplace_back(*IPriv); |
4078 | 66 | InRedOps.emplace_back(*IRed); |
4079 | 66 | TaskgroupDescriptors.emplace_back(*ITD); |
4080 | 66 | std::advance(IPriv, 1); |
4081 | 66 | std::advance(IRed, 1); |
4082 | 66 | std::advance(ITD, 1); |
4083 | 66 | } |
4084 | 44 | } |
4085 | | // Privatize in_reduction items here, because taskgroup descriptors must be |
4086 | | // privatized earlier. |
4087 | 407 | OMPPrivateScope InRedScope(CGF); |
4088 | 407 | if (!InRedVars.empty()) { |
4089 | 34 | ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps); |
4090 | 100 | for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
4091 | 66 | RedCG.emitSharedOrigLValue(CGF, Cnt); |
4092 | 66 | RedCG.emitAggregateType(CGF, Cnt); |
4093 | | // The taskgroup descriptor variable is always implicit firstprivate and |
4094 | | // privatized already during processing of the firstprivates. |
4095 | | // FIXME: This must be removed once the runtime library is fixed.
4096 | | // Emit required threadprivate variables for |
4097 | | // initializer/combiner/finalizer. |
4098 | 66 | CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(), |
4099 | 66 | RedCG, Cnt); |
4100 | 66 | llvm::Value *ReductionsPtr; |
4101 | 66 | if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) { |
4102 | 64 | ReductionsPtr = CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr), |
4103 | 64 | TRExpr->getExprLoc()); |
4104 | 2 | } else { |
4105 | 2 | ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); |
4106 | 2 | } |
4107 | 66 | Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( |
4108 | 66 | CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt)); |
4109 | 66 | Replacement = Address( |
4110 | 66 | CGF.EmitScalarConversion( |
4111 | 66 | Replacement.getPointer(), CGF.getContext().VoidPtrTy, |
4112 | 66 | CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()), |
4113 | 66 | InRedPrivs[Cnt]->getExprLoc()), |
4114 | 66 | Replacement.getAlignment()); |
4115 | 66 | Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement); |
4116 | 66 | InRedScope.addPrivate(RedCG.getBaseDecl(Cnt), |
4117 | 66 | [Replacement]() { return Replacement; }); |
4118 | 66 | } |
4119 | 34 | } |
4120 | 407 | (void)InRedScope.Privatize(); |
4121 | | |
4122 | 407 | CGOpenMPRuntime::UntiedTaskLocalDeclsRAII LocalVarsScope(CGF, |
4123 | 407 | UntiedLocalVars); |
4124 | 407 | Action.Enter(CGF); |
4125 | 407 | BodyGen(CGF); |
4126 | 407 | }; |
4127 | 407 | llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( |
4128 | 407 | S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied, |
4129 | 407 | Data.NumberOfParts); |
4130 | 407 | OMPLexicalScope Scope(*this, S, llvm::None, |
4131 | 407 | !isOpenMPParallelDirective(S.getDirectiveKind()) && |
4132 | 335 | !isOpenMPSimdDirective(S.getDirectiveKind())); |
4133 | 407 | TaskGen(*this, OutlinedFn, Data); |
4134 | 407 | } |
4135 | | |
4136 | | static ImplicitParamDecl * |
4137 | | createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data, |
4138 | | QualType Ty, CapturedDecl *CD, |
4139 | 674 | SourceLocation Loc) { |
4140 | 674 | auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty, |
4141 | 674 | ImplicitParamDecl::Other); |
4142 | 674 | auto *OrigRef = DeclRefExpr::Create( |
4143 | 674 | C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD, |
4144 | 674 | /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue); |
4145 | 674 | auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty, |
4146 | 674 | ImplicitParamDecl::Other); |
4147 | 674 | auto *PrivateRef = DeclRefExpr::Create( |
4148 | 674 | C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD, |
4149 | 674 | /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue); |
4150 | 674 | QualType ElemType = C.getBaseElementType(Ty); |
4151 | 674 | auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType, |
4152 | 674 | ImplicitParamDecl::Other); |
4153 | 674 | auto *InitRef = DeclRefExpr::Create( |
4154 | 674 | C, NestedNameSpecifierLoc(), SourceLocation(), InitVD, |
4155 | 674 | /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue); |
4156 | 674 | PrivateVD->setInitStyle(VarDecl::CInit); |
4157 | 674 | PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue, |
4158 | 674 | InitRef, /*BasePath=*/nullptr, |
4159 | 674 | VK_RValue, FPOptionsOverride())); |
4160 | 674 | Data.FirstprivateVars.emplace_back(OrigRef); |
4161 | 674 | Data.FirstprivateCopies.emplace_back(PrivateRef); |
4162 | 674 | Data.FirstprivateInits.emplace_back(InitRef); |
4163 | 674 | return OrigVD; |
4164 | 674 | } |
4165 | | |
4166 | | void CodeGenFunction::EmitOMPTargetTaskBasedDirective( |
4167 | | const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen, |
4168 | 448 | OMPTargetDataInfo &InputInfo) { |
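| | // Editor's note (not part of the source file): this path is used when a
| | // target construct must be wrapped in a task, e.g. 'target nowait' or a
| | // target with 'depend' clauses; the offloading base-pointer, pointer, size,
| | // and (optional) mapper arrays are passed into that task as the implicit
| | // firstprivate arrays built below.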
4169 | | // Emit outlined function for task construct. |
4170 | 448 | const CapturedStmt *CS = S.getCapturedStmt(OMPD_task); |
4171 | 448 | Address CapturedStruct = GenerateCapturedStmtArgument(*CS); |
4172 | 448 | QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); |
4173 | 448 | auto I = CS->getCapturedDecl()->param_begin(); |
4174 | 448 | auto PartId = std::next(I); |
4175 | 448 | auto TaskT = std::next(I, 4); |
4176 | 448 | OMPTaskDataTy Data; |
4177 | | // The task is not final. |
4178 | 448 | Data.Final.setInt(/*IntVal=*/false); |
4179 | | // Get list of firstprivate variables. |
4180 | 252 | for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { |
4181 | 252 | auto IRef = C->varlist_begin(); |
4182 | 252 | auto IElemInitRef = C->inits().begin(); |
4183 | 392 | for (auto *IInit : C->private_copies()) { |
4184 | 392 | Data.FirstprivateVars.push_back(*IRef); |
4185 | 392 | Data.FirstprivateCopies.push_back(IInit); |
4186 | 392 | Data.FirstprivateInits.push_back(*IElemInitRef); |
4187 | 392 | ++IRef; |
4188 | 392 | ++IElemInitRef; |
4189 | 392 | } |
4190 | 252 | } |
4191 | 448 | OMPPrivateScope TargetScope(*this); |
4192 | 448 | VarDecl *BPVD = nullptr; |
4193 | 448 | VarDecl *PVD = nullptr; |
4194 | 448 | VarDecl *SVD = nullptr; |
4195 | 448 | VarDecl *MVD = nullptr; |
4196 | 448 | if (InputInfo.NumberOfTargetItems > 0) { |
4197 | 216 | auto *CD = CapturedDecl::Create( |
4198 | 216 | getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0); |
4199 | 216 | llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems); |
4200 | 216 | QualType BaseAndPointerAndMapperType = getContext().getConstantArrayType( |
4201 | 216 | getContext().VoidPtrTy, ArrSize, nullptr, ArrayType::Normal, |
4202 | 216 | /*IndexTypeQuals=*/0); |
4203 | 216 | BPVD = createImplicitFirstprivateForType( |
4204 | 216 | getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc()); |
4205 | 216 | PVD = createImplicitFirstprivateForType( |
4206 | 216 | getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc()); |
4207 | 216 | QualType SizesType = getContext().getConstantArrayType( |
4208 | 216 | getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1), |
4209 | 216 | ArrSize, nullptr, ArrayType::Normal, |
4210 | 216 | /*IndexTypeQuals=*/0); |
4211 | 216 | SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD, |
4212 | 216 | S.getBeginLoc()); |
4213 | 216 | TargetScope.addPrivate( |
4214 | 216 | BPVD, [&InputInfo]() { return InputInfo.BasePointersArray; }); |
4215 | 216 | TargetScope.addPrivate(PVD, |
4216 | 216 | [&InputInfo]() { return InputInfo.PointersArray; }); |
4217 | 216 | TargetScope.addPrivate(SVD, |
4218 | 216 | [&InputInfo]() { return InputInfo.SizesArray; }); |
4219 | | // If there is no user-defined mapper, the mapper array will be nullptr. In |
4220 | | // this case, we don't need to privatize it. |
4221 | 216 | if (!dyn_cast_or_null<llvm::ConstantPointerNull>( |
4222 | 26 | InputInfo.MappersArray.getPointer())) { |
4223 | 26 | MVD = createImplicitFirstprivateForType( |
4224 | 26 | getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc()); |
4225 | 26 | TargetScope.addPrivate(MVD, |
4226 | 26 | [&InputInfo]() { return InputInfo.MappersArray; }); |
4227 | 26 | } |
4228 | 216 | } |
4229 | 448 | (void)TargetScope.Privatize(); |
4230 | | // Build list of dependences. |
4231 | 376 | for (const auto *C : S.getClausesOfKind<OMPDependClause>()) { |
4232 | 376 | OMPTaskDataTy::DependData &DD = |
4233 | 376 | Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier()); |
4234 | 376 | DD.DepExprs.append(C->varlist_begin(), C->varlist_end()); |
4235 | 376 | } |
4236 | 448 | auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, MVD, |
4237 | 448 | &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) { |
4238 | | // Set proper addresses for generated private copies. |
4239 | 448 | OMPPrivateScope Scope(CGF); |
4240 | 448 | if (!Data.FirstprivateVars.empty()) { |
4241 | 344 | llvm::FunctionType *CopyFnTy = llvm::FunctionType::get( |
4242 | 344 | CGF.Builder.getVoidTy(), {CGF.Builder.getInt8PtrTy()}, true); |
4243 | 344 | enum { PrivatesParam = 2, CopyFnParam = 3 }; |
4244 | 344 | llvm::Value *CopyFn = CGF.Builder.CreateLoad( |
4245 | 344 | CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam))); |
4246 | 344 | llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar( |
4247 | 344 | CS->getCapturedDecl()->getParam(PrivatesParam))); |
4248 | | // Map privates. |
4249 | 344 | llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; |
4250 | 344 | llvm::SmallVector<llvm::Value *, 16> CallArgs; |
4251 | 344 | CallArgs.push_back(PrivatesPtr); |
4252 | 1.06k | for (const Expr *E : Data.FirstprivateVars) { |
4253 | 1.06k | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
4254 | 1.06k | Address PrivatePtr = |
4255 | 1.06k | CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), |
4256 | 1.06k | ".firstpriv.ptr.addr"); |
4257 | 1.06k | PrivatePtrs.emplace_back(VD, PrivatePtr); |
4258 | 1.06k | CallArgs.push_back(PrivatePtr.getPointer()); |
4259 | 1.06k | } |
4260 | 344 | CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall( |
4261 | 344 | CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs); |
4262 | 1.06k | for (const auto &Pair : PrivatePtrs) { |
4263 | 1.06k | Address Replacement(CGF.Builder.CreateLoad(Pair.second), |
4264 | 1.06k | CGF.getContext().getDeclAlign(Pair.first)); |
4265 | 1.06k | Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); |
4266 | 1.06k | } |
4267 | 344 | } |
4268 | | // Privatize all private variables except for in_reduction items. |
4269 | 448 | (void)Scope.Privatize(); |
4270 | 448 | if (InputInfo.NumberOfTargetItems > 0) { |
4271 | 216 | InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP( |
4272 | 216 | CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0); |
4273 | 216 | InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP( |
4274 | 216 | CGF.GetAddrOfLocalVar(PVD), /*Index=*/0); |
4275 | 216 | InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP( |
4276 | 216 | CGF.GetAddrOfLocalVar(SVD), /*Index=*/0); |
4277 | | // If MVD is nullptr, the mapper array is not privatized.
4278 | 216 | if (MVD) |
4279 | 26 | InputInfo.MappersArray = CGF.Builder.CreateConstArrayGEP( |
4280 | 26 | CGF.GetAddrOfLocalVar(MVD), /*Index=*/0); |
4281 | 216 | } |
4282 | | |
4283 | 448 | Action.Enter(CGF); |
4284 | 448 | OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false); |
4285 | 448 | BodyGen(CGF); |
4286 | 448 | }; |
4287 | 448 | llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( |
4288 | 448 | S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true, |
4289 | 448 | Data.NumberOfParts); |
4290 | 284 | llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0);
4291 | 448 | IntegerLiteral IfCond(getContext(), TrueOrFalse, |
4292 | 448 | getContext().getIntTypeForBitwidth(32, /*Signed=*/0), |
4293 | 448 | SourceLocation()); |
4294 | | |
4295 | 448 | CGM.getOpenMPRuntime().emitTaskCall(*this, S.getBeginLoc(), S, OutlinedFn, |
4296 | 448 | SharedsTy, CapturedStruct, &IfCond, Data); |
4297 | 448 | } |
4298 | | |
4299 | 181 | void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { |
4300 | | // Emit outlined function for task construct. |
4301 | 181 | const CapturedStmt *CS = S.getCapturedStmt(OMPD_task); |
4302 | 181 | Address CapturedStruct = GenerateCapturedStmtArgument(*CS); |
4303 | 181 | QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); |
4304 | 181 | const Expr *IfCond = nullptr; |
4305 | 44 | for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { |
4306 | 44 | if (C->getNameModifier() == OMPD_unknown || |
4307 | 44 | C->getNameModifier() == OMPD_task) {
4308 | 44 | IfCond = C->getCondition(); |
4309 | 44 | break; |
4310 | 44 | } |
4311 | 44 | } |
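| | // Illustrative example (editor's note, not part of the source file): the
| | // modifier check above means that in
| | //   #pragma omp task if(task: cond)
| | // 'cond' is selected as the task's if-condition, while an 'if' clause that
| | // names a different directive is ignored here. 'cond' is a placeholder.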
4312 | | |
4313 | 181 | OMPTaskDataTy Data; |
4314 | | // Check if we should emit tied or untied task. |
4315 | 181 | Data.Tied = !S.getSingleClause<OMPUntiedClause>(); |
4316 | 181 | auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { |
4317 | 181 | CGF.EmitStmt(CS->getCapturedStmt()); |
4318 | 181 | }; |
4319 | 181 | auto &&TaskGen = [&S, SharedsTy, CapturedStruct, |
4320 | 181 | IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn, |
4321 | 181 | const OMPTaskDataTy &Data) { |
4322 | 181 | CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getBeginLoc(), S, OutlinedFn, |
4323 | 181 | SharedsTy, CapturedStruct, IfCond, |
4324 | 181 | Data); |
4325 | 181 | }; |
4326 | 181 | auto LPCRegion = |
4327 | 181 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
4328 | 181 | EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data); |
4329 | 181 | } |
4330 | | |
4331 | | void CodeGenFunction::EmitOMPTaskyieldDirective( |
4332 | 16 | const OMPTaskyieldDirective &S) { |
4333 | 16 | CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getBeginLoc()); |
4334 | 16 | } |
4335 | | |
4336 | 34 | void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) { |
4337 | 34 | CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_barrier); |
4338 | 34 | } |
4339 | | |
4340 | 12 | void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) { |
4341 | 12 | CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc()); |
4342 | 12 | } |
4343 | | |
4344 | | void CodeGenFunction::EmitOMPTaskgroupDirective( |
4345 | 39 | const OMPTaskgroupDirective &S) { |
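| | // Illustrative example (editor's note, not part of the source file):
| | //   #pragma omp taskgroup task_reduction(+: sum)
| | // makes getReductionRef() non-null below; the emitted reduction descriptor
| | // is stored in a local variable so that child tasks using
| | // 'in_reduction(+: sum)' can locate their reduction storage. 'sum' is a
| | // placeholder.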
4346 | 39 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
4347 | 39 | Action.Enter(CGF); |
4348 | 39 | if (const Expr *E = S.getReductionRef()) { |
4349 | 26 | SmallVector<const Expr *, 4> LHSs; |
4350 | 26 | SmallVector<const Expr *, 4> RHSs; |
4351 | 26 | OMPTaskDataTy Data; |
4352 | 26 | for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) { |
4353 | 26 | Data.ReductionVars.append(C->varlist_begin(), C->varlist_end()); |
4354 | 26 | Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end()); |
4355 | 26 | Data.ReductionCopies.append(C->privates().begin(), C->privates().end()); |
4356 | 26 | Data.ReductionOps.append(C->reduction_ops().begin(), |
4357 | 26 | C->reduction_ops().end()); |
4358 | 26 | LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); |
4359 | 26 | RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); |
4360 | 26 | } |
4361 | 26 | llvm::Value *ReductionDesc = |
4362 | 26 | CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(), |
4363 | 26 | LHSs, RHSs, Data); |
4364 | 26 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
4365 | 26 | CGF.EmitVarDecl(*VD); |
4366 | 26 | CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD), |
4367 | 26 | /*Volatile=*/false, E->getType()); |
4368 | 26 | } |
4369 | 39 | CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); |
4370 | 39 | }; |
4371 | 39 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
4372 | 39 | CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getBeginLoc()); |
4373 | 39 | } |
4374 | | |
4375 | 40 | void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { |
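| | // Editor's note (not part of the source file): a bare '#pragma omp flush'
| | // takes the AcquireRelease path below, while a flush with a list, e.g.
| | // '#pragma omp flush(a, b)', carries an OMPFlushClause and is handed to the
| | // runtime with NotAtomic ordering together with its variable list.
| | // 'a' and 'b' are placeholders.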
4376 | 40 | llvm::AtomicOrdering AO = S.getSingleClause<OMPFlushClause>() |
4377 | 8 | ? llvm::AtomicOrdering::NotAtomic |
4378 | 32 | : llvm::AtomicOrdering::AcquireRelease; |
4379 | 40 | CGM.getOpenMPRuntime().emitFlush( |
4380 | 40 | *this, |
4381 | 40 | [&S]() -> ArrayRef<const Expr *> { |
4382 | 40 | if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) |
4383 | 8 | return llvm::makeArrayRef(FlushClause->varlist_begin(), |
4384 | 8 | FlushClause->varlist_end()); |
4385 | 32 | return llvm::None; |
4386 | 32 | }(), |
4387 | 40 | S.getBeginLoc(), AO); |
4388 | 40 | } |
4389 | | |
4390 | 14 | void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) { |
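| | // Illustrative example (editor's note, not part of the source file):
| | //   omp_depend_t obj;
| | //   #pragma omp depobj(obj) depend(in: x)  // emitDepobjDependClause
| | //   #pragma omp depobj(obj) update(inout)  // emitUpdateClause
| | //   #pragma omp depobj(obj) destroy        // emitDestroyClause
| | // 'obj' and 'x' are placeholders.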
4391 | 14 | const auto *DO = S.getSingleClause<OMPDepobjClause>(); |
4392 | 14 | LValue DOLVal = EmitLValue(DO->getDepobj()); |
4393 | 14 | if (const auto *DC = S.getSingleClause<OMPDependClause>()) { |
4394 | 6 | OMPTaskDataTy::DependData Dependencies(DC->getDependencyKind(), |
4395 | 6 | DC->getModifier()); |
4396 | 6 | Dependencies.DepExprs.append(DC->varlist_begin(), DC->varlist_end()); |
4397 | 6 | Address DepAddr = CGM.getOpenMPRuntime().emitDepobjDependClause( |
4398 | 6 | *this, Dependencies, DC->getBeginLoc()); |
4399 | 6 | EmitStoreOfScalar(DepAddr.getPointer(), DOLVal); |
4400 | 6 | return; |
4401 | 6 | } |
4402 | 8 | if (const auto *DC = S.getSingleClause<OMPDestroyClause>()) { |
4403 | 4 | CGM.getOpenMPRuntime().emitDestroyClause(*this, DOLVal, DC->getBeginLoc()); |
4404 | 4 | return; |
4405 | 4 | } |
4406 | 4 | if (const auto *UC = S.getSingleClause<OMPUpdateClause>()) { |
4407 | 4 | CGM.getOpenMPRuntime().emitUpdateClause( |
4408 | 4 | *this, DOLVal, UC->getDependencyKind(), UC->getBeginLoc()); |
4409 | 4 | return; |
4410 | 4 | } |
4411 | 4 | } |
4412 | | |
4413 | 56 | void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) { |
4414 | 56 | if (!OMPParentLoopDirectiveForScan) |
4415 | 8 | return; |
4416 | 48 | const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan; |
4417 | 48 | bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>(); |
4418 | 48 | SmallVector<const Expr *, 4> Shareds; |
4419 | 48 | SmallVector<const Expr *, 4> Privates; |
4420 | 48 | SmallVector<const Expr *, 4> LHSs; |
4421 | 48 | SmallVector<const Expr *, 4> RHSs; |
4422 | 48 | SmallVector<const Expr *, 4> ReductionOps; |
4423 | 48 | SmallVector<const Expr *, 4> CopyOps; |
4424 | 48 | SmallVector<const Expr *, 4> CopyArrayTemps; |
4425 | 48 | SmallVector<const Expr *, 4> CopyArrayElems; |
4426 | 48 | for (const auto *C : ParentDir.getClausesOfKind<OMPReductionClause>()) { |
4427 | 48 | if (C->getModifier() != OMPC_REDUCTION_inscan) |
4428 | 0 | continue; |
4429 | 48 | Shareds.append(C->varlist_begin(), C->varlist_end()); |
4430 | 48 | Privates.append(C->privates().begin(), C->privates().end()); |
4431 | 48 | LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); |
4432 | 48 | RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); |
4433 | 48 | ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); |
4434 | 48 | CopyOps.append(C->copy_ops().begin(), C->copy_ops().end()); |
4435 | 48 | CopyArrayTemps.append(C->copy_array_temps().begin(), |
4436 | 48 | C->copy_array_temps().end()); |
4437 | 48 | CopyArrayElems.append(C->copy_array_elems().begin(), |
4438 | 48 | C->copy_array_elems().end()); |
4439 | 48 | } |
4440 | 48 | if (ParentDir.getDirectiveKind() == OMPD_simd || |
4441 | 40 | (getLangOpts().OpenMPSimd && |
4442 | 16 | isOpenMPSimdDirective(ParentDir.getDirectiveKind()))) {
4443 | | // For simd directive and simd-based directives in simd only mode, use the |
4444 | | // following codegen: |
4445 | | // int x = 0; |
4446 | | // #pragma omp simd reduction(inscan, +: x) |
4447 | | // for (..) { |
4448 | | // <first part> |
4449 | | // #pragma omp scan inclusive(x) |
4450 | | // <second part> |
4451 | | // } |
4452 | | // is transformed to: |
4453 | | // int x = 0; |
4454 | | // for (..) { |
4455 | | // int x_priv = 0; |
4456 | | // <first part> |
4457 | | // x = x_priv + x; |
4458 | | // x_priv = x; |
4459 | | // <second part> |
4460 | | // } |
4461 | | // and |
4462 | | // int x = 0; |
4463 | | // #pragma omp simd reduction(inscan, +: x) |
4464 | | // for (..) { |
4465 | | // <first part> |
4466 | | // #pragma omp scan exclusive(x) |
4467 | | // <second part> |
4468 | | // } |
4469 | | // to |
4470 | | // int x = 0; |
4471 | | // for (..) { |
4472 | | // int x_priv = 0; |
4473 | | // <second part> |
4474 | | // int temp = x; |
4475 | | // x = x_priv + x; |
4476 | | // x_priv = temp; |
4477 | | // <first part> |
4478 | | // } |
4479 | 16 | llvm::BasicBlock *OMPScanReduce = createBasicBlock("omp.inscan.reduce"); |
4480 | 16 | EmitBranch(IsInclusive |
4481 | 8 | ? OMPScanReduce |
4482 | 8 | : BreakContinueStack.back().ContinueBlock.getBlock()); |
4483 | 16 | EmitBlock(OMPScanDispatch); |
4484 | 16 | { |
4485 | | // New scope for correct construction/destruction of temp variables for |
4486 | | // exclusive scan. |
4487 | 16 | LexicalScope Scope(*this, S.getSourceRange()); |
4488 | 8 | EmitBranch(IsInclusive ? OMPBeforeScanBlock : OMPAfterScanBlock); |
4489 | 16 | EmitBlock(OMPScanReduce); |
4490 | 16 | if (!IsInclusive) { |
4491 | | // Create temp var and copy LHS value to this temp value. |
4492 | | // TMP = LHS; |
4493 | 20 | for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
4494 | 12 | const Expr *PrivateExpr = Privates[I]; |
4495 | 12 | const Expr *TempExpr = CopyArrayTemps[I]; |
4496 | 12 | EmitAutoVarDecl( |
4497 | 12 | *cast<VarDecl>(cast<DeclRefExpr>(TempExpr)->getDecl())); |
4498 | 12 | LValue DestLVal = EmitLValue(TempExpr); |
4499 | 12 | LValue SrcLVal = EmitLValue(LHSs[I]); |
4500 | 12 | EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this), |
4501 | 12 | SrcLVal.getAddress(*this), |
4502 | 12 | cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()), |
4503 | 12 | cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), |
4504 | 12 | CopyOps[I]); |
4505 | 12 | } |
4506 | 8 | } |
4507 | 16 | CGM.getOpenMPRuntime().emitReduction( |
4508 | 16 | *this, ParentDir.getEndLoc(), Privates, LHSs, RHSs, ReductionOps, |
4509 | 16 | {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_simd}); |
4510 | 40 | for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
4511 | 24 | const Expr *PrivateExpr = Privates[I]; |
4512 | 24 | LValue DestLVal; |
4513 | 24 | LValue SrcLVal; |
4514 | 24 | if (IsInclusive) { |
4515 | 12 | DestLVal = EmitLValue(RHSs[I]); |
4516 | 12 | SrcLVal = EmitLValue(LHSs[I]); |
4517 | 12 | } else { |
4518 | 12 | const Expr *TempExpr = CopyArrayTemps[I]; |
4519 | 12 | DestLVal = EmitLValue(RHSs[I]); |
4520 | 12 | SrcLVal = EmitLValue(TempExpr); |
4521 | 12 | } |
4522 | 24 | EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this), |
4523 | 24 | SrcLVal.getAddress(*this), |
4524 | 24 | cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()), |
4525 | 24 | cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), |
4526 | 24 | CopyOps[I]); |
4527 | 24 | } |
4528 | 16 | } |
4529 | 8 | EmitBranch(IsInclusive ? OMPAfterScanBlock : OMPBeforeScanBlock); |
4530 | 16 | OMPScanExitBlock = IsInclusive |
4531 | 8 | ? BreakContinueStack.back().ContinueBlock.getBlock() |
4532 | 8 | : OMPScanReduce; |
4533 | 16 | EmitBlock(OMPAfterScanBlock); |
4534 | 16 | return; |
4535 | 16 | } |
4536 | 32 | if (!IsInclusive) { |
4537 | 16 | EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock()); |
4538 | 16 | EmitBlock(OMPScanExitBlock); |
4539 | 16 | } |
4540 | 32 | if (OMPFirstScanLoop) { |
4541 | | // Emit buffer[i] = red; at the end of the input phase. |
4542 | 16 | const auto *IVExpr = cast<OMPLoopDirective>(ParentDir) |
4543 | 16 | .getIterationVariable() |
4544 | 16 | ->IgnoreParenImpCasts(); |
4545 | 16 | LValue IdxLVal = EmitLValue(IVExpr); |
4546 | 16 | llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc()); |
4547 | 16 | IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false); |
4548 | 48 | for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
4549 | 32 | const Expr *PrivateExpr = Privates[I]; |
4550 | 32 | const Expr *OrigExpr = Shareds[I]; |
4551 | 32 | const Expr *CopyArrayElem = CopyArrayElems[I]; |
4552 | 32 | OpaqueValueMapping IdxMapping( |
4553 | 32 | *this, |
4554 | 32 | cast<OpaqueValueExpr>( |
4555 | 32 | cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()), |
4556 | 32 | RValue::get(IdxVal)); |
4557 | 32 | LValue DestLVal = EmitLValue(CopyArrayElem); |
4558 | 32 | LValue SrcLVal = EmitLValue(OrigExpr); |
4559 | 32 | EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this), |
4560 | 32 | SrcLVal.getAddress(*this), |
4561 | 32 | cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()), |
4562 | 32 | cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), |
4563 | 32 | CopyOps[I]); |
4564 | 32 | } |
4565 | 16 | } |
4566 | 32 | EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock()); |
4567 | 32 | if (IsInclusive) { |
4568 | 16 | EmitBlock(OMPScanExitBlock); |
4569 | 16 | EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock()); |
4570 | 16 | } |
4571 | 32 | EmitBlock(OMPScanDispatch); |
4572 | 32 | if (!OMPFirstScanLoop) { |
4573 | | // Emit red = buffer[i]; at the entrance to the scan phase. |
4574 | 16 | const auto *IVExpr = cast<OMPLoopDirective>(ParentDir) |
4575 | 16 | .getIterationVariable() |
4576 | 16 | ->IgnoreParenImpCasts(); |
4577 | 16 | LValue IdxLVal = EmitLValue(IVExpr); |
4578 | 16 | llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc()); |
4579 | 16 | IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false); |
4580 | 16 | llvm::BasicBlock *ExclusiveExitBB = nullptr; |
4581 | 16 | if (!IsInclusive) { |
4582 | 8 | llvm::BasicBlock *ContBB = createBasicBlock("omp.exclusive.dec"); |
4583 | 8 | ExclusiveExitBB = createBasicBlock("omp.exclusive.copy.exit"); |
4584 | 8 | llvm::Value *Cmp = Builder.CreateIsNull(IdxVal); |
4585 | 8 | Builder.CreateCondBr(Cmp, ExclusiveExitBB, ContBB); |
4586 | 8 | EmitBlock(ContBB); |
4587 | | // Use iteration idx - 1 for the exclusive scan.
4588 | 8 | IdxVal = Builder.CreateNUWSub(IdxVal, llvm::ConstantInt::get(SizeTy, 1)); |
4589 | 8 | } |
4590 | 48 | for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
4591 | 32 | const Expr *PrivateExpr = Privates[I]; |
4592 | 32 | const Expr *OrigExpr = Shareds[I]; |
4593 | 32 | const Expr *CopyArrayElem = CopyArrayElems[I]; |
4594 | 32 | OpaqueValueMapping IdxMapping( |
4595 | 32 | *this, |
4596 | 32 | cast<OpaqueValueExpr>( |
4597 | 32 | cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()), |
4598 | 32 | RValue::get(IdxVal)); |
4599 | 32 | LValue SrcLVal = EmitLValue(CopyArrayElem); |
4600 | 32 | LValue DestLVal = EmitLValue(OrigExpr); |
4601 | 32 | EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this), |
4602 | 32 | SrcLVal.getAddress(*this), |
4603 | 32 | cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()), |
4604 | 32 | cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), |
4605 | 32 | CopyOps[I]); |
4606 | 32 | } |
4607 | 16 | if (!IsInclusive) { |
4608 | 8 | EmitBlock(ExclusiveExitBB); |
4609 | 8 | } |
4610 | 16 | } |
4611 | 16 | EmitBranch((OMPFirstScanLoop == IsInclusive) ? OMPBeforeScanBlock |
4612 | 16 | : OMPAfterScanBlock); |
4613 | 32 | EmitBlock(OMPAfterScanBlock); |
4614 | 32 | } |
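
To make the lowering above concrete, here is a minimal usage sketch (illustrative only, not part of this file) of the two source patterns EmitOMPScanDirective handles; it assumes a compiler with OpenMP 5.0 scan support (-fopenmp, or -fopenmp-simd for the simd-only path handled first in the function). With inclusive(x), iteration i's scan phase sees the contributions of iterations 0..i; with exclusive(x), only 0..i-1.

    #include <cstdio>

    int main() {
      int a[8] = {1, 2, 3, 4, 5, 6, 7, 8};
      int incl[8], excl[8];
      int x = 0;
    #pragma omp simd reduction(inscan, + : x)
      for (int i = 0; i < 8; ++i) {
        x += a[i]; // <first part>: input phase
    #pragma omp scan inclusive(x)
        incl[i] = x; // <second part>: scan phase, sum of a[0..i]
      }
      x = 0;
    #pragma omp simd reduction(inscan, + : x)
      for (int i = 0; i < 8; ++i) {
        excl[i] = x; // scan phase: sum of a[0..i-1] (0 when i == 0)
    #pragma omp scan exclusive(x)
        x += a[i]; // input phase
      }
      printf("incl[7]=%d excl[7]=%d\n", incl[7], excl[7]); // prints 36 and 28
      return 0;
    }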
4615 | | |
4616 | | void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, |
4617 | | const CodeGenLoopTy &CodeGenLoop, |
4618 | 4.45k | Expr *IncExpr) { |
4619 | | // Emit the loop iteration variable. |
4620 | 4.45k | const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); |
4621 | 4.45k | const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl()); |
4622 | 4.45k | EmitVarDecl(*IVDecl); |
4623 | | |
4624 | | // Emit the iteration count variable.
4625 | | // If it is not a variable, Sema decided to calculate the iteration count on
4626 | | // each iteration (e.g., it is foldable into a constant).
4627 | 4.45k | if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { |
4628 | 0 | EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); |
4629 | | // Emit the calculation of the iteration count.
4630 | 0 | EmitIgnoredExpr(S.getCalcLastIteration()); |
4631 | 0 | } |
4632 | | |
4633 | 4.45k | CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); |
4634 | | |
4635 | 4.45k | bool HasLastprivateClause = false; |
4636 | | // Check the precondition.
4637 | 4.45k | { |
4638 | 4.45k | OMPLoopScope PreInitScope(*this, S); |
4639 | | // Skip the entire loop if we don't meet the precondition. |
4640 | | // If the condition constant folds and can be elided, avoid emitting the |
4641 | | // whole loop. |
4642 | 4.45k | bool CondConstant; |
4643 | 4.45k | llvm::BasicBlock *ContBlock = nullptr; |
4644 | 4.45k | if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { |
4645 | 3.56k | if (!CondConstant) |
4646 | 0 | return; |
4647 | 890 | } else { |
4648 | 890 | llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then"); |
4649 | 890 | ContBlock = createBasicBlock("omp.precond.end"); |
4650 | 890 | emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, |
4651 | 890 | getProfileCount(&S)); |
4652 | 890 | EmitBlock(ThenBlock); |
4653 | 890 | incrementProfileCounter(&S); |
4654 | 890 | } |
4655 | | |
4656 | 4.45k | emitAlignedClause(*this, S); |
4657 | | // Emit 'then' code. |
4658 | 4.45k | { |
4659 | | // Emit the helper variable initializations.
4660 | | |
4661 | 4.45k | LValue LB = EmitOMPHelperVar( |
4662 | 4.45k | *this, cast<DeclRefExpr>( |
4663 | 4.45k | (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) |
4664 | 2.75k | ? S.getCombinedLowerBoundVariable() |
4665 | 1.70k | : S.getLowerBoundVariable()))); |
4666 | 4.45k | LValue UB = EmitOMPHelperVar( |
4667 | 4.45k | *this, cast<DeclRefExpr>( |
4668 | 4.45k | (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) |
4669 | 2.75k | ? S.getCombinedUpperBoundVariable() |
4670 | 1.70k | : S.getUpperBoundVariable()))); |
4671 | 4.45k | LValue ST = |
4672 | 4.45k | EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); |
4673 | 4.45k | LValue IL = |
4674 | 4.45k | EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); |
4675 | | |
4676 | 4.45k | OMPPrivateScope LoopScope(*this); |
4677 | 4.45k | if (EmitOMPFirstprivateClause(S, LoopScope)) { |
4678 | | // Emit implicit barrier to synchronize threads and avoid data races |
4679 | | // on initialization of firstprivate variables and post-update of |
4680 | | // lastprivate variables. |
4681 | 0 | CGM.getOpenMPRuntime().emitBarrierCall( |
4682 | 0 | *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, |
4683 | 0 | /*ForceSimpleCall=*/true); |
4684 | 0 | } |
4685 | 4.45k | EmitOMPPrivateClause(S, LoopScope); |
4686 | 4.45k | if (isOpenMPSimdDirective(S.getDirectiveKind()) && |
4687 | 2.32k | !isOpenMPParallelDirective(S.getDirectiveKind()) && |
4688 | 896 | !isOpenMPTeamsDirective(S.getDirectiveKind())) |
4689 | 150 | EmitOMPReductionClauseInit(S, LoopScope); |
4690 | 4.45k | HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); |
4691 | 4.45k | EmitOMPPrivateLoopCounters(S, LoopScope); |
4692 | 4.45k | (void)LoopScope.Privatize(); |
4693 | 4.45k | if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) |
4694 | 2.57k | CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S); |
4695 | | |
4696 | | // Detect the distribute schedule kind and chunk. |
4697 | 4.45k | llvm::Value *Chunk = nullptr; |
4698 | 4.45k | OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown; |
4699 | 4.45k | if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) { |
4700 | 472 | ScheduleKind = C->getDistScheduleKind(); |
4701 | 472 | if (const Expr *Ch = C->getChunkSize()) { |
4702 | 286 | Chunk = EmitScalarExpr(Ch); |
4703 | 286 | Chunk = EmitScalarConversion(Chunk, Ch->getType(), |
4704 | 286 | S.getIterationVariable()->getType(), |
4705 | 286 | S.getBeginLoc()); |
4706 | 286 | } |
4707 | 3.98k | } else { |
4708 | | // Default behavior for the dist_schedule clause.
4709 | 3.98k | CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk( |
4710 | 3.98k | *this, S, ScheduleKind, Chunk); |
4711 | 3.98k | } |
4712 | 4.45k | const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); |
4713 | 4.45k | const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); |
4714 | | |
4715 | | // OpenMP [2.10.8, distribute Construct, Description] |
4716 | | // If dist_schedule is specified, kind must be static. If specified, |
4717 | | // iterations are divided into chunks of size chunk_size, chunks are |
4718 | | // assigned to the teams of the league in a round-robin fashion in the |
4719 | | // order of the team number. When no chunk_size is specified, the |
4720 | | // iteration space is divided into chunks that are approximately equal |
4721 | | // in size, and at most one chunk is distributed to each team of the |
4722 | | // league. The size of the chunks is unspecified in this case. |
4723 | 4.45k | bool StaticChunked = RT.isStaticChunked( |
4724 | 4.45k | ScheduleKind, /* Chunked */ Chunk != nullptr) && |
4725 | 652 | isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()); |
4726 | 4.45k | if (RT.isStaticNonchunked(ScheduleKind, |
4727 | 4.45k | /* Chunked */ Chunk != nullptr) || |
4728 | 4.29k | StaticChunked) {
4729 | 4.29k | CGOpenMPRuntime::StaticRTInput StaticInit( |
4730 | 4.29k | IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(*this), |
4731 | 4.29k | LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this), |
4732 | 3.80k | StaticChunked ? Chunk : nullptr);
4733 | 4.29k | RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, |
4734 | 4.29k | StaticInit); |
4735 | 4.29k | JumpDest LoopExit = |
4736 | 4.29k | getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); |
4737 | | // UB = min(UB, GlobalUB); |
4738 | 4.29k | EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) |
4739 | 2.75k | ? S.getCombinedEnsureUpperBound() |
4740 | 1.54k | : S.getEnsureUpperBound()); |
4741 | | // IV = LB; |
4742 | 4.29k | EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) |
4743 | 2.75k | ? S.getCombinedInit() |
4744 | 1.54k | : S.getInit()); |
4745 | | |
4746 | 4.29k | const Expr *Cond = |
4747 | 4.29k | isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) |
4748 | 2.75k | ? S.getCombinedCond() |
4749 | 1.54k | : S.getCond(); |
4750 | | |
4751 | 4.29k | if (StaticChunked) |
4752 | 488 | Cond = S.getCombinedDistCond(); |
4753 | | |
4754 | | // For static unchunked schedules generate: |
4755 | | // |
4756 | | // 1. For distribute alone, codegen |
4757 | | // while (idx <= UB) { |
4758 | | // BODY; |
4759 | | // ++idx; |
4760 | | // } |
4761 | | // |
4762 | | // 2. When combined with 'for' (e.g. as in 'distribute parallel for') |
4763 | | // while (idx <= UB) { |
4764 | | // <CodeGen rest of pragma>(LB, UB); |
4765 | | // idx += ST; |
4766 | | // } |
4767 | | // |
4768 | | // For static chunked schedules (one chunk at a time), generate:
4769 | | // |
4770 | | // while (IV <= GlobalUB) { |
4771 | | // <CodeGen rest of pragma>(LB, UB); |
4772 | | // LB += ST; |
4773 | | // UB += ST; |
4774 | | // UB = min(UB, GlobalUB); |
4775 | | // IV = LB; |
4776 | | // } |
4777 | | // |
4778 | 4.29k | emitCommonSimdLoop( |
4779 | 4.29k | *this, S, |
4780 | 4.26k | [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
4781 | 4.26k | if (isOpenMPSimdDirective(S.getDirectiveKind())) |
4782 | 2.19k | CGF.EmitOMPSimdInit(S, /*IsMonotonic=*/true); |
4783 | 4.26k | }, |
4784 | 4.29k | [&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop, |
4785 | 4.34k | StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) { |
4786 | 4.34k | CGF.EmitOMPInnerLoop( |
4787 | 4.34k | S, LoopScope.requiresCleanups(), Cond, IncExpr, |
4788 | 4.34k | [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { |
4789 | 4.34k | CodeGenLoop(CGF, S, LoopExit); |
4790 | 4.34k | }, |
4791 | 4.34k | [&S, StaticChunked](CodeGenFunction &CGF) { |
4792 | 4.34k | if (StaticChunked) { |
4793 | 491 | CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound()); |
4794 | 491 | CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound()); |
4795 | 491 | CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound()); |
4796 | 491 | CGF.EmitIgnoredExpr(S.getCombinedInit()); |
4797 | 491 | } |
4798 | 4.34k | }); |
4799 | 4.34k | }); |
4800 | 4.29k | EmitBlock(LoopExit.getBlock()); |
4801 | | // Tell the runtime we are done. |
4802 | 4.29k | RT.emitForStaticFinish(*this, S.getEndLoc(), S.getDirectiveKind()); |
4803 | 164 | } else { |
4804 | | // Emit the outer loop, which requests its work chunk [LB..UB] from the
4805 | | // runtime and runs the inner loop to process it.
4806 | 164 | const OMPLoopArguments LoopArguments = { |
4807 | 164 | LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this), |
4808 | 164 | IL.getAddress(*this), Chunk}; |
4809 | 164 | EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments, |
4810 | 164 | CodeGenLoop); |
4811 | 164 | } |
4812 | 4.45k | if (isOpenMPSimdDirective(S.getDirectiveKind())) { |
4813 | 2.32k | EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) { |
4814 | 2.32k | return CGF.Builder.CreateIsNotNull( |
4815 | 2.32k | CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); |
4816 | 2.32k | }); |
4817 | 2.32k | } |
4818 | 4.45k | if (isOpenMPSimdDirective(S.getDirectiveKind()) && |
4819 | 2.32k | !isOpenMPParallelDirective(S.getDirectiveKind()) && |
4820 | 896 | !isOpenMPTeamsDirective(S.getDirectiveKind())) { |
4821 | 150 | EmitOMPReductionClauseFinal(S, OMPD_simd); |
4822 | | // Emit post-update of the reduction variables if IsLastIter != 0. |
4823 | 150 | emitPostUpdateForReductionClause( |
4824 | 0 | *this, S, [IL, &S](CodeGenFunction &CGF) { |
4825 | 0 | return CGF.Builder.CreateIsNotNull( |
4826 | 0 | CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); |
4827 | 0 | }); |
4828 | 150 | } |
4829 | | // Emit final copy of the lastprivate variables if IsLastIter != 0. |
4830 | 4.45k | if (HasLastprivateClause) { |
4831 | 216 | EmitOMPLastprivateClauseFinal( |
4832 | 216 | S, /*NoFinals=*/false, |
4833 | 216 | Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc()))); |
4834 | 216 | } |
4835 | 4.45k | } |
4836 | | |
4837 | | // We're now done with the loop, so jump to the continuation block. |
4838 | 4.45k | if (ContBlock) { |
4839 | 890 | EmitBranch(ContBlock); |
4840 | 890 | EmitBlock(ContBlock, true); |
4841 | 890 | } |
4842 | 4.45k | } |
4843 | 4.45k | } |
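
As a usage-level illustration of the schedule split above (illustrative only; the pragma below is not taken from this file): with dist_schedule(static, <chunk>), the StaticChunked path emits the LB/UB-advancing while loop from the comment block, handing chunks to teams round-robin; without a chunk size, the non-chunked path gives each team at most one roughly equal chunk.

    #include <cstdio>

    int main() {
      int a[64] = {};
      // dist_schedule(static, 4): chunks of 4 iterations are assigned to the
      // teams of the league in round-robin order (the StaticChunked lowering).
    #pragma omp target teams distribute dist_schedule(static, 4) \
        map(tofrom : a[0 : 64])
      for (int i = 0; i < 64; ++i)
        a[i] = i + 1;
      printf("%d\n", a[63]); // prints 64
      return 0;
    }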
4844 | | |
4845 | | void CodeGenFunction::EmitOMPDistributeDirective( |
4846 | 110 | const OMPDistributeDirective &S) { |
4847 | 110 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
4848 | 110 | CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); |
4849 | 110 | }; |
4850 | 110 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
4851 | 110 | CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen); |
4852 | 110 | } |
4853 | | |
4854 | | static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, |
4855 | | const CapturedStmt *S, |
4856 | 8 | SourceLocation Loc) { |
4857 | 8 | CodeGenFunction CGF(CGM, /*suppressNewContext=*/true); |
4858 | 8 | CodeGenFunction::CGCapturedStmtInfo CapStmtInfo; |
4859 | 8 | CGF.CapturedStmtInfo = &CapStmtInfo; |
4860 | 8 | llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S, Loc); |
4861 | 8 | Fn->setDoesNotRecurse(); |
4862 | 8 | return Fn; |
4863 | 8 | } |
4864 | | |
4865 | 40 | void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { |
4866 | 40 | if (S.hasClausesOfKind<OMPDependClause>()) { |
4867 | 16 | assert(!S.hasAssociatedStmt() && |
4868 | 16 | "An 'ordered depend' construct must not have an associated statement.");
4869 | 16 | for (const auto *DC : S.getClausesOfKind<OMPDependClause>()) |
4870 | 18 | CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC); |
4871 | 16 | return; |
4872 | 16 | } |
4873 | 24 | const auto *C = S.getSingleClause<OMPSIMDClause>(); |
4874 | 24 | auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF, |
4875 | 24 | PrePostActionTy &Action) { |
4876 | 24 | const CapturedStmt *CS = S.getInnermostCapturedStmt(); |
4877 | 24 | if (C) { |
4878 | 8 | llvm::SmallVector<llvm::Value *, 16> CapturedVars; |
4879 | 8 | CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); |
4880 | 8 | llvm::Function *OutlinedFn = |
4881 | 8 | emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc()); |
4882 | 8 | CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(), |
4883 | 8 | OutlinedFn, CapturedVars); |
4884 | 16 | } else { |
4885 | 16 | Action.Enter(CGF); |
4886 | 16 | CGF.EmitStmt(CS->getCapturedStmt()); |
4887 | 16 | } |
4888 | 24 | }; |
4889 | 24 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
4890 | 24 | CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getBeginLoc(), !C); |
4891 | 24 | } |
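
For reference, a small doacross sketch (illustrative only) of the stand-alone 'ordered depend' form that takes the early-return path above; the block forms, with or without the simd clause, go through emitOrderedRegion instead.

    #include <cstdio>

    int main() {
      int a[16] = {1};
      // ordered(1) declares one level of doacross dependences on the loop.
    #pragma omp parallel for ordered(1)
      for (int i = 1; i < 16; ++i) {
    #pragma omp ordered depend(sink : i - 1) // wait until iteration i-1 is done
        a[i] = a[i - 1] + 1;
    #pragma omp ordered depend(source) // mark iteration i as done
      }
      printf("%d\n", a[15]); // prints 16
      return 0;
    }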
4892 | | |
4893 | | static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val, |
4894 | | QualType SrcType, QualType DestType, |
4895 | 291 | SourceLocation Loc) { |
4896 | 291 | assert(CGF.hasScalarEvaluationKind(DestType) && |
4897 | 291 | "DestType must have scalar evaluation kind."); |
4898 | 291 | assert(!Val.isAggregate() && "Must be a scalar or complex."); |
4899 | 287 | return Val.isScalar() ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, |
4900 | 287 | DestType, Loc) |
4901 | 4 | : CGF.EmitComplexToScalarConversion( |
4902 | 4 | Val.getComplexVal(), SrcType, DestType, Loc); |
4903 | 291 | } |
4904 | | |
4905 | | static CodeGenFunction::ComplexPairTy |
4906 | | convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType, |
4907 | 24 | QualType DestType, SourceLocation Loc) { |
4908 | 24 | assert(CGF.getEvaluationKind(DestType) == TEK_Complex && |
4909 | 24 | "DestType must have complex evaluation kind."); |
4910 | 24 | CodeGenFunction::ComplexPairTy ComplexVal; |
4911 | 24 | if (Val.isScalar()) { |
4912 | | // Convert the input element to the element type of the complex. |
4913 | 6 | QualType DestElementType = |
4914 | 6 | DestType->castAs<ComplexType>()->getElementType(); |
4915 | 6 | llvm::Value *ScalarVal = CGF.EmitScalarConversion( |
4916 | 6 | Val.getScalarVal(), SrcType, DestElementType, Loc); |
4917 | 6 | ComplexVal = CodeGenFunction::ComplexPairTy( |
4918 | 6 | ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType())); |
4919 | 18 | } else { |
4920 | 18 | assert(Val.isComplex() && "Must be a scalar or complex."); |
4921 | 18 | QualType SrcElementType = SrcType->castAs<ComplexType>()->getElementType(); |
4922 | 18 | QualType DestElementType = |
4923 | 18 | DestType->castAs<ComplexType>()->getElementType(); |
4924 | 18 | ComplexVal.first = CGF.EmitScalarConversion( |
4925 | 18 | Val.getComplexVal().first, SrcElementType, DestElementType, Loc); |
4926 | 18 | ComplexVal.second = CGF.EmitScalarConversion( |
4927 | 18 | Val.getComplexVal().second, SrcElementType, DestElementType, Loc); |
4928 | 18 | } |
4929 | 24 | return ComplexVal; |
4930 | 24 | } |
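
These two helpers handle 'v = x;' forms whose sides differ in evaluation kind. A minimal sketch of the scalar-to-complex case (illustrative only; uses Clang's _Complex and __real__ extensions in C++):

    #include <cstdio>

    int main() {
      double x = 2.5;
      double _Complex v;
      // Scalar -> complex path in convertToComplexValue: the real part is
      // converted and the imaginary part is zero-filled.
    #pragma omp atomic read
      v = x;
      printf("%f\n", __real__ v); // prints 2.500000
      return 0;
    }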
4931 | | |
4932 | | static void emitSimpleAtomicStore(CodeGenFunction &CGF, llvm::AtomicOrdering AO, |
4933 | 110 | LValue LVal, RValue RVal) { |
4934 | 110 | if (LVal.isGlobalReg()) |
4935 | 0 | CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal); |
4936 | 110 | else |
4937 | 110 | CGF.EmitAtomicStore(RVal, LVal, AO, LVal.isVolatile(), /*isInit=*/false); |
4938 | 110 | } |
4939 | | |
4940 | | static RValue emitSimpleAtomicLoad(CodeGenFunction &CGF, |
4941 | | llvm::AtomicOrdering AO, LValue LVal, |
4942 | 108 | SourceLocation Loc) { |
4943 | 108 | if (LVal.isGlobalReg()) |
4944 | 2 | return CGF.EmitLoadOfLValue(LVal, Loc); |
4945 | 106 | return CGF.EmitAtomicLoad( |
4946 | 106 | LVal, Loc, llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO), |
4947 | 106 | LVal.isVolatile()); |
4948 | 106 | } |
4949 | | |
4950 | | void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal, |
4951 | 313 | QualType RValTy, SourceLocation Loc) { |
4952 | 313 | switch (getEvaluationKind(LVal.getType())) { |
4953 | 289 | case TEK_Scalar: |
4954 | 289 | EmitStoreThroughLValue(RValue::get(convertToScalarValue( |
4955 | 289 | *this, RVal, RValTy, LVal.getType(), Loc)), |
4956 | 289 | LVal); |
4957 | 289 | break; |
4958 | 24 | case TEK_Complex: |
4959 | 24 | EmitStoreOfComplex( |
4960 | 24 | convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal, |
4961 | 24 | /*isInit=*/false); |
4962 | 24 | break; |
4963 | 0 | case TEK_Aggregate: |
4964 | 0 | llvm_unreachable("Must be a scalar or complex."); |
4965 | 313 | } |
4966 | 313 | } |
4967 | | |
4968 | | static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, llvm::AtomicOrdering AO, |
4969 | | const Expr *X, const Expr *V, |
4970 | 108 | SourceLocation Loc) { |
4971 | | // v = x; |
4972 | 108 | assert(V->isLValue() && "V of 'omp atomic read' is not lvalue"); |
4973 | 108 | assert(X->isLValue() && "X of 'omp atomic read' is not lvalue"); |
4974 | 108 | LValue XLValue = CGF.EmitLValue(X); |
4975 | 108 | LValue VLValue = CGF.EmitLValue(V); |
4976 | 108 | RValue Res = emitSimpleAtomicLoad(CGF, AO, XLValue, Loc); |
4977 | | // OpenMP, 2.17.7, atomic Construct |
4978 | | // If the read or capture clause is specified and the acquire, acq_rel, or |
4979 | | // seq_cst clause is specified then the strong flush on exit from the atomic |
4980 | | // operation is also an acquire flush. |
4981 | 108 | switch (AO) { |
4982 | 4 | case llvm::AtomicOrdering::Acquire: |
4983 | 4 | case llvm::AtomicOrdering::AcquireRelease: |
4984 | 14 | case llvm::AtomicOrdering::SequentiallyConsistent: |
4985 | 14 | CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc, |
4986 | 14 | llvm::AtomicOrdering::Acquire); |
4987 | 14 | break; |
4988 | 94 | case llvm::AtomicOrdering::Monotonic: |
4989 | 94 | case llvm::AtomicOrdering::Release: |
4990 | 94 | break; |
4991 | 0 | case llvm::AtomicOrdering::NotAtomic: |
4992 | 0 | case llvm::AtomicOrdering::Unordered: |
4993 | 0 | llvm_unreachable("Unexpected ordering."); |
4994 | 108 | } |
4995 | 108 | CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc); |
4996 | 108 | CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V); |
4997 | 108 | } |
4998 | | |
4999 | | static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF, |
5000 | | llvm::AtomicOrdering AO, const Expr *X, |
5001 | 110 | const Expr *E, SourceLocation Loc) { |
5002 | | // x = expr; |
5003 | 110 | assert(X->isLValue() && "X of 'omp atomic write' is not lvalue"); |
5004 | 110 | emitSimpleAtomicStore(CGF, AO, CGF.EmitLValue(X), CGF.EmitAnyExpr(E)); |
5005 | 110 | CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X); |
5006 | | // OpenMP, 2.17.7, atomic Construct |
5007 | | // If the write, update, or capture clause is specified and the release, |
5008 | | // acq_rel, or seq_cst clause is specified then the strong flush on entry to |
5009 | | // the atomic operation is also a release flush. |
5010 | 110 | switch (AO) { |
5011 | 4 | case llvm::AtomicOrdering::Release: |
5012 | 4 | case llvm::AtomicOrdering::AcquireRelease: |
5013 | 14 | case llvm::AtomicOrdering::SequentiallyConsistent: |
5014 | 14 | CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc, |
5015 | 14 | llvm::AtomicOrdering::Release); |
5016 | 14 | break; |
5017 | 0 | case llvm::AtomicOrdering::Acquire: |
5018 | 96 | case llvm::AtomicOrdering::Monotonic: |
5019 | 96 | break; |
5020 | 0 | case llvm::AtomicOrdering::NotAtomic: |
5021 | 0 | case llvm::AtomicOrdering::Unordered: |
5022 | 0 | llvm_unreachable("Unexpected ordering."); |
5023 | 110 | } |
5024 | 110 | } |
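
The two ordering switches above implement the OpenMP 2.17.7 flush rules: acquire-side orderings make the exit flush of an atomic read an acquire flush, and release-side orderings make the entry flush of an atomic write a release flush. A minimal sketch of clauses that reach these paths (illustrative only; the memory-order clauses need OpenMP 5.0, e.g. -fopenmp-version=50):

    int x = 0, v = 0;

    void writer() {
    #pragma omp atomic write release // release flush before the store
      x = 1;
    }

    void reader() {
    #pragma omp atomic read acquire // acquire flush after the load
      v = x;
    }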
5025 | | |
5026 | | static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, |
5027 | | RValue Update, |
5028 | | BinaryOperatorKind BO, |
5029 | | llvm::AtomicOrdering AO, |
5030 | 655 | bool IsXLHSInRHSPart) { |
5031 | 655 | ASTContext &Context = CGF.getContext(); |
5032 | | // Allow atomicrmw only if 'x' and 'update' are integer values, the lvalue
5033 | | // for the 'x' expression is simple, and atomics are allowed for the given
5034 | | // type on the target platform.
5035 | 655 | if (BO == BO_Comma || !Update.isScalar() || |
5036 | 629 | !Update.getScalarVal()->getType()->isIntegerTy() || !X.isSimple() ||
|