/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This provides a class for OpenMP runtime code generation. |
10 | | // |
11 | | //===----------------------------------------------------------------------===// |
12 | | |
13 | | #include "CGOpenMPRuntime.h" |
14 | | #include "CGCXXABI.h" |
15 | | #include "CGCleanup.h" |
16 | | #include "CGRecordLayout.h" |
17 | | #include "CodeGenFunction.h" |
18 | | #include "TargetInfo.h" |
19 | | #include "clang/AST/APValue.h" |
20 | | #include "clang/AST/Attr.h" |
21 | | #include "clang/AST/Decl.h" |
22 | | #include "clang/AST/OpenMPClause.h" |
23 | | #include "clang/AST/StmtOpenMP.h" |
24 | | #include "clang/AST/StmtVisitor.h" |
25 | | #include "clang/Basic/BitmaskEnum.h" |
26 | | #include "clang/Basic/FileManager.h" |
27 | | #include "clang/Basic/OpenMPKinds.h" |
28 | | #include "clang/Basic/SourceManager.h" |
29 | | #include "clang/CodeGen/ConstantInitBuilder.h" |
30 | | #include "llvm/ADT/ArrayRef.h" |
31 | | #include "llvm/ADT/SetOperations.h" |
32 | | #include "llvm/ADT/SmallBitVector.h" |
33 | | #include "llvm/ADT/StringExtras.h" |
34 | | #include "llvm/Bitcode/BitcodeReader.h" |
35 | | #include "llvm/IR/Constants.h" |
36 | | #include "llvm/IR/DerivedTypes.h" |
37 | | #include "llvm/IR/GlobalValue.h" |
38 | | #include "llvm/IR/InstrTypes.h" |
39 | | #include "llvm/IR/Value.h" |
40 | | #include "llvm/Support/AtomicOrdering.h" |
41 | | #include "llvm/Support/Format.h" |
42 | | #include "llvm/Support/raw_ostream.h" |
43 | | #include <cassert> |
44 | | #include <numeric> |
45 | | |
46 | | using namespace clang; |
47 | | using namespace CodeGen; |
48 | | using namespace llvm::omp; |
49 | | |
50 | | namespace { |
51 | | /// Base class for handling code generation inside OpenMP regions. Stores the region kind, the codegen callback, the directive kind and the cancel flag that every region flavour shares. |
52 | | class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { |
53 | | public: |
54 | | /// Kinds of OpenMP regions used in codegen. Subclasses compare against this value in classof(). |
55 | | enum CGOpenMPRegionKind { |
56 | | /// Region with outlined function for standalone 'parallel' |
57 | | /// directive. |
58 | | ParallelOutlinedRegion, |
59 | | /// Region with outlined function for standalone 'task' directive. |
60 | | TaskOutlinedRegion, |
61 | | /// Region for constructs that do not require function outlining, |
62 | | /// like 'for', 'sections', 'atomic' etc. directives. |
63 | | InlinedRegion, |
64 | | /// Region with outlined function for standalone 'target' directive. |
65 | | TargetRegion, |
66 | | }; |
67 | | |
68 | | CGOpenMPRegionInfo(const CapturedStmt &CS, |
69 | | const CGOpenMPRegionKind RegionKind, |
70 | | const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, |
71 | | bool HasCancel) |
72 | | : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), |
73 | 24.7k | CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} |
74 | | |
75 | | CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, |
76 | | const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, |
77 | | bool HasCancel) |
78 | | : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), |
79 | 40.1k | Kind(Kind), HasCancel(HasCancel) {} |
80 | | |
81 | | /// Get a variable or parameter for storing global thread id |
82 | | /// inside OpenMP construct. Some overrides in this file return null. |
83 | | virtual const VarDecl *getThreadIDVariable() const = 0; |
84 | | |
85 | | /// Emit the captured statement body. |
86 | | void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; |
87 | | |
88 | | /// Get an LValue for the current ThreadID variable. |
89 | | /// \return LValue for thread id variable. This LValue always has type int32*. |
90 | | virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); |
91 | | |
92 | 29 | virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {} |
93 | | |
94 | 39.9k | CGOpenMPRegionKind getRegionKind() const { return RegionKind; } |
95 | | |
96 | 187 | OpenMPDirectiveKind getDirectiveKind() const { return Kind; } |
97 | | |
98 | 358 | bool hasCancel() const { return HasCancel; } |
99 | | |
100 | 104k | static bool classof(const CGCapturedStmtInfo *Info) { |
101 | 104k | return Info->getKind() == CR_OpenMP; |
102 | 104k | } |
103 | | |
104 | 64.9k | ~CGOpenMPRegionInfo() override = default; |
105 | | |
106 | | protected: |
107 | | CGOpenMPRegionKind RegionKind; |
108 | | RegionCodeGenTy CodeGen; |
109 | | OpenMPDirectiveKind Kind; |
110 | | bool HasCancel; |
111 | | }; |
112 | | |
113 | | /// Region info registered as ParallelOutlinedRegion. Holds the thread id variable (asserted non-null) and the helper name supplied by the caller. |
114 | | class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { |
115 | | public: |
116 | | CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, |
117 | | const RegionCodeGenTy &CodeGen, |
118 | | OpenMPDirectiveKind Kind, bool HasCancel, |
119 | | StringRef HelperName) |
120 | | : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, |
121 | | HasCancel), |
122 | 11.9k | ThreadIDVar(ThreadIDVar), HelperName(HelperName) { |
123 | 11.9k | assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); |
124 | 11.9k | } |
125 | | |
126 | | /// Get a variable or parameter for storing global thread id |
127 | | /// inside OpenMP construct. |
128 | 37.4k | const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } |
129 | | |
130 | | /// Get the name of the capture helper (the name passed to the constructor). |
131 | 11.9k | StringRef getHelperName() const override { return HelperName; } |
132 | | |
133 | 0 | static bool classof(const CGCapturedStmtInfo *Info) { |
134 | 0 | return CGOpenMPRegionInfo::classof(Info) && |
135 | 0 | cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == |
136 | 0 | ParallelOutlinedRegion; |
137 | 0 | } |
138 | | |
139 | | private: |
140 | | /// A variable or parameter storing global thread id for OpenMP |
141 | | /// constructs. |
142 | | const VarDecl *ThreadIDVar; |
143 | | StringRef HelperName; |
144 | | }; |
145 | | |
146 | | /// Region info registered as TaskOutlinedRegion. Forwards emitUntiedSwitch() to the UntiedTaskActionTy it was constructed with. |
147 | | class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { |
148 | | public: |
149 | | class UntiedTaskActionTy final : public PrePostActionTy { |
150 | | bool Untied; |
151 | | const VarDecl *PartIDVar; |
152 | | const RegionCodeGenTy UntiedCodeGen; |
153 | | llvm::SwitchInst *UntiedSwitch = nullptr; |
154 | | |
155 | | public: |
156 | | UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, |
157 | | const RegionCodeGenTy &UntiedCodeGen) |
158 | 930 | : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} |
159 | 930 | void Enter(CodeGenFunction &CGF) override { |
160 | 930 | if (Untied) { |
161 | | // Emit task switching point: switch on the value loaded through PartIDVar, with .untied.done. as the default destination and case 0 continuing into the task body. |
162 | 28 | LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( |
163 | 28 | CGF.GetAddrOfLocalVar(PartIDVar), |
164 | 28 | PartIDVar->getType()->castAs<PointerType>()); |
165 | 28 | llvm::Value *Res = |
166 | 28 | CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation()); |
167 | 28 | llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done."); |
168 | 28 | UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); |
169 | 28 | CGF.EmitBlock(DoneBB); |
170 | 28 | CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); |
171 | 28 | CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); |
172 | 28 | UntiedSwitch->addCase(CGF.Builder.getInt32(0), |
173 | 28 | CGF.Builder.GetInsertBlock()); |
174 | 28 | emitUntiedSwitch(CGF); |
175 | 28 | } |
176 | 930 | } |
177 | 56 | void emitUntiedSwitch(CodeGenFunction &CGF) const { |
178 | 56 | if (Untied) { |
179 | 52 | LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( |
180 | 52 | CGF.GetAddrOfLocalVar(PartIDVar), |
181 | 52 | PartIDVar->getType()->castAs<PointerType>()); |
182 | 52 | CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), |
183 | 52 | PartIdLVal); |
184 | 52 | UntiedCodeGen(CGF); |
185 | 52 | CodeGenFunction::JumpDest CurPoint = |
186 | 52 | CGF.getJumpDestInCurrentScope(".untied.next."); |
187 | 52 | CGF.EmitBranch(CGF.ReturnBlock.getBlock()); |
188 | 52 | CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); |
189 | 52 | UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), |
190 | 52 | CGF.Builder.GetInsertBlock()); |
191 | 52 | CGF.EmitBranchThroughCleanup(CurPoint); |
192 | 52 | CGF.EmitBlock(CurPoint.getBlock()); |
193 | 52 | } |
194 | 56 | } |
195 | 28 | unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); } |
196 | | }; |
197 | | CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, |
198 | | const VarDecl *ThreadIDVar, |
199 | | const RegionCodeGenTy &CodeGen, |
200 | | OpenMPDirectiveKind Kind, bool HasCancel, |
201 | | const UntiedTaskActionTy &Action) |
202 | | : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), |
203 | 930 | ThreadIDVar(ThreadIDVar), Action(Action) { |
204 | 930 | assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); |
205 | 930 | } |
206 | | |
207 | | /// Get a variable or parameter for storing global thread id |
208 | | /// inside OpenMP construct. |
209 | 333 | const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } |
210 | | |
211 | | /// Get an LValue for the current ThreadID variable. |
212 | | LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; |
213 | | |
214 | | /// Get the name of the capture helper (always ".omp_outlined." for this region kind). |
215 | 930 | StringRef getHelperName() const override { return ".omp_outlined."; } |
216 | | |
217 | 28 | void emitUntiedSwitch(CodeGenFunction &CGF) override { |
218 | 28 | Action.emitUntiedSwitch(CGF); |
219 | 28 | } |
220 | | |
221 | 0 | static bool classof(const CGCapturedStmtInfo *Info) { |
222 | 0 | return CGOpenMPRegionInfo::classof(Info) && |
223 | 0 | cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == |
224 | 0 | TaskOutlinedRegion; |
225 | 0 | } |
226 | | |
227 | | private: |
228 | | /// A variable or parameter storing global thread id for OpenMP |
229 | | /// constructs. |
230 | | const VarDecl *ThreadIDVar; |
231 | | /// Action for emitting code for untied tasks. Held by reference, not copied. |
232 | | const UntiedTaskActionTy &Action; |
233 | | }; |
234 | | |
235 | | /// API for inlined captured statement code generation in OpenMP |
236 | | /// constructs. Most queries are forwarded to the enclosing OpenMP region info when there is one. |
237 | | class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { |
238 | | public: |
239 | | CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, |
240 | | const RegionCodeGenTy &CodeGen, |
241 | | OpenMPDirectiveKind Kind, bool HasCancel) |
242 | | : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), |
243 | | OldCSI(OldCSI), |
244 | 40.1k | OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} |
245 | | |
246 | | // Retrieve the value of the context parameter from the outer region; unreachable when there is none. |
247 | 0 | llvm::Value *getContextValue() const override { |
248 | 0 | if (OuterRegionInfo) |
249 | 0 | return OuterRegionInfo->getContextValue(); |
250 | 0 | llvm_unreachable("No context value for inlined OpenMP region"); |
251 | 0 | } |
252 | | |
253 | 0 | void setContextValue(llvm::Value *V) override { |
254 | 0 | if (OuterRegionInfo) { |
255 | 0 | OuterRegionInfo->setContextValue(V); |
256 | 0 | return; |
257 | 0 | } |
258 | 0 | llvm_unreachable("No context value for inlined OpenMP region"); |
259 | 0 | } |
260 | | |
261 | | /// Lookup the captured field decl for a variable. |
262 | 26.2k | const FieldDecl *lookup(const VarDecl *VD) const override { |
263 | 26.2k | if (OuterRegionInfo) |
264 | 13.1k | return OuterRegionInfo->lookup(VD); |
265 | | // If there is no outer outlined region, there is no need to look up the |
266 | | // list of captured variables; we can use the original one. |
267 | 13.0k | return nullptr; |
268 | 26.2k | } |
269 | | |
270 | 0 | FieldDecl *getThisFieldDecl() const override { |
271 | 0 | if (OuterRegionInfo) |
272 | 0 | return OuterRegionInfo->getThisFieldDecl(); |
273 | 0 | return nullptr; |
274 | 0 | } |
275 | | |
276 | | /// Get a variable or parameter for storing global thread id |
277 | | /// inside OpenMP construct. Returns null when there is no outer OpenMP region. |
278 | 6.74k | const VarDecl *getThreadIDVariable() const override { |
279 | 6.74k | if (OuterRegionInfo) |
280 | 6.52k | return OuterRegionInfo->getThreadIDVariable(); |
281 | 227 | return nullptr; |
282 | 6.74k | } |
283 | | |
284 | | /// Get an LValue for the current ThreadID variable. Unreachable when there is no outer OpenMP region. |
285 | 6.52k | LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override { |
286 | 6.52k | if (OuterRegionInfo) |
287 | 6.52k | return OuterRegionInfo->getThreadIDVariableLValue(CGF); |
288 | 0 | llvm_unreachable("No LValue for inlined OpenMP construct"); |
289 | 0 | } |
290 | | |
291 | | /// Get the name of the capture helper. Note this asks the previous captured-statement info (getOldCSI()), which need not be an OpenMP region. |
292 | 0 | StringRef getHelperName() const override { |
293 | 0 | if (auto *OuterRegionInfo = getOldCSI()) |
294 | 0 | return OuterRegionInfo->getHelperName(); |
295 | 0 | llvm_unreachable("No helper name for inlined OpenMP construct"); |
296 | 0 | } |
297 | | |
298 | 17 | void emitUntiedSwitch(CodeGenFunction &CGF) override { |
299 | 17 | if (OuterRegionInfo) |
300 | 15 | OuterRegionInfo->emitUntiedSwitch(CGF); |
301 | 17 | } |
302 | | |
303 | 39.9k | CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } |
304 | | |
305 | 39.9k | static bool classof(const CGCapturedStmtInfo *Info) { |
306 | 39.9k | return CGOpenMPRegionInfo::classof(Info) && |
307 | 39.9k | cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; |
308 | 39.9k | } |
309 | | |
310 | 40.1k | ~CGOpenMPInlinedRegionInfo() override = default; |
311 | | |
312 | | private: |
313 | | /// Captured-statement info that was active before this region; OuterRegionInfo is the same object when it is an OpenMP region, and null otherwise. |
314 | | CodeGenFunction::CGCapturedStmtInfo *OldCSI; |
315 | | CGOpenMPRegionInfo *OuterRegionInfo; |
316 | | }; |
317 | | |
318 | | /// API for captured statement code generation in OpenMP target |
319 | | /// constructs. For these captures, implicit parameters are used instead of the |
320 | | /// captured fields. The name of the target region has to be unique in a given |
321 | | /// application so it is provided by the client, because only the client has |
322 | | /// the information to generate that. |
323 | | class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { |
324 | | public: |
325 | | CGOpenMPTargetRegionInfo(const CapturedStmt &CS, |
326 | | const RegionCodeGenTy &CodeGen, StringRef HelperName) |
327 | | : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, |
328 | | /*HasCancel=*/false), |
329 | 11.9k | HelperName(HelperName) {} |
330 | | |
331 | | /// This is unused for target regions because each starts executing |
332 | | /// with a single thread, so null is returned. |
333 | 2.02k | const VarDecl *getThreadIDVariable() const override { return nullptr; } |
334 | | |
335 | | /// Get the name of the capture helper (the client-provided region name). |
336 | 12.0k | StringRef getHelperName() const override { return HelperName; } |
337 | | |
338 | 0 | static bool classof(const CGCapturedStmtInfo *Info) { |
339 | 0 | return CGOpenMPRegionInfo::classof(Info) && |
340 | 0 | cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; |
341 | 0 | } |
342 | | |
343 | | private: |
344 | | StringRef HelperName; |
345 | | }; |
346 | | /** Placeholder codegen callback handed to CGOpenMPInnerExprInfo; it must never be invoked and is unreachable if it is. */ |
347 | 0 | static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { |
348 | 0 | llvm_unreachable("No codegen for expressions"); |
349 | 0 | } |
350 | | /// API for generation of expressions captured in an innermost OpenMP |
351 | | /// region. There is no body, thread id or helper name for such expressions; those queries are unreachable. |
352 | | class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { |
353 | | public: |
354 | | CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) |
355 | | : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, |
356 | | OMPD_unknown, |
357 | | /*HasCancel=*/false), |
358 | 282 | PrivScope(CGF) { |
359 | | // Make sure the globals captured in the provided statement are local by |
360 | | // using the privatization logic. We assume the same variable is not |
361 | | // captured more than once. Captures of local variables and parameters are skipped. |
362 | 412 | for (const auto &C : CS.captures()) { |
363 | 412 | if (!C.capturesVariable() && !C.capturesVariableByCopy()320 ) |
364 | 16 | continue; |
365 | | |
366 | 396 | const VarDecl *VD = C.getCapturedVar(); |
367 | 396 | if (VD->isLocalVarDeclOrParm()) |
368 | 318 | continue; |
369 | | |
370 | 78 | DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), |
371 | 78 | /*RefersToEnclosingVariableOrCapture=*/false, |
372 | 78 | VD->getType().getNonReferenceType(), VK_LValue, |
373 | 78 | C.getLocation()); |
374 | 78 | PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF)); |
375 | 78 | } |
376 | 282 | (void)PrivScope.Privatize(); |
377 | 282 | } |
378 | | |
379 | | /// Lookup the captured field decl for a variable; returns whatever the inlined-region lookup returns. |
380 | 0 | const FieldDecl *lookup(const VarDecl *VD) const override { |
381 | 0 | if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) |
382 | 0 | return FD; |
383 | 0 | return nullptr; |
384 | 0 | } |
385 | | |
386 | | /// Emit the captured statement body. |
387 | 0 | void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { |
388 | 0 | llvm_unreachable("No body for expressions"); |
389 | 0 | } |
390 | | |
391 | | /// Get a variable or parameter for storing global thread id |
392 | | /// inside OpenMP construct. |
393 | 0 | const VarDecl *getThreadIDVariable() const override { |
394 | 0 | llvm_unreachable("No thread id for expressions"); |
395 | 0 | } |
396 | | |
397 | | /// Get the name of the capture helper. |
398 | 0 | StringRef getHelperName() const override { |
399 | 0 | llvm_unreachable("No helper name for expressions"); |
400 | 0 | } |
401 | | |
402 | 0 | static bool classof(const CGCapturedStmtInfo *Info) { return false; } |
403 | | |
404 | | private: |
405 | | /// Private scope to capture global variables. |
406 | | CodeGenFunction::OMPPrivateScope PrivScope; |
407 | | }; |
408 | | |
409 | | /// RAII for emitting code of OpenMP constructs: the constructor installs a new CGOpenMPInlinedRegionInfo as CGF.CapturedStmtInfo and the destructor deletes it and restores the previous one. |
410 | | class InlinedOpenMPRegionRAII { |
411 | | CodeGenFunction &CGF; |
412 | | llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; |
413 | | FieldDecl *LambdaThisCaptureField = nullptr; |
414 | | const CodeGen::CGBlockInfo *BlockInfo = nullptr; |
415 | | bool NoInheritance = false; |
416 | | |
417 | | public: |
418 | | /// Constructs region for combined constructs. |
419 | | /// \param CodeGen Code generation sequence for combined directives. Includes |
420 | | /// a list of functions used for code generation of implicitly inlined |
421 | | /// regions. |
422 | | InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, |
423 | | OpenMPDirectiveKind Kind, bool HasCancel, |
424 | | bool NoInheritance = true) |
425 | 39.9k | : CGF(CGF), NoInheritance(NoInheritance) { |
426 | | // Start emission for the construct. With NoInheritance set, CGF's lambda capture fields, lambda 'this' field and block info are stashed here and cleared. |
427 | 39.9k | CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( |
428 | 39.9k | CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); |
429 | 39.9k | if (NoInheritance) { |
430 | 39.4k | std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); |
431 | 39.4k | LambdaThisCaptureField = CGF.LambdaThisCaptureField; |
432 | 39.4k | CGF.LambdaThisCaptureField = nullptr; |
433 | 39.4k | BlockInfo = CGF.BlockInfo; |
434 | 39.4k | CGF.BlockInfo = nullptr; |
435 | 39.4k | } |
436 | 39.9k | } |
437 | | |
438 | 39.9k | ~InlinedOpenMPRegionRAII() { |
439 | | // Restore original CapturedStmtInfo only if we're done with code emission. The stashed lambda/block state is put back when NoInheritance was set. |
440 | 39.9k | auto *OldCSI = |
441 | 39.9k | cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); |
442 | 39.9k | delete CGF.CapturedStmtInfo; |
443 | 39.9k | CGF.CapturedStmtInfo = OldCSI; |
444 | 39.9k | if (NoInheritance) { |
445 | 39.4k | std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); |
446 | 39.4k | CGF.LambdaThisCaptureField = LambdaThisCaptureField; |
447 | 39.4k | CGF.BlockInfo = BlockInfo; |
448 | 39.4k | } |
449 | 39.9k | } |
450 | | }; |
451 | | |
452 | | /// Values for bit flags used in the ident_t to describe the fields. |
453 | | /// All enumerators are named and described in accordance with the code |
454 | | /// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h |
455 | | enum OpenMPLocationFlags : unsigned { |
456 | | /// Use trampoline for internal microtask. |
457 | | OMP_IDENT_IMD = 0x01, |
458 | | /// Use c-style ident structure. |
459 | | OMP_IDENT_KMPC = 0x02, |
460 | | /// Atomic reduction option for kmpc_reduce. |
461 | | OMP_ATOMIC_REDUCE = 0x10, |
462 | | /// Explicit 'barrier' directive. |
463 | | OMP_IDENT_BARRIER_EXPL = 0x20, |
464 | | /// Implicit barrier in code. |
465 | | OMP_IDENT_BARRIER_IMPL = 0x40, |
466 | | /// Implicit barrier in 'for' directive (same value as OMP_IDENT_BARRIER_IMPL). |
467 | | OMP_IDENT_BARRIER_IMPL_FOR = 0x40, |
468 | | /// Implicit barrier in 'sections' directive. |
469 | | OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, |
470 | | /// Implicit barrier in 'single' directive. |
471 | | OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, |
472 | | /// Call of __kmp_for_static_init for static loop. |
473 | | OMP_IDENT_WORK_LOOP = 0x200, |
474 | | /// Call of __kmp_for_static_init for sections. |
475 | | OMP_IDENT_WORK_SECTIONS = 0x400, |
476 | | /// Call of __kmp_for_static_init for distribute. |
477 | | OMP_IDENT_WORK_DISTRIBUTE = 0x800, |
478 | | LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) |
479 | | }; |
480 | | |
481 | | namespace { |
482 | | LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); |
483 | | /// Values for bit flags for marking which 'requires' clauses have been used. |
484 | | enum OpenMPOffloadingRequiresDirFlags : int64_t { |
485 | | /// Flag undefined. |
486 | | OMP_REQ_UNDEFINED = 0x000, |
487 | | /// No requires clause present. |
488 | | OMP_REQ_NONE = 0x001, |
489 | | /// reverse_offload clause. |
490 | | OMP_REQ_REVERSE_OFFLOAD = 0x002, |
491 | | /// unified_address clause. |
492 | | OMP_REQ_UNIFIED_ADDRESS = 0x004, |
493 | | /// unified_shared_memory clause. |
494 | | OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, |
495 | | /// dynamic_allocators clause. |
496 | | OMP_REQ_DYNAMIC_ALLOCATORS = 0x010, |
497 | | LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS) |
498 | | }; |
499 | | |
500 | | enum OpenMPOffloadingReservedDeviceIDs { |
501 | | /// Device ID used when the device was not defined; the runtime should get it |
502 | | /// from environment variables in the spec. |
503 | | OMP_DEVICEID_UNDEF = -1, |
504 | | }; |
505 | | } // anonymous namespace |
506 | | |
507 | | /// Indices of the fields of the ident structure that describes a source location. |
508 | | /// All descriptions are taken from |
509 | | /// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h |
510 | | /// Original structure: |
511 | | /// typedef struct ident { |
512 | | /// kmp_int32 reserved_1; /**< might be used in Fortran; |
513 | | /// see above */ |
514 | | /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; |
515 | | /// KMP_IDENT_KMPC identifies this union |
516 | | /// member */ |
517 | | /// kmp_int32 reserved_2; /**< not really used in Fortran any more; |
518 | | /// see above */ |
519 | | ///#if USE_ITT_BUILD |
520 | | /// /* but currently used for storing |
521 | | /// region-specific ITT */ |
522 | | /// /* contextual information. */ |
523 | | ///#endif /* USE_ITT_BUILD */ |
524 | | /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for |
525 | | /// C++ */ |
526 | | /// char const *psource; /**< String describing the source location. |
527 | | /// The string is composed of semi-colon separated |
528 | | /// fields which describe the source file, |
529 | | /// the function and a pair of line numbers that |
530 | | /// delimit the construct. |
531 | | /// */ |
532 | | /// } ident_t; |
533 | | enum IdentFieldIndex { |
534 | | /// Might be used in Fortran. |
535 | | IdentField_Reserved_1, |
536 | | /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. |
537 | | IdentField_Flags, |
538 | | /// Not really used in Fortran any more. |
539 | | IdentField_Reserved_2, |
540 | | /// Source[4] in Fortran, do not use for C++. |
541 | | IdentField_Reserved_3, |
542 | | /// String describing the source location. The string is composed of |
543 | | /// semi-colon separated fields which describe the source file, the function |
544 | | /// and a pair of line numbers that delimit the construct. |
545 | | IdentField_PSource |
546 | | }; |
547 | | |
548 | | /// Schedule types for 'omp for' loops (these enumerators are taken from |
549 | | /// the enum sched_type in kmp.h). |
550 | | enum OpenMPSchedType { |
551 | | /// Lower bound for default (unordered) versions. |
552 | | OMP_sch_lower = 32, |
553 | | OMP_sch_static_chunked = 33, |
554 | | OMP_sch_static = 34, |
555 | | OMP_sch_dynamic_chunked = 35, |
556 | | OMP_sch_guided_chunked = 36, |
557 | | OMP_sch_runtime = 37, |
558 | | OMP_sch_auto = 38, |
559 | | /// Static with chunk adjustment (e.g., simd). |
560 | | OMP_sch_static_balanced_chunked = 45, |
561 | | /// Lower bound for 'ordered' versions. |
562 | | OMP_ord_lower = 64, |
563 | | OMP_ord_static_chunked = 65, |
564 | | OMP_ord_static = 66, |
565 | | OMP_ord_dynamic_chunked = 67, |
566 | | OMP_ord_guided_chunked = 68, |
567 | | OMP_ord_runtime = 69, |
568 | | OMP_ord_auto = 70, |
569 | | OMP_sch_default = OMP_sch_static, |
570 | | /// Schedule types for 'dist_schedule'. |
571 | | OMP_dist_sch_static_chunked = 91, |
572 | | OMP_dist_sch_static = 92, |
573 | | /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. |
574 | | /// Set if the monotonic schedule modifier was present. |
575 | | OMP_sch_modifier_monotonic = (1 << 29), |
576 | | /// Set if the nonmonotonic schedule modifier was present. |
577 | | OMP_sch_modifier_nonmonotonic = (1 << 30), |
578 | | }; |
579 | | |
580 | | /// Cleanup for the pre|post-action of an advanced codegen sequence for an OpenMP |
581 | | /// region: when emitted, it calls Action->Exit(CGF) unless CGF has no insert point. |
582 | | class CleanupTy final : public EHScopeStack::Cleanup { |
583 | | PrePostActionTy *Action; |
584 | | |
585 | | public: |
586 | 15.7k | explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} |
587 | 15.7k | void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { |
588 | 15.7k | if (!CGF.HaveInsertPoint()) |
589 | 0 | return; |
590 | 15.7k | Action->Exit(CGF); |
591 | 15.7k | } |
592 | | }; |
593 | | |
594 | | } // anonymous namespace |
595 | | /** Run the stored codegen callback inside a fresh RunCleanupsScope. If a pre/post action is attached, a CleanupTy for it is pushed first and the action is passed to the callback; otherwise a default-constructed PrePostActionTy is passed. */ |
596 | 125k | void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { |
597 | 125k | CodeGenFunction::RunCleanupsScope Scope(CGF); |
598 | 125k | if (PrePostAction) { |
599 | 15.7k | CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); |
600 | 15.7k | Callback(CodeGen, CGF, *PrePostAction); |
601 | 109k | } else { |
602 | 109k | PrePostActionTy Action; |
603 | 109k | Callback(CodeGen, CGF, Action); |
604 | 109k | } |
605 | 125k | } |
606 | | |
607 | | /// Check if the reduction op is a call whose callee is an opaque value wrapping a reference to a UDR decl; if so, return the |
608 | | /// UDR decl used for reduction, otherwise return null. |
609 | | static const OMPDeclareReductionDecl * |
610 | 1.11k | getReductionInit(const Expr *ReductionOp) { |
611 | 1.11k | if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) |
612 | 146 | if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) |
613 | 83 | if (const auto *DRE = |
614 | 83 | dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) |
615 | 83 | if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) |
616 | 83 | return DRD; |
617 | 1.03k | return nullptr; |
618 | 1.11k | } |
619 | | /** Initialize Private for a reduction described by DRD. If DRD has an initializer, InitOp is emitted with the variables behind its two arguments privatized to Private and Original and its callee mapped to the second function returned by getUserDefinedReduction(DRD). Otherwise a private constant global holding the null constant of Ty is created and its value is stored into Private. */ |
620 | | static void emitInitWithReductionInitializer(CodeGenFunction &CGF, |
621 | | const OMPDeclareReductionDecl *DRD, |
622 | | const Expr *InitOp, |
623 | | Address Private, Address Original, |
624 | 67 | QualType Ty) { |
625 | 67 | if (DRD->getInitializer()) { |
626 | 53 | std::pair<llvm::Function *, llvm::Function *> Reduction = |
627 | 53 | CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); |
628 | 53 | const auto *CE = cast<CallExpr>(InitOp); |
629 | 53 | const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); |
630 | 53 | const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); |
631 | 53 | const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); |
632 | 53 | const auto *LHSDRE = |
633 | 53 | cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); |
634 | 53 | const auto *RHSDRE = |
635 | 53 | cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); |
636 | 53 | CodeGenFunction::OMPPrivateScope PrivateScope(CGF); |
637 | 53 | PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private); |
638 | 53 | PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original); |
639 | 53 | (void)PrivateScope.Privatize(); |
640 | 53 | RValue Func = RValue::get(Reduction.second); |
641 | 53 | CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); |
642 | 53 | CGF.EmitIgnoredExpr(InitOp); |
643 | 53 | } else { |
644 | 14 | llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); |
645 | 14 | std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"}); |
646 | 14 | auto *GV = new llvm::GlobalVariable( |
647 | 14 | CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, |
648 | 14 | llvm::GlobalValue::PrivateLinkage, Init, Name); |
649 | 14 | LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); |
650 | 14 | RValue InitRVal; |
651 | 14 | switch (CGF.getEvaluationKind(Ty)) { |
652 | 11 | case TEK_Scalar: |
653 | 11 | InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation()); |
654 | 11 | break; |
655 | 0 | case TEK_Complex: |
656 | 0 | InitRVal = |
657 | 0 | RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation())); |
658 | 0 | break; |
659 | 3 | case TEK_Aggregate: { |
660 | 3 | OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue); |
661 | 3 | CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV); |
662 | 3 | CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), |
663 | 3 | /*IsInitializer=*/false); |
664 | 3 | return; |
665 | 0 | } |
666 | 14 | } |
667 | 11 | OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue); |
668 | 11 | CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); |
669 | 11 | CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), |
670 | 11 | /*IsInitializer=*/false); |
671 | 11 | } |
672 | 67 | } |
673 | | |
674 | | /// Emit initialization of arrays of complex types. |
675 | | /// \param DestAddr Address of the array. |
676 | | /// \param Type Type of array. |
677 | | /// \param Init Initial expression of array. |
678 | | /// \param SrcAddr Address of the original array. |
679 | | static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, |
680 | | QualType Type, bool EmitDeclareReductionInit, |
681 | | const Expr *Init, |
682 | | const OMPDeclareReductionDecl *DRD, |
683 | 286 | Address SrcAddr = Address::invalid()) { |
684 | | // Perform element-by-element initialization. |
685 | 286 | QualType ElementTy; |
686 | | |
687 | | // Drill down to the base element type on both arrays. |
688 | 286 | const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); |
689 | 286 | llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); |
690 | 286 | if (DRD) |
691 | 31 | SrcAddr = |
692 | 31 | CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); |
693 | | |
694 | 286 | llvm::Value *SrcBegin = nullptr; |
695 | 286 | if (DRD) |
696 | 31 | SrcBegin = SrcAddr.getPointer(); |
697 | 286 | llvm::Value *DestBegin = DestAddr.getPointer(); |
698 | | // Cast from pointer to array type to pointer to single element. |
699 | 286 | llvm::Value *DestEnd = |
700 | 286 | CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements); |
701 | | // The basic structure here is a while-do loop. |
702 | 286 | llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); |
703 | 286 | llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); |
704 | 286 | llvm::Value *IsEmpty = |
705 | 286 | CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); |
706 | 286 | CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); |
707 | | |
708 | | // Enter the loop body, making that address the current address. |
709 | 286 | llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); |
710 | 286 | CGF.EmitBlock(BodyBB); |
711 | | |
712 | 286 | CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); |
713 | | |
714 | 286 | llvm::PHINode *SrcElementPHI = nullptr; |
715 | 286 | Address SrcElementCurrent = Address::invalid(); |
716 | 286 | if (DRD) { |
717 | 31 | SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, |
718 | 31 | "omp.arraycpy.srcElementPast"); |
719 | 31 | SrcElementPHI->addIncoming(SrcBegin, EntryBB); |
720 | 31 | SrcElementCurrent = |
721 | 31 | Address(SrcElementPHI, SrcAddr.getElementType(), |
722 | 31 | SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); |
723 | 31 | } |
724 | 286 | llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( |
725 | 286 | DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); |
726 | 286 | DestElementPHI->addIncoming(DestBegin, EntryBB); |
727 | 286 | Address DestElementCurrent = |
728 | 286 | Address(DestElementPHI, DestAddr.getElementType(), |
729 | 286 | DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); |
730 | | |
731 | | // Emit copy. |
732 | 286 | { |
733 | 286 | CodeGenFunction::RunCleanupsScope InitScope(CGF); |
734 | 286 | if (EmitDeclareReductionInit) { |
735 | 31 | emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, |
736 | 31 | SrcElementCurrent, ElementTy); |
737 | 31 | } else |
738 | 255 | CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), |
739 | 255 | /*IsInitializer=*/false); |
740 | 286 | } |
741 | | |
742 | 286 | if (DRD) { |
743 | | // Shift the address forward by one element. |
744 | 31 | llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32( |
745 | 31 | SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1, |
746 | 31 | "omp.arraycpy.dest.element"); |
747 | 31 | SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); |
748 | 31 | } |
749 | | |
750 | | // Shift the address forward by one element. |
751 | 286 | llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32( |
752 | 286 | DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1, |
753 | 286 | "omp.arraycpy.dest.element"); |
754 | | // Check whether we've reached the end. |
755 | 286 | llvm::Value *Done = |
756 | 286 | CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); |
757 | 286 | CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); |
758 | 286 | DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); |
759 | | |
760 | | // Done. |
761 | 286 | CGF.EmitBlock(DoneBB, /*IsFinished=*/true); |
762 | 286 | } |
763 | | |
764 | 1.12k | LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { |
765 | 1.12k | return CGF.EmitOMPSharedLValue(E); |
766 | 1.12k | } |
767 | | |
768 | | LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, |
769 | 1.12k | const Expr *E) { |
770 | 1.12k | if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) |
771 | 234 | return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); |
772 | 892 | return LValue(); |
773 | 1.12k | } |
774 | | |
775 | | void ReductionCodeGen::emitAggregateInitialization( |
776 | | CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr, |
777 | 286 | const OMPDeclareReductionDecl *DRD) { |
778 | | // Emit VarDecl with copy init for arrays. |
779 | | // Get the address of the original variable captured in current |
780 | | // captured region. |
781 | 286 | const auto *PrivateVD = |
782 | 286 | cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); |
783 | 286 | bool EmitDeclareReductionInit = |
784 | 286 | DRD && (31 DRD->getInitializer()31 || !PrivateVD->hasInit()4 ); |
785 | 286 | EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), |
786 | 286 | EmitDeclareReductionInit, |
787 | 286 | EmitDeclareReductionInit ? ClausesData[N].ReductionOp31 |
788 | 286 | : PrivateVD->getInit()255 , |
789 | 286 | DRD, SharedAddr); |
790 | 286 | } |
791 | | |
792 | | ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, |
793 | | ArrayRef<const Expr *> Origs, |
794 | | ArrayRef<const Expr *> Privates, |
795 | 27.9k | ArrayRef<const Expr *> ReductionOps) { |
796 | 27.9k | ClausesData.reserve(Shareds.size()); |
797 | 27.9k | SharedAddresses.reserve(Shareds.size()); |
798 | 27.9k | Sizes.reserve(Shareds.size()); |
799 | 27.9k | BaseDecls.reserve(Shareds.size()); |
800 | 27.9k | const auto *IOrig = Origs.begin(); |
801 | 27.9k | const auto *IPriv = Privates.begin(); |
802 | 27.9k | const auto *IRed = ReductionOps.begin(); |
803 | 27.9k | for (const Expr *Ref : Shareds) { |
804 | 1.08k | ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed); |
805 | 1.08k | std::advance(IOrig, 1); |
806 | 1.08k | std::advance(IPriv, 1); |
807 | 1.08k | std::advance(IRed, 1); |
808 | 1.08k | } |
809 | 27.9k | } |
810 | | |
811 | 1.07k | void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) { |
812 | 1.07k | assert(SharedAddresses.size() == N && OrigAddresses.size() == N && |
813 | 1.07k | "Number of generated lvalues must be exactly N."); |
814 | 0 | LValue First = emitSharedLValue(CGF, ClausesData[N].Shared); |
815 | 1.07k | LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared); |
816 | 1.07k | SharedAddresses.emplace_back(First, Second); |
817 | 1.07k | if (ClausesData[N].Shared == ClausesData[N].Ref) { |
818 | 1.02k | OrigAddresses.emplace_back(First, Second); |
819 | 1.02k | } else { |
820 | 53 | LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); |
821 | 53 | LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); |
822 | 53 | OrigAddresses.emplace_back(First, Second); |
823 | 53 | } |
824 | 1.07k | } |
825 | | |
826 | 1.07k | void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { |
827 | 1.07k | QualType PrivateType = getPrivateType(N); |
828 | 1.07k | bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); |
829 | 1.07k | if (!PrivateType->isVariablyModifiedType()) { |
830 | 834 | Sizes.emplace_back( |
831 | 834 | CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()), |
832 | 834 | nullptr); |
833 | 834 | return; |
834 | 834 | } |
835 | 239 | llvm::Value *Size; |
836 | 239 | llvm::Value *SizeInChars; |
837 | 239 | auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType(); |
838 | 239 | auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); |
839 | 239 | if (AsArraySection) { |
840 | 186 | Size = CGF.Builder.CreatePtrDiff(ElemType, |
841 | 186 | OrigAddresses[N].second.getPointer(CGF), |
842 | 186 | OrigAddresses[N].first.getPointer(CGF)); |
843 | 186 | Size = CGF.Builder.CreateNUWAdd( |
844 | 186 | Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); |
845 | 186 | SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf); |
846 | 186 | } else { |
847 | 53 | SizeInChars = |
848 | 53 | CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()); |
849 | 53 | Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf); |
850 | 53 | } |
851 | 239 | Sizes.emplace_back(SizeInChars, Size); |
852 | 239 | CodeGenFunction::OpaqueValueMapping OpaqueMap( |
853 | 239 | CGF, |
854 | 239 | cast<OpaqueValueExpr>( |
855 | 239 | CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), |
856 | 239 | RValue::get(Size)); |
857 | 239 | CGF.EmitVariablyModifiedType(PrivateType); |
858 | 239 | } |
859 | | |
860 | | void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, |
861 | 325 | llvm::Value *Size) { |
862 | 325 | QualType PrivateType = getPrivateType(N); |
863 | 325 | if (!PrivateType->isVariablyModifiedType()) { |
864 | 211 | assert(!Size && !Sizes[N].second && |
865 | 211 | "Size should be nullptr for non-variably modified reduction " |
866 | 211 | "items."); |
867 | 0 | return; |
868 | 211 | } |
869 | 114 | CodeGenFunction::OpaqueValueMapping OpaqueMap( |
870 | 114 | CGF, |
871 | 114 | cast<OpaqueValueExpr>( |
872 | 114 | CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), |
873 | 114 | RValue::get(Size)); |
874 | 114 | CGF.EmitVariablyModifiedType(PrivateType); |
875 | 114 | } |
876 | | |
877 | | void ReductionCodeGen::emitInitialization( |
878 | | CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr, |
879 | 965 | llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) { |
880 | 965 | assert(SharedAddresses.size() > N && "No variable was generated"); |
881 | 0 | const auto *PrivateVD = |
882 | 965 | cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); |
883 | 965 | const OMPDeclareReductionDecl *DRD = |
884 | 965 | getReductionInit(ClausesData[N].ReductionOp); |
885 | 965 | if (CGF.getContext().getAsArrayType(PrivateVD->getType())) { |
886 | 286 | if (DRD && DRD->getInitializer()31 ) |
887 | 27 | (void)DefaultInit(CGF); |
888 | 286 | emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD); |
889 | 679 | } else if (DRD && (46 DRD->getInitializer()46 || !PrivateVD->hasInit()20 )) { |
890 | 36 | (void)DefaultInit(CGF); |
891 | 36 | QualType SharedType = SharedAddresses[N].first.getType(); |
892 | 36 | emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, |
893 | 36 | PrivateAddr, SharedAddr, SharedType); |
894 | 643 | } else if (!DefaultInit(CGF) && PrivateVD->hasInit()83 && |
895 | 643 | !CGF.isTrivialInitializer(PrivateVD->getInit())83 ) { |
896 | 83 | CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr, |
897 | 83 | PrivateVD->getType().getQualifiers(), |
898 | 83 | /*IsInitializer=*/false); |
899 | 83 | } |
900 | 965 | } |
901 | | |
902 | 173 | bool ReductionCodeGen::needCleanups(unsigned N) { |
903 | 173 | QualType PrivateType = getPrivateType(N); |
904 | 173 | QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); |
905 | 173 | return DTorKind != QualType::DK_none; |
906 | 173 | } |
907 | | |
908 | | void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N, |
909 | 21 | Address PrivateAddr) { |
910 | 21 | QualType PrivateType = getPrivateType(N); |
911 | 21 | QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); |
912 | 21 | if (needCleanups(N)) { |
913 | 21 | PrivateAddr = CGF.Builder.CreateElementBitCast( |
914 | 21 | PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); |
915 | 21 | CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType); |
916 | 21 | } |
917 | 21 | } |
918 | | |
919 | | static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, |
920 | 189 | LValue BaseLV) { |
921 | 189 | BaseTy = BaseTy.getNonReferenceType(); |
922 | 315 | while ((BaseTy->isPointerType() || BaseTy->isReferenceType()189 ) && |
923 | 315 | !CGF.getContext().hasSameType(BaseTy, ElTy)126 ) { |
924 | 126 | if (const auto *PtrTy = BaseTy->getAs<PointerType>()) { |
925 | 126 | BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy); |
926 | 126 | } else { |
927 | 0 | LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy); |
928 | 0 | BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal); |
929 | 0 | } |
930 | 126 | BaseTy = BaseTy->getPointeeType(); |
931 | 126 | } |
932 | 189 | return CGF.MakeAddrLValue( |
933 | 189 | CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF), |
934 | 189 | CGF.ConvertTypeForMem(ElTy)), |
935 | 189 | BaseLV.getType(), BaseLV.getBaseInfo(), |
936 | 189 | CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType())); |
937 | 189 | } |
938 | | |
939 | | static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, |
940 | 189 | Address OriginalBaseAddress, llvm::Value *Addr) { |
941 | 189 | Address Tmp = Address::invalid(); |
942 | 189 | Address TopTmp = Address::invalid(); |
943 | 189 | Address MostTopTmp = Address::invalid(); |
944 | 189 | BaseTy = BaseTy.getNonReferenceType(); |
945 | 315 | while ((BaseTy->isPointerType() || BaseTy->isReferenceType()189 ) && |
946 | 315 | !CGF.getContext().hasSameType(BaseTy, ElTy)126 ) { |
947 | 126 | Tmp = CGF.CreateMemTemp(BaseTy); |
948 | 126 | if (TopTmp.isValid()) |
949 | 58 | CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); |
950 | 68 | else |
951 | 68 | MostTopTmp = Tmp; |
952 | 126 | TopTmp = Tmp; |
953 | 126 | BaseTy = BaseTy->getPointeeType(); |
954 | 126 | } |
955 | | |
956 | 189 | if (Tmp.isValid()) { |
957 | 68 | Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
958 | 68 | Addr, Tmp.getElementType()); |
959 | 68 | CGF.Builder.CreateStore(Addr, Tmp); |
960 | 68 | return MostTopTmp; |
961 | 68 | } |
962 | | |
963 | 121 | Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
964 | 121 | Addr, OriginalBaseAddress.getType()); |
965 | 121 | return OriginalBaseAddress.withPointer(Addr); |
966 | 189 | } |
967 | | |
968 | 1.07k | static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) { |
969 | 1.07k | const VarDecl *OrigVD = nullptr; |
970 | 1.07k | if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) { |
971 | 285 | const Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); |
972 | 425 | while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) |
973 | 140 | Base = TempOASE->getBase()->IgnoreParenImpCasts(); |
974 | 293 | while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) |
975 | 8 | Base = TempASE->getBase()->IgnoreParenImpCasts(); |
976 | 285 | DE = cast<DeclRefExpr>(Base); |
977 | 285 | OrigVD = cast<VarDecl>(DE->getDecl()); |
978 | 790 | } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) { |
979 | 4 | const Expr *Base = ASE->getBase()->IgnoreParenImpCasts(); |
980 | 4 | while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) |
981 | 0 | Base = TempASE->getBase()->IgnoreParenImpCasts(); |
982 | 4 | DE = cast<DeclRefExpr>(Base); |
983 | 4 | OrigVD = cast<VarDecl>(DE->getDecl()); |
984 | 4 | } |
985 | 1.07k | return OrigVD; |
986 | 1.07k | } |
987 | | |
988 | | Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, |
989 | 905 | Address PrivateAddr) { |
990 | 905 | const DeclRefExpr *DE; |
991 | 905 | if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) { |
992 | 189 | BaseDecls.emplace_back(OrigVD); |
993 | 189 | LValue OriginalBaseLValue = CGF.EmitLValue(DE); |
994 | 189 | LValue BaseLValue = |
995 | 189 | loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), |
996 | 189 | OriginalBaseLValue); |
997 | 189 | Address SharedAddr = SharedAddresses[N].first.getAddress(CGF); |
998 | 189 | llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( |
999 | 189 | SharedAddr.getElementType(), BaseLValue.getPointer(CGF), |
1000 | 189 | SharedAddr.getPointer()); |
1001 | 189 | llvm::Value *PrivatePointer = |
1002 | 189 | CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
1003 | 189 | PrivateAddr.getPointer(), SharedAddr.getType()); |
1004 | 189 | llvm::Value *Ptr = CGF.Builder.CreateGEP( |
1005 | 189 | SharedAddr.getElementType(), PrivatePointer, Adjustment); |
1006 | 189 | return castToBase(CGF, OrigVD->getType(), |
1007 | 189 | SharedAddresses[N].first.getType(), |
1008 | 189 | OriginalBaseLValue.getAddress(CGF), Ptr); |
1009 | 189 | } |
1010 | 716 | BaseDecls.emplace_back( |
1011 | 716 | cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl())); |
1012 | 716 | return PrivateAddr; |
1013 | 905 | } |
1014 | | |
1015 | 152 | bool ReductionCodeGen::usesReductionInitializer(unsigned N) const { |
1016 | 152 | const OMPDeclareReductionDecl *DRD = |
1017 | 152 | getReductionInit(ClausesData[N].ReductionOp); |
1018 | 152 | return DRD && DRD->getInitializer()6 ; |
1019 | 152 | } |
1020 | | |
1021 | 12.4k | LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { |
1022 | 12.4k | return CGF.EmitLoadOfPointerLValue( |
1023 | 12.4k | CGF.GetAddrOfLocalVar(getThreadIDVariable()), |
1024 | 12.4k | getThreadIDVariable()->getType()->castAs<PointerType>()); |
1025 | 12.4k | } |
1026 | | |
1027 | 64.0k | void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) { |
1028 | 64.0k | if (!CGF.HaveInsertPoint()) |
1029 | 0 | return; |
1030 | | // 1.2.2 OpenMP Language Terminology |
1031 | | // Structured block - An executable statement with a single entry at the |
1032 | | // top and a single exit at the bottom. |
1033 | | // The point of exit cannot be a branch out of the structured block. |
1034 | | // longjmp() and throw() must not violate the entry/exit criteria. |
1035 | 64.0k | CGF.EHStack.pushTerminate(); |
1036 | 64.0k | if (S) |
1037 | 24.7k | CGF.incrementProfileCounter(S); |
1038 | 64.0k | CodeGen(CGF); |
1039 | 64.0k | CGF.EHStack.popTerminate(); |
1040 | 64.0k | } |
1041 | | |
1042 | | LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( |
1043 | 111 | CodeGenFunction &CGF) { |
1044 | 111 | return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), |
1045 | 111 | getThreadIDVariable()->getType(), |
1046 | 111 | AlignmentSource::Decl); |
1047 | 111 | } |
1048 | | |
1049 | | static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, |
1050 | 7.64k | QualType FieldTy) { |
1051 | 7.64k | auto *Field = FieldDecl::Create( |
1052 | 7.64k | C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, |
1053 | 7.64k | C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), |
1054 | 7.64k | /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); |
1055 | 7.64k | Field->setAccess(AS_public); |
1056 | 7.64k | DC->addDecl(Field); |
1057 | 7.64k | return Field; |
1058 | 7.64k | } |
1059 | | |
1060 | | CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, |
1061 | | StringRef Separator) |
1062 | | : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator), |
1063 | 5.88k | OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) { |
1064 | 5.88k | KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); |
1065 | | |
1066 | | // Initialize Types used in OpenMPIRBuilder from OMPKinds.def |
1067 | 5.88k | OMPBuilder.initialize(); |
1068 | 5.88k | loadOffloadInfoMetadata(); |
1069 | 5.88k | } |
1070 | | |
1071 | 5.88k | void CGOpenMPRuntime::clear() { |
1072 | 5.88k | InternalVars.clear(); |
1073 | | // Clean non-target variable declarations possibly used only in debug info. |
1074 | 5.88k | for (const auto &Data : EmittedNonTargetVariables) { |
1075 | 12 | if (!Data.getValue().pointsToAliveValue()) |
1076 | 0 | continue; |
1077 | 12 | auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue()); |
1078 | 12 | if (!GV) |
1079 | 0 | continue; |
1080 | 12 | if (!GV->isDeclaration() || GV->getNumUses() > 0) |
1081 | 11 | continue; |
1082 | 1 | GV->eraseFromParent(); |
1083 | 1 | } |
1084 | 5.88k | } |
1085 | | |
1086 | 31.2k | std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const { |
1087 | 31.2k | SmallString<128> Buffer; |
1088 | 31.2k | llvm::raw_svector_ostream OS(Buffer); |
1089 | 31.2k | StringRef Sep = FirstSeparator; |
1090 | 46.9k | for (StringRef Part : Parts) { |
1091 | 46.9k | OS << Sep << Part; |
1092 | 46.9k | Sep = Separator; |
1093 | 46.9k | } |
1094 | 31.2k | return std::string(OS.str()); |
1095 | 31.2k | } |
1096 | | |
1097 | | static llvm::Function * |
1098 | | emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, |
1099 | | const Expr *CombinerInitializer, const VarDecl *In, |
1100 | 226 | const VarDecl *Out, bool IsCombiner) { |
1101 | | // void .omp_combiner.(Ty *in, Ty *out); |
1102 | 226 | ASTContext &C = CGM.getContext(); |
1103 | 226 | QualType PtrTy = C.getPointerType(Ty).withRestrict(); |
1104 | 226 | FunctionArgList Args; |
1105 | 226 | ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), |
1106 | 226 | /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); |
1107 | 226 | ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(), |
1108 | 226 | /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); |
1109 | 226 | Args.push_back(&OmpOutParm); |
1110 | 226 | Args.push_back(&OmpInParm); |
1111 | 226 | const CGFunctionInfo &FnInfo = |
1112 | 226 | CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); |
1113 | 226 | llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); |
1114 | 226 | std::string Name = CGM.getOpenMPRuntime().getName( |
1115 | 226 | {IsCombiner ? "omp_combiner"147 : "omp_initializer"79 , ""}); |
1116 | 226 | auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, |
1117 | 226 | Name, &CGM.getModule()); |
1118 | 226 | CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); |
1119 | 226 | if (CGM.getLangOpts().Optimize) { |
1120 | 0 | Fn->removeFnAttr(llvm::Attribute::NoInline); |
1121 | 0 | Fn->removeFnAttr(llvm::Attribute::OptimizeNone); |
1122 | 0 | Fn->addFnAttr(llvm::Attribute::AlwaysInline); |
1123 | 0 | } |
1124 | 226 | CodeGenFunction CGF(CGM); |
1125 | | // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. |
1126 | | // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions. |
1127 | 226 | CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(), |
1128 | 226 | Out->getLocation()); |
1129 | 226 | CodeGenFunction::OMPPrivateScope Scope(CGF); |
1130 | 226 | Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); |
1131 | 226 | Scope.addPrivate( |
1132 | 226 | In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) |
1133 | 226 | .getAddress(CGF)); |
1134 | 226 | Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); |
1135 | 226 | Scope.addPrivate( |
1136 | 226 | Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) |
1137 | 226 | .getAddress(CGF)); |
1138 | 226 | (void)Scope.Privatize(); |
1139 | 226 | if (!IsCombiner && Out->hasInit()79 && |
1140 | 226 | !CGF.isTrivialInitializer(Out->getInit())50 ) { |
1141 | 50 | CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out), |
1142 | 50 | Out->getType().getQualifiers(), |
1143 | 50 | /*IsInitializer=*/true); |
1144 | 50 | } |
1145 | 226 | if (CombinerInitializer) |
1146 | 176 | CGF.EmitIgnoredExpr(CombinerInitializer); |
1147 | 226 | Scope.ForceCleanup(); |
1148 | 226 | CGF.FinishFunction(); |
1149 | 226 | return Fn; |
1150 | 226 | } |
1151 | | |
1152 | | void CGOpenMPRuntime::emitUserDefinedReduction( |
1153 | 148 | CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) { |
1154 | 148 | if (UDRMap.count(D) > 0) |
1155 | 1 | return; |
1156 | 147 | llvm::Function *Combiner = emitCombinerOrInitializer( |
1157 | 147 | CGM, D->getType(), D->getCombiner(), |
1158 | 147 | cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()), |
1159 | 147 | cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()), |
1160 | 147 | /*IsCombiner=*/true); |
1161 | 147 | llvm::Function *Initializer = nullptr; |
1162 | 147 | if (const Expr *Init = D->getInitializer()) { |
1163 | 79 | Initializer = emitCombinerOrInitializer( |
1164 | 79 | CGM, D->getType(), |
1165 | 79 | D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init29 |
1166 | 79 | : nullptr50 , |
1167 | 79 | cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()), |
1168 | 79 | cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()), |
1169 | 79 | /*IsCombiner=*/false); |
1170 | 79 | } |
1171 | 147 | UDRMap.try_emplace(D, Combiner, Initializer); |
1172 | 147 | if (CGF) { |
1173 | 48 | auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn); |
1174 | 48 | Decls.second.push_back(D); |
1175 | 48 | } |
1176 | 147 | } |
1177 | | |
1178 | | std::pair<llvm::Function *, llvm::Function *> |
1179 | 258 | CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { |
1180 | 258 | auto I = UDRMap.find(D); |
1181 | 258 | if (I != UDRMap.end()) |
1182 | 224 | return I->second; |
1183 | 34 | emitUserDefinedReduction(/*CGF=*/nullptr, D); |
1184 | 34 | return UDRMap.lookup(D); |
1185 | 258 | } |
1186 | | |
1187 | | namespace { |
1188 | | // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR |
1189 | | // Builder if one is present. |
1190 | | struct PushAndPopStackRAII { |
1191 | | PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF, |
1192 | | bool HasCancel, llvm::omp::Directive Kind) |
1193 | 11.9k | : OMPBuilder(OMPBuilder) { |
1194 | 11.9k | if (!OMPBuilder) |
1195 | 0 | return; |
1196 | | |
1197 | | // The following callback is the crucial part of clangs cleanup process. |
1198 | | // |
1199 | | // NOTE: |
1200 | | // Once the OpenMPIRBuilder is used to create parallel regions (and |
1201 | | // similar), the cancellation destination (Dest below) is determined via |
1202 | | // IP. That means if we have variables to finalize we split the block at IP, |
1203 | | // use the new block (=BB) as destination to build a JumpDest (via |
1204 | | // getJumpDestInCurrentScope(BB)) which then is fed to |
1205 | | // EmitBranchThroughCleanup. Furthermore, there will not be the need |
1206 | | // to push & pop an FinalizationInfo object. |
1207 | | // The FiniCB will still be needed but at the point where the |
1208 | | // OpenMPIRBuilder is asked to construct a parallel (or similar) construct. |
1209 | 11.9k | auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) { |
1210 | 8 | assert(IP.getBlock()->end() == IP.getPoint() && |
1211 | 8 | "Clang CG should cause non-terminated block!"); |
1212 | 0 | CGBuilderTy::InsertPointGuard IPG(CGF.Builder); |
1213 | 8 | CGF.Builder.restoreIP(IP); |
1214 | 8 | CodeGenFunction::JumpDest Dest = |
1215 | 8 | CGF.getOMPCancelDestination(OMPD_parallel); |
1216 | 8 | CGF.EmitBranchThroughCleanup(Dest); |
1217 | 8 | }; |
1218 | | |
1219 | | // TODO: Remove this once we emit parallel regions through the |
1220 | | // OpenMPIRBuilder as it can do this setup internally. |
1221 | 11.9k | llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel}); |
1222 | 11.9k | OMPBuilder->pushFinalizationCB(std::move(FI)); |
1223 | 11.9k | } |
1224 | 11.9k | ~PushAndPopStackRAII() { |
1225 | 11.9k | if (OMPBuilder) |
1226 | 11.9k | OMPBuilder->popFinalizationCB(); |
1227 | 11.9k | } |
1228 | | llvm::OpenMPIRBuilder *OMPBuilder; |
1229 | | }; |
1230 | | } // namespace |
1231 | | |
1232 | | static llvm::Function *emitParallelOrTeamsOutlinedFunction( |
1233 | | CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, |
1234 | | const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, |
1235 | 11.9k | const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) { |
1236 | 11.9k | assert(ThreadIDVar->getType()->isPointerType() && |
1237 | 11.9k | "thread id variable must be of type kmp_int32 *"); |
1238 | 0 | CodeGenFunction CGF(CGM, true); |
1239 | 11.9k | bool HasCancel = false; |
1240 | 11.9k | if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D)) |
1241 | 1.00k | HasCancel = OPD->hasCancel(); |
1242 | 10.8k | else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D)) |
1243 | 907 | HasCancel = OPD->hasCancel(); |
1244 | 9.99k | else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) |
1245 | 26 | HasCancel = OPSD->hasCancel(); |
1246 | 9.96k | else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) |
1247 | 266 | HasCancel = OPFD->hasCancel(); |
1248 | 9.70k | else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D)) |
1249 | 557 | HasCancel = OPFD->hasCancel(); |
1250 | 9.14k | else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D)) |
1251 | 401 | HasCancel = OPFD->hasCancel(); |
1252 | 8.74k | else if (const auto *OPFD = |
1253 | 8.74k | dyn_cast<OMPTeamsDistributeParallelForDirective>(&D)) |
1254 | 680 | HasCancel = OPFD->hasCancel(); |
1255 | 8.06k | else if (const auto *OPFD = |
1256 | 8.06k | dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D)) |
1257 | 1.21k | HasCancel = OPFD->hasCancel(); |
1258 | | |
1259 | | // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new |
1260 | | // parallel region to make cancellation barriers work properly. |
1261 | 11.9k | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
1262 | 11.9k | PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind); |
1263 | 11.9k | CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, |
1264 | 11.9k | HasCancel, OutlinedHelperName); |
1265 | 11.9k | CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); |
1266 | 11.9k | return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc()); |
1267 | 11.9k | } |
1268 | | |
1269 | | llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction( |
1270 | | const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, |
1271 | 6.18k | OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { |
1272 | 6.18k | const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); |
1273 | 6.18k | return emitParallelOrTeamsOutlinedFunction( |
1274 | 6.18k | CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); |
1275 | 6.18k | } |
1276 | | |
1277 | | llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction( |
1278 | | const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, |
1279 | 5.71k | OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { |
1280 | 5.71k | const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); |
1281 | 5.71k | return emitParallelOrTeamsOutlinedFunction( |
1282 | 5.71k | CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); |
1283 | 5.71k | } |
1284 | | |
1285 | | llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( |
1286 | | const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, |
1287 | | const VarDecl *PartIDVar, const VarDecl *TaskTVar, |
1288 | | OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, |
1289 | 930 | bool Tied, unsigned &NumberOfParts) { |
1290 | 930 | auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, |
1291 | 930 | PrePostActionTy &) { |
1292 | 52 | llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc()); |
1293 | 52 | llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc()); |
1294 | 52 | llvm::Value *TaskArgs[] = { |
1295 | 52 | UpLoc, ThreadID, |
1296 | 52 | CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), |
1297 | 52 | TaskTVar->getType()->castAs<PointerType>()) |
1298 | 52 | .getPointer(CGF)}; |
1299 | 52 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
1300 | 52 | CGM.getModule(), OMPRTL___kmpc_omp_task), |
1301 | 52 | TaskArgs); |
1302 | 52 | }; |
1303 | 930 | CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, |
1304 | 930 | UntiedCodeGen); |
1305 | 930 | CodeGen.setAction(Action); |
1306 | 930 | assert(!ThreadIDVar->getType()->isPointerType() && |
1307 | 930 | "thread id variable must be of type kmp_int32 for tasks"); |
1308 | 0 | const OpenMPDirectiveKind Region = |
1309 | 930 | isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop226 |
1310 | 930 | : OMPD_task704 ; |
1311 | 930 | const CapturedStmt *CS = D.getCapturedStmt(Region); |
1312 | 930 | bool HasCancel = false; |
1313 | 930 | if (const auto *TD = dyn_cast<OMPTaskDirective>(&D)) |
1314 | 242 | HasCancel = TD->hasCancel(); |
1315 | 688 | else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D)) |
1316 | 39 | HasCancel = TD->hasCancel(); |
1317 | 649 | else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D)) |
1318 | 35 | HasCancel = TD->hasCancel(); |
1319 | 614 | else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D)) |
1320 | 33 | HasCancel = TD->hasCancel(); |
1321 | | |
1322 | 930 | CodeGenFunction CGF(CGM, true); |
1323 | 930 | CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, |
1324 | 930 | InnermostKind, HasCancel, Action); |
1325 | 930 | CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); |
1326 | 930 | llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS); |
1327 | 930 | if (!Tied) |
1328 | 28 | NumberOfParts = Action.getNumberOfParts(); |
1329 | 930 | return Res; |
1330 | 930 | } |
1331 | | |
1332 | | void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, |
1333 | 2.59k | bool AtCurrentPoint) { |
1334 | 2.59k | auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); |
1335 | 2.59k | assert(!Elem.second.ServiceInsertPt && "Insert point is set already."); |
1336 | | |
1337 | 0 | llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty); |
1338 | 2.59k | if (AtCurrentPoint) { |
1339 | 713 | Elem.second.ServiceInsertPt = new llvm::BitCastInst( |
1340 | 713 | Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock()); |
1341 | 1.88k | } else { |
1342 | 1.88k | Elem.second.ServiceInsertPt = |
1343 | 1.88k | new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt"); |
1344 | 1.88k | Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt); |
1345 | 1.88k | } |
1346 | 2.59k | } |
1347 | | |
1348 | 10.4k | void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) { |
1349 | 10.4k | auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); |
1350 | 10.4k | if (Elem.second.ServiceInsertPt) { |
1351 | 2.59k | llvm::Instruction *Ptr = Elem.second.ServiceInsertPt; |
1352 | 2.59k | Elem.second.ServiceInsertPt = nullptr; |
1353 | 2.59k | Ptr->eraseFromParent(); |
1354 | 2.59k | } |
1355 | 10.4k | } |
1356 | | |
1357 | | static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF, |
1358 | | SourceLocation Loc, |
1359 | 326 | SmallString<128> &Buffer) { |
1360 | 326 | llvm::raw_svector_ostream OS(Buffer); |
1361 | | // Build debug location |
1362 | 326 | PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); |
1363 | 326 | OS << ";" << PLoc.getFilename() << ";"; |
1364 | 326 | if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) |
1365 | 326 | OS << FD->getQualifiedNameAsString(); |
1366 | 326 | OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; |
1367 | 326 | return OS.str(); |
1368 | 326 | } |
1369 | | |
1370 | | llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, |
1371 | | SourceLocation Loc, |
1372 | 48.7k | unsigned Flags) { |
1373 | 48.7k | uint32_t SrcLocStrSize; |
1374 | 48.7k | llvm::Constant *SrcLocStr; |
1375 | 48.7k | if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || |
1376 | 48.7k | Loc.isInvalid()982 ) { |
1377 | 47.8k | SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize); |
1378 | 47.8k | } else { |
1379 | 918 | std::string FunctionName; |
1380 | 918 | if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) |
1381 | 906 | FunctionName = FD->getQualifiedNameAsString(); |
1382 | 918 | PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); |
1383 | 918 | const char *FileName = PLoc.getFilename(); |
1384 | 918 | unsigned Line = PLoc.getLine(); |
1385 | 918 | unsigned Column = PLoc.getColumn(); |
1386 | 918 | SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line, |
1387 | 918 | Column, SrcLocStrSize); |
1388 | 918 | } |
1389 | 48.7k | unsigned Reserved2Flags = getDefaultLocationReserved2Flags(); |
1390 | 48.7k | return OMPBuilder.getOrCreateIdent( |
1391 | 48.7k | SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags); |
1392 | 48.7k | } |
1393 | | |
1394 | | llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, |
1395 | 26.6k | SourceLocation Loc) { |
1396 | 26.6k | assert(CGF.CurFn && "No function in current CodeGenFunction."); |
1397 | | // If the OpenMPIRBuilder is used we need to use it for all thread id calls as |
1398 | | // the clang invariants used below might be broken. |
1399 | 26.6k | if (CGM.getLangOpts().OpenMPIRBuilder) { |
1400 | 326 | SmallString<128> Buffer; |
1401 | 326 | OMPBuilder.updateToLocation(CGF.Builder.saveIP()); |
1402 | 326 | uint32_t SrcLocStrSize; |
1403 | 326 | auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr( |
1404 | 326 | getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize); |
1405 | 326 | return OMPBuilder.getOrCreateThreadID( |
1406 | 326 | OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize)); |
1407 | 326 | } |
1408 | | |
1409 | 26.3k | llvm::Value *ThreadID = nullptr; |
1410 | | // Check whether we've already cached a load of the thread id in this |
1411 | | // function. |
1412 | 26.3k | auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); |
1413 | 26.3k | if (I != OpenMPLocThreadIDMap.end()) { |
1414 | 12.1k | ThreadID = I->second.ThreadID; |
1415 | 12.1k | if (ThreadID != nullptr) |
1416 | 11.5k | return ThreadID; |
1417 | 12.1k | } |
1418 | | // If exceptions are enabled, do not use parameter to avoid possible crash. |
1419 | 14.7k | if (auto *OMPRegionInfo = |
1420 | 14.7k | dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { |
1421 | 14.1k | if (OMPRegionInfo->getThreadIDVariable()) { |
1422 | | // Check if this an outlined function with thread id passed as argument. |
1423 | 12.3k | LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); |
1424 | 12.3k | llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent(); |
1425 | 12.3k | if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || |
1426 | 12.3k | !CGF.getLangOpts().CXXExceptions850 || |
1427 | 12.3k | CGF.Builder.GetInsertBlock() == TopBlock850 || |
1428 | 12.3k | !isa<llvm::Instruction>(LVal.getPointer(CGF))318 || |
1429 | 12.3k | cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == |
1430 | 318 | TopBlock || |
1431 | 12.3k | cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == |
1432 | 12.3k | CGF.Builder.GetInsertBlock()) { |
1433 | 12.3k | ThreadID = CGF.EmitLoadOfScalar(LVal, Loc); |
1434 | | // If value loaded in entry block, cache it and use it everywhere in |
1435 | | // function. |
1436 | 12.3k | if (CGF.Builder.GetInsertBlock() == TopBlock) { |
1437 | 7.12k | auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); |
1438 | 7.12k | Elem.second.ThreadID = ThreadID; |
1439 | 7.12k | } |
1440 | 12.3k | return ThreadID; |
1441 | 12.3k | } |
1442 | 12.3k | } |
1443 | 14.1k | } |
1444 | | |
1445 | | // This is not an outlined function region - need to call __kmpc_int32 |
1446 | | // kmpc_global_thread_num(ident_t *loc). |
1447 | | // Generate thread id value and cache this value for use across the |
1448 | | // function. |
1449 | 2.46k | auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); |
1450 | 2.46k | if (!Elem.second.ServiceInsertPt) |
1451 | 1.88k | setLocThreadIdInsertPt(CGF); |
1452 | 2.46k | CGBuilderTy::InsertPointGuard IPG(CGF.Builder); |
1453 | 2.46k | CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); |
1454 | 2.46k | llvm::CallInst *Call = CGF.Builder.CreateCall( |
1455 | 2.46k | OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), |
1456 | 2.46k | OMPRTL___kmpc_global_thread_num), |
1457 | 2.46k | emitUpdateLocation(CGF, Loc)); |
1458 | 2.46k | Call->setCallingConv(CGF.getRuntimeCC()); |
1459 | 2.46k | Elem.second.ThreadID = Call; |
1460 | 2.46k | return Call; |
1461 | 14.7k | } |
1462 | | |
1463 | 70.4k | void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { |
1464 | 70.4k | assert(CGF.CurFn && "No function in current CodeGenFunction."); |
1465 | 70.4k | if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { |
1466 | 9.72k | clearLocThreadIdInsertPt(CGF); |
1467 | 9.72k | OpenMPLocThreadIDMap.erase(CGF.CurFn); |
1468 | 9.72k | } |
1469 | 70.4k | if (FunctionUDRMap.count(CGF.CurFn) > 0) { |
1470 | 32 | for(const auto *D : FunctionUDRMap[CGF.CurFn]) |
1471 | 48 | UDRMap.erase(D); |
1472 | 32 | FunctionUDRMap.erase(CGF.CurFn); |
1473 | 32 | } |
1474 | 70.4k | auto I = FunctionUDMMap.find(CGF.CurFn); |
1475 | 70.4k | if (I != FunctionUDMMap.end()) { |
1476 | 0 | for(const auto *D : I->second) |
1477 | 0 | UDMMap.erase(D); |
1478 | 0 | FunctionUDMMap.erase(I); |
1479 | 0 | } |
1480 | 70.4k | LastprivateConditionalToTypes.erase(CGF.CurFn); |
1481 | 70.4k | FunctionToUntiedTaskStackMap.erase(CGF.CurFn); |
1482 | 70.4k | } |
1483 | | |
1484 | 10.2k | llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { |
1485 | 10.2k | return OMPBuilder.IdentPtr; |
1486 | 10.2k | } |
1487 | | |
1488 | 10.7k | llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { |
1489 | 10.7k | if (!Kmpc_MicroTy) { |
1490 | | // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) |
1491 | 1.91k | llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), |
1492 | 1.91k | llvm::PointerType::getUnqual(CGM.Int32Ty)}; |
1493 | 1.91k | Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); |
1494 | 1.91k | } |
1495 | 10.7k | return llvm::PointerType::getUnqual(Kmpc_MicroTy); |
1496 | 10.7k | } |
1497 | | |
1498 | | llvm::FunctionCallee |
1499 | | CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned, |
1500 | 8.63k | bool IsGPUDistribute) { |
1501 | 8.63k | assert((IVSize == 32 || IVSize == 64) && |
1502 | 8.63k | "IV size is not compatible with the omp runtime"); |
1503 | 0 | StringRef Name; |
1504 | 8.63k | if (IsGPUDistribute) |
1505 | 291 | Name = IVSize == 32 ? (288 IVSigned288 ? "__kmpc_distribute_static_init_4"288 |
1506 | 288 | : "__kmpc_distribute_static_init_4u"0 ) |
1507 | 291 | : (3 IVSigned3 ? "__kmpc_distribute_static_init_8"3 |
1508 | 3 | : "__kmpc_distribute_static_init_8u"0 ); |
1509 | 8.34k | else |
1510 | 8.34k | Name = IVSize == 32 ? (7.99k IVSigned7.99k ? "__kmpc_for_static_init_4"7.81k |
1511 | 7.99k | : "__kmpc_for_static_init_4u"184 ) |
1512 | 8.34k | : (346 IVSigned346 ? "__kmpc_for_static_init_8"238 |
1513 | 346 | : "__kmpc_for_static_init_8u"108 ); |
1514 | | |
1515 | 8.63k | llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty8.28k : CGM.Int64Ty349 ; |
1516 | 8.63k | auto *PtrTy = llvm::PointerType::getUnqual(ITy); |
1517 | 8.63k | llvm::Type *TypeParams[] = { |
1518 | 8.63k | getIdentTyPointerTy(), // loc |
1519 | 8.63k | CGM.Int32Ty, // tid |
1520 | 8.63k | CGM.Int32Ty, // schedtype |
1521 | 8.63k | llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter |
1522 | 8.63k | PtrTy, // p_lower |
1523 | 8.63k | PtrTy, // p_upper |
1524 | 8.63k | PtrTy, // p_stride |
1525 | 8.63k | ITy, // incr |
1526 | 8.63k | ITy // chunk |
1527 | 8.63k | }; |
1528 | 8.63k | auto *FnTy = |
1529 | 8.63k | llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); |
1530 | 8.63k | return CGM.CreateRuntimeFunction(FnTy, Name); |
1531 | 8.63k | } |
1532 | | |
1533 | | llvm::FunctionCallee |
1534 | 764 | CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { |
1535 | 764 | assert((IVSize == 32 || IVSize == 64) && |
1536 | 764 | "IV size is not compatible with the omp runtime"); |
1537 | 0 | StringRef Name = |
1538 | 764 | IVSize == 32 |
1539 | 764 | ? (715 IVSigned715 ? "__kmpc_dispatch_init_4"707 : "__kmpc_dispatch_init_4u"8 ) |
1540 | 764 | : (49 IVSigned49 ? "__kmpc_dispatch_init_8"19 : "__kmpc_dispatch_init_8u"30 ); |
1541 | 764 | llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty715 : CGM.Int64Ty49 ; |
1542 | 764 | llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc |
1543 | 764 | CGM.Int32Ty, // tid |
1544 | 764 | CGM.Int32Ty, // schedtype |
1545 | 764 | ITy, // lower |
1546 | 764 | ITy, // upper |
1547 | 764 | ITy, // stride |
1548 | 764 | ITy // chunk |
1549 | 764 | }; |
1550 | 764 | auto *FnTy = |
1551 | 764 | llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); |
1552 | 764 | return CGM.CreateRuntimeFunction(FnTy, Name); |
1553 | 764 | } |
1554 | | |
1555 | | llvm::FunctionCallee |
1556 | 57 | CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { |
1557 | 57 | assert((IVSize == 32 || IVSize == 64) && |
1558 | 57 | "IV size is not compatible with the omp runtime"); |
1559 | 0 | StringRef Name = |
1560 | 57 | IVSize == 32 |
1561 | 57 | ? (41 IVSigned41 ? "__kmpc_dispatch_fini_4"33 : "__kmpc_dispatch_fini_4u"8 ) |
1562 | 57 | : (16 IVSigned16 ? "__kmpc_dispatch_fini_8"8 : "__kmpc_dispatch_fini_8u"8 ); |
1563 | 57 | llvm::Type *TypeParams[] = { |
1564 | 57 | getIdentTyPointerTy(), // loc |
1565 | 57 | CGM.Int32Ty, // tid |
1566 | 57 | }; |
1567 | 57 | auto *FnTy = |
1568 | 57 | llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); |
1569 | 57 | return CGM.CreateRuntimeFunction(FnTy, Name); |
1570 | 57 | } |
1571 | | |
1572 | | llvm::FunctionCallee |
1573 | 764 | CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { |
1574 | 764 | assert((IVSize == 32 || IVSize == 64) && |
1575 | 764 | "IV size is not compatible with the omp runtime"); |
1576 | 0 | StringRef Name = |
1577 | 764 | IVSize == 32 |
1578 | 764 | ? (715 IVSigned715 ? "__kmpc_dispatch_next_4"707 : "__kmpc_dispatch_next_4u"8 ) |
1579 | 764 | : (49 IVSigned49 ? "__kmpc_dispatch_next_8"19 : "__kmpc_dispatch_next_8u"30 ); |
1580 | 764 | llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty715 : CGM.Int64Ty49 ; |
1581 | 764 | auto *PtrTy = llvm::PointerType::getUnqual(ITy); |
1582 | 764 | llvm::Type *TypeParams[] = { |
1583 | 764 | getIdentTyPointerTy(), // loc |
1584 | 764 | CGM.Int32Ty, // tid |
1585 | 764 | llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter |
1586 | 764 | PtrTy, // p_lower |
1587 | 764 | PtrTy, // p_upper |
1588 | 764 | PtrTy // p_stride |
1589 | 764 | }; |
1590 | 764 | auto *FnTy = |
1591 | 764 | llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); |
1592 | 764 | return CGM.CreateRuntimeFunction(FnTy, Name); |
1593 | 764 | } |
1594 | | |
1595 | | /// Obtain information that uniquely identifies a target entry. This |
1596 | | /// consists of the file and device IDs as well as line number associated with |
1597 | | /// the relevant entry source location. |
1598 | | static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, |
1599 | | unsigned &DeviceID, unsigned &FileID, |
1600 | 15.8k | unsigned &LineNum) { |
1601 | 15.8k | SourceManager &SM = C.getSourceManager(); |
1602 | | |
1603 | | // The loc should be always valid and have a file ID (the user cannot use |
1604 | | // #pragma directives in macros) |
1605 | | |
1606 | 15.8k | assert(Loc.isValid() && "Source location is expected to be always valid."); |
1607 | | |
1608 | 0 | PresumedLoc PLoc = SM.getPresumedLoc(Loc); |
1609 | 15.8k | assert(PLoc.isValid() && "Source location is expected to be always valid."); |
1610 | | |
1611 | 0 | llvm::sys::fs::UniqueID ID; |
1612 | 15.8k | if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) { |
1613 | 1 | PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false); |
1614 | 1 | assert(PLoc.isValid() && "Source location is expected to be always valid."); |
1615 | 1 | if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) |
1616 | 0 | SM.getDiagnostics().Report(diag::err_cannot_open_file) |
1617 | 0 | << PLoc.getFilename() << EC.message(); |
1618 | 1 | } |
1619 | | |
1620 | 0 | DeviceID = ID.getDevice(); |
1621 | 15.8k | FileID = ID.getFile(); |
1622 | 15.8k | LineNum = PLoc.getLine(); |
1623 | 15.8k | } |
1624 | | |
1625 | 518 | Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { |
1626 | 518 | if (CGM.getLangOpts().OpenMPSimd) |
1627 | 20 | return Address::invalid(); |
1628 | 498 | llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = |
1629 | 498 | OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); |
1630 | 498 | if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || |
1631 | 498 | (44 *Res == OMPDeclareTargetDeclAttr::MT_To44 && |
1632 | 498 | HasRequiresUnifiedSharedMemory44 ))) { |
1633 | 498 | SmallString<64> PtrName; |
1634 | 498 | { |
1635 | 498 | llvm::raw_svector_ostream OS(PtrName); |
1636 | 498 | OS << CGM.getMangledName(GlobalDecl(VD)); |
1637 | 498 | if (!VD->isExternallyVisible()) { |
1638 | 34 | unsigned DeviceID, FileID, Line; |
1639 | 34 | getTargetEntryUniqueInfo(CGM.getContext(), |
1640 | 34 | VD->getCanonicalDecl()->getBeginLoc(), |
1641 | 34 | DeviceID, FileID, Line); |
1642 | 34 | OS << llvm::format("_%x", FileID); |
1643 | 34 | } |
1644 | 498 | OS << "_decl_tgt_ref_ptr"; |
1645 | 498 | } |
1646 | 498 | llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); |
1647 | 498 | QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); |
1648 | 498 | llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(PtrTy); |
1649 | 498 | if (!Ptr) { |
1650 | 58 | Ptr = getOrCreateInternalVariable(LlvmPtrTy, PtrName); |
1651 | | |
1652 | 58 | auto *GV = cast<llvm::GlobalVariable>(Ptr); |
1653 | 58 | GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage); |
1654 | | |
1655 | 58 | if (!CGM.getLangOpts().OpenMPIsDevice) |
1656 | 38 | GV->setInitializer(CGM.GetAddrOfGlobal(VD)); |
1657 | 58 | registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); |
1658 | 58 | } |
1659 | 498 | return Address(Ptr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD)); |
1660 | 498 | } |
1661 | 0 | return Address::invalid(); |
1662 | 498 | } |
1663 | | |
1664 | | llvm::Constant * |
1665 | 138 | CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { |
1666 | 138 | assert(!CGM.getLangOpts().OpenMPUseTLS || |
1667 | 138 | !CGM.getContext().getTargetInfo().isTLSSupported()); |
1668 | | // Lookup the entry, lazily creating it if necessary. |
1669 | 0 | std::string Suffix = getName({"cache", ""}); |
1670 | 138 | return getOrCreateInternalVariable( |
1671 | 138 | CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); |
1672 | 138 | } |
1673 | | |
1674 | | Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, |
1675 | | const VarDecl *VD, |
1676 | | Address VDAddr, |
1677 | 241 | SourceLocation Loc) { |
1678 | 241 | if (CGM.getLangOpts().OpenMPUseTLS && |
1679 | 241 | CGM.getContext().getTargetInfo().isTLSSupported()103 ) |
1680 | 103 | return VDAddr; |
1681 | | |
1682 | 138 | llvm::Type *VarTy = VDAddr.getElementType(); |
1683 | 138 | llvm::Value *Args[] = { |
1684 | 138 | emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), |
1685 | 138 | CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy), |
1686 | 138 | CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), |
1687 | 138 | getOrCreateThreadPrivateCache(VD)}; |
1688 | 138 | return Address( |
1689 | 138 | CGF.EmitRuntimeCall( |
1690 | 138 | OMPBuilder.getOrCreateRuntimeFunction( |
1691 | 138 | CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), |
1692 | 138 | Args), |
1693 | 138 | CGF.Int8Ty, VDAddr.getAlignment()); |
1694 | 241 | } |
1695 | | |
1696 | | void CGOpenMPRuntime::emitThreadPrivateVarInit( |
1697 | | CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, |
1698 | 37 | llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { |
1699 | | // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime |
1700 | | // library. |
1701 | 37 | llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); |
1702 | 37 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
1703 | 37 | CGM.getModule(), OMPRTL___kmpc_global_thread_num), |
1704 | 37 | OMPLoc); |
1705 | | // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) |
1706 | | // to register constructor/destructor for variable. |
1707 | 37 | llvm::Value *Args[] = { |
1708 | 37 | OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), |
1709 | 37 | Ctor, CopyCtor, Dtor}; |
1710 | 37 | CGF.EmitRuntimeCall( |
1711 | 37 | OMPBuilder.getOrCreateRuntimeFunction( |
1712 | 37 | CGM.getModule(), OMPRTL___kmpc_threadprivate_register), |
1713 | 37 | Args); |
1714 | 37 | } |
1715 | | |
/// Emit the constructor/destructor helpers of a threadprivate variable and
/// register them with the runtime.
///
/// Nothing is emitted and nullptr is returned when TLS is both requested and
/// supported by the target, when VD has no definition, or when the definition
/// was already handled (tracked by mangled name in
/// ThreadPrivateWithDefinition). Otherwise a constructor helper is generated
/// for C++ when PerformInit is set, and a destructor helper when the variable
/// type needs destruction; if neither exists nullptr is returned.
///
/// When CGF is null the registration is emitted into a newly created init
/// function, which is returned. Otherwise the registration is emitted into
/// *CGF and nullptr is returned.
1716 | | llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( |
1717 | | const VarDecl *VD, Address VDAddr, SourceLocation Loc, |
1718 | 151 | bool PerformInit, CodeGenFunction *CGF) { |
1719 | 151 | if (CGM.getLangOpts().OpenMPUseTLS && |
1720 | 151 | CGM.getContext().getTargetInfo().isTLSSupported()71 ) |
1721 | 71 | return nullptr; |
1722 | | |
1723 | 80 | VD = VD->getDefinition(CGM.getContext()); |
1724 | 80 | if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second68 ) { |
1725 | 48 | QualType ASTTy = VD->getType(); |
1726 | | |
1727 | 48 | llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; |
1728 | 48 | const Expr *Init = VD->getAnyInitializer(); |
1729 | 48 | if (CGM.getLangOpts().CPlusPlus && PerformInit) { |
1730 | | // Generate a function that re-emits the declaration's initializer into |
1731 | | // the threadprivate copy of the variable VD and returns that copy. |
1732 | 37 | CodeGenFunction CtorCGF(CGM); |
1733 | 37 | FunctionArgList Args; |
1734 | 37 | ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, |
1735 | 37 | /*Id=*/nullptr, CGM.getContext().VoidPtrTy, |
1736 | 37 | ImplicitParamDecl::Other); |
1737 | 37 | Args.push_back(&Dst); |
1738 | | |
1739 | 37 | const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( |
1740 | 37 | CGM.getContext().VoidPtrTy, Args); |
1741 | 37 | llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); |
1742 | 37 | std::string Name = getName({"__kmpc_global_ctor_", ""}); |
1743 | 37 | llvm::Function *Fn = |
1744 | 37 | CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); |
1745 | 37 | CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, |
1746 | 37 | Args, Loc, Loc); |
1747 | 37 | llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( |
1748 | 37 | CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, |
1749 | 37 | CGM.getContext().VoidPtrTy, Dst.getLocation()); |
1750 | 37 | Address Arg(ArgVal, CtorCGF.Int8Ty, VDAddr.getAlignment()); |
1751 | 37 | Arg = CtorCGF.Builder.CreateElementBitCast( |
1752 | 37 | Arg, CtorCGF.ConvertTypeForMem(ASTTy)); |
1753 | 37 | CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), |
1754 | 37 | /*IsInitializer=*/true); |
1755 | 37 | ArgVal = CtorCGF.EmitLoadOfScalar( |
1756 | 37 | CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, |
1757 | 37 | CGM.getContext().VoidPtrTy, Dst.getLocation()); |
1758 | 37 | CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); |
1759 | 37 | CtorCGF.FinishFunction(); |
1760 | 37 | Ctor = Fn; |
1761 | 37 | } |
1762 | 48 | if (VD->getType().isDestructedType() != QualType::DK_none) { |
1763 | | // Generate a function that emits the destructor call for the |
1764 | | // threadprivate copy of the variable VD. |
1765 | 35 | CodeGenFunction DtorCGF(CGM); |
1766 | 35 | FunctionArgList Args; |
1767 | 35 | ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, |
1768 | 35 | /*Id=*/nullptr, CGM.getContext().VoidPtrTy, |
1769 | 35 | ImplicitParamDecl::Other); |
1770 | 35 | Args.push_back(&Dst); |
1771 | | |
1772 | 35 | const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( |
1773 | 35 | CGM.getContext().VoidTy, Args); |
1774 | 35 | llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); |
1775 | 35 | std::string Name = getName({"__kmpc_global_dtor_", ""}); |
1776 | 35 | llvm::Function *Fn = |
1777 | 35 | CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); |
1778 | 35 | auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); |
1779 | 35 | DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, |
1780 | 35 | Loc, Loc); |
1781 | | // Create a scope with an artificial location for the body of this function. |
1782 | 35 | auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); |
1783 | 35 | llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar( |
1784 | 35 | DtorCGF.GetAddrOfLocalVar(&Dst), |
1785 | 35 | /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); |
1786 | 35 | DtorCGF.emitDestroy( |
1787 | 35 | Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy, |
1788 | 35 | DtorCGF.getDestroyer(ASTTy.isDestructedType()), |
1789 | 35 | DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); |
1790 | 35 | DtorCGF.FinishFunction(); |
1791 | 35 | Dtor = Fn; |
1792 | 35 | } |
1793 | | // Do not emit the init function if neither helper is required. |
1794 | 48 | if (!Ctor && !Dtor11 ) |
1795 | 11 | return nullptr; |
1796 | | |
1797 | 37 | llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; |
1798 | 37 | auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, |
1799 | 37 | /*isVarArg=*/false) |
1800 | 37 | ->getPointerTo(); |
1801 | | // Copy constructor slot for the threadprivate variable. |
1802 | | // It is reserved by the runtime, which currently requires that this |
1803 | | // parameter is always NULL; otherwise it fires an assertion. |
1804 | 37 | CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); |
1805 | 37 | if (Ctor == nullptr) { |
1806 | 0 | auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, |
1807 | 0 | /*isVarArg=*/false) |
1808 | 0 | ->getPointerTo(); |
1809 | 0 | Ctor = llvm::Constant::getNullValue(CtorTy); |
1810 | 0 | } |
1811 | 37 | if (Dtor == nullptr) { |
1812 | 2 | auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, |
1813 | 2 | /*isVarArg=*/false) |
1814 | 2 | ->getPointerTo(); |
1815 | 2 | Dtor = llvm::Constant::getNullValue(DtorTy); |
1816 | 2 | } |
1817 | 37 | if (!CGF) { |
1818 | 16 | auto *InitFunctionTy = |
1819 | 16 | llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); |
1820 | 16 | std::string Name = getName({"__omp_threadprivate_init_", ""}); |
1821 | 16 | llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction( |
1822 | 16 | InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction()); |
1823 | 16 | CodeGenFunction InitCGF(CGM); |
1824 | 16 | FunctionArgList ArgList; |
1825 | 16 | InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, |
1826 | 16 | CGM.getTypes().arrangeNullaryFunction(), ArgList, |
1827 | 16 | Loc, Loc); |
1828 | 16 | emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); |
1829 | 16 | InitCGF.FinishFunction(); |
1830 | 16 | return InitFunction; |
1831 | 16 | } |
1832 | 21 | emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); |
1833 | 21 | } |
1834 | 53 | return nullptr; |
1835 | 80 | } |
1836 | | |
/// Emit and register the offload entries for the constructor and destructor
/// of a declare target variable.
///
/// Returns false when no offload target triples are set and this is not a
/// device compilation. In every other case the result is
/// CGM.getLangOpts().OpenMPIsDevice. Nothing is emitted when VD is not a
/// declare target declaration, is mapped as 'link', is mapped as 'to' under
/// unified shared memory, or was already processed (tracked by mangled name
/// in DeclareTargetWithDefinition).
///
/// On the device the "_ctor"/"_dtor" entries are real functions operating on
/// Addr; on the host they are private constant i8 placeholder globals.
1837 | | bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, |
1838 | | llvm::GlobalVariable *Addr, |
1839 | 3.90k | bool PerformInit) { |
1840 | 3.90k | if (CGM.getLangOpts().OMPTargetTriples.empty() && |
1841 | 3.90k | !CGM.getLangOpts().OpenMPIsDevice654 ) |
1842 | 592 | return false; |
1843 | 3.31k | Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = |
1844 | 3.31k | OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); |
1845 | 3.31k | if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link150 || |
1846 | 3.31k | (150 *Res == OMPDeclareTargetDeclAttr::MT_To150 && |
1847 | 150 | HasRequiresUnifiedSharedMemory)) |
1848 | 3.16k | return CGM.getLangOpts().OpenMPIsDevice; |
1849 | 150 | VD = VD->getDefinition(CGM.getContext()); |
1850 | 150 | assert(VD && "Unknown VarDecl"); |
1851 | | |
1852 | 150 | if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) |
1853 | 4 | return CGM.getLangOpts().OpenMPIsDevice; |
1854 | | |
1855 | 146 | QualType ASTTy = VD->getType(); |
1856 | 146 | SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); |
1857 | | |
1858 | | // Produce the unique prefix to identify the new target regions. We use |
1859 | | // the source location of the variable declaration which we know to not |
1860 | | // conflict with any target region. |
1861 | 146 | unsigned DeviceID; |
1862 | 146 | unsigned FileID; |
1863 | 146 | unsigned Line; |
1864 | 146 | getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line); |
1865 | 146 | SmallString<128> Buffer, Out; |
1866 | 146 | { |
1867 | 146 | llvm::raw_svector_ostream OS(Buffer); |
1868 | 146 | OS << "__omp_offloading_" << llvm::format("_%x", DeviceID) |
1869 | 146 | << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; |
1870 | 146 | } |
1871 | | |
1872 | 146 | const Expr *Init = VD->getAnyInitializer(); |
1873 | 146 | if (CGM.getLangOpts().CPlusPlus && PerformInit) { |
1874 | 146 | llvm::Constant *Ctor; |
1875 | 146 | llvm::Constant *ID; |
1876 | 146 | if (CGM.getLangOpts().OpenMPIsDevice) { |
1877 | | // Generate a function that re-emits the declaration's initializer into |
1878 | | // the global variable Addr (cast to address space 0 when needed). |
1879 | 75 | CodeGenFunction CtorCGF(CGM); |
1880 | | |
1881 | 75 | const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); |
1882 | 75 | llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); |
1883 | 75 | llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( |
1884 | 75 | FTy, Twine(Buffer, "_ctor"), FI, Loc, false, |
1885 | 75 | llvm::GlobalValue::WeakODRLinkage); |
1886 | 75 | if (CGM.getTriple().isAMDGCN()) |
1887 | 1 | Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL); |
1888 | 75 | auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); |
1889 | 75 | CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, |
1890 | 75 | FunctionArgList(), Loc, Loc); |
1891 | 75 | auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF); |
1892 | 75 | llvm::Constant *AddrInAS0 = Addr; |
1893 | 75 | if (Addr->getAddressSpace() != 0) |
1894 | 1 | AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast( |
1895 | 1 | Addr, llvm::PointerType::getWithSamePointeeType( |
1896 | 1 | cast<llvm::PointerType>(Addr->getType()), 0)); |
1897 | 75 | CtorCGF.EmitAnyExprToMem(Init, |
1898 | 75 | Address(AddrInAS0, Addr->getValueType(), |
1899 | 75 | CGM.getContext().getDeclAlign(VD)), |
1900 | 75 | Init->getType().getQualifiers(), |
1901 | 75 | /*IsInitializer=*/true); |
1902 | 75 | CtorCGF.FinishFunction(); |
1903 | 75 | Ctor = Fn; |
1904 | 75 | ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); |
1905 | 75 | } else { |
1906 | 71 | Ctor = new llvm::GlobalVariable( |
1907 | 71 | CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, |
1908 | 71 | llvm::GlobalValue::PrivateLinkage, |
1909 | 71 | llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor")); |
1910 | 71 | ID = Ctor; |
1911 | 71 | } |
1912 | | |
1913 | | // Register the information for the entry associated with the constructor. |
1914 | 146 | Out.clear(); |
1915 | 146 | OffloadEntriesInfoManager.registerTargetRegionEntryInfo( |
1916 | 146 | DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor, |
1917 | 146 | ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor); |
1918 | 146 | } |
1919 | 146 | if (VD->getType().isDestructedType() != QualType::DK_none) { |
1920 | 94 | llvm::Constant *Dtor; |
1921 | 94 | llvm::Constant *ID; |
1922 | 94 | if (CGM.getLangOpts().OpenMPIsDevice) { |
1923 | | // Generate a function that emits the destructor call for the global |
1924 | | // variable Addr (cast to address space 0 when needed). |
1925 | 59 | CodeGenFunction DtorCGF(CGM); |
1926 | | |
1927 | 59 | const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); |
1928 | 59 | llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); |
1929 | 59 | llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( |
1930 | 59 | FTy, Twine(Buffer, "_dtor"), FI, Loc, false, |
1931 | 59 | llvm::GlobalValue::WeakODRLinkage); |
1932 | 59 | if (CGM.getTriple().isAMDGCN()) |
1933 | 1 | Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL); |
1934 | 59 | auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); |
1935 | 59 | DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, |
1936 | 59 | FunctionArgList(), Loc, Loc); |
1937 | | // Create a scope with an artificial location for the body of this |
1938 | | // function. |
1939 | 59 | auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); |
1940 | 59 | llvm::Constant *AddrInAS0 = Addr; |
1941 | 59 | if (Addr->getAddressSpace() != 0) |
1942 | 1 | AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast( |
1943 | 1 | Addr, llvm::PointerType::getWithSamePointeeType( |
1944 | 1 | cast<llvm::PointerType>(Addr->getType()), 0)); |
1945 | 59 | DtorCGF.emitDestroy(Address(AddrInAS0, Addr->getValueType(), |
1946 | 59 | CGM.getContext().getDeclAlign(VD)), |
1947 | 59 | ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), |
1948 | 59 | DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); |
1949 | 59 | DtorCGF.FinishFunction(); |
1950 | 59 | Dtor = Fn; |
1951 | 59 | ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); |
1952 | 59 | } else { |
1953 | 35 | Dtor = new llvm::GlobalVariable( |
1954 | 35 | CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, |
1955 | 35 | llvm::GlobalValue::PrivateLinkage, |
1956 | 35 | llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); |
1957 | 35 | ID = Dtor; |
1958 | 35 | } |
1959 | | // Register the information for the entry associated with the destructor. |
1960 | 94 | Out.clear(); |
1961 | 94 | OffloadEntriesInfoManager.registerTargetRegionEntryInfo( |
1962 | 94 | DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor, |
1963 | 94 | ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor); |
1964 | 94 | } |
1965 | 146 | return CGM.getLangOpts().OpenMPIsDevice; |
1966 | 150 | } |
1967 | | |
1968 | | Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, |
1969 | | QualType VarType, |
1970 | 158 | StringRef Name) { |
1971 | 158 | std::string Suffix = getName({"artificial", ""}); |
1972 | 158 | llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); |
1973 | 158 | llvm::GlobalVariable *GAddr = |
1974 | 158 | getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); |
1975 | 158 | if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS && |
1976 | 158 | CGM.getTarget().isTLSSupported()123 ) { |
1977 | 83 | GAddr->setThreadLocal(/*Val=*/true); |
1978 | 83 | return Address(GAddr, GAddr->getValueType(), |
1979 | 83 | CGM.getContext().getTypeAlignInChars(VarType)); |
1980 | 83 | } |
1981 | 75 | std::string CacheSuffix = getName({"cache", ""}); |
1982 | 75 | llvm::Value *Args[] = { |
1983 | 75 | emitUpdateLocation(CGF, SourceLocation()), |
1984 | 75 | getThreadID(CGF, SourceLocation()), |
1985 | 75 | CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), |
1986 | 75 | CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, |
1987 | 75 | /*isSigned=*/false), |
1988 | 75 | getOrCreateInternalVariable( |
1989 | 75 | CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; |
1990 | 75 | return Address( |
1991 | 75 | CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
1992 | 75 | CGF.EmitRuntimeCall( |
1993 | 75 | OMPBuilder.getOrCreateRuntimeFunction( |
1994 | 75 | CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), |
1995 | 75 | Args), |
1996 | 75 | VarLVType->getPointerTo(/*AddrSpace=*/0)), |
1997 | 75 | VarLVType, CGM.getContext().getTypeAlignInChars(VarType)); |
1998 | 158 | } |
1999 | | |
2000 | | void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond, |
2001 | | const RegionCodeGenTy &ThenGen, |
2002 | 2.53k | const RegionCodeGenTy &ElseGen) { |
2003 | 2.53k | CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); |
2004 | | |
2005 | | // If the condition constant folds and can be elided, try to avoid emitting |
2006 | | // the condition and the dead arm of the if/else. |
2007 | 2.53k | bool CondConstant; |
2008 | 2.53k | if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { |
2009 | 1.30k | if (CondConstant) |
2010 | 754 | ThenGen(CGF); |
2011 | 553 | else |
2012 | 553 | ElseGen(CGF); |
2013 | 1.30k | return; |
2014 | 1.30k | } |
2015 | | |
2016 | | // Otherwise, the condition did not fold, or we couldn't elide it. Just |
2017 | | // emit the conditional branch. |
2018 | 1.22k | llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); |
2019 | 1.22k | llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); |
2020 | 1.22k | llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); |
2021 | 1.22k | CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); |
2022 | | |
2023 | | // Emit the 'then' code. |
2024 | 1.22k | CGF.EmitBlock(ThenBlock); |
2025 | 1.22k | ThenGen(CGF); |
2026 | 1.22k | CGF.EmitBranch(ContBlock); |
2027 | | // Emit the 'else' code if present. |
2028 | | // There is no need to emit line number for unconditional branch. |
2029 | 1.22k | (void)ApplyDebugLocation::CreateEmpty(CGF); |
2030 | 1.22k | CGF.EmitBlock(ElseBlock); |
2031 | 1.22k | ElseGen(CGF); |
2032 | | // There is no need to emit line number for unconditional branch. |
2033 | 1.22k | (void)ApplyDebugLocation::CreateEmpty(CGF); |
2034 | 1.22k | CGF.EmitBranch(ContBlock); |
2035 | | // Emit the continuation block for code after the if. |
2036 | 1.22k | CGF.EmitBlock(ContBlock, /*IsFinished=*/true); |
2037 | 1.22k | } |
2038 | | |
2039 | | void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, |
2040 | | llvm::Function *OutlinedFn, |
2041 | | ArrayRef<llvm::Value *> CapturedVars, |
2042 | | const Expr *IfCond, |
2043 | 5.62k | llvm::Value *NumThreads) { |
2044 | 5.62k | if (!CGF.HaveInsertPoint()) |
2045 | 0 | return; |
2046 | 5.62k | llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); |
2047 | 5.62k | auto &M = CGM.getModule(); |
2048 | 5.62k | auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc, |
2049 | 5.62k | this](CodeGenFunction &CGF, PrePostActionTy &) { |
2050 | | // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); |
2051 | 5.41k | CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); |
2052 | 5.41k | llvm::Value *Args[] = { |
2053 | 5.41k | RTLoc, |
2054 | 5.41k | CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars |
2055 | 5.41k | CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())}; |
2056 | 5.41k | llvm::SmallVector<llvm::Value *, 16> RealArgs; |
2057 | 5.41k | RealArgs.append(std::begin(Args), std::end(Args)); |
2058 | 5.41k | RealArgs.append(CapturedVars.begin(), CapturedVars.end()); |
2059 | | |
2060 | 5.41k | llvm::FunctionCallee RTLFn = |
2061 | 5.41k | OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call); |
2062 | 5.41k | CGF.EmitRuntimeCall(RTLFn, RealArgs); |
2063 | 5.41k | }; |
2064 | 5.62k | auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc, |
2065 | 5.62k | this](CodeGenFunction &CGF, PrePostActionTy &) { |
2066 | 440 | CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); |
2067 | 440 | llvm::Value *ThreadID = RT.getThreadID(CGF, Loc); |
2068 | | // Build calls: |
2069 | | // __kmpc_serialized_parallel(&Loc, GTid); |
2070 | 440 | llvm::Value *Args[] = {RTLoc, ThreadID}; |
2071 | 440 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
2072 | 440 | M, OMPRTL___kmpc_serialized_parallel), |
2073 | 440 | Args); |
2074 | | |
2075 | | // OutlinedFn(>id, &zero_bound, CapturedStruct); |
2076 | 440 | Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc); |
2077 | 440 | Address ZeroAddrBound = |
2078 | 440 | CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, |
2079 | 440 | /*Name=*/".bound.zero.addr"); |
2080 | 440 | CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound); |
2081 | 440 | llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; |
2082 | | // ThreadId for serialized parallels is 0. |
2083 | 440 | OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); |
2084 | 440 | OutlinedFnArgs.push_back(ZeroAddrBound.getPointer()); |
2085 | 440 | OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); |
2086 | | |
2087 | | // Ensure we do not inline the function. This is trivially true for the ones |
2088 | | // passed to __kmpc_fork_call but the ones called in serialized regions |
2089 | | // could be inlined. This is not a perfect but it is closer to the invariant |
2090 | | // we want, namely, every data environment starts with a new function. |
2091 | | // TODO: We should pass the if condition to the runtime function and do the |
2092 | | // handling there. Much cleaner code. |
2093 | 440 | OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline); |
2094 | 440 | OutlinedFn->addFnAttr(llvm::Attribute::NoInline); |
2095 | 440 | RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); |
2096 | | |
2097 | | // __kmpc_end_serialized_parallel(&Loc, GTid); |
2098 | 440 | llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; |
2099 | 440 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
2100 | 440 | M, OMPRTL___kmpc_end_serialized_parallel), |
2101 | 440 | EndArgs); |
2102 | 440 | }; |
2103 | 5.62k | if (IfCond) { |
2104 | 568 | emitIfClause(CGF, IfCond, ThenGen, ElseGen); |
2105 | 5.06k | } else { |
2106 | 5.06k | RegionCodeGenTy ThenRCG(ThenGen); |
2107 | 5.06k | ThenRCG(CGF); |
2108 | 5.06k | } |
2109 | 5.62k | } |
2110 | | |
2111 | | // If we're inside an (outlined) parallel region, use the region info's |
2112 | | // thread-ID variable (it is passed in a first argument of the outlined function |
2113 | | // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in |
2114 | | // regular serial code region, get thread ID by calling kmp_int32 |
2115 | | // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and |
2116 | | // return the address of that temp. |
2117 | | Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, |
2118 | 767 | SourceLocation Loc) { |
2119 | 767 | if (auto *OMPRegionInfo = |
2120 | 767 | dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) |
2121 | 731 | if (OMPRegionInfo->getThreadIDVariable()) |
2122 | 275 | return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF); |
2123 | | |
2124 | 492 | llvm::Value *ThreadID = getThreadID(CGF, Loc); |
2125 | 492 | QualType Int32Ty = |
2126 | 492 | CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); |
2127 | 492 | Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); |
2128 | 492 | CGF.EmitStoreOfScalar(ThreadID, |
2129 | 492 | CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); |
2130 | | |
2131 | 492 | return ThreadIDTemp; |
2132 | 767 | } |
2133 | | |
2134 | | llvm::GlobalVariable *CGOpenMPRuntime::getOrCreateInternalVariable( |
2135 | 1.13k | llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { |
2136 | 1.13k | SmallString<256> Buffer; |
2137 | 1.13k | llvm::raw_svector_ostream Out(Buffer); |
2138 | 1.13k | Out << Name; |
2139 | 1.13k | StringRef RuntimeName = Out.str(); |
2140 | 1.13k | auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; |
2141 | 1.13k | if (Elem.second) { |
2142 | 689 | assert(Elem.second->getType()->isOpaqueOrPointeeTypeMatches(Ty) && |
2143 | 689 | "OMP internal variable has different type than requested"); |
2144 | 0 | return &*Elem.second; |
2145 | 689 | } |
2146 | | |
2147 | 448 | return Elem.second = new llvm::GlobalVariable( |
2148 | 448 | CGM.getModule(), Ty, /*IsConstant*/ false, |
2149 | 448 | llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), |
2150 | 448 | Elem.first(), /*InsertBefore=*/nullptr, |
2151 | 448 | llvm::GlobalValue::NotThreadLocal, AddressSpace); |
2152 | 1.13k | } |
2153 | | |
2154 | 671 | llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { |
2155 | 671 | std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); |
2156 | 671 | std::string Name = getName({Prefix, "var"}); |
2157 | 671 | return getOrCreateInternalVariable(KmpCriticalNameTy, Name); |
2158 | 671 | } |
2159 | | |
2160 | | namespace { |
2161 | | /// Common pre(post)-action for different OpenMP constructs. |
2162 | | class CommonActionTy final : public PrePostActionTy { |
2163 | | llvm::FunctionCallee EnterCallee; |
2164 | | ArrayRef<llvm::Value *> EnterArgs; |
2165 | | llvm::FunctionCallee ExitCallee; |
2166 | | ArrayRef<llvm::Value *> ExitArgs; |
2167 | | bool Conditional; |
2168 | | llvm::BasicBlock *ContBlock = nullptr; |
2169 | | |
2170 | | public: |
2171 | | CommonActionTy(llvm::FunctionCallee EnterCallee, |
2172 | | ArrayRef<llvm::Value *> EnterArgs, |
2173 | | llvm::FunctionCallee ExitCallee, |
2174 | | ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false) |
2175 | | : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), |
2176 | 1.60k | ExitArgs(ExitArgs), Conditional(Conditional) {} |
2177 | 970 | void Enter(CodeGenFunction &CGF) override { |
2178 | 970 | llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); |
2179 | 970 | if (Conditional) { |
2180 | 277 | llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); |
2181 | 277 | auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); |
2182 | 277 | ContBlock = CGF.createBasicBlock("omp_if.end"); |
2183 | | // Generate the branch (If-stmt) |
2184 | 277 | CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); |
2185 | 277 | CGF.EmitBlock(ThenBlock); |
2186 | 277 | } |
2187 | 970 | } |
2188 | 277 | void Done(CodeGenFunction &CGF) { |
2189 | | // Emit the rest of blocks/branches |
2190 | 277 | CGF.EmitBranch(ContBlock); |
2191 | 277 | CGF.EmitBlock(ContBlock, true); |
2192 | 277 | } |
2193 | 1.59k | void Exit(CodeGenFunction &CGF) override { |
2194 | 1.59k | CGF.EmitRuntimeCall(ExitCallee, ExitArgs); |
2195 | 1.59k | } |
2196 | | }; |
2197 | | } // anonymous namespace |
2198 | | |
2199 | | void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, |
2200 | | StringRef CriticalName, |
2201 | | const RegionCodeGenTy &CriticalOpGen, |
2202 | 224 | SourceLocation Loc, const Expr *Hint) { |
2203 | | // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); |
2204 | | // CriticalOpGen(); |
2205 | | // __kmpc_end_critical(ident_t *, gtid, Lock); |
2206 | | // Prepare arguments and build a call to __kmpc_critical |
2207 | 224 | if (!CGF.HaveInsertPoint()) |
2208 | 0 | return; |
2209 | 224 | llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), |
2210 | 224 | getCriticalRegionLock(CriticalName)}; |
2211 | 224 | llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), |
2212 | 224 | std::end(Args)); |
2213 | 224 | if (Hint) { |
2214 | 6 | EnterArgs.push_back(CGF.Builder.CreateIntCast( |
2215 | 6 | CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false)); |
2216 | 6 | } |
2217 | 224 | CommonActionTy Action( |
2218 | 224 | OMPBuilder.getOrCreateRuntimeFunction( |
2219 | 224 | CGM.getModule(), |
2220 | 224 | Hint ? OMPRTL___kmpc_critical_with_hint6 : OMPRTL___kmpc_critical218 ), |
2221 | 224 | EnterArgs, |
2222 | 224 | OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), |
2223 | 224 | OMPRTL___kmpc_end_critical), |
2224 | 224 | Args); |
2225 | 224 | CriticalOpGen.setAction(Action); |
2226 | 224 | emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); |
2227 | 224 | } |
2228 | | |
2229 | | void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, |
2230 | | const RegionCodeGenTy &MasterOpGen, |
2231 | 192 | SourceLocation Loc) { |
2232 | 192 | if (!CGF.HaveInsertPoint()) |
2233 | 0 | return; |
2234 | | // if(__kmpc_master(ident_t *, gtid)) { |
2235 | | // MasterOpGen(); |
2236 | | // __kmpc_end_master(ident_t *, gtid); |
2237 | | // } |
2238 | | // Prepare arguments and build a call to __kmpc_master |
2239 | 192 | llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; |
2240 | 192 | CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( |
2241 | 192 | CGM.getModule(), OMPRTL___kmpc_master), |
2242 | 192 | Args, |
2243 | 192 | OMPBuilder.getOrCreateRuntimeFunction( |
2244 | 192 | CGM.getModule(), OMPRTL___kmpc_end_master), |
2245 | 192 | Args, |
2246 | 192 | /*Conditional=*/true); |
2247 | 192 | MasterOpGen.setAction(Action); |
2248 | 192 | emitInlinedDirective(CGF, OMPD_master, MasterOpGen); |
2249 | 192 | Action.Done(CGF); |
2250 | 192 | } |
2251 | | |
2252 | | void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF, |
2253 | | const RegionCodeGenTy &MaskedOpGen, |
2254 | 24 | SourceLocation Loc, const Expr *Filter) { |
2255 | 24 | if (!CGF.HaveInsertPoint()) |
2256 | 0 | return; |
2257 | | // if(__kmpc_masked(ident_t *, gtid, filter)) { |
2258 | | // MaskedOpGen(); |
2259 | | // __kmpc_end_masked(iden_t *, gtid); |
2260 | | // } |
2261 | | // Prepare arguments and build a call to __kmpc_masked |
2262 | 24 | llvm::Value *FilterVal = Filter |
2263 | 24 | ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)18 |
2264 | 24 | : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0)6 ; |
2265 | 24 | llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), |
2266 | 24 | FilterVal}; |
2267 | 24 | llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc), |
2268 | 24 | getThreadID(CGF, Loc)}; |
2269 | 24 | CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( |
2270 | 24 | CGM.getModule(), OMPRTL___kmpc_masked), |
2271 | 24 | Args, |
2272 | 24 | OMPBuilder.getOrCreateRuntimeFunction( |
2273 | 24 | CGM.getModule(), OMPRTL___kmpc_end_masked), |
2274 | 24 | ArgsEnd, |
2275 | 24 | /*Conditional=*/true); |
2276 | 24 | MaskedOpGen.setAction(Action); |
2277 | 24 | emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen); |
2278 | 24 | Action.Done(CGF); |
2279 | 24 | } |
2280 | | |
2281 | | void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, |
2282 | 19 | SourceLocation Loc) { |
2283 | 19 | if (!CGF.HaveInsertPoint()) |
2284 | 0 | return; |
2285 | 19 | if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { |
2286 | 10 | OMPBuilder.createTaskyield(CGF.Builder); |
2287 | 10 | } else { |
2288 | | // Build call __kmpc_omp_taskyield(loc, thread_id, 0); |
2289 | 9 | llvm::Value *Args[] = { |
2290 | 9 | emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), |
2291 | 9 | llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; |
2292 | 9 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
2293 | 9 | CGM.getModule(), OMPRTL___kmpc_omp_taskyield), |
2294 | 9 | Args); |
2295 | 9 | } |
2296 | | |
2297 | 19 | if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) |
2298 | 7 | Region->emitUntiedSwitch(CGF); |
2299 | 19 | } |
2300 | | |
2301 | | void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, |
2302 | | const RegionCodeGenTy &TaskgroupOpGen, |
2303 | 251 | SourceLocation Loc) { |
2304 | 251 | if (!CGF.HaveInsertPoint()) |
2305 | 0 | return; |
2306 | | // __kmpc_taskgroup(ident_t *, gtid); |
2307 | | // TaskgroupOpGen(); |
2308 | | // __kmpc_end_taskgroup(ident_t *, gtid); |
2309 | | // Prepare arguments and build a call to __kmpc_taskgroup |
2310 | 251 | llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; |
2311 | 251 | CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( |
2312 | 251 | CGM.getModule(), OMPRTL___kmpc_taskgroup), |
2313 | 251 | Args, |
2314 | 251 | OMPBuilder.getOrCreateRuntimeFunction( |
2315 | 251 | CGM.getModule(), OMPRTL___kmpc_end_taskgroup), |
2316 | 251 | Args); |
2317 | 251 | TaskgroupOpGen.setAction(Action); |
2318 | 251 | emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); |
2319 | 251 | } |
2320 | | |
2321 | | /// Given an array of pointers to variables, project the address of a |
2322 | | /// given variable. |
2323 | | static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, |
2324 | 1.35k | unsigned Index, const VarDecl *Var) { |
2325 | | // Pull out the pointer to the variable. |
2326 | 1.35k | Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index); |
2327 | 1.35k | llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); |
2328 | | |
2329 | 1.35k | llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType()); |
2330 | 1.35k | return Address( |
2331 | 1.35k | CGF.Builder.CreateBitCast( |
2332 | 1.35k | Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())), |
2333 | 1.35k | ElemTy, CGF.getContext().getDeclAlign(Var)); |
2334 | 1.35k | } |
2335 | | |
2336 | | static llvm::Value *emitCopyprivateCopyFunction( |
2337 | | CodeGenModule &CGM, llvm::Type *ArgsElemType, |
2338 | | ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, |
2339 | | ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps, |
2340 | 28 | SourceLocation Loc) { |
2341 | 28 | ASTContext &C = CGM.getContext(); |
2342 | | // void copy_func(void *LHSArg, void *RHSArg); |
2343 | 28 | FunctionArgList Args; |
2344 | 28 | ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, |
2345 | 28 | ImplicitParamDecl::Other); |
2346 | 28 | ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, |
2347 | 28 | ImplicitParamDecl::Other); |
2348 | 28 | Args.push_back(&LHSArg); |
2349 | 28 | Args.push_back(&RHSArg); |
2350 | 28 | const auto &CGFI = |
2351 | 28 | CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); |
2352 | 28 | std::string Name = |
2353 | 28 | CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"}); |
2354 | 28 | auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), |
2355 | 28 | llvm::GlobalValue::InternalLinkage, Name, |
2356 | 28 | &CGM.getModule()); |
2357 | 28 | CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); |
2358 | 28 | Fn->setDoesNotRecurse(); |
2359 | 28 | CodeGenFunction CGF(CGM); |
2360 | 28 | CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); |
2361 | | // Dest = (void*[n])(LHSArg); |
2362 | | // Src = (void*[n])(RHSArg); |
2363 | 28 | Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
2364 | 28 | CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), |
2365 | 28 | ArgsElemType->getPointerTo()), |
2366 | 28 | ArgsElemType, CGF.getPointerAlign()); |
2367 | 28 | Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
2368 | 28 | CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), |
2369 | 28 | ArgsElemType->getPointerTo()), |
2370 | 28 | ArgsElemType, CGF.getPointerAlign()); |
2371 | | // *(Type0*)Dst[0] = *(Type0*)Src[0]; |
2372 | | // *(Type1*)Dst[1] = *(Type1*)Src[1]; |
2373 | | // ... |
2374 | | // *(Typen*)Dst[n] = *(Typen*)Src[n]; |
2375 | 97 | for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I69 ) { |
2376 | 69 | const auto *DestVar = |
2377 | 69 | cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()); |
2378 | 69 | Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar); |
2379 | | |
2380 | 69 | const auto *SrcVar = |
2381 | 69 | cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()); |
2382 | 69 | Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar); |
2383 | | |
2384 | 69 | const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); |
2385 | 69 | QualType Type = VD->getType(); |
2386 | 69 | CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]); |
2387 | 69 | } |
2388 | 28 | CGF.FinishFunction(); |
2389 | 28 | return Fn; |
2390 | 28 | } |
2391 | | |
2392 | | void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, |
2393 | | const RegionCodeGenTy &SingleOpGen, |
2394 | | SourceLocation Loc, |
2395 | | ArrayRef<const Expr *> CopyprivateVars, |
2396 | | ArrayRef<const Expr *> SrcExprs, |
2397 | | ArrayRef<const Expr *> DstExprs, |
2398 | 61 | ArrayRef<const Expr *> AssignmentOps) { |
2399 | 61 | if (!CGF.HaveInsertPoint()) |
2400 | 0 | return; |
2401 | 61 | assert(CopyprivateVars.size() == SrcExprs.size() && |
2402 | 61 | CopyprivateVars.size() == DstExprs.size() && |
2403 | 61 | CopyprivateVars.size() == AssignmentOps.size()); |
2404 | 0 | ASTContext &C = CGM.getContext(); |
2405 | | // int32 did_it = 0; |
2406 | | // if(__kmpc_single(ident_t *, gtid)) { |
2407 | | // SingleOpGen(); |
2408 | | // __kmpc_end_single(ident_t *, gtid); |
2409 | | // did_it = 1; |
2410 | | // } |
2411 | | // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, |
2412 | | // <copy_func>, did_it); |
2413 | | |
2414 | 61 | Address DidIt = Address::invalid(); |
2415 | 61 | if (!CopyprivateVars.empty()) { |
2416 | | // int32 did_it = 0; |
2417 | 28 | QualType KmpInt32Ty = |
2418 | 28 | C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); |
2419 | 28 | DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); |
2420 | 28 | CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt); |
2421 | 28 | } |
2422 | | // Prepare arguments and build a call to __kmpc_single |
2423 | 61 | llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; |
2424 | 61 | CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( |
2425 | 61 | CGM.getModule(), OMPRTL___kmpc_single), |
2426 | 61 | Args, |
2427 | 61 | OMPBuilder.getOrCreateRuntimeFunction( |
2428 | 61 | CGM.getModule(), OMPRTL___kmpc_end_single), |
2429 | 61 | Args, |
2430 | 61 | /*Conditional=*/true); |
2431 | 61 | SingleOpGen.setAction(Action); |
2432 | 61 | emitInlinedDirective(CGF, OMPD_single, SingleOpGen); |
2433 | 61 | if (DidIt.isValid()) { |
2434 | | // did_it = 1; |
2435 | 28 | CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); |
2436 | 28 | } |
2437 | 61 | Action.Done(CGF); |
2438 | | // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, |
2439 | | // <copy_func>, did_it); |
2440 | 61 | if (DidIt.isValid()) { |
2441 | 28 | llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); |
2442 | 28 | QualType CopyprivateArrayTy = C.getConstantArrayType( |
2443 | 28 | C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, |
2444 | 28 | /*IndexTypeQuals=*/0); |
2445 | | // Create a list of all private variables for copyprivate. |
2446 | 28 | Address CopyprivateList = |
2447 | 28 | CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); |
2448 | 97 | for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I69 ) { |
2449 | 69 | Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I); |
2450 | 69 | CGF.Builder.CreateStore( |
2451 | 69 | CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
2452 | 69 | CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF), |
2453 | 69 | CGF.VoidPtrTy), |
2454 | 69 | Elem); |
2455 | 69 | } |
2456 | | // Build function that copies private values from single region to all other |
2457 | | // threads in the corresponding parallel region. |
2458 | 28 | llvm::Value *CpyFn = emitCopyprivateCopyFunction( |
2459 | 28 | CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars, |
2460 | 28 | SrcExprs, DstExprs, AssignmentOps, Loc); |
2461 | 28 | llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy); |
2462 | 28 | Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
2463 | 28 | CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty); |
2464 | 28 | llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt); |
2465 | 28 | llvm::Value *Args[] = { |
2466 | 28 | emitUpdateLocation(CGF, Loc), // ident_t *<loc> |
2467 | 28 | getThreadID(CGF, Loc), // i32 <gtid> |
2468 | 28 | BufSize, // size_t <buf_size> |
2469 | 28 | CL.getPointer(), // void *<copyprivate list> |
2470 | 28 | CpyFn, // void (*) (void *, void *) <copy_func> |
2471 | 28 | DidItVal // i32 did_it |
2472 | 28 | }; |
2473 | 28 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
2474 | 28 | CGM.getModule(), OMPRTL___kmpc_copyprivate), |
2475 | 28 | Args); |
2476 | 28 | } |
2477 | 61 | } |
2478 | | |
2479 | | void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, |
2480 | | const RegionCodeGenTy &OrderedOpGen, |
2481 | 24 | SourceLocation Loc, bool IsThreads) { |
2482 | 24 | if (!CGF.HaveInsertPoint()) |
2483 | 0 | return; |
2484 | | // __kmpc_ordered(ident_t *, gtid); |
2485 | | // OrderedOpGen(); |
2486 | | // __kmpc_end_ordered(ident_t *, gtid); |
2487 | | // Prepare arguments and build a call to __kmpc_ordered |
2488 | 24 | if (IsThreads) { |
2489 | 16 | llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; |
2490 | 16 | CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( |
2491 | 16 | CGM.getModule(), OMPRTL___kmpc_ordered), |
2492 | 16 | Args, |
2493 | 16 | OMPBuilder.getOrCreateRuntimeFunction( |
2494 | 16 | CGM.getModule(), OMPRTL___kmpc_end_ordered), |
2495 | 16 | Args); |
2496 | 16 | OrderedOpGen.setAction(Action); |
2497 | 16 | emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); |
2498 | 16 | return; |
2499 | 16 | } |
2500 | 8 | emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); |
2501 | 8 | } |
2502 | | |
2503 | 1.01k | unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { |
2504 | 1.01k | unsigned Flags; |
2505 | 1.01k | if (Kind == OMPD_for) |
2506 | 618 | Flags = OMP_IDENT_BARRIER_IMPL_FOR; |
2507 | 392 | else if (Kind == OMPD_sections) |
2508 | 48 | Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; |
2509 | 344 | else if (Kind == OMPD_single) |
2510 | 26 | Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; |
2511 | 318 | else if (Kind == OMPD_barrier) |
2512 | 14 | Flags = OMP_IDENT_BARRIER_EXPL; |
2513 | 304 | else |
2514 | 304 | Flags = OMP_IDENT_BARRIER_IMPL; |
2515 | 1.01k | return Flags; |
2516 | 1.01k | } |
2517 | | |
2518 | | void CGOpenMPRuntime::getDefaultScheduleAndChunk( |
2519 | | CodeGenFunction &CGF, const OMPLoopDirective &S, |
2520 | 3.44k | OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const { |
2521 | | // Check if the loop directive is actually a doacross loop directive. In this |
2522 | | // case choose static, 1 schedule. |
2523 | 3.44k | if (llvm::any_of( |
2524 | 3.44k | S.getClausesOfKind<OMPOrderedClause>(), |
2525 | 3.44k | [](const OMPOrderedClause *C) { return C->getNumForLoops(); }36 )) { |
2526 | 24 | ScheduleKind = OMPC_SCHEDULE_static; |
2527 | | // Chunk size is 1 in this case. |
2528 | 24 | llvm::APInt ChunkSize(32, 1); |
2529 | 24 | ChunkExpr = IntegerLiteral::Create( |
2530 | 24 | CGF.getContext(), ChunkSize, |
2531 | 24 | CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), |
2532 | 24 | SourceLocation()); |
2533 | 24 | } |
2534 | 3.44k | } |
2535 | | |
2536 | | void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, |
2537 | | OpenMPDirectiveKind Kind, bool EmitChecks, |
2538 | 889 | bool ForceSimpleCall) { |
2539 | | // Check if we should use the OMPBuilder |
2540 | 889 | auto *OMPRegionInfo = |
2541 | 889 | dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo); |
2542 | 889 | if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { |
2543 | 52 | CGF.Builder.restoreIP(OMPBuilder.createBarrier( |
2544 | 52 | CGF.Builder, Kind, ForceSimpleCall, EmitChecks)); |
2545 | 52 | return; |
2546 | 52 | } |
2547 | | |
2548 | 837 | if (!CGF.HaveInsertPoint()) |
2549 | 0 | return; |
2550 | | // Build call __kmpc_cancel_barrier(loc, thread_id); |
2551 | | // Build call __kmpc_barrier(loc, thread_id); |
2552 | 837 | unsigned Flags = getDefaultFlagsForBarriers(Kind); |
2553 | | // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, |
2554 | | // thread_id); |
2555 | 837 | llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), |
2556 | 837 | getThreadID(CGF, Loc)}; |
2557 | 837 | if (OMPRegionInfo) { |
2558 | 510 | if (!ForceSimpleCall && OMPRegionInfo->hasCancel()320 ) { |
2559 | 32 | llvm::Value *Result = CGF.EmitRuntimeCall( |
2560 | 32 | OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), |
2561 | 32 | OMPRTL___kmpc_cancel_barrier), |
2562 | 32 | Args); |
2563 | 32 | if (EmitChecks) { |
2564 | | // if (__kmpc_cancel_barrier()) { |
2565 | | // exit from construct; |
2566 | | // } |
2567 | 4 | llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); |
2568 | 4 | llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); |
2569 | 4 | llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); |
2570 | 4 | CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); |
2571 | 4 | CGF.EmitBlock(ExitBB); |
2572 | | // exit from construct; |
2573 | 4 | CodeGenFunction::JumpDest CancelDestination = |
2574 | 4 | CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); |
2575 | 4 | CGF.EmitBranchThroughCleanup(CancelDestination); |
2576 | 4 | CGF.EmitBlock(ContBB, /*IsFinished=*/true); |
2577 | 4 | } |
2578 | 32 | return; |
2579 | 32 | } |
2580 | 510 | } |
2581 | 805 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
2582 | 805 | CGM.getModule(), OMPRTL___kmpc_barrier), |
2583 | 805 | Args); |
2584 | 805 | } |
2585 | | |
2586 | | /// Map the OpenMP loop schedule to the runtime enumeration. |
2587 | | static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, |
2588 | 16.4k | bool Chunked, bool Ordered) { |
2589 | 16.4k | switch (ScheduleKind) { |
2590 | 2.52k | case OMPC_SCHEDULE_static: |
2591 | 2.52k | return Chunked ? (1.54k Ordered1.54k ? OMP_ord_static_chunked7 : OMP_sch_static_chunked1.53k ) |
2592 | 2.52k | : (983 Ordered983 ? OMP_ord_static10 : OMP_sch_static973 ); |
2593 | 1.95k | case OMPC_SCHEDULE_dynamic: |
2594 | 1.95k | return Ordered ? OMP_ord_dynamic_chunked8 : OMP_sch_dynamic_chunked1.94k ; |
2595 | 543 | case OMPC_SCHEDULE_guided: |
2596 | 543 | return Ordered ? OMP_ord_guided_chunked1 : OMP_sch_guided_chunked542 ; |
2597 | 560 | case OMPC_SCHEDULE_runtime: |
2598 | 560 | return Ordered ? OMP_ord_runtime10 : OMP_sch_runtime550 ; |
2599 | 562 | case OMPC_SCHEDULE_auto: |
2600 | 562 | return Ordered ? OMP_ord_auto9 : OMP_sch_auto553 ; |
2601 | 10.2k | case OMPC_SCHEDULE_unknown: |
2602 | 10.2k | assert(!Chunked && "chunk was specified but schedule kind not known"); |
2603 | 10.2k | return Ordered ? OMP_ord_static12 : OMP_sch_static10.2k ; |
2604 | 16.4k | } |
2605 | 0 | llvm_unreachable("Unexpected runtime schedule"); |
2606 | 0 | } |
2607 | | |
2608 | | /// Map the OpenMP distribute schedule to the runtime enumeration. |
2609 | | static OpenMPSchedType |
2610 | 13.4k | getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { |
2611 | | // only static is allowed for dist_schedule |
2612 | 13.4k | return Chunked ? OMP_dist_sch_static_chunked1.58k : OMP_dist_sch_static11.9k ; |
2613 | 13.4k | } |
2614 | | |
2615 | | bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, |
2616 | 5.77k | bool Chunked) const { |
2617 | 5.77k | OpenMPSchedType Schedule = |
2618 | 5.77k | getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); |
2619 | 5.77k | return Schedule == OMP_sch_static; |
2620 | 5.77k | } |
2621 | | |
2622 | | bool CGOpenMPRuntime::isStaticNonchunked( |
2623 | 4.49k | OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { |
2624 | 4.49k | OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); |
2625 | 4.49k | return Schedule == OMP_dist_sch_static; |
2626 | 4.49k | } |
2627 | | |
2628 | | bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, |
2629 | 4.77k | bool Chunked) const { |
2630 | 4.77k | OpenMPSchedType Schedule = |
2631 | 4.77k | getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); |
2632 | 4.77k | return Schedule == OMP_sch_static_chunked; |
2633 | 4.77k | } |
2634 | | |
2635 | | bool CGOpenMPRuntime::isStaticChunked( |
2636 | 4.49k | OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { |
2637 | 4.49k | OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); |
2638 | 4.49k | return Schedule == OMP_dist_sch_static_chunked; |
2639 | 4.49k | } |
2640 | | |
2641 | 998 | bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { |
2642 | 998 | OpenMPSchedType Schedule = |
2643 | 998 | getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); |
2644 | 998 | assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); |
2645 | 0 | return Schedule != OMP_sch_static; |
2646 | 998 | } |
2647 | | |
2648 | | static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, |
2649 | | OpenMPScheduleClauseModifier M1, |
2650 | 9.39k | OpenMPScheduleClauseModifier M2) { |
2651 | 9.39k | int Modifier = 0; |
2652 | 9.39k | switch (M1) { |
2653 | 17 | case OMPC_SCHEDULE_MODIFIER_monotonic: |
2654 | 17 | Modifier = OMP_sch_modifier_monotonic; |
2655 | 17 | break; |
2656 | 12 | case OMPC_SCHEDULE_MODIFIER_nonmonotonic: |
2657 | 12 | Modifier = OMP_sch_modifier_nonmonotonic; |
2658 | 12 | break; |
2659 | 12 | case OMPC_SCHEDULE_MODIFIER_simd: |
2660 | 12 | if (Schedule == OMP_sch_static_chunked) |
2661 | 6 | Schedule = OMP_sch_static_balanced_chunked; |
2662 | 12 | break; |
2663 | 0 | case OMPC_SCHEDULE_MODIFIER_last: |
2664 | 9.35k | case OMPC_SCHEDULE_MODIFIER_unknown: |
2665 | 9.35k | break; |
2666 | 9.39k | } |
2667 | 9.39k | switch (M2) { |
2668 | 0 | case OMPC_SCHEDULE_MODIFIER_monotonic: |
2669 | 0 | Modifier = OMP_sch_modifier_monotonic; |
2670 | 0 | break; |
2671 | 6 | case OMPC_SCHEDULE_MODIFIER_nonmonotonic: |
2672 | 6 | Modifier = OMP_sch_modifier_nonmonotonic; |
2673 | 6 | break; |
2674 | 0 | case OMPC_SCHEDULE_MODIFIER_simd: |
2675 | 0 | if (Schedule == OMP_sch_static_chunked) |
2676 | 0 | Schedule = OMP_sch_static_balanced_chunked; |
2677 | 0 | break; |
2678 | 0 | case OMPC_SCHEDULE_MODIFIER_last: |
2679 | 9.39k | case OMPC_SCHEDULE_MODIFIER_unknown: |
2680 | 9.39k | break; |
2681 | 9.39k | } |
2682 | | // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description. |
2683 | | // If the static schedule kind is specified or if the ordered clause is |
2684 | | // specified, and if the nonmonotonic modifier is not specified, the effect is |
2685 | | // as if the monotonic modifier is specified. Otherwise, unless the monotonic |
2686 | | // modifier is specified, the effect is as if the nonmonotonic modifier is |
2687 | | // specified. |
2688 | 9.39k | if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 06.35k ) { |
2689 | 6.32k | if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static6.01k || |
2690 | 6.32k | Schedule == OMP_sch_static_balanced_chunked3.58k || |
2691 | 6.32k | Schedule == OMP_ord_static_chunked3.58k || Schedule == OMP_ord_static3.57k || |
2692 | 6.32k | Schedule == OMP_dist_sch_static_chunked3.56k || |
2693 | 6.32k | Schedule == OMP_dist_sch_static3.14k )) |
2694 | 525 | Modifier = OMP_sch_modifier_nonmonotonic; |
2695 | 6.32k | } |
2696 | 9.39k | return Schedule | Modifier; |
2697 | 9.39k | } |
2698 | | |
2699 | | void CGOpenMPRuntime::emitForDispatchInit( |
2700 | | CodeGenFunction &CGF, SourceLocation Loc, |
2701 | | const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, |
2702 | 764 | bool Ordered, const DispatchRTInput &DispatchValues) { |
2703 | 764 | if (!CGF.HaveInsertPoint()) |
2704 | 0 | return; |
2705 | 764 | OpenMPSchedType Schedule = getRuntimeSchedule( |
2706 | 764 | ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); |
2707 | 764 | assert(Ordered || |
2708 | 764 | (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && |
2709 | 764 | Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && |
2710 | 764 | Schedule != OMP_sch_static_balanced_chunked)); |
2711 | | // Call __kmpc_dispatch_init( |
2712 | | // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, |
2713 | | // kmp_int[32|64] lower, kmp_int[32|64] upper, |
2714 | | // kmp_int[32|64] stride, kmp_int[32|64] chunk); |
2715 | | |
2716 | | // If the Chunk was not specified in the clause - use default value 1. |
2717 | 764 | llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk139 |
2718 | 764 | : CGF.Builder.getIntN(IVSize, 1)625 ; |
2719 | 764 | llvm::Value *Args[] = { |
2720 | 764 | emitUpdateLocation(CGF, Loc), |
2721 | 764 | getThreadID(CGF, Loc), |
2722 | 764 | CGF.Builder.getInt32(addMonoNonMonoModifier( |
2723 | 764 | CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type |
2724 | 764 | DispatchValues.LB, // Lower |
2725 | 764 | DispatchValues.UB, // Upper |
2726 | 764 | CGF.Builder.getIntN(IVSize, 1), // Stride |
2727 | 764 | Chunk // Chunk |
2728 | 764 | }; |
2729 | 764 | CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); |
2730 | 764 | } |
2731 | | |
2732 | | static void emitForStaticInitCall( |
2733 | | CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, |
2734 | | llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, |
2735 | | OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, |
2736 | 8.63k | const CGOpenMPRuntime::StaticRTInput &Values) { |
2737 | 8.63k | if (!CGF.HaveInsertPoint()) |
2738 | 0 | return; |
2739 | | |
2740 | 8.63k | assert(!Values.Ordered); |
2741 | 0 | assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || |
2742 | 8.63k | Schedule == OMP_sch_static_balanced_chunked || |
2743 | 8.63k | Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || |
2744 | 8.63k | Schedule == OMP_dist_sch_static || |
2745 | 8.63k | Schedule == OMP_dist_sch_static_chunked); |
2746 | | |
2747 | | // Call __kmpc_for_static_init( |
2748 | | // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, |
2749 | | // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, |
2750 | | // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, |
2751 | | // kmp_int[32|64] incr, kmp_int[32|64] chunk); |
2752 | 0 | llvm::Value *Chunk = Values.Chunk; |
2753 | 8.63k | if (Chunk == nullptr) { |
2754 | 7.69k | assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || |
2755 | 7.69k | Schedule == OMP_dist_sch_static) && |
2756 | 7.69k | "expected static non-chunked schedule"); |
2757 | | // If the Chunk was not specified in the clause - use default value 1. |
2758 | 0 | Chunk = CGF.Builder.getIntN(Values.IVSize, 1); |
2759 | 7.69k | } else { |
2760 | 942 | assert((Schedule == OMP_sch_static_chunked || |
2761 | 942 | Schedule == OMP_sch_static_balanced_chunked || |
2762 | 942 | Schedule == OMP_ord_static_chunked || |
2763 | 942 | Schedule == OMP_dist_sch_static_chunked) && |
2764 | 942 | "expected static chunked schedule"); |
2765 | 942 | } |
2766 | 0 | llvm::Value *Args[] = { |
2767 | 8.63k | UpdateLocation, |
2768 | 8.63k | ThreadId, |
2769 | 8.63k | CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1, |
2770 | 8.63k | M2)), // Schedule type |
2771 | 8.63k | Values.IL.getPointer(), // &isLastIter |
2772 | 8.63k | Values.LB.getPointer(), // &LB |
2773 | 8.63k | Values.UB.getPointer(), // &UB |
2774 | 8.63k | Values.ST.getPointer(), // &Stride |
2775 | 8.63k | CGF.Builder.getIntN(Values.IVSize, 1), // Incr |
2776 | 8.63k | Chunk // Chunk |
2777 | 8.63k | }; |
2778 | 8.63k | CGF.EmitRuntimeCall(ForStaticInitFunction, Args); |
2779 | 8.63k | } |
2780 | | |
2781 | | void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, |
2782 | | SourceLocation Loc, |
2783 | | OpenMPDirectiveKind DKind, |
2784 | | const OpenMPScheduleTy &ScheduleKind, |
2785 | 4.13k | const StaticRTInput &Values) { |
2786 | 4.13k | OpenMPSchedType ScheduleNum = getRuntimeSchedule( |
2787 | 4.13k | ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); |
2788 | 4.13k | assert(isOpenMPWorksharingDirective(DKind) && |
2789 | 4.13k | "Expected loop-based or sections-based directive."); |
2790 | 0 | llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, |
2791 | 4.13k | isOpenMPLoopDirective(DKind) |
2792 | 4.13k | ? OMP_IDENT_WORK_LOOP4.05k |
2793 | 4.13k | : OMP_IDENT_WORK_SECTIONS80 ); |
2794 | 4.13k | llvm::Value *ThreadId = getThreadID(CGF, Loc); |
2795 | 4.13k | llvm::FunctionCallee StaticInitFunction = |
2796 | 4.13k | createForStaticInitFunction(Values.IVSize, Values.IVSigned, false); |
2797 | 4.13k | auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); |
2798 | 4.13k | emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, |
2799 | 4.13k | ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); |
2800 | 4.13k | } |
2801 | | |
2802 | | void CGOpenMPRuntime::emitDistributeStaticInit( |
2803 | | CodeGenFunction &CGF, SourceLocation Loc, |
2804 | | OpenMPDistScheduleClauseKind SchedKind, |
2805 | 4.49k | const CGOpenMPRuntime::StaticRTInput &Values) { |
2806 | 4.49k | OpenMPSchedType ScheduleNum = |
2807 | 4.49k | getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); |
2808 | 4.49k | llvm::Value *UpdatedLocation = |
2809 | 4.49k | emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); |
2810 | 4.49k | llvm::Value *ThreadId = getThreadID(CGF, Loc); |
2811 | 4.49k | llvm::FunctionCallee StaticInitFunction; |
2812 | 4.49k | bool isGPUDistribute = |
2813 | 4.49k | CGM.getLangOpts().OpenMPIsDevice && |
2814 | 4.49k | (859 CGM.getTriple().isAMDGCN()859 || CGM.getTriple().isNVPTX()859 ); |
2815 | 4.49k | StaticInitFunction = createForStaticInitFunction( |
2816 | 4.49k | Values.IVSize, Values.IVSigned, isGPUDistribute); |
2817 | | |
2818 | 4.49k | emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, |
2819 | 4.49k | ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, |
2820 | 4.49k | OMPC_SCHEDULE_MODIFIER_unknown, Values); |
2821 | 4.49k | } |
2822 | | |
2823 | | void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, |
2824 | | SourceLocation Loc, |
2825 | 8.68k | OpenMPDirectiveKind DKind) { |
2826 | 8.68k | if (!CGF.HaveInsertPoint()) |
2827 | 0 | return; |
2828 | | // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); |
2829 | 8.68k | llvm::Value *Args[] = { |
2830 | 8.68k | emitUpdateLocation(CGF, Loc, |
2831 | 8.68k | isOpenMPDistributeDirective(DKind) |
2832 | 8.68k | ? OMP_IDENT_WORK_DISTRIBUTE6.83k |
2833 | 8.68k | : isOpenMPLoopDirective(DKind)1.84k |
2834 | 1.84k | ? OMP_IDENT_WORK_LOOP1.73k |
2835 | 1.84k | : OMP_IDENT_WORK_SECTIONS112 ), |
2836 | 8.68k | getThreadID(CGF, Loc)}; |
2837 | 8.68k | auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); |
2838 | 8.68k | if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice6.83k && |
2839 | 8.68k | (1.17k CGM.getTriple().isAMDGCN()1.17k || CGM.getTriple().isNVPTX()1.17k )) |
2840 | 432 | CGF.EmitRuntimeCall( |
2841 | 432 | OMPBuilder.getOrCreateRuntimeFunction( |
2842 | 432 | CGM.getModule(), OMPRTL___kmpc_distribute_static_fini), |
2843 | 432 | Args); |
2844 | 8.24k | else |
2845 | 8.24k | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
2846 | 8.24k | CGM.getModule(), OMPRTL___kmpc_for_static_fini), |
2847 | 8.24k | Args); |
2848 | 8.68k | } |
2849 | | |
2850 | | void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, |
2851 | | SourceLocation Loc, |
2852 | | unsigned IVSize, |
2853 | 57 | bool IVSigned) { |
2854 | 57 | if (!CGF.HaveInsertPoint()) |
2855 | 0 | return; |
2856 | | // Call __kmpc_dispatch_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); |
2857 | 57 | llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; |
2858 | 57 | CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); |
2859 | 57 | } |
2860 | | |
2861 | | llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, |
2862 | | SourceLocation Loc, unsigned IVSize, |
2863 | | bool IVSigned, Address IL, |
2864 | | Address LB, Address UB, |
2865 | 764 | Address ST) { |
2866 | | // Call __kmpc_dispatch_next( |
2867 | | // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, |
2868 | | // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, |
2869 | | // kmp_int[32|64] *p_stride); |
2870 | 764 | llvm::Value *Args[] = { |
2871 | 764 | emitUpdateLocation(CGF, Loc), |
2872 | 764 | getThreadID(CGF, Loc), |
2873 | 764 | IL.getPointer(), // &isLastIter |
2874 | 764 | LB.getPointer(), // &Lower |
2875 | 764 | UB.getPointer(), // &Upper |
2876 | 764 | ST.getPointer() // &Stride |
2877 | 764 | }; |
2878 | 764 | llvm::Value *Call = |
2879 | 764 | CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); |
2880 | 764 | return CGF.EmitScalarConversion( |
2881 | 764 | Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1), |
2882 | 764 | CGF.getContext().BoolTy, Loc); |
2883 | 764 | } |
2884 | | |
2885 | | void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, |
2886 | | llvm::Value *NumThreads, |
2887 | 330 | SourceLocation Loc) { |
2888 | 330 | if (!CGF.HaveInsertPoint()) |
2889 | 0 | return; |
2890 | | // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) |
2891 | 330 | llvm::Value *Args[] = { |
2892 | 330 | emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), |
2893 | 330 | CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; |
2894 | 330 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
2895 | 330 | CGM.getModule(), OMPRTL___kmpc_push_num_threads), |
2896 | 330 | Args); |
2897 | 330 | } |
2898 | | |
2899 | | void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, |
2900 | | ProcBindKind ProcBind, |
2901 | 62 | SourceLocation Loc) { |
2902 | 62 | if (!CGF.HaveInsertPoint()) |
2903 | 0 | return; |
2904 | 62 | assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value."); |
2905 | | // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) |
2906 | 0 | llvm::Value *Args[] = { |
2907 | 62 | emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), |
2908 | 62 | llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)}; |
2909 | 62 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
2910 | 62 | CGM.getModule(), OMPRTL___kmpc_push_proc_bind), |
2911 | 62 | Args); |
2912 | 62 | } |
2913 | | |
2914 | | void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, |
2915 | 104 | SourceLocation Loc, llvm::AtomicOrdering AO) { |
2916 | 104 | if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { |
2917 | 20 | OMPBuilder.createFlush(CGF.Builder); |
2918 | 84 | } else { |
2919 | 84 | if (!CGF.HaveInsertPoint()) |
2920 | 0 | return; |
2921 | | // Build call void __kmpc_flush(ident_t *loc) |
2922 | 84 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
2923 | 84 | CGM.getModule(), OMPRTL___kmpc_flush), |
2924 | 84 | emitUpdateLocation(CGF, Loc)); |
2925 | 84 | } |
2926 | 104 | } |
2927 | | |
2928 | | namespace { |
2929 | | /// Indexes of fields for type kmp_task_t. |
2930 | | enum KmpTaskTFields { |
2931 | | /// List of shared variables. |
2932 | | KmpTaskTShareds, |
2933 | | /// Task routine. |
2934 | | KmpTaskTRoutine, |
2935 | | /// Partition id for the untied tasks. |
2936 | | KmpTaskTPartId, |
2937 | | /// Function with call of destructors for private variables. |
2938 | | Data1, |
2939 | | /// Task priority. |
2940 | | Data2, |
2941 | | /// (Taskloops only) Lower bound. |
2942 | | KmpTaskTLowerBound, |
2943 | | /// (Taskloops only) Upper bound. |
2944 | | KmpTaskTUpperBound, |
2945 | | /// (Taskloops only) Stride. |
2946 | | KmpTaskTStride, |
2947 | | /// (Taskloops only) Is last iteration flag. |
2948 | | KmpTaskTLastIter, |
2949 | | /// (Taskloops only) Reduction data. |
2950 | | KmpTaskTReductions, |
2951 | | }; |
2952 | | } // anonymous namespace |
2953 | | |
2954 | 5.58k | bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { |
2955 | 5.58k | return OffloadEntriesTargetRegion.empty() && |
2956 | 5.58k | OffloadEntriesDeviceGlobalVar.empty()1.20k ; |
2957 | 5.58k | } |
2958 | | |
2959 | | /// Initialize target region entry. |
2960 | | void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: |
2961 | | initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, |
2962 | | StringRef ParentName, unsigned LineNum, |
2963 | 2.82k | unsigned Order) { |
2964 | 2.82k | assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " |
2965 | 2.82k | "only required for the device " |
2966 | 2.82k | "code generation."); |
2967 | 0 | OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = |
2968 | 2.82k | OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, |
2969 | 2.82k | OMPTargetRegionEntryTargetRegion); |
2970 | 2.82k | ++OffloadingEntriesNum; |
2971 | 2.82k | } |
2972 | | |
2973 | | void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: |
2974 | | registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, |
2975 | | StringRef ParentName, unsigned LineNum, |
2976 | | llvm::Constant *Addr, llvm::Constant *ID, |
2977 | 11.6k | OMPTargetRegionEntryKind Flags) { |
2978 | | // If we are emitting code for a target, the entry is already initialized, |
2979 | | // only has to be registered. |
2980 | 11.6k | if (CGM.getLangOpts().OpenMPIsDevice) { |
2981 | | // This could happen if the device compilation is invoked standalone. |
2982 | 2.79k | if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) |
2983 | 0 | return; |
2984 | 2.79k | auto &Entry = |
2985 | 2.79k | OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; |
2986 | 2.79k | Entry.setAddress(Addr); |
2987 | 2.79k | Entry.setID(ID); |
2988 | 2.79k | Entry.setFlags(Flags); |
2989 | 8.83k | } else { |
2990 | 8.83k | if (Flags == |
2991 | 8.83k | OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion && |
2992 | 8.83k | hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum, |
2993 | 8.72k | /*IgnoreAddressId*/ true)) |
2994 | 4 | return; |
2995 | 8.82k | assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && |
2996 | 8.82k | "Target region entry already registered!"); |
2997 | 0 | OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); |
2998 | 8.82k | OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; |
2999 | 8.82k | ++OffloadingEntriesNum; |
3000 | 8.82k | } |
3001 | 11.6k | } |
3002 | | |
3003 | | bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( |
3004 | | unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, |
3005 | 24.0k | bool IgnoreAddressId) const { |
3006 | 24.0k | auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); |
3007 | 24.0k | if (PerDevice == OffloadEntriesTargetRegion.end()) |
3008 | 3.88k | return false; |
3009 | 20.1k | auto PerFile = PerDevice->second.find(FileID); |
3010 | 20.1k | if (PerFile == PerDevice->second.end()) |
3011 | 0 | return false; |
3012 | 20.1k | auto PerParentName = PerFile->second.find(ParentName); |
3013 | 20.1k | if (PerParentName == PerFile->second.end()) |
3014 | 5.99k | return false; |
3015 | 14.1k | auto PerLine = PerParentName->second.find(LineNum); |
3016 | 14.1k | if (PerLine == PerParentName->second.end()) |
3017 | 8.42k | return false; |
3018 | | // Fail if this entry is already registered. |
3019 | 5.72k | if (!IgnoreAddressId && |
3020 | 5.72k | (5.72k PerLine->second.getAddress()5.72k || PerLine->second.getID()5.44k )) |
3021 | 279 | return false; |
3022 | 5.45k | return true; |
3023 | 5.72k | } |
3024 | | |
3025 | | void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( |
3026 | 2.44k | const OffloadTargetRegionEntryInfoActTy &Action) { |
3027 | | // Scan all target region entries and perform the provided action. |
3028 | 2.44k | for (const auto &D : OffloadEntriesTargetRegion) |
3029 | 2.43k | for (const auto &F : D.second) |
3030 | 2.43k | for (const auto &P : F.second) |
3031 | 6.50k | for (const auto &L : P.second) |
3032 | 11.6k | Action(D.first, F.first, P.first(), L.first, L.second); |
3033 | 2.44k | } |
3034 | | |
3035 | | void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: |
3036 | | initializeDeviceGlobalVarEntryInfo(StringRef Name, |
3037 | | OMPTargetGlobalVarEntryKind Flags, |
3038 | 148 | unsigned Order) { |
3039 | 148 | assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " |
3040 | 148 | "only required for the device " |
3041 | 148 | "code generation."); |
3042 | 0 | OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); |
3043 | 148 | ++OffloadingEntriesNum; |
3044 | 148 | } |
3045 | | |
3046 | | void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: |
3047 | | registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, |
3048 | | CharUnits VarSize, |
3049 | | OMPTargetGlobalVarEntryKind Flags, |
3050 | 971 | llvm::GlobalValue::LinkageTypes Linkage) { |
3051 | 971 | if (CGM.getLangOpts().OpenMPIsDevice) { |
3052 | | // This could happen if the device compilation is invoked standalone. |
3053 | 308 | if (!hasDeviceGlobalVarEntryInfo(VarName)) |
3054 | 6 | return; |
3055 | 302 | auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; |
3056 | 302 | if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)158 ) { |
3057 | 158 | if (Entry.getVarSize().isZero()) { |
3058 | 6 | Entry.setVarSize(VarSize); |
3059 | 6 | Entry.setLinkage(Linkage); |
3060 | 6 | } |
3061 | 158 | return; |
3062 | 158 | } |
3063 | 144 | Entry.setVarSize(VarSize); |
3064 | 144 | Entry.setLinkage(Linkage); |
3065 | 144 | Entry.setAddress(Addr); |
3066 | 663 | } else { |
3067 | 663 | if (hasDeviceGlobalVarEntryInfo(VarName)) { |
3068 | 482 | auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; |
3069 | 482 | assert(Entry.isValid() && Entry.getFlags() == Flags && |
3070 | 482 | "Entry not initialized!"); |
3071 | 482 | if (Entry.getVarSize().isZero()) { |
3072 | 28 | Entry.setVarSize(VarSize); |
3073 | 28 | Entry.setLinkage(Linkage); |
3074 | 28 | } |
3075 | 482 | return; |
3076 | 482 | } |
3077 | 181 | OffloadEntriesDeviceGlobalVar.try_emplace( |
3078 | 181 | VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage); |
3079 | 181 | ++OffloadingEntriesNum; |
3080 | 181 | } |
3081 | 971 | } |
3082 | | |
3083 | | void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: |
3084 | | actOnDeviceGlobalVarEntriesInfo( |
3085 | 2.44k | const OffloadDeviceGlobalVarEntryInfoActTy &Action) { |
3086 | | // Scan all device global variable entries and perform the provided action. |
3087 | 2.44k | for (const auto &E : OffloadEntriesDeviceGlobalVar) |
3088 | 325 | Action(E.getKey(), E.getValue()); |
3089 | 2.44k | } |
3090 | | |
3091 | | void CGOpenMPRuntime::createOffloadEntry( |
3092 | | llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, |
3093 | 11.0k | llvm::GlobalValue::LinkageTypes Linkage) { |
3094 | 11.0k | OMPBuilder.emitOffloadingEntry(ID, Addr->getName(), Size, Flags); |
3095 | 11.0k | } |
3096 | | |
3097 | 5.86k | void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { |
3098 | | // Emit the offloading entries and metadata so that the device codegen side |
3099 | | // can easily figure out what to emit. The produced metadata looks like |
3100 | | // this: |
3101 | | // |
3102 | | // !omp_offload.info = !{!1, ...} |
3103 | | // |
3104 | | // Right now we only generate metadata for functions that contain target |
3105 | | // regions. |
3106 | | |
3107 | | // If we are in simd mode or there are no entries, we don't need to do |
3108 | | // anything. |
3109 | 5.86k | if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()3.39k ) |
3110 | 3.41k | return; |
3111 | | |
3112 | 2.44k | llvm::Module &M = CGM.getModule(); |
3113 | 2.44k | llvm::LLVMContext &C = M.getContext(); |
3114 | 2.44k | SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, |
3115 | 2.44k | SourceLocation, StringRef>, |
3116 | 2.44k | 16> |
3117 | 2.44k | OrderedEntries(OffloadEntriesInfoManager.size()); |
3118 | 2.44k | llvm::SmallVector<StringRef, 16> ParentFunctions( |
3119 | 2.44k | OffloadEntriesInfoManager.size()); |
3120 | | |
3121 | | // Auxiliary methods to create metadata values and strings. |
3122 | 58.9k | auto &&GetMDInt = [this](unsigned V) { |
3123 | 58.9k | return llvm::ConstantAsMetadata::get( |
3124 | 58.9k | llvm::ConstantInt::get(CGM.Int32Ty, V)); |
3125 | 58.9k | }; |
3126 | | |
3127 | 11.9k | auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; |
3128 | | |
3129 | | // Create the offloading info metadata node. |
3130 | 2.44k | llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); |
3131 | | |
3132 | | // Create function that emits metadata for each target region entry; |
3133 | 2.44k | auto &&TargetRegionMetadataEmitter = |
3134 | 2.44k | [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, |
3135 | 2.44k | &GetMDString]( |
3136 | 2.44k | unsigned DeviceID, unsigned FileID, StringRef ParentName, |
3137 | 2.44k | unsigned Line, |
3138 | 11.6k | const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { |
3139 | | // Generate metadata for target regions. Each entry of this metadata |
3140 | | // contains: |
3141 | | // - Entry 0 -> Kind of this type of metadata (0). |
3142 | | // - Entry 1 -> Device ID of the file where the entry was identified. |
3143 | | // - Entry 2 -> File ID of the file where the entry was identified. |
3144 | | // - Entry 3 -> Mangled name of the function where the entry was |
3145 | | // identified. |
3146 | | // - Entry 4 -> Line in the file where the entry was identified. |
3147 | | // - Entry 5 -> Order the entry was created. |
3148 | | // The first element of the metadata node is the kind. |
3149 | 11.6k | llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), |
3150 | 11.6k | GetMDInt(FileID), GetMDString(ParentName), |
3151 | 11.6k | GetMDInt(Line), GetMDInt(E.getOrder())}; |
3152 | | |
3153 | 11.6k | SourceLocation Loc; |
3154 | 11.6k | for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), |
3155 | 11.6k | E = CGM.getContext().getSourceManager().fileinfo_end(); |
3156 | 24.9k | I != E; ++I13.3k ) { |
3157 | 13.3k | if (I->getFirst()->getUniqueID().getDevice() == DeviceID && |
3158 | 13.3k | I->getFirst()->getUniqueID().getFile() == FileID) { |
3159 | 0 | Loc = CGM.getContext().getSourceManager().translateFileLineCol( |
3160 | 0 | I->getFirst(), Line, 1); |
3161 | 0 | break; |
3162 | 0 | } |
3163 | 13.3k | } |
3164 | | // Save this entry in the right position of the ordered entries array. |
3165 | 11.6k | OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName); |
3166 | 11.6k | ParentFunctions[E.getOrder()] = ParentName; |
3167 | | |
3168 | | // Add metadata to the named metadata node. |
3169 | 11.6k | MD->addOperand(llvm::MDNode::get(C, Ops)); |
3170 | 11.6k | }; |
3171 | | |
3172 | 2.44k | OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( |
3173 | 2.44k | TargetRegionMetadataEmitter); |
3174 | | |
3175 | | // Create function that emits metadata for each device global variable entry; |
3176 | 2.44k | auto &&DeviceGlobalVarMetadataEmitter = |
3177 | 2.44k | [&C, &OrderedEntries, &GetMDInt, &GetMDString, |
3178 | 2.44k | MD](StringRef MangledName, |
3179 | 2.44k | const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar |
3180 | 2.44k | &E) { |
3181 | | // Generate metadata for global variables. Each entry of this metadata |
3182 | | // contains: |
3183 | | // - Entry 0 -> Kind of this type of metadata (1). |
3184 | | // - Entry 1 -> Mangled name of the variable. |
3185 | | // - Entry 2 -> Declare target kind. |
3186 | | // - Entry 3 -> Order the entry was created. |
3187 | | // The first element of the metadata node is the kind. |
3188 | 325 | llvm::Metadata *Ops[] = { |
3189 | 325 | GetMDInt(E.getKind()), GetMDString(MangledName), |
3190 | 325 | GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; |
3191 | | |
3192 | | // Save this entry in the right position of the ordered entries array. |
3193 | 325 | OrderedEntries[E.getOrder()] = |
3194 | 325 | std::make_tuple(&E, SourceLocation(), MangledName); |
3195 | | |
3196 | | // Add metadata to the named metadata node. |
3197 | 325 | MD->addOperand(llvm::MDNode::get(C, Ops)); |
3198 | 325 | }; |
3199 | | |
3200 | 2.44k | OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( |
3201 | 2.44k | DeviceGlobalVarMetadataEmitter); |
3202 | | |
3203 | 11.9k | for (const auto &E : OrderedEntries) { |
3204 | 11.9k | assert(std::get<0>(E) && "All ordered entries must exist!"); |
3205 | 11.9k | if (const auto *CE = |
3206 | 11.9k | dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( |
3207 | 11.9k | std::get<0>(E))) { |
3208 | 11.6k | if (!CE->getID() || !CE->getAddress()11.6k ) { |
3209 | | // Do not blame the entry if the parent function is not emitted. |
3210 | 4 | StringRef FnName = ParentFunctions[CE->getOrder()]; |
3211 | 4 | if (!CGM.GetGlobalValue(FnName)) |
3212 | 2 | continue; |
3213 | 2 | unsigned DiagID = CGM.getDiags().getCustomDiagID( |
3214 | 2 | DiagnosticsEngine::Error, |
3215 | 2 | "Offloading entry for target region in %0 is incorrect: either the " |
3216 | 2 | "address or the ID is invalid."); |
3217 | 2 | CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName; |
3218 | 2 | continue; |
3219 | 4 | } |
3220 | 11.6k | createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, |
3221 | 11.6k | CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); |
3222 | 11.6k | } else if (const auto *325 CE325 = dyn_cast<OffloadEntriesInfoManagerTy:: |
3223 | 325 | OffloadEntryInfoDeviceGlobalVar>( |
3224 | 325 | std::get<0>(E))) { |
3225 | 325 | OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = |
3226 | 325 | static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( |
3227 | 325 | CE->getFlags()); |
3228 | 325 | switch (Flags) { |
3229 | 275 | case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: { |
3230 | 275 | if (CGM.getLangOpts().OpenMPIsDevice && |
3231 | 275 | CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()126 ) |
3232 | 2 | continue; |
3233 | 273 | if (!CE->getAddress()) { |
3234 | 0 | unsigned DiagID = CGM.getDiags().getCustomDiagID( |
3235 | 0 | DiagnosticsEngine::Error, "Offloading entry for declare target " |
3236 | 0 | "variable %0 is incorrect: the " |
3237 | 0 | "address is invalid."); |
3238 | 0 | CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E); |
3239 | 0 | continue; |
3240 | 0 | } |
3241 | | // The variable has no definition - no need to add the entry. |
3242 | 273 | if (CE->getVarSize().isZero()) |
3243 | 49 | continue; |
3244 | 224 | break; |
3245 | 273 | } |
3246 | 224 | case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink: |
3247 | 50 | assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || |
3248 | 50 | (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && |
3249 | 50 | "Declaret target link address is set."); |
3250 | 50 | if (CGM.getLangOpts().OpenMPIsDevice) |
3251 | 18 | continue; |
3252 | 32 | if (!CE->getAddress()) { |
3253 | 0 | unsigned DiagID = CGM.getDiags().getCustomDiagID( |
3254 | 0 | DiagnosticsEngine::Error, |
3255 | 0 | "Offloading entry for declare target variable is incorrect: the " |
3256 | 0 | "address is invalid."); |
3257 | 0 | CGM.getDiags().Report(DiagID); |
3258 | 0 | continue; |
3259 | 0 | } |
3260 | 32 | break; |
3261 | 325 | } |
3262 | | |
3263 | | // Hidden or internal symbols on the device are not externally visible. We |
3264 | | // should not attempt to register them by creating an offloading entry. |
3265 | 256 | if (auto *GV = dyn_cast<llvm::GlobalValue>(CE->getAddress())) |
3266 | 256 | if (GV->hasLocalLinkage() || GV->hasHiddenVisibility()244 ) |
3267 | 13 | continue; |
3268 | | |
3269 | 243 | createOffloadEntry(CE->getAddress(), CE->getAddress(), |
3270 | 243 | CE->getVarSize().getQuantity(), Flags, |
3271 | 243 | CE->getLinkage()); |
3272 | 243 | } else { |
3273 | 0 | llvm_unreachable("Unsupported entry kind."); |
3274 | 0 | } |
3275 | 11.9k | } |
3276 | 2.44k | } |
3277 | | |
3278 | | /// Loads all the offload entries information from the host IR |
3279 | | /// metadata. |
3280 | 5.88k | void CGOpenMPRuntime::loadOffloadInfoMetadata() { |
3281 | | // If we are in target mode, load the metadata from the host IR. This code has |
3282 | | // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). |
3283 | | |
3284 | 5.88k | if (!CGM.getLangOpts().OpenMPIsDevice) |
3285 | 5.36k | return; |
3286 | | |
3287 | 521 | if (CGM.getLangOpts().OMPHostIRFile.empty()) |
3288 | 3 | return; |
3289 | | |
3290 | 518 | auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); |
3291 | 518 | if (auto EC = Buf.getError()) { |
3292 | 0 | CGM.getDiags().Report(diag::err_cannot_open_file) |
3293 | 0 | << CGM.getLangOpts().OMPHostIRFile << EC.message(); |
3294 | 0 | return; |
3295 | 0 | } |
3296 | | |
3297 | 518 | llvm::LLVMContext C; |
3298 | 518 | auto ME = expectedToErrorOrAndEmitErrors( |
3299 | 518 | C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); |
3300 | | |
3301 | 518 | if (auto EC = ME.getError()) { |
3302 | 0 | unsigned DiagID = CGM.getDiags().getCustomDiagID( |
3303 | 0 | DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'"); |
3304 | 0 | CGM.getDiags().Report(DiagID) |
3305 | 0 | << CGM.getLangOpts().OMPHostIRFile << EC.message(); |
3306 | 0 | return; |
3307 | 0 | } |
3308 | | |
3309 | 518 | llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); |
3310 | 518 | if (!MD) |
3311 | 27 | return; |
3312 | | |
3313 | 2.97k | for (llvm::MDNode *MN : MD->operands())491 { |
3314 | 14.5k | auto &&GetMDInt = [MN](unsigned Idx) { |
3315 | 14.5k | auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); |
3316 | 14.5k | return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); |
3317 | 14.5k | }; |
3318 | | |
3319 | 2.97k | auto &&GetMDString = [MN](unsigned Idx) { |
3320 | 2.97k | auto *V = cast<llvm::MDString>(MN->getOperand(Idx)); |
3321 | 2.97k | return V->getString(); |
3322 | 2.97k | }; |
3323 | | |
3324 | 2.97k | switch (GetMDInt(0)) { |
3325 | 0 | default: |
3326 | 0 | llvm_unreachable("Unexpected metadata!"); |
3327 | 0 | break; |
3328 | 2.82k | case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: |
3329 | 2.82k | OffloadingEntryInfoTargetRegion: |
3330 | 2.82k | OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( |
3331 | 2.82k | /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2), |
3332 | 2.82k | /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4), |
3333 | 2.82k | /*Order=*/GetMDInt(5)); |
3334 | 2.82k | break; |
3335 | 148 | case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: |
3336 | 148 | OffloadingEntryInfoDeviceGlobalVar: |
3337 | 148 | OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( |
3338 | 148 | /*MangledName=*/GetMDString(1), |
3339 | 148 | static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( |
3340 | 148 | /*Flags=*/GetMDInt(2)), |
3341 | 148 | /*Order=*/GetMDInt(3)); |
3342 | 148 | break; |
3343 | 2.97k | } |
3344 | 2.97k | } |
3345 | 491 | } |
3346 | | |
3347 | 930 | void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { |
3348 | 930 | if (!KmpRoutineEntryPtrTy) { |
3349 | | // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. |
3350 | 398 | ASTContext &C = CGM.getContext(); |
3351 | 398 | QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; |
3352 | 398 | FunctionProtoType::ExtProtoInfo EPI; |
3353 | 398 | KmpRoutineEntryPtrQTy = C.getPointerType( |
3354 | 398 | C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); |
3355 | 398 | KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); |
3356 | 398 | } |
3357 | 930 | } |
3358 | | |
3359 | | namespace { |
3360 | | struct PrivateHelpersTy { |
3361 | | PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original, |
3362 | | const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) |
3363 | | : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), |
3364 | 1.70k | PrivateElemInit(PrivateElemInit) {} |
3365 | 14 | PrivateHelpersTy(const VarDecl *Original) : Original(Original) {} |
3366 | | const Expr *OriginalRef = nullptr; |
3367 | | const VarDecl *Original = nullptr; |
3368 | | const VarDecl *PrivateCopy = nullptr; |
3369 | | const VarDecl *PrivateElemInit = nullptr; |
3370 | 5.54k | bool isLocalPrivate() const { |
3371 | 5.54k | return !OriginalRef && !PrivateCopy42 && !PrivateElemInit42 ; |
3372 | 5.54k | } |
3373 | | }; |
3374 | | typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; |
3375 | | } // anonymous namespace |
3376 | | |
3377 | 168 | static bool isAllocatableDecl(const VarDecl *VD) { |
3378 | 168 | const VarDecl *CVD = VD->getCanonicalDecl(); |
3379 | 168 | if (!CVD->hasAttr<OMPAllocateDeclAttr>()) |
3380 | 33 | return false; |
3381 | 135 | const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); |
3382 | | // Use the default allocation. |
3383 | 135 | return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc && |
3384 | 135 | !AA->getAllocator()22 ); |
3385 | 168 | } |
3386 | | |
3387 | | static RecordDecl * |
3388 | 930 | createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { |
3389 | 930 | if (!Privates.empty()) { |
3390 | 593 | ASTContext &C = CGM.getContext(); |
3391 | | // Build struct .kmp_privates_t. { |
3392 | | // /* private vars */ |
3393 | | // }; |
3394 | 593 | RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); |
3395 | 593 | RD->startDefinition(); |
3396 | 1.72k | for (const auto &Pair : Privates) { |
3397 | 1.72k | const VarDecl *VD = Pair.second.Original; |
3398 | 1.72k | QualType Type = VD->getType().getNonReferenceType(); |
3399 | | // If the private variable is a local variable with lvalue ref type, |
3400 | | // allocate the pointer instead of the pointee type. |
3401 | 1.72k | if (Pair.second.isLocalPrivate()) { |
3402 | 14 | if (VD->getType()->isLValueReferenceType()) |
3403 | 0 | Type = C.getPointerType(Type); |
3404 | 14 | if (isAllocatableDecl(VD)) |
3405 | 3 | Type = C.getPointerType(Type); |
3406 | 14 | } |
3407 | 1.72k | FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); |
3408 | 1.72k | if (VD->hasAttrs()) { |
3409 | 57 | for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), |
3410 | 57 | E(VD->getAttrs().end()); |
3411 | 111 | I != E; ++I54 ) |
3412 | 54 | FD->addAttr(*I); |
3413 | 57 | } |
3414 | 1.72k | } |
3415 | 593 | RD->completeDefinition(); |
3416 | 593 | return RD; |
3417 | 593 | } |
3418 | 337 | return nullptr; |
3419 | 930 | } |
3420 | | |
3421 | | static RecordDecl * |
3422 | | createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, |
3423 | | QualType KmpInt32Ty, |
3424 | 414 | QualType KmpRoutineEntryPointerQTy) { |
3425 | 414 | ASTContext &C = CGM.getContext(); |
3426 | | // Build struct kmp_task_t { |
3427 | | // void * shareds; |
3428 | | // kmp_routine_entry_t routine; |
3429 | | // kmp_int32 part_id; |
3430 | | // kmp_cmplrdata_t data1; |
3431 | | // kmp_cmplrdata_t data2; |
3432 | | // For taskloops additional fields: |
3433 | | // kmp_uint64 lb; |
3434 | | // kmp_uint64 ub; |
3435 | | // kmp_int64 st; |
3436 | | // kmp_int32 liter; |
3437 | | // void * reductions; |
3438 | | // }; |
3439 | 414 | RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); |
3440 | 414 | UD->startDefinition(); |
3441 | 414 | addFieldToRecordDecl(C, UD, KmpInt32Ty); |
3442 | 414 | addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); |
3443 | 414 | UD->completeDefinition(); |
3444 | 414 | QualType KmpCmplrdataTy = C.getRecordType(UD); |
3445 | 414 | RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); |
3446 | 414 | RD->startDefinition(); |
3447 | 414 | addFieldToRecordDecl(C, RD, C.VoidPtrTy); |
3448 | 414 | addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); |
3449 | 414 | addFieldToRecordDecl(C, RD, KmpInt32Ty); |
3450 | 414 | addFieldToRecordDecl(C, RD, KmpCmplrdataTy); |
3451 | 414 | addFieldToRecordDecl(C, RD, KmpCmplrdataTy); |
3452 | 414 | if (isOpenMPTaskLoopDirective(Kind)) { |
3453 | 128 | QualType KmpUInt64Ty = |
3454 | 128 | CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); |
3455 | 128 | QualType KmpInt64Ty = |
3456 | 128 | CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); |
3457 | 128 | addFieldToRecordDecl(C, RD, KmpUInt64Ty); |
3458 | 128 | addFieldToRecordDecl(C, RD, KmpUInt64Ty); |
3459 | 128 | addFieldToRecordDecl(C, RD, KmpInt64Ty); |
3460 | 128 | addFieldToRecordDecl(C, RD, KmpInt32Ty); |
3461 | 128 | addFieldToRecordDecl(C, RD, C.VoidPtrTy); |
3462 | 128 | } |
3463 | 414 | RD->completeDefinition(); |
3464 | 414 | return RD; |
3465 | 414 | } |
3466 | | |
3467 | | static RecordDecl * |
3468 | | createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, |
3469 | 930 | ArrayRef<PrivateDataTy> Privates) { |
3470 | 930 | ASTContext &C = CGM.getContext(); |
3471 | | // Build struct kmp_task_t_with_privates { |
3472 | | // kmp_task_t task_data; |
3473 | | // .kmp_privates_t. privates; |
3474 | | // }; |
3475 | 930 | RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); |
3476 | 930 | RD->startDefinition(); |
3477 | 930 | addFieldToRecordDecl(C, RD, KmpTaskTQTy); |
3478 | 930 | if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) |
3479 | 593 | addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); |
3480 | 930 | RD->completeDefinition(); |
3481 | 930 | return RD; |
3482 | 930 | } |
3483 | | |
3484 | | /// Emit a proxy function which accepts kmp_task_t as the second |
3485 | | /// argument. |
3486 | | /// \code |
3487 | | /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { |
3488 | | /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, |
3489 | | /// For taskloops: |
3490 | | /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, |
3491 | | /// tt->reductions, tt->shareds); |
3492 | | /// return 0; |
3493 | | /// } |
3494 | | /// \endcode |
3495 | | static llvm::Function * |
3496 | | emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, |
3497 | | OpenMPDirectiveKind Kind, QualType KmpInt32Ty, |
3498 | | QualType KmpTaskTWithPrivatesPtrQTy, |
3499 | | QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, |
3500 | | QualType SharedsPtrTy, llvm::Function *TaskFunction, |
3501 | 930 | llvm::Value *TaskPrivatesMap) { |
3502 | 930 | ASTContext &C = CGM.getContext(); |
3503 | 930 | FunctionArgList Args; |
3504 | 930 | ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, |
3505 | 930 | ImplicitParamDecl::Other); |
3506 | 930 | ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, |
3507 | 930 | KmpTaskTWithPrivatesPtrQTy.withRestrict(), |
3508 | 930 | ImplicitParamDecl::Other); |
3509 | 930 | Args.push_back(&GtidArg); |
3510 | 930 | Args.push_back(&TaskTypeArg); |
3511 | 930 | const auto &TaskEntryFnInfo = |
3512 | 930 | CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); |
3513 | 930 | llvm::FunctionType *TaskEntryTy = |
3514 | 930 | CGM.getTypes().GetFunctionType(TaskEntryFnInfo); |
3515 | 930 | std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); |
3516 | 930 | auto *TaskEntry = llvm::Function::Create( |
3517 | 930 | TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); |
3518 | 930 | CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); |
3519 | 930 | TaskEntry->setDoesNotRecurse(); |
3520 | 930 | CodeGenFunction CGF(CGM); |
3521 | 930 | CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, |
3522 | 930 | Loc, Loc); |
3523 | | |
3524 | | // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, |
3525 | | // tt, |
3526 | | // For taskloops: |
3527 | | // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, |
3528 | | // tt->task_data.shareds); |
3529 | 930 | llvm::Value *GtidParam = CGF.EmitLoadOfScalar( |
3530 | 930 | CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); |
3531 | 930 | LValue TDBase = CGF.EmitLoadOfPointerLValue( |
3532 | 930 | CGF.GetAddrOfLocalVar(&TaskTypeArg), |
3533 | 930 | KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); |
3534 | 930 | const auto *KmpTaskTWithPrivatesQTyRD = |
3535 | 930 | cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); |
3536 | 930 | LValue Base = |
3537 | 930 | CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); |
3538 | 930 | const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); |
3539 | 930 | auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); |
3540 | 930 | LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); |
3541 | 930 | llvm::Value *PartidParam = PartIdLVal.getPointer(CGF); |
3542 | | |
3543 | 930 | auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); |
3544 | 930 | LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); |
3545 | 930 | llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
3546 | 930 | CGF.EmitLoadOfScalar(SharedsLVal, Loc), |
3547 | 930 | CGF.ConvertTypeForMem(SharedsPtrTy)); |
3548 | | |
3549 | 930 | auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); |
3550 | 930 | llvm::Value *PrivatesParam; |
3551 | 930 | if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { |
3552 | 593 | LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); |
3553 | 593 | PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
3554 | 593 | PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy); |
3555 | 593 | } else { |
3556 | 337 | PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); |
3557 | 337 | } |
3558 | | |
3559 | 930 | llvm::Value *CommonArgs[] = { |
3560 | 930 | GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap, |
3561 | 930 | CGF.Builder |
3562 | 930 | .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(CGF), |
3563 | 930 | CGF.VoidPtrTy, CGF.Int8Ty) |
3564 | 930 | .getPointer()}; |
3565 | 930 | SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), |
3566 | 930 | std::end(CommonArgs)); |
3567 | 930 | if (isOpenMPTaskLoopDirective(Kind)) { |
3568 | 226 | auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); |
3569 | 226 | LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); |
3570 | 226 | llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); |
3571 | 226 | auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); |
3572 | 226 | LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); |
3573 | 226 | llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); |
3574 | 226 | auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); |
3575 | 226 | LValue StLVal = CGF.EmitLValueForField(Base, *StFI); |
3576 | 226 | llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); |
3577 | 226 | auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); |
3578 | 226 | LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); |
3579 | 226 | llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); |
3580 | 226 | auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); |
3581 | 226 | LValue RLVal = CGF.EmitLValueForField(Base, *RFI); |
3582 | 226 | llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); |
3583 | 226 | CallArgs.push_back(LBParam); |
3584 | 226 | CallArgs.push_back(UBParam); |
3585 | 226 | CallArgs.push_back(StParam); |
3586 | 226 | CallArgs.push_back(LIParam); |
3587 | 226 | CallArgs.push_back(RParam); |
3588 | 226 | } |
3589 | 930 | CallArgs.push_back(SharedsParam); |
3590 | | |
3591 | 930 | CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, |
3592 | 930 | CallArgs); |
3593 | 930 | CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), |
3594 | 930 | CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); |
3595 | 930 | CGF.FinishFunction(); |
3596 | 930 | return TaskEntry; |
3597 | 930 | } |
3598 | | |
3599 | | static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, |
3600 | | SourceLocation Loc, |
3601 | | QualType KmpInt32Ty, |
3602 | | QualType KmpTaskTWithPrivatesPtrQTy, |
3603 | 81 | QualType KmpTaskTWithPrivatesQTy) { |
3604 | 81 | ASTContext &C = CGM.getContext(); |
3605 | 81 | FunctionArgList Args; |
3606 | 81 | ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, |
3607 | 81 | ImplicitParamDecl::Other); |
3608 | 81 | ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, |
3609 | 81 | KmpTaskTWithPrivatesPtrQTy.withRestrict(), |
3610 | 81 | ImplicitParamDecl::Other); |
3611 | 81 | Args.push_back(&GtidArg); |
3612 | 81 | Args.push_back(&TaskTypeArg); |
3613 | 81 | const auto &DestructorFnInfo = |
3614 | 81 | CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); |
3615 | 81 | llvm::FunctionType *DestructorFnTy = |
3616 | 81 | CGM.getTypes().GetFunctionType(DestructorFnInfo); |
3617 | 81 | std::string Name = |
3618 | 81 | CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); |
3619 | 81 | auto *DestructorFn = |
3620 | 81 | llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, |
3621 | 81 | Name, &CGM.getModule()); |
3622 | 81 | CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, |
3623 | 81 | DestructorFnInfo); |
3624 | 81 | DestructorFn->setDoesNotRecurse(); |
3625 | 81 | CodeGenFunction CGF(CGM); |
3626 | 81 | CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, |
3627 | 81 | Args, Loc, Loc); |
3628 | | |
3629 | 81 | LValue Base = CGF.EmitLoadOfPointerLValue( |
3630 | 81 | CGF.GetAddrOfLocalVar(&TaskTypeArg), |
3631 | 81 | KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); |
3632 | 81 | const auto *KmpTaskTWithPrivatesQTyRD = |
3633 | 81 | cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); |
3634 | 81 | auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); |
3635 | 81 | Base = CGF.EmitLValueForField(Base, *FI); |
3636 | 81 | for (const auto *Field : |
3637 | 362 | cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { |
3638 | 362 | if (QualType::DestructionKind DtorKind = |
3639 | 362 | Field->getType().isDestructedType()) { |
3640 | 162 | LValue FieldLValue = CGF.EmitLValueForField(Base, Field); |
3641 | 162 | CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType()); |
3642 | 162 | } |
3643 | 362 | } |
3644 | 81 | CGF.FinishFunction(); |
3645 | 81 | return DestructorFn; |
3646 | 81 | } |
3647 | | |
3648 | | /// Emit a privates mapping function for correct handling of private and |
3649 | | /// firstprivate variables. |
3650 | | /// \code |
3651 | | /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1> |
3652 | | /// **noalias priv1,..., <tyn> **noalias privn) { |
3653 | | /// *priv1 = &.privates.priv1; |
3654 | | /// ...; |
3655 | | /// *privn = &.privates.privn; |
3656 | | /// } |
3657 | | /// \endcode |
3658 | | static llvm::Value * |
3659 | | emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, |
3660 | | const OMPTaskDataTy &Data, QualType PrivatesQTy, |
3661 | 593 | ArrayRef<PrivateDataTy> Privates) { |
3662 | 593 | ASTContext &C = CGM.getContext(); |
3663 | 593 | FunctionArgList Args; |
3664 | 593 | ImplicitParamDecl TaskPrivatesArg( |
3665 | 593 | C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, |
3666 | 593 | C.getPointerType(PrivatesQTy).withConst().withRestrict(), |
3667 | 593 | ImplicitParamDecl::Other); |
3668 | 593 | Args.push_back(&TaskPrivatesArg); |
3669 | 593 | llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos; |
3670 | 593 | unsigned Counter = 1; |
3671 | 593 | for (const Expr *E : Data.PrivateVars) { |
3672 | 170 | Args.push_back(ImplicitParamDecl::Create( |
3673 | 170 | C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, |
3674 | 170 | C.getPointerType(C.getPointerType(E->getType())) |
3675 | 170 | .withConst() |
3676 | 170 | .withRestrict(), |
3677 | 170 | ImplicitParamDecl::Other)); |
3678 | 170 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
3679 | 170 | PrivateVarsPos[VD] = Counter; |
3680 | 170 | ++Counter; |
3681 | 170 | } |
3682 | 1.38k | for (const Expr *E : Data.FirstprivateVars) { |
3683 | 1.38k | Args.push_back(ImplicitParamDecl::Create( |
3684 | 1.38k | C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, |
3685 | 1.38k | C.getPointerType(C.getPointerType(E->getType())) |
3686 | 1.38k | .withConst() |
3687 | 1.38k | .withRestrict(), |
3688 | 1.38k | ImplicitParamDecl::Other)); |
3689 | 1.38k | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
3690 | 1.38k | PrivateVarsPos[VD] = Counter; |
3691 | 1.38k | ++Counter; |
3692 | 1.38k | } |
3693 | 593 | for (const Expr *E : Data.LastprivateVars) { |
3694 | 151 | Args.push_back(ImplicitParamDecl::Create( |
3695 | 151 | C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, |
3696 | 151 | C.getPointerType(C.getPointerType(E->getType())) |
3697 | 151 | .withConst() |
3698 | 151 | .withRestrict(), |
3699 | 151 | ImplicitParamDecl::Other)); |
3700 | 151 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
3701 | 151 | PrivateVarsPos[VD] = Counter; |
3702 | 151 | ++Counter; |
3703 | 151 | } |
3704 | 593 | for (const VarDecl *VD : Data.PrivateLocals) { |
3705 | 14 | QualType Ty = VD->getType().getNonReferenceType(); |
3706 | 14 | if (VD->getType()->isLValueReferenceType()) |
3707 | 0 | Ty = C.getPointerType(Ty); |
3708 | 14 | if (isAllocatableDecl(VD)) |
3709 | 3 | Ty = C.getPointerType(Ty); |
3710 | 14 | Args.push_back(ImplicitParamDecl::Create( |
3711 | 14 | C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, |
3712 | 14 | C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(), |
3713 | 14 | ImplicitParamDecl::Other)); |
3714 | 14 | PrivateVarsPos[VD] = Counter; |
3715 | 14 | ++Counter; |
3716 | 14 | } |
3717 | 593 | const auto &TaskPrivatesMapFnInfo = |
3718 | 593 | CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); |
3719 | 593 | llvm::FunctionType *TaskPrivatesMapTy = |
3720 | 593 | CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); |
3721 | 593 | std::string Name = |
3722 | 593 | CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); |
3723 | 593 | auto *TaskPrivatesMap = llvm::Function::Create( |
3724 | 593 | TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, |
3725 | 593 | &CGM.getModule()); |
3726 | 593 | CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, |
3727 | 593 | TaskPrivatesMapFnInfo); |
3728 | 593 | if (CGM.getLangOpts().Optimize) { |
3729 | 0 | TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); |
3730 | 0 | TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); |
3731 | 0 | TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); |
3732 | 0 | } |
3733 | 593 | CodeGenFunction CGF(CGM); |
3734 | 593 | CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, |
3735 | 593 | TaskPrivatesMapFnInfo, Args, Loc, Loc); |
3736 | | |
3737 | | // *privi = &.privates.privi; |
3738 | 593 | LValue Base = CGF.EmitLoadOfPointerLValue( |
3739 | 593 | CGF.GetAddrOfLocalVar(&TaskPrivatesArg), |
3740 | 593 | TaskPrivatesArg.getType()->castAs<PointerType>()); |
3741 | 593 | const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); |
3742 | 593 | Counter = 0; |
3743 | 1.72k | for (const FieldDecl *Field : PrivatesQTyRD->fields()) { |
3744 | 1.72k | LValue FieldLVal = CGF.EmitLValueForField(Base, Field); |
3745 | 1.72k | const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; |
3746 | 1.72k | LValue RefLVal = |
3747 | 1.72k | CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); |
3748 | 1.72k | LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( |
3749 | 1.72k | RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>()); |
3750 | 1.72k | CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal); |
3751 | 1.72k | ++Counter; |
3752 | 1.72k | } |
3753 | 593 | CGF.FinishFunction(); |
3754 | 593 | return TaskPrivatesMap; |
3755 | 593 | } |
3756 | | |
3757 | | /// Emit initialization for private variables in task-based directives. |
3758 | | static void emitPrivatesInit(CodeGenFunction &CGF, |
3759 | | const OMPExecutableDirective &D, |
3760 | | Address KmpTaskSharedsPtr, LValue TDBase, |
3761 | | const RecordDecl *KmpTaskTWithPrivatesQTyRD, |
3762 | | QualType SharedsTy, QualType SharedsPtrTy, |
3763 | | const OMPTaskDataTy &Data, |
3764 | 692 | ArrayRef<PrivateDataTy> Privates, bool ForDup) { |
3765 | 692 | ASTContext &C = CGF.getContext(); |
3766 | 692 | auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); |
3767 | 692 | LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); |
3768 | 692 | OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) |
3769 | 692 | ? OMPD_taskloop250 |
3770 | 692 | : OMPD_task442 ; |
3771 | 692 | const CapturedStmt &CS = *D.getCapturedStmt(Kind); |
3772 | 692 | CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); |
3773 | 692 | LValue SrcBase; |
3774 | 692 | bool IsTargetTask = |
3775 | 692 | isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || |
3776 | 692 | isOpenMPTargetExecutionDirective(D.getDirectiveKind())604 ; |
3777 | | // For target-based directives skip 4 firstprivate arrays BasePointersArray, |
3778 | | // PointersArray, SizesArray, and MappersArray. The original variables for |
3779 | | // these arrays are not captured and we get their addresses explicitly. |
3780 | 692 | if ((!IsTargetTask && !Data.FirstprivateVars.empty()334 && ForDup162 ) || |
3781 | 692 | (666 IsTargetTask666 && KmpTaskSharedsPtr.isValid()358 )) { |
3782 | 324 | SrcBase = CGF.MakeAddrLValue( |
3783 | 324 | CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
3784 | 324 | KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy), |
3785 | 324 | CGF.ConvertTypeForMem(SharedsTy)), |
3786 | 324 | SharedsTy); |
3787 | 324 | } |
3788 | 692 | FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); |
3789 | 2.09k | for (const PrivateDataTy &Pair : Privates) { |
3790 | | // Do not initialize private locals. |
3791 | 2.09k | if (Pair.second.isLocalPrivate()) { |
3792 | 14 | ++FI; |
3793 | 14 | continue; |
3794 | 14 | } |
3795 | 2.07k | const VarDecl *VD = Pair.second.PrivateCopy; |
3796 | 2.07k | const Expr *Init = VD->getAnyInitializer(); |
3797 | 2.07k | if (Init && (1.70k !ForDup1.70k || (206 isa<CXXConstructExpr>(Init)206 && |
3798 | 1.64k | !CGF.isTrivialInitializer(Init)146 ))) { |
3799 | 1.64k | LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); |
3800 | 1.64k | if (const VarDecl *Elem = Pair.second.PrivateElemInit) { |
3801 | 1.43k | const VarDecl *OriginalVD = Pair.second.Original; |
3802 | | // Check if the variable is the target-based BasePointersArray, |
3803 | | // PointersArray, SizesArray, or MappersArray. |
3804 | 1.43k | LValue SharedRefLValue; |
3805 | 1.43k | QualType Type = PrivateLValue.getType(); |
3806 | 1.43k | const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); |
3807 | 1.43k | if (IsTargetTask && !SharedField1.10k ) { |
3808 | 710 | assert(isa<ImplicitParamDecl>(OriginalVD) && |
3809 | 710 | isa<CapturedDecl>(OriginalVD->getDeclContext()) && |
3810 | 710 | cast<CapturedDecl>(OriginalVD->getDeclContext()) |
3811 | 710 | ->getNumParams() == 0 && |
3812 | 710 | isa<TranslationUnitDecl>( |
3813 | 710 | cast<CapturedDecl>(OriginalVD->getDeclContext()) |
3814 | 710 | ->getDeclContext()) && |
3815 | 710 | "Expected artificial target data variable."); |
3816 | 0 | SharedRefLValue = |
3817 | 710 | CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); |
3818 | 728 | } else if (ForDup) { |
3819 | 50 | SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); |
3820 | 50 | SharedRefLValue = CGF.MakeAddrLValue( |
3821 | 50 | SharedRefLValue.getAddress(CGF).withAlignment( |
3822 | 50 | C.getDeclAlign(OriginalVD)), |
3823 | 50 | SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), |
3824 | 50 | SharedRefLValue.getTBAAInfo()); |
3825 | 678 | } else if (CGF.LambdaCaptureFields.count( |
3826 | 678 | Pair.second.Original->getCanonicalDecl()) > 0 || |
3827 | 678 | isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)676 ) { |
3828 | 13 | SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); |
3829 | 665 | } else { |
3830 | | // Processing for implicitly captured variables. |
3831 | 665 | InlinedOpenMPRegionRAII Region( |
3832 | 665 | CGF, [](CodeGenFunction &, PrePostActionTy &) {}0 , OMPD_unknown, |
3833 | 665 | /*HasCancel=*/false, /*NoInheritance=*/true); |
3834 | 665 | SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); |
3835 | 665 | } |
3836 | 1.43k | if (Type->isArrayType()) { |
3837 | | // Initialize firstprivate array. |
3838 | 803 | if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)52 ) { |
3839 | | // Perform simple memcpy. |
3840 | 751 | CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); |
3841 | 751 | } else { |
3842 | | // Initialize firstprivate array using element-by-element |
3843 | | // initialization. |
3844 | 52 | CGF.EmitOMPAggregateAssign( |
3845 | 52 | PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF), |
3846 | 52 | Type, |
3847 | 52 | [&CGF, Elem, Init, &CapturesInfo](Address DestElement, |
3848 | 52 | Address SrcElement) { |
3849 | | // Clean up any temporaries needed by the initialization. |
3850 | 52 | CodeGenFunction::OMPPrivateScope InitScope(CGF); |
3851 | 52 | InitScope.addPrivate(Elem, SrcElement); |
3852 | 52 | (void)InitScope.Privatize(); |
3853 | | // Emit initialization for single element. |
3854 | 52 | CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( |
3855 | 52 | CGF, &CapturesInfo); |
3856 | 52 | CGF.EmitAnyExprToMem(Init, DestElement, |
3857 | 52 | Init->getType().getQualifiers(), |
3858 | 52 | /*IsInitializer=*/false); |
3859 | 52 | }); |
3860 | 52 | } |
3861 | 803 | } else { |
3862 | 635 | CodeGenFunction::OMPPrivateScope InitScope(CGF); |
3863 | 635 | InitScope.addPrivate(Elem, SharedRefLValue.getAddress(CGF)); |
3864 | 635 | (void)InitScope.Privatize(); |
3865 | 635 | CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); |
3866 | 635 | CGF.EmitExprAsInit(Init, VD, PrivateLValue, |
3867 | 635 | /*capturedByInit=*/false); |
3868 | 635 | } |
3869 | 1.43k | } else { |
3870 | 202 | CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); |
3871 | 202 | } |
3872 | 1.64k | } |
3873 | 0 | ++FI; |
3874 | 2.07k | } |
3875 | 692 | } |
3876 | | |
3877 | | /// Check if duplication function is required for taskloops. |
3878 | | static bool checkInitIsRequired(CodeGenFunction &CGF, |
3879 | 102 | ArrayRef<PrivateDataTy> Privates) { |
3880 | 102 | bool InitRequired = false; |
3881 | 206 | for (const PrivateDataTy &Pair : Privates) { |
3882 | 206 | if (Pair.second.isLocalPrivate()) |
3883 | 0 | continue; |
3884 | 206 | const VarDecl *VD = Pair.second.PrivateCopy; |
3885 | 206 | const Expr *Init = VD->getAnyInitializer(); |
3886 | 206 | InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) && |
3887 | 206 | !CGF.isTrivialInitializer(Init)50 ); |
3888 | 206 | if (InitRequired) |
3889 | 50 | break; |
3890 | 206 | } |
3891 | 102 | return InitRequired; |
3892 | 102 | } |
3893 | | |
3894 | | |
3895 | | /// Emit task_dup function (for initialization of |
3896 | | /// private/firstprivate/lastprivate vars and last_iter flag) |
3897 | | /// \code |
3898 | | /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int |
3899 | | /// lastpriv) { |
3900 | | /// // setup lastprivate flag |
3901 | | /// task_dst->last = lastpriv; |
3902 | | /// // could be constructor calls here... |
3903 | | /// } |
3904 | | /// \endcode |
3905 | | static llvm::Value * |
3906 | | emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, |
3907 | | const OMPExecutableDirective &D, |
3908 | | QualType KmpTaskTWithPrivatesPtrQTy, |
3909 | | const RecordDecl *KmpTaskTWithPrivatesQTyRD, |
3910 | | const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, |
3911 | | QualType SharedsPtrTy, const OMPTaskDataTy &Data, |
3912 | 99 | ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { |
3913 | 99 | ASTContext &C = CGM.getContext(); |
3914 | 99 | FunctionArgList Args; |
3915 | 99 | ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, |
3916 | 99 | KmpTaskTWithPrivatesPtrQTy, |
3917 | 99 | ImplicitParamDecl::Other); |
3918 | 99 | ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, |
3919 | 99 | KmpTaskTWithPrivatesPtrQTy, |
3920 | 99 | ImplicitParamDecl::Other); |
3921 | 99 | ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, |
3922 | 99 | ImplicitParamDecl::Other); |
3923 | 99 | Args.push_back(&DstArg); |
3924 | 99 | Args.push_back(&SrcArg); |
3925 | 99 | Args.push_back(&LastprivArg); |
3926 | 99 | const auto &TaskDupFnInfo = |
3927 | 99 | CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); |
3928 | 99 | llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); |
3929 | 99 | std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); |
3930 | 99 | auto *TaskDup = llvm::Function::Create( |
3931 | 99 | TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); |
3932 | 99 | CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); |
3933 | 99 | TaskDup->setDoesNotRecurse(); |
3934 | 99 | CodeGenFunction CGF(CGM); |
3935 | 99 | CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, |
3936 | 99 | Loc); |
3937 | | |
3938 | 99 | LValue TDBase = CGF.EmitLoadOfPointerLValue( |
3939 | 99 | CGF.GetAddrOfLocalVar(&DstArg), |
3940 | 99 | KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); |
3941 | | // task_dst->liter = lastpriv; |
3942 | 99 | if (WithLastIter) { |
3943 | 49 | auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); |
3944 | 49 | LValue Base = CGF.EmitLValueForField( |
3945 | 49 | TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); |
3946 | 49 | LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); |
3947 | 49 | llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( |
3948 | 49 | CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); |
3949 | 49 | CGF.EmitStoreOfScalar(Lastpriv, LILVal); |
3950 | 49 | } |
3951 | | |
3952 | | // Emit initial values for private copies (if any). |
3953 | 99 | assert(!Privates.empty()); |
3954 | 0 | Address KmpTaskSharedsPtr = Address::invalid(); |
3955 | 99 | if (!Data.FirstprivateVars.empty()) { |
3956 | 26 | LValue TDBase = CGF.EmitLoadOfPointerLValue( |
3957 | 26 | CGF.GetAddrOfLocalVar(&SrcArg), |
3958 | 26 | KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); |
3959 | 26 | LValue Base = CGF.EmitLValueForField( |
3960 | 26 | TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); |
3961 | 26 | KmpTaskSharedsPtr = Address( |
3962 | 26 | CGF.EmitLoadOfScalar(CGF.EmitLValueForField( |
3963 | 26 | Base, *std::next(KmpTaskTQTyRD->field_begin(), |
3964 | 26 | KmpTaskTShareds)), |
3965 | 26 | Loc), |
3966 | 26 | CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy)); |
3967 | 26 | } |
3968 | 99 | emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, |
3969 | 99 | SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); |
3970 | 99 | CGF.FinishFunction(); |
3971 | 99 | return TaskDup; |
3972 | 99 | } |
3973 | | |
3974 | | /// Checks if destructor function is required to be generated. |
3975 | | /// \return true if cleanups are required, false otherwise. |
3976 | | static bool |
3977 | | checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, |
3978 | 593 | ArrayRef<PrivateDataTy> Privates) { |
3979 | 1.52k | for (const PrivateDataTy &P : Privates) { |
3980 | 1.52k | if (P.second.isLocalPrivate()) |
3981 | 14 | continue; |
3982 | 1.50k | QualType Ty = P.second.Original->getType().getNonReferenceType(); |
3983 | 1.50k | if (Ty.isDestructedType()) |
3984 | 81 | return true; |
3985 | 1.50k | } |
3986 | 512 | return false; |
3987 | 593 | } |
3988 | | |
3989 | | namespace { |
3990 | | /// Loop generator for OpenMP iterator expression. |
3991 | | class OMPIteratorGeneratorScope final |
3992 | | : public CodeGenFunction::OMPPrivateScope { |
3993 | | CodeGenFunction &CGF; |
3994 | | const OMPIteratorExpr *E = nullptr; |
3995 | | SmallVector<CodeGenFunction::JumpDest, 4> ContDests; |
3996 | | SmallVector<CodeGenFunction::JumpDest, 4> ExitDests; |
3997 | | OMPIteratorGeneratorScope() = delete; |
3998 | | OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete; |
3999 | | |
4000 | | public: |
4001 | | OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E) |
4002 | 467 | : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) { |
4003 | 467 | if (!E) |
4004 | 456 | return; |
4005 | 11 | SmallVector<llvm::Value *, 4> Uppers; |
4006 | 22 | for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I11 ) { |
4007 | 11 | Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper)); |
4008 | 11 | const auto *VD = cast<VarDecl>(E->getIteratorDecl(I)); |
4009 | 11 | addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName())); |
4010 | 11 | const OMPIteratorHelperData &HelperData = E->getHelper(I); |
4011 | 11 | addPrivate( |
4012 | 11 | HelperData.CounterVD, |
4013 | 11 | CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr")); |
4014 | 11 | } |
4015 | 11 | Privatize(); |
4016 | | |
4017 | 22 | for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I11 ) { |
4018 | 11 | const OMPIteratorHelperData &HelperData = E->getHelper(I); |
4019 | 11 | LValue CLVal = |
4020 | 11 | CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD), |
4021 | 11 | HelperData.CounterVD->getType()); |
4022 | | // Counter = 0; |
4023 | 11 | CGF.EmitStoreOfScalar( |
4024 | 11 | llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0), |
4025 | 11 | CLVal); |
4026 | 11 | CodeGenFunction::JumpDest &ContDest = |
4027 | 11 | ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont")); |
4028 | 11 | CodeGenFunction::JumpDest &ExitDest = |
4029 | 11 | ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit")); |
4030 | | // N = <number-of_iterations>; |
4031 | 11 | llvm::Value *N = Uppers[I]; |
4032 | | // cont: |
4033 | | // if (Counter < N) goto body; else goto exit; |
4034 | 11 | CGF.EmitBlock(ContDest.getBlock()); |
4035 | 11 | auto *CVal = |
4036 | 11 | CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation()); |
4037 | 11 | llvm::Value *Cmp = |
4038 | 11 | HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType() |
4039 | 11 | ? CGF.Builder.CreateICmpSLT(CVal, N)9 |
4040 | 11 | : CGF.Builder.CreateICmpULT(CVal, N)2 ; |
4041 | 11 | llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body"); |
4042 | 11 | CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock()); |
4043 | | // body: |
4044 | 11 | CGF.EmitBlock(BodyBB); |
4045 | | // Iteri = Begini + Counter * Stepi; |
4046 | 11 | CGF.EmitIgnoredExpr(HelperData.Update); |
4047 | 11 | } |
4048 | 11 | } |
4049 | 467 | ~OMPIteratorGeneratorScope() { |
4050 | 467 | if (!E) |
4051 | 456 | return; |
4052 | 22 | for (unsigned I = E->numOfIterators(); 11 I > 0; --I11 ) { |
4053 | | // Counter = Counter + 1; |
4054 | 11 | const OMPIteratorHelperData &HelperData = E->getHelper(I - 1); |
4055 | 11 | CGF.EmitIgnoredExpr(HelperData.CounterUpdate); |
4056 | | // goto cont; |
4057 | 11 | CGF.EmitBranchThroughCleanup(ContDests[I - 1]); |
4058 | | // exit: |
4059 | 11 | CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1); |
4060 | 11 | } |
4061 | 11 | } |
4062 | | }; |
4063 | | } // namespace |
4064 | | |
4065 | | static std::pair<llvm::Value *, llvm::Value *> |
4066 | 1.01k | getPointerAndSize(CodeGenFunction &CGF, const Expr *E) { |
4067 | 1.01k | const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E); |
4068 | 1.01k | llvm::Value *Addr; |
4069 | 1.01k | if (OASE) { |
4070 | 6 | const Expr *Base = OASE->getBase(); |
4071 | 6 | Addr = CGF.EmitScalarExpr(Base); |
4072 | 1.01k | } else { |
4073 | 1.01k | Addr = CGF.EmitLValue(E).getPointer(CGF); |
4074 | 1.01k | } |
4075 | 1.01k | llvm::Value *SizeVal; |
4076 | 1.01k | QualType Ty = E->getType(); |
4077 | 1.01k | if (OASE) { |
4078 | 6 | SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType()); |
4079 | 18 | for (const Expr *SE : OASE->getDimensions()) { |
4080 | 18 | llvm::Value *Sz = CGF.EmitScalarExpr(SE); |
4081 | 18 | Sz = CGF.EmitScalarConversion( |
4082 | 18 | Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc()); |
4083 | 18 | SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz); |
4084 | 18 | } |
4085 | 1.01k | } else if (const auto *ASE = |
4086 | 1.01k | dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { |
4087 | 42 | LValue UpAddrLVal = |
4088 | 42 | CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false); |
4089 | 42 | Address UpAddrAddress = UpAddrLVal.getAddress(CGF); |
4090 | 42 | llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32( |
4091 | 42 | UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1); |
4092 | 42 | llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy); |
4093 | 42 | llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy); |
4094 | 42 | SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); |
4095 | 971 | } else { |
4096 | 971 | SizeVal = CGF.getTypeSize(Ty); |
4097 | 971 | } |
4098 | 1.01k | return std::make_pair(Addr, SizeVal); |
4099 | 1.01k | } |
4100 | | |
4101 | | /// Builds kmp_depend_info, if it is not built yet, and builds flags type. |
4102 | 4 | static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) { |
4103 | 4 | QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false); |
4104 | 4 | if (KmpTaskAffinityInfoTy.isNull()) { |
4105 | 2 | RecordDecl *KmpAffinityInfoRD = |
4106 | 2 | C.buildImplicitRecord("kmp_task_affinity_info_t"); |
4107 | 2 | KmpAffinityInfoRD->startDefinition(); |
4108 | 2 | addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType()); |
4109 | 2 | addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType()); |
4110 | 2 | addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy); |
4111 | 2 | KmpAffinityInfoRD->completeDefinition(); |
4112 | 2 | KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD); |
4113 | 2 | } |
4114 | 4 | } |
4115 | | |
/// Emits the allocation and initialization of a runtime task object for the
/// directive \p D and returns the pieces callers need to schedule it.
///
/// In order, the function:
///  - collects private, firstprivate, lastprivate and local-private variables
///    into one list sorted by alignment;
///  - builds (or reuses) the kmp_task_t record and a per-task record that
///    wraps it together with the privates;
///  - emits the privates mapping function and the proxy task entry;
///  - computes the task flags and calls __kmpc_omp_task_alloc, or
///    __kmpc_omp_target_task_alloc when the directive has a nowait clause;
///  - handles the detach and affinity clauses;
///  - copies the shareds, initializes the privates, and fills in the
///    destructors and priority fields.
///
/// \param CGF Function in which the code is emitted.
/// \param Loc Source location used for the runtime calls.
/// \param D Directive the task is created for.
/// \param TaskFunction Outlined task function; the type of its fourth
/// parameter is used as the type of the privates mapping function pointer.
/// \param SharedsTy Record type of the shareds.
/// \param Shareds Address the shareds are copied from.
/// \param Data Clause-derived task data (privates, flags, priority, ...).
/// \return A TaskResultTy with NewTask, TaskEntry, NewTaskNewTaskTTy, TDBase
/// and KmpTaskTQTyRD filled in; TaskDupFn is set only for taskloop directives
/// that need a duplication function.
4116 | | CGOpenMPRuntime::TaskResultTy |
4117 | | CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, |
4118 | | const OMPExecutableDirective &D, |
4119 | | llvm::Function *TaskFunction, QualType SharedsTy, |
4120 | 930 | Address Shareds, const OMPTaskDataTy &Data) { |
4121 | 930 | ASTContext &C = CGM.getContext(); |
4122 | 930 | llvm::SmallVector<PrivateDataTy, 4> Privates; |
4123 | | // Aggregate privates and sort them by the alignment. |
4124 | 930 | const auto *I = Data.PrivateCopies.begin(); |
4125 | 930 | for (const Expr *E : Data.PrivateVars) { |
4126 | 170 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
4127 | 170 | Privates.emplace_back( |
4128 | 170 | C.getDeclAlign(VD), |
4129 | 170 | PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), |
4130 | 170 | /*PrivateElemInit=*/nullptr)); |
4131 | 170 | ++I; |
4132 | 170 | } |
4133 | 930 | I = Data.FirstprivateCopies.begin(); |
4134 | 930 | const auto *IElemInitRef = Data.FirstprivateInits.begin(); |
4135 | 1.38k | for (const Expr *E : Data.FirstprivateVars) { |
4136 | 1.38k | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
4137 | 1.38k | Privates.emplace_back( |
4138 | 1.38k | C.getDeclAlign(VD), |
4139 | 1.38k | PrivateHelpersTy( |
4140 | 1.38k | E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), |
4141 | 1.38k | cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))); |
4142 | 1.38k | ++I; |
4143 | 1.38k | ++IElemInitRef; |
4144 | 1.38k | } |
4145 | 930 | I = Data.LastprivateCopies.begin(); |
4146 | 930 | for (const Expr *E : Data.LastprivateVars) { |
4147 | 151 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
4148 | 151 | Privates.emplace_back( |
4149 | 151 | C.getDeclAlign(VD), |
4150 | 151 | PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), |
4151 | 151 | /*PrivateElemInit=*/nullptr)); |
4152 | 151 | ++I; |
4153 | 151 | } |
4154 | 930 | for (const VarDecl *VD : Data.PrivateLocals) { |
4155 | 14 | if (isAllocatableDecl(VD)) |
4156 | 3 | Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD)); |
4157 | 11 | else |
4158 | 11 | Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD)); |
4159 | 14 | } |
// Largest alignment first; the stable sort keeps the insertion order above
// among entries with equal alignment.
4160 | 930 | llvm::stable_sort(Privates, |
4161 | 1.64k | [](const PrivateDataTy &L, const PrivateDataTy &R) { |
4162 | 1.64k | return L.first > R.first; |
4163 | 1.64k | }); |
4164 | 930 | QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); |
4165 | | // Build type kmp_routine_entry_t (if not built yet). |
4166 | 930 | emitKmpRoutineEntryT(KmpInt32Ty); |
4167 | | // Build type kmp_task_t (if not built yet). |
4168 | 930 | if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) { |
4169 | 226 | if (SavedKmpTaskloopTQTy.isNull()) { |
4170 | 128 | SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl( |
4171 | 128 | CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); |
4172 | 128 | } |
4173 | 226 | KmpTaskTQTy = SavedKmpTaskloopTQTy; |
4174 | 704 | } else { |
4175 | 704 | assert((D.getDirectiveKind() == OMPD_task || |
4176 | 704 | isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || |
4177 | 704 | isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && |
4178 | 704 | "Expected taskloop, task or target directive"); |
4179 | 704 | if (SavedKmpTaskTQTy.isNull()) { |
4180 | 286 | SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( |
4181 | 286 | CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); |
4182 | 286 | } |
4183 | 704 | KmpTaskTQTy = SavedKmpTaskTQTy; |
4184 | 704 | } |
4185 | 0 | const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); |
4186 | | // Build particular struct kmp_task_t for the given task. |
4187 | 930 | const RecordDecl *KmpTaskTWithPrivatesQTyRD = |
4188 | 930 | createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); |
4189 | 930 | QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); |
4190 | 930 | QualType KmpTaskTWithPrivatesPtrQTy = |
4191 | 930 | C.getPointerType(KmpTaskTWithPrivatesQTy); |
4192 | 930 | llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); |
4193 | 930 | llvm::Type *KmpTaskTWithPrivatesPtrTy = |
4194 | 930 | KmpTaskTWithPrivatesTy->getPointerTo(); |
4195 | 930 | llvm::Value *KmpTaskTWithPrivatesTySize = |
4196 | 930 | CGF.getTypeSize(KmpTaskTWithPrivatesQTy); |
4197 | 930 | QualType SharedsPtrTy = C.getPointerType(SharedsTy); |
4198 | | |
4199 | | // Emit initial values for private copies (if any). |
4200 | 930 | llvm::Value *TaskPrivatesMap = nullptr; |
// The privates map pointer is cast to the type of the task function's fourth
// parameter; a null pointer of that type is passed when there are no privates.
4201 | 930 | llvm::Type *TaskPrivatesMapTy = |
4202 | 930 | std::next(TaskFunction->arg_begin(), 3)->getType(); |
4203 | 930 | if (!Privates.empty()) { |
4204 | 593 | auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); |
4205 | 593 | TaskPrivatesMap = |
4206 | 593 | emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates); |
4207 | 593 | TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
4208 | 593 | TaskPrivatesMap, TaskPrivatesMapTy); |
4209 | 593 | } else { |
4210 | 337 | TaskPrivatesMap = llvm::ConstantPointerNull::get( |
4211 | 337 | cast<llvm::PointerType>(TaskPrivatesMapTy)); |
4212 | 337 | } |
4213 | | // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, |
4214 | | // kmp_task_t *tt); |
4215 | 930 | llvm::Function *TaskEntry = emitProxyTaskFunction( |
4216 | 930 | CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, |
4217 | 930 | KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, |
4218 | 930 | TaskPrivatesMap); |
4219 | | |
4220 | | // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, |
4221 | | // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, |
4222 | | // kmp_routine_entry_t *task_entry); |
4223 | | // Task flags. Format is taken from |
4224 | | // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h, |
4225 | | // description of kmp_tasking_flags struct. |
4226 | 930 | enum { |
4227 | 930 | TiedFlag = 0x1, |
4228 | 930 | FinalFlag = 0x2, |
4229 | 930 | DestructorsFlag = 0x8, |
4230 | 930 | PriorityFlag = 0x20, |
4231 | 930 | DetachableFlag = 0x40, |
4232 | 930 | }; |
4233 | 930 | unsigned Flags = Data.Tied ? TiedFlag902 : 028 ; |
4234 | 930 | bool NeedsCleanup = false; |
4235 | 930 | if (!Privates.empty()) { |
4236 | 593 | NeedsCleanup = |
4237 | 593 | checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates); |
4238 | 593 | if (NeedsCleanup) |
4239 | 81 | Flags = Flags | DestructorsFlag; |
4240 | 593 | } |
4241 | 930 | if (Data.Priority.getInt()) |
4242 | 25 | Flags = Flags | PriorityFlag; |
4243 | 930 | if (D.hasClausesOfKind<OMPDetachClause>()) |
4244 | 2 | Flags = Flags | DetachableFlag; |
// The final flag is selected at run time when Data.Final carries a value, and
// folded to a constant otherwise; the remaining flags are OR-ed in afterwards.
4245 | 930 | llvm::Value *TaskFlags = |
4246 | 930 | Data.Final.getPointer() |
4247 | 930 | ? CGF.Builder.CreateSelect(Data.Final.getPointer(), |
4248 | 13 | CGF.Builder.getInt32(FinalFlag), |
4249 | 13 | CGF.Builder.getInt32(/*C=*/0)) |
4250 | 930 | : CGF.Builder.getInt32(917 Data.Final.getInt()917 ? FinalFlag14 : 0903 ); |
4251 | 930 | TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); |
4252 | 930 | llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); |
4253 | 930 | SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc), |
4254 | 930 | getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize, |
4255 | 930 | SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
4256 | 930 | TaskEntry, KmpRoutineEntryPtrTy)}; |
4257 | 930 | llvm::Value *NewTask; |
// With a nowait clause the task is allocated through
// __kmpc_omp_target_task_alloc, which receives the device ID as an extra
// trailing argument.
4258 | 930 | if (D.hasClausesOfKind<OMPNowaitClause>()) { |
4259 | | // Check if we have any device clause associated with the directive. |
4260 | 296 | const Expr *Device = nullptr; |
4261 | 296 | if (auto *C = D.getSingleClause<OMPDeviceClause>()) |
4262 | 166 | Device = C->getDevice(); |
4263 | | // Emit device ID if any otherwise use default value. |
4264 | 296 | llvm::Value *DeviceID; |
4265 | 296 | if (Device) |
4266 | 166 | DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), |
4267 | 166 | CGF.Int64Ty, /*isSigned=*/true); |
4268 | 130 | else |
4269 | 130 | DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); |
4270 | 296 | AllocArgs.push_back(DeviceID); |
4271 | 296 | NewTask = CGF.EmitRuntimeCall( |
4272 | 296 | OMPBuilder.getOrCreateRuntimeFunction( |
4273 | 296 | CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc), |
4274 | 296 | AllocArgs); |
4275 | 634 | } else { |
4276 | 634 | NewTask = |
4277 | 634 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
4278 | 634 | CGM.getModule(), OMPRTL___kmpc_omp_task_alloc), |
4279 | 634 | AllocArgs); |
4280 | 634 | } |
4281 | | // Emit detach clause initialization. |
4282 | | // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid, |
4283 | | // task_descriptor); |
4284 | 930 | if (const auto *DC = D.getSingleClause<OMPDetachClause>()) { |
4285 | 2 | const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts(); |
4286 | 2 | LValue EvtLVal = CGF.EmitLValue(Evt); |
4287 | | |
4288 | | // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref, |
4289 | | // int gtid, kmp_task_t *task); |
4290 | 2 | llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc()); |
4291 | 2 | llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc()); |
4292 | 2 | Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false); |
4293 | 2 | llvm::Value *EvtVal = CGF.EmitRuntimeCall( |
4294 | 2 | OMPBuilder.getOrCreateRuntimeFunction( |
4295 | 2 | CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event), |
4296 | 2 | {Loc, Tid, NewTask}); |
4297 | 2 | EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(), |
4298 | 2 | Evt->getExprLoc()); |
4299 | 2 | CGF.EmitStoreOfScalar(EvtVal, EvtLVal); |
4300 | 2 | } |
4301 | | // Process affinity clauses. |
4302 | 930 | if (D.hasClausesOfKind<OMPAffinityClause>()) { |
4303 | | // Process list of affinity data. |
// NOTE: this C shadows the function-level C; both are obtained from
// CGM.getContext(). The clause loops below shadow it again with a clause
// pointer named C.
4304 | 4 | ASTContext &C = CGM.getContext(); |
4305 | 4 | Address AffinitiesArray = Address::invalid(); |
4306 | | // Calculate number of elements to form the array of affinity data. |
4307 | 4 | llvm::Value *NumOfElements = nullptr; |
4308 | 4 | unsigned NumAffinities = 0; |
// NOTE(review): the iterator upper bounds of ALL iterator clauses are
// multiplied into one product, and the varlist size of an iterator clause is
// not factored in. Confirm this matches the number of entries written by the
// "elements with iterators" loop below when several clauses or several list
// items are present.
4309 | 6 | for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { |
4310 | 6 | if (const Expr *Modifier = C->getModifier()) { |
4311 | 2 | const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()); |
4312 | 4 | for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I2 ) { |
4313 | 2 | llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); |
4314 | 2 | Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); |
4315 | 2 | NumOfElements = |
4316 | 2 | NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz)0 : Sz; |
4317 | 2 | } |
4318 | 4 | } else { |
4319 | 4 | NumAffinities += C->varlist_size(); |
4320 | 4 | } |
4321 | 6 | } |
4322 | 4 | getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy); |
4323 | | // Fields ids in kmp_task_affinity_info record. |
4324 | 4 | enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags }; |
4325 | | |
// With iterators the element count is only known at run time, so a
// variable-length array is emitted; otherwise a constant-size temporary is
// used.
4326 | 4 | QualType KmpTaskAffinityInfoArrayTy; |
4327 | 4 | if (NumOfElements) { |
4328 | 2 | NumOfElements = CGF.Builder.CreateNUWAdd( |
4329 | 2 | llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements); |
4330 | 2 | auto *OVE = new (C) OpaqueValueExpr( |
4331 | 2 | Loc, |
4332 | 2 | C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0), |
4333 | 2 | VK_PRValue); |
4334 | 2 | CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE, |
4335 | 2 | RValue::get(NumOfElements)); |
4336 | 2 | KmpTaskAffinityInfoArrayTy = |
4337 | 2 | C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal, |
4338 | 2 | /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); |
4339 | | // Properly emit variable-sized array. |
4340 | 2 | auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy, |
4341 | 2 | ImplicitParamDecl::Other); |
4342 | 2 | CGF.EmitVarDecl(*PD); |
4343 | 2 | AffinitiesArray = CGF.GetAddrOfLocalVar(PD); |
4344 | 2 | NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, |
4345 | 2 | /*isSigned=*/false); |
4346 | 2 | } else { |
4347 | 2 | KmpTaskAffinityInfoArrayTy = C.getConstantArrayType( |
4348 | 2 | KmpTaskAffinityInfoTy, |
4349 | 2 | llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr, |
4350 | 2 | ArrayType::Normal, /*IndexTypeQuals=*/0); |
4351 | 2 | AffinitiesArray = |
4352 | 2 | CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr"); |
4353 | 2 | AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0); |
4354 | 2 | NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities, |
4355 | 2 | /*isSigned=*/false); |
4356 | 2 | } |
4357 | | |
4358 | 4 | const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl(); |
4359 | | // Fill array by elements without iterators. |
4360 | 4 | unsigned Pos = 0; |
4361 | 4 | bool HasIterator = false; |
4362 | 6 | for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { |
4363 | 6 | if (C->getModifier()) { |
4364 | 2 | HasIterator = true; |
4365 | 2 | continue; |
4366 | 2 | } |
4367 | 4 | for (const Expr *E : C->varlists()) { |
4368 | 4 | llvm::Value *Addr; |
4369 | 4 | llvm::Value *Size; |
4370 | 4 | std::tie(Addr, Size) = getPointerAndSize(CGF, E); |
4371 | 4 | LValue Base = |
4372 | 4 | CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos), |
4373 | 4 | KmpTaskAffinityInfoTy); |
4374 | | // affs[i].base_addr = &<Affinities[i].second>; |
4375 | 4 | LValue BaseAddrLVal = CGF.EmitLValueForField( |
4376 | 4 | Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); |
4377 | 4 | CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), |
4378 | 4 | BaseAddrLVal); |
4379 | | // affs[i].len = sizeof(<Affinities[i].second>); |
4380 | 4 | LValue LenLVal = CGF.EmitLValueForField( |
4381 | 4 | Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); |
4382 | 4 | CGF.EmitStoreOfScalar(Size, LenLVal); |
4383 | 4 | ++Pos; |
4384 | 4 | } |
4385 | 4 | } |
// For iterator clauses the write position lives in memory (initialized to the
// number of entries already filled) because it is advanced inside the emitted
// loops.
4386 | 4 | LValue PosLVal; |
4387 | 4 | if (HasIterator) { |
4388 | 2 | PosLVal = CGF.MakeAddrLValue( |
4389 | 2 | CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"), |
4390 | 2 | C.getSizeType()); |
4391 | 2 | CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); |
4392 | 2 | } |
4393 | | // Process elements with iterators. |
4394 | 6 | for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { |
4395 | 6 | const Expr *Modifier = C->getModifier(); |
4396 | 6 | if (!Modifier) |
4397 | 4 | continue; |
4398 | 2 | OMPIteratorGeneratorScope IteratorScope( |
4399 | 2 | CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts())); |
4400 | 2 | for (const Expr *E : C->varlists()) { |
4401 | 2 | llvm::Value *Addr; |
4402 | 2 | llvm::Value *Size; |
4403 | 2 | std::tie(Addr, Size) = getPointerAndSize(CGF, E); |
4404 | 2 | llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); |
4405 | 2 | LValue Base = CGF.MakeAddrLValue( |
4406 | 2 | CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy); |
4407 | | // affs[i].base_addr = &<Affinities[i].second>; |
4408 | 2 | LValue BaseAddrLVal = CGF.EmitLValueForField( |
4409 | 2 | Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); |
4410 | 2 | CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), |
4411 | 2 | BaseAddrLVal); |
4412 | | // affs[i].len = sizeof(<Affinities[i].second>); |
4413 | 2 | LValue LenLVal = CGF.EmitLValueForField( |
4414 | 2 | Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); |
4415 | 2 | CGF.EmitStoreOfScalar(Size, LenLVal); |
4416 | 2 | Idx = CGF.Builder.CreateNUWAdd( |
4417 | 2 | Idx, llvm::ConstantInt::get(Idx->getType(), 1)); |
4418 | 2 | CGF.EmitStoreOfScalar(Idx, PosLVal); |
4419 | 2 | } |
4420 | 2 | } |
4421 | | // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref, |
4422 | | // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 |
4423 | | // naffins, kmp_task_affinity_info_t *affin_list); |
4424 | 4 | llvm::Value *LocRef = emitUpdateLocation(CGF, Loc); |
4425 | 4 | llvm::Value *GTid = getThreadID(CGF, Loc); |
4426 | 4 | llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
4427 | 4 | AffinitiesArray.getPointer(), CGM.VoidPtrTy); |
4428 | | // FIXME: Emit the function and ignore its result for now unless the |
4429 | | // runtime function is properly implemented. |
4430 | 4 | (void)CGF.EmitRuntimeCall( |
4431 | 4 | OMPBuilder.getOrCreateRuntimeFunction( |
4432 | 4 | CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity), |
4433 | 4 | {LocRef, GTid, NewTask, NumOfElements, AffinListPtr}); |
4434 | 4 | } |
4435 | 930 | llvm::Value *NewTaskNewTaskTTy = |
4436 | 930 | CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
4437 | 930 | NewTask, KmpTaskTWithPrivatesPtrTy); |
4438 | 930 | LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, |
4439 | 930 | KmpTaskTWithPrivatesQTy); |
4440 | 930 | LValue TDBase = |
4441 | 930 | CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); |
4442 | | // Fill the data in the resulting kmp_task_t record. |
4443 | | // Copy shareds if there are any. |
// The copy is skipped (and KmpTaskSharedsPtr stays invalid) when the shareds
// record has no fields.
4444 | 930 | Address KmpTaskSharedsPtr = Address::invalid(); |
4445 | 930 | if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { |
4446 | 561 | KmpTaskSharedsPtr = Address( |
4447 | 561 | CGF.EmitLoadOfScalar( |
4448 | 561 | CGF.EmitLValueForField( |
4449 | 561 | TDBase, |
4450 | 561 | *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)), |
4451 | 561 | Loc), |
4452 | 561 | CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy)); |
4453 | 561 | LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy); |
4454 | 561 | LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy); |
4455 | 561 | CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap); |
4456 | 561 | } |
4457 | | // Emit initial values for private copies (if any). |
4458 | 930 | TaskResultTy Result; |
4459 | 930 | if (!Privates.empty()) { |
4460 | 593 | emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD, |
4461 | 593 | SharedsTy, SharedsPtrTy, Data, Privates, |
4462 | 593 | /*ForDup=*/false); |
// A task duplication function is emitted only for taskloop directives, and
// only when there are lastprivates or checkInitIsRequired() reports that a
// private needs initialization.
4463 | 593 | if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && |
4464 | 593 | (151 !Data.LastprivateVars.empty()151 || checkInitIsRequired(CGF, Privates)102 )) { |
4465 | 99 | Result.TaskDupFn = emitTaskDupFunction( |
4466 | 99 | CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD, |
4467 | 99 | KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, |
4468 | 99 | /*WithLastIter=*/!Data.LastprivateVars.empty()); |
4469 | 99 | } |
4470 | 593 | } |
4471 | | // Fields of union "kmp_cmplrdata_t" for destructors and priority. |
4472 | 930 | enum { Priority = 0, Destructors = 1 }; |
4473 | | // Provide pointer to function with destructors for privates. |
// The union declaration is taken from the type of the Data1 field; the same
// declaration is used below to index into the Data2 field.
4474 | 930 | auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1); |
4475 | 930 | const RecordDecl *KmpCmplrdataUD = |
4476 | 930 | (*FI)->getType()->getAsUnionType()->getDecl(); |
4477 | 930 | if (NeedsCleanup) { |
4478 | 81 | llvm::Value *DestructorFn = emitDestructorsFunction( |
4479 | 81 | CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, |
4480 | 81 | KmpTaskTWithPrivatesQTy); |
4481 | 81 | LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI); |
4482 | 81 | LValue DestructorsLV = CGF.EmitLValueForField( |
4483 | 81 | Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors)); |
4484 | 81 | CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
4485 | 81 | DestructorFn, KmpRoutineEntryPtrTy), |
4486 | 81 | DestructorsLV); |
4487 | 81 | } |
4488 | | // Set priority. |
4489 | 930 | if (Data.Priority.getInt()) { |
4490 | 25 | LValue Data2LV = CGF.EmitLValueForField( |
4491 | 25 | TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2)); |
4492 | 25 | LValue PriorityLV = CGF.EmitLValueForField( |
4493 | 25 | Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority)); |
4494 | 25 | CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV); |
4495 | 25 | } |
4496 | 930 | Result.NewTask = NewTask; |
4497 | 930 | Result.TaskEntry = TaskEntry; |
4498 | 930 | Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy; |
4499 | 930 | Result.TDBase = TDBase; |
4500 | 930 | Result.KmpTaskTQTyRD = KmpTaskTQTyRD; |
4501 | 930 | return Result; |
4502 | 930 | } |
4503 | | |
namespace {
/// Dependence kinds in the encoding stored into the flags field of a
/// kmp_depend_info record.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x03,
  DepMutexInOutSet = 0x04,
  DepInOutSet = 0x08,
  DepOmpAllMem = 0x80,
};
/// Positions of the fields inside the kmp_depend_info record, in declaration
/// order.
enum RTLDependInfoFieldsTy {
  BaseAddr = 0,
  Len = 1,
  Flags = 2,
};
} // namespace
4516 | | |
4517 | | /// Translates internal dependency kind into the runtime kind. |
4518 | 1.02k | static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) { |
4519 | 1.02k | RTLDependenceKindTy DepKind; |
4520 | 1.02k | switch (K) { |
4521 | 202 | case OMPC_DEPEND_in: |
4522 | 202 | DepKind = DepIn; |
4523 | 202 | break; |
4524 | | // Out and InOut dependencies must use the same code. |
4525 | 342 | case OMPC_DEPEND_out: |
4526 | 793 | case OMPC_DEPEND_inout: |
4527 | 793 | DepKind = DepInOut; |
4528 | 793 | break; |
4529 | 16 | case OMPC_DEPEND_mutexinoutset: |
4530 | 16 | DepKind = DepMutexInOutSet; |
4531 | 16 | break; |
4532 | 6 | case OMPC_DEPEND_inoutset: |
4533 | 6 | DepKind = DepInOutSet; |
4534 | 6 | break; |
4535 | 10 | case OMPC_DEPEND_outallmemory: |
4536 | 10 | DepKind = DepOmpAllMem; |
4537 | 10 | break; |
4538 | 0 | case OMPC_DEPEND_source: |
4539 | 0 | case OMPC_DEPEND_sink: |
4540 | 0 | case OMPC_DEPEND_depobj: |
4541 | 0 | case OMPC_DEPEND_inoutallmemory: |
4542 | 0 | case OMPC_DEPEND_unknown: |
4543 | 0 | llvm_unreachable("Unknown task dependence type"); |
4544 | 1.02k | } |
4545 | 1.02k | return DepKind; |
4546 | 1.02k | } |
4547 | | |
4548 | | /// Builds kmp_depend_info, if it is not built yet, and builds flags type. |
4549 | | static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, |
4550 | 865 | QualType &FlagsTy) { |
4551 | 865 | FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); |
4552 | 865 | if (KmpDependInfoTy.isNull()) { |
4553 | 98 | RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); |
4554 | 98 | KmpDependInfoRD->startDefinition(); |
4555 | 98 | addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); |
4556 | 98 | addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); |
4557 | 98 | addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); |
4558 | 98 | KmpDependInfoRD->completeDefinition(); |
4559 | 98 | KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); |
4560 | 98 | } |
4561 | 865 | } |
4562 | | |
4563 | | std::pair<llvm::Value *, LValue> |
4564 | | CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, |
4565 | 12 | SourceLocation Loc) { |
4566 | 12 | ASTContext &C = CGM.getContext(); |
4567 | 12 | QualType FlagsTy; |
4568 | 12 | getDependTypes(C, KmpDependInfoTy, FlagsTy); |
4569 | 12 | RecordDecl *KmpDependInfoRD = |
4570 | 12 | cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); |
4571 | 12 | QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); |
4572 | 12 | LValue Base = CGF.EmitLoadOfPointerLValue( |
4573 | 12 | CGF.Builder.CreateElementBitCast( |
4574 | 12 | DepobjLVal.getAddress(CGF), |
4575 | 12 | CGF.ConvertTypeForMem(KmpDependInfoPtrTy)), |
4576 | 12 | KmpDependInfoPtrTy->castAs<PointerType>()); |
4577 | 12 | Address DepObjAddr = CGF.Builder.CreateGEP( |
4578 | 12 | Base.getAddress(CGF), |
4579 | 12 | llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); |
4580 | 12 | LValue NumDepsBase = CGF.MakeAddrLValue( |
4581 | 12 | DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo()); |
4582 | | // NumDeps = deps[i].base_addr; |
4583 | 12 | LValue BaseAddrLVal = CGF.EmitLValueForField( |
4584 | 12 | NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); |
4585 | 12 | llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc); |
4586 | 12 | return std::make_pair(NumDeps, Base); |
4587 | 12 | } |
4588 | | |
4589 | | static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, |
4590 | | llvm::PointerUnion<unsigned *, LValue *> Pos, |
4591 | | const OMPTaskDataTy::DependData &Data, |
4592 | 461 | Address DependenciesArray) { |
4593 | 461 | CodeGenModule &CGM = CGF.CGM; |
4594 | 461 | ASTContext &C = CGM.getContext(); |
4595 | 461 | QualType FlagsTy; |
4596 | 461 | getDependTypes(C, KmpDependInfoTy, FlagsTy); |
4597 | 461 | RecordDecl *KmpDependInfoRD = |
4598 | 461 | cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); |
4599 | 461 | llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); |
4600 | | |
4601 | 461 | OMPIteratorGeneratorScope IteratorScope( |
4602 | 461 | CGF, cast_or_null<OMPIteratorExpr>( |
4603 | 461 | Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()9 |
4604 | 461 | : nullptr452 )); |
4605 | 1.02k | for (const Expr *E : Data.DepExprs) { |
4606 | 1.02k | llvm::Value *Addr; |
4607 | 1.02k | llvm::Value *Size; |
4608 | | |
4609 | | // The expression will be a nullptr in the 'omp_all_memory' case. |
4610 | 1.02k | if (E) { |
4611 | 1.01k | std::tie(Addr, Size) = getPointerAndSize(CGF, E); |
4612 | 1.01k | Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy); |
4613 | 1.01k | } else { |
4614 | 10 | Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0); |
4615 | 10 | Size = llvm::ConstantInt::get(CGF.SizeTy, 0); |
4616 | 10 | } |
4617 | 1.02k | LValue Base; |
4618 | 1.02k | if (unsigned *P = Pos.dyn_cast<unsigned *>()) { |
4619 | 1.01k | Base = CGF.MakeAddrLValue( |
4620 | 1.01k | CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy); |
4621 | 1.01k | } else { |
4622 | 10 | assert(E && "Expected a non-null expression"); |
4623 | 0 | LValue &PosLVal = *Pos.get<LValue *>(); |
4624 | 10 | llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); |
4625 | 10 | Base = CGF.MakeAddrLValue( |
4626 | 10 | CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy); |
4627 | 10 | } |
4628 | | // deps[i].base_addr = &<Dependencies[i].second>; |
4629 | 0 | LValue BaseAddrLVal = CGF.EmitLValueForField( |
4630 | 1.02k | Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); |
4631 | 1.02k | CGF.EmitStoreOfScalar(Addr, BaseAddrLVal); |
4632 | | // deps[i].len = sizeof(<Dependencies[i].second>); |
4633 | 1.02k | LValue LenLVal = CGF.EmitLValueForField( |
4634 | 1.02k | Base, *std::next(KmpDependInfoRD->field_begin(), Len)); |
4635 | 1.02k | CGF.EmitStoreOfScalar(Size, LenLVal); |
4636 | | // deps[i].flags = <Dependencies[i].first>; |
4637 | 1.02k | RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind); |
4638 | 1.02k | LValue FlagsLVal = CGF.EmitLValueForField( |
4639 | 1.02k | Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); |
4640 | 1.02k | CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), |
4641 | 1.02k | FlagsLVal); |
4642 | 1.02k | if (unsigned *P = Pos.dyn_cast<unsigned *>()) { |
4643 | 1.01k | ++(*P); |
4644 | 1.01k | } else { |
4645 | 10 | LValue &PosLVal = *Pos.get<LValue *>(); |
4646 | 10 | llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); |
4647 | 10 | Idx = CGF.Builder.CreateNUWAdd(Idx, |
4648 | 10 | llvm::ConstantInt::get(Idx->getType(), 1)); |
4649 | 10 | CGF.EmitStoreOfScalar(Idx, PosLVal); |
4650 | 10 | } |
4651 | 1.02k | } |
4652 | 461 | } |
4653 | | |
4654 | | SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes( |
4655 | | CodeGenFunction &CGF, QualType &KmpDependInfoTy, |
4656 | 2 | const OMPTaskDataTy::DependData &Data) { |
4657 | 2 | assert(Data.DepKind == OMPC_DEPEND_depobj && |
4658 | 2 | "Expected depobj dependecy kind."); |
4659 | 0 | SmallVector<llvm::Value *, 4> Sizes; |
4660 | 2 | SmallVector<LValue, 4> SizeLVals; |
4661 | 2 | ASTContext &C = CGF.getContext(); |
4662 | 2 | { |
4663 | 2 | OMPIteratorGeneratorScope IteratorScope( |
4664 | 2 | CGF, cast_or_null<OMPIteratorExpr>( |
4665 | 2 | Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()0 |
4666 | 2 | : nullptr)); |
4667 | 4 | for (const Expr *E : Data.DepExprs) { |
4668 | 4 | llvm::Value *NumDeps; |
4669 | 4 | LValue Base; |
4670 | 4 | LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); |
4671 | 4 | std::tie(NumDeps, Base) = |
4672 | 4 | getDepobjElements(CGF, DepobjLVal, E->getExprLoc()); |
4673 | 4 | LValue NumLVal = CGF.MakeAddrLValue( |
4674 | 4 | CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"), |
4675 | 4 | C.getUIntPtrType()); |
4676 | 4 | CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0), |
4677 | 4 | NumLVal.getAddress(CGF)); |
4678 | 4 | llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc()); |
4679 | 4 | llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps); |
4680 | 4 | CGF.EmitStoreOfScalar(Add, NumLVal); |
4681 | 4 | SizeLVals.push_back(NumLVal); |
4682 | 4 | } |
4683 | 2 | } |
4684 | 6 | for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I4 ) { |
4685 | 4 | llvm::Value *Size = |
4686 | 4 | CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc()); |
4687 | 4 | Sizes.push_back(Size); |
4688 | 4 | } |
4689 | 2 | return Sizes; |
4690 | 2 | } |
4691 | | |
4692 | | void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF, |
4693 | | QualType &KmpDependInfoTy, |
4694 | | LValue PosLVal, |
4695 | | const OMPTaskDataTy::DependData &Data, |
4696 | 2 | Address DependenciesArray) { |
4697 | 2 | assert(Data.DepKind == OMPC_DEPEND_depobj && |
4698 | 2 | "Expected depobj dependecy kind."); |
4699 | 0 | llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy); |
4700 | 2 | { |
4701 | 2 | OMPIteratorGeneratorScope IteratorScope( |
4702 | 2 | CGF, cast_or_null<OMPIteratorExpr>( |
4703 | 2 | Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()0 |
4704 | 2 | : nullptr)); |
4705 | 6 | for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I4 ) { |
4706 | 4 | const Expr *E = Data.DepExprs[I]; |
4707 | 4 | llvm::Value *NumDeps; |
4708 | 4 | LValue Base; |
4709 | 4 | LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); |
4710 | 4 | std::tie(NumDeps, Base) = |
4711 | 4 | getDepobjElements(CGF, DepobjLVal, E->getExprLoc()); |
4712 | | |
4713 | | // memcopy dependency data. |
4714 | 4 | llvm::Value *Size = CGF.Builder.CreateNUWMul( |
4715 | 4 | ElSize, |
4716 | 4 | CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false)); |
4717 | 4 | llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); |
4718 | 4 | Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos); |
4719 | 4 | CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size); |
4720 | | |
4721 | | // Increase pos. |
4722 | | // pos += size; |
4723 | 4 | llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps); |
4724 | 4 | CGF.EmitStoreOfScalar(Add, PosLVal); |
4725 | 4 | } |
4726 | 2 | } |
4727 | 2 | } |
4728 | | |
4729 | | std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause( |
4730 | | CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies, |
4731 | 715 | SourceLocation Loc) { |
4732 | 715 | if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) { |
4733 | 378 | return D.DepExprs.empty(); |
4734 | 378 | })) |
4735 | 337 | return std::make_pair(nullptr, Address::invalid()); |
4736 | | // Process list of dependencies. |
4737 | 378 | ASTContext &C = CGM.getContext(); |
4738 | 378 | Address DependenciesArray = Address::invalid(); |
4739 | 378 | llvm::Value *NumOfElements = nullptr; |
4740 | 378 | unsigned NumDependencies = std::accumulate( |
4741 | 378 | Dependencies.begin(), Dependencies.end(), 0, |
4742 | 457 | [](unsigned V, const OMPTaskDataTy::DependData &D) { |
4743 | 457 | return D.DepKind == OMPC_DEPEND_depobj |
4744 | 457 | ? V2 |
4745 | 457 | : (V + (455 D.IteratorExpr455 ? 07 : D.DepExprs.size()448 )); |
4746 | 457 | }); |
4747 | 378 | QualType FlagsTy; |
4748 | 378 | getDependTypes(C, KmpDependInfoTy, FlagsTy); |
4749 | 378 | bool HasDepobjDeps = false; |
4750 | 378 | bool HasRegularWithIterators = false; |
4751 | 378 | llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0); |
4752 | 378 | llvm::Value *NumOfRegularWithIterators = |
4753 | 378 | llvm::ConstantInt::get(CGF.IntPtrTy, 0); |
4754 | | // Calculate number of depobj dependecies and regular deps with the iterators. |
4755 | 457 | for (const OMPTaskDataTy::DependData &D : Dependencies) { |
4756 | 457 | if (D.DepKind == OMPC_DEPEND_depobj) { |
4757 | 2 | SmallVector<llvm::Value *, 4> Sizes = |
4758 | 2 | emitDepobjElementsSizes(CGF, KmpDependInfoTy, D); |
4759 | 4 | for (llvm::Value *Size : Sizes) { |
4760 | 4 | NumOfDepobjElements = |
4761 | 4 | CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size); |
4762 | 4 | } |
4763 | 2 | HasDepobjDeps = true; |
4764 | 2 | continue; |
4765 | 2 | } |
4766 | | // Include number of iterations, if any. |
4767 | | |
4768 | 455 | if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) { |
4769 | 14 | for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I7 ) { |
4770 | 7 | llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); |
4771 | 7 | Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false); |
4772 | 7 | llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul( |
4773 | 7 | Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size())); |
4774 | 7 | NumOfRegularWithIterators = |
4775 | 7 | CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps); |
4776 | 7 | } |
4777 | 7 | HasRegularWithIterators = true; |
4778 | 7 | continue; |
4779 | 7 | } |
4780 | 455 | } |
4781 | | |
4782 | 378 | QualType KmpDependInfoArrayTy; |
4783 | 378 | if (HasDepobjDeps || HasRegularWithIterators376 ) { |
4784 | 8 | NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies, |
4785 | 8 | /*isSigned=*/false); |
4786 | 8 | if (HasDepobjDeps) { |
4787 | 2 | NumOfElements = |
4788 | 2 | CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements); |
4789 | 2 | } |
4790 | 8 | if (HasRegularWithIterators) { |
4791 | 6 | NumOfElements = |
4792 | 6 | CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements); |
4793 | 6 | } |
4794 | 8 | auto *OVE = new (C) OpaqueValueExpr( |
4795 | 8 | Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0), |
4796 | 8 | VK_PRValue); |
4797 | 8 | CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE, |
4798 | 8 | RValue::get(NumOfElements)); |
4799 | 8 | KmpDependInfoArrayTy = |
4800 | 8 | C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal, |
4801 | 8 | /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); |
4802 | | // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy); |
4803 | | // Properly emit variable-sized array. |
4804 | 8 | auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy, |
4805 | 8 | ImplicitParamDecl::Other); |
4806 | 8 | CGF.EmitVarDecl(*PD); |
4807 | 8 | DependenciesArray = CGF.GetAddrOfLocalVar(PD); |
4808 | 8 | NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, |
4809 | 8 | /*isSigned=*/false); |
4810 | 370 | } else { |
4811 | 370 | KmpDependInfoArrayTy = C.getConstantArrayType( |
4812 | 370 | KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr, |
4813 | 370 | ArrayType::Normal, /*IndexTypeQuals=*/0); |
4814 | 370 | DependenciesArray = |
4815 | 370 | CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); |
4816 | 370 | DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0); |
4817 | 370 | NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies, |
4818 | 370 | /*isSigned=*/false); |
4819 | 370 | } |
4820 | 378 | unsigned Pos = 0; |
4821 | 835 | for (unsigned I = 0, End = Dependencies.size(); I < End; ++I457 ) { |
4822 | 457 | if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || |
4823 | 457 | Dependencies[I].IteratorExpr455 ) |
4824 | 9 | continue; |
4825 | 448 | emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I], |
4826 | 448 | DependenciesArray); |
4827 | 448 | } |
4828 | | // Copy regular dependecies with iterators. |
4829 | 378 | LValue PosLVal = CGF.MakeAddrLValue( |
4830 | 378 | CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType()); |
4831 | 378 | CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); |
4832 | 835 | for (unsigned I = 0, End = Dependencies.size(); I < End; ++I457 ) { |
4833 | 457 | if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || |
4834 | 457 | !Dependencies[I].IteratorExpr455 ) |
4835 | 450 | continue; |
4836 | 7 | emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I], |
4837 | 7 | DependenciesArray); |
4838 | 7 | } |
4839 | | // Copy final depobj arrays without iterators. |
4840 | 378 | if (HasDepobjDeps) { |
4841 | 6 | for (unsigned I = 0, End = Dependencies.size(); I < End; ++I4 ) { |
4842 | 4 | if (Dependencies[I].DepKind != OMPC_DEPEND_depobj) |
4843 | 2 | continue; |
4844 | 2 | emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I], |
4845 | 2 | DependenciesArray); |
4846 | 2 | } |
4847 | 2 | } |
4848 | 378 | DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
4849 | 378 | DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty); |
4850 | 378 | return std::make_pair(NumOfElements, DependenciesArray); |
4851 | 715 | } |
4852 | | |
4853 | | Address CGOpenMPRuntime::emitDepobjDependClause( |
4854 | | CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, |
4855 | 6 | SourceLocation Loc) { |
4856 | 6 | if (Dependencies.DepExprs.empty()) |
4857 | 0 | return Address::invalid(); |
4858 | | // Process list of dependencies. |
4859 | 6 | ASTContext &C = CGM.getContext(); |
4860 | 6 | Address DependenciesArray = Address::invalid(); |
4861 | 6 | unsigned NumDependencies = Dependencies.DepExprs.size(); |
4862 | 6 | QualType FlagsTy; |
4863 | 6 | getDependTypes(C, KmpDependInfoTy, FlagsTy); |
4864 | 6 | RecordDecl *KmpDependInfoRD = |
4865 | 6 | cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); |
4866 | | |
4867 | 6 | llvm::Value *Size; |
4868 | | // Define type kmp_depend_info[<Dependencies.size()>]; |
4869 | | // For depobj reserve one extra element to store the number of elements. |
4870 | | // It is required to handle depobj(x) update(in) construct. |
4871 | | // kmp_depend_info[<Dependencies.size()>] deps; |
4872 | 6 | llvm::Value *NumDepsVal; |
4873 | 6 | CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy); |
4874 | 6 | if (const auto *IE = |
4875 | 6 | cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) { |
4876 | 2 | NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1); |
4877 | 4 | for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I2 ) { |
4878 | 2 | llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); |
4879 | 2 | Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); |
4880 | 2 | NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz); |
4881 | 2 | } |
4882 | 2 | Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1), |
4883 | 2 | NumDepsVal); |
4884 | 2 | CharUnits SizeInBytes = |
4885 | 2 | C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align); |
4886 | 2 | llvm::Value *RecSize = CGM.getSize(SizeInBytes); |
4887 | 2 | Size = CGF.Builder.CreateNUWMul(Size, RecSize); |
4888 | 2 | NumDepsVal = |
4889 | 2 | CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false); |
4890 | 4 | } else { |
4891 | 4 | QualType KmpDependInfoArrayTy = C.getConstantArrayType( |
4892 | 4 | KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1), |
4893 | 4 | nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); |
4894 | 4 | CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy); |
4895 | 4 | Size = CGM.getSize(Sz.alignTo(Align)); |
4896 | 4 | NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies); |
4897 | 4 | } |
4898 | | // Need to allocate on the dynamic memory. |
4899 | 6 | llvm::Value *ThreadID = getThreadID(CGF, Loc); |
4900 | | // Use default allocator. |
4901 | 6 | llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); |
4902 | 6 | llvm::Value *Args[] = {ThreadID, Size, Allocator}; |
4903 | | |
4904 | 6 | llvm::Value *Addr = |
4905 | 6 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
4906 | 6 | CGM.getModule(), OMPRTL___kmpc_alloc), |
4907 | 6 | Args, ".dep.arr.addr"); |
4908 | 6 | llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy); |
4909 | 6 | Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
4910 | 6 | Addr, KmpDependInfoLlvmTy->getPointerTo()); |
4911 | 6 | DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align); |
4912 | | // Write number of elements in the first element of array for depobj. |
4913 | 6 | LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy); |
4914 | | // deps[i].base_addr = NumDependencies; |
4915 | 6 | LValue BaseAddrLVal = CGF.EmitLValueForField( |
4916 | 6 | Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); |
4917 | 6 | CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal); |
4918 | 6 | llvm::PointerUnion<unsigned *, LValue *> Pos; |
4919 | 6 | unsigned Idx = 1; |
4920 | 6 | LValue PosLVal; |
4921 | 6 | if (Dependencies.IteratorExpr) { |
4922 | 2 | PosLVal = CGF.MakeAddrLValue( |
4923 | 2 | CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"), |
4924 | 2 | C.getSizeType()); |
4925 | 2 | CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal, |
4926 | 2 | /*IsInit=*/true); |
4927 | 2 | Pos = &PosLVal; |
4928 | 4 | } else { |
4929 | 4 | Pos = &Idx; |
4930 | 4 | } |
4931 | 6 | emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray); |
4932 | 6 | DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
4933 | 6 | CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy, |
4934 | 6 | CGF.Int8Ty); |
4935 | 6 | return DependenciesArray; |
4936 | 6 | } |
4937 | | |
4938 | | void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, |
4939 | 4 | SourceLocation Loc) { |
4940 | 4 | ASTContext &C = CGM.getContext(); |
4941 | 4 | QualType FlagsTy; |
4942 | 4 | getDependTypes(C, KmpDependInfoTy, FlagsTy); |
4943 | 4 | LValue Base = CGF.EmitLoadOfPointerLValue( |
4944 | 4 | DepobjLVal.getAddress(CGF), C.VoidPtrTy.castAs<PointerType>()); |
4945 | 4 | QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); |
4946 | 4 | Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
4947 | 4 | Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy), |
4948 | 4 | CGF.ConvertTypeForMem(KmpDependInfoTy)); |
4949 | 4 | llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( |
4950 | 4 | Addr.getElementType(), Addr.getPointer(), |
4951 | 4 | llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); |
4952 | 4 | DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr, |
4953 | 4 | CGF.VoidPtrTy); |
4954 | 4 | llvm::Value *ThreadID = getThreadID(CGF, Loc); |
4955 | | // Use default allocator. |
4956 | 4 | llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); |
4957 | 4 | llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator}; |
4958 | | |
4959 | | // _kmpc_free(gtid, addr, nullptr); |
4960 | 4 | (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
4961 | 4 | CGM.getModule(), OMPRTL___kmpc_free), |
4962 | 4 | Args); |
4963 | 4 | } |
4964 | | |
4965 | | void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, |
4966 | | OpenMPDependClauseKind NewDepKind, |
4967 | 4 | SourceLocation Loc) { |
4968 | 4 | ASTContext &C = CGM.getContext(); |
4969 | 4 | QualType FlagsTy; |
4970 | 4 | getDependTypes(C, KmpDependInfoTy, FlagsTy); |
4971 | 4 | RecordDecl *KmpDependInfoRD = |
4972 | 4 | cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); |
4973 | 4 | llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); |
4974 | 4 | llvm::Value *NumDeps; |
4975 | 4 | LValue Base; |
4976 | 4 | std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc); |
4977 | | |
4978 | 4 | Address Begin = Base.getAddress(CGF); |
4979 | | // Cast from pointer to array type to pointer to single element. |
4980 | 4 | llvm::Value *End = CGF.Builder.CreateGEP( |
4981 | 4 | Begin.getElementType(), Begin.getPointer(), NumDeps); |
4982 | | // The basic structure here is a while-do loop. |
4983 | 4 | llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body"); |
4984 | 4 | llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done"); |
4985 | 4 | llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); |
4986 | 4 | CGF.EmitBlock(BodyBB); |
4987 | 4 | llvm::PHINode *ElementPHI = |
4988 | 4 | CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast"); |
4989 | 4 | ElementPHI->addIncoming(Begin.getPointer(), EntryBB); |
4990 | 4 | Begin = Begin.withPointer(ElementPHI); |
4991 | 4 | Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(), |
4992 | 4 | Base.getTBAAInfo()); |
4993 | | // deps[i].flags = NewDepKind; |
4994 | 4 | RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind); |
4995 | 4 | LValue FlagsLVal = CGF.EmitLValueForField( |
4996 | 4 | Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); |
4997 | 4 | CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), |
4998 | 4 | FlagsLVal); |
4999 | | |
5000 | | // Shift the address forward by one element. |
5001 | 4 | Address ElementNext = |
5002 | 4 | CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext"); |
5003 | 4 | ElementPHI->addIncoming(ElementNext.getPointer(), |
5004 | 4 | CGF.Builder.GetInsertBlock()); |
5005 | 4 | llvm::Value *IsEmpty = |
5006 | 4 | CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty"); |
5007 | 4 | CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); |
5008 | | // Done. |
5009 | 4 | CGF.EmitBlock(DoneBB, /*IsFinished=*/true); |
5010 | 4 | } |
5011 | | |
5012 | | void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, |
5013 | | const OMPExecutableDirective &D, |
5014 | | llvm::Function *TaskFunction, |
5015 | | QualType SharedsTy, Address Shareds, |
5016 | | const Expr *IfCond, |
5017 | 704 | const OMPTaskDataTy &Data) { |
5018 | 704 | if (!CGF.HaveInsertPoint()) |
5019 | 0 | return; |
5020 | | |
5021 | 704 | TaskResultTy Result = |
5022 | 704 | emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); |
5023 | 704 | llvm::Value *NewTask = Result.NewTask; |
5024 | 704 | llvm::Function *TaskEntry = Result.TaskEntry; |
5025 | 704 | llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; |
5026 | 704 | LValue TDBase = Result.TDBase; |
5027 | 704 | const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; |
5028 | | // Process list of dependences. |
5029 | 704 | Address DependenciesArray = Address::invalid(); |
5030 | 704 | llvm::Value *NumOfElements; |
5031 | 704 | std::tie(NumOfElements, DependenciesArray) = |
5032 | 704 | emitDependClause(CGF, Data.Dependences, Loc); |
5033 | | |
5034 | | // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() |
5035 | | // libcall. |
5036 | | // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, |
5037 | | // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, |
5038 | | // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence |
5039 | | // list is not empty |
5040 | 704 | llvm::Value *ThreadID = getThreadID(CGF, Loc); |
5041 | 704 | llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); |
5042 | 704 | llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; |
5043 | 704 | llvm::Value *DepTaskArgs[7]; |
5044 | 704 | if (!Data.Dependences.empty()) { |
5045 | 374 | DepTaskArgs[0] = UpLoc; |
5046 | 374 | DepTaskArgs[1] = ThreadID; |
5047 | 374 | DepTaskArgs[2] = NewTask; |
5048 | 374 | DepTaskArgs[3] = NumOfElements; |
5049 | 374 | DepTaskArgs[4] = DependenciesArray.getPointer(); |
|