/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This provides a class for OpenMP runtime code generation. |
10 | | // |
11 | | //===----------------------------------------------------------------------===// |
12 | | |
13 | | #include "CGOpenMPRuntime.h" |
14 | | #include "CGCXXABI.h" |
15 | | #include "CGCleanup.h" |
16 | | #include "CGRecordLayout.h" |
17 | | #include "CodeGenFunction.h" |
18 | | #include "clang/AST/Attr.h" |
19 | | #include "clang/AST/Decl.h" |
20 | | #include "clang/AST/OpenMPClause.h" |
21 | | #include "clang/AST/StmtOpenMP.h" |
22 | | #include "clang/AST/StmtVisitor.h" |
23 | | #include "clang/Basic/BitmaskEnum.h" |
24 | | #include "clang/Basic/FileManager.h" |
25 | | #include "clang/Basic/OpenMPKinds.h" |
26 | | #include "clang/Basic/SourceManager.h" |
27 | | #include "clang/CodeGen/ConstantInitBuilder.h" |
28 | | #include "llvm/ADT/ArrayRef.h" |
29 | | #include "llvm/ADT/SetOperations.h" |
30 | | #include "llvm/ADT/StringExtras.h" |
31 | | #include "llvm/Bitcode/BitcodeReader.h" |
32 | | #include "llvm/IR/Constants.h" |
33 | | #include "llvm/IR/DerivedTypes.h" |
34 | | #include "llvm/IR/GlobalValue.h" |
35 | | #include "llvm/IR/Value.h" |
36 | | #include "llvm/Support/AtomicOrdering.h" |
37 | | #include "llvm/Support/Format.h" |
38 | | #include "llvm/Support/raw_ostream.h" |
39 | | #include <cassert> |
40 | | #include <numeric> |
41 | | |
42 | | using namespace clang; |
43 | | using namespace CodeGen; |
44 | | using namespace llvm::omp; |
45 | | |
46 | | namespace { |
47 | | /// Base class for handling code generation inside OpenMP regions. |
48 | | class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { |
49 | | public: |
50 | | /// Kinds of OpenMP regions used in codegen. |
51 | | enum CGOpenMPRegionKind { |
52 | | /// Region with outlined function for standalone 'parallel' |
53 | | /// directive. |
54 | | ParallelOutlinedRegion, |
55 | | /// Region with outlined function for standalone 'task' directive. |
56 | | TaskOutlinedRegion, |
57 | | /// Region for constructs that do not require function outlining, |
58 | | /// like 'for', 'sections', 'atomic' etc. directives. |
59 | | InlinedRegion, |
60 | | /// Region with outlined function for standalone 'target' directive. |
61 | | TargetRegion, |
62 | | }; |
63 | | |
64 | | CGOpenMPRegionInfo(const CapturedStmt &CS, |
65 | | const CGOpenMPRegionKind RegionKind, |
66 | | const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, |
67 | | bool HasCancel) |
68 | | : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), |
69 | 24.1k | CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} |
70 | | |
71 | | CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, |
72 | | const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, |
73 | | bool HasCancel) |
74 | | : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), |
75 | 42.0k | Kind(Kind), HasCancel(HasCancel) {} |
76 | | |
77 | | /// Get a variable or parameter for storing global thread id |
78 | | /// inside OpenMP construct. |
79 | | virtual const VarDecl *getThreadIDVariable() const = 0; |
80 | | |
81 | | /// Emit the captured statement body. |
82 | | void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; |
83 | | |
84 | | /// Get an LValue for the current ThreadID variable. |
85 | | /// \return LValue for thread id variable. This LValue always has type int32*. |
86 | | virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); |
87 | | |
88 | 28 | virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {} |
89 | | |
90 | 41.7k | CGOpenMPRegionKind getRegionKind() const { return RegionKind; } |
91 | | |
92 | 211 | OpenMPDirectiveKind getDirectiveKind() const { return Kind; } |
93 | | |
94 | 328 | bool hasCancel() const { return HasCancel; } |
95 | | |
96 | 140k | static bool classof(const CGCapturedStmtInfo *Info) { |
97 | 140k | return Info->getKind() == CR_OpenMP; |
98 | 140k | } |
99 | | |
100 | 66.1k | ~CGOpenMPRegionInfo() override = default; |
101 | | |
102 | | protected: |
103 | | CGOpenMPRegionKind RegionKind; |
104 | | RegionCodeGenTy CodeGen; |
105 | | OpenMPDirectiveKind Kind; |
106 | | bool HasCancel; |
107 | | }; |
108 | | |
109 | | /// API for captured statement code generation in OpenMP constructs. |
110 | | class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { |
111 | | public: |
112 | | CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, |
113 | | const RegionCodeGenTy &CodeGen, |
114 | | OpenMPDirectiveKind Kind, bool HasCancel, |
115 | | StringRef HelperName) |
116 | | : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, |
117 | | HasCancel), |
118 | 11.6k | ThreadIDVar(ThreadIDVar), HelperName(HelperName) { |
119 | 11.6k | assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); |
120 | 11.6k | } |
121 | | |
122 | | /// Get a variable or parameter for storing global thread id |
123 | | /// inside OpenMP construct. |
124 | 38.3k | const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } |
125 | | |
126 | | /// Get the name of the capture helper. |
127 | 11.6k | StringRef getHelperName() const override { return HelperName; } |
128 | | |
129 | 0 | static bool classof(const CGCapturedStmtInfo *Info) { |
130 | 0 | return CGOpenMPRegionInfo::classof(Info) && |
131 | 0 | cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == |
132 | 0 | ParallelOutlinedRegion; |
133 | 0 | } |
134 | | |
135 | | private: |
136 | | /// A variable or parameter storing global thread id for OpenMP |
137 | | /// constructs. |
138 | | const VarDecl *ThreadIDVar; |
139 | | StringRef HelperName; |
140 | | }; |
141 | | |
142 | | /// API for captured statement code generation in OpenMP constructs. |
143 | | class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { |
144 | | public: |
145 | | class UntiedTaskActionTy final : public PrePostActionTy { |
146 | | bool Untied; |
147 | | const VarDecl *PartIDVar; |
148 | | const RegionCodeGenTy UntiedCodeGen; |
149 | | llvm::SwitchInst *UntiedSwitch = nullptr; |
150 | | |
151 | | public: |
152 | | UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, |
153 | | const RegionCodeGenTy &UntiedCodeGen) |
154 | 855 | : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} |
155 | 855 | void Enter(CodeGenFunction &CGF) override { |
156 | 855 | if (Untied) { |
157 | | // Emit task switching point. |
158 | 16 | LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( |
159 | 16 | CGF.GetAddrOfLocalVar(PartIDVar), |
160 | 16 | PartIDVar->getType()->castAs<PointerType>()); |
161 | 16 | llvm::Value *Res = |
162 | 16 | CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation()); |
163 | 16 | llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done."); |
164 | 16 | UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); |
165 | 16 | CGF.EmitBlock(DoneBB); |
166 | 16 | CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); |
167 | 16 | CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); |
168 | 16 | UntiedSwitch->addCase(CGF.Builder.getInt32(0), |
169 | 16 | CGF.Builder.GetInsertBlock()); |
170 | 16 | emitUntiedSwitch(CGF); |
171 | 16 | } |
172 | 855 | } |
173 | 34 | void emitUntiedSwitch(CodeGenFunction &CGF) const { |
174 | 34 | if (Untied) { |
175 | 30 | LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( |
176 | 30 | CGF.GetAddrOfLocalVar(PartIDVar), |
177 | 30 | PartIDVar->getType()->castAs<PointerType>()); |
178 | 30 | CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), |
179 | 30 | PartIdLVal); |
180 | 30 | UntiedCodeGen(CGF); |
181 | 30 | CodeGenFunction::JumpDest CurPoint = |
182 | 30 | CGF.getJumpDestInCurrentScope(".untied.next."); |
183 | 30 | CGF.EmitBranch(CGF.ReturnBlock.getBlock()); |
184 | 30 | CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); |
185 | 30 | UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), |
186 | 30 | CGF.Builder.GetInsertBlock()); |
187 | 30 | CGF.EmitBranchThroughCleanup(CurPoint); |
188 | 30 | CGF.EmitBlock(CurPoint.getBlock()); |
189 | 30 | } |
190 | 34 | } |
191 | 16 | unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); } |
192 | | }; |
193 | | CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, |
194 | | const VarDecl *ThreadIDVar, |
195 | | const RegionCodeGenTy &CodeGen, |
196 | | OpenMPDirectiveKind Kind, bool HasCancel, |
197 | | const UntiedTaskActionTy &Action) |
198 | | : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), |
199 | 855 | ThreadIDVar(ThreadIDVar), Action(Action) { |
200 | 855 | assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); |
201 | 855 | } |
202 | | |
203 | | /// Get a variable or parameter for storing global thread id |
204 | | /// inside OpenMP construct. |
205 | 282 | const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } |
206 | | |
207 | | /// Get an LValue for the current ThreadID variable. |
208 | | LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; |
209 | | |
210 | | /// Get the name of the capture helper. |
211 | 855 | StringRef getHelperName() const override { return ".omp_outlined."; } |
212 | | |
213 | 18 | void emitUntiedSwitch(CodeGenFunction &CGF) override { |
214 | 18 | Action.emitUntiedSwitch(CGF); |
215 | 18 | } |
216 | | |
217 | 0 | static bool classof(const CGCapturedStmtInfo *Info) { |
218 | 0 | return CGOpenMPRegionInfo::classof(Info) && |
219 | 0 | cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == |
220 | 0 | TaskOutlinedRegion; |
221 | 0 | } |
222 | | |
223 | | private: |
224 | | /// A variable or parameter storing global thread id for OpenMP |
225 | | /// constructs. |
226 | | const VarDecl *ThreadIDVar; |
227 | | /// Action for emitting code for untied tasks. |
228 | | const UntiedTaskActionTy &Action; |
229 | | }; |
230 | | |
231 | | /// API for inlined captured statement code generation in OpenMP |
232 | | /// constructs. |
233 | | class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { |
234 | | public: |
235 | | CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, |
236 | | const RegionCodeGenTy &CodeGen, |
237 | | OpenMPDirectiveKind Kind, bool HasCancel) |
238 | | : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), |
239 | | OldCSI(OldCSI), |
240 | 42.0k | OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} |
241 | | |
242 | | // Retrieve the value of the context parameter. |
243 | 0 | llvm::Value *getContextValue() const override { |
244 | 0 | if (OuterRegionInfo) |
245 | 0 | return OuterRegionInfo->getContextValue(); |
246 | 0 | llvm_unreachable("No context value for inlined OpenMP region"); |
247 | 0 | } |
248 | | |
249 | 0 | void setContextValue(llvm::Value *V) override { |
250 | 0 | if (OuterRegionInfo) { |
251 | 0 | OuterRegionInfo->setContextValue(V); |
252 | 0 | return; |
253 | 0 | } |
254 | 0 | llvm_unreachable("No context value for inlined OpenMP region"); |
255 | 0 | } |
256 | | |
257 | | /// Lookup the captured field decl for a variable. |
258 | 26.0k | const FieldDecl *lookup(const VarDecl *VD) const override { |
259 | 26.0k | if (OuterRegionInfo) |
260 | 13.3k | return OuterRegionInfo->lookup(VD); |
261 | | // If there is no outer outlined region,no need to lookup in a list of |
262 | | // captured variables, we can use the original one. |
263 | 12.6k | return nullptr; |
264 | 12.6k | } |
265 | | |
266 | 0 | FieldDecl *getThisFieldDecl() const override { |
267 | 0 | if (OuterRegionInfo) |
268 | 0 | return OuterRegionInfo->getThisFieldDecl(); |
269 | 0 | return nullptr; |
270 | 0 | } |
271 | | |
272 | | /// Get a variable or parameter for storing global thread id |
273 | | /// inside OpenMP construct. |
274 | 7.10k | const VarDecl *getThreadIDVariable() const override { |
275 | 7.10k | if (OuterRegionInfo) |
276 | 6.92k | return OuterRegionInfo->getThreadIDVariable(); |
277 | 179 | return nullptr; |
278 | 179 | } |
279 | | |
280 | | /// Get an LValue for the current ThreadID variable. |
281 | 6.92k | LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override { |
282 | 6.92k | if (OuterRegionInfo) |
283 | 6.92k | return OuterRegionInfo->getThreadIDVariableLValue(CGF); |
284 | 0 | llvm_unreachable("No LValue for inlined OpenMP construct"); |
285 | 0 | } |
286 | | |
287 | | /// Get the name of the capture helper. |
288 | 0 | StringRef getHelperName() const override { |
289 | 0 | if (auto *OuterRegionInfo = getOldCSI()) |
290 | 0 | return OuterRegionInfo->getHelperName(); |
291 | 0 | llvm_unreachable("No helper name for inlined OpenMP construct"); |
292 | 0 | } |
293 | | |
294 | 16 | void emitUntiedSwitch(CodeGenFunction &CGF) override { |
295 | 16 | if (OuterRegionInfo) |
296 | 14 | OuterRegionInfo->emitUntiedSwitch(CGF); |
297 | 16 | } |
298 | | |
299 | 41.7k | CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } |
300 | | |
301 | 41.7k | static bool classof(const CGCapturedStmtInfo *Info) { |
302 | 41.7k | return CGOpenMPRegionInfo::classof(Info) && |
303 | 41.7k | cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; |
304 | 41.7k | } |
305 | | |
306 | 42.0k | ~CGOpenMPInlinedRegionInfo() override = default; |
307 | | |
308 | | private: |
309 | | /// CodeGen info about outer OpenMP region. |
310 | | CodeGenFunction::CGCapturedStmtInfo *OldCSI; |
311 | | CGOpenMPRegionInfo *OuterRegionInfo; |
312 | | }; |
313 | | |
314 | | /// API for captured statement code generation in OpenMP target |
315 | | /// constructs. For this captures, implicit parameters are used instead of the |
316 | | /// captured fields. The name of the target region has to be unique in a given |
317 | | /// application so it is provided by the client, because only the client has |
318 | | /// the information to generate that. |
319 | | class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { |
320 | | public: |
321 | | CGOpenMPTargetRegionInfo(const CapturedStmt &CS, |
322 | | const RegionCodeGenTy &CodeGen, StringRef HelperName) |
323 | | : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, |
324 | | /*HasCancel=*/false), |
325 | 11.6k | HelperName(HelperName) {} |
326 | | |
327 | | /// This is unused for target regions because each starts executing |
328 | | /// with a single thread. |
329 | 2.26k | const VarDecl *getThreadIDVariable() const override { return nullptr; } |
330 | | |
331 | | /// Get the name of the capture helper. |
332 | 11.6k | StringRef getHelperName() const override { return HelperName; } |
333 | | |
334 | 0 | static bool classof(const CGCapturedStmtInfo *Info) { |
335 | 0 | return CGOpenMPRegionInfo::classof(Info) && |
336 | 0 | cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; |
337 | 0 | } |
338 | | |
339 | | private: |
340 | | StringRef HelperName; |
341 | | }; |
342 | | |
343 | 0 | static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { |
344 | 0 | llvm_unreachable("No codegen for expressions"); |
345 | 0 | } |
346 | | /// API for generation of expressions captured in a innermost OpenMP |
347 | | /// region. |
348 | | class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { |
349 | | public: |
350 | | CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) |
351 | | : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, |
352 | | OMPD_unknown, |
353 | | /*HasCancel=*/false), |
354 | 282 | PrivScope(CGF) { |
355 | | // Make sure the globals captured in the provided statement are local by |
356 | | // using the privatization logic. We assume the same variable is not |
357 | | // captured more than once. |
358 | 420 | for (const auto &C : CS.captures()) { |
359 | 420 | if (!C.capturesVariable() && !C.capturesVariableByCopy()322 ) |
360 | 16 | continue; |
361 | | |
362 | 404 | const VarDecl *VD = C.getCapturedVar(); |
363 | 404 | if (VD->isLocalVarDeclOrParm()) |
364 | 324 | continue; |
365 | | |
366 | 80 | DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), |
367 | 80 | /*RefersToEnclosingVariableOrCapture=*/false, |
368 | 80 | VD->getType().getNonReferenceType(), VK_LValue, |
369 | 80 | C.getLocation()); |
370 | 80 | PrivScope.addPrivate( |
371 | 80 | VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); }); |
372 | 80 | } |
373 | 282 | (void)PrivScope.Privatize(); |
374 | 282 | } |
375 | | |
376 | | /// Lookup the captured field decl for a variable. |
377 | 0 | const FieldDecl *lookup(const VarDecl *VD) const override { |
378 | 0 | if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) |
379 | 0 | return FD; |
380 | 0 | return nullptr; |
381 | 0 | } |
382 | | |
383 | | /// Emit the captured statement body. |
384 | 0 | void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { |
385 | 0 | llvm_unreachable("No body for expressions"); |
386 | 0 | } |
387 | | |
388 | | /// Get a variable or parameter for storing global thread id |
389 | | /// inside OpenMP construct. |
390 | 0 | const VarDecl *getThreadIDVariable() const override { |
391 | 0 | llvm_unreachable("No thread id for expressions"); |
392 | 0 | } |
393 | | |
394 | | /// Get the name of the capture helper. |
395 | 0 | StringRef getHelperName() const override { |
396 | 0 | llvm_unreachable("No helper name for expressions"); |
397 | 0 | } |
398 | | |
399 | 0 | static bool classof(const CGCapturedStmtInfo *Info) { return false; } |
400 | | |
401 | | private: |
402 | | /// Private scope to capture global variables. |
403 | | CodeGenFunction::OMPPrivateScope PrivScope; |
404 | | }; |
405 | | |
406 | | /// RAII for emitting code of OpenMP constructs. |
407 | | class InlinedOpenMPRegionRAII { |
408 | | CodeGenFunction &CGF; |
409 | | llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; |
410 | | FieldDecl *LambdaThisCaptureField = nullptr; |
411 | | const CodeGen::CGBlockInfo *BlockInfo = nullptr; |
412 | | |
413 | | public: |
414 | | /// Constructs region for combined constructs. |
415 | | /// \param CodeGen Code generation sequence for combined directives. Includes |
416 | | /// a list of functions used for code generation of implicitly inlined |
417 | | /// regions. |
418 | | InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, |
419 | | OpenMPDirectiveKind Kind, bool HasCancel) |
420 | 41.7k | : CGF(CGF) { |
421 | | // Start emission for the construct. |
422 | 41.7k | CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( |
423 | 41.7k | CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); |
424 | 41.7k | std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); |
425 | 41.7k | LambdaThisCaptureField = CGF.LambdaThisCaptureField; |
426 | 41.7k | CGF.LambdaThisCaptureField = nullptr; |
427 | 41.7k | BlockInfo = CGF.BlockInfo; |
428 | 41.7k | CGF.BlockInfo = nullptr; |
429 | 41.7k | } |
430 | | |
431 | 41.7k | ~InlinedOpenMPRegionRAII() { |
432 | | // Restore original CapturedStmtInfo only if we're done with code emission. |
433 | 41.7k | auto *OldCSI = |
434 | 41.7k | cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); |
435 | 41.7k | delete CGF.CapturedStmtInfo; |
436 | 41.7k | CGF.CapturedStmtInfo = OldCSI; |
437 | 41.7k | std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); |
438 | 41.7k | CGF.LambdaThisCaptureField = LambdaThisCaptureField; |
439 | 41.7k | CGF.BlockInfo = BlockInfo; |
440 | 41.7k | } |
441 | | }; |
442 | | |
443 | | /// Values for bit flags used in the ident_t to describe the fields. |
444 | | /// All enumeric elements are named and described in accordance with the code |
445 | | /// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h |
446 | | enum OpenMPLocationFlags : unsigned { |
447 | | /// Use trampoline for internal microtask. |
448 | | OMP_IDENT_IMD = 0x01, |
449 | | /// Use c-style ident structure. |
450 | | OMP_IDENT_KMPC = 0x02, |
451 | | /// Atomic reduction option for kmpc_reduce. |
452 | | OMP_ATOMIC_REDUCE = 0x10, |
453 | | /// Explicit 'barrier' directive. |
454 | | OMP_IDENT_BARRIER_EXPL = 0x20, |
455 | | /// Implicit barrier in code. |
456 | | OMP_IDENT_BARRIER_IMPL = 0x40, |
457 | | /// Implicit barrier in 'for' directive. |
458 | | OMP_IDENT_BARRIER_IMPL_FOR = 0x40, |
459 | | /// Implicit barrier in 'sections' directive. |
460 | | OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, |
461 | | /// Implicit barrier in 'single' directive. |
462 | | OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, |
463 | | /// Call of __kmp_for_static_init for static loop. |
464 | | OMP_IDENT_WORK_LOOP = 0x200, |
465 | | /// Call of __kmp_for_static_init for sections. |
466 | | OMP_IDENT_WORK_SECTIONS = 0x400, |
467 | | /// Call of __kmp_for_static_init for distribute. |
468 | | OMP_IDENT_WORK_DISTRIBUTE = 0x800, |
469 | | LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) |
470 | | }; |
471 | | |
472 | | namespace { |
473 | | LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); |
474 | | /// Values for bit flags for marking which requires clauses have been used. |
475 | | enum OpenMPOffloadingRequiresDirFlags : int64_t { |
476 | | /// flag undefined. |
477 | | OMP_REQ_UNDEFINED = 0x000, |
478 | | /// no requires clause present. |
479 | | OMP_REQ_NONE = 0x001, |
480 | | /// reverse_offload clause. |
481 | | OMP_REQ_REVERSE_OFFLOAD = 0x002, |
482 | | /// unified_address clause. |
483 | | OMP_REQ_UNIFIED_ADDRESS = 0x004, |
484 | | /// unified_shared_memory clause. |
485 | | OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, |
486 | | /// dynamic_allocators clause. |
487 | | OMP_REQ_DYNAMIC_ALLOCATORS = 0x010, |
488 | | LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS) |
489 | | }; |
490 | | |
491 | | enum OpenMPOffloadingReservedDeviceIDs { |
492 | | /// Device ID if the device was not defined, runtime should get it |
493 | | /// from environment variables in the spec. |
494 | | OMP_DEVICEID_UNDEF = -1, |
495 | | }; |
496 | | } // anonymous namespace |
497 | | |
/// Field indices of the ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// Might be used in Fortran.
  IdentField_Reserved_1 = 0,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags = 1,
  /// Not really used in Fortran any more.
  IdentField_Reserved_2 = 2,
  /// Source[4] in Fortran, do not use for C++.
  IdentField_Reserved_3 = 3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource = 4
};
538 | | |
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// Static with chunk adjustment (e.g., simd).
  OMP_sch_static_balanced_chunked = 45,

  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,

  /// Default schedule: an alias of OMP_sch_static.
  OMP_sch_default = OMP_sch_static,

  /// dist_schedule types.
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,

  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = 1 << 29,
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = 1 << 30,
};
570 | | |
571 | | /// A basic class for pre|post-action for advanced codegen sequence for OpenMP |
572 | | /// region. |
573 | | class CleanupTy final : public EHScopeStack::Cleanup { |
574 | | PrePostActionTy *Action; |
575 | | |
576 | | public: |
577 | 15.4k | explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} |
578 | 15.5k | void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { |
579 | 15.5k | if (!CGF.HaveInsertPoint()) |
580 | 0 | return; |
581 | 15.5k | Action->Exit(CGF); |
582 | 15.5k | } |
583 | | }; |
584 | | |
585 | | } // anonymous namespace |
586 | | |
587 | 125k | void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { |
588 | 125k | CodeGenFunction::RunCleanupsScope Scope(CGF); |
589 | 125k | if (PrePostAction) { |
590 | 15.4k | CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); |
591 | 15.4k | Callback(CodeGen, CGF, *PrePostAction); |
592 | 109k | } else { |
593 | 109k | PrePostActionTy Action; |
594 | 109k | Callback(CodeGen, CGF, Action); |
595 | 109k | } |
596 | 125k | } |
597 | | |
598 | | /// Check if the combiner is a call to UDR combiner and if it is so return the |
599 | | /// UDR decl used for reduction. |
600 | | static const OMPDeclareReductionDecl * |
601 | 1.07k | getReductionInit(const Expr *ReductionOp) { |
602 | 1.07k | if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) |
603 | 117 | if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) |
604 | 77 | if (const auto *DRE = |
605 | 77 | dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) |
606 | 77 | if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) |
607 | 77 | return DRD; |
608 | 1.00k | return nullptr; |
609 | 1.00k | } |
610 | | |
611 | | static void emitInitWithReductionInitializer(CodeGenFunction &CGF, |
612 | | const OMPDeclareReductionDecl *DRD, |
613 | | const Expr *InitOp, |
614 | | Address Private, Address Original, |
615 | 61 | QualType Ty) { |
616 | 61 | if (DRD->getInitializer()) { |
617 | 53 | std::pair<llvm::Function *, llvm::Function *> Reduction = |
618 | 53 | CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); |
619 | 53 | const auto *CE = cast<CallExpr>(InitOp); |
620 | 53 | const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); |
621 | 53 | const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); |
622 | 53 | const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); |
623 | 53 | const auto *LHSDRE = |
624 | 53 | cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); |
625 | 53 | const auto *RHSDRE = |
626 | 53 | cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); |
627 | 53 | CodeGenFunction::OMPPrivateScope PrivateScope(CGF); |
628 | 53 | PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), |
629 | 53 | [=]() { return Private; }); |
630 | 53 | PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), |
631 | 53 | [=]() { return Original; }); |
632 | 53 | (void)PrivateScope.Privatize(); |
633 | 53 | RValue Func = RValue::get(Reduction.second); |
634 | 53 | CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); |
635 | 53 | CGF.EmitIgnoredExpr(InitOp); |
636 | 8 | } else { |
637 | 8 | llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); |
638 | 8 | std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"}); |
639 | 8 | auto *GV = new llvm::GlobalVariable( |
640 | 8 | CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, |
641 | 8 | llvm::GlobalValue::PrivateLinkage, Init, Name); |
642 | 8 | LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); |
643 | 8 | RValue InitRVal; |
644 | 8 | switch (CGF.getEvaluationKind(Ty)) { |
645 | 8 | case TEK_Scalar: |
646 | 8 | InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation()); |
647 | 8 | break; |
648 | 0 | case TEK_Complex: |
649 | 0 | InitRVal = |
650 | 0 | RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation())); |
651 | 0 | break; |
652 | 0 | case TEK_Aggregate: |
653 | 0 | InitRVal = RValue::getAggregate(LV.getAddress(CGF)); |
654 | 0 | break; |
655 | 8 | } |
656 | 8 | OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue); |
657 | 8 | CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); |
658 | 8 | CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), |
659 | 8 | /*IsInitializer=*/false); |
660 | 8 | } |
661 | 61 | } |
662 | | |
663 | | /// Emit initialization of arrays of complex types. |
664 | | /// \param DestAddr Address of the array. |
665 | | /// \param Type Type of array. |
666 | | /// \param Init Initial expression of array. |
667 | | /// \param SrcAddr Address of the original array. |
668 | | static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, |
669 | | QualType Type, bool EmitDeclareReductionInit, |
670 | | const Expr *Init, |
671 | | const OMPDeclareReductionDecl *DRD, |
672 | 277 | Address SrcAddr = Address::invalid()) { |
673 | | // Perform element-by-element initialization. |
674 | 277 | QualType ElementTy; |
675 | | |
676 | | // Drill down to the base element type on both arrays. |
677 | 277 | const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); |
678 | 277 | llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); |
679 | 277 | DestAddr = |
680 | 277 | CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); |
681 | 277 | if (DRD) |
682 | 31 | SrcAddr = |
683 | 31 | CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); |
684 | | |
685 | 277 | llvm::Value *SrcBegin = nullptr; |
686 | 277 | if (DRD) |
687 | 31 | SrcBegin = SrcAddr.getPointer(); |
688 | 277 | llvm::Value *DestBegin = DestAddr.getPointer(); |
689 | | // Cast from pointer to array type to pointer to single element. |
690 | 277 | llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); |
691 | | // The basic structure here is a while-do loop. |
692 | 277 | llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); |
693 | 277 | llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); |
694 | 277 | llvm::Value *IsEmpty = |
695 | 277 | CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); |
696 | 277 | CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); |
697 | | |
698 | | // Enter the loop body, making that address the current address. |
699 | 277 | llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); |
700 | 277 | CGF.EmitBlock(BodyBB); |
701 | | |
702 | 277 | CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); |
703 | | |
704 | 277 | llvm::PHINode *SrcElementPHI = nullptr; |
705 | 277 | Address SrcElementCurrent = Address::invalid(); |
706 | 277 | if (DRD) { |
707 | 31 | SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, |
708 | 31 | "omp.arraycpy.srcElementPast"); |
709 | 31 | SrcElementPHI->addIncoming(SrcBegin, EntryBB); |
710 | 31 | SrcElementCurrent = |
711 | 31 | Address(SrcElementPHI, |
712 | 31 | SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); |
713 | 31 | } |
714 | 277 | llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( |
715 | 277 | DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); |
716 | 277 | DestElementPHI->addIncoming(DestBegin, EntryBB); |
717 | 277 | Address DestElementCurrent = |
718 | 277 | Address(DestElementPHI, |
719 | 277 | DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); |
720 | | |
721 | | // Emit copy. |
722 | 277 | { |
723 | 277 | CodeGenFunction::RunCleanupsScope InitScope(CGF); |
724 | 277 | if (EmitDeclareReductionInit) { |
725 | 31 | emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, |
726 | 31 | SrcElementCurrent, ElementTy); |
727 | 31 | } else |
728 | 246 | CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), |
729 | 246 | /*IsInitializer=*/false); |
730 | 277 | } |
731 | | |
732 | 277 | if (DRD) { |
733 | | // Shift the address forward by one element. |
734 | 31 | llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32( |
735 | 31 | SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); |
736 | 31 | SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); |
737 | 31 | } |
738 | | |
739 | | // Shift the address forward by one element. |
740 | 277 | llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32( |
741 | 277 | DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); |
742 | | // Check whether we've reached the end. |
743 | 277 | llvm::Value *Done = |
744 | 277 | CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); |
745 | 277 | CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); |
746 | 277 | DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); |
747 | | |
748 | | // Done. |
749 | 277 | CGF.EmitBlock(DoneBB, /*IsFinished=*/true); |
750 | 277 | } |
751 | | |
752 | 1.09k | LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { |
753 | 1.09k | return CGF.EmitOMPSharedLValue(E); |
754 | 1.09k | } |
755 | | |
756 | | LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, |
757 | 1.09k | const Expr *E) { |
758 | 1.09k | if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) |
759 | 227 | return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); |
760 | 868 | return LValue(); |
761 | 868 | } |
762 | | |
763 | | void ReductionCodeGen::emitAggregateInitialization( |
764 | | CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, |
765 | 277 | const OMPDeclareReductionDecl *DRD) { |
766 | | // Emit VarDecl with copy init for arrays. |
767 | | // Get the address of the original variable captured in current |
768 | | // captured region. |
769 | 277 | const auto *PrivateVD = |
770 | 277 | cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); |
771 | 277 | bool EmitDeclareReductionInit = |
772 | 277 | DRD && (31 DRD->getInitializer()31 || !PrivateVD->hasInit()4 ); |
773 | 277 | EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), |
774 | 277 | EmitDeclareReductionInit, |
775 | 31 | EmitDeclareReductionInit ? ClausesData[N].ReductionOp |
776 | 246 | : PrivateVD->getInit(), |
777 | 277 | DRD, SharedLVal.getAddress(CGF)); |
778 | 277 | } |
779 | | |
780 | | ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, |
781 | | ArrayRef<const Expr *> Origs, |
782 | | ArrayRef<const Expr *> Privates, |
783 | 27.3k | ArrayRef<const Expr *> ReductionOps) { |
784 | 27.3k | ClausesData.reserve(Shareds.size()); |
785 | 27.3k | SharedAddresses.reserve(Shareds.size()); |
786 | 27.3k | Sizes.reserve(Shareds.size()); |
787 | 27.3k | BaseDecls.reserve(Shareds.size()); |
788 | 27.3k | const auto *IOrig = Origs.begin(); |
789 | 27.3k | const auto *IPriv = Privates.begin(); |
790 | 27.3k | const auto *IRed = ReductionOps.begin(); |
791 | 1.05k | for (const Expr *Ref : Shareds) { |
792 | 1.05k | ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed); |
793 | 1.05k | std::advance(IOrig, 1); |
794 | 1.05k | std::advance(IPriv, 1); |
795 | 1.05k | std::advance(IRed, 1); |
796 | 1.05k | } |
797 | 27.3k | } |
798 | | |
799 | 1.04k | void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) { |
800 | 1.04k | assert(SharedAddresses.size() == N && OrigAddresses.size() == N && |
801 | 1.04k | "Number of generated lvalues must be exactly N."); |
802 | 1.04k | LValue First = emitSharedLValue(CGF, ClausesData[N].Shared); |
803 | 1.04k | LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared); |
804 | 1.04k | SharedAddresses.emplace_back(First, Second); |
805 | 1.04k | if (ClausesData[N].Shared == ClausesData[N].Ref) { |
806 | 991 | OrigAddresses.emplace_back(First, Second); |
807 | 52 | } else { |
808 | 52 | LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); |
809 | 52 | LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); |
810 | 52 | OrigAddresses.emplace_back(First, Second); |
811 | 52 | } |
812 | 1.04k | } |
813 | | |
814 | 1.04k | void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { |
815 | 1.04k | const auto *PrivateVD = |
816 | 1.04k | cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); |
817 | 1.04k | QualType PrivateType = PrivateVD->getType(); |
818 | 1.04k | bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); |
819 | 1.04k | if (!PrivateType->isVariablyModifiedType()) { |
820 | 808 | Sizes.emplace_back( |
821 | 808 | CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()), |
822 | 808 | nullptr); |
823 | 808 | return; |
824 | 808 | } |
825 | 235 | llvm::Value *Size; |
826 | 235 | llvm::Value *SizeInChars; |
827 | 235 | auto *ElemType = |
828 | 235 | cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType()) |
829 | 235 | ->getElementType(); |
830 | 235 | auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); |
831 | 235 | if (AsArraySection) { |
832 | 184 | Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF), |
833 | 184 | OrigAddresses[N].first.getPointer(CGF)); |
834 | 184 | Size = CGF.Builder.CreateNUWAdd( |
835 | 184 | Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); |
836 | 184 | SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf); |
837 | 51 | } else { |
838 | 51 | SizeInChars = |
839 | 51 | CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()); |
840 | 51 | Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf); |
841 | 51 | } |
842 | 235 | Sizes.emplace_back(SizeInChars, Size); |
843 | 235 | CodeGenFunction::OpaqueValueMapping OpaqueMap( |
844 | 235 | CGF, |
845 | 235 | cast<OpaqueValueExpr>( |
846 | 235 | CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), |
847 | 235 | RValue::get(Size)); |
848 | 235 | CGF.EmitVariablyModifiedType(PrivateType); |
849 | 235 | } |
850 | | |
851 | | void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, |
852 | 301 | llvm::Value *Size) { |
853 | 301 | const auto *PrivateVD = |
854 | 301 | cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); |
855 | 301 | QualType PrivateType = PrivateVD->getType(); |
856 | 301 | if (!PrivateType->isVariablyModifiedType()) { |
857 | 193 | assert(!Size && !Sizes[N].second && |
858 | 193 | "Size should be nullptr for non-variably modified reduction " |
859 | 193 | "items."); |
860 | 193 | return; |
861 | 193 | } |
862 | 108 | CodeGenFunction::OpaqueValueMapping OpaqueMap( |
863 | 108 | CGF, |
864 | 108 | cast<OpaqueValueExpr>( |
865 | 108 | CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), |
866 | 108 | RValue::get(Size)); |
867 | 108 | CGF.EmitVariablyModifiedType(PrivateType); |
868 | 108 | } |
869 | | |
870 | | void ReductionCodeGen::emitInitialization( |
871 | | CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, |
872 | 937 | llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) { |
873 | 937 | assert(SharedAddresses.size() > N && "No variable was generated"); |
874 | 937 | const auto *PrivateVD = |
875 | 937 | cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); |
876 | 937 | const OMPDeclareReductionDecl *DRD = |
877 | 937 | getReductionInit(ClausesData[N].ReductionOp); |
878 | 937 | QualType PrivateType = PrivateVD->getType(); |
879 | 937 | PrivateAddr = CGF.Builder.CreateElementBitCast( |
880 | 937 | PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); |
881 | 937 | QualType SharedType = SharedAddresses[N].first.getType(); |
882 | 937 | SharedLVal = CGF.MakeAddrLValue( |
883 | 937 | CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF), |
884 | 937 | CGF.ConvertTypeForMem(SharedType)), |
885 | 937 | SharedType, SharedAddresses[N].first.getBaseInfo(), |
886 | 937 | CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType)); |
887 | 937 | if (CGF.getContext().getAsArrayType(PrivateVD->getType())) { |
888 | 277 | if (DRD && DRD->getInitializer()31 ) |
889 | 27 | (void)DefaultInit(CGF); |
890 | 277 | emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD); |
891 | 660 | } else if (DRD && (40 DRD->getInitializer()40 || !PrivateVD->hasInit()14 )) { |
892 | 30 | (void)DefaultInit(CGF); |
893 | 30 | emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, |
894 | 30 | PrivateAddr, SharedLVal.getAddress(CGF), |
895 | 30 | SharedLVal.getType()); |
896 | 630 | } else if (!DefaultInit(CGF) && PrivateVD->hasInit()77 && |
897 | 77 | !CGF.isTrivialInitializer(PrivateVD->getInit())) { |
898 | 77 | CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr, |
899 | 77 | PrivateVD->getType().getQualifiers(), |
900 | 77 | /*IsInitializer=*/false); |
901 | 77 | } |
902 | 937 | } |
903 | | |
904 | 160 | bool ReductionCodeGen::needCleanups(unsigned N) { |
905 | 160 | const auto *PrivateVD = |
906 | 160 | cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); |
907 | 160 | QualType PrivateType = PrivateVD->getType(); |
908 | 160 | QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); |
909 | 160 | return DTorKind != QualType::DK_none; |
910 | 160 | } |
911 | | |
912 | | void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N, |
913 | 19 | Address PrivateAddr) { |
914 | 19 | const auto *PrivateVD = |
915 | 19 | cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); |
916 | 19 | QualType PrivateType = PrivateVD->getType(); |
917 | 19 | QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); |
918 | 19 | if (needCleanups(N)) { |
919 | 19 | PrivateAddr = CGF.Builder.CreateElementBitCast( |
920 | 19 | PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); |
921 | 19 | CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType); |
922 | 19 | } |
923 | 19 | } |
924 | | |
925 | | static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, |
926 | 179 | LValue BaseLV) { |
927 | 179 | BaseTy = BaseTy.getNonReferenceType(); |
928 | 299 | while ((BaseTy->isPointerType() || BaseTy->isReferenceType()179 ) && |
929 | 120 | !CGF.getContext().hasSameType(BaseTy, ElTy)) { |
930 | 120 | if (const auto *PtrTy = BaseTy->getAs<PointerType>()) { |
931 | 120 | BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy); |
932 | 0 | } else { |
933 | 0 | LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy); |
934 | 0 | BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal); |
935 | 0 | } |
936 | 120 | BaseTy = BaseTy->getPointeeType(); |
937 | 120 | } |
938 | 179 | return CGF.MakeAddrLValue( |
939 | 179 | CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF), |
940 | 179 | CGF.ConvertTypeForMem(ElTy)), |
941 | 179 | BaseLV.getType(), BaseLV.getBaseInfo(), |
942 | 179 | CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType())); |
943 | 179 | } |
944 | | |
945 | | static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, |
946 | | llvm::Type *BaseLVType, CharUnits BaseLVAlignment, |
947 | 179 | llvm::Value *Addr) { |
948 | 179 | Address Tmp = Address::invalid(); |
949 | 179 | Address TopTmp = Address::invalid(); |
950 | 179 | Address MostTopTmp = Address::invalid(); |
951 | 179 | BaseTy = BaseTy.getNonReferenceType(); |
952 | 299 | while ((BaseTy->isPointerType() || BaseTy->isReferenceType()179 ) && |
953 | 120 | !CGF.getContext().hasSameType(BaseTy, ElTy)) { |
954 | 120 | Tmp = CGF.CreateMemTemp(BaseTy); |
955 | 120 | if (TopTmp.isValid()) |
956 | 58 | CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); |
957 | 62 | else |
958 | 62 | MostTopTmp = Tmp; |
959 | 120 | TopTmp = Tmp; |
960 | 120 | BaseTy = BaseTy->getPointeeType(); |
961 | 120 | } |
962 | 179 | llvm::Type *Ty = BaseLVType; |
963 | 179 | if (Tmp.isValid()) |
964 | 62 | Ty = Tmp.getElementType(); |
965 | 179 | Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty); |
966 | 179 | if (Tmp.isValid()) { |
967 | 62 | CGF.Builder.CreateStore(Addr, Tmp); |
968 | 62 | return MostTopTmp; |
969 | 62 | } |
970 | 117 | return Address(Addr, BaseLVAlignment); |
971 | 117 | } |
972 | | |
973 | 1.05k | static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) { |
974 | 1.05k | const VarDecl *OrigVD = nullptr; |
975 | 1.05k | if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) { |
976 | 277 | const Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); |
977 | 411 | while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) |
978 | 134 | Base = TempOASE->getBase()->IgnoreParenImpCasts(); |
979 | 285 | while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) |
980 | 8 | Base = TempASE->getBase()->IgnoreParenImpCasts(); |
981 | 277 | DE = cast<DeclRefExpr>(Base); |
982 | 277 | OrigVD = cast<VarDecl>(DE->getDecl()); |
983 | 773 | } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) { |
984 | 0 | const Expr *Base = ASE->getBase()->IgnoreParenImpCasts(); |
985 | 0 | while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) |
986 | 0 | Base = TempASE->getBase()->IgnoreParenImpCasts(); |
987 | 0 | DE = cast<DeclRefExpr>(Base); |
988 | 0 | OrigVD = cast<VarDecl>(DE->getDecl()); |
989 | 0 | } |
990 | 1.05k | return OrigVD; |
991 | 1.05k | } |
992 | | |
993 | | Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, |
994 | 886 | Address PrivateAddr) { |
995 | 886 | const DeclRefExpr *DE; |
996 | 886 | if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) { |
997 | 179 | BaseDecls.emplace_back(OrigVD); |
998 | 179 | LValue OriginalBaseLValue = CGF.EmitLValue(DE); |
999 | 179 | LValue BaseLValue = |
1000 | 179 | loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), |
1001 | 179 | OriginalBaseLValue); |
1002 | 179 | llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( |
1003 | 179 | BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF)); |
1004 | 179 | llvm::Value *PrivatePointer = |
1005 | 179 | CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
1006 | 179 | PrivateAddr.getPointer(), |
1007 | 179 | SharedAddresses[N].first.getAddress(CGF).getType()); |
1008 | 179 | llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment); |
1009 | 179 | return castToBase(CGF, OrigVD->getType(), |
1010 | 179 | SharedAddresses[N].first.getType(), |
1011 | 179 | OriginalBaseLValue.getAddress(CGF).getType(), |
1012 | 179 | OriginalBaseLValue.getAlignment(), Ptr); |
1013 | 179 | } |
1014 | 707 | BaseDecls.emplace_back( |
1015 | 707 | cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl())); |
1016 | 707 | return PrivateAddr; |
1017 | 707 | } |
1018 | | |
1019 | 141 | bool ReductionCodeGen::usesReductionInitializer(unsigned N) const { |
1020 | 141 | const OMPDeclareReductionDecl *DRD = |
1021 | 141 | getReductionInit(ClausesData[N].ReductionOp); |
1022 | 141 | return DRD && DRD->getInitializer()6 ; |
1023 | 141 | } |
1024 | | |
1025 | 12.7k | LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { |
1026 | 12.7k | return CGF.EmitLoadOfPointerLValue( |
1027 | 12.7k | CGF.GetAddrOfLocalVar(getThreadIDVariable()), |
1028 | 12.7k | getThreadIDVariable()->getType()->castAs<PointerType>()); |
1029 | 12.7k | } |
1030 | | |
1031 | 65.2k | void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { |
1032 | 65.2k | if (!CGF.HaveInsertPoint()) |
1033 | 0 | return; |
1034 | | // 1.2.2 OpenMP Language Terminology |
1035 | | // Structured block - An executable statement with a single entry at the |
1036 | | // top and a single exit at the bottom. |
1037 | | // The point of exit cannot be a branch out of the structured block. |
1038 | | // longjmp() and throw() must not violate the entry/exit criteria. |
1039 | 65.2k | CGF.EHStack.pushTerminate(); |
1040 | 65.2k | CodeGen(CGF); |
1041 | 65.2k | CGF.EHStack.popTerminate(); |
1042 | 65.2k | } |
1043 | | |
1044 | | LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( |
1045 | 94 | CodeGenFunction &CGF) { |
1046 | 94 | return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), |
1047 | 94 | getThreadIDVariable()->getType(), |
1048 | 94 | AlignmentSource::Decl); |
1049 | 94 | } |
1050 | | |
1051 | | static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, |
1052 | 18.4k | QualType FieldTy) { |
1053 | 18.4k | auto *Field = FieldDecl::Create( |
1054 | 18.4k | C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, |
1055 | 18.4k | C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), |
1056 | 18.4k | /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); |
1057 | 18.4k | Field->setAccess(AS_public); |
1058 | 18.4k | DC->addDecl(Field); |
1059 | 18.4k | return Field; |
1060 | 18.4k | } |
1061 | | |
1062 | | CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, |
1063 | | StringRef Separator) |
1064 | | : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator), |
1065 | 5.68k | OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) { |
1066 | 5.68k | KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); |
1067 | | |
1068 | | // Initialize Types used in OpenMPIRBuilder from OMPKinds.def |
1069 | 5.68k | OMPBuilder.initialize(); |
1070 | 5.68k | loadOffloadInfoMetadata(); |
1071 | 5.68k | } |
1072 | | |
1073 | 5.68k | void CGOpenMPRuntime::clear() { |
1074 | 5.68k | InternalVars.clear(); |
1075 | | // Clean non-target variable declarations possibly used only in debug info. |
1076 | 12 | for (const auto &Data : EmittedNonTargetVariables) { |
1077 | 12 | if (!Data.getValue().pointsToAliveValue()) |
1078 | 0 | continue; |
1079 | 12 | auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue()); |
1080 | 12 | if (!GV) |
1081 | 0 | continue; |
1082 | 12 | if (!GV->isDeclaration() || GV->getNumUses() > 0) |
1083 | 11 | continue; |
1084 | 1 | GV->eraseFromParent(); |
1085 | 1 | } |
1086 | 5.68k | } |
1087 | | |
1088 | 50.0k | std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const { |
1089 | 50.0k | SmallString<128> Buffer; |
1090 | 50.0k | llvm::raw_svector_ostream OS(Buffer); |
1091 | 50.0k | StringRef Sep = FirstSeparator; |
1092 | 96.5k | for (StringRef Part : Parts) { |
1093 | 96.5k | OS << Sep << Part; |
1094 | 96.5k | Sep = Separator; |
1095 | 96.5k | } |
1096 | 50.0k | return std::string(OS.str()); |
1097 | 50.0k | } |
1098 | | |
1099 | | static llvm::Function * |
1100 | | emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, |
1101 | | const Expr *CombinerInitializer, const VarDecl *In, |
1102 | 216 | const VarDecl *Out, bool IsCombiner) { |
1103 | | // void .omp_combiner.(Ty *in, Ty *out); |
1104 | 216 | ASTContext &C = CGM.getContext(); |
1105 | 216 | QualType PtrTy = C.getPointerType(Ty).withRestrict(); |
1106 | 216 | FunctionArgList Args; |
1107 | 216 | ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), |
1108 | 216 | /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); |
1109 | 216 | ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(), |
1110 | 216 | /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); |
1111 | 216 | Args.push_back(&OmpOutParm); |
1112 | 216 | Args.push_back(&OmpInParm); |
1113 | 216 | const CGFunctionInfo &FnInfo = |
1114 | 216 | CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); |
1115 | 216 | llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); |
1116 | 216 | std::string Name = CGM.getOpenMPRuntime().getName( |
1117 | 137 | {IsCombiner ? "omp_combiner" : "omp_initializer"79 , ""}); |
1118 | 216 | auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, |
1119 | 216 | Name, &CGM.getModule()); |
1120 | 216 | CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); |
1121 | 216 | if (CGM.getLangOpts().Optimize) { |
1122 | 0 | Fn->removeFnAttr(llvm::Attribute::NoInline); |
1123 | 0 | Fn->removeFnAttr(llvm::Attribute::OptimizeNone); |
1124 | 0 | Fn->addFnAttr(llvm::Attribute::AlwaysInline); |
1125 | 0 | } |
1126 | 216 | CodeGenFunction CGF(CGM); |
1127 | | // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. |
1128 | | // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions. |
1129 | 216 | CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(), |
1130 | 216 | Out->getLocation()); |
1131 | 216 | CodeGenFunction::OMPPrivateScope Scope(CGF); |
1132 | 216 | Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); |
1133 | 216 | Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() { |
1134 | 216 | return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) |
1135 | 216 | .getAddress(CGF); |
1136 | 216 | }); |
1137 | 216 | Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); |
1138 | 216 | Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() { |
1139 | 216 | return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) |
1140 | 216 | .getAddress(CGF); |
1141 | 216 | }); |
1142 | 216 | (void)Scope.Privatize(); |
1143 | 216 | if (!IsCombiner && Out->hasInit()79 && |
1144 | 50 | !CGF.isTrivialInitializer(Out->getInit())) { |
1145 | 50 | CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out), |
1146 | 50 | Out->getType().getQualifiers(), |
1147 | 50 | /*IsInitializer=*/true); |
1148 | 50 | } |
1149 | 216 | if (CombinerInitializer) |
1150 | 166 | CGF.EmitIgnoredExpr(CombinerInitializer); |
1151 | 216 | Scope.ForceCleanup(); |
1152 | 216 | CGF.FinishFunction(); |
1153 | 216 | return Fn; |
1154 | 216 | } |
1155 | | |
1156 | | void CGOpenMPRuntime::emitUserDefinedReduction( |
1157 | 138 | CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) { |
1158 | 138 | if (UDRMap.count(D) > 0) |
1159 | 1 | return; |
1160 | 137 | llvm::Function *Combiner = emitCombinerOrInitializer( |
1161 | 137 | CGM, D->getType(), D->getCombiner(), |
1162 | 137 | cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()), |
1163 | 137 | cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()), |
1164 | 137 | /*IsCombiner=*/true); |
1165 | 137 | llvm::Function *Initializer = nullptr; |
1166 | 137 | if (const Expr *Init = D->getInitializer()) { |
1167 | 79 | Initializer = emitCombinerOrInitializer( |
1168 | 79 | CGM, D->getType(), |
1169 | 29 | D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init |
1170 | 50 | : nullptr, |
1171 | 79 | cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()), |
1172 | 79 | cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()), |
1173 | 79 | /*IsCombiner=*/false); |
1174 | 79 | } |
1175 | 137 | UDRMap.try_emplace(D, Combiner, Initializer); |
1176 | 137 | if (CGF) { |
1177 | 38 | auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn); |
1178 | 38 | Decls.second.push_back(D); |
1179 | 38 | } |
1180 | 137 | } |
1181 | | |
1182 | | std::pair<llvm::Function *, llvm::Function *> |
1183 | 240 | CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { |
1184 | 240 | auto I = UDRMap.find(D); |
1185 | 240 | if (I != UDRMap.end()) |
1186 | 206 | return I->second; |
1187 | 34 | emitUserDefinedReduction(/*CGF=*/nullptr, D); |
1188 | 34 | return UDRMap.lookup(D); |
1189 | 34 | } |
1190 | | |
1191 | | namespace { |
1192 | | // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR |
1193 | | // Builder if one is present. |
1194 | | struct PushAndPopStackRAII { |
1195 | | PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF, |
1196 | | bool HasCancel) |
1197 | 11.6k | : OMPBuilder(OMPBuilder) { |
1198 | 11.6k | if (!OMPBuilder) |
1199 | 0 | return; |
1200 | | |
1201 | | // The following callback is the crucial part of clangs cleanup process. |
1202 | | // |
1203 | | // NOTE: |
1204 | | // Once the OpenMPIRBuilder is used to create parallel regions (and |
1205 | | // similar), the cancellation destination (Dest below) is determined via |
1206 | | // IP. That means if we have variables to finalize we split the block at IP, |
1207 | | // use the new block (=BB) as destination to build a JumpDest (via |
1208 | | // getJumpDestInCurrentScope(BB)) which then is fed to |
1209 | | // EmitBranchThroughCleanup. Furthermore, there will not be the need |
1210 | | // to push & pop an FinalizationInfo object. |
1211 | | // The FiniCB will still be needed but at the point where the |
1212 | | // OpenMPIRBuilder is asked to construct a parallel (or similar) construct. |
1213 | 11.6k | auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) { |
1214 | 0 | assert(IP.getBlock()->end() == IP.getPoint() && |
1215 | 0 | "Clang CG should cause non-terminated block!"); |
1216 | 0 | CGBuilderTy::InsertPointGuard IPG(CGF.Builder); |
1217 | 0 | CGF.Builder.restoreIP(IP); |
1218 | 0 | CodeGenFunction::JumpDest Dest = |
1219 | 0 | CGF.getOMPCancelDestination(OMPD_parallel); |
1220 | 0 | CGF.EmitBranchThroughCleanup(Dest); |
1221 | 0 | }; |
1222 | | |
1223 | | // TODO: Remove this once we emit parallel regions through the |
1224 | | // OpenMPIRBuilder as it can do this setup internally. |
1225 | 11.6k | llvm::OpenMPIRBuilder::FinalizationInfo FI( |
1226 | 11.6k | {FiniCB, OMPD_parallel, HasCancel}); |
1227 | 11.6k | OMPBuilder->pushFinalizationCB(std::move(FI)); |
1228 | 11.6k | } |
1229 | 11.6k | ~PushAndPopStackRAII() { |
1230 | 11.6k | if (OMPBuilder) |
1231 | 11.6k | OMPBuilder->popFinalizationCB(); |
1232 | 11.6k | } |
1233 | | llvm::OpenMPIRBuilder *OMPBuilder; |
1234 | | }; |
1235 | | } // namespace |
1236 | | |
1237 | | static llvm::Function *emitParallelOrTeamsOutlinedFunction( |
1238 | | CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, |
1239 | | const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, |
1240 | 11.6k | const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) { |
1241 | 11.6k | assert(ThreadIDVar->getType()->isPointerType() && |
1242 | 11.6k | "thread id variable must be of type kmp_int32 *"); |
1243 | 11.6k | CodeGenFunction CGF(CGM, true); |
1244 | 11.6k | bool HasCancel = false; |
1245 | 11.6k | if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D)) |
1246 | 905 | HasCancel = OPD->hasCancel(); |
1247 | 10.7k | else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D)) |
1248 | 911 | HasCancel = OPD->hasCancel(); |
1249 | 9.79k | else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) |
1250 | 26 | HasCancel = OPSD->hasCancel(); |
1251 | 9.77k | else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) |
1252 | 207 | HasCancel = OPFD->hasCancel(); |
1253 | 9.56k | else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D)) |
1254 | 556 | HasCancel = OPFD->hasCancel(); |
1255 | 9.00k | else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D)) |
1256 | 412 | HasCancel = OPFD->hasCancel(); |
1257 | 8.59k | else if (const auto *OPFD = |
1258 | 680 | dyn_cast<OMPTeamsDistributeParallelForDirective>(&D)) |
1259 | 680 | HasCancel = OPFD->hasCancel(); |
1260 | 7.91k | else if (const auto *OPFD = |
1261 | 1.15k | dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D)) |
1262 | 1.15k | HasCancel = OPFD->hasCancel(); |
1263 | | |
1264 | | // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new |
1265 | | // parallel region to make cancellation barriers work properly. |
1266 | 11.6k | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
1267 | 11.6k | PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel); |
1268 | 11.6k | CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, |
1269 | 11.6k | HasCancel, OutlinedHelperName); |
1270 | 11.6k | CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); |
1271 | 11.6k | return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc()); |
1272 | 11.6k | } |
1273 | | |
1274 | | llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction( |
1275 | | const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, |
1276 | 6.05k | OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { |
1277 | 6.05k | const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); |
1278 | 6.05k | return emitParallelOrTeamsOutlinedFunction( |
1279 | 6.05k | CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); |
1280 | 6.05k | } |
1281 | | |
1282 | | llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction( |
1283 | | const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, |
1284 | 5.56k | OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { |
1285 | 5.56k | const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); |
1286 | 5.56k | return emitParallelOrTeamsOutlinedFunction( |
1287 | 5.56k | CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); |
1288 | 5.56k | } |
1289 | | |
1290 | | llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( |
1291 | | const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, |
1292 | | const VarDecl *PartIDVar, const VarDecl *TaskTVar, |
1293 | | OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, |
1294 | 855 | bool Tied, unsigned &NumberOfParts) { |
1295 | 855 | auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, |
1296 | 30 | PrePostActionTy &) { |
1297 | 30 | llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc()); |
1298 | 30 | llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc()); |
1299 | 30 | llvm::Value *TaskArgs[] = { |
1300 | 30 | UpLoc, ThreadID, |
1301 | 30 | CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), |
1302 | 30 | TaskTVar->getType()->castAs<PointerType>()) |
1303 | 30 | .getPointer(CGF)}; |
1304 | 30 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
1305 | 30 | CGM.getModule(), OMPRTL___kmpc_omp_task), |
1306 | 30 | TaskArgs); |
1307 | 30 | }; |
1308 | 855 | CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, |
1309 | 855 | UntiedCodeGen); |
1310 | 855 | CodeGen.setAction(Action); |
1311 | 855 | assert(!ThreadIDVar->getType()->isPointerType() && |
1312 | 855 | "thread id variable must be of type kmp_int32 for tasks"); |
1313 | 855 | const OpenMPDirectiveKind Region = |
1314 | 226 | isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop |
1315 | 629 | : OMPD_task; |
1316 | 855 | const CapturedStmt *CS = D.getCapturedStmt(Region); |
1317 | 855 | bool HasCancel = false; |
1318 | 855 | if (const auto *TD = dyn_cast<OMPTaskDirective>(&D)) |
1319 | 181 | HasCancel = TD->hasCancel(); |
1320 | 674 | else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D)) |
1321 | 39 | HasCancel = TD->hasCancel(); |
1322 | 635 | else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D)) |
1323 | 35 | HasCancel = TD->hasCancel(); |
1324 | 600 | else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D)) |
1325 | 33 | HasCancel = TD->hasCancel(); |
1326 | | |
1327 | 855 | CodeGenFunction CGF(CGM, true); |
1328 | 855 | CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, |
1329 | 855 | InnermostKind, HasCancel, Action); |
1330 | 855 | CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); |
1331 | 855 | llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS); |
1332 | 855 | if (!Tied) |
1333 | 16 | NumberOfParts = Action.getNumberOfParts(); |
1334 | 855 | return Res; |
1335 | 855 | } |
1336 | | |
1337 | | static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM, |
1338 | | const RecordDecl *RD, const CGRecordLayout &RL, |
1339 | 10.5k | ArrayRef<llvm::Constant *> Data) { |
1340 | 10.5k | llvm::StructType *StructTy = RL.getLLVMType(); |
1341 | 10.5k | unsigned PrevIdx = 0; |
1342 | 10.5k | ConstantInitBuilder CIBuilder(CGM); |
1343 | 10.5k | auto DI = Data.begin(); |
1344 | 52.8k | for (const FieldDecl *FD : RD->fields()) { |
1345 | 52.8k | unsigned Idx = RL.getLLVMFieldNo(FD); |
1346 | | // Fill the alignment. |
1347 | 52.8k | for (unsigned I = PrevIdx; I < Idx; ++I0 ) |
1348 | 0 | Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); |
1349 | 52.8k | PrevIdx = Idx + 1; |
1350 | 52.8k | Fields.add(*DI); |
1351 | 52.8k | ++DI; |
1352 | 52.8k | } |
1353 | 10.5k | } |
1354 | | |
1355 | | template <class... As> |
1356 | | static llvm::GlobalVariable * |
1357 | | createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, |
1358 | | ArrayRef<llvm::Constant *> Data, const Twine &Name, |
1359 | 10.5k | As &&... Args) { |
1360 | 10.5k | const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); |
1361 | 10.5k | const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); |
1362 | 10.5k | ConstantInitBuilder CIBuilder(CGM); |
1363 | 10.5k | ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); |
1364 | 10.5k | buildStructValue(Fields, CGM, RD, RL, Data); |
1365 | 10.5k | return Fields.finishAndCreateGlobal( |
1366 | 10.5k | Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant, |
1367 | 10.5k | std::forward<As>(Args)...); |
1368 | 10.5k | } |
1369 | | |
1370 | | template <typename T> |
1371 | | static void |
1372 | | createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, |
1373 | | ArrayRef<llvm::Constant *> Data, |
1374 | | T &Parent) { |
1375 | | const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); |
1376 | | const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); |
1377 | | ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType()); |
1378 | | buildStructValue(Fields, CGM, RD, RL, Data); |
1379 | | Fields.finishAndAddTo(Parent); |
1380 | | } |
1381 | | |
1382 | | void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, |
1383 | 2.54k | bool AtCurrentPoint) { |
1384 | 2.54k | auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); |
1385 | 2.54k | assert(!Elem.second.ServiceInsertPt && "Insert point is set already."); |
1386 | | |
1387 | 2.54k | llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty); |
1388 | 2.54k | if (AtCurrentPoint) { |
1389 | 1.06k | Elem.second.ServiceInsertPt = new llvm::BitCastInst( |
1390 | 1.06k | Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock()); |
1391 | 1.47k | } else { |
1392 | 1.47k | Elem.second.ServiceInsertPt = |
1393 | 1.47k | new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt"); |
1394 | 1.47k | Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt); |
1395 | 1.47k | } |
1396 | 2.54k | } |
1397 | | |
1398 | 10.4k | void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) { |
1399 | 10.4k | auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); |
1400 | 10.4k | if (Elem.second.ServiceInsertPt) { |
1401 | 2.54k | llvm::Instruction *Ptr = Elem.second.ServiceInsertPt; |
1402 | 2.54k | Elem.second.ServiceInsertPt = nullptr; |
1403 | 2.54k | Ptr->eraseFromParent(); |
1404 | 2.54k | } |
1405 | 10.4k | } |
1406 | | |
1407 | | static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF, |
1408 | | SourceLocation Loc, |
1409 | 222 | SmallString<128> &Buffer) { |
1410 | 222 | llvm::raw_svector_ostream OS(Buffer); |
1411 | | // Build debug location |
1412 | 222 | PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); |
1413 | 222 | OS << ";" << PLoc.getFilename() << ";"; |
1414 | 222 | if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) |
1415 | 222 | OS << FD->getQualifiedNameAsString(); |
1416 | 222 | OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; |
1417 | 222 | return OS.str(); |
1418 | 222 | } |
1419 | | |
1420 | | llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, |
1421 | | SourceLocation Loc, |
1422 | 49.3k | unsigned Flags) { |
1423 | 49.3k | llvm::Constant *SrcLocStr; |
1424 | 49.3k | if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || |
1425 | 48.5k | Loc.isInvalid()882 ) { |
1426 | 48.5k | SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(); |
1427 | 818 | } else { |
1428 | 818 | std::string FunctionName = ""; |
1429 | 818 | if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) |
1430 | 786 | FunctionName = FD->getQualifiedNameAsString(); |
1431 | 818 | PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); |
1432 | 818 | const char *FileName = PLoc.getFilename(); |
1433 | 818 | unsigned Line = PLoc.getLine(); |
1434 | 818 | unsigned Column = PLoc.getColumn(); |
1435 | 818 | SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName, |
1436 | 818 | Line, Column); |
1437 | 818 | } |
1438 | 49.3k | unsigned Reserved2Flags = getDefaultLocationReserved2Flags(); |
1439 | 49.3k | return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags), |
1440 | 49.3k | Reserved2Flags); |
1441 | 49.3k | } |
1442 | | |
1443 | | llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, |
1444 | 25.6k | SourceLocation Loc) { |
1445 | 25.6k | assert(CGF.CurFn && "No function in current CodeGenFunction."); |
1446 | | // If the OpenMPIRBuilder is used we need to use it for all thread id calls as |
1447 | | // the clang invariants used below might be broken. |
1448 | 25.6k | if (CGM.getLangOpts().OpenMPIRBuilder) { |
1449 | 222 | SmallString<128> Buffer; |
1450 | 222 | OMPBuilder.updateToLocation(CGF.Builder.saveIP()); |
1451 | 222 | auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr( |
1452 | 222 | getIdentStringFromSourceLocation(CGF, Loc, Buffer)); |
1453 | 222 | return OMPBuilder.getOrCreateThreadID( |
1454 | 222 | OMPBuilder.getOrCreateIdent(SrcLocStr)); |
1455 | 222 | } |
1456 | | |
1457 | 25.4k | llvm::Value *ThreadID = nullptr; |
1458 | | // Check whether we've already cached a load of the thread id in this |
1459 | | // function. |
1460 | 25.4k | auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); |
1461 | 25.4k | if (I != OpenMPLocThreadIDMap.end()) { |
1462 | 11.7k | ThreadID = I->second.ThreadID; |
1463 | 11.7k | if (ThreadID != nullptr) |
1464 | 10.7k | return ThreadID; |
1465 | 14.6k | } |
1466 | | // If exceptions are enabled, do not use parameter to avoid possible crash. |
1467 | 14.6k | if (auto *OMPRegionInfo = |
1468 | 13.8k | dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { |
1469 | 13.8k | if (OMPRegionInfo->getThreadIDVariable()) { |
1470 | | // Check if this an outlined function with thread id passed as argument. |
1471 | 12.2k | LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); |
1472 | 12.2k | llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent(); |
1473 | 12.2k | if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || |
1474 | 821 | !CGF.getLangOpts().CXXExceptions || |
1475 | 821 | CGF.Builder.GetInsertBlock() == TopBlock || |
1476 | 318 | !isa<llvm::Instruction>(LVal.getPointer(CGF)) || |
1477 | 318 | cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == |
1478 | 318 | TopBlock || |
1479 | 318 | cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == |
1480 | 12.2k | CGF.Builder.GetInsertBlock()) { |
1481 | 12.2k | ThreadID = CGF.EmitLoadOfScalar(LVal, Loc); |
1482 | | // If value loaded in entry block, cache it and use it everywhere in |
1483 | | // function. |
1484 | 12.2k | if (CGF.Builder.GetInsertBlock() == TopBlock) { |
1485 | 6.88k | auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); |
1486 | 6.88k | Elem.second.ThreadID = ThreadID; |
1487 | 6.88k | } |
1488 | 12.2k | return ThreadID; |
1489 | 12.2k | } |
1490 | 2.39k | } |
1491 | 13.8k | } |
1492 | | |
1493 | | // This is not an outlined function region - need to call __kmpc_int32 |
1494 | | // kmpc_global_thread_num(ident_t *loc). |
1495 | | // Generate thread id value and cache this value for use across the |
1496 | | // function. |
1497 | 2.39k | auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); |
1498 | 2.39k | if (!Elem.second.ServiceInsertPt) |
1499 | 1.47k | setLocThreadIdInsertPt(CGF); |
1500 | 2.39k | CGBuilderTy::InsertPointGuard IPG(CGF.Builder); |
1501 | 2.39k | CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); |
1502 | 2.39k | llvm::CallInst *Call = CGF.Builder.CreateCall( |
1503 | 2.39k | OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), |
1504 | 2.39k | OMPRTL___kmpc_global_thread_num), |
1505 | 2.39k | emitUpdateLocation(CGF, Loc)); |
1506 | 2.39k | Call->setCallingConv(CGF.getRuntimeCC()); |
1507 | 2.39k | Elem.second.ThreadID = Call; |
1508 | 2.39k | return Call; |
1509 | 2.39k | } |
1510 | | |
1511 | 68.5k | void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { |
1512 | 68.5k | assert(CGF.CurFn && "No function in current CodeGenFunction."); |
1513 | 68.5k | if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { |
1514 | 9.42k | clearLocThreadIdInsertPt(CGF); |
1515 | 9.42k | OpenMPLocThreadIDMap.erase(CGF.CurFn); |
1516 | 9.42k | } |
1517 | 68.5k | if (FunctionUDRMap.count(CGF.CurFn) > 0) { |
1518 | 27 | for(const auto *D : FunctionUDRMap[CGF.CurFn]) |
1519 | 38 | UDRMap.erase(D); |
1520 | 27 | FunctionUDRMap.erase(CGF.CurFn); |
1521 | 27 | } |
1522 | 68.5k | auto I = FunctionUDMMap.find(CGF.CurFn); |
1523 | 68.5k | if (I != FunctionUDMMap.end()) { |
1524 | 0 | for(const auto *D : I->second) |
1525 | 0 | UDMMap.erase(D); |
1526 | 0 | FunctionUDMMap.erase(I); |
1527 | 0 | } |
1528 | 68.5k | LastprivateConditionalToTypes.erase(CGF.CurFn); |
1529 | 68.5k | FunctionToUntiedTaskStackMap.erase(CGF.CurFn); |
1530 | 68.5k | } |
1531 | | |
1532 | 10.7k | llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { |
1533 | 10.7k | return OMPBuilder.IdentPtr; |
1534 | 10.7k | } |
1535 | | |
1536 | 10.2k | llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { |
1537 | 10.2k | if (!Kmpc_MicroTy) { |
1538 | | // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) |
1539 | 1.82k | llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), |
1540 | 1.82k | llvm::PointerType::getUnqual(CGM.Int32Ty)}; |
1541 | 1.82k | Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); |
1542 | 1.82k | } |
1543 | 10.2k | return llvm::PointerType::getUnqual(Kmpc_MicroTy); |
1544 | 10.2k | } |
1545 | | |
1546 | | llvm::FunctionCallee |
1547 | 8.51k | CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) { |
1548 | 8.51k | assert((IVSize == 32 || IVSize == 64) && |
1549 | 8.51k | "IV size is not compatible with the omp runtime"); |
1550 | 8.17k | StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"8.03k |
1551 | 138 | : "__kmpc_for_static_init_4u") |
1552 | 337 | : (IVSigned ? "__kmpc_for_static_init_8"229 |
1553 | 108 | : "__kmpc_for_static_init_8u"); |
1554 | 8.17k | llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty337 ; |
1555 | 8.51k | auto *PtrTy = llvm::PointerType::getUnqual(ITy); |
1556 | 8.51k | llvm::Type *TypeParams[] = { |
1557 | 8.51k | getIdentTyPointerTy(), // loc |
1558 | 8.51k | CGM.Int32Ty, // tid |
1559 | 8.51k | CGM.Int32Ty, // schedtype |
1560 | 8.51k | llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter |
1561 | 8.51k | PtrTy, // p_lower |
1562 | 8.51k | PtrTy, // p_upper |
1563 | 8.51k | PtrTy, // p_stride |
1564 | 8.51k | ITy, // incr |
1565 | 8.51k | ITy // chunk |
1566 | 8.51k | }; |
1567 | 8.51k | auto *FnTy = |
1568 | 8.51k | llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); |
1569 | 8.51k | return CGM.CreateRuntimeFunction(FnTy, Name); |
1570 | 8.51k | } |
1571 | | |
1572 | | llvm::FunctionCallee |
1573 | 744 | CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { |
1574 | 744 | assert((IVSize == 32 || IVSize == 64) && |
1575 | 744 | "IV size is not compatible with the omp runtime"); |
1576 | 744 | StringRef Name = |
1577 | 744 | IVSize == 32 |
1578 | 703 | ? (IVSigned ? "__kmpc_dispatch_init_4"699 : "__kmpc_dispatch_init_4u"4 ) |
1579 | 41 | : (IVSigned ? "__kmpc_dispatch_init_8"15 : "__kmpc_dispatch_init_8u"26 ); |
1580 | 703 | llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty41 ; |
1581 | 744 | llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc |
1582 | 744 | CGM.Int32Ty, // tid |
1583 | 744 | CGM.Int32Ty, // schedtype |
1584 | 744 | ITy, // lower |
1585 | 744 | ITy, // upper |
1586 | 744 | ITy, // stride |
1587 | 744 | ITy // chunk |
1588 | 744 | }; |
1589 | 744 | auto *FnTy = |
1590 | 744 | llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); |
1591 | 744 | return CGM.CreateRuntimeFunction(FnTy, Name); |
1592 | 744 | } |
1593 | | |
1594 | | llvm::FunctionCallee |
1595 | 37 | CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { |
1596 | 37 | assert((IVSize == 32 || IVSize == 64) && |
1597 | 37 | "IV size is not compatible with the omp runtime"); |
1598 | 37 | StringRef Name = |
1599 | 37 | IVSize == 32 |
1600 | 29 | ? (IVSigned ? "__kmpc_dispatch_fini_4"25 : "__kmpc_dispatch_fini_4u"4 ) |
1601 | 8 | : (IVSigned ? "__kmpc_dispatch_fini_8"4 : "__kmpc_dispatch_fini_8u"4 ); |
1602 | 37 | llvm::Type *TypeParams[] = { |
1603 | 37 | getIdentTyPointerTy(), // loc |
1604 | 37 | CGM.Int32Ty, // tid |
1605 | 37 | }; |
1606 | 37 | auto *FnTy = |
1607 | 37 | llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); |
1608 | 37 | return CGM.CreateRuntimeFunction(FnTy, Name); |
1609 | 37 | } |
1610 | | |
1611 | | llvm::FunctionCallee |
1612 | 744 | CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { |
1613 | 744 | assert((IVSize == 32 || IVSize == 64) && |
1614 | 744 | "IV size is not compatible with the omp runtime"); |
1615 | 744 | StringRef Name = |
1616 | 744 | IVSize == 32 |
1617 | 703 | ? (IVSigned ? "__kmpc_dispatch_next_4"699 : "__kmpc_dispatch_next_4u"4 ) |
1618 | 41 | : (IVSigned ? "__kmpc_dispatch_next_8"15 : "__kmpc_dispatch_next_8u"26 ); |
1619 | 703 | llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty41 ; |
1620 | 744 | auto *PtrTy = llvm::PointerType::getUnqual(ITy); |
1621 | 744 | llvm::Type *TypeParams[] = { |
1622 | 744 | getIdentTyPointerTy(), // loc |
1623 | 744 | CGM.Int32Ty, // tid |
1624 | 744 | llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter |
1625 | 744 | PtrTy, // p_lower |
1626 | 744 | PtrTy, // p_upper |
1627 | 744 | PtrTy // p_stride |
1628 | 744 | }; |
1629 | 744 | auto *FnTy = |
1630 | 744 | llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); |
1631 | 744 | return CGM.CreateRuntimeFunction(FnTy, Name); |
1632 | 744 | } |
1633 | | |
1634 | | /// Obtain information that uniquely identifies a target entry. This |
1635 | | /// consists of the file and device IDs as well as line number associated with |
1636 | | /// the relevant entry source location. |
1637 | | static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, |
1638 | | unsigned &DeviceID, unsigned &FileID, |
1639 | 15.6k | unsigned &LineNum) { |
1640 | 15.6k | SourceManager &SM = C.getSourceManager(); |
1641 | | |
1642 | | // The loc should be always valid and have a file ID (the user cannot use |
1643 | | // #pragma directives in macros) |
1644 | | |
1645 | 15.6k | assert(Loc.isValid() && "Source location is expected to be always valid."); |
1646 | | |
1647 | 15.6k | PresumedLoc PLoc = SM.getPresumedLoc(Loc); |
1648 | 15.6k | assert(PLoc.isValid() && "Source location is expected to be always valid."); |
1649 | | |
1650 | 15.6k | llvm::sys::fs::UniqueID ID; |
1651 | 15.6k | if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) |
1652 | 0 | SM.getDiagnostics().Report(diag::err_cannot_open_file) |
1653 | 0 | << PLoc.getFilename() << EC.message(); |
1654 | | |
1655 | 15.6k | DeviceID = ID.getDevice(); |
1656 | 15.6k | FileID = ID.getFile(); |
1657 | 15.6k | LineNum = PLoc.getLine(); |
1658 | 15.6k | } |
1659 | | |
1660 | 502 | Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { |
1661 | 502 | if (CGM.getLangOpts().OpenMPSimd) |
1662 | 20 | return Address::invalid(); |
1663 | 482 | llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = |
1664 | 482 | OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); |
1665 | 482 | if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || |
1666 | 44 | (*Res == OMPDeclareTargetDeclAttr::MT_To && |
1667 | 482 | HasRequiresUnifiedSharedMemory44 ))) { |
1668 | 482 | SmallString<64> PtrName; |
1669 | 482 | { |
1670 | 482 | llvm::raw_svector_ostream OS(PtrName); |
1671 | 482 | OS << CGM.getMangledName(GlobalDecl(VD)); |
1672 | 482 | if (!VD->isExternallyVisible()) { |
1673 | 34 | unsigned DeviceID, FileID, Line; |
1674 | 34 | getTargetEntryUniqueInfo(CGM.getContext(), |
1675 | 34 | VD->getCanonicalDecl()->getBeginLoc(), |
1676 | 34 | DeviceID, FileID, Line); |
1677 | 34 | OS << llvm::format("_%x", FileID); |
1678 | 34 | } |
1679 | 482 | OS << "_decl_tgt_ref_ptr"; |
1680 | 482 | } |
1681 | 482 | llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); |
1682 | 482 | if (!Ptr) { |
1683 | 58 | QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); |
1684 | 58 | Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy), |
1685 | 58 | PtrName); |
1686 | | |
1687 | 58 | auto *GV = cast<llvm::GlobalVariable>(Ptr); |
1688 | 58 | GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage); |
1689 | | |
1690 | 58 | if (!CGM.getLangOpts().OpenMPIsDevice) |
1691 | 38 | GV->setInitializer(CGM.GetAddrOfGlobal(VD)); |
1692 | 58 | registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); |
1693 | 58 | } |
1694 | 482 | return Address(Ptr, CGM.getContext().getDeclAlign(VD)); |
1695 | 482 | } |
1696 | 0 | return Address::invalid(); |
1697 | 0 | } |
1698 | | |
1699 | | llvm::Constant * |
1700 | 138 | CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { |
1701 | 138 | assert(!CGM.getLangOpts().OpenMPUseTLS || |
1702 | 138 | !CGM.getContext().getTargetInfo().isTLSSupported()); |
1703 | | // Lookup the entry, lazily creating it if necessary. |
1704 | 138 | std::string Suffix = getName({"cache", ""}); |
1705 | 138 | return getOrCreateInternalVariable( |
1706 | 138 | CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); |
1707 | 138 | } |
1708 | | |
1709 | | Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, |
1710 | | const VarDecl *VD, |
1711 | | Address VDAddr, |
1712 | 254 | SourceLocation Loc) { |
1713 | 254 | if (CGM.getLangOpts().OpenMPUseTLS && |
1714 | 116 | CGM.getContext().getTargetInfo().isTLSSupported()) |
1715 | 116 | return VDAddr; |
1716 | | |
1717 | 138 | llvm::Type *VarTy = VDAddr.getElementType(); |
1718 | 138 | llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), |
1719 | 138 | CGF.Builder.CreatePointerCast(VDAddr.getPointer(), |
1720 | 138 | CGM.Int8PtrTy), |
1721 | 138 | CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), |
1722 | 138 | getOrCreateThreadPrivateCache(VD)}; |
1723 | 138 | return Address(CGF.EmitRuntimeCall( |
1724 | 138 | OMPBuilder.getOrCreateRuntimeFunction( |
1725 | 138 | CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), |
1726 | 138 | Args), |
1727 | 138 | VDAddr.getAlignment()); |
1728 | 138 | } |
1729 | | |
1730 | | void CGOpenMPRuntime::emitThreadPrivateVarInit( |
1731 | | CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, |
1732 | 37 | llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { |
1733 | | // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime |
1734 | | // library. |
1735 | 37 | llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); |
1736 | 37 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
1737 | 37 | CGM.getModule(), OMPRTL___kmpc_global_thread_num), |
1738 | 37 | OMPLoc); |
1739 | | // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) |
1740 | | // to register constructor/destructor for variable. |
1741 | 37 | llvm::Value *Args[] = { |
1742 | 37 | OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), |
1743 | 37 | Ctor, CopyCtor, Dtor}; |
1744 | 37 | CGF.EmitRuntimeCall( |
1745 | 37 | OMPBuilder.getOrCreateRuntimeFunction( |
1746 | 37 | CGM.getModule(), OMPRTL___kmpc_threadprivate_register), |
1747 | 37 | Args); |
1748 | 37 | } |
1749 | | |
1750 | | llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( |
1751 | | const VarDecl *VD, Address VDAddr, SourceLocation Loc, |
1752 | 145 | bool PerformInit, CodeGenFunction *CGF) { |
1753 | 145 | if (CGM.getLangOpts().OpenMPUseTLS && |
1754 | 72 | CGM.getContext().getTargetInfo().isTLSSupported()) |
1755 | 72 | return nullptr; |
1756 | | |
1757 | 73 | VD = VD->getDefinition(CGM.getContext()); |
1758 | 73 | if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second61 ) { |
1759 | 48 | QualType ASTTy = VD->getType(); |
1760 | | |
1761 | 48 | llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; |
1762 | 48 | const Expr *Init = VD->getAnyInitializer(); |
1763 | 48 | if (CGM.getLangOpts().CPlusPlus && PerformInit) { |
1764 | | // Generate function that re-emits the declaration's initializer into the |
1765 | | // threadprivate copy of the variable VD |
1766 | 37 | CodeGenFunction CtorCGF(CGM); |
1767 | 37 | FunctionArgList Args; |
1768 | 37 | ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, |
1769 | 37 | /*Id=*/nullptr, CGM.getContext().VoidPtrTy, |
1770 | 37 | ImplicitParamDecl::Other); |
1771 | 37 | Args.push_back(&Dst); |
1772 | | |
1773 | 37 | const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( |
1774 | 37 | CGM.getContext().VoidPtrTy, Args); |
1775 | 37 | llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); |
1776 | 37 | std::string Name = getName({"__kmpc_global_ctor_", ""}); |
1777 | 37 | llvm::Function *Fn = |
1778 | 37 | CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); |
1779 | 37 | CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, |
1780 | 37 | Args, Loc, Loc); |
1781 | 37 | llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( |
1782 | 37 | CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, |
1783 | 37 | CGM.getContext().VoidPtrTy, Dst.getLocation()); |
1784 | 37 | Address Arg = Address(ArgVal, VDAddr.getAlignment()); |
1785 | 37 | Arg = CtorCGF.Builder.CreateElementBitCast( |
1786 | 37 | Arg, CtorCGF.ConvertTypeForMem(ASTTy)); |
1787 | 37 | CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), |
1788 | 37 | /*IsInitializer=*/true); |
1789 | 37 | ArgVal = CtorCGF.EmitLoadOfScalar( |
1790 | 37 | CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, |
1791 | 37 | CGM.getContext().VoidPtrTy, Dst.getLocation()); |
1792 | 37 | CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); |
1793 | 37 | CtorCGF.FinishFunction(); |
1794 | 37 | Ctor = Fn; |
1795 | 37 | } |
1796 | 48 | if (VD->getType().isDestructedType() != QualType::DK_none) { |
1797 | | // Generate function that emits destructor call for the threadprivate copy |
1798 | | // of the variable VD |
1799 | 35 | CodeGenFunction DtorCGF(CGM); |
1800 | 35 | FunctionArgList Args; |
1801 | 35 | ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, |
1802 | 35 | /*Id=*/nullptr, CGM.getContext().VoidPtrTy, |
1803 | 35 | ImplicitParamDecl::Other); |
1804 | 35 | Args.push_back(&Dst); |
1805 | | |
1806 | 35 | const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( |
1807 | 35 | CGM.getContext().VoidTy, Args); |
1808 | 35 | llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); |
1809 | 35 | std::string Name = getName({"__kmpc_global_dtor_", ""}); |
1810 | 35 | llvm::Function *Fn = |
1811 | 35 | CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); |
1812 | 35 | auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); |
1813 | 35 | DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, |
1814 | 35 | Loc, Loc); |
1815 | | // Create a scope with an artificial location for the body of this function. |
1816 | 35 | auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); |
1817 | 35 | llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar( |
1818 | 35 | DtorCGF.GetAddrOfLocalVar(&Dst), |
1819 | 35 | /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); |
1820 | 35 | DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy, |
1821 | 35 | DtorCGF.getDestroyer(ASTTy.isDestructedType()), |
1822 | 35 | DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); |
1823 | 35 | DtorCGF.FinishFunction(); |
1824 | 35 | Dtor = Fn; |
1825 | 35 | } |
1826 | | // Do not emit init function if it is not required. |
1827 | 48 | if (!Ctor && !Dtor11 ) |
1828 | 11 | return nullptr; |
1829 | | |
1830 | 37 | llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; |
1831 | 37 | auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, |
1832 | 37 | /*isVarArg=*/false) |
1833 | 37 | ->getPointerTo(); |
1834 | | // Copying constructor for the threadprivate variable. |
1835 | | // Must be NULL - reserved by runtime, but currently it requires that this |
1836 | | // parameter is always NULL. Otherwise it fires assertion. |
1837 | 37 | CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); |
1838 | 37 | if (Ctor == nullptr) { |
1839 | 0 | auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, |
1840 | 0 | /*isVarArg=*/false) |
1841 | 0 | ->getPointerTo(); |
1842 | 0 | Ctor = llvm::Constant::getNullValue(CtorTy); |
1843 | 0 | } |
1844 | 37 | if (Dtor == nullptr) { |
1845 | 2 | auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, |
1846 | 2 | /*isVarArg=*/false) |
1847 | 2 | ->getPointerTo(); |
1848 | 2 | Dtor = llvm::Constant::getNullValue(DtorTy); |
1849 | 2 | } |
1850 | 37 | if (!CGF) { |
1851 | 11 | auto *InitFunctionTy = |
1852 | 11 | llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); |
1853 | 11 | std::string Name = getName({"__omp_threadprivate_init_", ""}); |
1854 | 11 | llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction( |
1855 | 11 | InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction()); |
1856 | 11 | CodeGenFunction InitCGF(CGM); |
1857 | 11 | FunctionArgList ArgList; |
1858 | 11 | InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, |
1859 | 11 | CGM.getTypes().arrangeNullaryFunction(), ArgList, |
1860 | 11 | Loc, Loc); |
1861 | 11 | emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); |
1862 | 11 | InitCGF.FinishFunction(); |
1863 | 11 | return InitFunction; |
1864 | 11 | } |
1865 | 26 | emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); |
1866 | 26 | } |
1867 | 51 | return nullptr; |
1868 | 73 | } |
1869 | | |
1870 | | bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, |
1871 | | llvm::GlobalVariable *Addr, |
1872 | 3.89k | bool PerformInit) { |
1873 | 3.89k | if (CGM.getLangOpts().OMPTargetTriples.empty() && |
1874 | 645 | !CGM.getLangOpts().OpenMPIsDevice) |
1875 | 583 | return false; |
1876 | 3.30k | Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = |
1877 | 3.30k | OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); |
1878 | 3.30k | if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link146 || |
1879 | 146 | (*Res == OMPDeclareTargetDeclAttr::MT_To && |
1880 | 146 | HasRequiresUnifiedSharedMemory)) |
1881 | 3.16k | return CGM.getLangOpts().OpenMPIsDevice; |
1882 | 146 | VD = VD->getDefinition(CGM.getContext()); |
1883 | 146 | assert(VD && "Unknown VarDecl"); |
1884 | | |
1885 | 146 | if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) |
1886 | 3 | return CGM.getLangOpts().OpenMPIsDevice; |
1887 | | |
1888 | 143 | QualType ASTTy = VD->getType(); |
1889 | 143 | SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); |
1890 | | |
1891 | | // Produce the unique prefix to identify the new target regions. We use |
1892 | | // the source location of the variable declaration which we know to not |
1893 | | // conflict with any target region. |
1894 | 143 | unsigned DeviceID; |
1895 | 143 | unsigned FileID; |
1896 | 143 | unsigned Line; |
1897 | 143 | getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line); |
1898 | 143 | SmallString<128> Buffer, Out; |
1899 | 143 | { |
1900 | 143 | llvm::raw_svector_ostream OS(Buffer); |
1901 | 143 | OS << "__omp_offloading_" << llvm::format("_%x", DeviceID) |
1902 | 143 | << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; |
1903 | 143 | } |
1904 | | |
1905 | 143 | const Expr *Init = VD->getAnyInitializer(); |
1906 | 143 | if (CGM.getLangOpts().CPlusPlus && PerformInit) { |
1907 | 143 | llvm::Constant *Ctor; |
1908 | 143 | llvm::Constant *ID; |
1909 | 143 | if (CGM.getLangOpts().OpenMPIsDevice) { |
1910 | | // Generate function that re-emits the declaration's initializer into |
1911 | | // the threadprivate copy of the variable VD |
1912 | 74 | CodeGenFunction CtorCGF(CGM); |
1913 | | |
1914 | 74 | const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); |
1915 | 74 | llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); |
1916 | 74 | llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( |
1917 | 74 | FTy, Twine(Buffer, "_ctor"), FI, Loc); |
1918 | 74 | auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); |
1919 | 74 | CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, |
1920 | 74 | FunctionArgList(), Loc, Loc); |
1921 | 74 | auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF); |
1922 | 74 | CtorCGF.EmitAnyExprToMem(Init, |
1923 | 74 | Address(Addr, CGM.getContext().getDeclAlign(VD)), |
1924 | 74 | Init->getType().getQualifiers(), |
1925 | 74 | /*IsInitializer=*/true); |
1926 | 74 | CtorCGF.FinishFunction(); |
1927 | 74 | Ctor = Fn; |
1928 | 74 | ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); |
1929 | 74 | CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor)); |
1930 | 69 | } else { |
1931 | 69 | Ctor = new llvm::GlobalVariable( |
1932 | 69 | CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, |
1933 | 69 | llvm::GlobalValue::PrivateLinkage, |
1934 | 69 | llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor")); |
1935 | 69 | ID = Ctor; |
1936 | 69 | } |
1937 | | |
1938 | | // Register the information for the entry associated with the constructor. |
1939 | 143 | Out.clear(); |
1940 | 143 | OffloadEntriesInfoManager.registerTargetRegionEntryInfo( |
1941 | 143 | DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor, |
1942 | 143 | ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor); |
1943 | 143 | } |
1944 | 143 | if (VD->getType().isDestructedType() != QualType::DK_none) { |
1945 | 92 | llvm::Constant *Dtor; |
1946 | 92 | llvm::Constant *ID; |
1947 | 92 | if (CGM.getLangOpts().OpenMPIsDevice) { |
1948 | | // Generate function that emits destructor call for the threadprivate |
1949 | | // copy of the variable VD |
1950 | 58 | CodeGenFunction DtorCGF(CGM); |
1951 | | |
1952 | 58 | const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); |
1953 | 58 | llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); |
1954 | 58 | llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( |
1955 | 58 | FTy, Twine(Buffer, "_dtor"), FI, Loc); |
1956 | 58 | auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); |
1957 | 58 | DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, |
1958 | 58 | FunctionArgList(), Loc, Loc); |
1959 | | // Create a scope with an artificial location for the body of this |
1960 | | // function. |
1961 | 58 | auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); |
1962 | 58 | DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)), |
1963 | 58 | ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), |
1964 | 58 | DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); |
1965 | 58 | DtorCGF.FinishFunction(); |
1966 | 58 | Dtor = Fn; |
1967 | 58 | ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); |
1968 | 58 | CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor)); |
1969 | 34 | } else { |
1970 | 34 | Dtor = new llvm::GlobalVariable( |
1971 | 34 | CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, |
1972 | 34 | llvm::GlobalValue::PrivateLinkage, |
1973 | 34 | llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); |
1974 | 34 | ID = Dtor; |
1975 | 34 | } |
1976 | | // Register the information for the entry associated with the destructor. |
1977 | 92 | Out.clear(); |
1978 | 92 | OffloadEntriesInfoManager.registerTargetRegionEntryInfo( |
1979 | 92 | DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor, |
1980 | 92 | ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor); |
1981 | 92 | } |
1982 | 143 | return CGM.getLangOpts().OpenMPIsDevice; |
1983 | 143 | } |
1984 | | |
1985 | | Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, |
1986 | | QualType VarType, |
1987 | 152 | StringRef Name) { |
1988 | 152 | std::string Suffix = getName({"artificial", ""}); |
1989 | 152 | llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); |
1990 | 152 | llvm::Value *GAddr = |
1991 | 152 | getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); |
1992 | 152 | if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS && |
1993 | 117 | CGM.getTarget().isTLSSupported()) { |
1994 | 81 | cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true); |
1995 | 81 | return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType)); |
1996 | 81 | } |
1997 | 71 | std::string CacheSuffix = getName({"cache", ""}); |
1998 | 71 | llvm::Value *Args[] = { |
1999 | 71 | emitUpdateLocation(CGF, SourceLocation()), |
2000 | 71 | getThreadID(CGF, SourceLocation()), |
2001 | 71 | CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), |
2002 | 71 | CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, |
2003 | 71 | /*isSigned=*/false), |
2004 | 71 | getOrCreateInternalVariable( |
2005 | 71 | CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; |
2006 | 71 | return Address( |
2007 | 71 | CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
2008 | 71 | CGF.EmitRuntimeCall( |
2009 | 71 | OMPBuilder.getOrCreateRuntimeFunction( |
2010 | 71 | CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), |
2011 | 71 | Args), |
2012 | 71 | VarLVType->getPointerTo(/*AddrSpace=*/0)), |
2013 | 71 | CGM.getContext().getTypeAlignInChars(VarType)); |
2014 | 71 | } |
2015 | | |
2016 | | void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond, |
2017 | | const RegionCodeGenTy &ThenGen, |
2018 | 2.53k | const RegionCodeGenTy &ElseGen) { |
2019 | 2.53k | CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); |
2020 | | |
2021 | | // If the condition constant folds and can be elided, try to avoid emitting |
2022 | | // the condition and the dead arm of the if/else. |
2023 | 2.53k | bool CondConstant; |
2024 | 2.53k | if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { |
2025 | 1.30k | if (CondConstant) |
2026 | 746 | ThenGen(CGF); |
2027 | 554 | else |
2028 | 554 | ElseGen(CGF); |
2029 | 1.30k | return; |
2030 | 1.30k | } |
2031 | | |
2032 | | // Otherwise, the condition did not fold, or we couldn't elide it. Just |
2033 | | // emit the conditional branch. |
2034 | 1.23k | llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); |
2035 | 1.23k | llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); |
2036 | 1.23k | llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); |
2037 | 1.23k | CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); |
2038 | | |
2039 | | // Emit the 'then' code. |
2040 | 1.23k | CGF.EmitBlock(ThenBlock); |
2041 | 1.23k | ThenGen(CGF); |
2042 | 1.23k | CGF.EmitBranch(ContBlock); |
2043 | | // Emit the 'else' code if present. |
2044 | | // There is no need to emit line number for unconditional branch. |
2045 | 1.23k | (void)ApplyDebugLocation::CreateEmpty(CGF); |
2046 | 1.23k | CGF.EmitBlock(ElseBlock); |
2047 | 1.23k | ElseGen(CGF); |
2048 | | // There is no need to emit line number for unconditional branch. |
2049 | 1.23k | (void)ApplyDebugLocation::CreateEmpty(CGF); |
2050 | 1.23k | CGF.EmitBranch(ContBlock); |
2051 | | // Emit the continuation block for code after the if. |
2052 | 1.23k | CGF.EmitBlock(ContBlock, /*IsFinished=*/true); |
2053 | 1.23k | } |
2054 | | |
2055 | | void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, |
2056 | | llvm::Function *OutlinedFn, |
2057 | | ArrayRef<llvm::Value *> CapturedVars, |
2058 | 5.34k | const Expr *IfCond) { |
2059 | 5.34k | if (!CGF.HaveInsertPoint()) |
2060 | 0 | return; |
2061 | 5.34k | llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); |
2062 | 5.34k | auto &M = CGM.getModule(); |
2063 | 5.34k | auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc, |
2064 | 5.13k | this](CodeGenFunction &CGF, PrePostActionTy &) { |
2065 | | // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); |
2066 | 5.13k | CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); |
2067 | 5.13k | llvm::Value *Args[] = { |
2068 | 5.13k | RTLoc, |
2069 | 5.13k | CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars |
2070 | 5.13k | CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())}; |
2071 | 5.13k | llvm::SmallVector<llvm::Value *, 16> RealArgs; |
2072 | 5.13k | RealArgs.append(std::begin(Args), std::end(Args)); |
2073 | 5.13k | RealArgs.append(CapturedVars.begin(), CapturedVars.end()); |
2074 | | |
2075 | 5.13k | llvm::FunctionCallee RTLFn = |
2076 | 5.13k | OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call); |
2077 | 5.13k | CGF.EmitRuntimeCall(RTLFn, RealArgs); |
2078 | 5.13k | }; |
2079 | 5.34k | auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc, |
2080 | 437 | this](CodeGenFunction &CGF, PrePostActionTy &) { |
2081 | 437 | CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); |
2082 | 437 | llvm::Value *ThreadID = RT.getThreadID(CGF, Loc); |
2083 | | // Build calls: |
2084 | | // __kmpc_serialized_parallel(&Loc, GTid); |
2085 | 437 | llvm::Value *Args[] = {RTLoc, ThreadID}; |
2086 | 437 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
2087 | 437 | M, OMPRTL___kmpc_serialized_parallel), |
2088 | 437 | Args); |
2089 | | |
2090 | | // OutlinedFn(>id, &zero_bound, CapturedStruct); |
2091 | 437 | Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc); |
2092 | 437 | Address ZeroAddrBound = |
2093 | 437 | CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, |
2094 | 437 | /*Name=*/".bound.zero.addr"); |
2095 | 437 | CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0)); |
2096 | 437 | llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; |
2097 | | // ThreadId for serialized parallels is 0. |
2098 | 437 | OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); |
2099 | 437 | OutlinedFnArgs.push_back(ZeroAddrBound.getPointer()); |
2100 | 437 | OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); |
2101 | 437 | RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); |
2102 | | |
2103 | | // __kmpc_end_serialized_parallel(&Loc, GTid); |
2104 | 437 | llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; |
2105 | 437 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
2106 | 437 | M, OMPRTL___kmpc_end_serialized_parallel), |
2107 | 437 | EndArgs); |
2108 | 437 | }; |
2109 | 5.34k | if (IfCond) { |
2110 | 563 | emitIfClause(CGF, IfCond, ThenGen, ElseGen); |
2111 | 4.78k | } else { |
2112 | 4.78k | RegionCodeGenTy ThenRCG(ThenGen); |
2113 | 4.78k | ThenRCG(CGF); |
2114 | 4.78k | } |
2115 | 5.34k | } |
2116 | | |
2117 | | // If we're inside an (outlined) parallel region, use the region info's |
2118 | | // thread-ID variable (it is passed in a first argument of the outlined function |
2119 | | // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in |
2120 | | // regular serial code region, get thread ID by calling kmp_int32 |
2121 | | // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and |
2122 | | // return the address of that temp. |
2123 | | Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, |
2124 | 1.53k | SourceLocation Loc) { |
2125 | 1.53k | if (auto *OMPRegionInfo = |
2126 | 1.50k | dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) |
2127 | 1.50k | if (OMPRegionInfo->getThreadIDVariable()) |
2128 | 668 | return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF); |
2129 | | |
2130 | 866 | llvm::Value *ThreadID = getThreadID(CGF, Loc); |
2131 | 866 | QualType Int32Ty = |
2132 | 866 | CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); |
2133 | 866 | Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); |
2134 | 866 | CGF.EmitStoreOfScalar(ThreadID, |
2135 | 866 | CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); |
2136 | | |
2137 | 866 | return ThreadIDTemp; |
2138 | 866 | } |
2139 | | |
2140 | | llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable( |
2141 | 1.04k | llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { |
2142 | 1.04k | SmallString<256> Buffer; |
2143 | 1.04k | llvm::raw_svector_ostream Out(Buffer); |
2144 | 1.04k | Out << Name; |
2145 | 1.04k | StringRef RuntimeName = Out.str(); |
2146 | 1.04k | auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; |
2147 | 1.04k | if (Elem.second) { |
2148 | 612 | assert(Elem.second->getType()->getPointerElementType() == Ty && |
2149 | 612 | "OMP internal variable has different type than requested"); |
2150 | 612 | return &*Elem.second; |
2151 | 612 | } |
2152 | | |
2153 | 437 | return Elem.second = new llvm::GlobalVariable( |
2154 | 437 | CGM.getModule(), Ty, /*IsConstant*/ false, |
2155 | 437 | llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), |
2156 | 437 | Elem.first(), /*InsertBefore=*/nullptr, |
2157 | 437 | llvm::GlobalValue::NotThreadLocal, AddressSpace); |
2158 | 437 | } |
2159 | | |
2160 | 579 | llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { |
2161 | 579 | std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); |
2162 | 579 | std::string Name = getName({Prefix, "var"}); |
2163 | 579 | return getOrCreateInternalVariable(KmpCriticalNameTy, Name); |
2164 | 579 | } |
2165 | | |
2166 | | namespace { |
2167 | | /// Common pre(post)-action for different OpenMP constructs. |
2168 | | class CommonActionTy final : public PrePostActionTy { |
2169 | | llvm::FunctionCallee EnterCallee; |
2170 | | ArrayRef<llvm::Value *> EnterArgs; |
2171 | | llvm::FunctionCallee ExitCallee; |
2172 | | ArrayRef<llvm::Value *> ExitArgs; |
2173 | | bool Conditional; |
2174 | | llvm::BasicBlock *ContBlock = nullptr; |
2175 | | |
2176 | | public: |
2177 | | CommonActionTy(llvm::FunctionCallee EnterCallee, |
2178 | | ArrayRef<llvm::Value *> EnterArgs, |
2179 | | llvm::FunctionCallee ExitCallee, |
2180 | | ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false) |
2181 | | : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), |
2182 | 1.46k | ExitArgs(ExitArgs), Conditional(Conditional) {} |
2183 | 872 | void Enter(CodeGenFunction &CGF) override { |
2184 | 872 | llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); |
2185 | 872 | if (Conditional) { |
2186 | 243 | llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); |
2187 | 243 | auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); |
2188 | 243 | ContBlock = CGF.createBasicBlock("omp_if.end"); |
2189 | | // Generate the branch (If-stmt) |
2190 | 243 | CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); |
2191 | 243 | CGF.EmitBlock(ThenBlock); |
2192 | 243 | } |
2193 | 872 | } |
2194 | 243 | void Done(CodeGenFunction &CGF) { |
2195 | | // Emit the rest of blocks/branches |
2196 | 243 | CGF.EmitBranch(ContBlock); |
2197 | 243 | CGF.EmitBlock(ContBlock, true); |
2198 | 243 | } |
2199 | 1.56k | void Exit(CodeGenFunction &CGF) override { |
2200 | 1.56k | CGF.EmitRuntimeCall(ExitCallee, ExitArgs); |
2201 | 1.56k | } |
2202 | | }; |
2203 | | } // anonymous namespace |
2204 | | |
2205 | | void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, |
2206 | | StringRef CriticalName, |
2207 | | const RegionCodeGenTy &CriticalOpGen, |
2208 | 166 | SourceLocation Loc, const Expr *Hint) { |
2209 | | // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); |
2210 | | // CriticalOpGen(); |
2211 | | // __kmpc_end_critical(ident_t *, gtid, Lock); |
2212 | | // Prepare arguments and build a call to __kmpc_critical |
2213 | 166 | if (!CGF.HaveInsertPoint()) |
2214 | 0 | return; |
2215 | 166 | llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), |
2216 | 166 | getCriticalRegionLock(CriticalName)}; |
2217 | 166 | llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), |
2218 | 166 | std::end(Args)); |
2219 | 166 | if (Hint) { |
2220 | 3 | EnterArgs.push_back(CGF.Builder.CreateIntCast( |
2221 | 3 | CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false)); |
2222 | 3 | } |
2223 | 166 | CommonActionTy Action( |
2224 | 166 | OMPBuilder.getOrCreateRuntimeFunction( |
2225 | 166 | CGM.getModule(), |
2226 | 163 | Hint ? OMPRTL___kmpc_critical_with_hint3 : OMPRTL___kmpc_critical), |
2227 | 166 | EnterArgs, |
2228 | 166 | OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), |
2229 | 166 | OMPRTL___kmpc_end_critical), |
2230 | 166 | Args); |
2231 | 166 | CriticalOpGen.setAction(Action); |
2232 | 166 | emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); |
2233 | 166 | } |
2234 | | |
2235 | | void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, |
2236 | | const RegionCodeGenTy &MasterOpGen, |
2237 | 186 | SourceLocation Loc) { |
2238 | 186 | if (!CGF.HaveInsertPoint()) |
2239 | 0 | return; |
2240 | | // if(__kmpc_master(ident_t *, gtid)) { |
2241 | | // MasterOpGen(); |
2242 | | // __kmpc_end_master(ident_t *, gtid); |
2243 | | // } |
2244 | | // Prepare arguments and build a call to __kmpc_master |
2245 | 186 | llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; |
2246 | 186 | CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( |
2247 | 186 | CGM.getModule(), OMPRTL___kmpc_master), |
2248 | 186 | Args, |
2249 | 186 | OMPBuilder.getOrCreateRuntimeFunction( |
2250 | 186 | CGM.getModule(), OMPRTL___kmpc_end_master), |
2251 | 186 | Args, |
2252 | 186 | /*Conditional=*/true); |
2253 | 186 | MasterOpGen.setAction(Action); |
2254 | 186 | emitInlinedDirective(CGF, OMPD_master, MasterOpGen); |
2255 | 186 | Action.Done(CGF); |
2256 | 186 | } |
2257 | | |
2258 | | void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, |
2259 | 16 | SourceLocation Loc) { |
2260 | 16 | if (!CGF.HaveInsertPoint()) |
2261 | 0 | return; |
2262 | 16 | if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { |
2263 | 8 | OMPBuilder.createTaskyield(CGF.Builder); |
2264 | 8 | } else { |
2265 | | // Build call __kmpc_omp_taskyield(loc, thread_id, 0); |
2266 | 8 | llvm::Value *Args[] = { |
2267 | 8 | emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), |
2268 | 8 | llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; |
2269 | 8 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
2270 | 8 | CGM.getModule(), OMPRTL___kmpc_omp_taskyield), |
2271 | 8 | Args); |
2272 | 8 | } |
2273 | | |
2274 | 16 | if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) |
2275 | 4 | Region->emitUntiedSwitch(CGF); |
2276 | 16 | } |
2277 | | |
2278 | | void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, |
2279 | | const RegionCodeGenTy &TaskgroupOpGen, |
2280 | 247 | SourceLocation Loc) { |
2281 | 247 | if (!CGF.HaveInsertPoint()) |
2282 | 0 | return; |
2283 | | // __kmpc_taskgroup(ident_t *, gtid); |
2284 | | // TaskgroupOpGen(); |
2285 | | // __kmpc_end_taskgroup(ident_t *, gtid); |
2286 | | // Prepare arguments and build a call to __kmpc_taskgroup |
2287 | 247 | llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; |
2288 | 247 | CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( |
2289 | 247 | CGM.getModule(), OMPRTL___kmpc_taskgroup), |
2290 | 247 | Args, |
2291 | 247 | OMPBuilder.getOrCreateRuntimeFunction( |
2292 | 247 | CGM.getModule(), OMPRTL___kmpc_end_taskgroup), |
2293 | 247 | Args); |
2294 | 247 | TaskgroupOpGen.setAction(Action); |
2295 | 247 | emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); |
2296 | 247 | } |
2297 | | |
2298 | | /// Given an array of pointers to variables, project the address of a |
2299 | | /// given variable. |
2300 | | static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, |
2301 | 1.32k | unsigned Index, const VarDecl *Var) { |
2302 | | // Pull out the pointer to the variable. |
2303 | 1.32k | Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index); |
2304 | 1.32k | llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); |
2305 | | |
2306 | 1.32k | Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var)); |
2307 | 1.32k | Addr = CGF.Builder.CreateElementBitCast( |
2308 | 1.32k | Addr, CGF.ConvertTypeForMem(Var->getType())); |
2309 | 1.32k | return Addr; |
2310 | 1.32k | } |
2311 | | |
2312 | | static llvm::Value *emitCopyprivateCopyFunction( |
2313 | | CodeGenModule &CGM, llvm::Type *ArgsType, |
2314 | | ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, |
2315 | | ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps, |
2316 | 28 | SourceLocation Loc) { |
2317 | 28 | ASTContext &C = CGM.getContext(); |
2318 | | // void copy_func(void *LHSArg, void *RHSArg); |
2319 | 28 | FunctionArgList Args; |
2320 | 28 | ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, |
2321 | 28 | ImplicitParamDecl::Other); |
2322 | 28 | ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, |
2323 | 28 | ImplicitParamDecl::Other); |
2324 | 28 | Args.push_back(&LHSArg); |
2325 | 28 | Args.push_back(&RHSArg); |
2326 | 28 | const auto &CGFI = |
2327 | 28 | CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); |
2328 | 28 | std::string Name = |
2329 | 28 | CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"}); |
2330 | 28 | auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), |
2331 | 28 | llvm::GlobalValue::InternalLinkage, Name, |
2332 | 28 | &CGM.getModule()); |
2333 | 28 | CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); |
2334 | 28 | Fn->setDoesNotRecurse(); |
2335 | 28 | CodeGenFunction CGF(CGM); |
2336 | 28 | CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); |
2337 | | // Dest = (void*[n])(LHSArg); |
2338 | | // Src = (void*[n])(RHSArg); |
2339 | 28 | Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
2340 | 28 | CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), |
2341 | 28 | ArgsType), CGF.getPointerAlign()); |
2342 | 28 | Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
2343 | 28 | CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), |
2344 | 28 | ArgsType), CGF.getPointerAlign()); |
2345 | | // *(Type0*)Dst[0] = *(Type0*)Src[0]; |
2346 | | // *(Type1*)Dst[1] = *(Type1*)Src[1]; |
2347 | | // ... |
2348 | | // *(Typen*)Dst[n] = *(Typen*)Src[n]; |
2349 | 97 | for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I69 ) { |
2350 | 69 | const auto *DestVar = |
2351 | 69 | cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()); |
2352 | 69 | Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar); |
2353 | | |
2354 | 69 | const auto *SrcVar = |
2355 | 69 | cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()); |
2356 | 69 | Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar); |
2357 | | |
2358 | 69 | const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); |
2359 | 69 | QualType Type = VD->getType(); |
2360 | 69 | CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]); |
2361 | 69 | } |
2362 | 28 | CGF.FinishFunction(); |
2363 | 28 | return Fn; |
2364 | 28 | } |
2365 | | |
2366 | | void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, |
2367 | | const RegionCodeGenTy &SingleOpGen, |
2368 | | SourceLocation Loc, |
2369 | | ArrayRef<const Expr *> CopyprivateVars, |
2370 | | ArrayRef<const Expr *> SrcExprs, |
2371 | | ArrayRef<const Expr *> DstExprs, |
2372 | 57 | ArrayRef<const Expr *> AssignmentOps) { |
2373 | 57 | if (!CGF.HaveInsertPoint()) |
2374 | 0 | return; |
2375 | 57 | assert(CopyprivateVars.size() == SrcExprs.size() && |
2376 | 57 | CopyprivateVars.size() == DstExprs.size() && |
2377 | 57 | CopyprivateVars.size() == AssignmentOps.size()); |
2378 | 57 | ASTContext &C = CGM.getContext(); |
2379 | | // int32 did_it = 0; |
2380 | | // if(__kmpc_single(ident_t *, gtid)) { |
2381 | | // SingleOpGen(); |
2382 | | // __kmpc_end_single(ident_t *, gtid); |
2383 | | // did_it = 1; |
2384 | | // } |
2385 | | // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, |
2386 | | // <copy_func>, did_it); |
2387 | | |
2388 | 57 | Address DidIt = Address::invalid(); |
2389 | 57 | if (!CopyprivateVars.empty()) { |
2390 | | // int32 did_it = 0; |
2391 | 28 | QualType KmpInt32Ty = |
2392 | 28 | C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); |
2393 | 28 | DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); |
2394 | 28 | CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt); |
2395 | 28 | } |
2396 | | // Prepare arguments and build a call to __kmpc_single |
2397 | 57 | llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; |
2398 | 57 | CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( |
2399 | 57 | CGM.getModule(), OMPRTL___kmpc_single), |
2400 | 57 | Args, |
2401 | 57 | OMPBuilder.getOrCreateRuntimeFunction( |
2402 | 57 | CGM.getModule(), OMPRTL___kmpc_end_single), |
2403 | 57 | Args, |
2404 | 57 | /*Conditional=*/true); |
2405 | 57 | SingleOpGen.setAction(Action); |
2406 | 57 | emitInlinedDirective(CGF, OMPD_single, SingleOpGen); |
2407 | 57 | if (DidIt.isValid()) { |
2408 | | // did_it = 1; |
2409 | 28 | CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); |
2410 | 28 | } |
2411 | 57 | Action.Done(CGF); |
2412 | | // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, |
2413 | | // <copy_func>, did_it); |
2414 | 57 | if (DidIt.isValid()) { |
2415 | 28 | llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); |
2416 | 28 | QualType CopyprivateArrayTy = C.getConstantArrayType( |
2417 | 28 | C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, |
2418 | 28 | /*IndexTypeQuals=*/0); |
2419 | | // Create a list of all private variables for copyprivate. |
2420 | 28 | Address CopyprivateList = |
2421 | 28 | CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); |
2422 | 97 | for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I69 ) { |
2423 | 69 | Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I); |
2424 | 69 | CGF.Builder.CreateStore( |
2425 | 69 | CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
2426 | 69 | CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF), |
2427 | 69 | CGF.VoidPtrTy), |
2428 | 69 | Elem); |
2429 | 69 | } |
2430 | | // Build function that copies private values from single region to all other |
2431 | | // threads in the corresponding parallel region. |
2432 | 28 | llvm::Value *CpyFn = emitCopyprivateCopyFunction( |
2433 | 28 | CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), |
2434 | 28 | CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc); |
2435 | 28 | llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy); |
2436 | 28 | Address CL = |
2437 | 28 | CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, |
2438 | 28 | CGF.VoidPtrTy); |
2439 | 28 | llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt); |
2440 | 28 | llvm::Value *Args[] = { |
2441 | 28 | emitUpdateLocation(CGF, Loc), // ident_t *<loc> |
2442 | 28 | getThreadID(CGF, Loc), // i32 <gtid> |
2443 | 28 | BufSize, // size_t <buf_size> |
2444 | 28 | CL.getPointer(), // void *<copyprivate list> |
2445 | 28 | CpyFn, // void (*) (void *, void *) <copy_func> |
2446 | 28 | DidItVal // i32 did_it |
2447 | 28 | }; |
2448 | 28 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
2449 | 28 | CGM.getModule(), OMPRTL___kmpc_copyprivate), |
2450 | 28 | Args); |
2451 | 28 | } |
2452 | 57 | } |
2453 | | |
2454 | | void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, |
2455 | | const RegionCodeGenTy &OrderedOpGen, |
2456 | 24 | SourceLocation Loc, bool IsThreads) { |
2457 | 24 | if (!CGF.HaveInsertPoint()) |
2458 | 0 | return; |
2459 | | // __kmpc_ordered(ident_t *, gtid); |
2460 | | // OrderedOpGen(); |
2461 | | // __kmpc_end_ordered(ident_t *, gtid); |
2462 | | // Prepare arguments and build a call to __kmpc_ordered |
2463 | 24 | if (IsThreads) { |
2464 | 16 | llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; |
2465 | 16 | CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( |
2466 | 16 | CGM.getModule(), OMPRTL___kmpc_ordered), |
2467 | 16 | Args, |
2468 | 16 | OMPBuilder.getOrCreateRuntimeFunction( |
2469 | 16 | CGM.getModule(), OMPRTL___kmpc_end_ordered), |
2470 | 16 | Args); |
2471 | 16 | OrderedOpGen.setAction(Action); |
2472 | 16 | emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); |
2473 | 16 | return; |
2474 | 16 | } |
2475 | 8 | emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); |
2476 | 8 | } |
2477 | | |
2478 | 972 | unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { |
2479 | 972 | unsigned Flags; |
2480 | 972 | if (Kind == OMPD_for) |
2481 | 575 | Flags = OMP_IDENT_BARRIER_IMPL_FOR; |
2482 | 397 | else if (Kind == OMPD_sections) |
2483 | 48 | Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; |
2484 | 349 | else if (Kind == OMPD_single) |
2485 | 22 | Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; |
2486 | 327 | else if (Kind == OMPD_barrier) |
2487 | 18 | Flags = OMP_IDENT_BARRIER_EXPL; |
2488 | 309 | else |
2489 | 309 | Flags = OMP_IDENT_BARRIER_IMPL; |
2490 | 972 | return Flags; |
2491 | 972 | } |
2492 | | |
2493 | | void CGOpenMPRuntime::getDefaultScheduleAndChunk( |
2494 | | CodeGenFunction &CGF, const OMPLoopDirective &S, |
2495 | 3.22k | OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const { |
2496 | | // Check if the loop directive is actually a doacross loop directive. In this |
2497 | | // case choose static, 1 schedule. |
2498 | 3.22k | if (llvm::any_of( |
2499 | 3.22k | S.getClausesOfKind<OMPOrderedClause>(), |
2500 | 20 | [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) { |
2501 | 12 | ScheduleKind = OMPC_SCHEDULE_static; |
2502 | | // Chunk size is 1 in this case. |
2503 | 12 | llvm::APInt ChunkSize(32, 1); |
2504 | 12 | ChunkExpr = IntegerLiteral::Create( |
2505 | 12 | CGF.getContext(), ChunkSize, |
2506 | 12 | CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), |
2507 | 12 | SourceLocation()); |
2508 | 12 | } |
2509 | 3.22k | } |
2510 | | |
2511 | | void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, |
2512 | | OpenMPDirectiveKind Kind, bool EmitChecks, |
2513 | 804 | bool ForceSimpleCall) { |
2514 | | // Check if we should use the OMPBuilder |
2515 | 804 | auto *OMPRegionInfo = |
2516 | 804 | dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo); |
2517 | 804 | if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { |
2518 | 48 | CGF.Builder.restoreIP(OMPBuilder.createBarrier( |
2519 | 48 | CGF.Builder, Kind, ForceSimpleCall, EmitChecks)); |
2520 | 48 | return; |
2521 | 48 | } |
2522 | | |
2523 | 756 | if (!CGF.HaveInsertPoint()) |
2524 | 0 | return; |
2525 | | // Build call __kmpc_cancel_barrier(loc, thread_id); |
2526 | | // Build call __kmpc_barrier(loc, thread_id); |
2527 | 756 | unsigned Flags = getDefaultFlagsForBarriers(Kind); |
2528 | | // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, |
2529 | | // thread_id); |
2530 | 756 | llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), |
2531 | 756 | getThreadID(CGF, Loc)}; |
2532 | 756 | if (OMPRegionInfo) { |
2533 | 473 | if (!ForceSimpleCall && OMPRegionInfo->hasCancel()290 ) { |
2534 | 4 | llvm::Value *Result = CGF.EmitRuntimeCall( |
2535 | 4 | OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), |
2536 | 4 | OMPRTL___kmpc_cancel_barrier), |
2537 | 4 | Args); |
2538 | 4 | if (EmitChecks) { |
2539 | | // if (__kmpc_cancel_barrier()) { |
2540 | | // exit from construct; |
2541 | | // } |
2542 | 4 | llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); |
2543 | 4 | llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); |
2544 | 4 | llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); |
2545 | 4 | CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); |
2546 | 4 | CGF.EmitBlock(ExitBB); |
2547 | | // exit from construct; |
2548 | 4 | CodeGenFunction::JumpDest CancelDestination = |
2549 | 4 | CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); |
2550 | 4 | CGF.EmitBranchThroughCleanup(CancelDestination); |
2551 | 4 | CGF.EmitBlock(ContBB, /*IsFinished=*/true); |
2552 | 4 | } |
2553 | 4 | return; |
2554 | 4 | } |
2555 | 752 | } |
2556 | 752 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
2557 | 752 | CGM.getModule(), OMPRTL___kmpc_barrier), |
2558 | 752 | Args); |
2559 | 752 | } |
2560 | | |
2561 | | /// Map the OpenMP loop schedule to the runtime enumeration. |
2562 | | static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, |
2563 | 16.1k | bool Chunked, bool Ordered) { |
2564 | 16.1k | switch (ScheduleKind) { |
2565 | 2.88k | case OMPC_SCHEDULE_static: |
2566 | 1.89k | return Chunked ? (Ordered ? OMP_ord_static_chunked7 : OMP_sch_static_chunked1.88k ) |
2567 | 993 | : (Ordered ? OMP_ord_static6 : OMP_sch_static987 ); |
2568 | 1.94k | case OMPC_SCHEDULE_dynamic: |
2569 | 1.93k | return Ordered ? OMP_ord_dynamic_chunked4 : OMP_sch_dynamic_chunked; |
2570 | 543 | case OMPC_SCHEDULE_guided: |
2571 | 542 | return Ordered ? OMP_ord_guided_chunked1 : OMP_sch_guided_chunked; |
2572 | 548 | case OMPC_SCHEDULE_runtime: |
2573 | 542 | return Ordered ? OMP_ord_runtime6 : OMP_sch_runtime; |
2574 | 550 | case OMPC_SCHEDULE_auto: |
2575 | 545 | return Ordered ? OMP_ord_auto5 : OMP_sch_auto; |
2576 | 9.68k | case OMPC_SCHEDULE_unknown: |
2577 | 9.68k | assert(!Chunked && "chunk was specified but schedule kind not known"); |
2578 | 9.67k | return Ordered ? OMP_ord_static8 : OMP_sch_static; |
2579 | 0 | } |
2580 | 0 | llvm_unreachable("Unexpected runtime schedule"); |
2581 | 0 | } |
2582 | | |
2583 | | /// Map the OpenMP distribute schedule to the runtime enumeration. |
2584 | | static OpenMPSchedType |
2585 | 13.3k | getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { |
2586 | | // only static is allowed for dist_schedule |
2587 | 11.4k | return Chunked ? OMP_dist_sch_static_chunked1.95k : OMP_dist_sch_static; |
2588 | 13.3k | } |
2589 | | |
2590 | | bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, |
2591 | 5.67k | bool Chunked) const { |
2592 | 5.67k | OpenMPSchedType Schedule = |
2593 | 5.67k | getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); |
2594 | 5.67k | return Schedule == OMP_sch_static; |
2595 | 5.67k | } |
2596 | | |
2597 | | bool CGOpenMPRuntime::isStaticNonchunked( |
2598 | 4.45k | OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { |
2599 | 4.45k | OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); |
2600 | 4.45k | return Schedule == OMP_dist_sch_static; |
2601 | 4.45k | } |
2602 | | |
2603 | | bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, |
2604 | 4.66k | bool Chunked) const { |
2605 | 4.66k | OpenMPSchedType Schedule = |
2606 | 4.66k | getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); |
2607 | 4.66k | return Schedule == OMP_sch_static_chunked; |
2608 | 4.66k | } |
2609 | | |
2610 | | bool CGOpenMPRuntime::isStaticChunked( |
2611 | 4.45k | OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { |
2612 | 4.45k | OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); |
2613 | 4.45k | return Schedule == OMP_dist_sch_static_chunked; |
2614 | 4.45k | } |
2615 | | |
2616 | 1.01k | bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { |
2617 | 1.01k | OpenMPSchedType Schedule = |
2618 | 1.01k | getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); |
2619 | 1.01k | assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); |
2620 | 1.01k | return Schedule != OMP_sch_static; |
2621 | 1.01k | } |
2622 | | |
2623 | | static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, |
2624 | | OpenMPScheduleClauseModifier M1, |
2625 | 9.25k | OpenMPScheduleClauseModifier M2) { |
2626 | 9.25k | int Modifier = 0; |
2627 | 9.25k | switch (M1) { |
2628 | 17 | case OMPC_SCHEDULE_MODIFIER_monotonic: |
2629 | 17 | Modifier = OMP_sch_modifier_monotonic; |
2630 | 17 | break; |
2631 | 12 | case OMPC_SCHEDULE_MODIFIER_nonmonotonic: |
2632 | 12 | Modifier = OMP_sch_modifier_nonmonotonic; |
2633 | 12 | break; |
2634 | 12 | case OMPC_SCHEDULE_MODIFIER_simd: |
2635 | 12 | if (Schedule == OMP_sch_static_chunked) |
2636 | 6 | Schedule = OMP_sch_static_balanced_chunked; |
2637 | 12 | break; |
2638 | 0 | case OMPC_SCHEDULE_MODIFIER_last: |
2639 | 9.21k | case OMPC_SCHEDULE_MODIFIER_unknown: |
2640 | 9.21k | break; |
2641 | 9.25k | } |
2642 | 9.25k | switch (M2) { |
2643 | 0 | case OMPC_SCHEDULE_MODIFIER_monotonic: |
2644 | 0 | Modifier = OMP_sch_modifier_monotonic; |
2645 | 0 | break; |
2646 | 6 | case OMPC_SCHEDULE_MODIFIER_nonmonotonic: |
2647 | 6 | Modifier = OMP_sch_modifier_nonmonotonic; |
2648 | 6 | break; |
2649 | 0 | case OMPC_SCHEDULE_MODIFIER_simd: |
2650 | 0 | if (Schedule == OMP_sch_static_chunked) |
2651 | 0 | Schedule = OMP_sch_static_balanced_chunked; |
2652 | 0 | break; |
2653 | 0 | case OMPC_SCHEDULE_MODIFIER_last: |
2654 | 9.25k | case OMPC_SCHEDULE_MODIFIER_unknown: |
2655 | 9.25k | break; |
2656 | 9.25k | } |
2657 | | // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description. |
2658 | | // If the static schedule kind is specified or if the ordered clause is |
2659 | | // specified, and if the nonmonotonic modifier is not specified, the effect is |
2660 | | // as if the monotonic modifier is specified. Otherwise, unless the monotonic |
2661 | | // modifier is specified, the effect is as if the nonmonotonic modifier is |
2662 | | // specified. |
2663 | 9.25k | if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 06.03k ) { |
2664 | 6.00k | if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static5.66k || |
2665 | 3.44k | Schedule == OMP_sch_static_balanced_chunked || |
2666 | 3.43k | Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static3.43k || |
2667 | 3.42k | Schedule == OMP_dist_sch_static_chunked || |
2668 | 2.95k | Schedule == OMP_dist_sch_static)) |
2669 | 519 | Modifier = OMP_sch_modifier_nonmonotonic; |
2670 | 6.00k | } |
2671 | 9.25k | return Schedule | Modifier; |
2672 | 9.25k | } |
2673 | | |
2674 | | void CGOpenMPRuntime::emitForDispatchInit( |
2675 | | CodeGenFunction &CGF, SourceLocation Loc, |
2676 | | const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, |
2677 | 744 | bool Ordered, const DispatchRTInput &DispatchValues) { |
2678 | 744 | if (!CGF.HaveInsertPoint()) |
2679 | 0 | return; |
2680 | 744 | OpenMPSchedType Schedule = getRuntimeSchedule( |
2681 | 744 | ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); |
2682 | 744 | assert(Ordered || |
2683 | 744 | (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && |
2684 | 744 | Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && |
2685 | 744 | Schedule != OMP_sch_static_balanced_chunked)); |
2686 | | // Call __kmpc_dispatch_init( |
2687 | | // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, |
2688 | | // kmp_int[32|64] lower, kmp_int[32|64] upper, |
2689 | | // kmp_int[32|64] stride, kmp_int[32|64] chunk); |
2690 | | |
2691 | | // If the Chunk was not specified in the clause - use default value 1. |
2692 | 139 | llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk |
2693 | 605 | : CGF.Builder.getIntN(IVSize, 1); |
2694 | 744 | llvm::Value *Args[] = { |
2695 | 744 | emitUpdateLocation(CGF, Loc), |
2696 | 744 | getThreadID(CGF, Loc), |
2697 | 744 | CGF.Builder.getInt32(addMonoNonMonoModifier( |
2698 | 744 | CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type |
2699 | 744 | DispatchValues.LB, // Lower |
2700 | 744 | DispatchValues.UB, // Upper |
2701 | 744 | CGF.Builder.getIntN(IVSize, 1), // Stride |
2702 | 744 | Chunk // Chunk |
2703 | 744 | }; |
2704 | 744 | CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); |
2705 | 744 | } |
2706 | | |
2707 | | static void emitForStaticInitCall( |
2708 | | CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, |
2709 | | llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, |
2710 | | OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, |
2711 | 8.51k | const CGOpenMPRuntime::StaticRTInput &Values) { |
2712 | 8.51k | if (!CGF.HaveInsertPoint()) |
2713 | 0 | return; |
2714 | | |
2715 | 8.51k | assert(!Values.Ordered); |
2716 | 8.51k | assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || |
2717 | 8.51k | Schedule == OMP_sch_static_balanced_chunked || |
2718 | 8.51k | Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || |
2719 | 8.51k | Schedule == OMP_dist_sch_static || |
2720 | 8.51k | Schedule == OMP_dist_sch_static_chunked); |
2721 | | |
2722 | | // Call __kmpc_for_static_init( |
2723 | | // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, |
2724 | | // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, |
2725 | | // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, |
2726 | | // kmp_int[32|64] incr, kmp_int[32|64] chunk); |
2727 | 8.51k | llvm::Value *Chunk = Values.Chunk; |
2728 | 8.51k | if (Chunk == nullptr) { |
2729 | 7.33k | assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || |
2730 | 7.33k | Schedule == OMP_dist_sch_static) && |
2731 | 7.33k | "expected static non-chunked schedule"); |
2732 | | // If the Chunk was not specified in the clause - use default value 1. |
2733 | 7.33k | Chunk = CGF.Builder.getIntN(Values.IVSize, 1); |
2734 | 1.17k | } else { |
2735 | 1.17k | assert((Schedule == OMP_sch_static_chunked || |
2736 | 1.17k | Schedule == OMP_sch_static_balanced_chunked || |
2737 | 1.17k | Schedule == OMP_ord_static_chunked || |
2738 | 1.17k | Schedule == OMP_dist_sch_static_chunked) && |
2739 | 1.17k | "expected static chunked schedule"); |
2740 | 1.17k | } |
2741 | 8.51k | llvm::Value *Args[] = { |
2742 | 8.51k | UpdateLocation, |
2743 | 8.51k | ThreadId, |
2744 | 8.51k | CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1, |
2745 | 8.51k | M2)), // Schedule type |
2746 | 8.51k | Values.IL.getPointer(), // &isLastIter |
2747 | 8.51k | Values.LB.getPointer(), // &LB |
2748 | 8.51k | Values.UB.getPointer(), // &UB |
2749 | 8.51k | Values.ST.getPointer(), // &Stride |
2750 | 8.51k | CGF.Builder.getIntN(Values.IVSize, 1), // Incr |
2751 | 8.51k | Chunk // Chunk |
2752 | 8.51k | }; |
2753 | 8.51k | CGF.EmitRuntimeCall(ForStaticInitFunction, Args); |
2754 | 8.51k | } |
2755 | | |
2756 | | void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, |
2757 | | SourceLocation Loc, |
2758 | | OpenMPDirectiveKind DKind, |
2759 | | const OpenMPScheduleTy &ScheduleKind, |
2760 | 4.05k | const StaticRTInput &Values) { |
2761 | 4.05k | OpenMPSchedType ScheduleNum = getRuntimeSchedule( |
2762 | 4.05k | ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); |
2763 | 4.05k | assert(isOpenMPWorksharingDirective(DKind) && |
2764 | 4.05k | "Expected loop-based or sections-based directive."); |
2765 | 4.05k | llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, |
2766 | 4.05k | isOpenMPLoopDirective(DKind) |
2767 | 3.96k | ? OMP_IDENT_WORK_LOOP |
2768 | 88 | : OMP_IDENT_WORK_SECTIONS); |
2769 | 4.05k | llvm::Value *ThreadId = getThreadID(CGF, Loc); |
2770 | 4.05k | llvm::FunctionCallee StaticInitFunction = |
2771 | 4.05k | createForStaticInitFunction(Values.IVSize, Values.IVSigned); |
2772 | 4.05k | auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); |
2773 | 4.05k | emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, |
2774 | 4.05k | ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); |
2775 | 4.05k | } |
2776 | | |
2777 | | void CGOpenMPRuntime::emitDistributeStaticInit( |
2778 | | CodeGenFunction &CGF, SourceLocation Loc, |
2779 | | OpenMPDistScheduleClauseKind SchedKind, |
2780 | 4.45k | const CGOpenMPRuntime::StaticRTInput &Values) { |
2781 | 4.45k | OpenMPSchedType ScheduleNum = |
2782 | 4.45k | getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); |
2783 | 4.45k | llvm::Value *UpdatedLocation = |
2784 | 4.45k | emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); |
2785 | 4.45k | llvm::Value *ThreadId = getThreadID(CGF, Loc); |
2786 | 4.45k | llvm::FunctionCallee StaticInitFunction = |
2787 | 4.45k | createForStaticInitFunction(Values.IVSize, Values.IVSigned); |
2788 | 4.45k | emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, |
2789 | 4.45k | ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, |
2790 | 4.45k | OMPC_SCHEDULE_MODIFIER_unknown, Values); |
2791 | 4.45k | } |
2792 | | |
2793 | | void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, |
2794 | | SourceLocation Loc, |
2795 | 8.56k | OpenMPDirectiveKind DKind) { |
2796 | 8.56k | if (!CGF.HaveInsertPoint()) |
2797 | 0 | return; |
2798 | | // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); |
2799 | 8.56k | llvm::Value *Args[] = { |
2800 | 8.56k | emitUpdateLocation(CGF, Loc, |
2801 | 8.56k | isOpenMPDistributeDirective(DKind) |
2802 | 6.80k | ? OMP_IDENT_WORK_DISTRIBUTE |
2803 | 1.76k | : isOpenMPLoopDirective(DKind) |
2804 | 1.63k | ? OMP_IDENT_WORK_LOOP |
2805 | 128 | : OMP_IDENT_WORK_SECTIONS), |
2806 | 8.56k | getThreadID(CGF, Loc)}; |
2807 | 8.56k | auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); |
2808 | 8.56k | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
2809 | 8.56k | CGM.getModule(), OMPRTL___kmpc_for_static_fini), |
2810 | 8.56k | Args); |
2811 | 8.56k | } |
2812 | | |
2813 | | void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, |
2814 | | SourceLocation Loc, |
2815 | | unsigned IVSize, |
2816 | 37 | bool IVSigned) { |
2817 | 37 | if (!CGF.HaveInsertPoint()) |
2818 | 0 | return; |
2819 | | // Call __kmpc_dispatch_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); |
2820 | 37 | llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; |
2821 | 37 | CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); |
2822 | 37 | } |
2823 | | |
2824 | | llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, |
2825 | | SourceLocation Loc, unsigned IVSize, |
2826 | | bool IVSigned, Address IL, |
2827 | | Address LB, Address UB, |
2828 | 744 | Address ST) { |
2829 | | // Call __kmpc_dispatch_next( |
2830 | | // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, |
2831 | | // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, |
2832 | | // kmp_int[32|64] *p_stride); |
2833 | 744 | llvm::Value *Args[] = { |
2834 | 744 | emitUpdateLocation(CGF, Loc), |
2835 | 744 | getThreadID(CGF, Loc), |
2836 | 744 | IL.getPointer(), // &isLastIter |
2837 | 744 | LB.getPointer(), // &Lower |
2838 | 744 | UB.getPointer(), // &Upper |
2839 | 744 | ST.getPointer() // &Stride |
2840 | 744 | }; |
2841 | 744 | llvm::Value *Call = |
2842 | 744 | CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); |
2843 | 744 | return CGF.EmitScalarConversion( |
2844 | 744 | Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1), |
2845 | 744 | CGF.getContext().BoolTy, Loc); |
2846 | 744 | } |
2847 | | |
2848 | | void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, |
2849 | | llvm::Value *NumThreads, |
2850 | 292 | SourceLocation Loc) { |
2851 | 292 | if (!CGF.HaveInsertPoint()) |
2852 | 0 | return; |
2853 | | // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) |
2854 | 292 | llvm::Value *Args[] = { |
2855 | 292 | emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), |
2856 | 292 | CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; |
2857 | 292 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
2858 | 292 | CGM.getModule(), OMPRTL___kmpc_push_num_threads), |
2859 | 292 | Args); |
2860 | 292 | } |
2861 | | |
2862 | | void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, |
2863 | | ProcBindKind ProcBind, |
2864 | 68 | SourceLocation Loc) { |
2865 | 68 | if (!CGF.HaveInsertPoint()) |
2866 | 0 | return; |
2867 | 68 | assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value."); |
2868 | | // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) |
2869 | 68 | llvm::Value *Args[] = { |
2870 | 68 | emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), |
2871 | 68 | llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)}; |
2872 | 68 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
2873 | 68 | CGM.getModule(), OMPRTL___kmpc_push_proc_bind), |
2874 | 68 | Args); |
2875 | 68 | } |
2876 | | |
2877 | | void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, |
2878 | 104 | SourceLocation Loc, llvm::AtomicOrdering AO) { |
2879 | 104 | if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { |
2880 | 20 | OMPBuilder.createFlush(CGF.Builder); |
2881 | 84 | } else { |
2882 | 84 | if (!CGF.HaveInsertPoint()) |
2883 | 0 | return; |
2884 | | // Build call void __kmpc_flush(ident_t *loc) |
2885 | 84 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
2886 | 84 | CGM.getModule(), OMPRTL___kmpc_flush), |
2887 | 84 | emitUpdateLocation(CGF, Loc)); |
2888 | 84 | } |
2889 | 104 | } |
2890 | | |
2891 | | namespace { |
2892 | | /// Indexes of fields for type kmp_task_t. |
2893 | | enum KmpTaskTFields { |
2894 | | /// List of shared variables. |
2895 | | KmpTaskTShareds, |
2896 | | /// Task routine. |
2897 | | KmpTaskTRoutine, |
2898 | | /// Partition id for the untied tasks. |
2899 | | KmpTaskTPartId, |
2900 | | /// Function with call of destructors for private variables. |
2901 | | Data1, |
2902 | | /// Task priority. |
2903 | | Data2, |
2904 | | /// (Taskloops only) Lower bound. |
2905 | | KmpTaskTLowerBound, |
2906 | | /// (Taskloops only) Upper bound. |
2907 | | KmpTaskTUpperBound, |
2908 | | /// (Taskloops only) Stride. |
2909 | | KmpTaskTStride, |
2910 | | /// (Taskloops only) Is last iteration flag. |
2911 | | KmpTaskTLastIter, |
2912 | | /// (Taskloops only) Reduction data. |
2913 | | KmpTaskTReductions, |
2914 | | }; |
2915 | | } // anonymous namespace |
2916 | | |
2917 | 5.36k | bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { |
2918 | 5.36k | return OffloadEntriesTargetRegion.empty() && |
2919 | 1.05k | OffloadEntriesDeviceGlobalVar.empty(); |
2920 | 5.36k | } |
2921 | | |
2922 | | /// Initialize target region entry. |
2923 | | void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: |
2924 | | initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, |
2925 | | StringRef ParentName, unsigned LineNum, |
2926 | 2.98k | unsigned Order) { |
2927 | 2.98k | assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " |
2928 | 2.98k | "only required for the device " |
2929 | 2.98k | "code generation."); |
2930 | 2.98k | OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = |
2931 | 2.98k | OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, |
2932 | 2.98k | OMPTargetRegionEntryTargetRegion); |
2933 | 2.98k | ++OffloadingEntriesNum; |
2934 | 2.98k | } |
2935 | | |
2936 | | void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: |
2937 | | registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, |
2938 | | StringRef ParentName, unsigned LineNum, |
2939 | | llvm::Constant *Addr, llvm::Constant *ID, |
2940 | 11.3k | OMPTargetRegionEntryKind Flags) { |
2941 | | // If we are emitting code for a target, the entry is already initialized, |
2942 | | // only has to be registered. |
2943 | 11.3k | if (CGM.getLangOpts().OpenMPIsDevice) { |
2944 | | // This could happen if the device compilation is invoked standalone. |
2945 | 2.94k | if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) |
2946 | 0 | initializeTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum, |
2947 | 0 | OffloadingEntriesNum); |
2948 | 2.94k | auto &Entry = |
2949 | 2.94k | OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; |
2950 | 2.94k | Entry.setAddress(Addr); |
2951 | 2.94k | Entry.setID(ID); |
2952 | 2.94k | Entry.setFlags(Flags); |
2953 | 8.38k | } else { |
2954 | 8.38k | if (Flags == |
2955 | 8.38k | OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion && |
2956 | 8.28k | hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum, |
2957 | 8.28k | /*IgnoreAddressId*/ true)) |
2958 | 4 | return; |
2959 | 8.38k | assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && |
2960 | 8.38k | "Target region entry already registered!"); |
2961 | 8.38k | OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); |
2962 | 8.38k | OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; |
2963 | 8.38k | ++OffloadingEntriesNum; |
2964 | 8.38k | } |
2965 | 11.3k | } |
2966 | | |
2967 | | bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( |
2968 | | unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, |
2969 | 23.4k | bool IgnoreAddressId) const { |
2970 | 23.4k | auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); |
2971 | 23.4k | if (PerDevice == OffloadEntriesTargetRegion.end()) |
2972 | 3.77k | return false; |
2973 | 19.6k | auto PerFile = PerDevice->second.find(FileID); |
2974 | 19.6k | if (PerFile == PerDevice->second.end()) |
2975 | 0 | return false; |
2976 | 19.6k | auto PerParentName = PerFile->second.find(ParentName); |
2977 | 19.6k | if (PerParentName == PerFile->second.end()) |
2978 | 5.37k | return false; |
2979 | 14.3k | auto PerLine = PerParentName->second.find(LineNum); |
2980 | 14.3k | if (PerLine == PerParentName->second.end()) |
2981 | 8.26k | return false; |
2982 | | // Fail if this entry is already registered. |
2983 | 6.04k | if (!IgnoreAddressId && |
2984 | 6.03k | (PerLine->second.getAddress() || PerLine->second.getID()5.76k )) |
2985 | 277 | return false; |
2986 | 5.76k | return true; |
2987 | 5.76k | } |
2988 | | |
2989 | | void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( |
2990 | 2.43k | const OffloadTargetRegionEntryInfoActTy &Action) { |
2991 | | // Scan all target region entries and perform the provided action. |
2992 | 2.43k | for (const auto &D : OffloadEntriesTargetRegion) |
2993 | 2.42k | for (const auto &F : D.second) |
2994 | 2.42k | for (const auto &P : F.second) |
2995 | 6.18k | for (const auto &L : P.second) |
2996 | 11.3k | Action(D.first, F.first, P.first(), L.first, L.second); |
2997 | 2.43k | } |
2998 | | |
2999 | | void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: |
3000 | | initializeDeviceGlobalVarEntryInfo(StringRef Name, |
3001 | | OMPTargetGlobalVarEntryKind Flags, |
3002 | 144 | unsigned Order) { |
3003 | 144 | assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " |
3004 | 144 | "only required for the device " |
3005 | 144 | "code generation."); |
3006 | 144 | OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); |
3007 | 144 | ++OffloadingEntriesNum; |
3008 | 144 | } |
3009 | | |
3010 | | void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: |
3011 | | registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, |
3012 | | CharUnits VarSize, |
3013 | | OMPTargetGlobalVarEntryKind Flags, |
3014 | 746 | llvm::GlobalValue::LinkageTypes Linkage) { |
3015 | 746 | if (CGM.getLangOpts().OpenMPIsDevice) { |
3016 | | // This could happen if the device compilation is invoked standalone. |
3017 | 235 | if (!hasDeviceGlobalVarEntryInfo(VarName)) |
3018 | 3 | initializeDeviceGlobalVarEntryInfo(VarName, Flags, OffloadingEntriesNum); |
3019 | 235 | auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; |
3020 | 235 | assert((!Entry.getAddress() || Entry.getAddress() == Addr) && |
3021 | 235 | "Resetting with the new address."); |
3022 | 235 | if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)95 ) { |
3023 | 95 | if (Entry.getVarSize().isZero()) { |
3024 | 4 | Entry.setVarSize(VarSize); |
3025 | 4 | Entry.setLinkage(Linkage); |
3026 | 4 | } |
3027 | 95 | return; |
3028 | 95 | } |
3029 | 140 | Entry.setVarSize(VarSize); |
3030 | 140 | Entry.setLinkage(Linkage); |
3031 | 140 | Entry.setAddress(Addr); |
3032 | 511 | } else { |
3033 | 511 | if (hasDeviceGlobalVarEntryInfo(VarName)) { |
3034 | 342 | auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; |
3035 | 342 | assert(Entry.isValid() && Entry.getFlags() == Flags && |
3036 | 342 | "Entry not initialized!"); |
3037 | 342 | assert((!Entry.getAddress() || Entry.getAddress() == Addr) && |
3038 | 342 | "Resetting with the new address."); |
3039 | 342 | if (Entry.getVarSize().isZero()) { |
3040 | 27 | Entry.setVarSize(VarSize); |
3041 | 27 | Entry.setLinkage(Linkage); |
3042 | 27 | } |
3043 | 342 | return; |
3044 | 342 | } |
3045 | 169 | OffloadEntriesDeviceGlobalVar.try_emplace( |
3046 | 169 | VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage); |
3047 | 169 | ++OffloadingEntriesNum; |
3048 | 169 | } |
3049 | 746 | } |
3050 | | |
3051 | | void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: |
3052 | | actOnDeviceGlobalVarEntriesInfo( |
3053 | 2.43k | const OffloadDeviceGlobalVarEntryInfoActTy &Action) { |
3054 | | // Scan all device global variable entries and perform the provided action. |
3055 | 2.43k | for (const auto &E : OffloadEntriesDeviceGlobalVar) |
3056 | 309 | Action(E.getKey(), E.getValue()); |
3057 | 2.43k | } |
3058 | | |
3059 | | void CGOpenMPRuntime::createOffloadEntry( |
3060 | | llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, |
3061 | 10.5k | llvm::GlobalValue::LinkageTypes Linkage) { |
3062 | 10.5k | StringRef Name = Addr->getName(); |
3063 | 10.5k | llvm::Module &M = CGM.getModule(); |
3064 | 10.5k | llvm::LLVMContext &C = M.getContext(); |
3065 | | |
3066 | | // Create constant string with the name. |
3067 | 10.5k | llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); |
3068 | | |
3069 | 10.5k | std::string StringName = getName({"omp_offloading", "entry_name"}); |
3070 | 10.5k | auto *Str = new llvm::GlobalVariable( |
3071 | 10.5k | M, StrPtrInit->getType(), /*isConstant=*/true, |
3072 | 10.5k | llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName); |
3073 | 10.5k | Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); |
3074 | | |
3075 | 10.5k | llvm::Constant *Data[] = { |
3076 | 10.5k | llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy), |
3077 | 10.5k | llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy), |
3078 | 10.5k | llvm::ConstantInt::get(CGM.SizeTy, Size), |
3079 | 10.5k | llvm::ConstantInt::get(CGM.Int32Ty, Flags), |
3080 | 10.5k | llvm::ConstantInt::get(CGM.Int32Ty, 0)}; |
3081 | 10.5k | std::string EntryName = getName({"omp_offloading", "entry", ""}); |
3082 | 10.5k | llvm::GlobalVariable *Entry = createGlobalStruct( |
3083 | 10.5k | CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data, |
3084 | 10.5k | Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage); |
3085 | | |
3086 | | // The entry has to be created in the section the linker expects it to be. |
3087 | 10.5k | Entry->setSection("omp_offloading_entries"); |
3088 | 10.5k | } |
3089 | | |
3090 | 5.65k | void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { |
3091 | | // Emit the offloading entries and metadata so that the device codegen side |
3092 | | // can easily figure out what to emit. The produced metadata looks like |
3093 | | // this: |
3094 | | // |
3095 | | // !omp_offload.info = !{!1, ...} |
3096 | | // |
3097 | | // Right now we only generate metadata for functions that contain target |
3098 | | // regions. |
3099 | | |
3100 | | // If we are in simd mode or there are no entries, we don't need to do |
3101 | | // anything. |
3102 | 5.65k | if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()3.24k ) |
3103 | 3.22k | return; |
3104 | | |
3105 | 2.43k | llvm::Module &M = CGM.getModule(); |
3106 | 2.43k | llvm::LLVMContext &C = M.getContext(); |
3107 | 2.43k | SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, |
3108 | 2.43k | SourceLocation, StringRef>, |
3109 | 2.43k | 16> |
3110 | 2.43k | OrderedEntries(OffloadEntriesInfoManager.size()); |
3111 | 2.43k | llvm::SmallVector<StringRef, 16> ParentFunctions( |
3112 | 2.43k | OffloadEntriesInfoManager.size()); |
3113 | | |
3114 | | // Auxiliary methods to create metadata values and strings. |
3115 | 57.4k | auto &&GetMDInt = [this](unsigned V) { |
3116 | 57.4k | return llvm::ConstantAsMetadata::get( |
3117 | 57.4k | llvm::ConstantInt::get(CGM.Int32Ty, V)); |
3118 | 57.4k | }; |
3119 | | |
3120 | 11.6k | auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; |
3121 | | |
3122 | | // Create the offloading info metadata node. |
3123 | 2.43k | llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); |
3124 | | |
3125 | | // Create function that emits metadata for each target region entry; |
3126 | 2.43k | auto &&TargetRegionMetadataEmitter = |
3127 | 2.43k | [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, |
3128 | 2.43k | &GetMDString]( |
3129 | 2.43k | unsigned DeviceID, unsigned FileID, StringRef ParentName, |
3130 | 2.43k | unsigned Line, |
3131 | 11.3k | const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { |
3132 | | // Generate metadata for target regions. Each entry of this metadata |
3133 | | // contains: |
3134 | | // - Entry 0 -> Kind of this type of metadata (0). |
3135 | | // - Entry 1 -> Device ID of the file where the entry was identified. |
3136 | | // - Entry 2 -> File ID of the file where the entry was identified. |
3137 | | // - Entry 3 -> Mangled name of the function where the entry was |
3138 | | // identified. |
3139 | | // - Entry 4 -> Line in the file where the entry was identified. |
3140 | | // - Entry 5 -> Order the entry was created. |
3141 | | // The first element of the metadata node is the kind. |
3142 | 11.3k | llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), |
3143 | 11.3k | GetMDInt(FileID), GetMDString(ParentName), |
3144 | 11.3k | GetMDInt(Line), GetMDInt(E.getOrder())}; |
3145 | | |
3146 | 11.3k | SourceLocation Loc; |
3147 | 11.3k | for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), |
3148 | 11.3k | E = CGM.getContext().getSourceManager().fileinfo_end(); |
3149 | 24.5k | I != E; ++I13.2k ) { |
3150 | 13.2k | if (I->getFirst()->getUniqueID().getDevice() == DeviceID && |
3151 | 13.2k | I->getFirst()->getUniqueID().getFile() == FileID) { |
3152 | 0 | Loc = CGM.getContext().getSourceManager().translateFileLineCol( |
3153 | 0 | I->getFirst(), Line, 1); |
3154 | 0 | break; |
3155 | 0 | } |
3156 | 13.2k | } |
3157 | | // Save this entry in the right position of the ordered entries array. |
3158 | 11.3k | OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName); |
3159 | 11.3k | ParentFunctions[E.getOrder()] = ParentName; |
3160 | | |
3161 | | // Add metadata to the named metadata node. |
3162 | 11.3k | MD->addOperand(llvm::MDNode::get(C, Ops)); |
3163 | 11.3k | }; |
3164 | | |
3165 | 2.43k | OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( |
3166 | 2.43k | TargetRegionMetadataEmitter); |
3167 | | |
3168 | | // Create function that emits metadata for each device global variable entry; |
3169 | 2.43k | auto &&DeviceGlobalVarMetadataEmitter = |
3170 | 2.43k | [&C, &OrderedEntries, &GetMDInt, &GetMDString, |
3171 | 2.43k | MD](StringRef MangledName, |
3172 | 2.43k | const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar |
3173 | 309 | &E) { |
3174 | | // Generate metadata for global variables. Each entry of this metadata |
3175 | | // contains: |
3176 | | // - Entry 0 -> Kind of this type of metadata (1). |
3177 | | // - Entry 1 -> Mangled name of the variable. |
3178 | | // - Entry 2 -> Declare target kind. |
3179 | | // - Entry 3 -> Order the entry was created. |
3180 | | // The first element of the metadata node is the kind. |
3181 | 309 | llvm::Metadata *Ops[] = { |
3182 | 309 | GetMDInt(E.getKind()), GetMDString(MangledName), |
3183 | 309 | GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; |
3184 | | |
3185 | | // Save this entry in the right position of the ordered entries array. |
3186 | 309 | OrderedEntries[E.getOrder()] = |
3187 | 309 | std::make_tuple(&E, SourceLocation(), MangledName); |
3188 | | |
3189 | | // Add metadata to the named metadata node. |
3190 | 309 | MD->addOperand(llvm::MDNode::get(C, Ops)); |
3191 | 309 | }; |
3192 | | |
3193 | 2.43k | OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( |
3194 | 2.43k | DeviceGlobalVarMetadataEmitter); |
3195 | | |
3196 | 11.6k | for (const auto &E : OrderedEntries) { |
3197 | 11.6k | assert(std::get<0>(E) && "All ordered entries must exist!"); |
3198 | 11.6k | if (const auto *CE = |
3199 | 11.3k | dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( |
3200 | 11.3k | std::get<0>(E))) { |
3201 | 11.3k | if (!CE->getID() || !CE->getAddress()11.3k ) { |
3202 | | // Do not blame the entry if the parent function is not emitted. |
3203 | 4 | StringRef FnName = ParentFunctions[CE->getOrder()]; |
3204 | 4 | if (!CGM.GetGlobalValue(FnName)) |
3205 | 2 | continue; |
3206 | 2 | unsigned DiagID = CGM.getDiags().getCustomDiagID( |
3207 | 2 | DiagnosticsEngine::Error, |
3208 | 2 | "Offloading entry for target region in %0 is incorrect: either the " |
3209 | 2 | "address or the ID is invalid."); |
3210 | 2 | CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName; |
3211 | 2 | continue; |
3212 | 2 | } |
3213 | 11.3k | createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, |
3214 | 11.3k | CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); |
3215 | 309 | } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy:: |
3216 | 309 | OffloadEntryInfoDeviceGlobalVar>( |
3217 | 309 | std::get<0>(E))) { |
3218 | 309 | OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = |
3219 | 309 | static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( |
3220 | 309 | CE->getFlags()); |
3221 | 309 | switch (Flags) { |
3222 | 259 | case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: { |
3223 | 259 | if (CGM.getLangOpts().OpenMPIsDevice && |
3224 | 122 | CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()) |
3225 | 2 | continue; |
3226 | 257 | if (!CE->getAddress()) { |
3227 | 0 | unsigned DiagID = CGM.getDiags().getCustomDiagID( |
3228 | 0 | DiagnosticsEngine::Error, "Offloading entry for declare target " |
3229 | 0 | "variable %0 is incorrect: the " |
3230 | 0 | "address is invalid."); |
3231 | 0 | CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E); |
3232 | 0 | continue; |
3233 | 0 | } |
3234 | | // The variable has no definition - no need to add the entry. |
3235 | 257 | if (CE->getVarSize().isZero()) |
3236 | 49 | continue; |
3237 | 208 | break; |
3238 | 208 | } |
3239 | 50 | case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink: |
3240 | 50 | assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || |
3241 | 50 | (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && |
3242 | 50 | "Declaret target link address is set."); |
3243 | 50 | if (CGM.getLangOpts().OpenMPIsDevice) |
3244 | 18 | continue; |
3245 | 32 | if (!CE->getAddress()) { |
3246 | 0 | unsigned DiagID = CGM.getDiags().getCustomDiagID( |
3247 | 0 | DiagnosticsEngine::Error, |
3248 | 0 | "Offloading entry for declare target variable is incorrect: the " |
3249 | 0 | "address is invalid."); |
3250 | 0 | CGM.getDiags().Report(DiagID); |
3251 | 0 | continue; |
3252 | 0 | } |
3253 | 32 | break; |
3254 | 240 | } |
3255 | 240 | createOffloadEntry(CE->getAddress(), CE->getAddress(), |
3256 | 240 | CE->getVarSize().getQuantity(), Flags, |
3257 | 240 | CE->getLinkage()); |
3258 | 0 | } else { |
3259 | 0 | llvm_unreachable("Unsupported entry kind."); |
3260 | 0 | } |
3261 | 11.6k | } |
3262 | 2.43k | } |
3263 | | |
3264 | | /// Loads all the offload entries information from the host IR |
3265 | | /// metadata. |
3266 | 5.68k | void CGOpenMPRuntime::loadOffloadInfoMetadata() { |
3267 | | // If we are in target mode, load the metadata from the host IR. This code has |
3268 | | // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). |
3269 | | |
3270 | 5.68k | if (!CGM.getLangOpts().OpenMPIsDevice) |
3271 | 5.12k | return; |
3272 | | |
3273 | 559 | if (CGM.getLangOpts().OMPHostIRFile.empty()) |
3274 | 0 | return; |
3275 | | |
3276 | 559 | auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); |
3277 | 559 | if (auto EC = Buf.getError()) { |
3278 | 0 | CGM.getDiags().Report(diag::err_cannot_open_file) |
3279 | 0 | << CGM.getLangOpts().OMPHostIRFile << EC.message(); |
3280 | 0 | return; |
3281 | 0 | } |
3282 | | |
3283 | 559 | llvm::LLVMContext C; |
3284 | 559 | auto ME = expectedToErrorOrAndEmitErrors( |
3285 | 559 | C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); |
3286 | | |
3287 | 559 | if (auto EC = ME.getError()) { |
3288 | 0 | unsigned DiagID = CGM.getDiags().getCustomDiagID( |
3289 | 0 | DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'"); |
3290 | 0 | CGM.getDiags().Report(DiagID) |
3291 | 0 | << CGM.getLangOpts().OMPHostIRFile << EC.message(); |
3292 | 0 | return; |
3293 | 0 | } |
3294 | | |
3295 | 559 | llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); |
3296 | 559 | if (!MD) |
3297 | 28 | return; |
3298 | | |
3299 | 3.12k | for (llvm::MDNode *MN : MD->operands())531 { |
3300 | 15.3k | auto &&GetMDInt = [MN](unsigned Idx) { |
3301 | 15.3k | auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); |
3302 | 15.3k | return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); |
3303 | 15.3k | }; |
3304 | | |
3305 | 3.12k | auto &&GetMDString = [MN](unsigned Idx) { |
3306 | 3.12k | auto *V = cast<llvm::MDString>(MN->getOperand(Idx)); |
3307 | 3.12k | return V->getString(); |
3308 | 3.12k | }; |
3309 | | |
3310 | 3.12k | switch (GetMDInt(0)) { |
3311 | 0 | default: |
3312 | 0 | llvm_unreachable("Unexpected metadata!"); |
3313 | 0 | break; |
3314 | 2.98k | case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: |
3315 | 2.98k | OffloadingEntryInfoTargetRegion: |
3316 | 2.98k | OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( |
3317 | 2.98k | /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2), |
3318 | 2.98k | /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4), |
3319 | 2.98k | /*Order=*/GetMDInt(5)); |
3320 | 2.98k | break; |
3321 | 141 | case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: |
3322 | 141 | OffloadingEntryInfoDeviceGlobalVar: |
3323 | 141 | OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( |
3324 | 141 | /*MangledName=*/GetMDString(1), |
3325 | 141 | static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( |
3326 | 141 | /*Flags=*/GetMDInt(2)), |
3327 | 141 | /*Order=*/GetMDInt(3)); |
3328 | 141 | break; |
3329 | 3.12k | } |
3330 | 3.12k | } |
3331 | 531 | } |
3332 | | |
3333 | 855 | void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { |
3334 | 855 | if (!KmpRoutineEntryPtrTy) { |
3335 | | // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. |
3336 | 381 | ASTContext &C = CGM.getContext(); |
3337 | 381 | QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; |
3338 | 381 | FunctionProtoType::ExtProtoInfo EPI; |
3339 | 381 | KmpRoutineEntryPtrQTy = C.getPointerType( |
3340 | 381 | C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); |
3341 | 381 | KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); |
3342 | 381 | } |
3343 | 855 | } |
3344 | | |
3345 | 10.5k | QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { |
3346 | | // Make sure the type of the entry is already created. This is the type we |
3347 | | // have to create: |
3348 | | // struct __tgt_offload_entry{ |
3349 | | // void *addr; // Pointer to the offload entry info. |
3350 | | // // (function or global) |
3351 | | // char *name; // Name of the function or global. |
3352 | | // size_t size; // Size of the entry info (0 if it a function). |
3353 | | // int32_t flags; // Flags associated with the entry, e.g. 'link'. |
3354 | | // int32_t reserved; // Reserved, to use by the runtime library. |
3355 | | // }; |
3356 | 10.5k | if (TgtOffloadEntryQTy.isNull()) { |
3357 | 2.23k | ASTContext &C = CGM.getContext(); |
3358 | 2.23k | RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); |
3359 | 2.23k | RD->startDefinition(); |
3360 | 2.23k | addFieldToRecordDecl(C, RD, C.VoidPtrTy); |
3361 | 2.23k | addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); |
3362 | 2.23k | addFieldToRecordDecl(C, RD, C.getSizeType()); |
3363 | 2.23k | addFieldToRecordDecl( |
3364 | 2.23k | C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); |
3365 | 2.23k | addFieldToRecordDecl( |
3366 | 2.23k | C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); |
3367 | 2.23k | RD->completeDefinition(); |
3368 | 2.23k | RD->addAttr(PackedAttr::CreateImplicit(C)); |
3369 | 2.23k | TgtOffloadEntryQTy = C.getRecordType(RD); |
3370 | 2.23k | } |
3371 | 10.5k | return TgtOffloadEntryQTy; |
3372 | 10.5k | } |
3373 | | |
3374 | | namespace { |
3375 | | struct PrivateHelpersTy { |
3376 | | PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original, |
3377 | | const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) |
3378 | | : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), |
3379 | 1.64k | PrivateElemInit(PrivateElemInit) {} |
3380 | 8 | PrivateHelpersTy(const VarDecl *Original) : Original(Original) {} |
3381 | | const Expr *OriginalRef = nullptr; |
3382 | | const VarDecl *Original = nullptr; |
3383 | | const VarDecl *PrivateCopy = nullptr; |
3384 | | const VarDecl *PrivateElemInit = nullptr; |
3385 | 5.32k | bool isLocalPrivate() const { |
3386 | 5.32k | return !OriginalRef && !PrivateCopy24 && !PrivateElemInit24 ; |
3387 | 5.32k | } |
3388 | | }; |
3389 | | typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; |
3390 | | } // anonymous namespace |
3391 | | |
3392 | 73 | static bool isAllocatableDecl(const VarDecl *VD) { |
3393 | 73 | const VarDecl *CVD = VD->getCanonicalDecl(); |
3394 | 73 | if (!CVD->hasAttr<OMPAllocateDeclAttr>()) |
3395 | 18 | return false; |
3396 | 55 | const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); |
3397 | | // Use the default allocation. |
3398 | 55 | return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc || |
3399 | 42 | AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) && |
3400 | 20 | !AA->getAllocator()); |
3401 | 55 | } |
3402 | | |
3403 | | static RecordDecl * |
3404 | 855 | createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { |
3405 | 855 | if (!Privates.empty()) { |
3406 | 554 | ASTContext &C = CGM.getContext(); |
3407 | | // Build struct .kmp_privates_t. { |
3408 | | // /* private vars */ |
3409 | | // }; |
3410 | 554 | RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); |
3411 | 554 | RD->startDefinition(); |
3412 | 1.65k | for (const auto &Pair : Privates) { |
3413 | 1.65k | const VarDecl *VD = Pair.second.Original; |
3414 | 1.65k | QualType Type = VD->getType().getNonReferenceType(); |
3415 | | // If the private variable is a local variable with lvalue ref type, |
3416 | | // allocate the pointer instead of the pointee type. |
3417 | 1.65k | if (Pair.second.isLocalPrivate()) { |
3418 | 8 | if (VD->getType()->isLValueReferenceType()) |
3419 | 0 | Type = C.getPointerType(Type); |
3420 | 8 | if (isAllocatableDecl(VD)) |
3421 | 2 | Type = C.getPointerType(Type); |
3422 | 8 | } |
3423 | 1.65k | FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); |
3424 | 1.65k | if (VD->hasAttrs()) { |
3425 | 50 | for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), |
3426 | 50 | E(VD->getAttrs().end()); |
3427 | 98 | I != E; ++I48 ) |
3428 | 48 | FD->addAttr(*I); |
3429 | 50 | } |
3430 | 1.65k | } |
3431 | 554 | RD->completeDefinition(); |
3432 | 554 | return RD; |
3433 | 554 | } |
3434 | 301 | return nullptr; |
3435 | 301 | } |
3436 | | |
3437 | | static RecordDecl * |
3438 | | createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, |
3439 | | QualType KmpInt32Ty, |
3440 | 397 | QualType KmpRoutineEntryPointerQTy) { |
3441 | 397 | ASTContext &C = CGM.getContext(); |
3442 | | // Build struct kmp_task_t { |
3443 | | // void * shareds; |
3444 | | // kmp_routine_entry_t routine; |
3445 | | // kmp_int32 part_id; |
3446 | | // kmp_cmplrdata_t data1; |
3447 | | // kmp_cmplrdata_t data2; |
3448 | | // For taskloops additional fields: |
3449 | | // kmp_uint64 lb; |
3450 | | // kmp_uint64 ub; |
3451 | | // kmp_int64 st; |
3452 | | // kmp_int32 liter; |
3453 | | // void * reductions; |
3454 | | // }; |
3455 | 397 | RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); |
3456 | 397 | UD->startDefinition(); |
3457 | 397 | addFieldToRecordDecl(C, UD, KmpInt32Ty); |
3458 | 397 | addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); |
3459 | 397 | UD->completeDefinition(); |
3460 | 397 | QualType KmpCmplrdataTy = C.getRecordType(UD); |
3461 | 397 | RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); |
3462 | 397 | RD->startDefinition(); |
3463 | 397 | addFieldToRecordDecl(C, RD, C.VoidPtrTy); |
3464 | 397 | addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); |
3465 | 397 | addFieldToRecordDecl(C, RD, KmpInt32Ty); |
3466 | 397 | addFieldToRecordDecl(C, RD, KmpCmplrdataTy); |
3467 | 397 | addFieldToRecordDecl(C, RD, KmpCmplrdataTy); |
3468 | 397 | if (isOpenMPTaskLoopDirective(Kind)) { |
3469 | 128 | QualType KmpUInt64Ty = |
3470 | 128 | CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); |
3471 | 128 | QualType KmpInt64Ty = |
3472 | 128 | CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); |
3473 | 128 | addFieldToRecordDecl(C, RD, KmpUInt64Ty); |
3474 | 128 | addFieldToRecordDecl(C, RD, KmpUInt64Ty); |
3475 | 128 | addFieldToRecordDecl(C, RD, KmpInt64Ty); |
3476 | 128 | addFieldToRecordDecl(C, RD, KmpInt32Ty); |
3477 | 128 | addFieldToRecordDecl(C, RD, C.VoidPtrTy); |
3478 | 128 | } |
3479 | 397 | RD->completeDefinition(); |
3480 | 397 | return RD; |
3481 | 397 | } |
3482 | | |
3483 | | static RecordDecl * |
3484 | | createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, |
3485 | 855 | ArrayRef<PrivateDataTy> Privates) { |
3486 | 855 | ASTContext &C = CGM.getContext(); |
3487 | | // Build struct kmp_task_t_with_privates { |
3488 | | // kmp_task_t task_data; |
3489 | | // .kmp_privates_t. privates; |
3490 | | // }; |
3491 | 855 | RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); |
3492 | 855 | RD->startDefinition(); |
3493 | 855 | addFieldToRecordDecl(C, RD, KmpTaskTQTy); |
3494 | 855 | if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) |
3495 | 554 | addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); |
3496 | 855 | RD->completeDefinition(); |
3497 | 855 | return RD; |
3498 | 855 | } |
3499 | | |
3500 | | /// Emit a proxy function which accepts kmp_task_t as the second |
3501 | | /// argument. |
3502 | | /// \code |
3503 | | /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { |
3504 | | /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, |
3505 | | /// For taskloops: |
3506 | | /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, |
3507 | | /// tt->reductions, tt->shareds); |
3508 | | /// return 0; |
3509 | | /// } |
3510 | | /// \endcode |
3511 | | static llvm::Function * |
3512 | | emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, |
3513 | | OpenMPDirectiveKind Kind, QualType KmpInt32Ty, |
3514 | | QualType KmpTaskTWithPrivatesPtrQTy, |
3515 | | QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, |
3516 | | QualType SharedsPtrTy, llvm::Function *TaskFunction, |
3517 | 855 | llvm::Value *TaskPrivatesMap) { |
3518 | 855 | ASTContext &C = CGM.getContext(); |
3519 | 855 | FunctionArgList Args; |
3520 | 855 | ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, |
3521 | 855 | ImplicitParamDecl::Other); |
3522 | 855 | ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, |
3523 | 855 | KmpTaskTWithPrivatesPtrQTy.withRestrict(), |
3524 | 855 | ImplicitParamDecl::Other); |
3525 | 855 | Args.push_back(&GtidArg); |
3526 | 855 | Args.push_back(&TaskTypeArg); |
3527 | 855 | const auto &TaskEntryFnInfo = |
3528 | 855 | CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); |
3529 | 855 | llvm::FunctionType *TaskEntryTy = |
3530 | 855 | CGM.getTypes().GetFunctionType(TaskEntryFnInfo); |
3531 | 855 | std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); |
3532 | 855 | auto *TaskEntry = llvm::Function::Create( |
3533 | 855 | TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); |
3534 | 855 | CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); |
3535 | 855 | TaskEntry->setDoesNotRecurse(); |
3536 | 855 | CodeGenFunction CGF(CGM); |
3537 | 855 | CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, |
3538 | 855 | Loc, Loc); |
3539 | | |
3540 | | // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, |
3541 | | // tt, |
3542 | | // For taskloops: |
3543 | | // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, |
3544 | | // tt->task_data.shareds); |
3545 | 855 | llvm::Value *GtidParam = CGF.EmitLoadOfScalar( |
3546 | 855 | CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); |
3547 | 855 | LValue TDBase = CGF.EmitLoadOfPointerLValue( |
3548 | 855 | CGF.GetAddrOfLocalVar(&TaskTypeArg), |
3549 | 855 | KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); |
3550 | 855 | const auto *KmpTaskTWithPrivatesQTyRD = |
3551 | 855 | cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); |
3552 | 855 | LValue Base = |
3553 | 855 | CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); |
3554 | 855 | const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); |
3555 | 855 | auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); |
3556 | 855 | LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); |
3557 | 855 | llvm::Value *PartidParam = PartIdLVal.getPointer(CGF); |
3558 | | |
3559 | 855 | auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); |
3560 | 855 | LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); |
3561 | 855 | llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
3562 | 855 | CGF.EmitLoadOfScalar(SharedsLVal, Loc), |
3563 | 855 | CGF.ConvertTypeForMem(SharedsPtrTy)); |
3564 | | |
3565 | 855 | auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); |
3566 | 855 | llvm::Value *PrivatesParam; |
3567 | 855 | if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { |
3568 | 554 | LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); |
3569 | 554 | PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
3570 | 554 | PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy); |
3571 | 301 | } else { |
3572 | 301 | PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); |
3573 | 301 | } |
3574 | | |
3575 | 855 | llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, |
3576 | 855 | TaskPrivatesMap, |
3577 | 855 | CGF.Builder |
3578 | 855 | .CreatePointerBitCastOrAddrSpaceCast( |
3579 | 855 | TDBase.getAddress(CGF), CGF.VoidPtrTy) |
3580 | 855 | .getPointer()}; |
3581 | 855 | SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), |
3582 | 855 | std::end(CommonArgs)); |
3583 | 855 | if (isOpenMPTaskLoopDirective(Kind)) { |
3584 | 226 | auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); |
3585 | 226 | LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); |
3586 | 226 | llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); |
3587 | 226 | auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); |
3588 | 226 | LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); |
3589 | 226 | llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); |
3590 | 226 | auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); |
3591 | 226 | LValue StLVal = CGF.EmitLValueForField(Base, *StFI); |
3592 | 226 | llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); |
3593 | 226 | auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); |
3594 | 226 | LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); |
3595 | 226 | llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); |
3596 | 226 | auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); |
3597 | 226 | LValue RLVal = CGF.EmitLValueForField(Base, *RFI); |
3598 | 226 | llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); |
3599 | 226 | CallArgs.push_back(LBParam); |
3600 | 226 | CallArgs.push_back(UBParam); |
3601 | 226 | CallArgs.push_back(StParam); |
3602 | 226 | CallArgs.push_back(LIParam); |
3603 | 226 | CallArgs.push_back(RParam); |
3604 | 226 | } |
3605 | 855 | CallArgs.push_back(SharedsParam); |
3606 | | |
3607 | 855 | CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, |
3608 | 855 | CallArgs); |
3609 | 855 | CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), |
3610 | 855 | CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); |
3611 | 855 | CGF.FinishFunction(); |
3612 | 855 | return TaskEntry; |
3613 | 855 | } |
3614 | | |
3615 | | static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, |
3616 | | SourceLocation Loc, |
3617 | | QualType KmpInt32Ty, |
3618 | | QualType KmpTaskTWithPrivatesPtrQTy, |
3619 | 81 | QualType KmpTaskTWithPrivatesQTy) { |
3620 | 81 | ASTContext &C = CGM.getContext(); |
3621 | 81 | FunctionArgList Args; |
3622 | 81 | ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, |
3623 | 81 | ImplicitParamDecl::Other); |
3624 | 81 | ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, |
3625 | 81 | KmpTaskTWithPrivatesPtrQTy.withRestrict(), |
3626 | 81 | ImplicitParamDecl::Other); |
3627 | 81 | Args.push_back(&GtidArg); |
3628 | 81 | Args.push_back(&TaskTypeArg); |
3629 | 81 | const auto &DestructorFnInfo = |
3630 | 81 | CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); |
3631 | 81 | llvm::FunctionType *DestructorFnTy = |
3632 | 81 | CGM.getTypes().GetFunctionType(DestructorFnInfo); |
3633 | 81 | std::string Name = |
3634 | 81 | CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); |
3635 | 81 | auto *DestructorFn = |
3636 | 81 | llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, |
3637 | 81 | Name, &CGM.getModule()); |
3638 | 81 | CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, |
3639 | 81 | DestructorFnInfo); |
3640 | 81 | DestructorFn->setDoesNotRecurse(); |
3641 | 81 | CodeGenFunction CGF(CGM); |
3642 | 81 | CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, |
3643 | 81 | Args, Loc, Loc); |
3644 | | |
3645 | 81 | LValue Base = CGF.EmitLoadOfPointerLValue( |
3646 | 81 | CGF.GetAddrOfLocalVar(&TaskTypeArg), |
3647 | 81 | KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); |
3648 | 81 | const auto *KmpTaskTWithPrivatesQTyRD = |
3649 | 81 | cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); |
3650 | 81 | auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); |
3651 | 81 | Base = CGF.EmitLValueForField(Base, *FI); |
3652 | 81 | for (const auto *Field : |
3653 | 362 | cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { |
3654 | 362 | if (QualType::DestructionKind DtorKind = |
3655 | 162 | Field->getType().isDestructedType()) { |
3656 | 162 | LValue FieldLValue = CGF.EmitLValueForField(Base, Field); |
3657 | 162 | CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType()); |
3658 | 162 | } |
3659 | 362 | } |
3660 | 81 | CGF.FinishFunction(); |
3661 | 81 | return DestructorFn; |
3662 | 81 | } |
3663 | | |
3664 | | /// Emit a privates mapping function for correct handling of private and |
3665 | | /// firstprivate variables. |
3666 | | /// \code |
3667 | | /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1> |
3668 | | /// **noalias priv1,..., <tyn> **noalias privn) { |
3669 | | /// *priv1 = &.privates.priv1; |
3670 | | /// ...; |
3671 | | /// *privn = &.privates.privn; |
3672 | | /// } |
3673 | | /// \endcode |
3674 | | static llvm::Value * |
3675 | | emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, |
3676 | | const OMPTaskDataTy &Data, QualType PrivatesQTy, |
3677 | 554 | ArrayRef<PrivateDataTy> Privates) { |
3678 | 554 | ASTContext &C = CGM.getContext(); |
3679 | 554 | FunctionArgList Args; |
3680 | 554 | ImplicitParamDecl TaskPrivatesArg( |
3681 | 554 | C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, |
3682 | 554 | C.getPointerType(PrivatesQTy).withConst().withRestrict(), |
3683 | 554 | ImplicitParamDecl::Other); |
3684 | 554 | Args.push_back(&TaskPrivatesArg); |
3685 | 554 | llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos; |
3686 | 554 | unsigned Counter = 1; |
3687 | 170 | for (const Expr *E : Data.PrivateVars) { |
3688 | 170 | Args.push_back(ImplicitParamDecl::Create( |
3689 | 170 | C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, |
3690 | 170 | C.getPointerType(C.getPointerType(E->getType())) |
3691 | 170 | .withConst() |
3692 | 170 | .withRestrict(), |
3693 | 170 | ImplicitParamDecl::Other)); |
3694 | 170 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
3695 | 170 | PrivateVarsPos[VD] = Counter; |
3696 | 170 | ++Counter; |
3697 | 170 | } |
3698 | 1.32k | for (const Expr *E : Data.FirstprivateVars) { |
3699 | 1.32k | Args.push_back(ImplicitParamDecl::Create( |
3700 | 1.32k | C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, |
3701 | 1.32k | C.getPointerType(C.getPointerType(E->getType())) |
3702 | 1.32k | .withConst() |
3703 | 1.32k | .withRestrict(), |
3704 | 1.32k | ImplicitParamDecl::Other)); |
3705 | 1.32k | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
3706 | 1.32k | PrivateVarsPos[VD] = Counter; |
3707 | 1.32k | ++Counter; |
3708 | 1.32k | } |
3709 | 151 | for (const Expr *E : Data.LastprivateVars) { |
3710 | 151 | Args.push_back(ImplicitParamDecl::Create( |
3711 | 151 | C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, |
3712 | 151 | C.getPointerType(C.getPointerType(E->getType())) |
3713 | 151 | .withConst() |
3714 | 151 | .withRestrict(), |
3715 | 151 | ImplicitParamDecl::Other)); |
3716 | 151 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
3717 | 151 | PrivateVarsPos[VD] = Counter; |
3718 | 151 | ++Counter; |
3719 | 151 | } |
3720 | 8 | for (const VarDecl *VD : Data.PrivateLocals) { |
3721 | 8 | QualType Ty = VD->getType().getNonReferenceType(); |
3722 | 8 | if (VD->getType()->isLValueReferenceType()) |
3723 | 0 | Ty = C.getPointerType(Ty); |
3724 | 8 | if (isAllocatableDecl(VD)) |
3725 | 2 | Ty = C.getPointerType(Ty); |
3726 | 8 | Args.push_back(ImplicitParamDecl::Create( |
3727 | 8 | C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, |
3728 | 8 | C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(), |
3729 | 8 | ImplicitParamDecl::Other)); |
3730 | 8 | PrivateVarsPos[VD] = Counter; |
3731 | 8 | ++Counter; |
3732 | 8 | } |
3733 | 554 | const auto &TaskPrivatesMapFnInfo = |
3734 | 554 | CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); |
3735 | 554 | llvm::FunctionType *TaskPrivatesMapTy = |
3736 | 554 | CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); |
3737 | 554 | std::string Name = |
3738 | 554 | CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); |
3739 | 554 | auto *TaskPrivatesMap = llvm::Function::Create( |
3740 | 554 | TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, |
3741 | 554 | &CGM.getModule()); |
3742 | 554 | CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, |
3743 | 554 | TaskPrivatesMapFnInfo); |
3744 | 554 | if (CGM.getLangOpts().Optimize) { |
3745 | 0 | TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); |
3746 | 0 | TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); |
3747 | 0 | TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); |
3748 | 0 | } |
3749 | 554 | CodeGenFunction CGF(CGM); |
3750 | 554 | CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, |
3751 | 554 | TaskPrivatesMapFnInfo, Args, Loc, Loc); |
3752 | | |
3753 | | // *privi = &.privates.privi; |
3754 | 554 | LValue Base = CGF.EmitLoadOfPointerLValue( |
3755 | 554 | CGF.GetAddrOfLocalVar(&TaskPrivatesArg), |
3756 | 554 | TaskPrivatesArg.getType()->castAs<PointerType>()); |
3757 | 554 | const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); |
3758 | 554 | Counter = 0; |
3759 | 1.65k | for (const FieldDecl *Field : PrivatesQTyRD->fields()) { |
3760 | 1.65k | LValue FieldLVal = CGF.EmitLValueForField(Base, Field); |
3761 | 1.65k | const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; |
3762 | 1.65k | LValue RefLVal = |
3763 | 1.65k | CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); |
3764 | 1.65k | LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( |
3765 | 1.65k | RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>()); |
3766 | 1.65k | CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal); |
3767 | 1.65k | ++Counter; |
3768 | 1.65k | } |
3769 | 554 | CGF.FinishFunction(); |
3770 | 554 | return TaskPrivatesMap; |
3771 | 554 | } |
3772 | | |
3773 | | /// Emit initialization for private variables in task-based directives. |
3774 | | static void emitPrivatesInit(CodeGenFunction &CGF, |
3775 | | const OMPExecutableDirective &D, |
3776 | | Address KmpTaskSharedsPtr, LValue TDBase, |
3777 | | const RecordDecl *KmpTaskTWithPrivatesQTyRD, |
3778 | | QualType SharedsTy, QualType SharedsPtrTy, |
3779 | | const OMPTaskDataTy &Data, |
3780 | 653 | ArrayRef<PrivateDataTy> Privates, bool ForDup) { |
3781 | 653 | ASTContext &C = CGF.getContext(); |
3782 | 653 | auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); |
3783 | 653 | LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); |
3784 | 653 | OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) |
3785 | 250 | ? OMPD_taskloop |
3786 | 403 | : OMPD_task; |
3787 | 653 | const CapturedStmt &CS = *D.getCapturedStmt(Kind); |
3788 | 653 | CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); |
3789 | 653 | LValue SrcBase; |
3790 | 653 | bool IsTargetTask = |
3791 | 653 | isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || |
3792 | 573 | isOpenMPTargetExecutionDirective(D.getDirectiveKind()); |
3793 | | // For target-based directives skip 4 firstprivate arrays BasePointersArray, |
3794 | | // PointersArray, SizesArray, and MappersArray. The original variables for |
3795 | | // these arrays are not captured and we get their addresses explicitly. |
3796 | 653 | if ((!IsTargetTask && !Data.FirstprivateVars.empty()309 && ForDup137 ) || |
3797 | 627 | (IsTargetTask && KmpTaskSharedsPtr.isValid()344 )) { |
3798 | 322 | SrcBase = CGF.MakeAddrLValue( |
3799 | 322 | CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
3800 | 322 | KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), |
3801 | 322 | SharedsTy); |
3802 | 322 | } |
3803 | 653 | FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); |
3804 | 2.01k | for (const PrivateDataTy &Pair : Privates) { |
3805 | | // Do not initialize private locals. |
3806 | 2.01k | if (Pair.second.isLocalPrivate()) { |
3807 | 8 | ++FI; |
3808 | 8 | continue; |
3809 | 8 | } |
3810 | 2.01k | const VarDecl *VD = Pair.second.PrivateCopy; |
3811 | 2.01k | const Expr *Init = VD->getAnyInitializer(); |
3812 | 2.01k | if (Init && (1.63k !ForDup1.63k || (206 isa<CXXConstructExpr>(Init)206 && |
3813 | 1.57k | !CGF.isTrivialInitializer(Init)146 ))) { |
3814 | 1.57k | LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); |
3815 | 1.57k | if (const VarDecl *Elem = Pair.second.PrivateElemInit) { |
3816 | 1.37k | const VarDecl *OriginalVD = Pair.second.Original; |
3817 | | // Check if the variable is the target-based BasePointersArray, |
3818 | | // PointersArray, SizesArray, or MappersArray. |
3819 | 1.37k | LValue SharedRefLValue; |
3820 | 1.37k | QualType Type = PrivateLValue.getType(); |
3821 | 1.37k | const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); |
3822 | 1.37k | if (IsTargetTask && !SharedField1.06k ) { |
3823 | 674 | assert(isa<ImplicitParamDecl>(OriginalVD) && |
3824 | 674 | isa<CapturedDecl>(OriginalVD->getDeclContext()) && |
3825 | 674 | cast<CapturedDecl>(OriginalVD->getDeclContext()) |
3826 | 674 | ->getNumParams() == 0 && |
3827 | 674 | isa<TranslationUnitDecl>( |
3828 | 674 | cast<CapturedDecl>(OriginalVD->getDeclContext()) |
3829 | 674 | ->getDeclContext()) && |
3830 | 674 | "Expected artificial target data variable."); |
3831 | 674 | SharedRefLValue = |
3832 | 674 | CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); |
3833 | 697 | } else if (ForDup) { |
3834 | 50 | SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); |
3835 | 50 | SharedRefLValue = CGF.MakeAddrLValue( |
3836 | 50 | Address(SharedRefLValue.getPointer(CGF), |
3837 | 50 | C.getDeclAlign(OriginalVD)), |
3838 | 50 | SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), |
3839 | 50 | SharedRefLValue.getTBAAInfo()); |
3840 | 647 | } else if (CGF.LambdaCaptureFields.count( |
3841 | 647 | Pair.second.Original->getCanonicalDecl()) > 0 || |
3842 | 645 | dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) { |
3843 | 13 | SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); |
3844 | 634 | } else { |
3845 | | // Processing for implicitly captured variables. |
3846 | 634 | InlinedOpenMPRegionRAII Region( |
3847 | 0 | CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown, |
3848 | 634 | /*HasCancel=*/false); |
3849 | 634 | SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); |
3850 | 634 | } |
3851 | 1.37k | if (Type->isArrayType()) { |
3852 | | // Initialize firstprivate array. |
3853 | 767 | if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)52 ) { |
3854 | | // Perform simple memcpy. |
3855 | 715 | CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); |
3856 | 52 | } else { |
3857 | | // Initialize firstprivate array using element-by-element |
3858 | | // initialization. |
3859 | 52 | CGF.EmitOMPAggregateAssign( |
3860 | 52 | PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF), |
3861 | 52 | Type, |
3862 | 52 | [&CGF, Elem, Init, &CapturesInfo](Address DestElement, |
3863 | 52 | Address SrcElement) { |
3864 | | // Clean up any temporaries needed by the initialization. |
3865 | 52 | CodeGenFunction::OMPPrivateScope InitScope(CGF); |
3866 | 52 | InitScope.addPrivate( |
3867 | 52 | Elem, [SrcElement]() -> Address { return SrcElement; }); |
3868 | 52 | (void)InitScope.Privatize(); |
3869 | | // Emit initialization for single element. |
3870 | 52 | CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( |
3871 | 52 | CGF, &CapturesInfo); |
3872 | 52 | CGF.EmitAnyExprToMem(Init, DestElement, |
3873 | 52 | Init->getType().getQualifiers(), |
3874 | 52 | /*IsInitializer=*/false); |
3875 | 52 | }); |
3876 | 52 | } |
3877 | 604 | } else { |
3878 | 604 | CodeGenFunction::OMPPrivateScope InitScope(CGF); |
3879 | 604 | InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address { |
3880 | 604 | return SharedRefLValue.getAddress(CGF); |
3881 | 604 | }); |
3882 | 604 | (void)InitScope.Privatize(); |
3883 | 604 | CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); |
3884 | 604 | CGF.EmitExprAsInit(Init, VD, PrivateLValue, |
3885 | 604 | /*capturedByInit=*/false); |
3886 | 604 | } |
3887 | 202 | } else { |
3888 | 202 | CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); |
3889 | 202 | } |
3890 | 1.57k | } |
3891 | 2.01k | ++FI; |
3892 | 2.01k | } |
3893 | 653 | } |
3894 | | |
3895 | | /// Check if duplication function is required for taskloops. |
3896 | | static bool checkInitIsRequired(CodeGenFunction &CGF, |
3897 | 102 | ArrayRef<PrivateDataTy> Privates) { |
3898 | 102 | bool InitRequired = false; |
3899 | 206 | for (const PrivateDataTy &Pair : Privates) { |
3900 | 206 | if (Pair.second.isLocalPrivate()) |
3901 | 0 | continue; |
3902 | 206 | const VarDecl *VD = Pair.second.PrivateCopy; |
3903 | 206 | const Expr *Init = VD->getAnyInitializer(); |
3904 | 206 | InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init)146 && |
3905 | 50 | !CGF.isTrivialInitializer(Init)); |
3906 | 206 | if (InitRequired) |
3907 | 50 | break; |
3908 | 206 | } |
3909 | 102 | return InitRequired; |
3910 | 102 | } |
3911 | | |
3912 | | |
3913 | | /// Emit task_dup function (for initialization of |
3914 | | /// private/firstprivate/lastprivate vars and last_iter flag) |
3915 | | /// \code |
3916 | | /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int |
3917 | | /// lastpriv) { |
3918 | | /// // setup lastprivate flag |
3919 | | /// task_dst->last = lastpriv; |
3920 | | /// // could be constructor calls here... |
3921 | | /// } |
3922 | | /// \endcode |
3923 | | static llvm::Value * |
3924 | | emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, |
3925 | | const OMPExecutableDirective &D, |
3926 | | QualType KmpTaskTWithPrivatesPtrQTy, |
3927 | | const RecordDecl *KmpTaskTWithPrivatesQTyRD, |
3928 | | const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, |
3929 | | QualType SharedsPtrTy, const OMPTaskDataTy &Data, |
3930 | 99 | ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { |
3931 | 99 | ASTContext &C = CGM.getContext(); |
3932 | 99 | FunctionArgList Args; |
3933 | 99 | ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, |
3934 | 99 | KmpTaskTWithPrivatesPtrQTy, |
3935 | 99 | ImplicitParamDecl::Other); |
3936 | 99 | ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, |
3937 | 99 | KmpTaskTWithPrivatesPtrQTy, |
3938 | 99 | ImplicitParamDecl::Other); |
3939 | 99 | ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, |
3940 | 99 | ImplicitParamDecl::Other); |
3941 | 99 | Args.push_back(&DstArg); |
3942 | 99 | Args.push_back(&SrcArg); |
3943 | 99 | Args.push_back(&LastprivArg); |
3944 | 99 | const auto &TaskDupFnInfo = |
3945 | 99 | CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); |
3946 | 99 | llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); |
3947 | 99 | std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); |
3948 | 99 | auto *TaskDup = llvm::Function::Create( |
3949 | 99 | TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); |
3950 | 99 | CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); |
3951 | 99 | TaskDup->setDoesNotRecurse(); |
3952 | 99 | CodeGenFunction CGF(CGM); |
3953 | 99 | CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, |
3954 | 99 | Loc); |
3955 | | |
3956 | 99 | LValue TDBase = CGF.EmitLoadOfPointerLValue( |
3957 | 99 | CGF.GetAddrOfLocalVar(&DstArg), |
3958 | 99 | KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); |
3959 | | // task_dst->liter = lastpriv; |
3960 | 99 | if (WithLastIter) { |
3961 | 49 | auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); |
3962 | 49 | LValue Base = CGF.EmitLValueForField( |
3963 | 49 | TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); |
3964 | 49 | LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); |
3965 | 49 | llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( |
3966 | 49 | CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); |
3967 | 49 | CGF.EmitStoreOfScalar(Lastpriv, LILVal); |
3968 | 49 | } |
3969 | | |
3970 | | // Emit initial values for private copies (if any). |
3971 | 99 | assert(!Privates.empty()); |
3972 | 99 | Address KmpTaskSharedsPtr = Address::invalid(); |
3973 | 99 | if (!Data.FirstprivateVars.empty()) { |
3974 | 26 | LValue TDBase = CGF.EmitLoadOfPointerLValue( |
3975 | 26 | CGF.GetAddrOfLocalVar(&SrcArg), |
3976 | 26 | KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); |
3977 | 26 | LValue Base = CGF.EmitLValueForField( |
3978 | 26 | TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); |
3979 | 26 | KmpTaskSharedsPtr = Address( |
3980 | 26 | CGF.EmitLoadOfScalar(CGF.EmitLValueForField( |
3981 | 26 | Base, *std::next(KmpTaskTQTyRD->field_begin(), |
3982 | 26 | KmpTaskTShareds)), |
3983 | 26 | Loc), |
3984 | 26 | CGM.getNaturalTypeAlignment(SharedsTy)); |
3985 | 26 | } |
3986 | 99 | emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, |
3987 | 99 | SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); |
3988 | 99 | CGF.FinishFunction(); |
3989 | 99 | return TaskDup; |
3990 | 99 | } |
3991 | | |
3992 | | /// Checks if destructor function is required to be generated. |
3993 | | /// \return true if cleanups are required, false otherwise. |
3994 | | static bool |
3995 | | checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, |
3996 | 554 | ArrayRef<PrivateDataTy> Privates) { |
3997 | 1.44k | for (const PrivateDataTy &P : Privates) { |
3998 | 1.44k | if (P.second.isLocalPrivate()) |
3999 | 8 | continue; |
4000 | 1.44k | QualType Ty = P.second.Original->getType().getNonReferenceType(); |
4001 | 1.44k | if (Ty.isDestructedType()) |
4002 | 81 | return true; |
4003 | 1.44k | } |
4004 | 473 | return false; |
4005 | 554 | } |
4006 | | |
4007 | | namespace { |
4008 | | /// Loop generator for OpenMP iterator expression. |
4009 | | class OMPIteratorGeneratorScope final |
4010 | | : public CodeGenFunction::OMPPrivateScope { |
4011 | | CodeGenFunction &CGF; |
4012 | | const OMPIteratorExpr *E = nullptr; |
4013 | | SmallVector<CodeGenFunction::JumpDest, 4> ContDests; |
4014 | | SmallVector<CodeGenFunction::JumpDest, 4> ExitDests; |
4015 | | OMPIteratorGeneratorScope() = delete; |
4016 | | OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete; |
4017 | | |
4018 | | public: |
4019 | | OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E) |
4020 | 424 | : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) { |
4021 | 424 | if (!E) |
4022 | 418 | return; |
4023 | 6 | SmallVector<llvm::Value *, 4> Uppers; |
4024 | 12 | for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I6 ) { |
4025 | 6 | Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper)); |
4026 | 6 | const auto *VD = cast<VarDecl>(E->getIteratorDecl(I)); |
4027 | 6 | addPrivate(VD, [&CGF, VD]() { |
4028 | 6 | return CGF.CreateMemTemp(VD->getType(), VD->getName()); |
4029 | 6 | }); |
4030 | 6 | const OMPIteratorHelperData &HelperData = E->getHelper(I); |
4031 | 6 | addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() { |
4032 | 6 | return CGF.CreateMemTemp(HelperData.CounterVD->getType(), |
4033 | 6 | "counter.addr"); |
4034 | 6 | }); |
4035 | 6 | } |
4036 | 6 | Privatize(); |
4037 | | |
4038 | 12 | for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I6 ) { |
4039 | 6 | const OMPIteratorHelperData &HelperData = E->getHelper(I); |
4040 | 6 | LValue CLVal = |
4041 | 6 | CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD), |
4042 | 6 | HelperData.CounterVD->getType()); |
4043 | | // Counter = 0; |
4044 | 6 | CGF.EmitStoreOfScalar( |
4045 | 6 | llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0), |
4046 | 6 | CLVal); |
4047 | 6 | CodeGenFunction::JumpDest &ContDest = |
4048 | 6 | ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont")); |
4049 | 6 | CodeGenFunction::JumpDest &ExitDest = |
4050 | 6 | ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit")); |
4051 | | // N = <number-of_iterations>; |
4052 | 6 | llvm::Value *N = Uppers[I]; |
4053 | | // cont: |
4054 | | // if (Counter < N) goto body; else goto exit; |
4055 | 6 | CGF.EmitBlock(ContDest.getBlock()); |
4056 | 6 | auto *CVal = |
4057 | 6 | CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation()); |
4058 | 6 | llvm::Value *Cmp = |
4059 | 6 | HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType() |
4060 | 4 | ? CGF.Builder.CreateICmpSLT(CVal, N) |
4061 | 2 | : CGF.Builder.CreateICmpULT(CVal, N); |
4062 | 6 | llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body"); |
4063 | 6 | CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock()); |
4064 | | // body: |
4065 | 6 | CGF.EmitBlock(BodyBB); |
4066 | | // Iteri = Begini + Counter * Stepi; |
4067 | 6 | CGF.EmitIgnoredExpr(HelperData.Update); |
4068 | 6 | } |
4069 | 6 | } |
4070 | 424 | ~OMPIteratorGeneratorScope() { |
4071 | 424 | if (!E) |
4072 | 418 | return; |
4073 | 12 | for (unsigned I = E->numOfIterators(); 6 I > 0; --I6 ) { |
4074 | | // Counter = Counter + 1; |
4075 | 6 | const OMPIteratorHelperData &HelperData = E->getHelper(I - 1); |
4076 | 6 | CGF.EmitIgnoredExpr(HelperData.CounterUpdate); |
4077 | | // goto cont; |
4078 | 6 | CGF.EmitBranchThroughCleanup(ContDests[I - 1]); |
4079 | | // exit: |
4080 | 6 | CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1); |
4081 | 6 | } |
4082 | 6 | } |
4083 | | }; |
4084 | | } // namespace |
4085 | | |
4086 | | static std::pair<llvm::Value *, llvm::Value *> |
4087 | 958 | getPointerAndSize(CodeGenFunction &CGF, const Expr *E) { |
4088 | 958 | const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E); |
4089 | 958 | llvm::Value *Addr; |
4090 | 958 | if (OASE) { |
4091 | 6 | const Expr *Base = OASE->getBase(); |
4092 | 6 | Addr = CGF.EmitScalarExpr(Base); |
4093 | 952 | } else { |
4094 | 952 | Addr = CGF.EmitLValue(E).getPointer(CGF); |
4095 | 952 | } |
4096 | 958 | llvm::Value *SizeVal; |
4097 | 958 | QualType Ty = E->getType(); |
4098 | 958 | if (OASE) { |
4099 | 6 | SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType()); |
4100 | 18 | for (const Expr *SE : OASE->getDimensions()) { |
4101 | 18 | llvm::Value *Sz = CGF.EmitScalarExpr(SE); |
4102 | 18 | Sz = CGF.EmitScalarConversion( |
4103 | 18 | Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc()); |
4104 | 18 | SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz); |
4105 | 18 | } |
4106 | 952 | } else if (const auto *ASE = |
4107 | 28 | dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { |
4108 | 28 | LValue UpAddrLVal = |
4109 | 28 | CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false); |
4110 | 28 | llvm::Value *UpAddr = |
4111 | 28 | CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1); |
4112 | 28 | llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy); |
4113 | 28 | llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy); |
4114 | 28 | SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); |
4115 | 924 | } else { |
4116 | 924 | SizeVal = CGF.getTypeSize(Ty); |
4117 | 924 | } |
4118 | 958 | return std::make_pair(Addr, SizeVal); |
4119 | 958 | } |
4120 | | |
4121 | | /// Builds kmp_depend_info, if it is not built yet, and builds flags type. |
4122 | 4 | static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) { |
4123 | 4 | QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false); |
4124 | 4 | if (KmpTaskAffinityInfoTy.isNull()) { |
4125 | 2 | RecordDecl *KmpAffinityInfoRD = |
4126 | 2 | C.buildImplicitRecord("kmp_task_affinity_info_t"); |
4127 | 2 | KmpAffinityInfoRD->startDefinition(); |
4128 | 2 | addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType()); |
4129 | 2 | addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType()); |
4130 | 2 | addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy); |
4131 | 2 | KmpAffinityInfoRD->completeDefinition(); |
4132 | 2 | KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD); |
4133 | 2 | } |
4134 | 4 | } |
4135 | | |
4136 | | CGOpenMPRuntime::TaskResultTy |
4137 | | CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, |
4138 | | const OMPExecutableDirective &D, |
4139 | | llvm::Function *TaskFunction, QualType SharedsTy, |
4140 | 855 | Address Shareds, const OMPTaskDataTy &Data) { |
4141 | 855 | ASTContext &C = CGM.getContext(); |
4142 | 855 | llvm::SmallVector<PrivateDataTy, 4> Privates; |
4143 | | // Aggregate privates and sort them by the alignment. |
4144 | 855 | const auto *I = Data.PrivateCopies.begin(); |
4145 | 170 | for (const Expr *E : Data.PrivateVars) { |
4146 | 170 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
4147 | 170 | Privates.emplace_back( |
4148 | 170 | C.getDeclAlign(VD), |
4149 | 170 | PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), |
4150 | 170 | /*PrivateElemInit=*/nullptr)); |
4151 | 170 | ++I; |
4152 | 170 | } |
4153 | 855 | I = Data.FirstprivateCopies.begin(); |
4154 | 855 | const auto *IElemInitRef = Data.FirstprivateInits.begin(); |
4155 | 1.32k | for (const Expr *E : Data.FirstprivateVars) { |
4156 | 1.32k | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
4157 | 1.32k | Privates.emplace_back( |
4158 | 1.32k | C.getDeclAlign(VD), |
4159 | 1.32k | PrivateHelpersTy( |
4160 | 1.32k | E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), |
4161 | 1.32k | cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))); |
4162 | 1.32k | ++I; |
4163 | 1.32k | ++IElemInitRef; |
4164 | 1.32k | } |
4165 | 855 | I = Data.LastprivateCopies.begin(); |
4166 | 151 | for (const Expr *E : Data.LastprivateVars) { |
4167 | 151 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
4168 | 151 | Privates.emplace_back( |
4169 | 151 | C.getDeclAlign(VD), |
4170 | 151 | PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), |
4171 | 151 | /*PrivateElemInit=*/nullptr)); |
4172 | 151 | ++I; |
4173 | 151 | } |
4174 | 8 | for (const VarDecl *VD : Data.PrivateLocals) { |
4175 | 8 | if (isAllocatableDecl(VD)) |
4176 | 2 | Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD)); |
4177 | 6 | else |
4178 | 6 | Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD)); |
4179 | 8 | } |
4180 | 855 | llvm::stable_sort(Privates, |
4181 | 1.60k | [](const PrivateDataTy &L, const PrivateDataTy &R) { |
4182 | 1.60k | return L.first > R.first; |
4183 | 1.60k | }); |
4184 | 855 | QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); |
4185 | | // Build type kmp_routine_entry_t (if not built yet). |
4186 | 855 | emitKmpRoutineEntryT(KmpInt32Ty); |
4187 | | // Build type kmp_task_t (if not built yet). |
4188 | 855 | if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) { |
4189 | 226 | if (SavedKmpTaskloopTQTy.isNull()) { |
4190 | 128 | SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl( |
4191 | 128 | CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); |
4192 | 128 | } |
4193 | 226 | KmpTaskTQTy = SavedKmpTaskloopTQTy; |
4194 | 629 | } else { |
4195 | 629 | assert((D.getDirectiveKind() == OMPD_task || |
4196 | 629 | isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || |
4197 | 629 | isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && |
4198 | 629 | "Expected taskloop, task or target directive"); |
4199 | 629 | if (SavedKmpTaskTQTy.isNull()) { |
4200 | 269 | SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( |
4201 | 269 | CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); |
4202 | 269 | } |
4203 | 629 | KmpTaskTQTy = SavedKmpTaskTQTy; |
4204 | 629 | } |
4205 | 855 | const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); |
4206 | | // Build particular struct kmp_task_t for the given task. |
4207 | 855 | const RecordDecl *KmpTaskTWithPrivatesQTyRD = |
4208 | 855 | createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); |
4209 | 855 | QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); |
4210 | 855 | QualType KmpTaskTWithPrivatesPtrQTy = |
4211 | 855 | C.getPointerType(KmpTaskTWithPrivatesQTy); |
4212 | 855 | llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); |
4213 | 855 | llvm::Type *KmpTaskTWithPrivatesPtrTy = |
4214 | 855 | KmpTaskTWithPrivatesTy->getPointerTo(); |
4215 | 855 | llvm::Value *KmpTaskTWithPrivatesTySize = |
4216 | 855 | CGF.getTypeSize(KmpTaskTWithPrivatesQTy); |
4217 | 855 | QualType SharedsPtrTy = C.getPointerType(SharedsTy); |
4218 | | |
4219 | | // Emit initial values for private copies (if any). |
4220 | 855 | llvm::Value *TaskPrivatesMap = nullptr; |
4221 | 855 | llvm::Type *TaskPrivatesMapTy = |
4222 | 855 | std::next(TaskFunction->arg_begin(), 3)->getType(); |
4223 | 855 | if (!Privates.empty()) { |
4224 | 554 | auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); |
4225 | 554 | TaskPrivatesMap = |
4226 | 554 | emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates); |
4227 | 554 | TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
4228 | 554 | TaskPrivatesMap, TaskPrivatesMapTy); |
4229 | 301 | } else { |
4230 | 301 | TaskPrivatesMap = llvm::ConstantPointerNull::get( |
4231 | 301 | cast<llvm::PointerType>(TaskPrivatesMapTy)); |
4232 | 301 | } |
4233 | | // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, |
4234 | | // kmp_task_t *tt); |
4235 | 855 | llvm::Function *TaskEntry = emitProxyTaskFunction( |
4236 | 855 | CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, |
4237 | 855 | KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, |
4238 | 855 | TaskPrivatesMap); |
4239 | | |
4240 | | // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, |
4241 | | // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, |
4242 | | // kmp_routine_entry_t *task_entry); |
4243 | | // Task flags. Format is taken from |
4244 | | // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h, |
4245 | | // description of kmp_tasking_flags struct. |
4246 | 855 | enum { |
4247 | 855 | TiedFlag = 0x1, |
4248 | 855 | FinalFlag = 0x2, |
4249 | 855 | DestructorsFlag = 0x8, |
4250 | 855 | PriorityFlag = 0x20, |
4251 | 855 | DetachableFlag = 0x40, |
4252 | 855 | }; |
4253 | 839 | unsigned Flags = Data.Tied ? TiedFlag : 016 ; |
4254 | 855 | bool NeedsCleanup = false; |
4255 | 855 | if (!Privates.empty()) { |
4256 | 554 | NeedsCleanup = |
4257 | 554 | checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates); |
4258 | 554 | if (NeedsCleanup) |
4259 | 81 | Flags = Flags | DestructorsFlag; |
4260 | 554 | } |
4261 | 855 | if (Data.Priority.getInt()) |
4262 | 22 | Flags = Flags | PriorityFlag; |
4263 | 855 | if (D.hasClausesOfKind<OMPDetachClause>()) |
4264 | 2 | Flags = Flags | DetachableFlag; |
4265 | 855 | llvm::Value *TaskFlags = |
4266 | 855 | Data.Final.getPointer() |
4267 | 10 | ? CGF.Builder.CreateSelect(Data.Final.getPointer(), |
4268 | 10 | CGF.Builder.getInt32(FinalFlag), |
4269 | 10 | CGF.Builder.getInt32(/*C=*/0)) |
4270 | 845 | : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag8 : 0837 ); |
4271 | 855 | TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); |
4272 | 855 | llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); |
4273 | 855 | SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc), |
4274 | 855 | getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize, |
4275 | 855 | SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
4276 | 855 | TaskEntry, KmpRoutineEntryPtrTy)}; |
4277 | 855 | llvm::Value *NewTask; |
4278 | 855 | if (D.hasClausesOfKind<OMPNowaitClause>()) { |
4279 | | // Check if we have any device clause associated with the directive. |
4280 | 284 | const Expr *Device = nullptr; |
4281 | 284 | if (auto *C = D.getSingleClause<OMPDeviceClause>()) |
4282 | 166 | Device = C->getDevice(); |
4283 | | // Emit device ID if any otherwise use default value. |
4284 | 284 | llvm::Value *DeviceID; |
4285 | 284 | if (Device) |
4286 | 166 | DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), |
4287 | 166 | CGF.Int64Ty, /*isSigned=*/true); |
4288 | 118 | else |
4289 | 118 | DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); |
4290 | 284 | AllocArgs.push_back(DeviceID); |
4291 | 284 | NewTask = CGF.EmitRuntimeCall( |
4292 | 284 | OMPBuilder.getOrCreateRuntimeFunction( |
4293 | 284 | CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc), |
4294 | 284 | AllocArgs); |
4295 | 571 | } else { |
4296 | 571 | NewTask = |
4297 | 571 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
4298 | 571 | CGM.getModule(), OMPRTL___kmpc_omp_task_alloc), |
4299 | 571 | AllocArgs); |
4300 | 571 | } |
4301 | | // Emit detach clause initialization. |
4302 | | // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid, |
4303 | | // task_descriptor); |
4304 | 855 | if (const auto *DC = D.getSingleClause<OMPDetachClause>()) { |
4305 | 2 | const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts(); |
4306 | 2 | LValue EvtLVal = CGF.EmitLValue(Evt); |
4307 | | |
4308 | | // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref, |
4309 | | // int gtid, kmp_task_t *task); |
4310 | 2 | llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc()); |
4311 | 2 | llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc()); |
4312 | 2 | Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false); |
4313 | 2 | llvm::Value *EvtVal = CGF.EmitRuntimeCall( |
4314 | 2 | OMPBuilder.getOrCreateRuntimeFunction( |
4315 | 2 | CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event), |
4316 | 2 | {Loc, Tid, NewTask}); |
4317 | 2 | EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(), |
4318 | 2 | Evt->getExprLoc()); |
4319 | 2 | CGF.EmitStoreOfScalar(EvtVal, EvtLVal); |
4320 | 2 | } |
4321 | | // Process affinity clauses. |
4322 | 855 | if (D.hasClausesOfKind<OMPAffinityClause>()) { |
4323 | | // Process list of affinity data. |
4324 | 4 | ASTContext &C = CGM.getContext(); |
4325 | 4 | Address AffinitiesArray = Address::invalid(); |
4326 | | // Calculate number of elements to form the array of affinity data. |
4327 | 4 | llvm::Value *NumOfElements = nullptr; |
4328 | 4 | unsigned NumAffinities = 0; |
4329 | 6 | for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { |
4330 | 6 | if (const Expr *Modifier = C->getModifier()) { |
4331 | 2 | const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()); |
4332 | 4 | for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I2 ) { |
4333 | 2 | llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); |
4334 | 2 | Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); |
4335 | 2 | NumOfElements = |
4336 | 2 | NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz)0 : Sz; |
4337 | 2 | } |
4338 | 4 | } else { |
4339 | 4 | NumAffinities += C->varlist_size(); |
4340 | 4 | } |
4341 | 6 | } |
4342 | 4 | getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy); |
4343 | | // Fields ids in kmp_task_affinity_info record. |
4344 | 4 | enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags }; |
4345 | | |
4346 | 4 | QualType KmpTaskAffinityInfoArrayTy; |
4347 | 4 | if (NumOfElements) { |
4348 | 2 | NumOfElements = CGF.Builder.CreateNUWAdd( |
4349 | 2 | llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements); |
4350 | 2 | OpaqueValueExpr OVE( |
4351 | 2 | Loc, |
4352 | 2 | C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0), |
4353 | 2 | VK_RValue); |
4354 | 2 | CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, |
4355 | 2 | RValue::get(NumOfElements)); |
4356 | 2 | KmpTaskAffinityInfoArrayTy = |
4357 | 2 | C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal, |
4358 | 2 | /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); |
4359 | | // Properly emit variable-sized array. |
4360 | 2 | auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy, |
4361 | 2 | ImplicitParamDecl::Other); |
4362 | 2 | CGF.EmitVarDecl(*PD); |
4363 | 2 | AffinitiesArray = CGF.GetAddrOfLocalVar(PD); |
4364 | 2 | NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, |
4365 | 2 | /*isSigned=*/false); |
4366 | 2 | } else { |
4367 | 2 | KmpTaskAffinityInfoArrayTy = C.getConstantArrayType( |
4368 | 2 | KmpTaskAffinityInfoTy, |
4369 | 2 | llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr, |
4370 | 2 | ArrayType::Normal, /*IndexTypeQuals=*/0); |
4371 | 2 | AffinitiesArray = |
4372 | 2 | CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr"); |
4373 | 2 | AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0); |
4374 | 2 | NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities, |
4375 | 2 | /*isSigned=*/false); |
4376 | 2 | } |
4377 | | |
4378 | 4 | const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl(); |
4379 | | // Fill array by elements without iterators. |
4380 | 4 | unsigned Pos = 0; |
4381 | 4 | bool HasIterator = false; |
4382 | 6 | for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { |
4383 | 6 | if (C->getModifier()) { |
4384 | 2 | HasIterator = true; |
4385 | 2 | continue; |
4386 | 2 | } |
4387 | 4 | for (const Expr *E : C->varlists()) { |
4388 | 4 | llvm::Value *Addr; |
4389 | 4 | llvm::Value *Size; |
4390 | 4 | std::tie(Addr, Size) = getPointerAndSize(CGF, E); |
4391 | 4 | LValue Base = |
4392 | 4 | CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos), |
4393 | 4 | KmpTaskAffinityInfoTy); |
4394 | | // affs[i].base_addr = &<Affinities[i].second>; |
4395 | 4 | LValue BaseAddrLVal = CGF.EmitLValueForField( |
4396 | 4 | Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); |
4397 | 4 | CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), |
4398 | 4 | BaseAddrLVal); |
4399 | | // affs[i].len = sizeof(<Affinities[i].second>); |
4400 | 4 | LValue LenLVal = CGF.EmitLValueForField( |
4401 | 4 | Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); |
4402 | 4 | CGF.EmitStoreOfScalar(Size, LenLVal); |
4403 | 4 | ++Pos; |
4404 | 4 | } |
4405 | 4 | } |
4406 | 4 | LValue PosLVal; |
4407 | 4 | if (HasIterator) { |
4408 | 2 | PosLVal = CGF.MakeAddrLValue( |
4409 | 2 | CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"), |
4410 | 2 | C.getSizeType()); |
4411 | 2 | CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); |
4412 | 2 | } |
4413 | | // Process elements with iterators. |
4414 | 6 | for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { |
4415 | 6 | const Expr *Modifier = C->getModifier(); |
4416 | 6 | if (!Modifier) |
4417 | 4 | continue; |
4418 | 2 | OMPIteratorGeneratorScope IteratorScope( |
4419 | 2 | CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts())); |
4420 | 2 | for (const Expr *E : C->varlists()) { |
4421 | 2 | llvm::Value *Addr; |
4422 | 2 | llvm::Value *Size; |
4423 | 2 | std::tie(Addr, Size) = getPointerAndSize(CGF, E); |
4424 | 2 | llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); |
4425 | 2 | LValue Base = CGF.MakeAddrLValue( |
4426 | 2 | Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx), |
4427 | 2 | AffinitiesArray.getAlignment()), |
4428 | 2 | KmpTaskAffinityInfoTy); |
4429 | | // affs[i].base_addr = &<Affinities[i].second>; |
4430 | 2 | LValue BaseAddrLVal = CGF.EmitLValueForField( |
4431 | 2 | Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); |
4432 | 2 | CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), |
4433 | 2 | BaseAddrLVal); |
4434 | | // affs[i].len = sizeof(<Affinities[i].second>); |
4435 | 2 | LValue LenLVal = CGF.EmitLValueForField( |
4436 | 2 | Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); |
4437 | 2 | CGF.EmitStoreOfScalar(Size, LenLVal); |
4438 | 2 | Idx = CGF.Builder.CreateNUWAdd( |
4439 | 2 | Idx, llvm::ConstantInt::get(Idx->getType(), 1)); |
4440 | 2 | CGF.EmitStoreOfScalar(Idx, PosLVal); |
4441 | 2 | } |
4442 | 2 | } |
4443 | | // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref, |
4444 | | // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 |
4445 | | // naffins, kmp_task_affinity_info_t *affin_list); |
4446 | 4 | llvm::Value *LocRef = emitUpdateLocation(CGF, Loc); |
4447 | 4 | llvm::Value *GTid = getThreadID(CGF, Loc); |
4448 | 4 | llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
4449 | 4 | AffinitiesArray.getPointer(), CGM.VoidPtrTy); |
4450 | | // FIXME: Emit the function and ignore its result for now unless the |
4451 | | // runtime function is properly implemented. |
4452 | 4 | (void)CGF.EmitRuntimeCall( |
4453 | 4 | OMPBuilder.getOrCreateRuntimeFunction( |
4454 | 4 | CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity), |
4455 | 4 | {LocRef, GTid, NewTask, NumOfElements, AffinListPtr}); |
4456 | 4 | } |
4457 | 855 | llvm::Value *NewTaskNewTaskTTy = |
4458 | 855 | CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
4459 | 855 | NewTask, KmpTaskTWithPrivatesPtrTy); |
4460 | 855 | LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, |
4461 | 855 | KmpTaskTWithPrivatesQTy); |
4462 | 855 | LValue TDBase = |
4463 | 855 | CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); |
4464 | | // Fill the data in the resulting kmp_task_t record. |
4465 | | // Copy shareds if there are any. |
4466 | 855 | Address KmpTaskSharedsPtr = Address::invalid(); |
4467 | 855 | if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { |
4468 | 544 | KmpTaskSharedsPtr = |
4469 | 544 | Address(CGF.EmitLoadOfScalar( |
4470 | 544 | CGF.EmitLValueForField( |
4471 | 544 | TDBase, *std::next(KmpTaskTQTyRD->field_begin(), |
4472 | 544 | KmpTaskTShareds)), |
4473 | 544 | Loc), |
4474 | 544 | CGM.getNaturalTypeAlignment(SharedsTy)); |
4475 | 544 | LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy); |
4476 | 544 | LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy); |
4477 | 544 | CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap); |
4478 | 544 | } |
4479 | | // Emit initial values for private copies (if any). |
4480 | 855 | TaskResultTy Result; |
4481 | 855 | if (!Privates.empty()) { |
4482 | 554 | emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD, |
4483 | 554 | SharedsTy, SharedsPtrTy, Data, Privates, |
4484 | 554 | /*ForDup=*/false); |
4485 | 554 | if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && |
4486 | 151 | (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates)102 )) { |
4487 | 99 | Result.TaskDupFn = emitTaskDupFunction( |
4488 | 99 | CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD, |
4489 | 99 | KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, |
4490 | 99 | /*WithLastIter=*/!Data.LastprivateVars.empty()); |
4491 | 99 | } |
4492 | 554 | } |
4493 | | // Fields of union "kmp_cmplrdata_t" for destructors and priority. |
4494 | 855 | enum { Priority = 0, Destructors = 1 }; |
4495 | | // Provide pointer to function with destructors for privates. |
4496 | 855 | auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1); |
4497 | 855 | const RecordDecl *KmpCmplrdataUD = |
4498 | 855 | (*FI)->getType()->getAsUnionType()->getDecl(); |
4499 | 855 | if (NeedsCleanup) { |
4500 | 81 | llvm::Value *DestructorFn = emitDestructorsFunction( |
4501 | 81 | CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, |
4502 | 81 | KmpTaskTWithPrivatesQTy); |
4503 | 81 | LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI); |
4504 | 81 | LValue DestructorsLV = CGF.EmitLValueForField( |
4505 | 81 | Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors)); |
4506 | 81 | CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
4507 | 81 | DestructorFn, KmpRoutineEntryPtrTy), |
4508 | 81 | DestructorsLV); |
4509 | 81 | } |
4510 | | // Set priority. |
4511 | 855 | if (Data.Priority.getInt()) { |
4512 | 22 | LValue Data2LV = CGF.EmitLValueForField( |
4513 | 22 | TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2)); |
4514 | 22 | LValue PriorityLV = CGF.EmitLValueForField( |
4515 | 22 | Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority)); |
4516 | 22 | CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV); |
4517 | 22 | } |
4518 | 855 | Result.NewTask = NewTask; |
4519 | 855 | Result.TaskEntry = TaskEntry; |
4520 | 855 | Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy; |
4521 | 855 | Result.TDBase = TDBase; |
4522 | 855 | Result.KmpTaskTQTyRD = KmpTaskTQTyRD; |
4523 | 855 | return Result; |
4524 | 855 | } |
4525 | | |
4526 | | namespace { |
4527 | | /// Dependence kinds as encoded for the runtime library; the selected value is stored in the flags field of a kmp_depend_info record. |
4528 | | enum RTLDependenceKindTy { |
4529 | | DepIn = 0x01, |
4530 | | DepInOut = 0x3, |
4531 | | DepMutexInOutSet = 0x4 |
4532 | | }; |
4533 | | /// Field indices in the kmp_depend_info record, matching the order in which getDependTypes adds the fields (base address, length, flags). |
4534 | | enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; |
4535 | | } // namespace |
4536 | | |
4537 | | /// Translates the front-end depend clause kind \p K into the runtime dependence kind: in maps to DepIn, out and inout map to DepInOut, mutexinoutset maps to DepMutexInOutSet. Any other kind (source, sink, depobj, unknown) is treated as unreachable. |
4538 | 956 | static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) { |
4539 | 956 | RTLDependenceKindTy DepKind; |
4540 | 956 | switch (K) { |
4541 | 170 | case OMPC_DEPEND_in: |
4542 | 170 | DepKind = DepIn; |
4543 | 170 | break; |
4544 | | // Out and InOut dependencies must use the same code, so the out case falls through to inout. |
4545 | 334 | case OMPC_DEPEND_out: |
4546 | 776 | case OMPC_DEPEND_inout: |
4547 | 776 | DepKind = DepInOut; |
4548 | 776 | break; |
4549 | 10 | case OMPC_DEPEND_mutexinoutset: |
4550 | 10 | DepKind = DepMutexInOutSet; |
4551 | 10 | break; |
4552 | 0 | case OMPC_DEPEND_source: |
4553 | 0 | case OMPC_DEPEND_sink: |
4554 | 0 | case OMPC_DEPEND_depobj: |
4555 | 0 | case OMPC_DEPEND_unknown: |
4556 | 0 | llvm_unreachable("Unknown task dependence type"); |
4557 | 956 | } |
4558 | 956 | return DepKind; |
4559 | 956 | } |
4560 | | |
4561 | | /// Sets \p FlagsTy to an unsigned integer type with the bit width of bool and, if \p KmpDependInfoTy is still null, builds the implicit record kmp_depend_info with three fields in this order: a pointer-sized integer (base address), a size_t (length), and a \p FlagsTy (flags). An already built \p KmpDependInfoTy is left untouched. |
4562 | | static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, |
4563 | 784 | QualType &FlagsTy) { |
4564 | 784 | FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); |
4565 | 784 | if (KmpDependInfoTy.isNull()) { |
4566 | 90 | RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); |
4567 | 90 | KmpDependInfoRD->startDefinition(); |
4568 | 90 | addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); |
4569 | 90 | addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); |
4570 | 90 | addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); |
4571 | 90 | KmpDependInfoRD->completeDefinition(); |
4572 | 90 | KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); |
4573 | 90 | } |
4574 | 784 | } |
4575 | | |
4576 | | std::pair<llvm::Value *, LValue> |
4577 | | CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, |
4578 | 4 | SourceLocation Loc) { |
4579 | 4 | ASTContext &C = CGM.getContext(); |
4580 | 4 | QualType FlagsTy; |
4581 | 4 | getDependTypes(C, KmpDependInfoTy, FlagsTy); |
4582 | 4 | RecordDecl *KmpDependInfoRD = |
4583 | 4 | cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); |
4584 | 4 | LValue Base = CGF.EmitLoadOfPointerLValue( |
4585 | 4 | DepobjLVal.getAddress(CGF), |
4586 | 4 | C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); |
4587 | 4 | QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); |
4588 | 4 | Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
4589 | 4 | Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); |
4590 | 4 | Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), |
4591 | 4 | Base.getTBAAInfo()); |
4592 | 4 | llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( |
4593 | 4 | Addr.getPointer(), |
4594 | 4 | llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); |
4595 | 4 | LValue NumDepsBase = CGF.MakeAddrLValue( |
4596 | 4 | Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, |
4597 | 4 | Base.getBaseInfo(), Base.getTBAAInfo()); |
4598 | | // NumDeps = deps[-1].base_addr; the record one element before the depobj array holds the number of dependencies in its base_addr field. The function returns that count together with the lvalue of the first real record. |
4599 | 4 | LValue BaseAddrLVal = CGF.EmitLValueForField( |
4600 | 4 | NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); |
4601 | 4 | llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc); |
4602 | 4 | return std::make_pair(NumDeps, Base); |
4603 | 4 | } |
4604 | | |
4605 | | static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, |
4606 | | llvm::PointerUnion<unsigned *, LValue *> Pos, |
4607 | | const OMPTaskDataTy::DependData &Data, |
4608 | 418 | Address DependenciesArray) { |
4609 | 418 | CodeGenModule &CGM = CGF.CGM; |
4610 | 418 | ASTContext &C = CGM.getContext(); |
4611 | 418 | QualType FlagsTy; |
4612 | 418 | getDependTypes(C, KmpDependInfoTy, FlagsTy); |
4613 | 418 | RecordDecl *KmpDependInfoRD = |
4614 | 418 | cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); |
4615 | 418 | llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); |
4616 | | |
4617 | 418 | OMPIteratorGeneratorScope IteratorScope( |
4618 | 418 | CGF, cast_or_null<OMPIteratorExpr>( |
4619 | 4 | Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() |
4620 | 414 | : nullptr)); |
4621 | 952 | for (const Expr *E : Data.DepExprs) { |
4622 | 952 | llvm::Value *Addr; |
4623 | 952 | llvm::Value *Size; |
4624 | 952 | std::tie(Addr, Size) = getPointerAndSize(CGF, E); |
4625 | 952 | LValue Base; |
4626 | 952 | if (unsigned *P = Pos.dyn_cast<unsigned *>()) { |
4627 | 948 | Base = CGF.MakeAddrLValue( |
4628 | 948 | CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy); |
4629 | 4 | } else { |
4630 | 4 | LValue &PosLVal = *Pos.get<LValue *>(); |
4631 | 4 | llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); |
4632 | 4 | Base = CGF.MakeAddrLValue( |
4633 | 4 | Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx), |
4634 | 4 | DependenciesArray.getAlignment()), |
4635 | 4 | KmpDependInfoTy); |
4636 | 4 | } |
4637 | | // deps[i].base_addr = address of the dependence expression, converted to a pointer-sized integer. Here i is either the compile-time index *P or the index loaded at run time from the position lvalue, depending on which alternative Pos holds. |
4638 | 952 | LValue BaseAddrLVal = CGF.EmitLValueForField( |
4639 | 952 | Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); |
4640 | 952 | CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), |
4641 | 952 | BaseAddrLVal); |
4642 | | // deps[i].len = size of the dependence expression, as returned by getPointerAndSize. |
4643 | 952 | LValue LenLVal = CGF.EmitLValueForField( |
4644 | 952 | Base, *std::next(KmpDependInfoRD->field_begin(), Len)); |
4645 | 952 | CGF.EmitStoreOfScalar(Size, LenLVal); |
4646 | | // deps[i].flags = runtime dependence kind translated from Data.DepKind; afterwards the position is advanced by one, either in the host counter or in the emitted counter variable. |
4647 | 952 | RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind); |
4648 | 952 | LValue FlagsLVal = CGF.EmitLValueForField( |
4649 | 952 | Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); |
4650 | 952 | CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), |
4651 | 952 | FlagsLVal); |
4652 | 952 | if (unsigned *P = Pos.dyn_cast<unsigned *>()) { |
4653 | 948 | ++(*P); |
4654 | 4 | } else { |
4655 | 4 | LValue &PosLVal = *Pos.get<LValue *>(); |
4656 | 4 | llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); |
4657 | 4 | Idx = CGF.Builder.CreateNUWAdd(Idx, |
4658 | 4 | llvm::ConstantInt::get(Idx->getType(), 1)); |
4659 | 4 | CGF.EmitStoreOfScalar(Idx, PosLVal); |
4660 | 4 | } |
4661 | 952 | } |
4662 | 418 | } |
4663 | | |
4664 | | static SmallVector<llvm::Value *, 4> |
4665 | | emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy, |
4666 | 2 | const OMPTaskDataTy::DependData &Data) { |
4667 | 2 | assert(Data.DepKind == OMPC_DEPEND_depobj && |
4668 | 2 | "Expected depobj dependecy kind."); |
4669 | 2 | SmallVector<llvm::Value *, 4> Sizes; |
4670 | 2 | SmallVector<LValue, 4> SizeLVals; |
4671 | 2 | ASTContext &C = CGF.getContext(); |
4672 | 2 | QualType FlagsTy; |
4673 | 2 | getDependTypes(C, KmpDependInfoTy, FlagsTy); |
4674 | 2 | RecordDecl *KmpDependInfoRD = |
4675 | 2 | cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); |
4676 | 2 | QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); |
4677 | 2 | llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy); |
4678 | 2 | { |
4679 | 2 | OMPIteratorGeneratorScope IteratorScope( |
4680 | 2 | CGF, cast_or_null<OMPIteratorExpr>( |
4681 | 0 | Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() |
4682 | 2 | : nullptr)); |
4683 | 4 | for (const Expr *E : Data.DepExprs) { |
4684 | 4 | LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); |
4685 | 4 | LValue Base = CGF.EmitLoadOfPointerLValue( |
4686 | 4 | DepobjLVal.getAddress(CGF), |
4687 | 4 | C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); |
4688 | 4 | Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
4689 | 4 | Base.getAddress(CGF), KmpDependInfoPtrT); |
4690 | 4 | Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), |
4691 | 4 | Base.getTBAAInfo()); |
4692 | 4 | llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( |
4693 | 4 | Addr.getPointer(), |
4694 | 4 | llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); |
4695 | 4 | LValue NumDepsBase = CGF.MakeAddrLValue( |
4696 | 4 | Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, |
4697 | 4 | Base.getBaseInfo(), Base.getTBAAInfo()); |
4698 | | // NumDeps = deps[-1].base_addr; the record one element before the depobj array holds its element count. The count is added into a zero-initialized temporary so it can be re-read after the iterator scope is closed. |
4699 | 4 | LValue BaseAddrLVal = CGF.EmitLValueForField( |
4700 | 4 | NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); |
4701 | 4 | llvm::Value *NumDeps = |
4702 | 4 | CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc()); |
4703 | 4 | LValue NumLVal = CGF.MakeAddrLValue( |
4704 | 4 | CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"), |
4705 | 4 | C.getUIntPtrType()); |
4706 | 4 | CGF.InitTempAlloca(NumLVal.getAddress(CGF), |
4707 | 4 | llvm::ConstantInt::get(CGF.IntPtrTy, 0)); |
4708 | 4 | llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc()); |
4709 | 4 | llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps); |
4710 | 4 | CGF.EmitStoreOfScalar(Add, NumLVal); |
4711 | 4 | SizeLVals.push_back(NumLVal); |
4712 | 4 | } |
4713 | 2 | } |
4714 | 6 | for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I4 ) { |
4715 | 4 | llvm::Value *Size = |
4716 | 4 | CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc()); |
4717 | 4 | Sizes.push_back(Size); |
4718 | 4 | } |
4719 | 2 | return Sizes; |
4720 | 2 | } |
4721 | | |
4722 | | static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy, |
4723 | | LValue PosLVal, |
4724 | | const OMPTaskDataTy::DependData &Data, |
4725 | 2 | Address DependenciesArray) { |
4726 | 2 | assert(Data.DepKind == OMPC_DEPEND_depobj && |
4727 | 2 | "Expected depobj dependecy kind."); |
4728 | 2 | ASTContext &C = CGF.getContext(); |
4729 | 2 | QualType FlagsTy; |
4730 | 2 | getDependTypes(C, KmpDependInfoTy, FlagsTy); |
4731 | 2 | RecordDecl *KmpDependInfoRD = |
4732 | 2 | cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); |
4733 | 2 | QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); |
4734 | 2 | llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy); |
4735 | 2 | llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy); |
4736 | 2 | { |
4737 | 2 | OMPIteratorGeneratorScope IteratorScope( |
4738 | 2 | CGF, cast_or_null<OMPIteratorExpr>( |
4739 | 0 | Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() |
4740 | 2 | : nullptr)); |
4741 | 6 | for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I4 ) { |
4742 | 4 | const Expr *E = Data.DepExprs[I]; |
4743 | 4 | LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); |
4744 | 4 | LValue Base = CGF.EmitLoadOfPointerLValue( |
4745 | 4 | DepobjLVal.getAddress(CGF), |
4746 | 4 | C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); |
4747 | 4 | Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
4748 | 4 | Base.getAddress(CGF), KmpDependInfoPtrT); |
4749 | 4 | Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), |
4750 | 4 | Base.getTBAAInfo()); |
4751 | | |
4752 | | // Get number of elements in a single depobj; it is stored in the record one element before the depobj array. |
4753 | 4 | llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( |
4754 | 4 | Addr.getPointer(), |
4755 | 4 | llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); |
4756 | 4 | LValue NumDepsBase = CGF.MakeAddrLValue( |
4757 | 4 | Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, |
4758 | 4 | Base.getBaseInfo(), Base.getTBAAInfo()); |
4759 | | // NumDeps = deps[-1].base_addr; |
4760 | 4 | LValue BaseAddrLVal = CGF.EmitLValueForField( |
4761 | 4 | NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); |
4762 | 4 | llvm::Value *NumDeps = |
4763 | 4 | CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc()); |
4764 | | |
4765 | | // Copy NumDeps records (NumDeps * sizeof(kmp_depend_info) bytes) from the depobj into DependenciesArray, starting at the element index currently stored in PosLVal. |
4766 | 4 | llvm::Value *Size = CGF.Builder.CreateNUWMul( |
4767 | 4 | ElSize, |
4768 | 4 | CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false)); |
4769 | 4 | llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); |
4770 | 4 | Address DepAddr = |
4771 | 4 | Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos), |
4772 | 4 | DependenciesArray.getAlignment()); |
4773 | 4 | CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size); |
4774 | | |
4775 | | // Increase pos. |
4776 | | // pos += NumDeps; the position counts array elements, not bytes. |
4777 | 4 | llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps); |
4778 | 4 | CGF.EmitStoreOfScalar(Add, PosLVal); |
4779 | 4 | } |
4780 | 2 | } |
4781 | 2 | } |
4782 | | |
4783 | | std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause( |
4784 | | CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies, |
4785 | 629 | SourceLocation Loc) { |
4786 | 629 | if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) { |
4787 | 344 | return D.DepExprs.empty(); |
4788 | 344 | })) |
4789 | 285 | return std::make_pair(nullptr, Address::invalid()); |
4790 | | // Process list of dependencies. NumDependencies counts only the entries whose number is known at compile time: depobj clauses and clauses with an iterator contribute zero here and are counted at run time below. |
4791 | 344 | ASTContext &C = CGM.getContext(); |
4792 | 344 | Address DependenciesArray = Address::invalid(); |
4793 | 344 | llvm::Value *NumOfElements = nullptr; |
4794 | 344 | unsigned NumDependencies = std::accumulate( |
4795 | 344 | Dependencies.begin(), Dependencies.end(), 0, |
4796 | 414 | [](unsigned V, const OMPTaskDataTy::DependData &D) { |
4797 | 414 | return D.DepKind == OMPC_DEPEND_depobj |
4798 | 2 | ? V |
4799 | 412 | : (V + (D.IteratorExpr ? 02 : D.DepExprs.size()410 )); |
4800 | 414 | }); |
4801 | 344 | QualType FlagsTy; |
4802 | 344 | getDependTypes(C, KmpDependInfoTy, FlagsTy); |
4803 | 344 | bool HasDepobjDeps = false; |
4804 | 344 | bool HasRegularWithIterators = false; |
4805 | 344 | llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0); |
4806 | 344 | llvm::Value *NumOfRegularWithIterators = |
4807 | 344 | llvm::ConstantInt::get(CGF.IntPtrTy, 1); |
4808 | | // Calculate number of depobj dependencies and regular deps with the iterators. |
4809 | 414 | for (const OMPTaskDataTy::DependData &D : Dependencies) { |
4810 | 414 | if (D.DepKind == OMPC_DEPEND_depobj) { |
4811 | 2 | SmallVector<llvm::Value *, 4> Sizes = |
4812 | 2 | emitDepobjElementsSizes(CGF, KmpDependInfoTy, D); |
4813 | 4 | for (llvm::Value *Size : Sizes) { |
4814 | 4 | NumOfDepobjElements = |
4815 | 4 | CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size); |
4816 | 4 | } |
4817 | 2 | HasDepobjDeps = true; |
4818 | 2 | continue; |
4819 | 2 | } |
4820 | | // Include number of iterations, if any: the upper bounds of all iterators are multiplied into one running product. NOTE(review): the product is shared across all iterator clauses and is not scaled by D.DepExprs.size(); confirm it matches the number of records emitDependData writes for these clauses. |
4821 | 412 | if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) { |
4822 | 4 | for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I2 ) { |
4823 | 2 | llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); |
4824 | 2 | Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false); |
4825 | 2 | NumOfRegularWithIterators = |
4826 | 2 | CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz); |
4827 | 2 | } |
4828 | 2 | HasRegularWithIterators = true; |
4829 | 2 | continue; |
4830 | 2 | } |
4831 | 412 | } |
4832 | | |
4833 | 344 | QualType KmpDependInfoArrayTy; |
4834 | 344 | if (HasDepobjDeps || HasRegularWithIterators342 ) { |
4835 | 4 | NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies, |
4836 | 4 | /*isSigned=*/false); |
4837 | 4 | if (HasDepobjDeps) { |
4838 | 2 | NumOfElements = |
4839 | 2 | CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements); |
4840 | 2 | } |
4841 | 4 | if (HasRegularWithIterators) { |
4842 | 2 | NumOfElements = |
4843 | 2 | CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements); |
4844 | 2 | } |
4845 | 4 | OpaqueValueExpr OVE(Loc, |
4846 | 4 | C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0), |
4847 | 4 | VK_RValue); |
4848 | 4 | CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, |
4849 | 4 | RValue::get(NumOfElements)); |
4850 | 4 | KmpDependInfoArrayTy = |
4851 | 4 | C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal, |
4852 | 4 | /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); |
4853 | | // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy); |
4854 | | // Properly emit variable-sized array: an implicit declaration of the variable array type is emitted as a local variable and its address is used as the dependence array. |
4855 | 4 | auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy, |
4856 | 4 | ImplicitParamDecl::Other); |
4857 | 4 | CGF.EmitVarDecl(*PD); |
4858 | 4 | DependenciesArray = CGF.GetAddrOfLocalVar(PD); |
4859 | 4 | NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, |
4860 | 4 | /*isSigned=*/false); |
4861 | 340 | } else { |
4862 | 340 | KmpDependInfoArrayTy = C.getConstantArrayType( |
4863 | 340 | KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr, |
4864 | 340 | ArrayType::Normal, /*IndexTypeQuals=*/0); |
4865 | 340 | DependenciesArray = |
4866 | 340 | CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); |
4867 | 340 | DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0); |
4868 | 340 | NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies, |
4869 | 340 | /*isSigned=*/false); |
4870 | 340 | } |
4871 | 344 | unsigned Pos = 0; |
4872 | 758 | for (unsigned I = 0, End = Dependencies.size(); I < End; ++I414 ) { |
4873 | 414 | if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || |
4874 | 412 | Dependencies[I].IteratorExpr) |
4875 | 4 | continue; |
4876 | 410 | emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I], |
4877 | 410 | DependenciesArray); |
4878 | 410 | } |
4879 | | // Copy regular dependencies with iterators, using a run-time position counter initialized to the number of entries already written with compile-time indices. |
4880 | 344 | LValue PosLVal = CGF.MakeAddrLValue( |
4881 | 344 | CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType()); |
4882 | 344 | CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); |
4883 | 758 | for (unsigned I = 0, End = Dependencies.size(); I < End; ++I414 ) { |
4884 | 414 | if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || |
4885 | 412 | !Dependencies[I].IteratorExpr) |
4886 | 412 | continue; |
4887 | 2 | emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I], |
4888 | 2 | DependenciesArray); |
4889 | 2 | } |
4890 | | // Copy final depobj arrays without iterators; they are appended after the regular entries, continuing from the same run-time position counter. |
4891 | 344 | if (HasDepobjDeps) { |
4892 | 6 | for (unsigned I = 0, End = Dependencies.size(); I < End; ++I4 ) { |
4893 | 4 | if (Dependencies[I].DepKind != OMPC_DEPEND_depobj) |
4894 | 2 | continue; |
4895 | 2 | emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I], |
4896 | 2 | DependenciesArray); |
4897 | 2 | } |
4898 | 2 | } |
4899 | 344 | DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
4900 | 344 | DependenciesArray, CGF.VoidPtrTy); |
4901 | 344 | return std::make_pair(NumOfElements, DependenciesArray); |
4902 | 344 | } |
4903 | | |
4904 | | Address CGOpenMPRuntime::emitDepobjDependClause( |
4905 | | CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, |
4906 | 6 | SourceLocation Loc) { |
4907 | 6 | if (Dependencies.DepExprs.empty()) |
4908 | 0 | return Address::invalid(); |
4909 | | // Process list of dependencies. |
4910 | 6 | ASTContext &C = CGM.getContext(); |
4911 | 6 | Address DependenciesArray = Address::invalid(); |
4912 | 6 | unsigned NumDependencies = Dependencies.DepExprs.size(); |
4913 | 6 | QualType FlagsTy; |
4914 | 6 | getDependTypes(C, KmpDependInfoTy, FlagsTy); |
4915 | 6 | RecordDecl *KmpDependInfoRD = |
4916 | 6 | cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); |
4917 | | |
4918 | 6 | llvm::Value *Size; |
4919 | | // Define type kmp_depend_info[<Dependencies.size()> + 1]; |
4920 | | // For depobj reserve one extra element to store the number of elements. |
4921 | | // It is required to handle depobj(x) update(in) construct. |
4922 | | // kmp_depend_info[<Dependencies.size()> + 1] deps; Size is the allocation size in bytes and NumDepsVal the element count without the extra element. With an iterator both are computed at run time from the iterator upper bounds. |
4923 | 6 | llvm::Value *NumDepsVal; |
4924 | 6 | CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy); |
4925 | 6 | if (const auto *IE = |
4926 | 2 | cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) { |
4927 | 2 | NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1); |
4928 | 4 | for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I2 ) { |
4929 | 2 | llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); |
4930 | 2 | Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); |
4931 | 2 | NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz); |
4932 | 2 | } |
4933 | 2 | Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1), |
4934 | 2 | NumDepsVal); |
4935 | 2 | CharUnits SizeInBytes = |
4936 | 2 | C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align); |
4937 | 2 | llvm::Value *RecSize = CGM.getSize(SizeInBytes); |
4938 | 2 | Size = CGF.Builder.CreateNUWMul(Size, RecSize); |
4939 | 2 | NumDepsVal = |
4940 | 2 | CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false); |
4941 | 4 | } else { |
4942 | 4 | QualType KmpDependInfoArrayTy = C.getConstantArrayType( |
4943 | 4 | KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1), |
4944 | 4 | nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); |
4945 | 4 | CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy); |
4946 | 4 | Size = CGM.getSize(Sz.alignTo(Align)); |
4947 | 4 | NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies); |
4948 | 4 | } |
4949 | | // Need to allocate on the dynamic memory: the array is obtained from the __kmpc_alloc runtime call. |
4950 | 6 | llvm::Value *ThreadID = getThreadID(CGF, Loc); |
4951 | | // Use default allocator (a null allocator handle is passed). |
4952 | 6 | llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); |
4953 | 6 | llvm::Value *Args[] = {ThreadID, Size, Allocator}; |
4954 | | |
4955 | 6 | llvm::Value *Addr = |
4956 | 6 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
4957 | 6 | CGM.getModule(), OMPRTL___kmpc_alloc), |
4958 | 6 | Args, ".dep.arr.addr"); |
4959 | 6 | Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
4960 | 6 | Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo()); |
4961 | 6 | DependenciesArray = Address(Addr, Align); |
4962 | | // Write number of elements in the first element of array for depobj. |
4963 | 6 | LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy); |
4964 | | // deps[0].base_addr = NumDependencies; the dependence records themselves start at index 1, and the returned address points to deps[1]. |
4965 | 6 | LValue BaseAddrLVal = CGF.EmitLValueForField( |
4966 | 6 | Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); |
4967 | 6 | CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal); |
4968 | 6 | llvm::PointerUnion<unsigned *, LValue *> Pos; |
4969 | 6 | unsigned Idx = 1; |
4970 | 6 | LValue PosLVal; |
4971 | 6 | if (Dependencies.IteratorExpr) { |
4972 | 2 | PosLVal = CGF.MakeAddrLValue( |
4973 | 2 | CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"), |
4974 | 2 | C.getSizeType()); |
4975 | 2 | CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal, |
4976 | 2 | /*IsInit=*/true); |
4977 | 2 | Pos = &PosLVal; |
4978 | 4 | } else { |
4979 | 4 | Pos = &Idx; |
4980 | 4 | } |
4981 | 6 | emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray); |
4982 | 6 | DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
4983 | 6 | CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy); |
4984 | 6 | return DependenciesArray; |
4985 | 6 | } |
4986 | | |
4987 | | void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, |
4988 | 4 | SourceLocation Loc) { |
4989 | 4 | ASTContext &C = CGM.getContext(); |
4990 | 4 | QualType FlagsTy; |
4991 | 4 | getDependTypes(C, KmpDependInfoTy, FlagsTy); |
4992 | 4 | LValue Base = CGF.EmitLoadOfPointerLValue( |
4993 | 4 | DepobjLVal.getAddress(CGF), |
4994 | 4 | C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); |
4995 | 4 | QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); |
4996 | 4 | Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
4997 | 4 | Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); |
4998 | 4 | llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( |
4999 | 4 | Addr.getPointer(), |
5000 | 4 | llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); |
5001 | 4 | DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr, |
5002 | 4 | CGF.VoidPtrTy); |
5003 | 4 | llvm::Value *ThreadID = getThreadID(CGF, Loc); |
5004 | | // Use default allocator (a null allocator handle is passed). |
5005 | 4 | llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); |
5006 | 4 | llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator}; |
5007 | | |
5008 | | // __kmpc_free(gtid, addr, nullptr); addr points one kmp_depend_info record before the address stored in the depobj variable. |
5009 | 4 | (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
5010 | 4 | CGM.getModule(), OMPRTL___kmpc_free), |
5011 | 4 | Args); |
5012 | 4 | } |
5013 | | |
5014 | | void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, |
5015 | | OpenMPDependClauseKind NewDepKind, |
5016 | 4 | SourceLocation Loc) { |
5017 | 4 | ASTContext &C = CGM.getContext(); |
5018 | 4 | QualType FlagsTy; |
5019 | 4 | getDependTypes(C, KmpDependInfoTy, FlagsTy); |
5020 | 4 | RecordDecl *KmpDependInfoRD = |
5021 | 4 | cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); |
5022 | 4 | llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); |
5023 | 4 | llvm::Value *NumDeps; |
5024 | 4 | LValue Base; |
5025 | 4 | std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc); |
5026 | | |
5027 | 4 | Address Begin = Base.getAddress(CGF); |
5028 | | // End = Begin + NumDeps, the address one past the last dependence record of the depobj. |
5029 | 4 | llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps); |
5030 | | // The loop body is emitted first and the exit condition is tested at its end, so the body runs at least once; a PHI node carries the current element pointer. |
5031 | 4 | llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body"); |
5032 | 4 | llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done"); |
5033 | 4 | llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); |
5034 | 4 | CGF.EmitBlock(BodyBB); |
5035 | 4 | llvm::PHINode *ElementPHI = |
5036 | 4 | CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast"); |
5037 | 4 | ElementPHI->addIncoming(Begin.getPointer(), EntryBB); |
5038 | 4 | Begin = Address(ElementPHI, Begin.getAlignment()); |
5039 | 4 | Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(), |
5040 | 4 | Base.getTBAAInfo()); |
5041 | | // deps[i].flags = NewDepKind, translated to the runtime dependence kind. |
5042 | 4 | RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind); |
5043 | 4 | LValue FlagsLVal = CGF.EmitLValueForField( |
5044 | 4 | Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); |
5045 | 4 | CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), |
5046 | 4 | FlagsLVal); |
5047 | | |
5048 | | // Shift the address forward by one element and leave the loop once the next element address equals End. |
5049 | 4 | Address ElementNext = |
5050 | 4 | CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext"); |
5051 | 4 | ElementPHI->addIncoming(ElementNext.getPointer(), |
5052 | 4 | CGF.Builder.GetInsertBlock()); |
5053 | 4 | llvm::Value *IsEmpty = |
5054 | 4 | CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty"); |
5055 | 4 | CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); |
5056 | | // Done. |
5057 | 4 | CGF.EmitBlock(DoneBB, /*IsFinished=*/true); |
5058 | 4 | } |
5059 | | |
5060 | | void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, |
5061 | | const OMPExecutableDirective &D, |
5062 | | llvm::Function *TaskFunction, |
5063 | | QualType SharedsTy, Address Shareds, |
5064 | | const Expr *IfCond, |
5065 | 629 | const OMPTaskDataTy &Data) { |
5066 | 629 | if (!CGF.HaveInsertPoint()) |
5067 | 0 | return; |
5068 | | |
5069 | 629 | TaskResultTy Result = |
5070 | 629 | emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); |
5071 | 629 | llvm::Value *NewTask = Result.NewTask; |
5072 | 629 | llvm::Function *TaskEntry = Result.TaskEntry; |
5073 | 629 | llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; |
5074 | 629 | LValue TDBase = Result.TDBase; |
5075 | 629 | const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; |
5076 | | // Process list of dependences. |
5077 | 629 | Address DependenciesArray = Address::invalid(); |
5078 | 629 | llvm::Value *NumOfElements; |
5079 | 629 | std::tie(NumOfElements, DependenciesArray) = |
5080 | 629 | emitDependClause(
|