Coverage Report

Created: 2022-07-16 07:03

/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
Line
Count
Source (jump to first uncovered line)
1
//===------ CGOpenMPRuntimeGPU.h - Interface to OpenMP GPU Runtimes ------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This provides a generalized class for OpenMP runtime code generation
10
// specialized by GPU targets NVPTX and AMDGCN.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMEGPU_H
15
#define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMEGPU_H
16
17
#include "CGOpenMPRuntime.h"
18
#include "CodeGenFunction.h"
19
#include "clang/AST/StmtOpenMP.h"
20
21
namespace clang {
22
namespace CodeGen {
23
24
class CGOpenMPRuntimeGPU : public CGOpenMPRuntime {
25
public:
26
  /// Defines the execution mode.
27
  enum ExecutionMode {
28
    /// SPMD execution mode (all threads are worker threads).
29
    EM_SPMD,
30
    /// Non-SPMD execution mode (1 master thread, others are workers).
31
    EM_NonSPMD,
32
    /// Unknown execution mode (orphaned directive).
33
    EM_Unknown,
34
  };
35
private:
36
  /// Parallel outlined function work for workers to execute.
37
  llvm::SmallVector<llvm::Function *, 16> Work;
38
39
  struct EntryFunctionState {
40
    SourceLocation Loc;
41
  };
42
43
  ExecutionMode getExecutionMode() const;
44
45
4.25k
  bool requiresFullRuntime() const { return RequiresFullRuntime; }
46
47
  /// Get barrier to synchronize all threads in a block.
48
  void syncCTAThreads(CodeGenFunction &CGF);
49
50
  /// Helper for target directive initialization.
51
  void emitKernelInit(CodeGenFunction &CGF, EntryFunctionState &EST,
52
                      bool IsSPMD);
53
54
  /// Helper for target directive finalization.
55
  void emitKernelDeinit(CodeGenFunction &CGF, EntryFunctionState &EST,
56
                        bool IsSPMD);
57
58
  /// Helper for generic variables globalization prolog.
59
  void emitGenericVarsProlog(CodeGenFunction &CGF, SourceLocation Loc,
60
                             bool WithSPMDCheck = false);
61
62
  /// Helper for generic variables globalization epilog.
63
  void emitGenericVarsEpilog(CodeGenFunction &CGF, bool WithSPMDCheck = false);
64
65
  //
66
  // Base class overrides.
67
  //
68
69
  /// Creates offloading entry for the provided entry ID \a ID,
70
  /// address \a Addr, size \a Size, and flags \a Flags.
71
  void createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr,
72
                          uint64_t Size, int32_t Flags,
73
                          llvm::GlobalValue::LinkageTypes Linkage) override;
74
75
  /// Emit outlined function specialized for the Fork-Join
76
  /// programming model for applicable target directives on the NVPTX device.
77
  /// \param D Directive to emit.
78
  /// \param ParentName Name of the function that encloses the target region.
79
  /// \param OutlinedFn Outlined function value to be defined by this call.
80
  /// \param OutlinedFnID Outlined function ID value to be defined by this call.
81
  /// \param IsOffloadEntry True if the outlined function is an offload entry.
82
  /// An outlined function may not be an entry if, e.g. the if clause always
83
  /// evaluates to false.
84
  void emitNonSPMDKernel(const OMPExecutableDirective &D, StringRef ParentName,
85
                         llvm::Function *&OutlinedFn,
86
                         llvm::Constant *&OutlinedFnID, bool IsOffloadEntry,
87
                         const RegionCodeGenTy &CodeGen);
88
89
  /// Emit outlined function specialized for the Single Program
90
  /// Multiple Data programming model for applicable target directives on the
91
  /// NVPTX device.
92
  /// \param D Directive to emit.
93
  /// \param ParentName Name of the function that encloses the target region.
94
  /// \param OutlinedFn Outlined function value to be defined by this call.
95
  /// \param OutlinedFnID Outlined function ID value to be defined by this call.
96
  /// \param IsOffloadEntry True if the outlined function is an offload entry.
97
  /// An outlined function may not be an entry if, e.g. the if clause always
98
  /// evaluates to false.
99
  /// \param CodeGen Object containing the target statements.
100
  void emitSPMDKernel(const OMPExecutableDirective &D, StringRef ParentName,
101
                      llvm::Function *&OutlinedFn,
102
                      llvm::Constant *&OutlinedFnID, bool IsOffloadEntry,
103
                      const RegionCodeGenTy &CodeGen);
104
105
  /// Emit outlined function for 'target' directive on the NVPTX
106
  /// device.
107
  /// \param D Directive to emit.
108
  /// \param ParentName Name of the function that encloses the target region.
109
  /// \param OutlinedFn Outlined function value to be defined by this call.
110
  /// \param OutlinedFnID Outlined function ID value to be defined by this call.
111
  /// \param IsOffloadEntry True if the outlined function is an offload entry.
112
  /// An outlined function may not be an entry if, e.g. the if clause always
113
  /// evaluates to false.
114
  void emitTargetOutlinedFunction(const OMPExecutableDirective &D,
115
                                  StringRef ParentName,
116
                                  llvm::Function *&OutlinedFn,
117
                                  llvm::Constant *&OutlinedFnID,
118
                                  bool IsOffloadEntry,
119
                                  const RegionCodeGenTy &CodeGen) override;
120
121
  /// Emits code for parallel or serial call of the \a OutlinedFn with
122
  /// variables captured in a record whose address is stored in \a
123
  /// CapturedStruct.
124
  /// This call is for the Non-SPMD Execution Mode.
125
  /// \param OutlinedFn Outlined function to be run in parallel threads. Type of
126
  /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
127
  /// \param CapturedVars A pointer to the record with the references to
128
  /// variables used in \a OutlinedFn function.
129
  /// \param IfCond Condition in the associated 'if' clause, if it was
130
  /// specified, nullptr otherwise.
131
  void emitNonSPMDParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
132
                               llvm::Value *OutlinedFn,
133
                               ArrayRef<llvm::Value *> CapturedVars,
134
                               const Expr *IfCond);
135
136
  /// Emits code for parallel or serial call of the \a OutlinedFn with
137
  /// variables captured in a record whose address is stored in \a
138
  /// CapturedStruct.
139
  /// This call is for a parallel directive within an SPMD target directive.
140
  /// \param OutlinedFn Outlined function to be run in parallel threads. Type of
141
  /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
142
  /// \param CapturedVars A pointer to the record with the references to
143
  /// variables used in \a OutlinedFn function.
144
  /// \param IfCond Condition in the associated 'if' clause, if it was
145
  /// specified, nullptr otherwise.
146
  ///
147
  void emitSPMDParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
148
                            llvm::Function *OutlinedFn,
149
                            ArrayRef<llvm::Value *> CapturedVars,
150
                            const Expr *IfCond);
151
152
protected:
153
  /// Get the function name of an outlined region.
154
  /// The name can be customized depending on the target.
155
  ///
156
888
  StringRef getOutlinedHelperName() const override {
157
888
    return "__omp_outlined__";
158
888
  }
159
160
  /// Check if the default location must be constant.
161
  /// Constant for NVPTX for better optimization.
162
0
  bool isDefaultLocationConstant() const override { return true; }
163
164
  /// Returns additional flags that can be stored in reserved_2 field of the
165
  /// default location.
166
  /// For NVPTX target contains data about SPMD/Non-SPMD execution mode +
167
  /// Full/Lightweight runtime mode. Used for better optimization.
168
  unsigned getDefaultLocationReserved2Flags() const override;
169
170
public:
171
  explicit CGOpenMPRuntimeGPU(CodeGenModule &CGM);
172
  void clear() override;
173
174
  /// Declare generalized virtual functions which need to be defined
175
  /// by all specializations of OpenMPGPURuntime Targets like AMDGCN
176
  /// and NVPTX.
177
178
  /// Get the GPU warp size.
179
  llvm::Value *getGPUWarpSize(CodeGenFunction &CGF);
180
181
  /// Get the id of the current thread on the GPU.
182
  llvm::Value *getGPUThreadID(CodeGenFunction &CGF);
183
184
  /// Get the maximum number of threads in a block of the GPU.
185
  llvm::Value *getGPUNumThreads(CodeGenFunction &CGF);
186
187
  /// Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32
188
  /// global_tid, int proc_bind) to generate code for 'proc_bind' clause.
189
  virtual void emitProcBindClause(CodeGenFunction &CGF,
190
                                  llvm::omp::ProcBindKind ProcBind,
191
                                  SourceLocation Loc) override;
192
193
  /// Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32
194
  /// global_tid, kmp_int32 num_threads) to generate code for 'num_threads'
195
  /// clause.
196
  /// \param NumThreads An integer value of threads.
197
  virtual void emitNumThreadsClause(CodeGenFunction &CGF,
198
                                    llvm::Value *NumThreads,
199
                                    SourceLocation Loc) override;
200
201
  /// This function ought to emit, in the general case, a call to
202
  /// the OpenMP runtime kmpc_push_num_teams. In NVPTX backend it is not needed
203
  /// as the numbers are obtained through the PTX grid and block configuration.
204
  /// \param NumTeams An integer expression of teams.
205
  /// \param ThreadLimit An integer expression of threads.
206
  void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams,
207
                          const Expr *ThreadLimit, SourceLocation Loc) override;
208
209
  /// Emits outlined function for the specified OpenMP parallel
210
  /// directive
211
  /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
212
  /// kmp_int32 BoundID, struct context_vars*).
213
  /// \param D OpenMP directive.
214
  /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
215
  /// \param InnermostKind Kind of innermost directive (for simple directives it
216
  /// is a directive itself, for combined - its innermost directive).
217
  /// \param CodeGen Code generation sequence for the \a D directive.
218
  llvm::Function *
219
  emitParallelOutlinedFunction(const OMPExecutableDirective &D,
220
                               const VarDecl *ThreadIDVar,
221
                               OpenMPDirectiveKind InnermostKind,
222
                               const RegionCodeGenTy &CodeGen) override;
223
224
  /// Emits outlined function for the specified OpenMP teams
225
  /// directive
226
  /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
227
  /// kmp_int32 BoundID, struct context_vars*).
228
  /// \param D OpenMP directive.
229
  /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
230
  /// \param InnermostKind Kind of innermost directive (for simple directives it
231
  /// is a directive itself, for combined - its innermost directive).
232
  /// \param CodeGen Code generation sequence for the \a D directive.
233
  llvm::Function *
234
  emitTeamsOutlinedFunction(const OMPExecutableDirective &D,
235
                            const VarDecl *ThreadIDVar,
236
                            OpenMPDirectiveKind InnermostKind,
237
                            const RegionCodeGenTy &CodeGen) override;
238
239
  /// Emits code for teams call of the \a OutlinedFn with
240
  /// variables captured in a record whose address is stored in \a
241
  /// CapturedStruct.
242
  /// \param OutlinedFn Outlined function to be run by team masters. Type of
243
  /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
244
  /// \param CapturedVars A pointer to the record with the references to
245
  /// variables used in \a OutlinedFn function.
246
  ///
247
  void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D,
248
                     SourceLocation Loc, llvm::Function *OutlinedFn,
249
                     ArrayRef<llvm::Value *> CapturedVars) override;
250
251
  /// Emits code for parallel or serial call of the \a OutlinedFn with
252
  /// variables captured in a record whose address is stored in \a
253
  /// CapturedStruct.
254
  /// \param OutlinedFn Outlined function to be run in parallel threads. Type of
255
  /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
256
  /// \param CapturedVars A pointer to the record with the references to
257
  /// variables used in \a OutlinedFn function.
258
  /// \param IfCond Condition in the associated 'if' clause, if it was
259
  /// specified, nullptr otherwise.
260
  /// \param NumThreads The value corresponding to the num_threads clause, if
261
  /// any,
262
  ///                   or nullptr.
263
  void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
264
                        llvm::Function *OutlinedFn,
265
                        ArrayRef<llvm::Value *> CapturedVars,
266
                        const Expr *IfCond, llvm::Value *NumThreads) override;
267
268
  /// Emit an implicit/explicit barrier for OpenMP threads.
269
  /// \param Kind Directive for which this implicit barrier call must be
270
  /// generated. Must be OMPD_barrier for explicit barrier generation.
271
  /// \param EmitChecks true if need to emit checks for cancellation barriers.
272
  /// \param ForceSimpleCall true if a simple barrier call must be emitted,
273
  /// false if the runtime class decides which one to emit (simple or with
274
  /// cancellation checks).
275
  ///
276
  void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
277
                       OpenMPDirectiveKind Kind, bool EmitChecks = true,
278
                       bool ForceSimpleCall = false) override;
279
280
  /// Emits a critical region.
281
  /// \param CriticalName Name of the critical region.
282
  /// \param CriticalOpGen Generator for the statement associated with the given
283
  /// critical region.
284
  /// \param Hint Value of the 'hint' clause (optional).
285
  void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName,
286
                          const RegionCodeGenTy &CriticalOpGen,
287
                          SourceLocation Loc,
288
                          const Expr *Hint = nullptr) override;
289
290
  /// Emit code for the reduction clause.
291
  ///
292
  /// \param Privates List of private copies for original reduction arguments.
293
  /// \param LHSExprs List of LHS in \a ReductionOps reduction operations.
294
  /// \param RHSExprs List of RHS in \a ReductionOps reduction operations.
295
  /// \param ReductionOps List of reduction operations in form 'LHS binop RHS'
296
  /// or 'operator binop(LHS, RHS)'.
297
  /// \param Options List of options for reduction codegen:
298
  ///     WithNowait true if parent directive has also nowait clause, false
299
  ///     otherwise.
300
  ///     SimpleReduction Emit reduction operation only. Used for omp simd
301
  ///     directive on the host.
302
  ///     ReductionKind The kind of reduction to perform.
303
  virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
304
                             ArrayRef<const Expr *> Privates,
305
                             ArrayRef<const Expr *> LHSExprs,
306
                             ArrayRef<const Expr *> RHSExprs,
307
                             ArrayRef<const Expr *> ReductionOps,
308
                             ReductionOptionsTy Options) override;
309
310
  /// Returns specified OpenMP runtime function for the current OpenMP
311
  /// implementation.  Specialized for the NVPTX device.
312
  /// \param Function OpenMP runtime function.
313
  /// \return Specified function.
314
  llvm::FunctionCallee createNVPTXRuntimeFunction(unsigned Function);
315
316
  /// Translates the native parameter of outlined function if this is required
317
  /// for target.
318
  /// \param FD Field decl from captured record for the parameter.
319
  /// \param NativeParam Parameter itself.
320
  const VarDecl *translateParameter(const FieldDecl *FD,
321
                                    const VarDecl *NativeParam) const override;
322
323
  /// Gets the address of the native argument based on the address of the
324
  /// target-specific parameter.
325
  /// \param NativeParam Parameter itself.
326
  /// \param TargetParam Corresponding target-specific parameter.
327
  Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam,
328
                              const VarDecl *TargetParam) const override;
329
330
  /// Emits call of the outlined function with the provided arguments,
331
  /// translating these arguments to correct target-specific arguments.
332
  void emitOutlinedFunctionCall(
333
      CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
334
      ArrayRef<llvm::Value *> Args = llvm::None) const override;
335
336
  /// Emits OpenMP-specific function prolog.
337
  /// Required for device constructs.
338
  void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) override;
339
340
  /// Gets the OpenMP-specific address of the local variable.
341
  Address getAddressOfLocalVariable(CodeGenFunction &CGF,
342
                                    const VarDecl *VD) override;
343
344
  /// Target codegen is specialized based on two data-sharing modes: CUDA, in
345
  /// which the local variables are actually global threadlocal, and Generic, in
346
  /// which the local variables are placed in global memory if they may escape
347
  /// their declaration context.
348
  enum DataSharingMode {
349
    /// CUDA data sharing mode.
350
    CUDA,
351
    /// Generic data-sharing mode.
352
    Generic,
353
  };
354
355
  /// Cleans up references to the objects in finished function.
356
  ///
357
  void functionFinished(CodeGenFunction &CGF) override;
358
359
  /// Choose a default value for the dist_schedule clause.
360
  void getDefaultDistScheduleAndChunk(CodeGenFunction &CGF,
361
      const OMPLoopDirective &S, OpenMPDistScheduleClauseKind &ScheduleKind,
362
      llvm::Value *&Chunk) const override;
363
364
  /// Choose a default value for the schedule clause.
365
  void getDefaultScheduleAndChunk(CodeGenFunction &CGF,
366
      const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind,
367
      const Expr *&ChunkExpr) const override;
368
369
  /// Adjust some parameters for the target-based directives, like addresses of
370
  /// the variables captured by reference in lambdas.
371
  void adjustTargetSpecificDataForLambdas(
372
      CodeGenFunction &CGF, const OMPExecutableDirective &D) const override;
373
374
  /// Perform check on requires decl to ensure that target architecture
375
  /// supports unified addressing.
376
  void processRequiresDirective(const OMPRequiresDecl *D) override;
377
378
  /// Checks if the variable has associated OMPAllocateDeclAttr attribute with
379
  /// the predefined allocator and translates it into the corresponding address
380
  /// space.
381
  bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS) override;
382
383
private:
384
  /// Track the execution mode when codegening directives within a target
385
  /// region. The appropriate mode (SPMD/NON-SPMD) is set on entry to the
386
  /// target region and used by containing directives such as 'parallel'
387
  /// to emit optimized code.
388
  ExecutionMode CurrentExecutionMode = EM_Unknown;
389
390
  /// Check if the full runtime is required (default - yes).
391
  bool RequiresFullRuntime = true;
392
393
  /// true if we're emitting the code for the target region and next parallel
394
  /// region is L0 for sure.
395
  bool IsInTargetMasterThreadRegion = false;
396
  /// true if currently emitting code for target/teams/distribute region, false
397
  /// otherwise.
398
  bool IsInTTDRegion = false;
399
  /// true if we're definitely in the parallel region.
400
  bool IsInParallelRegion = false;
401
402
  /// Map between an outlined function and its wrapper.
403
  llvm::DenseMap<llvm::Function *, llvm::Function *> WrapperFunctionsMap;
404
405
  /// Emit function which wraps the outlined parallel region
406
  /// and controls the parameters which are passed to this function.
407
  /// The wrapper ensures that the outlined function is called
408
  /// with the correct arguments when data is shared.
409
  llvm::Function *createParallelDataSharingWrapper(
410
      llvm::Function *OutlinedParallelFn, const OMPExecutableDirective &D);
411
412
  /// The data for the single globalized variable.
413
  struct MappedVarData {
414
    /// Corresponding field in the global record.
415
    llvm::Value *GlobalizedVal = nullptr;
416
    /// Corresponding address.
417
    Address PrivateAddr = Address::invalid();
418
  };
419
  /// The map of local variables to their addresses in the global memory.
420
  using DeclToAddrMapTy = llvm::MapVector<const Decl *, MappedVarData>;
421
  /// Set of the parameters passed by value escaping OpenMP context.
422
  using EscapedParamsTy = llvm::SmallPtrSet<const Decl *, 4>;
423
  struct FunctionData {
424
    DeclToAddrMapTy LocalVarData;
425
    llvm::Optional<DeclToAddrMapTy> SecondaryLocalVarData = llvm::None;
426
    EscapedParamsTy EscapedParameters;
427
    llvm::SmallVector<const ValueDecl*, 4> EscapedVariableLengthDecls;
428
    llvm::SmallVector<std::pair<llvm::Value *, llvm::Value *>, 4>
429
        EscapedVariableLengthDeclsAddrs;
430
    llvm::Value *IsInSPMDModeFlag = nullptr;
431
    std::unique_ptr<CodeGenFunction::OMPMapVars> MappedParams;
432
  };
433
  /// Maps the function to the list of the globalized variables with their
434
  /// addresses.
435
  llvm::SmallDenseMap<llvm::Function *, FunctionData> FunctionGlobalizedDecls;
436
  llvm::GlobalVariable *KernelTeamsReductionPtr = nullptr;
437
  /// List of the records with the list of fields for the reductions across the
438
  /// teams. Used to build the intermediate buffer for the fast teams
439
  /// reductions.
440
  /// All the records are gathered into a union and `union.type` is created.
441
  llvm::SmallVector<const RecordDecl *, 4> TeamsReductions;
442
  /// Shared pointer for the global memory in the global memory buffer used for
443
  /// the given kernel.
444
  llvm::GlobalVariable *KernelStaticGlobalized = nullptr;
445
  /// Pair of the Non-SPMD team and all reduction variables in this team
446
  /// region.
447
  std::pair<const Decl *, llvm::SmallVector<const ValueDecl *, 4>>
448
      TeamAndReductions;
449
};
450
451
} // CodeGen namespace.
452
} // clang namespace.
453
454
#endif // LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMEGPU_H