/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/tools/polly/include/polly/CodeGen/LoopGeneratorsKMP.h
Line | Count | Source |
1 | | //===- LoopGeneratorsKMP.h - IR helper to create loops ----------*- C++ -*-===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This file declares ParallelLoopGeneratorKMP, which creates OpenMP parallel |
10 | | // loops as LLVM-IR by emitting calls into the LLVM OpenMP runtime library. |
11 | | // |
12 | | //===----------------------------------------------------------------------===// |
13 | | #ifndef POLLY_LOOP_GENERATORS_KMP_H |
14 | | #define POLLY_LOOP_GENERATORS_KMP_H |
15 | | |
16 | | #include "polly/CodeGen/IRBuilder.h" |
17 | | #include "polly/CodeGen/LoopGenerators.h" |
18 | | #include "polly/Support/ScopHelper.h" |
19 | | #include "llvm/ADT/SetVector.h" |
20 | | |
21 | | namespace polly { |
22 | | using namespace llvm; |
23 | | |
24 | | /// ParallelLoopGenerator subclass that generates parallelized code by emitting |
25 | | /// calls into the LLVM OpenMP runtime library. |
26 | | class ParallelLoopGeneratorKMP : public ParallelLoopGenerator { |
27 | | public: |
28 | | /// Create a generator for the current function; sets SourceLocationInfo. |
29 | | ParallelLoopGeneratorKMP(PollyIRBuilder &Builder, LoopInfo &LI, |
30 | | DominatorTree &DT, const DataLayout &DL) |
31 | 10 | : ParallelLoopGenerator(Builder, LI, DT, DL) { |
32 | 10 | SourceLocationInfo = createSourceLocation(); |
33 | 10 | } |
34 | | |
35 | | protected: |
36 | | /// The source location struct of this loop, set once in the constructor. |
37 | | /// Layout: ident_t = type { i32, i32, i32, i32, i8* } |
38 | | GlobalValue *SourceLocationInfo; |
39 | | |
40 | | /// Translate the combination of the given chunk size and scheduling type |
41 | | /// (either of which might have been set via the command line) into the |
42 | | /// scheduling type to use. The returned type may differ from the provided |
43 | | /// one, e.g. "static chunked" scheduling may be turned into "static |
44 | | /// non-chunked" scheduling. |
45 | | /// |
46 | | /// @param ChunkSize The chunk size, set via command line or its default. |
47 | | /// @param Scheduling The scheduling type, set via command line or its default. |
48 | | /// |
49 | | /// @return The corresponding OMPGeneralSchedulingType. |
50 | | OMPGeneralSchedulingType |
51 | | getSchedType(int ChunkSize, OMPGeneralSchedulingType Scheduling) const; |
52 | | |
53 | | /// @return True if 'LongType' is 64 bits wide, false otherwise. |
54 | | bool is64BitArch(); |
55 | | |
56 | | public: |
57 | | // The functions below are public so that callers can generate individual |
58 | | // parts of an OpenMP parallel loop (e.g. the subfunction definition) |
59 | | // instead of generating a complete parallel loop. |
60 | | |
61 | | /// Create a runtime library call to spawn the worker threads. |
62 | | /// |
63 | | /// @param SubFn The subfunction which holds the loop body. |
64 | | /// @param SubFnParam The parameter for the subfunction (basically the struct |
65 | | /// filled with the outside values). |
66 | | /// @param LB The lower bound for the loop we parallelize. |
67 | | /// @param UB The upper bound for the loop we parallelize. |
68 | | /// @param Stride The stride of the loop we parallelize. |
69 | | void createCallSpawnThreads(Value *SubFn, Value *SubFnParam, Value *LB, |
70 | | Value *UB, Value *Stride); |
71 | | |
72 | | void deployParallelExecution(Value *SubFn, Value *SubFnParam, Value *LB, |
73 | | Value *UB, Value *Stride) override; |
74 | | |
75 | | Function *prepareSubFnDefinition(Function *F) const override; |
76 | | |
77 | | std::tuple<Value *, Function *> createSubFn(Value *Stride, AllocaInst *Struct, |
78 | | SetVector<Value *> UsedValues, |
79 | | ValueMapT &VMap) override; |
80 | | |
81 | | /// Create a runtime library call to get the current global thread number. |
82 | | /// |
83 | | /// @return A Value which holds the current global thread number. |
84 | | Value *createCallGlobalThreadNum(); |
85 | | |
86 | | /// Create a runtime library call to request the number of threads that |
87 | | /// will be used in the next OpenMP section (by the next fork). |
88 | | /// |
89 | | /// @param GlobalThreadID The global thread ID. |
90 | | /// @param NumThreads The number of threads to use. |
91 | | void createCallPushNumThreads(Value *GlobalThreadID, Value *NumThreads); |
92 | | |
93 | | /// Create a runtime library call to prepare the OpenMP runtime for a |
94 | | /// dynamically scheduled loop by saving the loop arguments. |
95 | | /// |
96 | | /// @param GlobalThreadID The global thread ID. |
97 | | /// @param LB The loop's lower bound. |
98 | | /// @param UB The loop's upper bound. |
99 | | /// @param Inc The loop increment. |
100 | | /// @param ChunkSize The chunk size of the parallel loop. |
101 | | void createCallDispatchInit(Value *GlobalThreadID, Value *LB, Value *UB, |
102 | | Value *Inc, Value *ChunkSize); |
103 | | |
104 | | /// Create a runtime library call to retrieve the next (dynamically) |
105 | | /// allocated chunk of work for this thread. |
106 | | /// |
107 | | /// @param GlobalThreadID The global thread ID. |
108 | | /// @param IsLastPtr Pointer to a flag, which is set to 1 if this is |
109 | | /// the last chunk of work, or 0 otherwise. |
110 | | /// @param LBPtr Pointer to the lower bound for the next chunk. |
111 | | /// @param UBPtr Pointer to the upper bound for the next chunk. |
112 | | /// @param StridePtr Pointer to the stride for the next chunk. |
113 | | /// |
114 | | /// @return A Value which holds 1 if there is work to be done, 0 otherwise. |
115 | | Value *createCallDispatchNext(Value *GlobalThreadID, Value *IsLastPtr, |
116 | | Value *LBPtr, Value *UBPtr, Value *StridePtr); |
117 | | |
118 | | /// Create a runtime library call to prepare the OpenMP runtime for a |
119 | | /// statically scheduled loop by saving the loop arguments. |
120 | | /// |
121 | | /// @param GlobalThreadID The global thread ID. |
122 | | /// @param IsLastPtr Pointer to a flag, which is set to 1 if this is |
123 | | /// the last chunk of work, or 0 otherwise. |
124 | | /// @param LBPtr Pointer to the lower bound for the next chunk. |
125 | | /// @param UBPtr Pointer to the upper bound for the next chunk. |
126 | | /// @param StridePtr Pointer to the stride for the next chunk. |
127 | | /// @param ChunkSize The chunk size of the parallel loop. |
128 | | void createCallStaticInit(Value *GlobalThreadID, Value *IsLastPtr, |
129 | | Value *LBPtr, Value *UBPtr, Value *StridePtr, |
130 | | Value *ChunkSize); |
131 | | |
132 | | /// Create a runtime library call to mark the end of |
133 | | /// a statically scheduled loop. |
134 | | /// |
135 | | /// @param GlobalThreadID The global thread ID. |
136 | | void createCallStaticFini(Value *GlobalThreadID); |
137 | | |
138 | | /// Create the current source location. |
139 | | /// |
140 | | /// TODO: Currently generates only dummy values. |
141 | | GlobalVariable *createSourceLocation(); |
142 | | }; |
143 | | } // end namespace polly |
144 | | #endif |