/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/tools/polly/lib/CodeGen/LoopGenerators.cpp
Line | Count | Source |
1 | | //===------ LoopGenerators.cpp - IR helper to create loops ---------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This file contains functions to create scalar loops and orchestrate the |
10 | | // creation of parallel loops as LLVM-IR. |
11 | | // |
12 | | //===----------------------------------------------------------------------===// |
13 | | |
14 | | #include "polly/CodeGen/LoopGenerators.h" |
15 | | #include "polly/Options.h" |
16 | | #include "polly/ScopDetection.h" |
17 | | #include "llvm/Analysis/LoopInfo.h" |
18 | | #include "llvm/IR/DataLayout.h" |
19 | | #include "llvm/IR/Dominators.h" |
20 | | #include "llvm/IR/Module.h" |
21 | | #include "llvm/Support/CommandLine.h" |
22 | | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
23 | | |
24 | | using namespace llvm; |
25 | | using namespace polly; |
26 | | |
27 | | int polly::PollyNumThreads; |
28 | | OMPGeneralSchedulingType polly::PollyScheduling; |
29 | | int polly::PollyChunkSize; |
30 | | // Each option below stores its parsed value in one of the polly:: globals above through cl::location. |
31 | | static cl::opt<int, true> |
32 | | XPollyNumThreads("polly-num-threads", |
33 | | cl::desc("Number of threads to use (0 = auto)"), |
34 | | cl::Hidden, cl::location(polly::PollyNumThreads), |
35 | | cl::init(0), cl::cat(PollyCategory)); |
36 | | // polly-scheduling: the value static selects StaticChunked; Runtime is the initial value. |
37 | | static cl::opt<OMPGeneralSchedulingType, true> XPollyScheduling( |
38 | | "polly-scheduling", |
39 | | cl::desc("Scheduling type of parallel OpenMP for loops"), |
40 | | cl::values(clEnumValN(OMPGeneralSchedulingType::StaticChunked, "static", |
41 | | "Static scheduling"), |
42 | | clEnumValN(OMPGeneralSchedulingType::Dynamic, "dynamic", |
43 | | "Dynamic scheduling"), |
44 | | clEnumValN(OMPGeneralSchedulingType::Guided, "guided", |
45 | | "Guided scheduling"), |
46 | | clEnumValN(OMPGeneralSchedulingType::Runtime, "runtime", |
47 | | "Runtime determined (OMP_SCHEDULE)")), |
48 | | cl::Hidden, cl::location(polly::PollyScheduling), |
49 | | cl::init(OMPGeneralSchedulingType::Runtime), cl::Optional, |
50 | | cl::cat(PollyCategory)); |
51 | | // polly-scheduling-chunksize: hidden and optional, initial value 0, stored in polly::PollyChunkSize. |
52 | | static cl::opt<int, true> |
53 | | XPollyChunkSize("polly-scheduling-chunksize", |
54 | | cl::desc("Chunksize to use by the OpenMP runtime calls"), |
55 | | cl::Hidden, cl::location(polly::PollyChunkSize), |
56 | | cl::init(0), cl::Optional, cl::cat(PollyCategory)); |
57 | | |
58 | | // We generate a loop of either of the following structures: |
59 | | // |
60 | | //              BeforeBB                      BeforeBB |
61 | | //                 |                             | |
62 | | //                 v                             v |
63 | | //              GuardBB                      PreHeaderBB |
64 | | //              /      |                         |   _____ |
65 | | //     __  PreHeaderBB  |                        v  \/    | |
66 | | //    /  \    /         |                     HeaderBB  latch |
67 | | // latch  HeaderBB      |                        |\       | |
68 | | //    \  /    \         /                        | \------/ |
69 | | //     <       \       /                         | |
70 | | //              \     /                          v |
71 | | //              ExitBB                         ExitBB |
72 | | // |
73 | | // depending on whether or not we know that it is executed at least once. If |
74 | | // not, GuardBB checks if the loop is executed at least once. If this is the |
75 | | // case we branch to PreHeaderBB and subsequently to the HeaderBB, which |
76 | | // contains the loop iv 'polly.indvar', the incremented loop iv |
77 | | // 'polly.indvar_next' as well as the condition to check if we execute another |
78 | | // iteration of the loop. After the loop has finished, we branch to ExitBB. |
79 | | // We expect the type of UB, LB, UB+Stride to be large enough for values that |
80 | | // UB may take throughout the execution of the loop, including the computation |
81 | | // of indvar + Stride before the final abort. |
82 | | Value *polly::createLoop(Value *LB, Value *UB, Value *Stride, |
83 | | PollyIRBuilder &Builder, LoopInfo &LI, |
84 | | DominatorTree &DT, BasicBlock *&ExitBB, |
85 | | ICmpInst::Predicate Predicate, |
86 | | ScopAnnotator *Annotator, bool Parallel, bool UseGuard, |
87 | 324 | bool LoopVectDisabled) { |
88 | 324 | Function *F = Builder.GetInsertBlock()->getParent(); |
89 | 324 | LLVMContext &Context = F->getContext(); |
90 | 324 | // LB and UB must share one integer type; that type is also used for the induction variable. |
91 | 324 | assert(LB->getType() == UB->getType() && "Types of loop bounds do not match"); |
92 | 324 | IntegerType *LoopIVType = dyn_cast<IntegerType>(UB->getType()); |
93 | 324 | assert(LoopIVType && "UB is not integer?"); |
94 | 324 | // Remember the current block and create the new ones; the guard block exists only when UseGuard is set. |
95 | 324 | BasicBlock *BeforeBB = Builder.GetInsertBlock(); |
96 | 324 | BasicBlock *GuardBB = |
97 | 324 | UseGuard ? BasicBlock::Create(Context, "polly.loop_if", F)133 : nullptr191 ; |
98 | 324 | BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.loop_header", F); |
99 | 324 | BasicBlock *PreHeaderBB = |
100 | 324 | BasicBlock::Create(Context, "polly.loop_preheader", F); |
101 | 324 | 
102 | 324 | // Update LoopInfo: allocate the new loop and nest it under the loop containing BeforeBB, or make it top-level. |
103 | 324 | Loop *OuterLoop = LI.getLoopFor(BeforeBB); |
104 | 324 | Loop *NewLoop = LI.AllocateLoop(); |
105 | 324 | 
106 | 324 | if (OuterLoop) |
107 | 98 | OuterLoop->addChildLoop(NewLoop); |
108 | 226 | else |
109 | 226 | LI.addTopLevelLoop(NewLoop); |
110 | 324 | // The guard and the preheader are registered with the enclosing loop (if there is one), not with the new loop. |
111 | 324 | if (OuterLoop) { |
112 | 98 | if (GuardBB) |
113 | 48 | OuterLoop->addBasicBlockToLoop(GuardBB, LI); |
114 | 98 | OuterLoop->addBasicBlockToLoop(PreHeaderBB, LI); |
115 | 98 | } |
116 | 324 | // The header is the only block this function adds to the new loop. |
117 | 324 | NewLoop->addBasicBlockToLoop(HeaderBB, LI); |
118 | 324 | 
119 | 324 | // Notify the annotator (if present) that we have a new loop, but only |
120 | 324 | // after the header block is set. |
121 | 324 | if (Annotator) |
122 | 283 | Annotator->pushLoop(NewLoop, Parallel); |
123 | 324 | 
124 | 324 | // ExitBB: split BeforeBB at the current insert point; the block returned by SplitBlock is reported through ExitBB. |
125 | 324 | ExitBB = SplitBlock(BeforeBB, &*Builder.GetInsertPoint(), &DT, &LI); |
126 | 324 | ExitBB->setName("polly.loop_exit"); |
127 | 324 | 
128 | 324 | // BeforeBB: redirect its terminator to the guard if there is one, otherwise straight to the preheader. |
129 | 324 | if (GuardBB) { |
130 | 133 | BeforeBB->getTerminator()->setSuccessor(0, GuardBB); |
131 | 133 | DT.addNewBlock(GuardBB, BeforeBB); |
132 | 133 | 
133 | 133 | // GuardBB: enter the preheader only if LB and UB satisfy Predicate; otherwise branch to ExitBB. |
134 | 133 | Builder.SetInsertPoint(GuardBB); |
135 | 133 | Value *LoopGuard; |
136 | 133 | LoopGuard = Builder.CreateICmp(Predicate, LB, UB); |
137 | 133 | LoopGuard->setName("polly.loop_guard"); |
138 | 133 | Builder.CreateCondBr(LoopGuard, PreHeaderBB, ExitBB); |
139 | 133 | DT.addNewBlock(PreHeaderBB, GuardBB); |
140 | 191 | } else { |
141 | 191 | BeforeBB->getTerminator()->setSuccessor(0, PreHeaderBB); |
142 | 191 | DT.addNewBlock(PreHeaderBB, BeforeBB); |
143 | 191 | } |
144 | 324 | 
145 | 324 | // PreHeaderBB: contains only an unconditional branch to the header. |
146 | 324 | Builder.SetInsertPoint(PreHeaderBB); |
147 | 324 | Builder.CreateBr(HeaderBB); |
148 | 324 | 
149 | 324 | // HeaderBB: holds the induction variable PHI, its increment by Stride and the continuation check against UB. |
150 | 324 | DT.addNewBlock(HeaderBB, PreHeaderBB); |
151 | 324 | Builder.SetInsertPoint(HeaderBB); |
152 | 324 | PHINode *IV = Builder.CreatePHI(LoopIVType, 2, "polly.indvar"); |
153 | 324 | IV->addIncoming(LB, PreHeaderBB); |
154 | 324 | Stride = Builder.CreateZExtOrBitCast(Stride, LoopIVType); |
155 | 324 | Value *IncrementedIV = Builder.CreateNSWAdd(IV, Stride, "polly.indvar_next"); |
156 | 324 | Value *LoopCondition = |
157 | 324 | Builder.CreateICmp(Predicate, IncrementedIV, UB, "polly.loop_cond"); |
158 | 324 | // The conditional branch is emitted into HeaderBB and targets HeaderBB, so the header doubles as the latch. |
159 | 324 | // Create the loop latch and annotate it as such. |
160 | 324 | BranchInst *B = Builder.CreateCondBr(LoopCondition, HeaderBB, ExitBB); |
161 | 324 | if (Annotator) |
162 | 283 | Annotator->annotateLoopLatch(B, NewLoop, Parallel, LoopVectDisabled); |
163 | 324 | // Complete the PHI with the back-edge value, then set ExitBB's immediate dominator: GuardBB if guarded, else HeaderBB. |
164 | 324 | IV->addIncoming(IncrementedIV, HeaderBB); |
165 | 324 | if (GuardBB) |
166 | 133 | DT.changeImmediateDominator(ExitBB, GuardBB); |
167 | 191 | else |
168 | 191 | DT.changeImmediateDominator(ExitBB, HeaderBB); |
169 | 324 | 
170 | 324 | // The loop body should be added here. The builder is left after the header's PHIs and the induction variable is returned. |
171 | 324 | Builder.SetInsertPoint(HeaderBB->getFirstNonPHI()); |
172 | 324 | return IV; |
173 | 324 | } |
174 | | // Stores UsedValues into a struct, builds the subfunction via createSubFn, emits the parallel call, and returns the IV from createSubFn. |
175 | | Value *ParallelLoopGenerator::createParallelLoop( |
176 | | Value *LB, Value *UB, Value *Stride, SetVector<Value *> &UsedValues, |
177 | 41 | ValueMapT &Map, BasicBlock::iterator *LoopBody) { |
178 | 41 | // BeforeLoop records the insert point after the struct stores, so the builder can return there later. |
179 | 41 | AllocaInst *Struct = storeValuesIntoStruct(UsedValues); |
180 | 41 | BasicBlock::iterator BeforeLoop = Builder.GetInsertPoint(); |
181 | 41 | // The builder's insert point after createSubFn is reported through *LoopBody; the builder then goes back to BeforeLoop. |
182 | 41 | Value *IV; |
183 | 41 | Function *SubFn; |
184 | 41 | std::tie(IV, SubFn) = createSubFn(Stride, Struct, UsedValues, Map); |
185 | 41 | *LoopBody = Builder.GetInsertPoint(); |
186 | 41 | Builder.SetInsertPoint(&*BeforeLoop); |
187 | 41 | // The struct is handed to deployParallelExecution as an i8 pointer. |
188 | 41 | Value *SubFnParam = Builder.CreateBitCast(Struct, Builder.getInt8PtrTy(), |
189 | 41 | "polly.par.userContext"); |
190 | 41 | 
191 | 41 | // Add one as the upper bound provided by OpenMP is a < comparison |
192 | 41 | // whereas the codegenForSequential function creates a <= comparison. |
193 | 41 | UB = Builder.CreateAdd(UB, ConstantInt::get(LongType, 1)); |
194 | 41 | 
195 | 41 | // Execute the prepared subfunction in parallel. |
196 | 41 | deployParallelExecution(SubFn, SubFnParam, LB, UB, Stride); |
197 | 41 | 
198 | 41 | return IV; |
199 | 41 | } |
200 | | // Returns the function from prepareSubFnDefinition after replacing every '.' in its name with '_' and adding PollySkipFnAttr. |
201 | 41 | Function *ParallelLoopGenerator::createSubFnDefinition() { |
202 | 41 | Function *F = Builder.GetInsertBlock()->getParent(); |
203 | 41 | Function *SubFn = prepareSubFnDefinition(F); |
204 | 41 | 
205 | 41 | // Certain backends (e.g., NVPTX) do not support '.'s in function names. |
206 | 41 | // Hence, we ensure that all '.'s are replaced by '_'s. |
207 | 41 | std::string FunctionName = SubFn->getName(); |
208 | 41 | std::replace(FunctionName.begin(), FunctionName.end(), '.', '_'); |
209 | 41 | SubFn->setName(FunctionName); |
210 | 41 | 
211 | 41 | // Do not run any polly pass on the new function. |
212 | 41 | SubFn->addFnAttr(PollySkipFnAttr); |
213 | 41 | 
214 | 41 | return SubFn; |
215 | 41 | } |
216 | | // Allocates a struct with one member per entry of Values, stores each value into its member, and returns the alloca. |
217 | | AllocaInst * |
218 | 41 | ParallelLoopGenerator::storeValuesIntoStruct(SetVector<Value *> &Values) { |
219 | 41 | SmallVector<Type *, 8> Members; |
220 | 41 | // Member i of the struct has the type of Values[i]. |
221 | 41 | for (Value *V : Values) |
222 | 81 | Members.push_back(V->getType()); |
223 | 41 | 
224 | 41 | const DataLayout &DL = Builder.GetInsertBlock()->getModule()->getDataLayout(); |
225 | 41 | 
226 | 41 | // We do not want to allocate the alloca inside any loop, thus we allocate it |
227 | 41 | // in the entry block of the function and use annotations to denote the actual |
228 | 41 | // live span (similar to clang). |
229 | 41 | BasicBlock &EntryBB = Builder.GetInsertBlock()->getParent()->getEntryBlock(); |
230 | 41 | Instruction *IP = &*EntryBB.getFirstInsertionPt(); |
231 | 41 | StructType *Ty = StructType::get(Builder.getContext(), Members); |
232 | 41 | AllocaInst *Struct = new AllocaInst(Ty, DL.getAllocaAddrSpace(), nullptr, |
233 | 41 | "polly.par.userContext", IP); |
234 | 41 | // Unlike the alloca, the stores go through Builder and so land at its current insert point. |
235 | 122 | for (unsigned i = 0; i < Values.size(); i++81 ) { |
236 | 81 | Value *Address = Builder.CreateStructGEP(Ty, Struct, i); |
237 | 81 | Address->setName("polly.subfn.storeaddr." + Values[i]->getName()); |
238 | 81 | Builder.CreateStore(Values[i], Address); |
239 | 81 | } |
240 | 41 | 
241 | 41 | return Struct; |
242 | 41 | } |
243 | | // Loads member i of Struct (of type Ty) for each i and records it in Map as the replacement for OldValues[i]. NOTE(review): OldValues is passed by value, so the SetVector is copied per call; confirm this is intended. |
244 | | void ParallelLoopGenerator::extractValuesFromStruct( |
245 | 41 | SetVector<Value *> OldValues, Type *Ty, Value *Struct, ValueMapT &Map) { |
246 | 122 | for (unsigned i = 0; i < OldValues.size(); i++81 ) { |
247 | 81 | Value *Address = Builder.CreateStructGEP(Ty, Struct, i); |
248 | 81 | Value *NewValue = Builder.CreateLoad(Address); |
249 | 81 | NewValue->setName("polly.subfunc.arg." + OldValues[i]->getName()); |
250 | 81 | Map[OldValues[i]] = NewValue; |
251 | 81 | } |
252 | 41 | } |