/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/tools/polly/lib/CodeGen/LoopGenerators.cpp
Line | Count | Source |
1 | | //===------ LoopGenerators.cpp - IR helper to create loops ---------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This file contains functions to create scalar and parallel loops as LLVM-IR. |
10 | | // |
11 | | //===----------------------------------------------------------------------===// |
12 | | |
13 | | #include "polly/CodeGen/LoopGenerators.h" |
14 | | #include "polly/ScopDetection.h" |
15 | | #include "llvm/Analysis/LoopInfo.h" |
16 | | #include "llvm/IR/DataLayout.h" |
17 | | #include "llvm/IR/Dominators.h" |
18 | | #include "llvm/IR/Module.h" |
19 | | #include "llvm/Support/CommandLine.h" |
20 | | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
21 | | |
22 | | using namespace llvm; |
23 | | using namespace polly; |
24 | | |
25 | | static cl::opt<int> |
26 | | PollyNumThreads("polly-num-threads", |
27 | | cl::desc("Number of threads to use (0 = auto)"), cl::Hidden, |
28 | | cl::init(0)); |
29 | | |
30 | | // We generate a loop of either of the following structures: |
31 | | // |
32 | | // BeforeBB BeforeBB |
33 | | // | | |
34 | | // v v |
35 | | // GuardBB PreHeaderBB |
36 | | // / | | _____ |
37 | | // __ PreHeaderBB | v \/ | |
38 | | // / \ / | HeaderBB latch |
39 | | // latch HeaderBB | |\ | |
40 | | // \ / \ / | \------/ |
41 | | // < \ / | |
42 | | // \ / v |
43 | | // ExitBB ExitBB |
44 | | // |
45 | | // depending on whether or not we know that it is executed at least once. If |
46 | | // not, GuardBB checks if the loop is executed at least once. If this is the |
47 | | // case we branch to PreHeaderBB and subsequently to the HeaderBB, which |
48 | | // contains the loop iv 'polly.indvar', the incremented loop iv |
49 | | // 'polly.indvar_next' as well as the condition to check if we execute another |
50 | | // iteration of the loop. After the loop has finished, we branch to ExitBB. |
51 | | // We expect the type of UB, LB, UB+Stride to be large enough for values that |
52 | | // UB may take throughout the execution of the loop, including the computation |
53 | | // of indvar + Stride before the final abort. |
54 | | Value *polly::createLoop(Value *LB, Value *UB, Value *Stride, |
55 | | PollyIRBuilder &Builder, LoopInfo &LI, |
56 | | DominatorTree &DT, BasicBlock *&ExitBB, |
57 | | ICmpInst::Predicate Predicate, |
58 | | ScopAnnotator *Annotator, bool Parallel, bool UseGuard, |
59 | 308 | bool LoopVectDisabled) { |
60 | 308 | Function *F = Builder.GetInsertBlock()->getParent(); |
61 | 308 | LLVMContext &Context = F->getContext(); |
62 | 308 | |
63 | 308 | assert(LB->getType() == UB->getType() && "Types of loop bounds do not match"); |
64 | 308 | IntegerType *LoopIVType = dyn_cast<IntegerType>(UB->getType()); |
65 | 308 | assert(LoopIVType && "UB is not integer?"); |
66 | 308 | |
67 | 308 | BasicBlock *BeforeBB = Builder.GetInsertBlock(); |
68 | 308 | BasicBlock *GuardBB = |
69 | 308 | UseGuard ? BasicBlock::Create(Context, "polly.loop_if", F)129 : nullptr179 ; |
70 | 308 | BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.loop_header", F); |
71 | 308 | BasicBlock *PreHeaderBB = |
72 | 308 | BasicBlock::Create(Context, "polly.loop_preheader", F); |
73 | 308 | |
74 | 308 | // Update LoopInfo |
75 | 308 | Loop *OuterLoop = LI.getLoopFor(BeforeBB); |
76 | 308 | Loop *NewLoop = LI.AllocateLoop(); |
77 | 308 | |
78 | 308 | if (OuterLoop) |
79 | 94 | OuterLoop->addChildLoop(NewLoop); |
80 | 214 | else |
81 | 214 | LI.addTopLevelLoop(NewLoop); |
82 | 308 | |
83 | 308 | if (OuterLoop) { |
84 | 94 | if (GuardBB) |
85 | 45 | OuterLoop->addBasicBlockToLoop(GuardBB, LI); |
86 | 94 | OuterLoop->addBasicBlockToLoop(PreHeaderBB, LI); |
87 | 94 | } |
88 | 308 | |
89 | 308 | NewLoop->addBasicBlockToLoop(HeaderBB, LI); |
90 | 308 | |
91 | 308 | // Notify the annotator (if present) that we have a new loop, but only |
92 | 308 | // after the header block is set. |
93 | 308 | if (Annotator) |
94 | 278 | Annotator->pushLoop(NewLoop, Parallel); |
95 | 308 | |
96 | 308 | // ExitBB |
97 | 308 | ExitBB = SplitBlock(BeforeBB, &*Builder.GetInsertPoint(), &DT, &LI); |
98 | 308 | ExitBB->setName("polly.loop_exit"); |
99 | 308 | |
100 | 308 | // BeforeBB |
101 | 308 | if (GuardBB) { |
102 | 129 | BeforeBB->getTerminator()->setSuccessor(0, GuardBB); |
103 | 129 | DT.addNewBlock(GuardBB, BeforeBB); |
104 | 129 | |
105 | 129 | // GuardBB |
106 | 129 | Builder.SetInsertPoint(GuardBB); |
107 | 129 | Value *LoopGuard; |
108 | 129 | LoopGuard = Builder.CreateICmp(Predicate, LB, UB); |
109 | 129 | LoopGuard->setName("polly.loop_guard"); |
110 | 129 | Builder.CreateCondBr(LoopGuard, PreHeaderBB, ExitBB); |
111 | 129 | DT.addNewBlock(PreHeaderBB, GuardBB); |
112 | 179 | } else { |
113 | 179 | BeforeBB->getTerminator()->setSuccessor(0, PreHeaderBB); |
114 | 179 | DT.addNewBlock(PreHeaderBB, BeforeBB); |
115 | 179 | } |
116 | 308 | |
117 | 308 | // PreHeaderBB |
118 | 308 | Builder.SetInsertPoint(PreHeaderBB); |
119 | 308 | Builder.CreateBr(HeaderBB); |
120 | 308 | |
121 | 308 | // HeaderBB |
122 | 308 | DT.addNewBlock(HeaderBB, PreHeaderBB); |
123 | 308 | Builder.SetInsertPoint(HeaderBB); |
124 | 308 | PHINode *IV = Builder.CreatePHI(LoopIVType, 2, "polly.indvar"); |
125 | 308 | IV->addIncoming(LB, PreHeaderBB); |
126 | 308 | Stride = Builder.CreateZExtOrBitCast(Stride, LoopIVType); |
127 | 308 | Value *IncrementedIV = Builder.CreateNSWAdd(IV, Stride, "polly.indvar_next"); |
128 | 308 | Value *LoopCondition = |
129 | 308 | Builder.CreateICmp(Predicate, IncrementedIV, UB, "polly.loop_cond"); |
130 | 308 | |
131 | 308 | // Create the loop latch and annotate it as such. |
132 | 308 | BranchInst *B = Builder.CreateCondBr(LoopCondition, HeaderBB, ExitBB); |
133 | 308 | if (Annotator) |
134 | 278 | Annotator->annotateLoopLatch(B, NewLoop, Parallel, LoopVectDisabled); |
135 | 308 | |
136 | 308 | IV->addIncoming(IncrementedIV, HeaderBB); |
137 | 308 | if (GuardBB) |
138 | 129 | DT.changeImmediateDominator(ExitBB, GuardBB); |
139 | 179 | else |
140 | 179 | DT.changeImmediateDominator(ExitBB, HeaderBB); |
141 | 308 | |
142 | 308 | // The loop body should be added here. |
143 | 308 | Builder.SetInsertPoint(HeaderBB->getFirstNonPHI()); |
144 | 308 | return IV; |
145 | 308 | } |
146 | | |
147 | | Value *ParallelLoopGenerator::createParallelLoop( |
148 | | Value *LB, Value *UB, Value *Stride, SetVector<Value *> &UsedValues, |
149 | 30 | ValueMapT &Map, BasicBlock::iterator *LoopBody) { |
150 | 30 | Function *SubFn; |
151 | 30 | |
152 | 30 | AllocaInst *Struct = storeValuesIntoStruct(UsedValues); |
153 | 30 | BasicBlock::iterator BeforeLoop = Builder.GetInsertPoint(); |
154 | 30 | Value *IV = createSubFn(Stride, Struct, UsedValues, Map, &SubFn); |
155 | 30 | *LoopBody = Builder.GetInsertPoint(); |
156 | 30 | Builder.SetInsertPoint(&*BeforeLoop); |
157 | 30 | |
158 | 30 | Value *SubFnParam = Builder.CreateBitCast(Struct, Builder.getInt8PtrTy(), |
159 | 30 | "polly.par.userContext"); |
160 | 30 | |
161 | 30 | // Add one as the upper bound provided by OpenMP is a < comparison |
162 | 30 | // whereas the codegenForSequential function creates a <= comparison. |
163 | 30 | UB = Builder.CreateAdd(UB, ConstantInt::get(LongType, 1)); |
164 | 30 | |
165 | 30 | // Tell the runtime we start a parallel loop |
166 | 30 | createCallSpawnThreads(SubFn, SubFnParam, LB, UB, Stride); |
167 | 30 | Builder.CreateCall(SubFn, SubFnParam); |
168 | 30 | createCallJoinThreads(); |
169 | 30 | |
170 | 30 | return IV; |
171 | 30 | } |
172 | | |
173 | | void ParallelLoopGenerator::createCallSpawnThreads(Value *SubFn, |
174 | | Value *SubFnParam, Value *LB, |
175 | 30 | Value *UB, Value *Stride) { |
176 | 30 | const std::string Name = "GOMP_parallel_loop_runtime_start"; |
177 | 30 | |
178 | 30 | Function *F = M->getFunction(Name); |
179 | 30 | |
180 | 30 | // If F is not available, declare it. |
181 | 30 | if (!F) { |
182 | 27 | GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; |
183 | 27 | |
184 | 27 | Type *Params[] = {PointerType::getUnqual(FunctionType::get( |
185 | 27 | Builder.getVoidTy(), Builder.getInt8PtrTy(), false)), |
186 | 27 | Builder.getInt8PtrTy(), |
187 | 27 | Builder.getInt32Ty(), |
188 | 27 | LongType, |
189 | 27 | LongType, |
190 | 27 | LongType}; |
191 | 27 | |
192 | 27 | FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false); |
193 | 27 | F = Function::Create(Ty, Linkage, Name, M); |
194 | 27 | } |
195 | 30 | |
196 | 30 | Value *NumberOfThreads = Builder.getInt32(PollyNumThreads); |
197 | 30 | Value *Args[] = {SubFn, SubFnParam, NumberOfThreads, LB, UB, Stride}; |
198 | 30 | |
199 | 30 | Builder.CreateCall(F, Args); |
200 | 30 | } |
201 | | |
202 | | Value *ParallelLoopGenerator::createCallGetWorkItem(Value *LBPtr, |
203 | 30 | Value *UBPtr) { |
204 | 30 | const std::string Name = "GOMP_loop_runtime_next"; |
205 | 30 | |
206 | 30 | Function *F = M->getFunction(Name); |
207 | 30 | |
208 | 30 | // If F is not available, declare it. |
209 | 30 | if (!F) { |
210 | 27 | GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; |
211 | 27 | Type *Params[] = {LongType->getPointerTo(), LongType->getPointerTo()}; |
212 | 27 | FunctionType *Ty = FunctionType::get(Builder.getInt8Ty(), Params, false); |
213 | 27 | F = Function::Create(Ty, Linkage, Name, M); |
214 | 27 | } |
215 | 30 | |
216 | 30 | Value *Args[] = {LBPtr, UBPtr}; |
217 | 30 | Value *Return = Builder.CreateCall(F, Args); |
218 | 30 | Return = Builder.CreateICmpNE( |
219 | 30 | Return, Builder.CreateZExt(Builder.getFalse(), Return->getType())); |
220 | 30 | return Return; |
221 | 30 | } |
222 | | |
223 | 30 | void ParallelLoopGenerator::createCallJoinThreads() { |
224 | 30 | const std::string Name = "GOMP_parallel_end"; |
225 | 30 | |
226 | 30 | Function *F = M->getFunction(Name); |
227 | 30 | |
228 | 30 | // If F is not available, declare it. |
229 | 30 | if (!F) { |
230 | 27 | GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; |
231 | 27 | |
232 | 27 | FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false); |
233 | 27 | F = Function::Create(Ty, Linkage, Name, M); |
234 | 27 | } |
235 | 30 | |
236 | 30 | Builder.CreateCall(F, {}); |
237 | 30 | } |
238 | | |
239 | 30 | void ParallelLoopGenerator::createCallCleanupThread() { |
240 | 30 | const std::string Name = "GOMP_loop_end_nowait"; |
241 | 30 | |
242 | 30 | Function *F = M->getFunction(Name); |
243 | 30 | |
244 | 30 | // If F is not available, declare it. |
245 | 30 | if (!F) { |
246 | 27 | GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; |
247 | 27 | |
248 | 27 | FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false); |
249 | 27 | F = Function::Create(Ty, Linkage, Name, M); |
250 | 27 | } |
251 | 30 | |
252 | 30 | Builder.CreateCall(F, {}); |
253 | 30 | } |
254 | | |
255 | 30 | Function *ParallelLoopGenerator::createSubFnDefinition() { |
256 | 30 | Function *F = Builder.GetInsertBlock()->getParent(); |
257 | 30 | std::vector<Type *> Arguments(1, Builder.getInt8PtrTy()); |
258 | 30 | FunctionType *FT = FunctionType::get(Builder.getVoidTy(), Arguments, false); |
259 | 30 | Function *SubFn = Function::Create(FT, Function::InternalLinkage, |
260 | 30 | F->getName() + "_polly_subfn", M); |
261 | 30 | |
262 | 30 | // Certain backends (e.g., NVPTX) do not support '.'s in function names. |
263 | 30 | // Hence, we ensure that all '.'s are replaced by '_'s. |
264 | 30 | std::string FunctionName = SubFn->getName(); |
265 | 30 | std::replace(FunctionName.begin(), FunctionName.end(), '.', '_'); |
266 | 30 | SubFn->setName(FunctionName); |
267 | 30 | |
268 | 30 | // Do not run any polly pass on the new function. |
269 | 30 | SubFn->addFnAttr(PollySkipFnAttr); |
270 | 30 | |
271 | 30 | Function::arg_iterator AI = SubFn->arg_begin(); |
272 | 30 | AI->setName("polly.par.userContext"); |
273 | 30 | |
274 | 30 | return SubFn; |
275 | 30 | } |
276 | | |
277 | | AllocaInst * |
278 | 30 | ParallelLoopGenerator::storeValuesIntoStruct(SetVector<Value *> &Values) { |
279 | 30 | SmallVector<Type *, 8> Members; |
280 | 30 | |
281 | 30 | for (Value *V : Values) |
282 | 74 | Members.push_back(V->getType()); |
283 | 30 | |
284 | 30 | const DataLayout &DL = Builder.GetInsertBlock()->getModule()->getDataLayout(); |
285 | 30 | |
286 | 30 | // We do not want to allocate the alloca inside any loop, thus we allocate it |
287 | 30 | // in the entry block of the function and use annotations to denote the actual |
288 | 30 | // live span (similar to clang). |
289 | 30 | BasicBlock &EntryBB = Builder.GetInsertBlock()->getParent()->getEntryBlock(); |
290 | 30 | Instruction *IP = &*EntryBB.getFirstInsertionPt(); |
291 | 30 | StructType *Ty = StructType::get(Builder.getContext(), Members); |
292 | 30 | AllocaInst *Struct = new AllocaInst(Ty, DL.getAllocaAddrSpace(), nullptr, |
293 | 30 | "polly.par.userContext", IP); |
294 | 30 | |
295 | 104 | for (unsigned i = 0; i < Values.size(); i++74 ) { |
296 | 74 | Value *Address = Builder.CreateStructGEP(Ty, Struct, i); |
297 | 74 | Address->setName("polly.subfn.storeaddr." + Values[i]->getName()); |
298 | 74 | Builder.CreateStore(Values[i], Address); |
299 | 74 | } |
300 | 30 | |
301 | 30 | return Struct; |
302 | 30 | } |
303 | | |
304 | | void ParallelLoopGenerator::extractValuesFromStruct( |
305 | 30 | SetVector<Value *> OldValues, Type *Ty, Value *Struct, ValueMapT &Map) { |
306 | 104 | for (unsigned i = 0; i < OldValues.size(); i++74 ) { |
307 | 74 | Value *Address = Builder.CreateStructGEP(Ty, Struct, i); |
308 | 74 | Value *NewValue = Builder.CreateLoad(Address); |
309 | 74 | NewValue->setName("polly.subfunc.arg." + OldValues[i]->getName()); |
310 | 74 | Map[OldValues[i]] = NewValue; |
311 | 74 | } |
312 | 30 | } |
313 | | |
314 | | Value *ParallelLoopGenerator::createSubFn(Value *Stride, AllocaInst *StructData, |
315 | | SetVector<Value *> Data, |
316 | 30 | ValueMapT &Map, Function **SubFnPtr) { |
317 | 30 | BasicBlock *PrevBB, *HeaderBB, *ExitBB, *CheckNextBB, *PreHeaderBB, *AfterBB; |
318 | 30 | Value *LBPtr, *UBPtr, *UserContext, *Ret1, *HasNextSchedule, *LB, *UB, *IV; |
319 | 30 | Function *SubFn = createSubFnDefinition(); |
320 | 30 | LLVMContext &Context = SubFn->getContext(); |
321 | 30 | |
322 | 30 | // Store the previous basic block. |
323 | 30 | PrevBB = Builder.GetInsertBlock(); |
324 | 30 | |
325 | 30 | // Create basic blocks. |
326 | 30 | HeaderBB = BasicBlock::Create(Context, "polly.par.setup", SubFn); |
327 | 30 | ExitBB = BasicBlock::Create(Context, "polly.par.exit", SubFn); |
328 | 30 | CheckNextBB = BasicBlock::Create(Context, "polly.par.checkNext", SubFn); |
329 | 30 | PreHeaderBB = BasicBlock::Create(Context, "polly.par.loadIVBounds", SubFn); |
330 | 30 | |
331 | 30 | DT.addNewBlock(HeaderBB, PrevBB); |
332 | 30 | DT.addNewBlock(ExitBB, HeaderBB); |
333 | 30 | DT.addNewBlock(CheckNextBB, HeaderBB); |
334 | 30 | DT.addNewBlock(PreHeaderBB, HeaderBB); |
335 | 30 | |
336 | 30 | // Fill up basic block HeaderBB. |
337 | 30 | Builder.SetInsertPoint(HeaderBB); |
338 | 30 | LBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.LBPtr"); |
339 | 30 | UBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.UBPtr"); |
340 | 30 | UserContext = Builder.CreateBitCast( |
341 | 30 | &*SubFn->arg_begin(), StructData->getType(), "polly.par.userContext"); |
342 | 30 | |
343 | 30 | extractValuesFromStruct(Data, StructData->getAllocatedType(), UserContext, |
344 | 30 | Map); |
345 | 30 | Builder.CreateBr(CheckNextBB); |
346 | 30 | |
347 | 30 | // Add code to check if another set of iterations will be executed. |
348 | 30 | Builder.SetInsertPoint(CheckNextBB); |
349 | 30 | Ret1 = createCallGetWorkItem(LBPtr, UBPtr); |
350 | 30 | HasNextSchedule = Builder.CreateTrunc(Ret1, Builder.getInt1Ty(), |
351 | 30 | "polly.par.hasNextScheduleBlock"); |
352 | 30 | Builder.CreateCondBr(HasNextSchedule, PreHeaderBB, ExitBB); |
353 | 30 | |
354 | 30 | // Add code to load the iv bounds for this set of iterations. |
355 | 30 | Builder.SetInsertPoint(PreHeaderBB); |
356 | 30 | LB = Builder.CreateLoad(LBPtr, "polly.par.LB"); |
357 | 30 | UB = Builder.CreateLoad(UBPtr, "polly.par.UB"); |
358 | 30 | |
359 | 30 | // Subtract one as the upper bound provided by OpenMP is a < comparison |
360 | 30 | // whereas the codegenForSequential function creates a <= comparison. |
361 | 30 | UB = Builder.CreateSub(UB, ConstantInt::get(LongType, 1), |
362 | 30 | "polly.par.UBAdjusted"); |
363 | 30 | |
364 | 30 | Builder.CreateBr(CheckNextBB); |
365 | 30 | Builder.SetInsertPoint(&*--Builder.GetInsertPoint()); |
366 | 30 | IV = createLoop(LB, UB, Stride, Builder, LI, DT, AfterBB, ICmpInst::ICMP_SLE, |
367 | 30 | nullptr, true, /* UseGuard */ false); |
368 | 30 | |
369 | 30 | BasicBlock::iterator LoopBody = Builder.GetInsertPoint(); |
370 | 30 | |
371 | 30 | // Add code to terminate this subfunction. |
372 | 30 | Builder.SetInsertPoint(ExitBB); |
373 | 30 | createCallCleanupThread(); |
374 | 30 | Builder.CreateRetVoid(); |
375 | 30 | |
376 | 30 | Builder.SetInsertPoint(&*LoopBody); |
377 | 30 | *SubFnPtr = SubFn; |
378 | 30 | |
379 | 30 | return IV; |
380 | 30 | } |