Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/tools/polly/lib/CodeGen/LoopGenerators.cpp
Line
Count
Source
1
//===------ LoopGenerators.cpp -  IR helper to create loops ---------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file contains functions to create scalar loops and orchestrate the
10
// creation of parallel loops as LLVM-IR.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#include "polly/CodeGen/LoopGenerators.h"
15
#include "polly/Options.h"
16
#include "polly/ScopDetection.h"
17
#include "llvm/Analysis/LoopInfo.h"
18
#include "llvm/IR/DataLayout.h"
19
#include "llvm/IR/Dominators.h"
20
#include "llvm/IR/Module.h"
21
#include "llvm/Support/CommandLine.h"
22
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
23
24
using namespace llvm;
25
using namespace polly;
26
27
int polly::PollyNumThreads;
28
OMPGeneralSchedulingType polly::PollyScheduling;
29
int polly::PollyChunkSize;
30
31
// -polly-num-threads: hidden option stored in polly::PollyNumThreads.
// Defaults to 0, which the option description documents as "auto".
// Note: cl::location is listed before cl::init; keep that order.
static cl::opt<int, true>
    XPollyNumThreads("polly-num-threads",
                     cl::desc("Number of threads to use (0 = auto)"),
                     cl::Hidden, cl::location(polly::PollyNumThreads),
                     cl::init(0), cl::cat(PollyCategory));
36
37
// -polly-scheduling: hidden option stored in polly::PollyScheduling.
// Accepted values are "static", "dynamic", "guided" and "runtime"; note that
// "static" selects OMPGeneralSchedulingType::StaticChunked. The default is
// OMPGeneralSchedulingType::Runtime.
static cl::opt<OMPGeneralSchedulingType, true> XPollyScheduling(
    "polly-scheduling",
    cl::desc("Scheduling type of parallel OpenMP for loops"),
    cl::values(clEnumValN(OMPGeneralSchedulingType::StaticChunked, "static",
                          "Static scheduling"),
               clEnumValN(OMPGeneralSchedulingType::Dynamic, "dynamic",
                          "Dynamic scheduling"),
               clEnumValN(OMPGeneralSchedulingType::Guided, "guided",
                          "Guided scheduling"),
               clEnumValN(OMPGeneralSchedulingType::Runtime, "runtime",
                          "Runtime determined (OMP_SCHEDULE)")),
    cl::Hidden, cl::location(polly::PollyScheduling),
    cl::init(OMPGeneralSchedulingType::Runtime), cl::Optional,
    cl::cat(PollyCategory));
51
52
// -polly-scheduling-chunksize: hidden option stored in polly::PollyChunkSize.
// Defaults to 0.
static cl::opt<int, true>
    XPollyChunkSize("polly-scheduling-chunksize",
                    cl::desc("Chunksize to use by the OpenMP runtime calls"),
                    cl::Hidden, cl::location(polly::PollyChunkSize),
                    cl::init(0), cl::Optional, cl::cat(PollyCategory));
57
58
// We generate a loop of either of the following structures:
59
//
60
//              BeforeBB                      BeforeBB
61
//                 |                             |
62
//                 v                             v
63
//              GuardBB                      PreHeaderBB
64
//              /      |                         |   _____
65
//     __  PreHeaderBB  |                        v  \/    |
66
//    /  \    /         |                     HeaderBB  latch
67
// latch  HeaderBB      |                        |\       |
68
//    \  /    \         /                        | \------/
69
//     <       \       /                         |
70
//              \     /                          v
71
//              ExitBB                         ExitBB
72
//
73
// depending on whether or not we know that it is executed at least once. If
74
// not, GuardBB checks if the loop is executed at least once. If this is the
75
// case we branch to PreHeaderBB and subsequently to the HeaderBB, which
76
// contains the loop iv 'polly.indvar', the incremented loop iv
77
// 'polly.indvar_next' as well as the condition to check if we execute another
78
// iteration of the loop. After the loop has finished, we branch to ExitBB.
79
// We expect the type of UB, LB, UB+Stride to be large enough for values that
80
// UB may take throughout the execution of the loop, including the computation
81
// of indvar + Stride before the final abort.
82
Value *polly::createLoop(Value *LB, Value *UB, Value *Stride,
83
                         PollyIRBuilder &Builder, LoopInfo &LI,
84
                         DominatorTree &DT, BasicBlock *&ExitBB,
85
                         ICmpInst::Predicate Predicate,
86
                         ScopAnnotator *Annotator, bool Parallel, bool UseGuard,
87
324
                         bool LoopVectDisabled) {
88
324
  Function *F = Builder.GetInsertBlock()->getParent();
89
324
  LLVMContext &Context = F->getContext();
90
324
91
324
  assert(LB->getType() == UB->getType() && "Types of loop bounds do not match");
92
324
  IntegerType *LoopIVType = dyn_cast<IntegerType>(UB->getType());
93
324
  assert(LoopIVType && "UB is not integer?");
94
324
95
324
  BasicBlock *BeforeBB = Builder.GetInsertBlock();
96
324
  BasicBlock *GuardBB =
97
324
      UseGuard ? 
BasicBlock::Create(Context, "polly.loop_if", F)133
:
nullptr191
;
98
324
  BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.loop_header", F);
99
324
  BasicBlock *PreHeaderBB =
100
324
      BasicBlock::Create(Context, "polly.loop_preheader", F);
101
324
102
324
  // Update LoopInfo
103
324
  Loop *OuterLoop = LI.getLoopFor(BeforeBB);
104
324
  Loop *NewLoop = LI.AllocateLoop();
105
324
106
324
  if (OuterLoop)
107
98
    OuterLoop->addChildLoop(NewLoop);
108
226
  else
109
226
    LI.addTopLevelLoop(NewLoop);
110
324
111
324
  if (OuterLoop) {
112
98
    if (GuardBB)
113
48
      OuterLoop->addBasicBlockToLoop(GuardBB, LI);
114
98
    OuterLoop->addBasicBlockToLoop(PreHeaderBB, LI);
115
98
  }
116
324
117
324
  NewLoop->addBasicBlockToLoop(HeaderBB, LI);
118
324
119
324
  // Notify the annotator (if present) that we have a new loop, but only
120
324
  // after the header block is set.
121
324
  if (Annotator)
122
283
    Annotator->pushLoop(NewLoop, Parallel);
123
324
124
324
  // ExitBB
125
324
  ExitBB = SplitBlock(BeforeBB, &*Builder.GetInsertPoint(), &DT, &LI);
126
324
  ExitBB->setName("polly.loop_exit");
127
324
128
324
  // BeforeBB
129
324
  if (GuardBB) {
130
133
    BeforeBB->getTerminator()->setSuccessor(0, GuardBB);
131
133
    DT.addNewBlock(GuardBB, BeforeBB);
132
133
133
133
    // GuardBB
134
133
    Builder.SetInsertPoint(GuardBB);
135
133
    Value *LoopGuard;
136
133
    LoopGuard = Builder.CreateICmp(Predicate, LB, UB);
137
133
    LoopGuard->setName("polly.loop_guard");
138
133
    Builder.CreateCondBr(LoopGuard, PreHeaderBB, ExitBB);
139
133
    DT.addNewBlock(PreHeaderBB, GuardBB);
140
191
  } else {
141
191
    BeforeBB->getTerminator()->setSuccessor(0, PreHeaderBB);
142
191
    DT.addNewBlock(PreHeaderBB, BeforeBB);
143
191
  }
144
324
145
324
  // PreHeaderBB
146
324
  Builder.SetInsertPoint(PreHeaderBB);
147
324
  Builder.CreateBr(HeaderBB);
148
324
149
324
  // HeaderBB
150
324
  DT.addNewBlock(HeaderBB, PreHeaderBB);
151
324
  Builder.SetInsertPoint(HeaderBB);
152
324
  PHINode *IV = Builder.CreatePHI(LoopIVType, 2, "polly.indvar");
153
324
  IV->addIncoming(LB, PreHeaderBB);
154
324
  Stride = Builder.CreateZExtOrBitCast(Stride, LoopIVType);
155
324
  Value *IncrementedIV = Builder.CreateNSWAdd(IV, Stride, "polly.indvar_next");
156
324
  Value *LoopCondition =
157
324
      Builder.CreateICmp(Predicate, IncrementedIV, UB, "polly.loop_cond");
158
324
159
324
  // Create the loop latch and annotate it as such.
160
324
  BranchInst *B = Builder.CreateCondBr(LoopCondition, HeaderBB, ExitBB);
161
324
  if (Annotator)
162
283
    Annotator->annotateLoopLatch(B, NewLoop, Parallel, LoopVectDisabled);
163
324
164
324
  IV->addIncoming(IncrementedIV, HeaderBB);
165
324
  if (GuardBB)
166
133
    DT.changeImmediateDominator(ExitBB, GuardBB);
167
191
  else
168
191
    DT.changeImmediateDominator(ExitBB, HeaderBB);
169
324
170
324
  // The loop body should be added here.
171
324
  Builder.SetInsertPoint(HeaderBB->getFirstNonPHI());
172
324
  return IV;
173
324
}
174
175
// Create a parallel loop: the values in UsedValues are stored into a context
// struct, a subfunction is created through createSubFn(), and the call that
// runs the subfunction in parallel is emitted at the original insert point via
// deployParallelExecution().
//
// The builder's insert point after createSubFn() is written to *LoopBody, and
// the builder itself is moved back to where it was before the subfunction was
// created. The induction variable returned by createSubFn() is returned.
Value *ParallelLoopGenerator::createParallelLoop(
    Value *LB, Value *UB, Value *Stride, SetVector<Value *> &UsedValues,
    ValueMapT &Map, BasicBlock::iterator *LoopBody) {

  AllocaInst *Struct = storeValuesIntoStruct(UsedValues);

  // Remember where the caller was emitting code so we can return to it.
  BasicBlock::iterator BeforeLoop = Builder.GetInsertPoint();

  auto SubFnResult = createSubFn(Stride, Struct, UsedValues, Map);
  Value *IV = std::get<0>(SubFnResult);
  Function *SubFn = std::get<1>(SubFnResult);

  *LoopBody = Builder.GetInsertPoint();
  Builder.SetInsertPoint(&*BeforeLoop);

  // The context struct is handed to the subfunction as an i8 pointer.
  Type *Int8PtrTy = Builder.getInt8PtrTy();
  Value *SubFnParam =
      Builder.CreateBitCast(Struct, Int8PtrTy, "polly.par.userContext");

  // Add one as the upper bound provided by OpenMP is a < comparison
  // whereas the codegenForSequential function creates a <= comparison.
  Value *One = ConstantInt::get(LongType, 1);
  UB = Builder.CreateAdd(UB, One);

  // Execute the prepared subfunction in parallel.
  deployParallelExecution(SubFn, SubFnParam, LB, UB, Stride);

  return IV;
}
200
201
41
// Create the definition of the subfunction for the function the builder is
// currently emitting into. The function itself comes from
// prepareSubFnDefinition(); here its name is sanitized and it is marked so
// that Polly does not process it again.
Function *ParallelLoopGenerator::createSubFnDefinition() {
  Function *ParentFn = Builder.GetInsertBlock()->getParent();
  Function *SubFn = prepareSubFnDefinition(ParentFn);

  // Certain backends (e.g., NVPTX) do not support '.'s in function names.
  // Hence, we ensure that all '.'s are replaced by '_'s.
  std::string SanitizedName = SubFn->getName().str();
  for (char &C : SanitizedName)
    if (C == '.')
      C = '_';
  SubFn->setName(SanitizedName);

  // Do not run any polly pass on the new function.
  SubFn->addFnAttr(PollySkipFnAttr);

  return SubFn;
}
216
217
// Pack the given values into a freshly allocated struct.
//
// The struct has one member per value, in the iteration order of Values. The
// alloca is placed in the entry block of the current function, while the
// stores that fill it are emitted at the builder's current insert point.
// Returns the alloca.
AllocaInst *
ParallelLoopGenerator::storeValuesIntoStruct(SetVector<Value *> &Values) {
  // The member types of the struct mirror the types of the values.
  SmallVector<Type *, 8> MemberTypes;
  for (Value *V : Values)
    MemberTypes.push_back(V->getType());
  StructType *Ty = StructType::get(Builder.getContext(), MemberTypes);

  BasicBlock *CurrentBB = Builder.GetInsertBlock();
  const DataLayout &DL = CurrentBB->getModule()->getDataLayout();

  // We do not want to allocate the alloca inside any loop, thus we allocate it
  // in the entry block of the function. The actual live span is expected to be
  // denoted separately by annotations (similar to clang); that is not done in
  // this function.
  BasicBlock &EntryBB = CurrentBB->getParent()->getEntryBlock();
  Instruction *IP = &*EntryBB.getFirstInsertionPt();
  AllocaInst *Struct = new AllocaInst(Ty, DL.getAllocaAddrSpace(), nullptr,
                                      "polly.par.userContext", IP);

  // Store every value into the struct member with the matching index.
  unsigned MemberIdx = 0;
  for (Value *V : Values) {
    Value *Address = Builder.CreateStructGEP(Ty, Struct, MemberIdx);
    Address->setName("polly.subfn.storeaddr." + V->getName());
    Builder.CreateStore(V, Address);
    ++MemberIdx;
  }

  return Struct;
}
243
244
void ParallelLoopGenerator::extractValuesFromStruct(
245
41
    SetVector<Value *> OldValues, Type *Ty, Value *Struct, ValueMapT &Map) {
246
122
  for (unsigned i = 0; i < OldValues.size(); 
i++81
) {
247
81
    Value *Address = Builder.CreateStructGEP(Ty, Struct, i);
248
81
    Value *NewValue = Builder.CreateLoad(Address);
249
81
    NewValue->setName("polly.subfunc.arg." + OldValues[i]->getName());
250
81
    Map[OldValues[i]] = NewValue;
251
81
  }
252
41
}