Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/tools/polly/lib/CodeGen/CodeGeneration.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- CodeGeneration.cpp - Code generate the Scops using ISL. ---------======//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// The CodeGeneration pass takes a Scop created by ScopInfo and translates it
10
// back to LLVM-IR using the ISL code generator.
11
//
12
// The Scop describes the high level memory behavior of a control flow region.
13
// Transformation passes can update the schedule (execution order) of statements
14
// in the Scop. ISL is used to generate an abstract syntax tree that reflects
15
// the updated execution order. This clast is used to create new LLVM-IR that is
16
// computationally equivalent to the original control flow region, but executes
17
// its code in the new execution order defined by the changed schedule.
18
//
19
//===----------------------------------------------------------------------===//
20
21
#include "polly/CodeGen/CodeGeneration.h"
22
#include "polly/CodeGen/IRBuilder.h"
23
#include "polly/CodeGen/IslAst.h"
24
#include "polly/CodeGen/IslNodeBuilder.h"
25
#include "polly/CodeGen/PerfMonitor.h"
26
#include "polly/CodeGen/Utils.h"
27
#include "polly/DependenceInfo.h"
28
#include "polly/LinkAllPasses.h"
29
#include "polly/Options.h"
30
#include "polly/ScopInfo.h"
31
#include "polly/Support/ScopHelper.h"
32
#include "llvm/ADT/Statistic.h"
33
#include "llvm/Analysis/LoopInfo.h"
34
#include "llvm/Analysis/RegionInfo.h"
35
#include "llvm/IR/BasicBlock.h"
36
#include "llvm/IR/Dominators.h"
37
#include "llvm/IR/Function.h"
38
#include "llvm/IR/PassManager.h"
39
#include "llvm/IR/Verifier.h"
40
#include "llvm/Support/Debug.h"
41
#include "llvm/Support/ErrorHandling.h"
42
#include "llvm/Support/raw_ostream.h"
43
#include "isl/ast.h"
44
#include <cassert>
45
46
using namespace llvm;
47
using namespace polly;
48
49
#define DEBUG_TYPE "polly-codegen"
50
51
static cl::opt<bool> Verify("polly-codegen-verify",
52
                            cl::desc("Verify the function generated by Polly"),
53
                            cl::Hidden, cl::init(false), cl::ZeroOrMore,
54
                            cl::cat(PollyCategory));
55
56
bool polly::PerfMonitoring;
57
58
static cl::opt<bool, true>
59
    XPerfMonitoring("polly-codegen-perf-monitoring",
60
                    cl::desc("Add run-time performance monitoring"), cl::Hidden,
61
                    cl::location(polly::PerfMonitoring), cl::init(false),
62
                    cl::ZeroOrMore, cl::cat(PollyCategory));
63
64
STATISTIC(ScopsProcessed, "Number of SCoP processed");
65
STATISTIC(CodegenedScops, "Number of successfully generated SCoPs");
66
STATISTIC(CodegenedAffineLoops,
67
          "Number of original affine loops in SCoPs that have been generated");
68
STATISTIC(CodegenedBoxedLoops,
69
          "Number of original boxed loops in SCoPs that have been generated");
70
71
namespace polly {
72
73
/// Mark a basic block unreachable.
74
///
75
/// Marks the basic block @p Block unreachable by equipping it with an
76
/// UnreachableInst.
77
8
void markBlockUnreachable(BasicBlock &Block, PollyIRBuilder &Builder) {
78
8
  auto *OrigTerminator = Block.getTerminator();
79
8
  Builder.SetInsertPoint(OrigTerminator);
80
8
  Builder.CreateUnreachable();
81
8
  OrigTerminator->eraseFromParent();
82
8
}
83
} // namespace polly
84
85
346
static void verifyGeneratedFunction(Scop &S, Function &F, IslAstInfo &AI) {
86
346
  if (!Verify || !verifyFunction(F, &errs()))
87
346
    return;
88
0
89
0
  LLVM_DEBUG({
90
0
    errs() << "== ISL Codegen created an invalid function ==\n\n== The "
91
0
              "SCoP ==\n";
92
0
    errs() << S;
93
0
    errs() << "\n== The isl AST ==\n";
94
0
    AI.print(errs());
95
0
    errs() << "\n== The invalid function ==\n";
96
0
    F.print(errs());
97
0
  });
98
0
99
0
  llvm_unreachable("Polly generated function could not be verified. Add "
100
0
                   "-polly-codegen-verify=false to disable this assertion.");
101
0
}
102
103
// CodeGeneration adds a lot of BBs without updating the RegionInfo
104
// We make all created BBs belong to the scop's parent region without any
105
// nested structure to keep the RegionInfo verifier happy.
106
301
static void fixRegionInfo(Function &F, Region &ParentRegion, RegionInfo &RI) {
107
5.50k
  for (BasicBlock &BB : F) {
108
5.50k
    if (RI.getRegionFor(&BB))
109
3.60k
      continue;
110
1.89k
111
1.89k
    RI.setRegionFor(&BB, &ParentRegion);
112
1.89k
  }
113
301
}
114
115
/// Remove all lifetime markers (llvm.lifetime.start, llvm.lifetime.end) from
116
/// @R.
117
///
118
/// CodeGeneration does not copy lifetime markers into the optimized SCoP,
119
/// which would leave the them only in the original path. This can transform
120
/// code such as
121
///
122
///     llvm.lifetime.start(%p)
123
///     llvm.lifetime.end(%p)
124
///
125
/// into
126
///
127
///     if (RTC) {
128
///       // generated code
129
///     } else {
130
///       // original code
131
///       llvm.lifetime.start(%p)
132
///     }
133
///     llvm.lifetime.end(%p)
134
///
135
/// The current StackColoring algorithm cannot handle if some, but not all,
136
/// paths from the end marker to the entry block cross the start marker. Same
137
/// for start markers that do not always cross the end markers. We avoid any
138
/// issues by removing all lifetime markers, even from the original code.
139
///
140
/// A better solution could be to hoist all llvm.lifetime.start to the split
141
/// node and all llvm.lifetime.end to the merge node, which should be
142
/// conservatively correct.
143
305
static void removeLifetimeMarkers(Region *R) {
144
1.21k
  for (auto *BB : R->blocks()) {
145
1.21k
    auto InstIt = BB->begin();
146
1.21k
    auto InstEnd = BB->end();
147
1.21k
148
5.33k
    while (InstIt != InstEnd) {
149
4.11k
      auto NextIt = InstIt;
150
4.11k
      ++NextIt;
151
4.11k
152
4.11k
      if (auto *IT = dyn_cast<IntrinsicInst>(&*InstIt)) {
153
21
        switch (IT->getIntrinsicID()) {
154
21
        case Intrinsic::lifetime_start:
155
2
        case Intrinsic::lifetime_end:
156
2
          BB->getInstList().erase(InstIt);
157
2
          break;
158
19
        default:
159
19
          break;
160
4.11k
        }
161
4.11k
      }
162
4.11k
163
4.11k
      InstIt = NextIt;
164
4.11k
    }
165
1.21k
  }
166
305
}
167
168
static bool CodeGen(Scop &S, IslAstInfo &AI, LoopInfo &LI, DominatorTree &DT,
169
306
                    ScalarEvolution &SE, RegionInfo &RI) {
170
306
  // Check whether IslAstInfo uses the same isl_ctx. Since -polly-codegen
171
306
  // reports itself to preserve DependenceInfo and IslAstInfo, we might get
172
306
  // those analysis that were computed by a different ScopInfo for a different
173
306
  // Scop structure. When the ScopInfo/Scop object is freed, there is a high
174
306
  // probability that the new ScopInfo/Scop object will be created at the same
175
306
  // heap position with the same address. Comparing whether the Scop or ScopInfo
176
306
  // address is the expected therefore is unreliable.
177
306
  // Instead, we compare the address of the isl_ctx object. Both, DependenceInfo
178
306
  // and IslAstInfo must hold a reference to the isl_ctx object to ensure it is
179
306
  // not freed before the destruction of those analyses which might happen after
180
306
  // the destruction of the Scop/ScopInfo they refer to.  Hence, the isl_ctx
181
306
  // will not be freed and its space not reused as long there is a
182
306
  // DependenceInfo or IslAstInfo around.
183
306
  IslAst &Ast = AI.getIslAst();
184
306
  if (Ast.getSharedIslCtx() != S.getSharedIslCtx()) {
185
1
    LLVM_DEBUG(dbgs() << "Got an IstAst for a different Scop/isl_ctx\n");
186
1
    return false;
187
1
  }
188
305
189
305
  // Check if we created an isl_ast root node, otherwise exit.
190
305
  isl_ast_node *AstRoot = Ast.getAst();
191
305
  if (!AstRoot)
192
0
    return false;
193
305
194
305
  // Collect statistics. Do it before we modify the IR to avoid having it any
195
305
  // influence on the result.
196
305
  auto ScopStats = S.getStatistics();
197
305
  ScopsProcessed++;
198
305
199
305
  auto &DL = S.getFunction().getParent()->getDataLayout();
200
305
  Region *R = &S.getRegion();
201
305
  assert(!R->isTopLevelRegion() && "Top level regions are not supported");
202
305
203
305
  ScopAnnotator Annotator;
204
305
205
305
  simplifyRegion(R, &DT, &LI, &RI);
206
305
  assert(R->isSimple());
207
305
  BasicBlock *EnteringBB = S.getEnteringBlock();
208
305
  assert(EnteringBB);
209
305
  PollyIRBuilder Builder = createPollyIRBuilder(EnteringBB, Annotator);
210
305
211
305
  // Only build the run-time condition and parameters _after_ having
212
305
  // introduced the conditional branch. This is important as the conditional
213
305
  // branch will guard the original scop from new induction variables that
214
305
  // the SCEVExpander may introduce while code generating the parameters and
215
305
  // which may introduce scalar dependences that prevent us from correctly
216
305
  // code generating this scop.
217
305
  BBPair StartExitBlocks =
218
305
      std::get<0>(executeScopConditionally(S, Builder.getTrue(), DT, RI, LI));
219
305
  BasicBlock *StartBlock = std::get<0>(StartExitBlocks);
220
305
  BasicBlock *ExitBlock = std::get<1>(StartExitBlocks);
221
305
222
305
  removeLifetimeMarkers(R);
223
305
  auto *SplitBlock = StartBlock->getSinglePredecessor();
224
305
225
305
  IslNodeBuilder NodeBuilder(Builder, Annotator, DL, LI, SE, DT, S, StartBlock);
226
305
227
305
  // All arrays must have their base pointers known before
228
305
  // ScopAnnotator::buildAliasScopes.
229
305
  NodeBuilder.allocateNewArrays(StartExitBlocks);
230
305
  Annotator.buildAliasScopes(S);
231
305
232
305
  if (PerfMonitoring) {
233
5
    PerfMonitor P(S, EnteringBB->getParent()->getParent());
234
5
    P.initialize();
235
5
    P.insertRegionStart(SplitBlock->getTerminator());
236
5
237
5
    BasicBlock *MergeBlock = ExitBlock->getUniqueSuccessor();
238
5
    P.insertRegionEnd(MergeBlock->getTerminator());
239
5
  }
240
305
241
305
  // First generate code for the hoisted invariant loads and transitively the
242
305
  // parameters they reference. Afterwards, for the remaining parameters that
243
305
  // might reference the hoisted loads. Finally, build the runtime check
244
305
  // that might reference both hoisted loads as well as parameters.
245
305
  // If the hoisting fails we have to bail and execute the original code.
246
305
  Builder.SetInsertPoint(SplitBlock->getTerminator());
247
305
  if (!NodeBuilder.preloadInvariantLoads()) {
248
4
    // Patch the introduced branch condition to ensure that we always execute
249
4
    // the original SCoP.
250
4
    auto *FalseI1 = Builder.getFalse();
251
4
    auto *SplitBBTerm = Builder.GetInsertBlock()->getTerminator();
252
4
    SplitBBTerm->setOperand(0, FalseI1);
253
4
254
4
    // Since the other branch is hence ignored we mark it as unreachable and
255
4
    // adjust the dominator tree accordingly.
256
4
    auto *ExitingBlock = StartBlock->getUniqueSuccessor();
257
4
    assert(ExitingBlock);
258
4
    auto *MergeBlock = ExitingBlock->getUniqueSuccessor();
259
4
    assert(MergeBlock);
260
4
    markBlockUnreachable(*StartBlock, Builder);
261
4
    markBlockUnreachable(*ExitingBlock, Builder);
262
4
    auto *ExitingBB = S.getExitingBlock();
263
4
    assert(ExitingBB);
264
4
    DT.changeImmediateDominator(MergeBlock, ExitingBB);
265
4
    DT.eraseNode(ExitingBlock);
266
4
267
4
    isl_ast_node_free(AstRoot);
268
301
  } else {
269
301
    NodeBuilder.addParameters(S.getContext().release());
270
301
    Value *RTC = NodeBuilder.createRTC(AI.getRunCondition());
271
301
272
301
    Builder.GetInsertBlock()->getTerminator()->setOperand(0, RTC);
273
301
274
301
    // Explicitly set the insert point to the end of the block to avoid that a
275
301
    // split at the builder's current
276
301
    // insert position would move the malloc calls to the wrong BasicBlock.
277
301
    // Ideally we would just split the block during allocation of the new
278
301
    // arrays, but this would break the assumption that there are no blocks
279
301
    // between polly.start and polly.exiting (at this point).
280
301
    Builder.SetInsertPoint(StartBlock->getTerminator());
281
301
282
301
    NodeBuilder.create(AstRoot);
283
301
    NodeBuilder.finalize();
284
301
    fixRegionInfo(*EnteringBB->getParent(), *R->getParent(), RI);
285
301
286
301
    CodegenedScops++;
287
301
    CodegenedAffineLoops += ScopStats.NumAffineLoops;
288
301
    CodegenedBoxedLoops += ScopStats.NumBoxedLoops;
289
301
  }
290
305
291
305
  Function *F = EnteringBB->getParent();
292
305
  verifyGeneratedFunction(S, *F, AI);
293
305
  for (auto *SubF : NodeBuilder.getParallelSubfunctions())
294
41
    verifyGeneratedFunction(S, *SubF, AI);
295
305
296
305
  // Mark the function such that we run additional cleanup passes on this
297
305
  // function (e.g. mem2reg to rediscover phi nodes).
298
305
  F->addFnAttr("polly-optimized");
299
305
  return true;
300
305
}
301
302
namespace {
303
304
class CodeGeneration : public ScopPass {
305
public:
306
  static char ID;
307
308
  /// The data layout used.
309
  const DataLayout *DL;
310
311
  /// @name The analysis passes we need to generate code.
312
  ///
313
  ///{
314
  LoopInfo *LI;
315
  IslAstInfo *AI;
316
  DominatorTree *DT;
317
  ScalarEvolution *SE;
318
  RegionInfo *RI;
319
  ///}
320
321
327
  CodeGeneration() : ScopPass(ID) {}
322
323
  /// Generate LLVM-IR for the SCoP @p S.
324
306
  bool runOnScop(Scop &S) override {
325
306
    // Skip SCoPs in case they're already code-generated by PPCGCodeGeneration.
326
306
    if (S.isToBeSkipped())
327
0
      return false;
328
306
329
306
    AI = &getAnalysis<IslAstInfoWrapperPass>().getAI();
330
306
    LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
331
306
    DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
332
306
    SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
333
306
    DL = &S.getFunction().getParent()->getDataLayout();
334
306
    RI = &getAnalysis<RegionInfoPass>().getRegionInfo();
335
306
    return CodeGen(S, *AI, *LI, *DT, *SE, *RI);
336
306
  }
337
338
  /// Register all analyses and transformation required.
339
327
  void getAnalysisUsage(AnalysisUsage &AU) const override {
340
327
    ScopPass::getAnalysisUsage(AU);
341
327
342
327
    AU.addRequired<DominatorTreeWrapperPass>();
343
327
    AU.addRequired<IslAstInfoWrapperPass>();
344
327
    AU.addRequired<RegionInfoPass>();
345
327
    AU.addRequired<ScalarEvolutionWrapperPass>();
346
327
    AU.addRequired<ScopDetectionWrapperPass>();
347
327
    AU.addRequired<ScopInfoRegionPass>();
348
327
    AU.addRequired<LoopInfoWrapperPass>();
349
327
350
327
    AU.addPreserved<DependenceInfo>();
351
327
    AU.addPreserved<IslAstInfoWrapperPass>();
352
327
353
327
    // FIXME: We do not yet add regions for the newly generated code to the
354
327
    //        region tree.
355
327
  }
356
};
357
} // namespace
358
359
PreservedAnalyses CodeGenerationPass::run(Scop &S, ScopAnalysisManager &SAM,
360
                                          ScopStandardAnalysisResults &AR,
361
0
                                          SPMUpdater &U) {
362
0
  auto &AI = SAM.getResult<IslAstAnalysis>(S, AR);
363
0
  if (CodeGen(S, AI, AR.LI, AR.DT, AR.SE, AR.RI)) {
364
0
    U.invalidateScop(S);
365
0
    return PreservedAnalyses::none();
366
0
  }
367
0
368
0
  return PreservedAnalyses::all();
369
0
}
370
371
char CodeGeneration::ID = 1;
372
373
0
Pass *polly::createCodeGenerationPass() { return new CodeGeneration(); }
374
375
48.2k
INITIALIZE_PASS_BEGIN(CodeGeneration, "polly-codegen",
376
48.2k
                      "Polly - Create LLVM-IR from SCoPs", false, false);
377
48.2k
INITIALIZE_PASS_DEPENDENCY(DependenceInfo);
378
48.2k
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
379
48.2k
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
380
48.2k
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
381
48.2k
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
382
48.2k
INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass);
383
48.2k
INITIALIZE_PASS_END(CodeGeneration, "polly-codegen",
384
                    "Polly - Create LLVM-IR from SCoPs", false, false)