Coverage Report

Created: 2017-04-27 19:33

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/tools/polly/lib/Support/RegisterPasses.cpp
Line
Count
Source (jump to first uncovered line)
1
//===------ RegisterPasses.cpp - Add the Polly Passes to default passes  --===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
// This file composes the individual LLVM-IR passes provided by Polly to a
11
// functional polyhedral optimizer. The polyhedral optimizer is automatically
12
// made available to LLVM based compilers by loading the Polly shared library
13
// into such a compiler.
14
//
15
// The Polly optimizer is made available by executing a static constructor that
16
// registers the individual Polly passes in the LLVM pass manager builder. The
17
// passes are registered such that the default behaviour of the compiler is not
18
// changed, but that the flag '-polly' provided at optimization level '-O3'
19
// enables additional polyhedral optimizations.
20
//===----------------------------------------------------------------------===//
21
22
#include "polly/RegisterPasses.h"
23
#include "polly/Canonicalization.h"
24
#include "polly/CodeGen/CodeGeneration.h"
25
#include "polly/CodeGen/CodegenCleanup.h"
26
#include "polly/DeLICM.h"
27
#include "polly/DependenceInfo.h"
28
#include "polly/FlattenSchedule.h"
29
#include "polly/LinkAllPasses.h"
30
#include "polly/Options.h"
31
#include "polly/PolyhedralInfo.h"
32
#include "polly/ScopDetection.h"
33
#include "polly/ScopInfo.h"
34
#include "polly/Simplify.h"
35
#include "polly/Support/DumpModulePass.h"
36
#include "llvm/Analysis/CFGPrinter.h"
37
#include "llvm/IR/LegacyPassManager.h"
38
#include "llvm/Transforms/IPO.h"
39
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
40
#include "llvm/Transforms/Scalar.h"
41
#include "llvm/Transforms/Vectorize.h"
42
43
using namespace llvm;
44
using namespace polly;
45
46
cl::OptionCategory PollyCategory("Polly Options",
47
                                 "Configure the polly loop optimizer");
48
49
static cl::opt<bool>
50
    PollyEnabled("polly", cl::desc("Enable the polly optimizer (only at -O3)"),
51
                 cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
52
53
static cl::opt<bool> PollyDetectOnly(
54
    "polly-only-scop-detection",
55
    cl::desc("Only run scop detection, but no other optimizations"),
56
    cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
57
58
enum PassPositionChoice {
59
  POSITION_EARLY,
60
  POSITION_AFTER_LOOPOPT,
61
  POSITION_BEFORE_VECTORIZER
62
};
63
64
enum OptimizerChoice { OPTIMIZER_NONE, OPTIMIZER_ISL };
65
66
static cl::opt<PassPositionChoice> PassPosition(
67
    "polly-position", cl::desc("Where to run polly in the pass pipeline"),
68
    cl::values(
69
        clEnumValN(POSITION_EARLY, "early", "Before everything"),
70
        clEnumValN(POSITION_AFTER_LOOPOPT, "after-loopopt",
71
                   "After the loop optimizer (but within the inline cycle)"),
72
        clEnumValN(POSITION_BEFORE_VECTORIZER, "before-vectorizer",
73
                   "Right before the vectorizer")),
74
    cl::Hidden, cl::init(POSITION_EARLY), cl::ZeroOrMore,
75
    cl::cat(PollyCategory));
76
77
static cl::opt<OptimizerChoice>
78
    Optimizer("polly-optimizer", cl::desc("Select the scheduling optimizer"),
79
              cl::values(clEnumValN(OPTIMIZER_NONE, "none", "No optimizer"),
80
                         clEnumValN(OPTIMIZER_ISL, "isl",
81
                                    "The isl scheduling optimizer")),
82
              cl::Hidden, cl::init(OPTIMIZER_ISL), cl::ZeroOrMore,
83
              cl::cat(PollyCategory));
84
85
enum CodeGenChoice { CODEGEN_FULL, CODEGEN_AST, CODEGEN_NONE };
86
static cl::opt<CodeGenChoice> CodeGeneration(
87
    "polly-code-generation", cl::desc("How much code-generation to perform"),
88
    cl::values(clEnumValN(CODEGEN_FULL, "full", "AST and IR generation"),
89
               clEnumValN(CODEGEN_AST, "ast", "Only AST generation"),
90
               clEnumValN(CODEGEN_NONE, "none", "No code generation")),
91
    cl::Hidden, cl::init(CODEGEN_FULL), cl::ZeroOrMore, cl::cat(PollyCategory));
92
93
enum TargetChoice { TARGET_CPU, TARGET_GPU };
94
static cl::opt<TargetChoice>
95
    Target("polly-target", cl::desc("The hardware to target"),
96
           cl::values(clEnumValN(TARGET_CPU, "cpu", "generate CPU code")
97
#ifdef GPU_CODEGEN
98
                          ,
99
                      clEnumValN(TARGET_GPU, "gpu", "generate GPU code")
100
#endif
101
                          ),
102
           cl::init(TARGET_CPU), cl::ZeroOrMore, cl::cat(PollyCategory));
103
104
VectorizerChoice polly::PollyVectorizerChoice;
105
static cl::opt<polly::VectorizerChoice, true> Vectorizer(
106
    "polly-vectorizer", cl::desc("Select the vectorization strategy"),
107
    cl::values(
108
        clEnumValN(polly::VECTORIZER_NONE, "none", "No Vectorization"),
109
        clEnumValN(polly::VECTORIZER_POLLY, "polly",
110
                   "Polly internal vectorizer"),
111
        clEnumValN(
112
            polly::VECTORIZER_STRIPMINE, "stripmine",
113
            "Strip-mine outer loops for the loop-vectorizer to trigger")),
114
    cl::location(PollyVectorizerChoice), cl::init(polly::VECTORIZER_NONE),
115
    cl::ZeroOrMore, cl::cat(PollyCategory));
116
117
static cl::opt<bool> ImportJScop(
118
    "polly-import",
119
    cl::desc("Import the polyhedral description of the detected Scops"),
120
    cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
121
122
static cl::opt<bool> ExportJScop(
123
    "polly-export",
124
    cl::desc("Export the polyhedral description of the detected Scops"),
125
    cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
126
127
static cl::opt<bool> DeadCodeElim("polly-run-dce",
128
                                  cl::desc("Run the dead code elimination"),
129
                                  cl::Hidden, cl::init(false), cl::ZeroOrMore,
130
                                  cl::cat(PollyCategory));
131
132
static cl::opt<bool> PollyViewer(
133
    "polly-show",
134
    cl::desc("Highlight the code regions that will be optimized in a "
135
             "(CFG BBs and LLVM-IR instructions)"),
136
    cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
137
138
static cl::opt<bool> PollyOnlyViewer(
139
    "polly-show-only",
140
    cl::desc("Highlight the code regions that will be optimized in "
141
             "a (CFG only BBs)"),
142
    cl::init(false), cl::cat(PollyCategory));
143
144
static cl::opt<bool>
145
    PollyPrinter("polly-dot", cl::desc("Enable the Polly DOT printer in -O3"),
146
                 cl::Hidden, cl::value_desc("Run the Polly DOT printer at -O3"),
147
                 cl::init(false), cl::cat(PollyCategory));
148
149
static cl::opt<bool> PollyOnlyPrinter(
150
    "polly-dot-only",
151
    cl::desc("Enable the Polly DOT printer in -O3 (no BB content)"), cl::Hidden,
152
    cl::value_desc("Run the Polly DOT printer at -O3 (no BB content)"),
153
    cl::init(false), cl::cat(PollyCategory));
154
155
static cl::opt<bool>
156
    CFGPrinter("polly-view-cfg",
157
               cl::desc("Show the Polly CFG right after code generation"),
158
               cl::Hidden, cl::init(false), cl::cat(PollyCategory));
159
160
static cl::opt<bool>
161
    EnablePolyhedralInfo("polly-enable-polyhedralinfo",
162
                         cl::desc("Enable polyhedral interface of Polly"),
163
                         cl::Hidden, cl::init(false), cl::cat(PollyCategory));
164
165
static cl::opt<bool>
166
    DumpBefore("polly-dump-before",
167
               cl::desc("Dump module before Polly transformations into a file "
168
                        "suffixed with \"-before\""),
169
               cl::init(false), cl::cat(PollyCategory));
170
171
static cl::list<std::string> DumpBeforeFile(
172
    "polly-dump-before-file",
173
    cl::desc("Dump module before Polly transformations to the given file"),
174
    cl::cat(PollyCategory));
175
176
static cl::opt<bool>
177
    DumpAfter("polly-dump-after",
178
              cl::desc("Dump module after Polly transformations into a file "
179
                       "suffixed with \"-after\""),
180
              cl::init(false), cl::cat(PollyCategory));
181
182
static cl::list<std::string> DumpAfterFile(
183
    "polly-dump-after-file",
184
    cl::desc("Dump module after Polly transformations to the given file"),
185
    cl::ZeroOrMore, cl::cat(PollyCategory));
186
187
static cl::opt<bool>
188
    EnableDeLICM("polly-enable-delicm",
189
                 cl::desc("Eliminate scalar loop carried dependences"),
190
                 cl::Hidden, cl::init(false), cl::cat(PollyCategory));
191
192
static cl::opt<bool>
193
    EnableSimplify("polly-enable-simplify",
194
                   cl::desc("Simplify SCoP after optimizations"),
195
                   cl::init(false), cl::cat(PollyCategory));
196
197
static cl::opt<bool> EnablePruneUnprofitable(
198
    "polly-enable-prune-unprofitable",
199
    cl::desc("Bail out on unprofitable SCoPs before rescheduling"), cl::Hidden,
200
    cl::init(true), cl::cat(PollyCategory));
201
202
namespace polly {
203
39.7k
void initializePollyPasses(PassRegistry &Registry) {
204
39.7k
  initializeCodeGenerationPass(Registry);
205
39.7k
206
39.7k
#ifdef GPU_CODEGEN
207
  initializePPCGCodeGenerationPass(Registry);
208
#endif
209
39.7k
  initializeCodePreparationPass(Registry);
210
39.7k
  initializeDeadCodeElimPass(Registry);
211
39.7k
  initializeDependenceInfoPass(Registry);
212
39.7k
  initializeDependenceInfoWrapperPassPass(Registry);
213
39.7k
  initializeJSONExporterPass(Registry);
214
39.7k
  initializeJSONImporterPass(Registry);
215
39.7k
  initializeIslAstInfoPass(Registry);
216
39.7k
  initializeIslScheduleOptimizerPass(Registry);
217
39.7k
  initializePollyCanonicalizePass(Registry);
218
39.7k
  initializePolyhedralInfoPass(Registry);
219
39.7k
  initializeScopDetectionPass(Registry);
220
39.7k
  initializeScopInfoRegionPassPass(Registry);
221
39.7k
  initializeScopInfoWrapperPassPass(Registry);
222
39.7k
  initializeCodegenCleanupPass(Registry);
223
39.7k
  initializeFlattenSchedulePass(Registry);
224
39.7k
  initializeDeLICMPass(Registry);
225
39.7k
  initializeSimplifyPass(Registry);
226
39.7k
  initializeDumpModulePass(Registry);
227
39.7k
  initializePruneUnprofitablePass(Registry);
228
39.7k
}
229
230
/// Register Polly passes such that they form a polyhedral optimizer.
231
///
232
/// The individual Polly passes are registered in the pass manager such that
233
/// they form a full polyhedral optimizer. The flow of the optimizer starts with
234
/// a set of preparing transformations that canonicalize the LLVM-IR such that
235
/// the LLVM-IR is easier for us to understand and to optimize. On the
236
/// canonicalized LLVM-IR we first run the ScopDetection pass, which detects
237
/// static control flow regions. Those regions are then translated by the
238
/// ScopInfo pass into a polyhedral representation. As a next step, a scheduling
239
/// optimizer is run on the polyhedral representation and finally the optimized
240
/// polyhedral representation is code generated back to LLVM-IR.
241
///
242
/// Besides this core functionality, we optionally schedule passes that provide
243
/// a graphical view of the scops (Polly[Only]Viewer, Polly[Only]Printer), that
244
/// allow the export/import of the polyhedral representation
245
/// (JSON[Exporter|Importer]) or that show the cfg after code generation.
246
///
247
/// For certain parts of the Polly optimizer, several alternatives are provided:
248
///
249
/// As scheduling optimizer we support the isl scheduling optimizer
250
/// (http://freecode.com/projects/isl).
251
/// It is also possible to run Polly with no optimizer. This mode is mainly
252
/// provided to analyze the run and compile time changes caused by the
253
/// scheduling optimizer.
254
///
255
/// Polly supports the isl internal code generator.
256
0
void registerPollyPasses(llvm::legacy::PassManagerBase &PM) {
257
0
  if (DumpBefore)
258
0
    PM.add(polly::createDumpModulePass("-before", true));
259
0
  for (auto &Filename : DumpBeforeFile)
260
0
    PM.add(polly::createDumpModulePass(Filename, false));
261
0
262
0
  PM.add(polly::createScopDetectionPass());
263
0
264
0
  if (PollyDetectOnly)
265
0
    return;
266
0
267
0
  if (PollyViewer)
268
0
    PM.add(polly::createDOTViewerPass());
269
0
  if (PollyOnlyViewer)
270
0
    PM.add(polly::createDOTOnlyViewerPass());
271
0
  if (PollyPrinter)
272
0
    PM.add(polly::createDOTPrinterPass());
273
0
  if (PollyOnlyPrinter)
274
0
    PM.add(polly::createDOTOnlyPrinterPass());
275
0
276
0
  PM.add(polly::createScopInfoRegionPassPass());
277
0
  if (EnablePolyhedralInfo)
278
0
    PM.add(polly::createPolyhedralInfoPass());
279
0
280
0
  if (EnableDeLICM)
281
0
    PM.add(polly::createDeLICMPass());
282
0
  if (EnableSimplify)
283
0
    PM.add(polly::createSimplifyPass());
284
0
285
0
  if (ImportJScop)
286
0
    PM.add(polly::createJSONImporterPass());
287
0
288
0
  if (DeadCodeElim)
289
0
    PM.add(polly::createDeadCodeElimPass());
290
0
291
0
  if (EnablePruneUnprofitable)
292
0
    PM.add(polly::createPruneUnprofitablePass());
293
0
294
0
  if (Target == TARGET_GPU) {
295
0
    // GPU generation provides its own scheduling optimization strategy.
296
0
  } else {
297
0
    switch (Optimizer) {
298
0
    case OPTIMIZER_NONE:
299
0
      break; /* Do nothing */
300
0
301
0
    case OPTIMIZER_ISL:
302
0
      PM.add(polly::createIslScheduleOptimizerPass());
303
0
      break;
304
0
    }
305
0
  }
306
0
307
0
  if (ExportJScop)
308
0
    PM.add(polly::createJSONExporterPass());
309
0
310
0
  if (Target == TARGET_GPU) {
311
0
#ifdef GPU_CODEGEN
312
    PM.add(polly::createPPCGCodeGenerationPass());
313
#endif
314
0
  } else {
315
0
    switch (CodeGeneration) {
316
0
    case CODEGEN_AST:
317
0
      PM.add(polly::createIslAstInfoPass());
318
0
      break;
319
0
    case CODEGEN_FULL:
320
0
      PM.add(polly::createCodeGenerationPass());
321
0
      break;
322
0
    case CODEGEN_NONE:
323
0
      break;
324
0
    }
325
0
  }
326
0
327
0
  // FIXME: This dummy ModulePass keeps some programs from miscompiling,
328
0
  // probably some not correctly preserved analyses. It acts as a barrier to
329
0
  // force all analysis results to be recomputed.
330
0
  PM.add(createBarrierNoopPass());
331
0
332
0
  if (DumpAfter)
333
0
    PM.add(polly::createDumpModulePass("-after", true));
334
0
  for (auto &Filename : DumpAfterFile)
335
0
    PM.add(polly::createDumpModulePass(Filename, false));
336
0
337
0
  if (CFGPrinter)
338
0
    PM.add(llvm::createCFGPrinterLegacyPassPass());
339
0
340
0
  if (Target == TARGET_GPU) {
341
0
    // Invariant load hoisting not yet supported by GPU code generation.
342
0
    PollyInvariantLoadHoisting = false;
343
0
  }
344
0
}
345
346
50.7k
static bool shouldEnablePolly() {
347
50.7k
  if (PollyOnlyPrinter || PollyPrinter || PollyOnlyViewer || PollyViewer)
348
0
    PollyTrackFailures = true;
349
50.7k
350
50.7k
  if (PollyOnlyPrinter || PollyPrinter || PollyOnlyViewer || PollyViewer ||
351
50.7k
      ExportJScop || ImportJScop)
352
0
    PollyEnabled = true;
353
50.7k
354
50.7k
  return PollyEnabled;
355
50.7k
}
356
357
static void
358
registerPollyEarlyAsPossiblePasses(const llvm::PassManagerBuilder &Builder,
359
16.9k
                                   llvm::legacy::PassManagerBase &PM) {
360
16.9k
  if (!polly::shouldEnablePolly())
361
16.9k
    return;
362
16.9k
363
0
  if (PassPosition != POSITION_EARLY)
364
0
    return;
365
0
366
0
  registerCanonicalicationPasses(PM);
367
0
  polly::registerPollyPasses(PM);
368
0
}
369
370
static void
371
registerPollyLoopOptimizerEndPasses(const llvm::PassManagerBuilder &Builder,
372
16.9k
                                    llvm::legacy::PassManagerBase &PM) {
373
16.9k
  if (!polly::shouldEnablePolly())
374
16.9k
    return;
375
16.9k
376
0
  if (PassPosition != POSITION_AFTER_LOOPOPT)
377
0
    return;
378
0
379
0
  PM.add(polly::createCodePreparationPass());
380
0
  polly::registerPollyPasses(PM);
381
0
  PM.add(createCodegenCleanupPass());
382
0
}
383
384
static void
385
registerPollyScalarOptimizerLatePasses(const llvm::PassManagerBuilder &Builder,
386
16.9k
                                       llvm::legacy::PassManagerBase &PM) {
387
16.9k
  if (!polly::shouldEnablePolly())
388
16.9k
    return;
389
16.9k
390
0
  if (PassPosition != POSITION_BEFORE_VECTORIZER)
391
0
    return;
392
0
393
0
  PM.add(polly::createCodePreparationPass());
394
0
  polly::registerPollyPasses(PM);
395
0
  PM.add(createCodegenCleanupPass());
396
0
}
397
398
/// Register Polly to be available as an optimizer
399
///
400
///
401
/// We can currently run Polly at three different points in the pass manager.
402
/// a) very early, b) after the canonicalizing loop transformations and c) right
403
/// before the vectorizer.
404
///
405
/// The default is currently a), to register Polly such that it runs as early as
406
/// possible. This has several implications:
407
///
408
///   1) We need to schedule more canonicalization passes
409
///
410
///   As nothing is run before Polly, it is necessary to run a set of preparing
411
///   transformations before Polly to canonicalize the LLVM-IR and to allow
412
///   Polly to detect and understand the code.
413
///
414
///   2) LICM and LoopIdiom pass have not yet been run
415
///
416
///   Loop invariant code motion as well as the loop idiom recognition pass make
417
///   it more difficult for Polly to transform code. LICM may introduce
418
///   additional data dependences that are hard to eliminate and the loop idiom
419
///   recognition pass may introduce calls to memset that we currently do not
420
///   understand. By running Polly early enough (meaning before these passes) we
421
///   avoid difficulties that may be introduced by these passes.
422
///
423
///   3) We get the full -O3 optimization sequence after Polly
424
///
425
///   The LLVM-IR that is generated by Polly has been optimized on a high level,
426
///   but it may be rather inefficient on the lower/scalar level. By scheduling
427
///   Polly before all other passes, we have the full sequence of -O3
428
///   optimizations behind us, such that inefficiencies on the low level can
429
///   be optimized away.
430
///
431
/// We are currently evaluating the benefit of running Polly at position b) or
432
/// c). b) is likely too early as it interacts with the inliner. c) is nice
433
/// as everything is fully inlined and canonicalized, but we need to be able
434
/// to handle LICMed code to make it useful.
435
static llvm::RegisterStandardPasses RegisterPollyOptimizerEarly(
436
    llvm::PassManagerBuilder::EP_ModuleOptimizerEarly,
437
    registerPollyEarlyAsPossiblePasses);
438
439
static llvm::RegisterStandardPasses
440
    RegisterPollyOptimizerLoopEnd(llvm::PassManagerBuilder::EP_LoopOptimizerEnd,
441
                                  registerPollyLoopOptimizerEndPasses);
442
443
static llvm::RegisterStandardPasses RegisterPollyOptimizerScalarLate(
444
    llvm::PassManagerBuilder::EP_VectorizerStart,
445
    registerPollyScalarOptimizerLatePasses);
446
} // namespace polly