Coverage Report

Created: 2017-06-28 17:40

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/tools/polly/lib/Support/RegisterPasses.cpp
Line
Count
Source (jump to first uncovered line)
1
//===------ RegisterPasses.cpp - Add the Polly Passes to default passes  --===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
// This file composes the individual LLVM-IR passes provided by Polly to a
11
// functional polyhedral optimizer. The polyhedral optimizer is automatically
12
// made available to LLVM based compilers by loading the Polly shared library
13
// into such a compiler.
14
//
15
// The Polly optimizer is made available by executing a static constructor that
16
// registers the individual Polly passes in the LLVM pass manager builder. The
17
// passes are registered such that the default behaviour of the compiler is not
18
// changed, but that the flag '-polly' provided at optimization level '-O3'
19
// enables additional polyhedral optimizations.
20
//===----------------------------------------------------------------------===//
21
22
#include "polly/RegisterPasses.h"
23
#include "polly/Canonicalization.h"
24
#include "polly/CodeGen/CodeGeneration.h"
25
#include "polly/CodeGen/CodegenCleanup.h"
26
#include "polly/CodeGen/PPCGCodeGeneration.h"
27
#include "polly/DeLICM.h"
28
#include "polly/DependenceInfo.h"
29
#include "polly/FlattenSchedule.h"
30
#include "polly/LinkAllPasses.h"
31
#include "polly/Options.h"
32
#include "polly/PolyhedralInfo.h"
33
#include "polly/ScopDetection.h"
34
#include "polly/ScopInfo.h"
35
#include "polly/Simplify.h"
36
#include "polly/Support/DumpModulePass.h"
37
#include "llvm/Analysis/CFGPrinter.h"
38
#include "llvm/IR/LegacyPassManager.h"
39
#include "llvm/Transforms/IPO.h"
40
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
41
#include "llvm/Transforms/Scalar.h"
42
#include "llvm/Transforms/Vectorize.h"
43
44
using namespace llvm;
45
using namespace polly;
46
47
cl::OptionCategory PollyCategory("Polly Options",
48
                                 "Configure the polly loop optimizer");
49
50
static cl::opt<bool>
51
    PollyEnabled("polly", cl::desc("Enable the polly optimizer (only at -O3)"),
52
                 cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
53
54
static cl::opt<bool> PollyDetectOnly(
55
    "polly-only-scop-detection",
56
    cl::desc("Only run scop detection, but no other optimizations"),
57
    cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
58
59
enum PassPositionChoice {
60
  POSITION_EARLY,
61
  POSITION_AFTER_LOOPOPT,
62
  POSITION_BEFORE_VECTORIZER
63
};
64
65
enum OptimizerChoice { OPTIMIZER_NONE, OPTIMIZER_ISL };
66
67
static cl::opt<PassPositionChoice> PassPosition(
68
    "polly-position", cl::desc("Where to run polly in the pass pipeline"),
69
    cl::values(
70
        clEnumValN(POSITION_EARLY, "early", "Before everything"),
71
        clEnumValN(POSITION_AFTER_LOOPOPT, "after-loopopt",
72
                   "After the loop optimizer (but within the inline cycle)"),
73
        clEnumValN(POSITION_BEFORE_VECTORIZER, "before-vectorizer",
74
                   "Right before the vectorizer")),
75
    cl::Hidden, cl::init(POSITION_EARLY), cl::ZeroOrMore,
76
    cl::cat(PollyCategory));
77
78
static cl::opt<OptimizerChoice>
79
    Optimizer("polly-optimizer", cl::desc("Select the scheduling optimizer"),
80
              cl::values(clEnumValN(OPTIMIZER_NONE, "none", "No optimizer"),
81
                         clEnumValN(OPTIMIZER_ISL, "isl",
82
                                    "The isl scheduling optimizer")),
83
              cl::Hidden, cl::init(OPTIMIZER_ISL), cl::ZeroOrMore,
84
              cl::cat(PollyCategory));
85
86
enum CodeGenChoice { CODEGEN_FULL, CODEGEN_AST, CODEGEN_NONE };
87
static cl::opt<CodeGenChoice> CodeGeneration(
88
    "polly-code-generation", cl::desc("How much code-generation to perform"),
89
    cl::values(clEnumValN(CODEGEN_FULL, "full", "AST and IR generation"),
90
               clEnumValN(CODEGEN_AST, "ast", "Only AST generation"),
91
               clEnumValN(CODEGEN_NONE, "none", "No code generation")),
92
    cl::Hidden, cl::init(CODEGEN_FULL), cl::ZeroOrMore, cl::cat(PollyCategory));
93
94
enum TargetChoice { TARGET_CPU, TARGET_GPU };
95
static cl::opt<TargetChoice>
96
    Target("polly-target", cl::desc("The hardware to target"),
97
           cl::values(clEnumValN(TARGET_CPU, "cpu", "generate CPU code")
98
#ifdef GPU_CODEGEN
99
                          ,
100
                      clEnumValN(TARGET_GPU, "gpu", "generate GPU code")
101
#endif
102
                          ),
103
           cl::init(TARGET_CPU), cl::ZeroOrMore, cl::cat(PollyCategory));
104
105
#ifdef GPU_CODEGEN
106
static cl::opt<GPURuntime> GPURuntimeChoice(
107
    "polly-gpu-runtime", cl::desc("The GPU Runtime API to target"),
108
    cl::values(clEnumValN(GPURuntime::CUDA, "libcudart",
109
                          "use the CUDA Runtime API"),
110
               clEnumValN(GPURuntime::OpenCL, "libopencl",
111
                          "use the OpenCL Runtime API")),
112
    cl::init(GPURuntime::CUDA), cl::ZeroOrMore, cl::cat(PollyCategory));
113
114
static cl::opt<GPUArch>
115
    GPUArchChoice("polly-gpu-arch", cl::desc("The GPU Architecture to target"),
116
                  cl::values(clEnumValN(GPUArch::NVPTX64, "nvptx64",
117
                                        "target NVIDIA 64-bit architecture")),
118
                  cl::init(GPUArch::NVPTX64), cl::ZeroOrMore,
119
                  cl::cat(PollyCategory));
120
#endif
121
122
VectorizerChoice polly::PollyVectorizerChoice;
123
static cl::opt<polly::VectorizerChoice, true> Vectorizer(
124
    "polly-vectorizer", cl::desc("Select the vectorization strategy"),
125
    cl::values(
126
        clEnumValN(polly::VECTORIZER_NONE, "none", "No Vectorization"),
127
        clEnumValN(polly::VECTORIZER_POLLY, "polly",
128
                   "Polly internal vectorizer"),
129
        clEnumValN(
130
            polly::VECTORIZER_STRIPMINE, "stripmine",
131
            "Strip-mine outer loops for the loop-vectorizer to trigger")),
132
    cl::location(PollyVectorizerChoice), cl::init(polly::VECTORIZER_NONE),
133
    cl::ZeroOrMore, cl::cat(PollyCategory));
134
135
static cl::opt<bool> ImportJScop(
136
    "polly-import",
137
    cl::desc("Import the polyhedral description of the detected Scops"),
138
    cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
139
140
static cl::opt<bool> ExportJScop(
141
    "polly-export",
142
    cl::desc("Export the polyhedral description of the detected Scops"),
143
    cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
144
145
static cl::opt<bool> DeadCodeElim("polly-run-dce",
146
                                  cl::desc("Run the dead code elimination"),
147
                                  cl::Hidden, cl::init(false), cl::ZeroOrMore,
148
                                  cl::cat(PollyCategory));
149
150
static cl::opt<bool> PollyViewer(
151
    "polly-show",
152
    cl::desc("Highlight the code regions that will be optimized in a "
153
             "(CFG BBs and LLVM-IR instructions)"),
154
    cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
155
156
static cl::opt<bool> PollyOnlyViewer(
157
    "polly-show-only",
158
    cl::desc("Highlight the code regions that will be optimized in "
159
             "a (CFG only BBs)"),
160
    cl::init(false), cl::cat(PollyCategory));
161
162
static cl::opt<bool>
163
    PollyPrinter("polly-dot", cl::desc("Enable the Polly DOT printer in -O3"),
164
                 cl::Hidden, cl::value_desc("Run the Polly DOT printer at -O3"),
165
                 cl::init(false), cl::cat(PollyCategory));
166
167
static cl::opt<bool> PollyOnlyPrinter(
168
    "polly-dot-only",
169
    cl::desc("Enable the Polly DOT printer in -O3 (no BB content)"), cl::Hidden,
170
    cl::value_desc("Run the Polly DOT printer at -O3 (no BB content"),
171
    cl::init(false), cl::cat(PollyCategory));
172
173
static cl::opt<bool>
174
    CFGPrinter("polly-view-cfg",
175
               cl::desc("Show the Polly CFG right after code generation"),
176
               cl::Hidden, cl::init(false), cl::cat(PollyCategory));
177
178
static cl::opt<bool>
179
    EnablePolyhedralInfo("polly-enable-polyhedralinfo",
180
                         cl::desc("Enable polyhedral interface of Polly"),
181
                         cl::Hidden, cl::init(false), cl::cat(PollyCategory));
182
183
static cl::opt<bool>
184
    DumpBefore("polly-dump-before",
185
               cl::desc("Dump module before Polly transformations into a file "
186
                        "suffixed with \"-before\""),
187
               cl::init(false), cl::cat(PollyCategory));
188
189
static cl::list<std::string> DumpBeforeFile(
190
    "polly-dump-before-file",
191
    cl::desc("Dump module before Polly transformations to the given file"),
192
    cl::cat(PollyCategory));
193
194
static cl::opt<bool>
195
    DumpAfter("polly-dump-after",
196
              cl::desc("Dump module after Polly transformations into a file "
197
                       "suffixed with \"-after\""),
198
              cl::init(false), cl::cat(PollyCategory));
199
200
static cl::list<std::string> DumpAfterFile(
201
    "polly-dump-after-file",
202
    cl::desc("Dump module after Polly transformations to the given file"),
203
    cl::ZeroOrMore, cl::cat(PollyCategory));
204
205
static cl::opt<bool>
206
    EnableDeLICM("polly-enable-delicm",
207
                 cl::desc("Eliminate scalar loop carried dependences"),
208
                 cl::Hidden, cl::init(false), cl::cat(PollyCategory));
209
210
static cl::opt<bool>
211
    EnableSimplify("polly-enable-simplify",
212
                   cl::desc("Simplify SCoP after optimizations"),
213
                   cl::init(false), cl::cat(PollyCategory));
214
215
static cl::opt<bool> EnablePruneUnprofitable(
216
    "polly-enable-prune-unprofitable",
217
    cl::desc("Bail out on unprofitable SCoPs before rescheduling"), cl::Hidden,
218
    cl::init(true), cl::cat(PollyCategory));
219
220
namespace polly {
221
41.0k
void initializePollyPasses(PassRegistry &Registry) {
222
41.0k
  initializeCodeGenerationPass(Registry);
223
41.0k
224
41.0k
#ifdef GPU_CODEGEN
225
  initializePPCGCodeGenerationPass(Registry);
226
#endif
227
41.0k
  initializeCodePreparationPass(Registry);
228
41.0k
  initializeDeadCodeElimPass(Registry);
229
41.0k
  initializeDependenceInfoPass(Registry);
230
41.0k
  initializeDependenceInfoWrapperPassPass(Registry);
231
41.0k
  initializeJSONExporterPass(Registry);
232
41.0k
  initializeJSONImporterPass(Registry);
233
41.0k
  initializeIslAstInfoWrapperPassPass(Registry);
234
41.0k
  initializeIslScheduleOptimizerPass(Registry);
235
41.0k
  initializePollyCanonicalizePass(Registry);
236
41.0k
  initializePolyhedralInfoPass(Registry);
237
41.0k
  initializeScopDetectionWrapperPassPass(Registry);
238
41.0k
  initializeScopInfoRegionPassPass(Registry);
239
41.0k
  initializeScopInfoWrapperPassPass(Registry);
240
41.0k
  initializeCodegenCleanupPass(Registry);
241
41.0k
  initializeFlattenSchedulePass(Registry);
242
41.0k
  initializeDeLICMPass(Registry);
243
41.0k
  initializeSimplifyPass(Registry);
244
41.0k
  initializeDumpModulePass(Registry);
245
41.0k
  initializePruneUnprofitablePass(Registry);
246
41.0k
}
247
248
/// Register Polly passes such that they form a polyhedral optimizer.
249
///
250
/// The individual Polly passes are registered in the pass manager such that
251
/// they form a full polyhedral optimizer. The flow of the optimizer starts with
252
/// a set of preparing transformations that canonicalize the LLVM-IR such that
253
/// the LLVM-IR is easier for us to understand and to optimize. On the
254
/// canonicalized LLVM-IR we first run the ScopDetection pass, which detects
255
/// static control flow regions. Those regions are then translated by the
256
/// ScopInfo pass into a polyhedral representation. As a next step, a scheduling
257
/// optimizer is run on the polyhedral representation and finally the optimized
258
/// polyhedral representation is code generated back to LLVM-IR.
259
///
260
/// Besides this core functionality, we optionally schedule passes that provide
261
/// a graphical view of the scops (Polly[Only]Viewer, Polly[Only]Printer), that
262
/// allow the export/import of the polyhedral representation
263
/// (JSON[Exporter|Importer]) or that show the cfg after code generation.
264
///
265
/// For certain parts of the Polly optimizer, several alternatives are provided:
266
///
267
/// As scheduling optimizer we support the isl scheduling optimizer
268
/// (http://freecode.com/projects/isl).
269
/// It is also possible to run Polly with no optimizer. This mode is mainly
270
/// provided to analyze the run and compile time changes caused by the
271
/// scheduling optimizer.
272
///
273
/// Polly supports the isl internal code generator.
274
0
void registerPollyPasses(llvm::legacy::PassManagerBase &PM) {
275
0
  if (DumpBefore)
276
0
    PM.add(polly::createDumpModulePass("-before", true));
277
0
  for (auto &Filename : DumpBeforeFile)
278
0
    PM.add(polly::createDumpModulePass(Filename, false));
279
0
280
0
  PM.add(polly::createScopDetectionWrapperPassPass());
281
0
282
0
  if (PollyDetectOnly)
283
0
    return;
284
0
285
0
  
if (0
PollyViewer0
)
286
0
    PM.add(polly::createDOTViewerPass());
287
0
  if (PollyOnlyViewer)
288
0
    PM.add(polly::createDOTOnlyViewerPass());
289
0
  if (PollyPrinter)
290
0
    PM.add(polly::createDOTPrinterPass());
291
0
  if (PollyOnlyPrinter)
292
0
    PM.add(polly::createDOTOnlyPrinterPass());
293
0
294
0
  PM.add(polly::createScopInfoRegionPassPass());
295
0
  if (EnablePolyhedralInfo)
296
0
    PM.add(polly::createPolyhedralInfoPass());
297
0
298
0
  if (EnableDeLICM)
299
0
    PM.add(polly::createDeLICMPass());
300
0
  if (EnableSimplify)
301
0
    PM.add(polly::createSimplifyPass());
302
0
303
0
  if (ImportJScop)
304
0
    PM.add(polly::createJSONImporterPass());
305
0
306
0
  if (DeadCodeElim)
307
0
    PM.add(polly::createDeadCodeElimPass());
308
0
309
0
  if (EnablePruneUnprofitable)
310
0
    PM.add(polly::createPruneUnprofitablePass());
311
0
312
0
  if (
Target == TARGET_GPU0
)
{0
313
0
    // GPU generation provides its own scheduling optimization strategy.
314
0
  } else {
315
0
    switch (Optimizer) {
316
0
    case OPTIMIZER_NONE:
317
0
      break; /* Do nothing */
318
0
319
0
    case OPTIMIZER_ISL:
320
0
      PM.add(polly::createIslScheduleOptimizerPass());
321
0
      break;
322
0
    }
323
0
  }
324
0
325
0
  
if (0
ExportJScop0
)
326
0
    PM.add(polly::createJSONExporterPass());
327
0
328
0
  if (
Target == TARGET_GPU0
)
{0
329
0
#ifdef GPU_CODEGEN
330
    PM.add(
331
        polly::createPPCGCodeGenerationPass(GPUArchChoice, GPURuntimeChoice));
332
#endif
333
0
  } else {
334
0
    switch (CodeGeneration) {
335
0
    case CODEGEN_AST:
336
0
      PM.add(polly::createIslAstInfoWrapperPassPass());
337
0
      break;
338
0
    case CODEGEN_FULL:
339
0
      PM.add(polly::createCodeGenerationPass());
340
0
      break;
341
0
    case CODEGEN_NONE:
342
0
      break;
343
0
    }
344
0
  }
345
0
346
0
  // FIXME: This dummy ModulePass keeps some programs from miscompiling,
347
0
  // probably some not correctly preserved analyses. It acts as a barrier to
348
0
  // force all analysis results to be recomputed.
349
0
  PM.add(createBarrierNoopPass());
350
0
351
0
  if (DumpAfter)
352
0
    PM.add(polly::createDumpModulePass("-after", true));
353
0
  for (auto &Filename : DumpAfterFile)
354
0
    PM.add(polly::createDumpModulePass(Filename, false));
355
0
356
0
  if (CFGPrinter)
357
0
    PM.add(llvm::createCFGPrinterLegacyPassPass());
358
0
}
359
360
52.2k
static bool shouldEnablePolly() {
361
52.2k
  if (
PollyOnlyPrinter || 52.2k
PollyPrinter52.2k
||
PollyOnlyViewer52.2k
||
PollyViewer52.2k
)
362
0
    PollyTrackFailures = true;
363
52.2k
364
52.2k
  if (
PollyOnlyPrinter || 52.2k
PollyPrinter52.2k
||
PollyOnlyViewer52.2k
||
PollyViewer52.2k
||
365
52.2k
      
ExportJScop52.2k
||
ImportJScop52.2k
)
366
0
    PollyEnabled = true;
367
52.2k
368
52.2k
  return PollyEnabled;
369
52.2k
}
370
371
static void
372
registerPollyEarlyAsPossiblePasses(const llvm::PassManagerBuilder &Builder,
373
17.4k
                                   llvm::legacy::PassManagerBase &PM) {
374
17.4k
  if (!polly::shouldEnablePolly())
375
17.4k
    return;
376
17.4k
377
0
  
if (0
PassPosition != POSITION_EARLY0
)
378
0
    return;
379
0
380
0
  registerCanonicalicationPasses(PM);
381
0
  polly::registerPollyPasses(PM);
382
0
}
383
384
static void
385
registerPollyLoopOptimizerEndPasses(const llvm::PassManagerBuilder &Builder,
386
17.4k
                                    llvm::legacy::PassManagerBase &PM) {
387
17.4k
  if (!polly::shouldEnablePolly())
388
17.4k
    return;
389
17.4k
390
0
  
if (0
PassPosition != POSITION_AFTER_LOOPOPT0
)
391
0
    return;
392
0
393
0
  PM.add(polly::createCodePreparationPass());
394
0
  polly::registerPollyPasses(PM);
395
0
  PM.add(createCodegenCleanupPass());
396
0
}
397
398
static void
399
registerPollyScalarOptimizerLatePasses(const llvm::PassManagerBuilder &Builder,
400
17.4k
                                       llvm::legacy::PassManagerBase &PM) {
401
17.4k
  if (!polly::shouldEnablePolly())
402
17.4k
    return;
403
17.4k
404
0
  
if (0
PassPosition != POSITION_BEFORE_VECTORIZER0
)
405
0
    return;
406
0
407
0
  PM.add(polly::createCodePreparationPass());
408
0
  polly::registerPollyPasses(PM);
409
0
  PM.add(createCodegenCleanupPass());
410
0
}
411
412
/// Register Polly to be available as an optimizer
413
///
414
///
415
/// We can currently run Polly at three different points in the pass manager.
416
/// a) very early, b) after the canonicalizing loop transformations and c) right
417
/// before the vectorizer.
418
///
419
/// The default is currently a), to register Polly such that it runs as early as
420
/// possible. This has several implications:
421
///
422
///   1) We need to schedule more canonicalization passes
423
///
424
///   As nothing is run before Polly, it is necessary to run a set of preparing
425
///   transformations before Polly to canonicalize the LLVM-IR and to allow
426
///   Polly to detect and understand the code.
427
///
428
///   2) LICM and LoopIdiom pass have not yet been run
429
///
430
///   Loop invariant code motion as well as the loop idiom recognition pass make
431
///   it more difficult for Polly to transform code. LICM may introduce
432
///   additional data dependences that are hard to eliminate and the loop idiom
433
///   recognition pass may introduce calls to memset that we currently do not
434
///   understand. By running Polly early enough (meaning before these passes) we
435
///   avoid difficulties that may be introduced by these passes.
436
///
437
///   3) We get the full -O3 optimization sequence after Polly
438
///
439
///   The LLVM-IR that is generated by Polly has been optimized on a high level,
440
///   but it may be rather inefficient on the lower/scalar level. By scheduling
441
///   Polly before all other passes, we have the full sequence of -O3
442
///   optimizations behind us, such that inefficiencies on the low level can
443
///   be optimized away.
444
///
445
/// We are currently evaluating the benefit of running Polly at position b) or
446
/// c). b) is likely too early as it interacts with the inliner. c) is nice
447
/// as everything is fully inlined and canonicalized, but we need to be able
448
/// to handle LICMed code to make it useful.
449
static llvm::RegisterStandardPasses RegisterPollyOptimizerEarly(
450
    llvm::PassManagerBuilder::EP_ModuleOptimizerEarly,
451
    registerPollyEarlyAsPossiblePasses);
452
453
static llvm::RegisterStandardPasses
454
    RegisterPollyOptimizerLoopEnd(llvm::PassManagerBuilder::EP_LoopOptimizerEnd,
455
                                  registerPollyLoopOptimizerEndPasses);
456
457
static llvm::RegisterStandardPasses RegisterPollyOptimizerScalarLate(
458
    llvm::PassManagerBuilder::EP_VectorizerStart,
459
    registerPollyScalarOptimizerLatePasses);
460
} // namespace polly