Coverage Report

Created: 2017-06-23 12:40

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/tools/polly/lib/Analysis/ScopDetection.cpp
Line
Count
Source (jump to first uncovered line)
1
//===----- ScopDetection.cpp  - Detect Scops --------------------*- C++ -*-===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
// Detect the maximal Scops of a function.
11
//
12
// A static control part (Scop) is a subgraph of the control flow graph (CFG)
13
// that only has statically known control flow and can therefore be described
14
// within the polyhedral model.
15
//
16
// Every Scop fulfills these restrictions:
17
//
18
// * It is a single entry single exit region
19
//
20
// * Only affine linear bounds in the loops
21
//
22
// Every natural loop in a Scop must have a number of loop iterations that can
23
// be described as an affine linear function in surrounding loop iterators or
24
// parameters. (A parameter is a scalar that does not change its value during
25
// execution of the Scop).
26
//
27
// * Only comparisons of affine linear expressions in conditions
28
//
29
// * All loops and conditions perfectly nested
30
//
31
// The control flow needs to be structured such that it could be written using
32
// just 'for' and 'if' statements, without the need for any 'goto', 'break' or
33
// 'continue'.
34
//
35
// * Side effect free functions call
36
//
37
// Function calls and intrinsics that do not have side effects (readnone)
38
// or memory intrinsics (memset, memcpy, memmove) are allowed.
39
//
40
// The Scop detection finds the largest Scops by checking if the largest
41
// region is a Scop. If this is not the case, its canonical subregions are
42
// checked until a region is a Scop. It is now tried to extend this Scop by
43
// creating a larger non canonical region.
44
//
45
//===----------------------------------------------------------------------===//
46
47
#include "polly/ScopDetection.h"
48
#include "polly/CodeGen/CodeGeneration.h"
49
#include "polly/LinkAllPasses.h"
50
#include "polly/Options.h"
51
#include "polly/ScopDetectionDiagnostic.h"
52
#include "polly/Support/SCEVValidator.h"
53
#include "polly/Support/ScopLocation.h"
54
#include "llvm/ADT/Statistic.h"
55
#include "llvm/Analysis/AliasAnalysis.h"
56
#include "llvm/Analysis/Loads.h"
57
#include "llvm/Analysis/LoopInfo.h"
58
#include "llvm/Analysis/RegionIterator.h"
59
#include "llvm/Analysis/ScalarEvolution.h"
60
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
61
#include "llvm/IR/DebugInfo.h"
62
#include "llvm/IR/DiagnosticInfo.h"
63
#include "llvm/IR/DiagnosticPrinter.h"
64
#include "llvm/IR/IntrinsicInst.h"
65
#include "llvm/IR/LLVMContext.h"
66
#include "llvm/Support/Debug.h"
67
#include <set>
68
#include <stack>
69
70
using namespace llvm;
71
using namespace polly;
72
73
#define DEBUG_TYPE "polly-detect"
74
75
// This option is set to a very high value, as analyzing such loops increases
76
// compile time on several cases. For experiments that enable this option,
77
// a value of around 40 has been working to avoid run-time regressions with
78
// Polly while still exposing interesting optimization opportunities.
79
static cl::opt<int> ProfitabilityMinPerLoopInstructions(
80
    "polly-detect-profitability-min-per-loop-insts",
81
    cl::desc("The minimal number of per-loop instructions before a single loop "
82
             "region is considered profitable"),
83
    cl::Hidden, cl::ValueRequired, cl::init(100000000), cl::cat(PollyCategory));
84
85
bool polly::PollyProcessUnprofitable;
86
static cl::opt<bool, true> XPollyProcessUnprofitable(
87
    "polly-process-unprofitable",
88
    cl::desc(
89
        "Process scops that are unlikely to benefit from Polly optimizations."),
90
    cl::location(PollyProcessUnprofitable), cl::init(false), cl::ZeroOrMore,
91
    cl::cat(PollyCategory));
92
93
static cl::list<std::string> OnlyFunctions(
94
    "polly-only-func",
95
    cl::desc("Only run on functions that contain a certain string. "
96
             "Multiple strings can be comma separated. "
97
             "Scop detection will run on all functions that contain "
98
             "any of the strings provided."),
99
    cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory));
100
101
static cl::opt<bool>
102
    AllowFullFunction("polly-detect-full-functions",
103
                      cl::desc("Allow the detection of full functions"),
104
                      cl::init(false), cl::cat(PollyCategory));
105
106
static cl::opt<std::string> OnlyRegion(
107
    "polly-only-region",
108
    cl::desc("Only run on certain regions (The provided identifier must "
109
             "appear in the name of the region's entry block"),
110
    cl::value_desc("identifier"), cl::ValueRequired, cl::init(""),
111
    cl::cat(PollyCategory));
112
113
static cl::opt<bool>
114
    IgnoreAliasing("polly-ignore-aliasing",
115
                   cl::desc("Ignore possible aliasing of the array bases"),
116
                   cl::Hidden, cl::init(false), cl::ZeroOrMore,
117
                   cl::cat(PollyCategory));
118
119
bool polly::PollyAllowUnsignedOperations;
120
static cl::opt<bool, true> XPollyAllowUnsignedOperations(
121
    "polly-allow-unsigned-operations",
122
    cl::desc("Allow unsigned operations such as comparisons or zero-extends."),
123
    cl::location(PollyAllowUnsignedOperations), cl::Hidden, cl::ZeroOrMore,
124
    cl::init(true), cl::cat(PollyCategory));
125
126
bool polly::PollyUseRuntimeAliasChecks;
127
static cl::opt<bool, true> XPollyUseRuntimeAliasChecks(
128
    "polly-use-runtime-alias-checks",
129
    cl::desc("Use runtime alias checks to resolve possible aliasing."),
130
    cl::location(PollyUseRuntimeAliasChecks), cl::Hidden, cl::ZeroOrMore,
131
    cl::init(true), cl::cat(PollyCategory));
132
133
static cl::opt<bool>
134
    ReportLevel("polly-report",
135
                cl::desc("Print information about the activities of Polly"),
136
                cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
137
138
static cl::opt<bool> AllowDifferentTypes(
139
    "polly-allow-differing-element-types",
140
    cl::desc("Allow different element types for array accesses"), cl::Hidden,
141
    cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory));
142
143
static cl::opt<bool>
144
    AllowNonAffine("polly-allow-nonaffine",
145
                   cl::desc("Allow non affine access functions in arrays"),
146
                   cl::Hidden, cl::init(false), cl::ZeroOrMore,
147
                   cl::cat(PollyCategory));
148
149
static cl::opt<bool>
150
    AllowModrefCall("polly-allow-modref-calls",
151
                    cl::desc("Allow functions with known modref behavior"),
152
                    cl::Hidden, cl::init(false), cl::ZeroOrMore,
153
                    cl::cat(PollyCategory));
154
155
static cl::opt<bool> AllowNonAffineSubRegions(
156
    "polly-allow-nonaffine-branches",
157
    cl::desc("Allow non affine conditions for branches"), cl::Hidden,
158
    cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory));
159
160
static cl::opt<bool>
161
    AllowNonAffineSubLoops("polly-allow-nonaffine-loops",
162
                           cl::desc("Allow non affine conditions for loops"),
163
                           cl::Hidden, cl::init(false), cl::ZeroOrMore,
164
                           cl::cat(PollyCategory));
165
166
static cl::opt<bool, true>
167
    TrackFailures("polly-detect-track-failures",
168
                  cl::desc("Track failure strings in detecting scop regions"),
169
                  cl::location(PollyTrackFailures), cl::Hidden, cl::ZeroOrMore,
170
                  cl::init(true), cl::cat(PollyCategory));
171
172
static cl::opt<bool> KeepGoing("polly-detect-keep-going",
173
                               cl::desc("Do not fail on the first error."),
174
                               cl::Hidden, cl::ZeroOrMore, cl::init(false),
175
                               cl::cat(PollyCategory));
176
177
static cl::opt<bool, true>
178
    PollyDelinearizeX("polly-delinearize",
179
                      cl::desc("Delinearize array access functions"),
180
                      cl::location(PollyDelinearize), cl::Hidden,
181
                      cl::ZeroOrMore, cl::init(true), cl::cat(PollyCategory));
182
183
static cl::opt<bool>
184
    VerifyScops("polly-detect-verify",
185
                cl::desc("Verify the detected SCoPs after each transformation"),
186
                cl::Hidden, cl::init(false), cl::ZeroOrMore,
187
                cl::cat(PollyCategory));
188
189
bool polly::PollyInvariantLoadHoisting;
190
static cl::opt<bool, true> XPollyInvariantLoadHoisting(
191
    "polly-invariant-load-hoisting", cl::desc("Hoist invariant loads."),
192
    cl::location(PollyInvariantLoadHoisting), cl::Hidden, cl::ZeroOrMore,
193
    cl::init(false), cl::cat(PollyCategory));
194
195
/// The minimal trip count under which loops are considered unprofitable.
196
static const unsigned MIN_LOOP_TRIP_COUNT = 8;
197
198
bool polly::PollyTrackFailures = false;
199
bool polly::PollyDelinearize = false;
200
StringRef polly::PollySkipFnAttr = "polly.skip.fn";
201
202
//===----------------------------------------------------------------------===//
203
// Statistics.
204
205
STATISTIC(NumScopRegions, "Number of scops");
206
STATISTIC(NumLoopsInScop, "Number of loops in scops");
207
STATISTIC(NumScopsDepthOne, "Number of scops with maximal loop depth 1");
208
STATISTIC(NumScopsDepthTwo, "Number of scops with maximal loop depth 2");
209
STATISTIC(NumScopsDepthThree, "Number of scops with maximal loop depth 3");
210
STATISTIC(NumScopsDepthFour, "Number of scops with maximal loop depth 4");
211
STATISTIC(NumScopsDepthFive, "Number of scops with maximal loop depth 5");
212
STATISTIC(NumScopsDepthLarger,
213
          "Number of scops with maximal loop depth 6 and larger");
214
STATISTIC(NumProfScopRegions, "Number of scops (profitable scops only)");
215
STATISTIC(NumLoopsInProfScop,
216
          "Number of loops in scops (profitable scops only)");
217
STATISTIC(NumLoopsOverall, "Number of total loops");
218
STATISTIC(NumProfScopsDepthOne,
219
          "Number of scops with maximal loop depth 1 (profitable scops only)");
220
STATISTIC(NumProfScopsDepthTwo,
221
          "Number of scops with maximal loop depth 2 (profitable scops only)");
222
STATISTIC(NumProfScopsDepthThree,
223
          "Number of scops with maximal loop depth 3 (profitable scops only)");
224
STATISTIC(NumProfScopsDepthFour,
225
          "Number of scops with maximal loop depth 4 (profitable scops only)");
226
STATISTIC(NumProfScopsDepthFive,
227
          "Number of scops with maximal loop depth 5 (profitable scops only)");
228
STATISTIC(NumProfScopsDepthLarger,
229
          "Number of scops with maximal loop depth 6 and larger "
230
          "(profitable scops only)");
231
STATISTIC(MaxNumLoopsInScop, "Maximal number of loops in scops");
232
STATISTIC(MaxNumLoopsInProfScop,
233
          "Maximal number of loops in scops (profitable scops only)");
234
235
static void updateLoopCountStatistic(ScopDetection::LoopStats Stats,
236
                                     bool OnlyProfitable);
237
238
class DiagnosticScopFound : public DiagnosticInfo {
239
private:
240
  static int PluginDiagnosticKind;
241
242
  Function &F;
243
  std::string FileName;
244
  unsigned EntryLine, ExitLine;
245
246
public:
247
  DiagnosticScopFound(Function &F, std::string FileName, unsigned EntryLine,
248
                      unsigned ExitLine)
249
      : DiagnosticInfo(PluginDiagnosticKind, DS_Note), F(F), FileName(FileName),
250
2
        EntryLine(EntryLine), ExitLine(ExitLine) {}
251
252
  virtual void print(DiagnosticPrinter &DP) const;
253
254
0
  static bool classof(const DiagnosticInfo *DI) {
255
0
    return DI->getKind() == PluginDiagnosticKind;
256
0
  }
257
};
258
259
int DiagnosticScopFound::PluginDiagnosticKind =
260
    getNextAvailablePluginDiagnosticKind();
261
262
2
void DiagnosticScopFound::print(DiagnosticPrinter &DP) const {
263
2
  DP << "Polly detected an optimizable loop region (scop) in function '" << F
264
2
     << "'\n";
265
2
266
2
  if (
FileName.empty()2
)
{0
267
0
    DP << "Scop location is unknown. Compile with debug info "
268
0
          "(-g) to get more precise information. ";
269
0
    return;
270
0
  }
271
2
272
2
  DP << FileName << ":" << EntryLine << ": Start of scop\n";
273
2
  DP << FileName << ":" << ExitLine << ": End of scop";
274
2
}
275
276
3
static bool IsFnNameListedInOnlyFunctions(StringRef FnName) {
277
3
  for (auto Name : OnlyFunctions)
278
5
    
if (5
FnName.count(Name) > 05
)
279
2
      return true;
280
1
  return false;
281
3
}
282
//===----------------------------------------------------------------------===//
283
// ScopDetection.
284
285
ScopDetection::ScopDetection(Function &F, const DominatorTree &DT,
286
                             ScalarEvolution &SE, LoopInfo &LI, RegionInfo &RI,
287
                             AliasAnalysis &AA)
288
1.23k
    : DT(DT), SE(SE), LI(LI), RI(RI), AA(AA) {
289
1.23k
290
1.23k
  if (
!PollyProcessUnprofitable && 1.23k
LI.empty()19
)
291
0
    return;
292
1.23k
293
1.23k
  Region *TopRegion = RI.getTopLevelRegion();
294
1.23k
295
1.23k
  if (
OnlyFunctions.size() > 0 && 1.23k
!IsFnNameListedInOnlyFunctions(F.getName())3
)
296
1
    return;
297
1.23k
298
1.23k
  
if (1.23k
!isValidFunction(F)1.23k
)
299
27
    return;
300
1.23k
301
1.20k
  findScops(*TopRegion);
302
1.20k
303
1.20k
  NumScopRegions += ValidRegions.size();
304
1.20k
305
1.20k
  // Prune non-profitable regions.
306
3.19k
  for (auto &DIt : DetectionContextMap) {
307
3.19k
    auto &DC = DIt.getSecond();
308
3.19k
    if (DC.Log.hasErrors())
309
425
      continue;
310
2.77k
    
if (2.77k
!ValidRegions.count(&DC.CurRegion)2.77k
)
311
1.65k
      continue;
312
1.11k
    LoopStats Stats = countBeneficialLoops(&DC.CurRegion, SE, LI, 0);
313
1.11k
    updateLoopCountStatistic(Stats, false /* OnlyProfitable */);
314
1.11k
    if (
isProfitableRegion(DC)1.11k
)
{1.10k
315
1.10k
      updateLoopCountStatistic(Stats, true /* OnlyProfitable */);
316
1.10k
      continue;
317
1.10k
    }
318
1.11k
319
13
    ValidRegions.remove(&DC.CurRegion);
320
13
  }
321
1.20k
322
1.20k
  NumProfScopRegions += ValidRegions.size();
323
1.20k
  NumLoopsOverall += countBeneficialLoops(TopRegion, SE, LI, 0).NumLoops;
324
1.20k
325
1.20k
  // Only makes sense when we tracked errors.
326
1.20k
  if (PollyTrackFailures)
327
1.20k
    emitMissedRemarks(F);
328
1.20k
329
1.20k
  if (ReportLevel)
330
2
    printLocations(F);
331
1.20k
332
1.20k
  assert(ValidRegions.size() <= DetectionContextMap.size() &&
333
1.20k
         "Cached more results than valid regions");
334
1.20k
}
335
336
template <class RR, typename... Args>
337
inline bool ScopDetection::invalid(DetectionContext &Context, bool Assert,
338
601
                                   Args &&... Arguments) const {
339
601
340
601
  if (
!Context.Verifying601
)
{601
341
601
    RejectLog &Log = Context.Log;
342
601
    std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
343
601
344
601
    if (PollyTrackFailures)
345
601
      Log.report(RejectReason);
346
601
347
601
    DEBUG(dbgs() << RejectReason->getMessage());
348
601
    DEBUG(dbgs() << "\n");
349
0
  } else {
350
0
    assert(!Assert && "Verification of detected scop failed");
351
0
  }
352
601
353
601
  return false;
354
601
}
Unexecuted instantiation: bool polly::ScopDetection::invalid<polly::ReportNonAffBranch, llvm::BasicBlock*, llvm::SCEV const*&, llvm::SCEV const*&, llvm::SwitchInst*&>(polly::ScopDetection::DetectionContext&, bool, llvm::BasicBlock*&&, llvm::SCEV const*&&&, llvm::SCEV const*&&&, llvm::SwitchInst*&&&) const
Unexecuted instantiation: bool polly::ScopDetection::invalid<polly::ReportInvalidCond, llvm::BranchInst*&, llvm::BasicBlock*>(polly::ScopDetection::DetectionContext&, bool, llvm::BranchInst*&&&, llvm::BasicBlock*&&) const
bool polly::ScopDetection::invalid<polly::ReportUndefOperand, llvm::BasicBlock*, llvm::ICmpInst*&>(polly::ScopDetection::DetectionContext&, bool, llvm::BasicBlock*&&, llvm::ICmpInst*&&&) const
Line
Count
Source
338
5
                                   Args &&... Arguments) const {
339
5
340
5
  if (
!Context.Verifying5
)
{5
341
5
    RejectLog &Log = Context.Log;
342
5
    std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
343
5
344
5
    if (PollyTrackFailures)
345
5
      Log.report(RejectReason);
346
5
347
5
    DEBUG(dbgs() << RejectReason->getMessage());
348
5
    DEBUG(dbgs() << "\n");
349
0
  } else {
350
0
    assert(!Assert && "Verification of detected scop failed");
351
0
  }
352
5
353
5
  return false;
354
5
}
bool polly::ScopDetection::invalid<polly::ReportNonAffBranch, llvm::BasicBlock*, llvm::SCEV const*&, llvm::SCEV const*&, llvm::ICmpInst*&>(polly::ScopDetection::DetectionContext&, bool, llvm::BasicBlock*&&, llvm::SCEV const*&&&, llvm::SCEV const*&&&, llvm::ICmpInst*&&&) const
Line
Count
Source
338
5
                                   Args &&... Arguments) const {
339
5
340
5
  if (
!Context.Verifying5
)
{5
341
5
    RejectLog &Log = Context.Log;
342
5
    std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
343
5
344
5
    if (PollyTrackFailures)
345
5
      Log.report(RejectReason);
346
5
347
5
    DEBUG(dbgs() << RejectReason->getMessage());
348
5
    DEBUG(dbgs() << "\n");
349
0
  } else {
350
0
    assert(!Assert && "Verification of detected scop failed");
351
0
  }
352
5
353
5
  return false;
354
5
}
Unexecuted instantiation: bool polly::ScopDetection::invalid<polly::ReportInvalidTerminator, llvm::BasicBlock*>(polly::ScopDetection::DetectionContext&, bool, llvm::BasicBlock*&&) const
bool polly::ScopDetection::invalid<polly::ReportUndefCond, llvm::TerminatorInst*&, llvm::BasicBlock*>(polly::ScopDetection::DetectionContext&, bool, llvm::TerminatorInst*&&&, llvm::BasicBlock*&&) const
Line
Count
Source
338
158
                                   Args &&... Arguments) const {
339
158
340
158
  if (
!Context.Verifying158
)
{158
341
158
    RejectLog &Log = Context.Log;
342
158
    std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
343
158
344
158
    if (PollyTrackFailures)
345
158
      Log.report(RejectReason);
346
158
347
158
    DEBUG(dbgs() << RejectReason->getMessage());
348
158
    DEBUG(dbgs() << "\n");
349
0
  } else {
350
0
    assert(!Assert && "Verification of detected scop failed");
351
0
  }
352
158
353
158
  return false;
354
158
}
bool polly::ScopDetection::invalid<polly::ReportNonAffineAccess, llvm::SCEV const*&, llvm::Instruction const*&, llvm::Value*&>(polly::ScopDetection::DetectionContext&, bool, llvm::SCEV const*&&&, llvm::Instruction const*&&&, llvm::Value*&&&) const
Line
Count
Source
338
10
                                   Args &&... Arguments) const {
339
10
340
10
  if (
!Context.Verifying10
)
{10
341
10
    RejectLog &Log = Context.Log;
342
10
    std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
343
10
344
10
    if (PollyTrackFailures)
345
10
      Log.report(RejectReason);
346
10
347
10
    DEBUG(dbgs() << RejectReason->getMessage());
348
10
    DEBUG(dbgs() << "\n");
349
0
  } else {
350
0
    assert(!Assert && "Verification of detected scop failed");
351
0
  }
352
10
353
10
  return false;
354
10
}
bool polly::ScopDetection::invalid<polly::ReportNonAffineAccess, llvm::SCEV const* const&, llvm::Instruction const*&, llvm::Value*&>(polly::ScopDetection::DetectionContext&, bool, llvm::SCEV const* const&&&, llvm::Instruction const*&&&, llvm::Value*&&&) const
Line
Count
Source
338
24
                                   Args &&... Arguments) const {
339
24
340
24
  if (
!Context.Verifying24
)
{24
341
24
    RejectLog &Log = Context.Log;
342
24
    std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
343
24
344
24
    if (PollyTrackFailures)
345
24
      Log.report(RejectReason);
346
24
347
24
    DEBUG(dbgs() << RejectReason->getMessage());
348
24
    DEBUG(dbgs() << "\n");
349
0
  } else {
350
0
    assert(!Assert && "Verification of detected scop failed");
351
0
  }
352
24
353
24
  return false;
354
24
}
Unexecuted instantiation: bool polly::ScopDetection::invalid<polly::ReportNoBasePtr, llvm::Instruction*&>(polly::ScopDetection::DetectionContext&, bool, llvm::Instruction*&&&) const
bool polly::ScopDetection::invalid<polly::ReportUndefBasePtr, llvm::Instruction*&>(polly::ScopDetection::DetectionContext&, bool, llvm::Instruction*&&&) const
Line
Count
Source
338
5
                                   Args &&... Arguments) const {
339
5
340
5
  if (
!Context.Verifying5
)
{5
341
5
    RejectLog &Log = Context.Log;
342
5
    std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
343
5
344
5
    if (PollyTrackFailures)
345
5
      Log.report(RejectReason);
346
5
347
5
    DEBUG(dbgs() << RejectReason->getMessage());
348
5
    DEBUG(dbgs() << "\n");
349
0
  } else {
350
0
    assert(!Assert && "Verification of detected scop failed");
351
0
  }
352
5
353
5
  return false;
354
5
}
bool polly::ScopDetection::invalid<polly::ReportIntToPtr, llvm::IntToPtrInst*&>(polly::ScopDetection::DetectionContext&, bool, llvm::IntToPtrInst*&&&) const
Line
Count
Source
338
1
                                   Args &&... Arguments) const {
339
1
340
1
  if (
!Context.Verifying1
)
{1
341
1
    RejectLog &Log = Context.Log;
342
1
    std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
343
1
344
1
    if (PollyTrackFailures)
345
1
      Log.report(RejectReason);
346
1
347
1
    DEBUG(dbgs() << RejectReason->getMessage());
348
1
    DEBUG(dbgs() << "\n");
349
0
  } else {
350
0
    assert(!Assert && "Verification of detected scop failed");
351
0
  }
352
1
353
1
  return false;
354
1
}
bool polly::ScopDetection::invalid<polly::ReportVariantBasePtr, llvm::Value*&, llvm::Instruction*&>(polly::ScopDetection::DetectionContext&, bool, llvm::Value*&&&, llvm::Instruction*&&&) const
Line
Count
Source
338
10
                                   Args &&... Arguments) const {
339
10
340
10
  if (
!Context.Verifying10
)
{10
341
10
    RejectLog &Log = Context.Log;
342
10
    std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
343
10
344
10
    if (PollyTrackFailures)
345
10
      Log.report(RejectReason);
346
10
347
10
    DEBUG(dbgs() << RejectReason->getMessage());
348
10
    DEBUG(dbgs() << "\n");
349
0
  } else {
350
0
    assert(!Assert && "Verification of detected scop failed");
351
0
  }
352
10
353
10
  return false;
354
10
}
Unexecuted instantiation: bool polly::ScopDetection::invalid<polly::ReportDifferentArrayElementSize, llvm::Instruction*&, llvm::Value*&>(polly::ScopDetection::DetectionContext&, bool, llvm::Instruction*&&&, llvm::Value*&&&) const
bool polly::ScopDetection::invalid<polly::ReportNonAffineAccess, llvm::SCEV const*&, llvm::Instruction*&, llvm::Value*&>(polly::ScopDetection::DetectionContext&, bool, llvm::SCEV const*&&&, llvm::Instruction*&&&, llvm::Value*&&&) const
Line
Count
Source
338
22
                                   Args &&... Arguments) const {
339
22
340
22
  if (
!Context.Verifying22
)
{22
341
22
    RejectLog &Log = Context.Log;
342
22
    std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
343
22
344
22
    if (PollyTrackFailures)
345
22
      Log.report(RejectReason);
346
22
347
22
    DEBUG(dbgs() << RejectReason->getMessage());
348
22
    DEBUG(dbgs() << "\n");
349
0
  } else {
350
0
    assert(!Assert && "Verification of detected scop failed");
351
0
  }
352
22
353
22
  return false;
354
22
}
bool polly::ScopDetection::invalid<polly::ReportAlias, llvm::Instruction*&, llvm::AliasSet&>(polly::ScopDetection::DetectionContext&, bool, llvm::Instruction*&&&, llvm::AliasSet&&&) const
Line
Count
Source
338
3
                                   Args &&... Arguments) const {
339
3
340
3
  if (
!Context.Verifying3
)
{3
341
3
    RejectLog &Log = Context.Log;
342
3
    std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
343
3
344
3
    if (PollyTrackFailures)
345
3
      Log.report(RejectReason);
346
3
347
3
    DEBUG(dbgs() << RejectReason->getMessage());
348
3
    DEBUG(dbgs() << "\n");
349
0
  } else {
350
0
    assert(!Assert && "Verification of detected scop failed");
351
0
  }
352
3
353
3
  return false;
354
3
}
bool polly::ScopDetection::invalid<polly::ReportFuncCall, llvm::Instruction*>(polly::ScopDetection::DetectionContext&, bool, llvm::Instruction*&&) const
Line
Count
Source
338
12
                                   Args &&... Arguments) const {
339
12
340
12
  if (
!Context.Verifying12
)
{12
341
12
    RejectLog &Log = Context.Log;
342
12
    std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
343
12
344
12
    if (PollyTrackFailures)
345
12
      Log.report(RejectReason);
346
12
347
12
    DEBUG(dbgs() << RejectReason->getMessage());
348
12
    DEBUG(dbgs() << "\n");
349
0
  } else {
350
0
    assert(!Assert && "Verification of detected scop failed");
351
0
  }
352
12
353
12
  return false;
354
12
}
Unexecuted instantiation: bool polly::ScopDetection::invalid<polly::ReportAlloca, llvm::Instruction*>(polly::ScopDetection::DetectionContext&, bool, llvm::Instruction*&&) const
Unexecuted instantiation: bool polly::ScopDetection::invalid<polly::ReportNonSimpleMemoryAccess, llvm::Instruction*>(polly::ScopDetection::DetectionContext&, bool, llvm::Instruction*&&) const
bool polly::ScopDetection::invalid<polly::ReportUnknownInst, llvm::Instruction*>(polly::ScopDetection::DetectionContext&, bool, llvm::Instruction*&&) const
Line
Count
Source
338
51
                                   Args &&... Arguments) const {
339
51
340
51
  if (
!Context.Verifying51
)
{51
341
51
    RejectLog &Log = Context.Log;
342
51
    std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
343
51
344
51
    if (PollyTrackFailures)
345
51
      Log.report(RejectReason);
346
51
347
51
    DEBUG(dbgs() << RejectReason->getMessage());
348
51
    DEBUG(dbgs() << "\n");
349
0
  } else {
350
0
    assert(!Assert && "Verification of detected scop failed");
351
0
  }
352
51
353
51
  return false;
354
51
}
bool polly::ScopDetection::invalid<polly::ReportLoopHasNoExit, llvm::Loop*&>(polly::ScopDetection::DetectionContext&, bool, llvm::Loop*&&&) const
Line
Count
Source
338
16
                                   Args &&... Arguments) const {
339
16
340
16
  if (
!Context.Verifying16
)
{16
341
16
    RejectLog &Log = Context.Log;
342
16
    std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
343
16
344
16
    if (PollyTrackFailures)
345
16
      Log.report(RejectReason);
346
16
347
16
    DEBUG(dbgs() << RejectReason->getMessage());
348
16
    DEBUG(dbgs() << "\n");
349
0
  } else {
350
0
    assert(!Assert && "Verification of detected scop failed");
351
0
  }
352
16
353
16
  return false;
354
16
}
bool polly::ScopDetection::invalid<polly::ReportLoopBound, llvm::Loop*&, llvm::SCEV const*&>(polly::ScopDetection::DetectionContext&, bool, llvm::Loop*&&&, llvm::SCEV const*&&&) const
Line
Count
Source
338
151
                                   Args &&... Arguments) const {
339
151
340
151
  if (
!Context.Verifying151
)
{151
341
151
    RejectLog &Log = Context.Log;
342
151
    std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
343
151
344
151
    if (PollyTrackFailures)
345
151
      Log.report(RejectReason);
346
151
347
151
    DEBUG(dbgs() << RejectReason->getMessage());
348
151
    DEBUG(dbgs() << "\n");
349
0
  } else {
350
0
    assert(!Assert && "Verification of detected scop failed");
351
0
  }
352
151
353
151
  return false;
354
151
}
bool polly::ScopDetection::invalid<polly::ReportUnprofitable, llvm::Region*>(polly::ScopDetection::DetectionContext&, bool, llvm::Region*&&) const
Line
Count
Source
338
14
                                   Args &&... Arguments) const {
339
14
340
14
  if (
!Context.Verifying14
)
{14
341
14
    RejectLog &Log = Context.Log;
342
14
    std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
343
14
344
14
    if (PollyTrackFailures)
345
14
      Log.report(RejectReason);
346
14
347
14
    DEBUG(dbgs() << RejectReason->getMessage());
348
14
    DEBUG(dbgs() << "\n");
349
0
  } else {
350
0
    assert(!Assert && "Verification of detected scop failed");
351
0
  }
352
14
353
14
  return false;
354
14
}
bool polly::ScopDetection::invalid<polly::ReportUnreachableInExit, llvm::BasicBlock*, llvm::DebugLoc&>(polly::ScopDetection::DetectionContext&, bool, llvm::BasicBlock*&&, llvm::DebugLoc&&&) const
Line
Count
Source
338
26
                                   Args &&... Arguments) const {
339
26
340
26
  if (
!Context.Verifying26
)
{26
341
26
    RejectLog &Log = Context.Log;
342
26
    std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
343
26
344
26
    if (PollyTrackFailures)
345
26
      Log.report(RejectReason);
346
26
347
26
    DEBUG(dbgs() << RejectReason->getMessage());
348
26
    DEBUG(dbgs() << "\n");
349
0
  } else {
350
0
    assert(!Assert && "Verification of detected scop failed");
351
0
  }
352
26
353
26
  return false;
354
26
}
bool polly::ScopDetection::invalid<polly::ReportEntry, llvm::BasicBlock*>(polly::ScopDetection::DetectionContext&, bool, llvm::BasicBlock*&&) const
Line
Count
Source
338
85
                                   Args &&... Arguments) const {
339
85
340
85
  if (
!Context.Verifying85
)
{85
341
85
    RejectLog &Log = Context.Log;
342
85
    std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
343
85
344
85
    if (PollyTrackFailures)
345
85
      Log.report(RejectReason);
346
85
347
85
    DEBUG(dbgs() << RejectReason->getMessage());
348
85
    DEBUG(dbgs() << "\n");
349
0
  } else {
350
0
    assert(!Assert && "Verification of detected scop failed");
351
0
  }
352
85
353
85
  return false;
354
85
}
bool polly::ScopDetection::invalid<polly::ReportIrreducibleRegion, llvm::Region*, llvm::DebugLoc&>(polly::ScopDetection::DetectionContext&, bool, llvm::Region*&&, llvm::DebugLoc&&&) const
Line
Count
Source
338
3
                                   Args &&... Arguments) const {
339
3
340
3
  if (
!Context.Verifying3
)
{3
341
3
    RejectLog &Log = Context.Log;
342
3
    std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
343
3
344
3
    if (PollyTrackFailures)
345
3
      Log.report(RejectReason);
346
3
347
3
    DEBUG(dbgs() << RejectReason->getMessage());
348
3
    DEBUG(dbgs() << "\n");
349
0
  } else {
350
0
    assert(!Assert && "Verification of detected scop failed");
351
0
  }
352
3
353
3
  return false;
354
3
}
355
356
3.62k
bool ScopDetection::isMaxRegionInScop(const Region &R, bool Verify) const {
357
3.62k
  if (!ValidRegions.count(&R))
358
2.59k
    return false;
359
3.62k
360
1.02k
  
if (1.02k
Verify1.02k
)
{1.02k
361
1.02k
    DetectionContextMap.erase(getBBPairForRegion(&R));
362
1.02k
    const auto &It = DetectionContextMap.insert(std::make_pair(
363
1.02k
        getBBPairForRegion(&R),
364
1.02k
        DetectionContext(const_cast<Region &>(R), AA, false /*verifying*/)));
365
1.02k
    DetectionContext &Context = It.first->second;
366
1.02k
    return isValidRegion(Context);
367
1.02k
  }
368
1.02k
369
0
  return true;
370
1.02k
}
371
372
4
std::string ScopDetection::regionIsInvalidBecause(const Region *R) const {
373
4
  // Get the first error we found. Even in keep-going mode, this is the first
374
4
  // reason that caused the candidate to be rejected.
375
4
  auto *Log = lookupRejectionLog(R);
376
4
377
4
  // This can happen when we marked a region invalid, but didn't track
378
4
  // an error for it.
379
4
  if (
!Log || 4
!Log->hasErrors()3
)
380
4
    return "";
381
4
382
0
  RejectReasonPtr RR = *Log->begin();
383
0
  return RR->getMessage();
384
4
}
385
386
bool ScopDetection::addOverApproximatedRegion(Region *AR,
387
653
                                              DetectionContext &Context) const {
388
653
389
653
  // If we already know about Ar we can exit.
390
653
  if (!Context.NonAffineSubRegionSet.insert(AR))
391
64
    return true;
392
653
393
653
  // All loops in the region have to be overapproximated too if there
394
653
  // are accesses that depend on the iteration count.
395
653
396
1.46k
  
for (BasicBlock *BB : AR->blocks()) 589
{1.46k
397
1.46k
    Loop *L = LI.getLoopFor(BB);
398
1.46k
    if (AR->contains(L))
399
237
      Context.BoxedLoopsSet.insert(L);
400
1.46k
  }
401
589
402
498
  return (AllowNonAffineSubLoops || Context.BoxedLoopsSet.empty());
403
653
}
404
405
bool ScopDetection::onlyValidRequiredInvariantLoads(
406
36.5k
    InvariantLoadsSetTy &RequiredILS, DetectionContext &Context) const {
407
36.5k
  Region &CurRegion = Context.CurRegion;
408
36.5k
  const DataLayout &DL = CurRegion.getEntry()->getModule()->getDataLayout();
409
36.5k
410
36.5k
  if (
!PollyInvariantLoadHoisting && 36.5k
!RequiredILS.empty()29.0k
)
411
381
    return false;
412
36.5k
413
36.1k
  
for (LoadInst *Load : RequiredILS) 36.1k
{1.53k
414
1.53k
    // If we already know a load has been accepted as required invariant, we
415
1.53k
    // already run the validation below once and consequently don't need to
416
1.53k
    // run it again. Hence, we return early. For certain test cases (e.g.,
417
1.53k
    // COSMO this avoids us spending 50% of scop-detection time in this
418
1.53k
    // very function (and its children).
419
1.53k
    if (Context.RequiredILS.count(Load))
420
787
      continue;
421
1.53k
422
750
    
if (750
!isHoistableLoad(Load, CurRegion, LI, SE, DT)750
)
423
136
      return false;
424
750
425
614
    
for (auto NonAffineRegion : Context.NonAffineSubRegionSet) 614
{320
426
320
427
320
      if (isSafeToLoadUnconditionally(Load->getPointerOperand(),
428
320
                                      Load->getAlignment(), DL))
429
6
        continue;
430
320
431
314
      
if (314
NonAffineRegion->contains(Load) &&314
432
14
          Load->getParent() != NonAffineRegion->getEntry())
433
14
        return false;
434
314
    }
435
614
  }
436
36.1k
437
36.0k
  Context.RequiredILS.insert(RequiredILS.begin(), RequiredILS.end());
438
36.0k
439
36.0k
  return true;
440
36.1k
}
441
442
bool ScopDetection::involvesMultiplePtrs(const SCEV *S0, const SCEV *S1,
443
11.9k
                                         Loop *Scope) const {
444
11.9k
  SetVector<Value *> Values;
445
11.9k
  findValues(S0, SE, Values);
446
11.9k
  if (S1)
447
4.21k
    findValues(S1, SE, Values);
448
11.9k
449
11.9k
  SmallPtrSet<Value *, 8> PtrVals;
450
5.20k
  for (auto *V : Values) {
451
5.20k
    if (auto *P2I = dyn_cast<PtrToIntInst>(V))
452
2
      V = P2I->getOperand(0);
453
5.20k
454
5.20k
    if (!V->getType()->isPointerTy())
455
5.13k
      continue;
456
5.20k
457
68
    auto *PtrSCEV = SE.getSCEVAtScope(V, Scope);
458
68
    if (isa<SCEVConstant>(PtrSCEV))
459
2
      continue;
460
68
461
66
    auto *BasePtr = dyn_cast<SCEVUnknown>(SE.getPointerBase(PtrSCEV));
462
66
    if (!BasePtr)
463
0
      return true;
464
66
465
66
    auto *BasePtrVal = BasePtr->getValue();
466
66
    if (
PtrVals.insert(BasePtrVal).second66
)
{66
467
66
      for (auto *PtrVal : PtrVals)
468
66
        
if (66
PtrVal != BasePtrVal && 66
!AA.isNoAlias(PtrVal, BasePtrVal)0
)
469
0
          return true;
470
66
    }
471
66
  }
472
11.9k
473
11.9k
  return false;
474
11.9k
}
475
476
bool ScopDetection::isAffine(const SCEV *S, Loop *Scope,
477
37.4k
                             DetectionContext &Context) const {
478
37.4k
479
37.4k
  InvariantLoadsSetTy AccessILS;
480
37.4k
  if (!isAffineExpr(&Context.CurRegion, Scope, S, SE, &AccessILS))
481
859
    return false;
482
37.4k
483
36.5k
  
if (36.5k
!onlyValidRequiredInvariantLoads(AccessILS, Context)36.5k
)
484
531
    return false;
485
36.5k
486
36.0k
  return true;
487
36.5k
}
488
489
bool ScopDetection::isValidSwitch(BasicBlock &BB, SwitchInst *SI,
490
                                  Value *Condition, bool IsLoopBranch,
491
47
                                  DetectionContext &Context) const {
492
47
  Loop *L = LI.getLoopFor(&BB);
493
47
  const SCEV *ConditionSCEV = SE.getSCEVAtScope(Condition, L);
494
47
495
47
  if (
IsLoopBranch && 47
L->isLoopLatch(&BB)5
)
496
1
    return false;
497
47
498
47
  // Check for invalid usage of different pointers in one expression.
499
46
  
if (46
involvesMultiplePtrs(ConditionSCEV, nullptr, L)46
)
500
0
    return false;
501
46
502
46
  
if (46
isAffine(ConditionSCEV, L, Context)46
)
503
44
    return true;
504
46
505
2
  
if (2
AllowNonAffineSubRegions &&2
506
2
      addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
507
2
    return true;
508
2
509
0
  return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB,
510
0
                                     ConditionSCEV, ConditionSCEV, SI);
511
2
}
512
513
bool ScopDetection::isValidBranch(BasicBlock &BB, BranchInst *BI,
514
                                  Value *Condition, bool IsLoopBranch,
515
25.9k
                                  DetectionContext &Context) const {
516
25.9k
517
25.9k
  // Constant integer conditions are always affine.
518
25.9k
  if (isa<ConstantInt>(Condition))
519
13.8k
    return true;
520
25.9k
521
12.1k
  
if (BinaryOperator *12.1k
BinOp12.1k
= dyn_cast<BinaryOperator>(Condition))
{105
522
105
    auto Opcode = BinOp->getOpcode();
523
105
    if (
Opcode == Instruction::And || 105
Opcode == Instruction::Or94
)
{105
524
105
      Value *Op0 = BinOp->getOperand(0);
525
105
      Value *Op1 = BinOp->getOperand(1);
526
105
      return isValidBranch(BB, BI, Op0, IsLoopBranch, Context) &&
527
105
             isValidBranch(BB, BI, Op1, IsLoopBranch, Context);
528
105
    }
529
105
  }
530
12.1k
531
12.1k
  // Non constant conditions of branches need to be ICmpInst.
532
11.9k
  
if (11.9k
!isa<ICmpInst>(Condition)11.9k
)
{100
533
100
    if (
!IsLoopBranch && 100
AllowNonAffineSubRegions100
&&
534
100
        addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
535
100
      return true;
536
0
    return invalid<ReportInvalidCond>(Context, /*Assert=*/true, BI, &BB);
537
100
  }
538
11.9k
539
11.8k
  ICmpInst *ICmp = cast<ICmpInst>(Condition);
540
11.8k
541
11.8k
  // Are both operands of the ICmp affine?
542
11.8k
  if (isa<UndefValue>(ICmp->getOperand(0)) ||
543
11.8k
      isa<UndefValue>(ICmp->getOperand(1)))
544
5
    return invalid<ReportUndefOperand>(Context, /*Assert=*/true, &BB, ICmp);
545
11.8k
546
11.8k
  Loop *L = LI.getLoopFor(&BB);
547
11.8k
  const SCEV *LHS = SE.getSCEVAtScope(ICmp->getOperand(0), L);
548
11.8k
  const SCEV *RHS = SE.getSCEVAtScope(ICmp->getOperand(1), L);
549
11.8k
550
11.8k
  // If unsigned operations are not allowed try to approximate the region.
551
11.8k
  if (
ICmp->isUnsigned() && 11.8k
!PollyAllowUnsignedOperations93
)
552
0
    
return !IsLoopBranch && 0
AllowNonAffineSubRegions0
&&
553
0
           addOverApproximatedRegion(RI.getRegionFor(&BB), Context);
554
11.8k
555
11.8k
  // Check for invalid usage of different pointers in one expression.
556
11.8k
  
if (11.8k
ICmp->isEquality() && 11.8k
involvesMultiplePtrs(LHS, nullptr, L)7.68k
&&
557
0
      involvesMultiplePtrs(RHS, nullptr, L))
558
0
    return false;
559
11.8k
560
11.8k
  // Check for invalid usage of different pointers in a relational comparison.
561
11.8k
  
if (11.8k
ICmp->isRelational() && 11.8k
involvesMultiplePtrs(LHS, RHS, L)4.21k
)
562
0
    return false;
563
11.8k
564
11.8k
  
if (11.8k
isAffine(LHS, L, Context) && 11.8k
isAffine(RHS, L, Context)11.5k
)
565
11.2k
    return true;
566
11.8k
567
601
  
if (601
!IsLoopBranch && 601
AllowNonAffineSubRegions487
&&
568
485
      addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
569
482
    return true;
570
601
571
119
  
if (119
IsLoopBranch119
)
572
114
    return false;
573
119
574
5
  return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, LHS, RHS,
575
5
                                     ICmp);
576
119
}
577
578
bool ScopDetection::isValidCFG(BasicBlock &BB, bool IsLoopBranch,
579
                               bool AllowUnreachable,
580
25.9k
                               DetectionContext &Context) const {
581
25.9k
  Region &CurRegion = Context.CurRegion;
582
25.9k
583
25.9k
  TerminatorInst *TI = BB.getTerminator();
584
25.9k
585
25.9k
  if (
AllowUnreachable && 25.9k
isa<UnreachableInst>(TI)132
)
586
0
    return true;
587
25.9k
588
25.9k
  // Return instructions are only valid if the region is the top level region.
589
25.9k
  
if (25.9k
isa<ReturnInst>(TI) && 25.9k
CurRegion.isTopLevelRegion()4
)
590
4
    return true;
591
25.9k
592
25.9k
  Value *Condition = getConditionFromTerminator(TI);
593
25.9k
594
25.9k
  if (!Condition)
595
0
    return invalid<ReportInvalidTerminator>(Context, /*Assert=*/true, &BB);
596
25.9k
597
25.9k
  // UndefValue is not allowed as condition.
598
25.9k
  
if (25.9k
isa<UndefValue>(Condition)25.9k
)
599
158
    return invalid<ReportUndefCond>(Context, /*Assert=*/true, TI, &BB);
600
25.9k
601
25.7k
  
if (BranchInst *25.7k
BI25.7k
= dyn_cast<BranchInst>(TI))
602
25.7k
    return isValidBranch(BB, BI, Condition, IsLoopBranch, Context);
603
25.7k
604
47
  SwitchInst *SI = dyn_cast<SwitchInst>(TI);
605
47
  assert(SI && "Terminator was neither branch nor switch");
606
47
607
47
  return isValidSwitch(BB, SI, Condition, IsLoopBranch, Context);
608
25.7k
}
609
610
bool ScopDetection::isValidCallInst(CallInst &CI,
611
206
                                    DetectionContext &Context) const {
612
206
  if (CI.doesNotReturn())
613
0
    return false;
614
206
615
206
  
if (206
CI.doesNotAccessMemory()206
)
616
101
    return true;
617
206
618
105
  
if (auto *105
II105
= dyn_cast<IntrinsicInst>(&CI))
619
66
    
if (66
isValidIntrinsicInst(*II, Context)66
)
620
54
      return true;
621
105
622
51
  Function *CalledFunction = CI.getCalledFunction();
623
51
624
51
  // Indirect calls are not supported.
625
51
  if (CalledFunction == nullptr)
626
0
    return false;
627
51
628
51
  
if (51
AllowModrefCall51
)
{39
629
39
    switch (AA.getModRefBehavior(CalledFunction)) {
630
0
    case FMRB_UnknownModRefBehavior:
631
0
      return false;
632
21
    case FMRB_DoesNotAccessMemory:
633
21
    case FMRB_OnlyReadsMemory:
634
21
      // Implicitly disable delinearization since we have an unknown
635
21
      // accesses with an unknown access function.
636
21
      Context.HasUnknownAccess = true;
637
21
      Context.AST.add(&CI);
638
21
      return true;
639
18
    case FMRB_OnlyReadsArgumentPointees:
640
18
    case FMRB_OnlyAccessesArgumentPointees:
641
56
      for (const auto &Arg : CI.arg_operands()) {
642
56
        if (!Arg->getType()->isPointerTy())
643
28
          continue;
644
56
645
56
        // Bail if a pointer argument has a base address not known to
646
56
        // ScalarEvolution. Note that a zero pointer is acceptable.
647
28
        auto *ArgSCEV = SE.getSCEVAtScope(Arg, LI.getLoopFor(CI.getParent()));
648
28
        if (ArgSCEV->isZero())
649
14
          continue;
650
28
651
14
        auto *BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV));
652
14
        if (!BP)
653
0
          return false;
654
14
655
14
        // Implicitly disable delinearization since we have an unknown
656
14
        // accesses with an unknown access function.
657
14
        Context.HasUnknownAccess = true;
658
14
      }
659
18
660
18
      Context.AST.add(&CI);
661
18
      return true;
662
0
    case FMRB_DoesNotReadMemory:
663
0
    case FMRB_OnlyAccessesInaccessibleMem:
664
0
    case FMRB_OnlyAccessesInaccessibleOrArgMem:
665
0
      return false;
666
39
    }
667
39
  }
668
51
669
12
  return false;
670
51
}
671
672
bool ScopDetection::isValidIntrinsicInst(IntrinsicInst &II,
673
66
                                         DetectionContext &Context) const {
674
66
  if (isIgnoredIntrinsic(&II))
675
27
    return true;
676
66
677
66
  // The closest loop surrounding the call instruction.
678
39
  Loop *L = LI.getLoopFor(II.getParent());
679
39
680
39
  // The access function and base pointer for memory intrinsics.
681
39
  const SCEV *AF;
682
39
  const SCEVUnknown *BP;
683
39
684
39
  switch (II.getIntrinsicID()) {
685
39
  // Memory intrinsics that can be represented are supported.
686
16
  case llvm::Intrinsic::memmove:
687
16
  case llvm::Intrinsic::memcpy:
688
16
    AF = SE.getSCEVAtScope(cast<MemTransferInst>(II).getSource(), L);
689
16
    if (
!AF->isZero()16
)
{16
690
16
      BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF));
691
16
      // Bail if the source pointer is not valid.
692
16
      if (!isValidAccess(&II, AF, BP, Context))
693
0
        return false;
694
16
    }
695
16
  // Fall through
696
33
  case llvm::Intrinsic::memset:
697
33
    AF = SE.getSCEVAtScope(cast<MemIntrinsic>(II).getDest(), L);
698
33
    if (
!AF->isZero()33
)
{29
699
29
      BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF));
700
29
      // Bail if the destination pointer is not valid.
701
29
      if (!isValidAccess(&II, AF, BP, Context))
702
0
        return false;
703
29
    }
704
33
705
33
    // Bail if the length is not affine.
706
33
    
if (33
!isAffine(SE.getSCEVAtScope(cast<MemIntrinsic>(II).getLength(), L), L,33
707
33
                  Context))
708
6
      return false;
709
33
710
27
    return true;
711
6
  default:
712
6
    break;
713
39
  }
714
39
715
6
  return false;
716
39
}
717
718
bool ScopDetection::isInvariant(Value &Val, const Region &Reg,
719
11.1k
                                DetectionContext &Ctx) const {
720
11.1k
  // A reference to function argument or constant value is invariant.
721
11.1k
  if (
isa<Argument>(Val) || 11.1k
isa<Constant>(Val)3.16k
)
722
8.65k
    return true;
723
11.1k
724
2.45k
  Instruction *I = dyn_cast<Instruction>(&Val);
725
2.45k
  if (!I)
726
0
    return false;
727
2.45k
728
2.45k
  
if (2.45k
!Reg.contains(I)2.45k
)
729
1.97k
    return true;
730
2.45k
731
2.45k
  // Loads within the SCoP may read arbitrary values, need to hoist them. If it
732
2.45k
  // is not hoistable, it will be rejected later, but here we assume it is and
733
2.45k
  // that makes the value invariant.
734
480
  
if (auto 480
LI480
= dyn_cast<LoadInst>(I))
{470
735
470
    Ctx.RequiredILS.insert(LI);
736
470
    return true;
737
470
  }
738
480
739
10
  return false;
740
480
}
741
742
/// Remove smax of smax(0, size) expressions from a SCEV expression and
743
/// register the '...' components.
744
///
745
/// Array access expressions as they are generated by GFortran contain smax(0,
746
/// size) expressions that confuse the 'normal' delinearization algorithm.
747
/// However, if we extract such expressions before the normal delinearization
748
/// takes place they can actually help to identify array size expressions in
749
/// Fortran accesses. For the subsequently following delinearization the smax(0,
750
/// size) component can be replaced by just 'size'. This is correct as we will
751
/// always add and verify the assumption that for all subscript expressions
752
/// 'exp' the inequality 0 <= exp < size holds. Hence, we will also verify
753
/// that 0 <= size, which means smax(0, size) == size.
754
class SCEVRemoveMax : public SCEVRewriteVisitor<SCEVRemoveMax> {
755
public:
756
  static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE,
757
1.39k
                             std::vector<const SCEV *> *Terms = nullptr) {
758
1.39k
    SCEVRemoveMax Rewriter(SE, Terms);
759
1.39k
    return Rewriter.visit(Scev);
760
1.39k
  }
761
762
  SCEVRemoveMax(ScalarEvolution &SE, std::vector<const SCEV *> *Terms)
763
1.39k
      : SCEVRewriteVisitor(SE), Terms(Terms) {}
764
765
56
  const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
766
56
    if (
(Expr->getNumOperands() == 2) && 56
Expr->getOperand(0)->isZero()56
)
{56
767
56
      auto Res = visit(Expr->getOperand(1));
768
56
      if (Terms)
769
28
        (*Terms).push_back(Res);
770
56
      return Res;
771
56
    }
772
56
773
0
    return Expr;
774
56
  }
775
776
private:
777
  std::vector<const SCEV *> *Terms;
778
};
779
780
SmallVector<const SCEV *, 4>
781
ScopDetection::getDelinearizationTerms(DetectionContext &Context,
782
433
                                       const SCEVUnknown *BasePointer) const {
783
433
  SmallVector<const SCEV *, 4> Terms;
784
705
  for (const auto &Pair : Context.Accesses[BasePointer]) {
785
705
    std::vector<const SCEV *> MaxTerms;
786
705
    SCEVRemoveMax::rewrite(Pair.second, SE, &MaxTerms);
787
705
    if (
MaxTerms.size() > 0705
)
{16
788
16
      Terms.insert(Terms.begin(), MaxTerms.begin(), MaxTerms.end());
789
16
      continue;
790
16
    }
791
705
    // In case the outermost expression is a plain add, we check if any of its
792
705
    // terms has the form 4 * %inst * %param * %param ..., aka a term that
793
705
    // contains a product between a parameter and an instruction that is
794
705
    // inside the scop. Such instructions, if allowed at all, are instructions
795
705
    // SCEV can not represent, but Polly is still looking through. As a
796
705
    // result, these instructions can depend on induction variables and are
797
705
    // most likely no array sizes. However, terms that are multiplied with
798
705
    // them are likely candidates for array sizes.
799
689
    
if (auto *689
AF689
= dyn_cast<SCEVAddExpr>(Pair.second))
{8
800
17
      for (auto Op : AF->operands()) {
801
17
        if (auto *AF2 = dyn_cast<SCEVAddRecExpr>(Op))
802
4
          SE.collectParametricTerms(AF2, Terms);
803
17
        if (auto *
AF217
= dyn_cast<SCEVMulExpr>(Op))
{8
804
8
          SmallVector<const SCEV *, 0> Operands;
805
8
806
24
          for (auto *MulOp : AF2->operands()) {
807
24
            if (auto *Const = dyn_cast<SCEVConstant>(MulOp))
808
6
              Operands.push_back(Const);
809
24
            if (auto *
Unknown24
= dyn_cast<SCEVUnknown>(MulOp))
{16
810
16
              if (auto *
Inst16
= dyn_cast<Instruction>(Unknown->getValue()))
{4
811
4
                if (!Context.CurRegion.contains(Inst))
812
0
                  Operands.push_back(MulOp);
813
4
814
12
              } else {
815
12
                Operands.push_back(MulOp);
816
12
              }
817
16
            }
818
24
          }
819
8
          if (Operands.size())
820
7
            Terms.push_back(SE.getMulExpr(Operands));
821
8
        }
822
17
      }
823
8
    }
824
689
    if (Terms.empty())
825
479
      SE.collectParametricTerms(Pair.second, Terms);
826
689
  }
827
433
  return Terms;
828
433
}
829
830
bool ScopDetection::hasValidArraySizes(DetectionContext &Context,
831
                                       SmallVectorImpl<const SCEV *> &Sizes,
832
                                       const SCEVUnknown *BasePointer,
833
433
                                       Loop *Scope) const {
834
433
  // If no sizes were found, all sizes are trivially valid. We allow this case
835
433
  // to make it possible to pass known-affine accesses to the delinearization to
836
433
  // try to recover some interesting multi-dimensional accesses, but to still
837
433
  // allow the already known to be affine access in case the delinearization
838
433
  // fails. In such situations, the delinearization will just return a Sizes
839
433
  // array of size zero.
840
433
  if (Sizes.size() == 0)
841
65
    return true;
842
433
843
368
  Value *BaseValue = BasePointer->getValue();
844
368
  Region &CurRegion = Context.CurRegion;
845
1.03k
  for (const SCEV *DelinearizedSize : Sizes) {
846
1.03k
    if (
!isAffine(DelinearizedSize, Scope, Context)1.03k
)
{10
847
10
      Sizes.clear();
848
10
      break;
849
10
    }
850
1.02k
    
if (auto *1.02k
Unknown1.02k
= dyn_cast<SCEVUnknown>(DelinearizedSize))
{617
851
617
      auto *V = dyn_cast<Value>(Unknown->getValue());
852
617
      if (auto *
Load617
= dyn_cast<LoadInst>(V))
{176
853
176
        if (Context.CurRegion.contains(Load) &&
854
24
            isHoistableLoad(Load, CurRegion, LI, SE, DT))
855
24
          Context.RequiredILS.insert(Load);
856
176
        continue;
857
176
      }
858
617
    }
859
847
    
if (847
hasScalarDepsInsideRegion(DelinearizedSize, &CurRegion, Scope, false)847
)
860
0
      return invalid<ReportNonAffineAccess>(
861
0
          Context, /*Assert=*/true, DelinearizedSize,
862
0
          Context.Accesses[BasePointer].front().first, BaseValue);
863
847
  }
864
368
865
368
  // No array shape derived.
866
368
  
if (368
Sizes.empty()368
)
{10
867
10
    if (AllowNonAffine)
868
0
      return true;
869
10
870
11
    
for (const auto &Pair : Context.Accesses[BasePointer]) 10
{11
871
11
      const Instruction *Insn = Pair.first;
872
11
      const SCEV *AF = Pair.second;
873
11
874
11
      if (
!isAffine(AF, Scope, Context)11
)
{10
875
10
        invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Insn,
876
10
                                       BaseValue);
877
10
        if (!KeepGoing)
878
10
          return false;
879
10
      }
880
11
    }
881
0
    return false;
882
10
  }
883
358
  return true;
884
368
}
885
886
// We first store the resulting memory accesses in TempMemoryAccesses. Only
887
// if the access functions for all memory accesses have been successfully
888
// delinearized we continue. Otherwise, we either report a failure or, if
889
// non-affine accesses are allowed, we drop the information. In case the
890
// information is dropped the memory accesses need to be overapproximated
891
// when translated to a polyhedral representation.
892
bool ScopDetection::computeAccessFunctions(
893
    DetectionContext &Context, const SCEVUnknown *BasePointer,
894
423
    std::shared_ptr<ArrayShape> Shape) const {
895
423
  Value *BaseValue = BasePointer->getValue();
896
423
  bool BasePtrHasNonAffine = false;
897
423
  MapInsnToMemAcc TempMemoryAccesses;
898
685
  for (const auto &Pair : Context.Accesses[BasePointer]) {
899
685
    const Instruction *Insn = Pair.first;
900
685
    auto *AF = Pair.second;
901
685
    AF = SCEVRemoveMax::rewrite(AF, SE);
902
685
    bool IsNonAffine = false;
903
685
    TempMemoryAccesses.insert(std::make_pair(Insn, MemAcc(Insn, Shape)));
904
685
    MemAcc *Acc = &TempMemoryAccesses.find(Insn)->second;
905
685
    auto *Scope = LI.getLoopFor(Insn->getParent());
906
685
907
685
    if (
!AF685
)
{0
908
0
      if (isAffine(Pair.second, Scope, Context))
909
0
        Acc->DelinearizedSubscripts.push_back(Pair.second);
910
0
      else
911
0
        IsNonAffine = true;
912
685
    } else {
913
685
      if (
Shape->DelinearizedSizes.size() == 0685
)
{109
914
109
        Acc->DelinearizedSubscripts.push_back(AF);
915
576
      } else {
916
576
        SE.computeAccessFunctions(AF, Acc->DelinearizedSubscripts,
917
576
                                  Shape->DelinearizedSizes);
918
576
        if (Acc->DelinearizedSubscripts.size() == 0)
919
6
          IsNonAffine = true;
920
576
      }
921
685
      for (const SCEV *S : Acc->DelinearizedSubscripts)
922
1.76k
        
if (1.76k
!isAffine(S, Scope, Context)1.76k
)
923
92
          IsNonAffine = true;
924
685
    }
925
685
926
685
    // (Possibly) report non affine access
927
685
    if (
IsNonAffine685
)
{98
928
98
      BasePtrHasNonAffine = true;
929
98
      if (!AllowNonAffine)
930
24
        invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Pair.second,
931
24
                                       Insn, BaseValue);
932
98
      if (
!KeepGoing && 98
!AllowNonAffine87
)
933
17
        return false;
934
98
    }
935
685
  }
936
423
937
406
  
if (406
!BasePtrHasNonAffine406
)
938
351
    Context.InsnToMemAcc.insert(TempMemoryAccesses.begin(),
939
351
                                TempMemoryAccesses.end());
940
406
941
406
  return true;
942
423
}
943
944
bool ScopDetection::hasBaseAffineAccesses(DetectionContext &Context,
945
                                          const SCEVUnknown *BasePointer,
946
433
                                          Loop *Scope) const {
947
433
  auto Shape = std::shared_ptr<ArrayShape>(new ArrayShape(BasePointer));
948
433
949
433
  auto Terms = getDelinearizationTerms(Context, BasePointer);
950
433
951
433
  SE.findArrayDimensions(Terms, Shape->DelinearizedSizes,
952
433
                         Context.ElementSize[BasePointer]);
953
433
954
433
  if (!hasValidArraySizes(Context, Shape->DelinearizedSizes, BasePointer,
955
433
                          Scope))
956
10
    return false;
957
433
958
423
  return computeAccessFunctions(Context, BasePointer, Shape);
959
433
}
960
961
2.87k
bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const {
962
2.87k
  // TODO: If we have an unknown access and other non-affine accesses we do
963
2.87k
  //       not try to delinearize them for now.
964
2.87k
  if (
Context.HasUnknownAccess && 2.87k
!Context.NonAffineAccesses.empty()35
)
965
12
    return AllowNonAffine;
966
2.87k
967
2.86k
  
for (auto &Pair : Context.NonAffineAccesses) 2.86k
{433
968
433
    auto *BasePointer = Pair.first;
969
433
    auto *Scope = Pair.second;
970
433
    if (
!hasBaseAffineAccesses(Context, BasePointer, Scope)433
)
{27
971
27
      if (KeepGoing)
972
0
        continue;
973
27
      else
974
27
        return false;
975
27
    }
976
433
  }
977
2.83k
  return true;
978
2.86k
}
979
980
bool ScopDetection::isValidAccess(Instruction *Inst, const SCEV *AF,
981
                                  const SCEVUnknown *BP,
982
11.1k
                                  DetectionContext &Context) const {
983
11.1k
984
11.1k
  if (!BP)
985
0
    return invalid<ReportNoBasePtr>(Context, /*Assert=*/true, Inst);
986
11.1k
987
11.1k
  auto *BV = BP->getValue();
988
11.1k
  if (isa<UndefValue>(BV))
989
5
    return invalid<ReportUndefBasePtr>(Context, /*Assert=*/true, Inst);
990
11.1k
991
11.1k
  // FIXME: Think about allowing IntToPtrInst
992
11.1k
  
if (IntToPtrInst *11.1k
Inst11.1k
= dyn_cast<IntToPtrInst>(BV))
993
1
    return invalid<ReportIntToPtr>(Context, /*Assert=*/true, Inst);
994
11.1k
995
11.1k
  // Check that the base address of the access is invariant in the current
996
11.1k
  // region.
997
11.1k
  
if (11.1k
!isInvariant(*BV, Context.CurRegion, Context)11.1k
)
998
10
    return invalid<ReportVariantBasePtr>(Context, /*Assert=*/true, BV, Inst);
999
11.1k
1000
11.0k
  AF = SE.getMinusSCEV(AF, BP);
1001
11.0k
1002
11.0k
  const SCEV *Size;
1003
11.0k
  if (
!isa<MemIntrinsic>(Inst)11.0k
)
{11.0k
1004
11.0k
    Size = SE.getElementSize(Inst);
1005
45
  } else {
1006
45
    auto *SizeTy =
1007
45
        SE.getEffectiveSCEVType(PointerType::getInt8PtrTy(SE.getContext()));
1008
45
    Size = SE.getConstant(SizeTy, 8);
1009
45
  }
1010
11.0k
1011
11.0k
  if (
Context.ElementSize[BP]11.0k
)
{6.30k
1012
6.30k
    if (
!AllowDifferentTypes && 6.30k
Context.ElementSize[BP] != Size0
)
1013
0
      return invalid<ReportDifferentArrayElementSize>(Context, /*Assert=*/true,
1014
0
                                                      Inst, BV);
1015
6.30k
1016
6.30k
    Context.ElementSize[BP] = SE.getSMinExpr(Size, Context.ElementSize[BP]);
1017
4.79k
  } else {
1018
4.79k
    Context.ElementSize[BP] = Size;
1019
4.79k
  }
1020
11.0k
1021
11.0k
  bool IsVariantInNonAffineLoop = false;
1022
11.0k
  SetVector<const Loop *> Loops;
1023
11.0k
  findLoops(AF, Loops);
1024
11.0k
  for (const Loop *L : Loops)
1025
6.56k
    
if (6.56k
Context.BoxedLoopsSet.count(L)6.56k
)
1026
39
      IsVariantInNonAffineLoop = true;
1027
11.0k
1028
11.0k
  auto *Scope = LI.getLoopFor(Inst->getParent());
1029
11.0k
  bool IsAffine = !IsVariantInNonAffineLoop && isAffine(AF, Scope, Context);
1030
11.0k
  // Do not try to delinearize memory intrinsics and force them to be affine.
1031
11.0k
  if (
isa<MemIntrinsic>(Inst) && 11.0k
!IsAffine45
)
{0
1032
0
    return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst,
1033
0
                                          BV);
1034
11.0k
  } else 
if (11.0k
PollyDelinearize && 11.0k
!IsVariantInNonAffineLoop11.0k
)
{11.0k
1035
11.0k
    Context.Accesses[BP].push_back({Inst, AF});
1036
11.0k
1037
11.0k
    if (
!IsAffine || 11.0k
hasIVParams(AF)10.3k
)
1038
658
      Context.NonAffineAccesses.insert(
1039
658
          std::make_pair(BP, LI.getLoopFor(Inst->getParent())));
1040
68
  } else 
if (68
!AllowNonAffine && 68
!IsAffine28
)
{22
1041
22
    return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst,
1042
22
                                          BV);
1043
22
  }
1044
11.0k
1045
11.0k
  
if (11.0k
IgnoreAliasing11.0k
)
1046
442
    return true;
1047
11.0k
1048
11.0k
  // Check if the base pointer of the memory access does alias with
1049
11.0k
  // any other pointer. This cannot be handled at the moment.
1050
10.6k
  AAMDNodes AATags;
1051
10.6k
  Inst->getAAMetadata(AATags);
1052
10.6k
  AliasSet &AS = Context.AST.getAliasSetForPointer(
1053
10.6k
      BP->getValue(), MemoryLocation::UnknownSize, AATags);
1054
10.6k
1055
10.6k
  if (
!AS.isMustAlias()10.6k
)
{3.05k
1056
3.05k
    if (
PollyUseRuntimeAliasChecks3.05k
)
{3.05k
1057
3.05k
      bool CanBuildRunTimeCheck = true;
1058
3.05k
      // The run-time alias check places code that involves the base pointer at
1059
3.05k
      // the beginning of the SCoP. This breaks if the base pointer is defined
1060
3.05k
      // inside the scop. Hence, we can only create a run-time check if we are
1061
3.05k
      // sure the base pointer is not an instruction defined inside the scop.
1062
3.05k
      // However, we can ignore loads that will be hoisted.
1063
10.8k
      for (const auto &Ptr : AS) {
1064
10.8k
        Instruction *Inst = dyn_cast<Instruction>(Ptr.getValue());
1065
10.8k
        if (
Inst && 10.8k
Context.CurRegion.contains(Inst)2.31k
)
{2.17k
1066
2.17k
          auto *Load = dyn_cast<LoadInst>(Inst);
1067
2.17k
          if (
Load && 2.17k
isHoistableLoad(Load, Context.CurRegion, LI, SE, DT)2.17k
)
{2.17k
1068
2.17k
            Context.RequiredILS.insert(Load);
1069
2.17k
            continue;
1070
2.17k
          }
1071
2.17k
1072
2
          CanBuildRunTimeCheck = false;
1073
2
          break;
1074
2.17k
        }
1075
10.8k
      }
1076
3.05k
1077
3.05k
      if (CanBuildRunTimeCheck)
1078
3.04k
        return true;
1079
3.05k
    }
1080
3
    return invalid<ReportAlias>(Context, /*Assert=*/true, Inst, AS);
1081
3.05k
  }
1082
10.6k
1083
7.58k
  return true;
1084
10.6k
}
1085
1086
bool ScopDetection::isValidMemoryAccess(MemAccInst Inst,
1087
11.0k
                                        DetectionContext &Context) const {
1088
11.0k
  Value *Ptr = Inst.getPointerOperand();
1089
11.0k
  Loop *L = LI.getLoopFor(Inst->getParent());
1090
11.0k
  const SCEV *AccessFunction = SE.getSCEVAtScope(Ptr, L);
1091
11.0k
  const SCEVUnknown *BasePointer;
1092
11.0k
1093
11.0k
  BasePointer = dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
1094
11.0k
1095
11.0k
  return isValidAccess(Inst, AccessFunction, BasePointer, Context);
1096
11.0k
}
1097
1098
bool ScopDetection::isValidInstruction(Instruction &Inst,
1099
48.0k
                                       DetectionContext &Context) const {
1100
89.9k
  for (auto &Op : Inst.operands()) {
1101
89.9k
    auto *OpInst = dyn_cast<Instruction>(&Op);
1102
89.9k
1103
89.9k
    if (!OpInst)
1104
36.7k
      continue;
1105
89.9k
1106
53.1k
    
if (53.1k
isErrorBlock(*OpInst->getParent(), Context.CurRegion, LI, DT)53.1k
)
1107
2
      return false;
1108
53.1k
  }
1109
48.0k
1110
48.0k
  
if (48.0k
isa<LandingPadInst>(&Inst) || 48.0k
isa<ResumeInst>(&Inst)48.0k
)
1111
0
    return false;
1112
48.0k
1113
48.0k
  // We only check the call instruction but not invoke instruction.
1114
48.0k
  
if (CallInst *48.0k
CI48.0k
= dyn_cast<CallInst>(&Inst))
{206
1115
206
    if (isValidCallInst(*CI, Context))
1116
194
      return true;
1117
206
1118
12
    return invalid<ReportFuncCall>(Context, /*Assert=*/true, &Inst);
1119
206
  }
1120
48.0k
1121
47.8k
  
if (47.8k
!Inst.mayReadOrWriteMemory()47.8k
)
{36.7k
1122
36.7k
    if (!isa<AllocaInst>(Inst))
1123
36.7k
      return true;
1124
36.7k
1125
0
    return invalid<ReportAlloca>(Context, /*Assert=*/true, &Inst);
1126
36.7k
  }
1127
47.8k
1128
47.8k
  // Check the access function.
1129
11.1k
  
if (auto 11.1k
MemInst11.1k
= MemAccInst::dyn_cast(Inst))
{11.0k
1130
11.0k
    Context.hasStores |= isa<StoreInst>(MemInst);
1131
11.0k
    Context.hasLoads |= isa<LoadInst>(MemInst);
1132
11.0k
    if (!MemInst.isSimple())
1133
0
      return invalid<ReportNonSimpleMemoryAccess>(Context, /*Assert=*/true,
1134
0
                                                  &Inst);
1135
11.0k
1136
11.0k
    return isValidMemoryAccess(MemInst, Context);
1137
11.0k
  }
1138
11.1k
1139
11.1k
  // We do not know this instruction, therefore we assume it is invalid.
1140
51
  return invalid<ReportUnknownInst>(Context, /*Assert=*/true, &Inst);
1141
11.1k
}
1142
1143
/// Check whether @p L has exiting blocks.
1144
///
1145
/// @param L The loop of interest
1146
///
1147
/// @return True if the loop has exiting blocks, false otherwise.
1148
4.75k
static bool hasExitingBlocks(Loop *L) {
1149
4.75k
  SmallVector<BasicBlock *, 4> ExitingBlocks;
1150
4.75k
  L->getExitingBlocks(ExitingBlocks);
1151
4.75k
  return !ExitingBlocks.empty();
1152
4.75k
}
1153
1154
bool ScopDetection::canUseISLTripCount(Loop *L,
1155
4.74k
                                       DetectionContext &Context) const {
1156
4.74k
  // Ensure the loop has valid exiting blocks as well as latches, otherwise we
1157
4.74k
  // need to overapproximate it as a boxed loop.
1158
4.74k
  SmallVector<BasicBlock *, 4> LoopControlBlocks;
1159
4.74k
  L->getExitingBlocks(LoopControlBlocks);
1160
4.74k
  L->getLoopLatches(LoopControlBlocks);
1161
9.34k
  for (BasicBlock *ControlBB : LoopControlBlocks) {
1162
9.34k
    if (!isValidCFG(*ControlBB, true, false, Context))
1163
217
      return false;
1164
9.34k
  }
1165
4.74k
1166
4.74k
  // We can use ISL to compute the trip count of L.
1167
4.52k
  return true;
1168
4.74k
}
1169
1170
4.75k
bool ScopDetection::isValidLoop(Loop *L, DetectionContext &Context) const {
1171
4.75k
  // Loops that contain part but not all of the blocks of a region cannot be
1172
4.75k
  // handled by the schedule generation. Such loop constructs can happen
1173
4.75k
  // because a region can contain BBs that have no path to the exit block
1174
4.75k
  // (Infinite loops, UnreachableInst), but such blocks are never part of a
1175
4.75k
  // loop.
1176
4.75k
  //
1177
4.75k
  // _______________
1178
4.75k
  // | Loop Header | <-----------.
1179
4.75k
  // ---------------             |
1180
4.75k
  //        |                    |
1181
4.75k
  // _______________       ______________
1182
4.75k
  // | RegionEntry |-----> | RegionExit |----->
1183
4.75k
  // ---------------       --------------
1184
4.75k
  //        |
1185
4.75k
  // _______________
1186
4.75k
  // | EndlessLoop | <--.
1187
4.75k
  // ---------------    |
1188
4.75k
  //       |            |
1189
4.75k
  //       \------------/
1190
4.75k
  //
1191
4.75k
  // In the example above, the loop (LoopHeader,RegionEntry,RegionExit) is
1192
4.75k
  // neither entirely contained in the region RegionEntry->RegionExit
1193
4.75k
  // (containing RegionEntry,EndlessLoop) nor is the region entirely contained
1194
4.75k
  // in the loop.
1195
4.75k
  // The block EndlessLoop is contained in the region because Region::contains
1196
4.75k
  // tests whether it is not dominated by RegionExit. This is probably to not
1197
4.75k
  // having to query the PostdominatorTree. Instead of an endless loop, a dead
1198
4.75k
  // end can also be formed by an UnreachableInst. This case is already caught
1199
4.75k
  // by isErrorBlock(). We hence only have to reject endless loops here.
1200
4.75k
  if (!hasExitingBlocks(L))
1201
16
    return invalid<ReportLoopHasNoExit>(Context, /*Assert=*/true, L);
1202
4.75k
1203
4.74k
  
if (4.74k
canUseISLTripCount(L, Context)4.74k
)
1204
4.52k
    return true;
1205
4.74k
1206
217
  
if (217
AllowNonAffineSubLoops && 217
AllowNonAffineSubRegions66
)
{66
1207
66
    Region *R = RI.getRegionFor(L->getHeader());
1208
77
    while (
R != &Context.CurRegion && 77
!R->contains(L)61
)
1209
11
      R = R->getParent();
1210
66
1211
66
    if (addOverApproximatedRegion(R, Context))
1212
66
      return true;
1213
66
  }
1214
217
1215
151
  const SCEV *LoopCount = SE.getBackedgeTakenCount(L);
1216
151
  return invalid<ReportLoopBound>(Context, /*Assert=*/true, L, LoopCount);
1217
217
}
1218
1219
/// Return the number of loops in @p L (incl. @p L) that have a trip
1220
///        count that is not known to be less than @MinProfitableTrips.
1221
ScopDetection::LoopStats
1222
ScopDetection::countBeneficialSubLoops(Loop *L, ScalarEvolution &SE,
1223
4.73k
                                       unsigned MinProfitableTrips) {
1224
4.73k
  auto *TripCount = SE.getBackedgeTakenCount(L);
1225
4.73k
1226
4.73k
  int NumLoops = 1;
1227
4.73k
  int MaxLoopDepth = 1;
1228
4.73k
  if (auto *TripCountC = dyn_cast<SCEVConstant>(TripCount))
1229
2.62k
    
if (2.62k
TripCountC->getType()->getScalarSizeInBits() <= 642.62k
)
1230
2.62k
      
if (2.62k
TripCountC->getValue()->getZExtValue() <= MinProfitableTrips2.62k
)
1231
186
        NumLoops -= 1;
1232
4.73k
1233
1.28k
  for (auto &SubLoop : *L) {
1234
1.28k
    LoopStats Stats = countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips);
1235
1.28k
    NumLoops += Stats.NumLoops;
1236
1.28k
    MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth + 1);
1237
1.28k
  }
1238
4.73k
1239
4.73k
  return {NumLoops, MaxLoopDepth};
1240
4.73k
}
1241
1242
ScopDetection::LoopStats
1243
ScopDetection::countBeneficialLoops(Region *R, ScalarEvolution &SE,
1244
3.26k
                                    LoopInfo &LI, unsigned MinProfitableTrips) {
1245
3.26k
  int LoopNum = 0;
1246
3.26k
  int MaxLoopDepth = 0;
1247
3.26k
1248
3.26k
  auto L = LI.getLoopFor(R->getEntry());
1249
1.80k
  L = L ? 
R->outermostLoopInRegion(L)1.80k
:
nullptr1.46k
;
1250
1.73k
  L = L ? 
L->getParentLoop()1.73k
:
nullptr1.52k
;
1251
3.26k
1252
3.26k
  auto SubLoops =
1253
3.20k
      L ? 
L->getSubLoopsVector()51
:
std::vector<Loop *>(LI.begin(), LI.end())3.20k
;
1254
3.26k
1255
3.26k
  for (auto &SubLoop : SubLoops)
1256
3.64k
    
if (3.64k
R->contains(SubLoop)3.64k
)
{3.45k
1257
3.45k
      LoopStats Stats =
1258
3.45k
          countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips);
1259
3.45k
      LoopNum += Stats.NumLoops;
1260
3.45k
      MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth);
1261
3.45k
    }
1262
3.26k
1263
3.26k
  return {LoopNum, MaxLoopDepth};
1264
3.26k
}
1265
1266
1.11k
Region *ScopDetection::expandRegion(Region &R) {
1267
1.11k
  // Initial no valid region was found (greater than R)
1268
1.11k
  std::unique_ptr<Region> LastValidRegion;
1269
1.11k
  auto ExpandedRegion = std::unique_ptr<Region>(R.getExpandedRegion());
1270
1.11k
1271
1.11k
  DEBUG(dbgs() << "\tExpanding " << R.getNameStr() << "\n");
1272
1.11k
1273
1.60k
  while (
ExpandedRegion1.60k
)
{544
1274
544
    const auto &It = DetectionContextMap.insert(std::make_pair(
1275
544
        getBBPairForRegion(ExpandedRegion.get()),
1276
544
        DetectionContext(*ExpandedRegion, AA, false /*verifying*/)));
1277
544
    DetectionContext &Context = It.first->second;
1278
544
    DEBUG(dbgs() << "\t\tTrying " << ExpandedRegion->getNameStr() << "\n");
1279
544
    // Only expand when we did not collect errors.
1280
544
1281
544
    if (
!Context.Log.hasErrors()544
)
{544
1282
544
      // If the exit is valid check all blocks
1283
544
      //  - if true, a valid region was found => store it + keep expanding
1284
544
      //  - if false, .tbd. => stop  (should this really end the loop?)
1285
544
      if (
!allBlocksValid(Context) || 544
Context.Log.hasErrors()496
)
{49
1286
49
        removeCachedResults(*ExpandedRegion);
1287
49
        DetectionContextMap.erase(It.first);
1288
49
        break;
1289
49
      }
1290
544
1291
544
      // Store this region, because it is the greatest valid (encountered so
1292
544
      // far).
1293
495
      
if (495
LastValidRegion495
)
{254
1294
254
        removeCachedResults(*LastValidRegion);
1295
254
        DetectionContextMap.erase(getBBPairForRegion(LastValidRegion.get()));
1296
254
      }
1297
495
      LastValidRegion = std::move(ExpandedRegion);
1298
495
1299
495
      // Create and test the next greater region (if any)
1300
495
      ExpandedRegion =
1301
495
          std::unique_ptr<Region>(LastValidRegion->getExpandedRegion());
1302
495
1303
0
    } else {
1304
0
      // Create and test the next greater region (if any)
1305
0
      removeCachedResults(*ExpandedRegion);
1306
0
      DetectionContextMap.erase(It.first);
1307
0
      ExpandedRegion =
1308
0
          std::unique_ptr<Region>(ExpandedRegion->getExpandedRegion());
1309
0
    }
1310
544
  }
1311
1.11k
1312
1.11k
  DEBUG({
1313
1.11k
    if (LastValidRegion)
1314
1.11k
      dbgs() << "\tto " << LastValidRegion->getNameStr() << "\n";
1315
1.11k
    else
1316
1.11k
      dbgs() << "\tExpanding " << R.getNameStr() << " failed\n";
1317
1.11k
  });
1318
1.11k
1319
1.11k
  return LastValidRegion.release();
1320
1.11k
}
1321
40
static bool regionWithoutLoops(Region &R, LoopInfo &LI) {
1322
40
  for (const BasicBlock *BB : R.blocks())
1323
44
    
if (44
R.contains(LI.getLoopFor(BB))44
)
1324
39
      return false;
1325
40
1326
1
  return true;
1327
40
}
1328
1329
583
void ScopDetection::removeCachedResultsRecursively(const Region &R) {
1330
545
  for (auto &SubRegion : R) {
1331
545
    if (
ValidRegions.count(SubRegion.get())545
)
{203
1332
203
      removeCachedResults(*SubRegion.get());
1333
203
    } else
1334
342
      removeCachedResultsRecursively(*SubRegion);
1335
545
  }
1336
583
}
1337
1338
2.38k
void ScopDetection::removeCachedResults(const Region &R) {
1339
2.38k
  ValidRegions.remove(&R);
1340
2.38k
}
1341
1342
2.95k
void ScopDetection::findScops(Region &R) {
1343
2.95k
  const auto &It = DetectionContextMap.insert(std::make_pair(
1344
2.95k
      getBBPairForRegion(&R), DetectionContext(R, AA, false /*verifying*/)));
1345
2.95k
  DetectionContext &Context = It.first->second;
1346
2.95k
1347
2.95k
  bool RegionIsValid = false;
1348
2.95k
  if (
!PollyProcessUnprofitable && 2.95k
regionWithoutLoops(R, LI)40
)
1349
1
    invalid<ReportUnprofitable>(Context, /*Assert=*/true, &R);
1350
2.95k
  else
1351
2.95k
    RegionIsValid = isValidRegion(Context);
1352
2.95k
1353
1.31k
  bool HasErrors = !RegionIsValid || Context.Log.size() > 0;
1354
2.95k
1355
2.95k
  if (
HasErrors2.95k
)
{1.63k
1356
1.63k
    removeCachedResults(R);
1357
1.31k
  } else {
1358
1.31k
    ValidRegions.insert(&R);
1359
1.31k
    return;
1360
1.31k
  }
1361
2.95k
1362
1.63k
  for (auto &SubRegion : R)
1363
1.74k
    findScops(*SubRegion);
1364
1.63k
1365
1.63k
  // Try to expand regions.
1366
1.63k
  //
1367
1.63k
  // As the region tree normally only contains canonical regions, non canonical
1368
1.63k
  // regions that form a Scop are not found. Therefore, those non canonical
1369
1.63k
  // regions are checked by expanding the canonical ones.
1370
1.63k
1371
1.63k
  std::vector<Region *> ToExpand;
1372
1.63k
1373
1.63k
  for (auto &SubRegion : R)
1374
1.74k
    ToExpand.push_back(SubRegion.get());
1375
1.63k
1376
1.74k
  for (Region *CurrentRegion : ToExpand) {
1377
1.74k
    // Skip invalid regions. Regions may become invalid, if they are element of
1378
1.74k
    // an already expanded region.
1379
1.74k
    if (!ValidRegions.count(CurrentRegion))
1380
634
      continue;
1381
1.74k
1382
1.74k
    // Skip regions that had errors.
1383
1.11k
    bool HadErrors = lookupRejectionLog(CurrentRegion)->hasErrors();
1384
1.11k
    if (HadErrors)
1385
0
      continue;
1386
1.11k
1387
1.11k
    Region *ExpandedR = expandRegion(*CurrentRegion);
1388
1.11k
1389
1.11k
    if (!ExpandedR)
1390
870
      continue;
1391
1.11k
1392
241
    R.addSubRegion(ExpandedR, true);
1393
241
    ValidRegions.insert(ExpandedR);
1394
241
    removeCachedResults(*CurrentRegion);
1395
241
    removeCachedResultsRecursively(*ExpandedR);
1396
241
  }
1397
1.63k
}
1398
1399
3.20k
bool ScopDetection::allBlocksValid(DetectionContext &Context) const {
1400
3.20k
  Region &CurRegion = Context.CurRegion;
1401
3.20k
1402
17.3k
  for (const BasicBlock *BB : CurRegion.blocks()) {
1403
17.3k
    Loop *L = LI.getLoopFor(BB);
1404
17.3k
    if (
L && 17.3k
L->getHeader() == BB13.9k
&&
CurRegion.contains(L)4.80k
&&
1405
4.75k
        
(!isValidLoop(L, Context) && 4.75k
!KeepGoing167
))
1406
167
      return false;
1407
17.3k
  }
1408
3.20k
1409
16.5k
  
for (BasicBlock *BB : CurRegion.blocks()) 3.04k
{16.5k
1410
16.5k
    bool IsErrorBlock = isErrorBlock(*BB, CurRegion, LI, DT);
1411
16.5k
1412
16.5k
    // Also check exception blocks (and possibly register them as non-affine
1413
16.5k
    // regions). Even though exception blocks are not modeled, we use them
1414
16.5k
    // to forward-propagate domain constraints during ScopInfo construction.
1415
16.5k
    if (
!isValidCFG(*BB, false, IsErrorBlock, Context) && 16.5k
!KeepGoing66
)
1416
66
      return false;
1417
16.5k
1418
16.5k
    
if (16.5k
IsErrorBlock16.5k
)
1419
131
      continue;
1420
16.5k
1421
64.3k
    
for (BasicBlock::iterator I = BB->begin(), E = --BB->end(); 16.3k
I != E64.3k
;
++I47.9k
)
1422
48.0k
      
if (48.0k
!isValidInstruction(*I, Context) && 48.0k
!KeepGoing106
)
1423
97
        return false;
1424
16.3k
  }
1425
3.04k
1426
2.87k
  
if (2.87k
!hasAffineMemoryAccesses(Context)2.87k
)
1427
33
    return false;
1428
2.87k
1429
2.84k
  return true;
1430
2.87k
}
1431
1432
bool ScopDetection::hasSufficientCompute(DetectionContext &Context,
1433
8
                                         int NumLoops) const {
1434
8
  int InstCount = 0;
1435
8
1436
8
  if (NumLoops == 0)
1437
0
    return false;
1438
8
1439
8
  for (auto *BB : Context.CurRegion.blocks())
1440
36
    
if (36
Context.CurRegion.contains(LI.getLoopFor(BB))36
)
1441
36
      InstCount += BB->size();
1442
8
1443
8
  InstCount = InstCount / NumLoops;
1444
8
1445
8
  return InstCount >= ProfitabilityMinPerLoopInstructions;
1446
8
}
1447
1448
bool ScopDetection::hasPossiblyDistributableLoop(
1449
10
    DetectionContext &Context) const {
1450
10
  for (auto *BB : Context.CurRegion.blocks()) {
1451
10
    auto *L = LI.getLoopFor(BB);
1452
10
    if (!Context.CurRegion.contains(L))
1453
0
      continue;
1454
10
    
if (10
Context.BoxedLoopsSet.count(L)10
)
1455
0
      continue;
1456
10
    unsigned StmtsWithStoresInLoops = 0;
1457
74
    for (auto *LBB : L->blocks()) {
1458
74
      bool MemStore = false;
1459
74
      for (auto &I : *LBB)
1460
303
        MemStore |= isa<StoreInst>(&I);
1461
74
      StmtsWithStoresInLoops += MemStore;
1462
74
    }
1463
10
    return (StmtsWithStoresInLoops > 1);
1464
10
  }
1465
0
  return false;
1466
10
}
1467
1468
1.11k
bool ScopDetection::isProfitableRegion(DetectionContext &Context) const {
1469
1.11k
  Region &CurRegion = Context.CurRegion;
1470
1.11k
1471
1.11k
  if (PollyProcessUnprofitable)
1472
1.09k
    return true;
1473
1.11k
1474
1.11k
  // We can probably not do a lot on scops that only write or only read
1475
1.11k
  // data.
1476
19
  
if (19
!Context.hasStores || 19
!Context.hasLoads18
)
1477
2
    return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);
1478
19
1479
17
  int NumLoops =
1480
17
      countBeneficialLoops(&CurRegion, SE, LI, MIN_LOOP_TRIP_COUNT).NumLoops;
1481
17
  int NumAffineLoops = NumLoops - Context.BoxedLoopsSet.size();
1482
17
1483
17
  // Scops with at least two loops may allow either loop fusion or tiling and
1484
17
  // are consequently interesting to look at.
1485
17
  if (NumAffineLoops >= 2)
1486
3
    return true;
1487
17
1488
17
  // A loop with multiple non-trivial blocks might be amendable to distribution.
1489
14
  
if (14
NumAffineLoops == 1 && 14
hasPossiblyDistributableLoop(Context)10
)
1490
2
    return true;
1491
14
1492
14
  // Scops that contain a loop with a non-trivial amount of computation per
1493
14
  // loop-iteration are interesting as we may be able to parallelize such
1494
14
  // loops. Individual loops that have only a small amount of computation
1495
14
  // per-iteration are performance-wise very fragile as any change to the
1496
14
  // loop induction variables may affect performance. To not cause spurious
1497
14
  // performance regressions, we do not consider such loops.
1498
12
  
if (12
NumAffineLoops == 1 && 12
hasSufficientCompute(Context, NumLoops)8
)
1499
1
    return true;
1500
12
1501
11
  return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);
1502
12
}
1503
1504
3.98k
bool ScopDetection::isValidRegion(DetectionContext &Context) const {
1505
3.98k
  Region &CurRegion = Context.CurRegion;
1506
3.98k
1507
3.98k
  DEBUG(dbgs() << "Checking region: " << CurRegion.getNameStr() << "\n\t");
1508
3.98k
1509
3.98k
  if (
!AllowFullFunction && 3.98k
CurRegion.isTopLevelRegion()3.97k
)
{1.20k
1510
1.20k
    DEBUG(dbgs() << "Top level region is invalid\n");
1511
1.20k
    return false;
1512
1.20k
  }
1513
3.98k
1514
2.77k
  DebugLoc DbgLoc;
1515
2.77k
  if (CurRegion.getExit() &&
1516
2.77k
      
isa<UnreachableInst>(CurRegion.getExit()->getTerminator())2.77k
)
{26
1517
26
    DEBUG(dbgs() << "Unreachable in exit\n");
1518
26
    return invalid<ReportUnreachableInExit>(Context, /*Assert=*/true,
1519
26
                                            CurRegion.getExit(), DbgLoc);
1520
26
  }
1521
2.77k
1522
2.74k
  
if (2.74k
!CurRegion.getEntry()->getName().count(OnlyRegion)2.74k
)
{0
1523
0
    DEBUG({
1524
0
      dbgs() << "Region entry does not match -polly-region-only";
1525
0
      dbgs() << "\n";
1526
0
    });
1527
0
    return false;
1528
0
  }
1529
2.74k
1530
2.74k
  // SCoP cannot contain the entry block of the function, because we need
1531
2.74k
  // to insert alloca instruction there when translate scalar to array.
1532
2.74k
  
if (2.74k
!AllowFullFunction &&2.74k
1533
2.74k
      CurRegion.getEntry() ==
1534
2.74k
          &(CurRegion.getEntry()->getParent()->getEntryBlock()))
1535
85
    return invalid<ReportEntry>(Context, /*Assert=*/true, CurRegion.getEntry());
1536
2.74k
1537
2.66k
  
if (2.66k
!allBlocksValid(Context)2.66k
)
1538
315
    return false;
1539
2.66k
1540
2.34k
  
if (2.34k
!isReducibleRegion(CurRegion, DbgLoc)2.34k
)
1541
3
    return invalid<ReportIrreducibleRegion>(Context, /*Assert=*/true,
1542
3
                                            &CurRegion, DbgLoc);
1543
2.34k
1544
2.34k
  
DEBUG2.34k
(dbgs() << "OK\n");2.34k
1545
2.34k
  return true;
1546
2.34k
}
1547
1548
0
void ScopDetection::markFunctionAsInvalid(Function *F) {
1549
0
  F->addFnAttr(PollySkipFnAttr);
1550
0
}
1551
1552
1.23k
bool ScopDetection::isValidFunction(llvm::Function &F) {
1553
1.23k
  return !F.hasFnAttribute(PollySkipFnAttr);
1554
1.23k
}
1555
1556
2
void ScopDetection::printLocations(llvm::Function &F) {
1557
2
  for (const Region *R : *this) {
1558
2
    unsigned LineEntry, LineExit;
1559
2
    std::string FileName;
1560
2
1561
2
    getDebugLocation(R, LineEntry, LineExit, FileName);
1562
2
    DiagnosticScopFound Diagnostic(F, FileName, LineEntry, LineExit);
1563
2
    F.getContext().diagnose(Diagnostic);
1564
2
  }
1565
2
}
1566
1567
1.20k
void ScopDetection::emitMissedRemarks(const Function &F) {
1568
3.19k
  for (auto &DIt : DetectionContextMap) {
1569
3.19k
    auto &DC = DIt.getSecond();
1570
3.19k
    if (DC.Log.hasErrors())
1571
438
      emitRejectionRemarks(DIt.getFirst(), DC.Log);
1572
3.19k
  }
1573
1.20k
}
1574
1575
2.34k
bool ScopDetection::isReducibleRegion(Region &R, DebugLoc &DbgLoc) const {
1576
2.34k
  /// Enum for coloring BBs in Region.
1577
2.34k
  ///
1578
2.34k
  /// WHITE - Unvisited BB in DFS walk.
1579
2.34k
  /// GREY - BBs which are currently on the DFS stack for processing.
1580
2.34k
  /// BLACK - Visited and completely processed BB.
1581
2.34k
  enum Color { WHITE, GREY, BLACK };
1582
2.34k
1583
2.34k
  BasicBlock *REntry = R.getEntry();
1584
2.34k
  BasicBlock *RExit = R.getExit();
1585
2.34k
  // Map to match the color of a BasicBlock during the DFS walk.
1586
2.34k
  DenseMap<const BasicBlock *, Color> BBColorMap;
1587
2.34k
  // Stack keeping track of current BB and index of next child to be processed.
1588
2.34k
  std::stack<std::pair<BasicBlock *, unsigned>> DFSStack;
1589
2.34k
1590
2.34k
  unsigned AdjacentBlockIndex = 0;
1591
2.34k
  BasicBlock *CurrBB, *SuccBB;
1592
2.34k
  CurrBB = REntry;
1593
2.34k
1594
2.34k
  // Initialize the map for all BB with WHITE color.
1595
2.34k
  for (auto *BB : R.blocks())
1596
10.3k
    BBColorMap[BB] = WHITE;
1597
2.34k
1598
2.34k
  // Process the entry block of the Region.
1599
2.34k
  BBColorMap[CurrBB] = GREY;
1600
2.34k
  DFSStack.push(std::make_pair(CurrBB, 0));
1601
2.34k
1602
20.6k
  while (
!DFSStack.empty()20.6k
)
{18.2k
1603
18.2k
    // Get next BB on stack to be processed.
1604
18.2k
    CurrBB = DFSStack.top().first;
1605
18.2k
    AdjacentBlockIndex = DFSStack.top().second;
1606
18.2k
    DFSStack.pop();
1607
18.2k
1608
18.2k
    // Loop to iterate over the successors of current BB.
1609
18.2k
    const TerminatorInst *TInst = CurrBB->getTerminator();
1610
18.2k
    unsigned NSucc = TInst->getNumSuccessors();
1611
24.8k
    for (unsigned I = AdjacentBlockIndex; I < NSucc;
1612
14.5k
         
++I, ++AdjacentBlockIndex6.59k
)
{14.5k
1613
14.5k
      SuccBB = TInst->getSuccessor(I);
1614
14.5k
1615
14.5k
      // Checks for region exit block and self-loops in BB.
1616
14.5k
      if (
SuccBB == RExit || 14.5k
SuccBB == CurrBB11.6k
)
1617
3.67k
        continue;
1618
14.5k
1619
14.5k
      // WHITE indicates an unvisited BB in DFS walk.
1620
10.8k
      
if (10.8k
BBColorMap[SuccBB] == WHITE10.8k
)
{7.96k
1621
7.96k
        // Push the current BB and the index of the next child to be visited.
1622
7.96k
        DFSStack.push(std::make_pair(CurrBB, I + 1));
1623
7.96k
        // Push the next BB to be processed.
1624
7.96k
        DFSStack.push(std::make_pair(SuccBB, 0));
1625
7.96k
        // First time the BB is being processed.
1626
7.96k
        BBColorMap[SuccBB] = GREY;
1627
7.96k
        break;
1628
2.92k
      } else 
if (2.92k
BBColorMap[SuccBB] == GREY2.92k
)
{2.25k
1629
2.25k
        // GREY indicates a loop in the control flow.
1630
2.25k
        // If the destination dominates the source, it is a natural loop
1631
2.25k
        // else, an irreducible control flow in the region is detected.
1632
2.25k
        if (
!DT.dominates(SuccBB, CurrBB)2.25k
)
{3
1633
3
          // Get debug info of instruction which causes irregular control flow.
1634
3
          DbgLoc = TInst->getDebugLoc();
1635
3
          return false;
1636
3
        }
1637
2.25k
      }
1638
10.8k
    }
1639
18.2k
1640
18.2k
    // If all children of current BB have been processed,
1641
18.2k
    // then mark that BB as fully processed.
1642
18.2k
    
if (18.2k
AdjacentBlockIndex == NSucc18.2k
)
1643
10.2k
      BBColorMap[CurrBB] = BLACK;
1644
18.2k
  }
1645
2.34k
1646
2.34k
  return true;
1647
2.34k
}
1648
1649
static void updateLoopCountStatistic(ScopDetection::LoopStats Stats,
1650
2.21k
                                     bool OnlyProfitable) {
1651
2.21k
  if (
!OnlyProfitable2.21k
)
{1.11k
1652
1.11k
    NumLoopsInScop += Stats.NumLoops;
1653
1.11k
    MaxNumLoopsInScop =
1654
1.11k
        std::max(MaxNumLoopsInScop.getValue(), (unsigned)Stats.NumLoops);
1655
1.11k
    if (Stats.MaxDepth == 1)
1656
728
      NumScopsDepthOne++;
1657
385
    else 
if (385
Stats.MaxDepth == 2385
)
1658
211
      NumScopsDepthTwo++;
1659
174
    else 
if (174
Stats.MaxDepth == 3174
)
1660
70
      NumScopsDepthThree++;
1661
104
    else 
if (104
Stats.MaxDepth == 4104
)
1662
4
      NumScopsDepthFour++;
1663
100
    else 
if (100
Stats.MaxDepth == 5100
)
1664
0
      NumScopsDepthFive++;
1665
100
    else
1666
100
      NumScopsDepthLarger++;
1667
1.10k
  } else {
1668
1.10k
    NumLoopsInProfScop += Stats.NumLoops;
1669
1.10k
    MaxNumLoopsInProfScop =
1670
1.10k
        std::max(MaxNumLoopsInProfScop.getValue(), (unsigned)Stats.NumLoops);
1671
1.10k
    if (Stats.MaxDepth == 1)
1672
722
      NumProfScopsDepthOne++;
1673
378
    else 
if (378
Stats.MaxDepth == 2378
)
1674
205
      NumProfScopsDepthTwo++;
1675
173
    else 
if (173
Stats.MaxDepth == 3173
)
1676
70
      NumProfScopsDepthThree++;
1677
103
    else 
if (103
Stats.MaxDepth == 4103
)
1678
3
      NumProfScopsDepthFour++;
1679
100
    else 
if (100
Stats.MaxDepth == 5100
)
1680
0
      NumProfScopsDepthFive++;
1681
100
    else
1682
100
      NumProfScopsDepthLarger++;
1683
1.10k
  }
1684
2.21k
}
1685
1686
ScopDetection::DetectionContext *
1687
2.14k
ScopDetection::getDetectionContext(const Region *R) const {
1688
2.14k
  auto DCMIt = DetectionContextMap.find(getBBPairForRegion(R));
1689
2.14k
  if (DCMIt == DetectionContextMap.end())
1690
1
    return nullptr;
1691
2.14k
  return &DCMIt->second;
1692
2.14k
}
1693
1694
1.11k
const RejectLog *ScopDetection::lookupRejectionLog(const Region *R) const {
1695
1.11k
  const DetectionContext *DC = getDetectionContext(R);
1696
1.11k
  return DC ? 
&DC->Log1.11k
:
nullptr1
;
1697
1.11k
}
1698
1699
0
void polly::ScopDetection::verifyRegion(const Region &R) const {
1700
0
  assert(isMaxRegionInScop(R) && "Expect R is a valid region.");
1701
0
1702
0
  DetectionContext Context(const_cast<Region &>(R), AA, true /*verifying*/);
1703
0
  isValidRegion(Context);
1704
0
}
1705
1706
0
void polly::ScopDetection::verifyAnalysis() const {
1707
0
  if (!VerifyScops)
1708
0
    return;
1709
0
1710
0
  for (const Region *R : ValidRegions)
1711
0
    verifyRegion(*R);
1712
0
}
1713
1714
1.23k
bool ScopDetectionWrapperPass::runOnFunction(llvm::Function &F) {
1715
1.23k
  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1716
1.23k
  auto &RI = getAnalysis<RegionInfoPass>().getRegionInfo();
1717
1.23k
  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
1718
1.23k
  auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1719
1.23k
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1720
1.23k
  Result.reset(new ScopDetection(F, DT, SE, LI, RI, AA));
1721
1.23k
  return false;
1722
1.23k
}
1723
1724
1.10k
void ScopDetectionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
1725
1.10k
  AU.addRequired<LoopInfoWrapperPass>();
1726
1.10k
  AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
1727
1.10k
  AU.addRequired<DominatorTreeWrapperPass>();
1728
1.10k
  // We also need AA and RegionInfo when we are verifying analysis.
1729
1.10k
  AU.addRequiredTransitive<AAResultsWrapperPass>();
1730
1.10k
  AU.addRequiredTransitive<RegionInfoPass>();
1731
1.10k
  AU.setPreservesAll();
1732
1.10k
}
1733
1734
120
void ScopDetectionWrapperPass::print(raw_ostream &OS, const Module *) const {
1735
120
  for (const Region *R : Result->ValidRegions)
1736
72
    OS << "Valid Region for Scop: " << R->getNameStr() << '\n';
1737
120
1738
120
  OS << "\n";
1739
120
}
1740
1741
1.10k
ScopDetectionWrapperPass::ScopDetectionWrapperPass() : FunctionPass(ID) {
1742
1.10k
  // Disable runtime alias checks if we ignore aliasing all together.
1743
1.10k
  if (IgnoreAliasing)
1744
20
    PollyUseRuntimeAliasChecks = false;
1745
1.10k
}
1746
1747
1.23k
void ScopDetectionWrapperPass::releaseMemory() { Result.reset(); }
1748
1749
char ScopDetectionWrapperPass::ID;
1750
1751
AnalysisKey ScopAnalysis::Key;
1752
1753
0
ScopDetection ScopAnalysis::run(Function &F, FunctionAnalysisManager &FAM) {
1754
0
  auto &LI = FAM.getResult<LoopAnalysis>(F);
1755
0
  auto &RI = FAM.getResult<RegionInfoAnalysis>(F);
1756
0
  auto &AA = FAM.getResult<AAManager>(F);
1757
0
  auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F);
1758
0
  auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
1759
0
  return {F, DT, SE, LI, RI, AA};
1760
0
}
1761
1762
PreservedAnalyses ScopAnalysisPrinterPass::run(Function &F,
1763
0
                                               FunctionAnalysisManager &FAM) {
1764
0
  auto &SD = FAM.getResult<ScopAnalysis>(F);
1765
0
  for (const Region *R : SD.ValidRegions)
1766
0
    Stream << "Valid Region for Scop: " << R->getNameStr() << '\n';
1767
0
1768
0
  Stream << "\n";
1769
0
  return PreservedAnalyses::all();
1770
0
}
1771
1772
0
Pass *polly::createScopDetectionWrapperPassPass() {
1773
0
  return new ScopDetectionWrapperPass();
1774
0
}
1775
1776
41.0k
INITIALIZE_PASS_BEGIN41.0k
(ScopDetectionWrapperPass, "polly-detect",41.0k
1777
41.0k
                      "Polly - Detect static control parts (SCoPs)", false,
1778
41.0k
                      false);
1779
41.0k
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass);
1780
41.0k
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
1781
41.0k
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
1782
41.0k
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
1783
41.0k
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
1784
41.0k
INITIALIZE_PASS_END(ScopDetectionWrapperPass, "polly-detect",
1785
                    "Polly - Detect static control parts (SCoPs)", false, false)