Coverage Report

Created: 2017-08-21 19:50

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/tools/polly/lib/Analysis/ScopDetection.cpp
Line
Count
Source (jump to first uncovered line)
1
//
2
//                     The LLVM Compiler Infrastructure
3
//
4
// This file is distributed under the University of Illinois Open Source
5
// License. See LICENSE.TXT for details.
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// Detect the maximal Scops of a function.
10
//
11
// A static control part (Scop) is a subgraph of the control flow graph (CFG)
12
// that only has statically known control flow and can therefore be described
13
// within the polyhedral model.
14
//
15
// Every Scop fulfills these restrictions:
16
//
17
// * It is a single entry single exit region
18
//
19
// * Only affine linear bounds in the loops
20
//
21
// Every natural loop in a Scop must have a number of loop iterations that can
22
// be described as an affine linear function in surrounding loop iterators or
23
// parameters. (A parameter is a scalar that does not change its value during
24
// execution of the Scop).
25
//
26
// * Only comparisons of affine linear expressions in conditions
27
//
28
// * All loops and conditions perfectly nested
29
//
30
// The control flow needs to be structured such that it could be written using
31
// just 'for' and 'if' statements, without the need for any 'goto', 'break' or
32
// 'continue'.
33
//
34
// * Side effect free functions call
35
//
36
// Function calls and intrinsics that do not have side effects (readnone)
37
// or memory intrinsics (memset, memcpy, memmove) are allowed.
38
//
39
// The Scop detection finds the largest Scops by checking if the largest
40
// region is a Scop. If this is not the case, its canonical subregions are
41
// checked until a region is a Scop. It is now tried to extend this Scop by
42
// creating a larger non canonical region.
43
//
44
//===----------------------------------------------------------------------===//
45
46
#include "polly/ScopDetection.h"
47
#include "polly/CodeGen/CodeGeneration.h"
48
#include "polly/LinkAllPasses.h"
49
#include "polly/Options.h"
50
#include "polly/ScopDetectionDiagnostic.h"
51
#include "polly/Support/SCEVValidator.h"
52
#include "polly/Support/ScopLocation.h"
53
#include "llvm/ADT/Statistic.h"
54
#include "llvm/Analysis/AliasAnalysis.h"
55
#include "llvm/Analysis/Loads.h"
56
#include "llvm/Analysis/LoopInfo.h"
57
#include "llvm/Analysis/RegionIterator.h"
58
#include "llvm/Analysis/ScalarEvolution.h"
59
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
60
#include "llvm/IR/DebugInfo.h"
61
#include "llvm/IR/DiagnosticInfo.h"
62
#include "llvm/IR/DiagnosticPrinter.h"
63
#include "llvm/IR/IntrinsicInst.h"
64
#include "llvm/IR/LLVMContext.h"
65
#include "llvm/Support/Debug.h"
66
#include "llvm/Support/Regex.h"
67
#include <set>
68
#include <stack>
69
70
using namespace llvm;
71
using namespace polly;
72
73
#define DEBUG_TYPE "polly-detect"
74
75
// This option is set to a very high value, as analyzing such loops increases
76
// compile time on several cases. For experiments that enable this option,
77
// a value of around 40 has been working to avoid run-time regressions with
78
// Polly while still exposing interesting optimization opportunities.
79
static cl::opt<int> ProfitabilityMinPerLoopInstructions(
80
    "polly-detect-profitability-min-per-loop-insts",
81
    cl::desc("The minimal number of per-loop instructions before a single loop "
82
             "region is considered profitable"),
83
    cl::Hidden, cl::ValueRequired, cl::init(100000000), cl::cat(PollyCategory));
84
85
bool polly::PollyProcessUnprofitable;
86
static cl::opt<bool, true> XPollyProcessUnprofitable(
87
    "polly-process-unprofitable",
88
    cl::desc(
89
        "Process scops that are unlikely to benefit from Polly optimizations."),
90
    cl::location(PollyProcessUnprofitable), cl::init(false), cl::ZeroOrMore,
91
    cl::cat(PollyCategory));
92
93
static cl::list<std::string> OnlyFunctions(
94
    "polly-only-func",
95
    cl::desc("Only run on functions that match a regex. "
96
             "Multiple regexes can be comma separated. "
97
             "Scop detection will run on all functions that match "
98
             "ANY of the regexes provided."),
99
    cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory));
100
101
static cl::list<std::string> IgnoredFunctions(
102
    "polly-ignore-func",
103
    cl::desc("Ignore functions that match a regex. "
104
             "Multiple regexes can be comma separated. "
105
             "Scop detection will ignore all functions that match "
106
             "ANY of the regexes provided."),
107
    cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory));
108
109
bool polly::PollyAllowFullFunction;
110
static cl::opt<bool, true>
111
    XAllowFullFunction("polly-detect-full-functions",
112
                       cl::desc("Allow the detection of full functions"),
113
                       cl::location(polly::PollyAllowFullFunction),
114
                       cl::init(false), cl::cat(PollyCategory));
115
116
static cl::opt<std::string> OnlyRegion(
117
    "polly-only-region",
118
    cl::desc("Only run on certain regions (The provided identifier must "
119
             "appear in the name of the region's entry block"),
120
    cl::value_desc("identifier"), cl::ValueRequired, cl::init(""),
121
    cl::cat(PollyCategory));
122
123
static cl::opt<bool>
124
    IgnoreAliasing("polly-ignore-aliasing",
125
                   cl::desc("Ignore possible aliasing of the array bases"),
126
                   cl::Hidden, cl::init(false), cl::ZeroOrMore,
127
                   cl::cat(PollyCategory));
128
129
bool polly::PollyAllowUnsignedOperations;
130
static cl::opt<bool, true> XPollyAllowUnsignedOperations(
131
    "polly-allow-unsigned-operations",
132
    cl::desc("Allow unsigned operations such as comparisons or zero-extends."),
133
    cl::location(PollyAllowUnsignedOperations), cl::Hidden, cl::ZeroOrMore,
134
    cl::init(true), cl::cat(PollyCategory));
135
136
bool polly::PollyUseRuntimeAliasChecks;
137
static cl::opt<bool, true> XPollyUseRuntimeAliasChecks(
138
    "polly-use-runtime-alias-checks",
139
    cl::desc("Use runtime alias checks to resolve possible aliasing."),
140
    cl::location(PollyUseRuntimeAliasChecks), cl::Hidden, cl::ZeroOrMore,
141
    cl::init(true), cl::cat(PollyCategory));
142
143
static cl::opt<bool>
144
    ReportLevel("polly-report",
145
                cl::desc("Print information about the activities of Polly"),
146
                cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
147
148
static cl::opt<bool> AllowDifferentTypes(
149
    "polly-allow-differing-element-types",
150
    cl::desc("Allow different element types for array accesses"), cl::Hidden,
151
    cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory));
152
153
static cl::opt<bool>
154
    AllowNonAffine("polly-allow-nonaffine",
155
                   cl::desc("Allow non affine access functions in arrays"),
156
                   cl::Hidden, cl::init(false), cl::ZeroOrMore,
157
                   cl::cat(PollyCategory));
158
159
static cl::opt<bool>
160
    AllowModrefCall("polly-allow-modref-calls",
161
                    cl::desc("Allow functions with known modref behavior"),
162
                    cl::Hidden, cl::init(false), cl::ZeroOrMore,
163
                    cl::cat(PollyCategory));
164
165
static cl::opt<bool> AllowNonAffineSubRegions(
166
    "polly-allow-nonaffine-branches",
167
    cl::desc("Allow non affine conditions for branches"), cl::Hidden,
168
    cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory));
169
170
static cl::opt<bool>
171
    AllowNonAffineSubLoops("polly-allow-nonaffine-loops",
172
                           cl::desc("Allow non affine conditions for loops"),
173
                           cl::Hidden, cl::init(false), cl::ZeroOrMore,
174
                           cl::cat(PollyCategory));
175
176
static cl::opt<bool, true>
177
    TrackFailures("polly-detect-track-failures",
178
                  cl::desc("Track failure strings in detecting scop regions"),
179
                  cl::location(PollyTrackFailures), cl::Hidden, cl::ZeroOrMore,
180
                  cl::init(true), cl::cat(PollyCategory));
181
182
static cl::opt<bool> KeepGoing("polly-detect-keep-going",
183
                               cl::desc("Do not fail on the first error."),
184
                               cl::Hidden, cl::ZeroOrMore, cl::init(false),
185
                               cl::cat(PollyCategory));
186
187
static cl::opt<bool, true>
188
    PollyDelinearizeX("polly-delinearize",
189
                      cl::desc("Delinearize array access functions"),
190
                      cl::location(PollyDelinearize), cl::Hidden,
191
                      cl::ZeroOrMore, cl::init(true), cl::cat(PollyCategory));
192
193
static cl::opt<bool>
194
    VerifyScops("polly-detect-verify",
195
                cl::desc("Verify the detected SCoPs after each transformation"),
196
                cl::Hidden, cl::init(false), cl::ZeroOrMore,
197
                cl::cat(PollyCategory));
198
199
bool polly::PollyInvariantLoadHoisting;
200
static cl::opt<bool, true> XPollyInvariantLoadHoisting(
201
    "polly-invariant-load-hoisting", cl::desc("Hoist invariant loads."),
202
    cl::location(PollyInvariantLoadHoisting), cl::Hidden, cl::ZeroOrMore,
203
    cl::init(false), cl::cat(PollyCategory));
204
205
/// The minimal trip count under which loops are considered unprofitable.
206
static const unsigned MIN_LOOP_TRIP_COUNT = 8;
207
208
bool polly::PollyTrackFailures = false;
209
bool polly::PollyDelinearize = false;
210
StringRef polly::PollySkipFnAttr = "polly.skip.fn";
211
212
//===----------------------------------------------------------------------===//
213
// Statistics.
214
215
STATISTIC(NumScopRegions, "Number of scops");
216
STATISTIC(NumLoopsInScop, "Number of loops in scops");
217
STATISTIC(NumScopsDepthOne, "Number of scops with maximal loop depth 1");
218
STATISTIC(NumScopsDepthTwo, "Number of scops with maximal loop depth 2");
219
STATISTIC(NumScopsDepthThree, "Number of scops with maximal loop depth 3");
220
STATISTIC(NumScopsDepthFour, "Number of scops with maximal loop depth 4");
221
STATISTIC(NumScopsDepthFive, "Number of scops with maximal loop depth 5");
222
STATISTIC(NumScopsDepthLarger,
223
          "Number of scops with maximal loop depth 6 and larger");
224
STATISTIC(NumProfScopRegions, "Number of scops (profitable scops only)");
225
STATISTIC(NumLoopsInProfScop,
226
          "Number of loops in scops (profitable scops only)");
227
STATISTIC(NumLoopsOverall, "Number of total loops");
228
STATISTIC(NumProfScopsDepthOne,
229
          "Number of scops with maximal loop depth 1 (profitable scops only)");
230
STATISTIC(NumProfScopsDepthTwo,
231
          "Number of scops with maximal loop depth 2 (profitable scops only)");
232
STATISTIC(NumProfScopsDepthThree,
233
          "Number of scops with maximal loop depth 3 (profitable scops only)");
234
STATISTIC(NumProfScopsDepthFour,
235
          "Number of scops with maximal loop depth 4 (profitable scops only)");
236
STATISTIC(NumProfScopsDepthFive,
237
          "Number of scops with maximal loop depth 5 (profitable scops only)");
238
STATISTIC(NumProfScopsDepthLarger,
239
          "Number of scops with maximal loop depth 6 and larger "
240
          "(profitable scops only)");
241
STATISTIC(MaxNumLoopsInScop, "Maximal number of loops in scops");
242
STATISTIC(MaxNumLoopsInProfScop,
243
          "Maximal number of loops in scops (profitable scops only)");
244
245
static void updateLoopCountStatistic(ScopDetection::LoopStats Stats,
246
                                     bool OnlyProfitable);
247
248
class DiagnosticScopFound : public DiagnosticInfo {
249
private:
250
  static int PluginDiagnosticKind;
251
252
  Function &F;
253
  std::string FileName;
254
  unsigned EntryLine, ExitLine;
255
256
public:
257
  DiagnosticScopFound(Function &F, std::string FileName, unsigned EntryLine,
258
                      unsigned ExitLine)
259
      : DiagnosticInfo(PluginDiagnosticKind, DS_Note), F(F), FileName(FileName),
260
2
        EntryLine(EntryLine), ExitLine(ExitLine) {}
261
262
  virtual void print(DiagnosticPrinter &DP) const;
263
264
0
  static bool classof(const DiagnosticInfo *DI) {
265
0
    return DI->getKind() == PluginDiagnosticKind;
266
0
  }
267
};
268
269
int DiagnosticScopFound::PluginDiagnosticKind =
270
    getNextAvailablePluginDiagnosticKind();
271
272
2
void DiagnosticScopFound::print(DiagnosticPrinter &DP) const {
273
2
  DP << "Polly detected an optimizable loop region (scop) in function '" << F
274
2
     << "'\n";
275
2
276
2
  if (
FileName.empty()2
)
{0
277
0
    DP << "Scop location is unknown. Compile with debug info "
278
0
          "(-g) to get more precise information. ";
279
0
    return;
280
2
  }
281
2
282
2
  DP << FileName << ":" << EntryLine << ": Start of scop\n";
283
2
  DP << FileName << ":" << ExitLine << ": End of scop";
284
2
}
285
286
/// Check if a string matches any regex in a list of regexes.
287
/// @param Str the input string to match against.
288
/// @param RegexList a list of strings that are regular expressions.
289
static bool doesStringMatchAnyRegex(StringRef Str,
290
1.33k
                                    const cl::list<std::string> &RegexList) {
291
1.33k
  for (auto RegexStr : RegexList) {
292
17
    Regex R(RegexStr);
293
17
294
17
    std::string Err;
295
17
    if (!R.isValid(Err))
296
0
      report_fatal_error("invalid regex given as input to polly: " + Err, true);
297
17
298
17
    
if (17
R.match(Str)17
)
299
8
      return true;
300
1.33k
  }
301
1.32k
  return false;
302
1.33k
}
303
//===----------------------------------------------------------------------===//
304
// ScopDetection.
305
306
ScopDetection::ScopDetection(Function &F, const DominatorTree &DT,
307
                             ScalarEvolution &SE, LoopInfo &LI, RegionInfo &RI,
308
                             AliasAnalysis &AA, OptimizationRemarkEmitter &ORE)
309
1.33k
    : DT(DT), SE(SE), LI(LI), RI(RI), AA(AA), ORE(ORE) {
310
1.33k
311
1.33k
  if (
!PollyProcessUnprofitable && 1.33k
LI.empty()21
)
312
0
    return;
313
1.33k
314
1.33k
  Region *TopRegion = RI.getTopLevelRegion();
315
1.33k
316
1.33k
  if (OnlyFunctions.size() > 0 &&
317
7
      !doesStringMatchAnyRegex(F.getName(), OnlyFunctions))
318
2
    return;
319
1.33k
320
1.33k
  
if (1.33k
doesStringMatchAnyRegex(F.getName(), IgnoredFunctions)1.33k
)
321
3
    return;
322
1.33k
323
1.32k
  
if (1.32k
!isValidFunction(F)1.32k
)
324
28
    return;
325
1.32k
326
1.32k
  findScops(*TopRegion);
327
1.29k
328
1.29k
  NumScopRegions += ValidRegions.size();
329
1.29k
330
1.29k
  // Prune non-profitable regions.
331
3.41k
  for (auto &DIt : DetectionContextMap) {
332
3.41k
    auto &DC = DIt.getSecond();
333
3.41k
    if (DC.Log.hasErrors())
334
408
      continue;
335
3.00k
    
if (3.00k
!ValidRegions.count(&DC.CurRegion)3.00k
)
336
1.80k
      continue;
337
3.00k
    LoopStats Stats = countBeneficialLoops(&DC.CurRegion, SE, LI, 0);
338
1.20k
    updateLoopCountStatistic(Stats, false /* OnlyProfitable */);
339
1.20k
    if (
isProfitableRegion(DC)1.20k
)
{1.18k
340
1.18k
      updateLoopCountStatistic(Stats, true /* OnlyProfitable */);
341
1.18k
      continue;
342
1.20k
    }
343
1.20k
344
1.20k
    ValidRegions.remove(&DC.CurRegion);
345
1.29k
  }
346
1.29k
347
1.29k
  NumProfScopRegions += ValidRegions.size();
348
1.29k
  NumLoopsOverall += countBeneficialLoops(TopRegion, SE, LI, 0).NumLoops;
349
1.29k
350
1.29k
  // Only makes sense when we tracked errors.
351
1.29k
  if (PollyTrackFailures)
352
1.29k
    emitMissedRemarks(F);
353
1.29k
354
1.29k
  if (ReportLevel)
355
2
    printLocations(F);
356
1.29k
357
1.29k
  assert(ValidRegions.size() <= DetectionContextMap.size() &&
358
1.29k
         "Cached more results than valid regions");
359
1.33k
}
360
361
template <class RR, typename... Args>
362
inline bool ScopDetection::invalid(DetectionContext &Context, bool Assert,
363
584
                                   Args &&... Arguments) const {
364
584
365
584
  if (
!Context.Verifying584
)
{584
366
584
    RejectLog &Log = Context.Log;
367
584
    std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
368
584
369
584
    if (PollyTrackFailures)
370
584
      Log.report(RejectReason);
371
584
372
584
    DEBUG(dbgs() << RejectReason->getMessage());
373
584
    DEBUG(dbgs() << "\n");
374
584
  } else {
375
0
    assert(!Assert && "Verification of detected scop failed");
376
584
  }
377
584
378
584
  return false;
379
584
}
Unexecuted instantiation: bool polly::ScopDetection::invalid<polly::ReportNonAffBranch, llvm::BasicBlock*, llvm::SCEV const*&, llvm::SCEV const*&, llvm::SwitchInst*&>(polly::ScopDetection::DetectionContext&, bool, llvm::BasicBlock*&&, llvm::SCEV const*&&&, llvm::SCEV const*&&&, llvm::SwitchInst*&&&) const
Unexecuted instantiation: bool polly::ScopDetection::invalid<polly::ReportInvalidCond, llvm::BranchInst*&, llvm::BasicBlock*>(polly::ScopDetection::DetectionContext&, bool, llvm::BranchInst*&&&, llvm::BasicBlock*&&) const
bool polly::ScopDetection::invalid<polly::ReportUndefOperand, llvm::BasicBlock*, llvm::ICmpInst*&>(polly::ScopDetection::DetectionContext&, bool, llvm::BasicBlock*&&, llvm::ICmpInst*&&&) const
Line
Count
Source
363
5
                                   Args &&... Arguments) const {
364
5
365
5
  if (
!Context.Verifying5
)
{5
366
5
    RejectLog &Log = Context.Log;
367
5
    std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
368
5
369
5
    if (PollyTrackFailures)
370
5
      Log.report(RejectReason);
371
5
372
5
    DEBUG(dbgs() << RejectReason->getMessage());
373
5
    DEBUG(dbgs() << "\n");
374
5
  } else {
375
0
    assert(!Assert && "Verification of detected scop failed");
376
5
  }
377
5
378
5
  return false;
379
5
}
bool polly::ScopDetection::invalid<polly::ReportNonAffBranch, llvm::BasicBlock*, llvm::SCEV const*&, llvm::SCEV const*&, llvm::ICmpInst*&>(polly::ScopDetection::DetectionContext&, bool, llvm::BasicBlock*&&, llvm::SCEV const*&&&, llvm::SCEV const*&&&, llvm::ICmpInst*&&&) const
Line
Count
Source
363
5
                                   Args &&... Arguments) const {
364
5
365
5
  if (
!Context.Verifying5
)
{5
366
5
    RejectLog &Log = Context.Log;
367
5
    std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
368
5
369
5
    if (PollyTrackFailures)
370
5
      Log.report(RejectReason);
371
5
372
5
    DEBUG(dbgs() << RejectReason->getMessage());
373
5
    DEBUG(dbgs() << "\n");
374
5
  } else {
375
0
    assert(!Assert && "Verification of detected scop failed");
376
5
  }
377
5
378
5
  return false;
379
5
}
Unexecuted instantiation: bool polly::ScopDetection::invalid<polly::ReportInvalidTerminator, llvm::BasicBlock*>(polly::ScopDetection::DetectionContext&, bool, llvm::BasicBlock*&&) const
bool polly::ScopDetection::invalid<polly::ReportUndefCond, llvm::TerminatorInst*&, llvm::BasicBlock*>(polly::ScopDetection::DetectionContext&, bool, llvm::TerminatorInst*&&&, llvm::BasicBlock*&&) const
Line
Count
Source
363
158
                                   Args &&... Arguments) const {
364
158
365
158
  if (
!Context.Verifying158
)
{158
366
158
    RejectLog &Log = Context.Log;
367
158
    std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
368
158
369
158
    if (PollyTrackFailures)
370
158
      Log.report(RejectReason);
371
158
372
158
    DEBUG(dbgs() << RejectReason->getMessage());
373
158
    DEBUG(dbgs() << "\n");
374
158
  } else {
375
0
    assert(!Assert && "Verification of detected scop failed");
376
158
  }
377
158
378
158
  return false;
379
158
}
bool polly::ScopDetection::invalid<polly::ReportNonAffineAccess, llvm::SCEV const*&, llvm::Instruction const*&, llvm::Value*&>(polly::ScopDetection::DetectionContext&, bool, llvm::SCEV const*&&&, llvm::Instruction const*&&&, llvm::Value*&&&) const
Line
Count
Source
363
10
                                   Args &&... Arguments) const {
364
10
365
10
  if (
!Context.Verifying10
)
{10
366
10
    RejectLog &Log = Context.Log;
367
10
    std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
368
10
369
10
    if (PollyTrackFailures)
370
10
      Log.report(RejectReason);
371
10
372
10
    DEBUG(dbgs() << RejectReason->getMessage());
373
10
    DEBUG(dbgs() << "\n");
374
10
  } else {
375
0
    assert(!Assert && "Verification of detected scop failed");
376
10
  }
377
10
378
10
  return false;
379
10
}
bool polly::ScopDetection::invalid<polly::ReportNonAffineAccess, llvm::SCEV const* const&, llvm::Instruction const*&, llvm::Value*&>(polly::ScopDetection::DetectionContext&, bool, llvm::SCEV const* const&&&, llvm::Instruction const*&&&, llvm::Value*&&&) const
Line
Count
Source
363
26
                                   Args &&... Arguments) const {
364
26
365
26
  if (
!Context.Verifying26
)
{26
366
26
    RejectLog &Log = Context.Log;
367
26
    std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
368
26
369
26
    if (PollyTrackFailures)
370
26
      Log.report(RejectReason);
371
26
372
26
    DEBUG(dbgs() << RejectReason->getMessage());
373
26
    DEBUG(dbgs() << "\n");
374
26
  } else {
375
0
    assert(!Assert && "Verification of detected scop failed");
376
26
  }
377
26
378
26
  return false;
379
26
}
Unexecuted instantiation: bool polly::ScopDetection::invalid<polly::ReportNoBasePtr, llvm::Instruction*&>(polly::ScopDetection::DetectionContext&, bool, llvm::Instruction*&&&) const
bool polly::ScopDetection::invalid<polly::ReportUndefBasePtr, llvm::Instruction*&>(polly::ScopDetection::DetectionContext&, bool, llvm::Instruction*&&&) const
Line
Count
Source
363
5
                                   Args &&... Arguments) const {
364
5
365
5
  if (
!Context.Verifying5
)
{5
366
5
    RejectLog &Log = Context.Log;
367
5
    std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
368
5
369
5
    if (PollyTrackFailures)
370
5
      Log.report(RejectReason);
371
5
372
5
    DEBUG(dbgs() << RejectReason->getMessage());
373
5
    DEBUG(dbgs() << "\n");
374
5
  } else {
375
0
    assert(!Assert && "Verification of detected scop failed");
376
5
  }
377
5
378
5
  return false;
379
5
}
bool polly::ScopDetection::invalid<polly::ReportIntToPtr, llvm::IntToPtrInst*&>(polly::ScopDetection::DetectionContext&, bool, llvm::IntToPtrInst*&&&) const
Line
Count
Source
363
1
                                   Args &&... Arguments) const {
364
1
365
1
  if (
!Context.Verifying1
)
{1
366
1
    RejectLog &Log = Context.Log;
367
1
    std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
368
1
369
1
    if (PollyTrackFailures)
370
1
      Log.report(RejectReason);
371
1
372
1
    DEBUG(dbgs() << RejectReason->getMessage());
373
1
    DEBUG(dbgs() << "\n");
374
1
  } else {
375
0
    assert(!Assert && "Verification of detected scop failed");
376
1
  }
377
1
378
1
  return false;
379
1
}
bool polly::ScopDetection::invalid<polly::ReportVariantBasePtr, llvm::Value*&, llvm::Instruction*&>(polly::ScopDetection::DetectionContext&, bool, llvm::Value*&&&, llvm::Instruction*&&&) const
Line
Count
Source
363
10
                                   Args &&... Arguments) const {
364
10
365
10
  if (
!Context.Verifying10
)
{10
366
10
    RejectLog &Log = Context.Log;
367
10
    std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
368
10
369
10
    if (PollyTrackFailures)
370
10
      Log.report(RejectReason);
371
10
372
10
    DEBUG(dbgs() << RejectReason->getMessage());
373
10
    DEBUG(dbgs() << "\n");
374
10
  } else {
375
0
    assert(!Assert && "Verification of detected scop failed");
376
10
  }
377
10
378
10
  return false;
379
10
}
Unexecuted instantiation: bool polly::ScopDetection::invalid<polly::ReportDifferentArrayElementSize, llvm::Instruction*&, llvm::Value*&>(polly::ScopDetection::DetectionContext&, bool, llvm::Instruction*&&&, llvm::Value*&&&) const
bool polly::ScopDetection::invalid<polly::ReportNonAffineAccess, llvm::SCEV const*&, llvm::Instruction*&, llvm::Value*&>(polly::ScopDetection::DetectionContext&, bool, llvm::SCEV const*&&&, llvm::Instruction*&&&, llvm::Value*&&&) const
Line
Count
Source
363
22
                                   Args &&... Arguments) const {
364
22
365
22
  if (
!Context.Verifying22
)
{22
366
22
    RejectLog &Log = Context.Log;
367
22
    std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
368
22
369
22
    if (PollyTrackFailures)
370
22
      Log.report(RejectReason);
371
22
372
22
    DEBUG(dbgs() << RejectReason->getMessage());
373
22
    DEBUG(dbgs() << "\n");
374
22
  } else {
375
0
    assert(!Assert && "Verification of detected scop failed");
376
22
  }
377
22
378
22
  return false;
379
22
}
bool polly::ScopDetection::invalid<polly::ReportAlias, llvm::Instruction*&, llvm::AliasSet&>(polly::ScopDetection::DetectionContext&, bool, llvm::Instruction*&&&, llvm::AliasSet&&&) const
Line
Count
Source
363
3
                                   Args &&... Arguments) const {
364
3
365
3
  if (
!Context.Verifying3
)
{3
366
3
    RejectLog &Log = Context.Log;
367
3
    std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
368
3
369
3
    if (PollyTrackFailures)
370
3
      Log.report(RejectReason);
371
3
372
3
    DEBUG(dbgs() << RejectReason->getMessage());
373
3
    DEBUG(dbgs() << "\n");
374
3
  } else {
375
0
    assert(!Assert && "Verification of detected scop failed");
376
3
  }
377
3
378
3
  return false;
379
3
}
bool polly::ScopDetection::invalid<polly::ReportFuncCall, llvm::Instruction*>(polly::ScopDetection::DetectionContext&, bool, llvm::Instruction*&&) const
Line
Count
Source
363
12
                                   Args &&... Arguments) const {
364
12
365
12
  if (
!Context.Verifying12
)
{12
366
12
    RejectLog &Log = Context.Log;
367
12
    std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
368
12
369
12
    if (PollyTrackFailures)
370
12
      Log.report(RejectReason);
371
12
372
12
    DEBUG(dbgs() << RejectReason->getMessage());
373
12
    DEBUG(dbgs() << "\n");
374
12
  } else {
375
0
    assert(!Assert && "Verification of detected scop failed");
376
12
  }
377
12
378
12
  return false;
379
12
}
Unexecuted instantiation: bool polly::ScopDetection::invalid<polly::ReportAlloca, llvm::Instruction*>(polly::ScopDetection::DetectionContext&, bool, llvm::Instruction*&&) const
Unexecuted instantiation: bool polly::ScopDetection::invalid<polly::ReportNonSimpleMemoryAccess, llvm::Instruction*>(polly::ScopDetection::DetectionContext&, bool, llvm::Instruction*&&) const
bool polly::ScopDetection::invalid<polly::ReportUnknownInst, llvm::Instruction*>(polly::ScopDetection::DetectionContext&, bool, llvm::Instruction*&&) const
Line
Count
Source
363
48
                                   Args &&... Arguments) const {
364
48
365
48
  if (
!Context.Verifying48
)
{48
366
48
    RejectLog &Log = Context.Log;
367
48
    std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
368
48
369
48
    if (PollyTrackFailures)
370
48
      Log.report(RejectReason);
371
48
372
48
    DEBUG(dbgs() << RejectReason->getMessage());
373
48
    DEBUG(dbgs() << "\n");
374
48
  } else {
375
0
    assert(!Assert && "Verification of detected scop failed");
376
48
  }
377
48
378
48
  return false;
379
48
}
Unexecuted instantiation: bool polly::ScopDetection::invalid<polly::ReportLoopHasNoExit, llvm::Loop*&>(polly::ScopDetection::DetectionContext&, bool, llvm::Loop*&&&) const
bool polly::ScopDetection::invalid<polly::ReportLoopBound, llvm::Loop*&, llvm::SCEV const*&>(polly::ScopDetection::DetectionContext&, bool, llvm::Loop*&&&, llvm::SCEV const*&&&) const
Line
Count
Source
363
152
                                   Args &&... Arguments) const {
364
152
365
152
  if (
!Context.Verifying152
)
{152
366
152
    RejectLog &Log = Context.Log;
367
152
    std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
368
152
369
152
    if (PollyTrackFailures)
370
152
      Log.report(RejectReason);
371
152
372
152
    DEBUG(dbgs() << RejectReason->getMessage());
373
152
    DEBUG(dbgs() << "\n");
374
152
  } else {
375
0
    assert(!Assert && "Verification of detected scop failed");
376
152
  }
377
152
378
152
  return false;
379
152
}
bool polly::ScopDetection::invalid<polly::ReportUnprofitable, llvm::Region*>(polly::ScopDetection::DetectionContext&, bool, llvm::Region*&&) const
Line
Count
Source
363
16
                                   Args &&... Arguments) const {
364
16
365
16
  if (
!Context.Verifying16
)
{16
366
16
    RejectLog &Log = Context.Log;
367
16
    std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
368
16
369
16
    if (PollyTrackFailures)
370
16
      Log.report(RejectReason);
371
16
372
16
    DEBUG(dbgs() << RejectReason->getMessage());
373
16
    DEBUG(dbgs() << "\n");
374
16
  } else {
375
0
    assert(!Assert && "Verification of detected scop failed");
376
16
  }
377
16
378
16
  return false;
379
16
}
bool polly::ScopDetection::invalid<polly::ReportLoopOnlySomeLatches, llvm::Loop*&>(polly::ScopDetection::DetectionContext&, bool, llvm::Loop*&&&) const
Line
Count
Source
363
5
                                   Args &&... Arguments) const {
364
5
365
5
  if (
!Context.Verifying5
)
{5
366
5
    RejectLog &Log = Context.Log;
367
5
    std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
368
5
369
5
    if (PollyTrackFailures)
370
5
      Log.report(RejectReason);
371
5
372
5
    DEBUG(dbgs() << RejectReason->getMessage());
373
5
    DEBUG(dbgs() << "\n");
374
5
  } else {
375
0
    assert(!Assert && "Verification of detected scop failed");
376
5
  }
377
5
378
5
  return false;
379
5
}
bool polly::ScopDetection::invalid<polly::ReportUnreachableInExit, llvm::BasicBlock*, llvm::DebugLoc&>(polly::ScopDetection::DetectionContext&, bool, llvm::BasicBlock*&&, llvm::DebugLoc&&&) const
Line
Count
Source
363
23
                                   Args &&... Arguments) const {
364
23
365
23
  if (
!Context.Verifying23
)
{23
366
23
    RejectLog &Log = Context.Log;
367
23
    std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
368
23
369
23
    if (PollyTrackFailures)
370
23
      Log.report(RejectReason);
371
23
372
23
    DEBUG(dbgs() << RejectReason->getMessage());
373
23
    DEBUG(dbgs() << "\n");
374
23
  } else {
375
0
    assert(!Assert && "Verification of detected scop failed");
376
23
  }
377
23
378
23
  return false;
379
23
}
bool polly::ScopDetection::invalid<polly::ReportEntry, llvm::BasicBlock*>(polly::ScopDetection::DetectionContext&, bool, llvm::BasicBlock*&&) const
Line
Count
Source
363
80
                                   Args &&... Arguments) const {
364
80
365
80
  if (
!Context.Verifying80
)
{80
366
80
    RejectLog &Log = Context.Log;
367
80
    std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
368
80
369
80
    if (PollyTrackFailures)
370
80
      Log.report(RejectReason);
371
80
372
80
    DEBUG(dbgs() << RejectReason->getMessage());
373
80
    DEBUG(dbgs() << "\n");
374
80
  } else {
375
0
    assert(!Assert && "Verification of detected scop failed");
376
80
  }
377
80
378
80
  return false;
379
80
}
bool polly::ScopDetection::invalid<polly::ReportIrreducibleRegion, llvm::Region*, llvm::DebugLoc&>(polly::ScopDetection::DetectionContext&, bool, llvm::Region*&&, llvm::DebugLoc&&&) const
Line
Count
Source
363
3
                                   Args &&... Arguments) const {
364
3
365
3
  if (
!Context.Verifying3
)
{3
366
3
    RejectLog &Log = Context.Log;
367
3
    std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
368
3
369
3
    if (PollyTrackFailures)
370
3
      Log.report(RejectReason);
371
3
372
3
    DEBUG(dbgs() << RejectReason->getMessage());
373
3
    DEBUG(dbgs() << "\n");
374
3
  } else {
375
0
    assert(!Assert && "Verification of detected scop failed");
376
3
  }
377
3
378
3
  return false;
379
3
}
380
381
3.87k
bool ScopDetection::isMaxRegionInScop(const Region &R, bool Verify) const {
382
3.87k
  if (!ValidRegions.count(&R))
383
2.76k
    return false;
384
3.87k
385
1.11k
  
if (1.11k
Verify1.11k
)
{1.11k
386
1.11k
    DetectionContextMap.erase(getBBPairForRegion(&R));
387
1.11k
    const auto &It = DetectionContextMap.insert(std::make_pair(
388
1.11k
        getBBPairForRegion(&R),
389
1.11k
        DetectionContext(const_cast<Region &>(R), AA, false /*verifying*/)));
390
1.11k
    DetectionContext &Context = It.first->second;
391
1.11k
    return isValidRegion(Context);
392
1.11k
  }
393
1.11k
394
0
  return true;
395
3.87k
}
396
397
4
std::string ScopDetection::regionIsInvalidBecause(const Region *R) const {
398
4
  // Get the first error we found. Even in keep-going mode, this is the first
399
4
  // reason that caused the candidate to be rejected.
400
4
  auto *Log = lookupRejectionLog(R);
401
4
402
4
  // This can happen when we marked a region invalid, but didn't track
403
4
  // an error for it.
404
4
  if (
!Log || 4
!Log->hasErrors()3
)
405
4
    return "";
406
4
407
4
  RejectReasonPtr RR = *Log->begin();
408
4
  return RR->getMessage();
409
4
}
410
411
bool ScopDetection::addOverApproximatedRegion(Region *AR,
412
673
                                              DetectionContext &Context) const {
413
673
414
673
  // If we already know about Ar we can exit.
415
673
  if (!Context.NonAffineSubRegionSet.insert(AR))
416
67
    return true;
417
673
418
673
  // All loops in the region have to be overapproximated too if there
419
673
  // are accesses that depend on the iteration count.
420
673
421
606
  
for (BasicBlock *BB : AR->blocks()) 606
{1.51k
422
1.51k
    Loop *L = LI.getLoopFor(BB);
423
1.51k
    if (AR->contains(L))
424
237
      Context.BoxedLoopsSet.insert(L);
425
1.51k
  }
426
606
427
606
  return (AllowNonAffineSubLoops || Context.BoxedLoopsSet.empty());
428
673
}
429
430
bool ScopDetection::onlyValidRequiredInvariantLoads(
431
39.0k
    InvariantLoadsSetTy &RequiredILS, DetectionContext &Context) const {
432
39.0k
  Region &CurRegion = Context.CurRegion;
433
39.0k
  const DataLayout &DL = CurRegion.getEntry()->getModule()->getDataLayout();
434
39.0k
435
39.0k
  if (
!PollyInvariantLoadHoisting && 39.0k
!RequiredILS.empty()31.3k
)
436
382
    return false;
437
39.0k
438
38.7k
  
for (LoadInst *Load : RequiredILS) 38.7k
{1.61k
439
1.61k
    // If we already know a load has been accepted as required invariant, we
440
1.61k
    // already run the validation below once and consequently don't need to
441
1.61k
    // run it again. Hence, we return early. For certain test cases (e.g.,
442
1.61k
    // COSMO this avoids us spending 50% of scop-detection time in this
443
1.61k
    // very function (and its children).
444
1.61k
    if (Context.RequiredILS.count(Load))
445
839
      continue;
446
1.61k
447
775
    
if (775
!isHoistableLoad(Load, CurRegion, LI, SE, DT)775
)
448
136
      return false;
449
775
450
639
    
for (auto NonAffineRegion : Context.NonAffineSubRegionSet) 639
{320
451
320
452
320
      if (isSafeToLoadUnconditionally(Load->getPointerOperand(),
453
320
                                      Load->getAlignment(), DL))
454
6
        continue;
455
320
456
314
      
if (314
NonAffineRegion->contains(Load) &&314
457
14
          Load->getParent() != NonAffineRegion->getEntry())
458
14
        return false;
459
639
    }
460
38.7k
  }
461
38.7k
462
38.7k
  Context.RequiredILS.insert(RequiredILS.begin(), RequiredILS.end());
463
38.5k
464
38.7k
  return true;
465
39.0k
}
466
467
bool ScopDetection::involvesMultiplePtrs(const SCEV *S0, const SCEV *S1,
468
12.7k
                                         Loop *Scope) const {
469
12.7k
  SetVector<Value *> Values;
470
12.7k
  findValues(S0, SE, Values);
471
12.7k
  if (S1)
472
4.65k
    findValues(S1, SE, Values);
473
12.7k
474
12.7k
  SmallPtrSet<Value *, 8> PtrVals;
475
12.7k
  for (auto *V : Values) {
476
5.54k
    if (auto *P2I = dyn_cast<PtrToIntInst>(V))
477
2
      V = P2I->getOperand(0);
478
5.54k
479
5.54k
    if (!V->getType()->isPointerTy())
480
5.48k
      continue;
481
5.54k
482
5.54k
    auto *PtrSCEV = SE.getSCEVAtScope(V, Scope);
483
68
    if (isa<SCEVConstant>(PtrSCEV))
484
2
      continue;
485
68
486
68
    auto *BasePtr = dyn_cast<SCEVUnknown>(SE.getPointerBase(PtrSCEV));
487
66
    if (!BasePtr)
488
0
      return true;
489
66
490
66
    auto *BasePtrVal = BasePtr->getValue();
491
66
    if (
PtrVals.insert(BasePtrVal).second66
)
{66
492
66
      for (auto *PtrVal : PtrVals)
493
66
        
if (66
PtrVal != BasePtrVal && 66
!AA.isNoAlias(PtrVal, BasePtrVal)0
)
494
0
          return true;
495
66
    }
496
12.7k
  }
497
12.7k
498
12.7k
  return false;
499
12.7k
}
500
501
bool ScopDetection::isAffine(const SCEV *S, Loop *Scope,
502
39.9k
                             DetectionContext &Context) const {
503
39.9k
504
39.9k
  InvariantLoadsSetTy AccessILS;
505
39.9k
  if (!isAffineExpr(&Context.CurRegion, Scope, S, SE, &AccessILS))
506
878
    return false;
507
39.9k
508
39.0k
  
if (39.0k
!onlyValidRequiredInvariantLoads(AccessILS, Context)39.0k
)
509
532
    return false;
510
39.0k
511
38.5k
  return true;
512
39.9k
}
513
514
bool ScopDetection::isValidSwitch(BasicBlock &BB, SwitchInst *SI,
515
                                  Value *Condition, bool IsLoopBranch,
516
47
                                  DetectionContext &Context) const {
517
47
  Loop *L = LI.getLoopFor(&BB);
518
47
  const SCEV *ConditionSCEV = SE.getSCEVAtScope(Condition, L);
519
47
520
47
  if (
IsLoopBranch && 47
L->isLoopLatch(&BB)5
)
521
1
    return false;
522
47
523
47
  // Check for invalid usage of different pointers in one expression.
524
46
  
if (46
involvesMultiplePtrs(ConditionSCEV, nullptr, L)46
)
525
0
    return false;
526
46
527
46
  
if (46
isAffine(ConditionSCEV, L, Context)46
)
528
44
    return true;
529
46
530
2
  
if (2
AllowNonAffineSubRegions &&2
531
2
      addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
532
2
    return true;
533
2
534
2
  return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB,
535
2
                                     ConditionSCEV, ConditionSCEV, SI);
536
47
}
537
538
bool ScopDetection::isValidBranch(BasicBlock &BB, BranchInst *BI,
539
                                  Value *Condition, bool IsLoopBranch,
540
27.7k
                                  DetectionContext &Context) const {
541
27.7k
542
27.7k
  // Constant integer conditions are always affine.
543
27.7k
  if (isa<ConstantInt>(Condition))
544
14.8k
    return true;
545
27.7k
546
12.9k
  
if (BinaryOperator *12.9k
BinOp12.9k
= dyn_cast<BinaryOperator>(Condition))
{105
547
105
    auto Opcode = BinOp->getOpcode();
548
105
    if (
Opcode == Instruction::And || 105
Opcode == Instruction::Or94
)
{105
549
105
      Value *Op0 = BinOp->getOperand(0);
550
105
      Value *Op1 = BinOp->getOperand(1);
551
105
      return isValidBranch(BB, BI, Op0, IsLoopBranch, Context) &&
552
105
             isValidBranch(BB, BI, Op1, IsLoopBranch, Context);
553
105
    }
554
12.9k
  }
555
12.9k
556
12.9k
  // Non constant conditions of branches need to be ICmpInst.
557
12.8k
  
if (12.8k
!isa<ICmpInst>(Condition)12.8k
)
{118
558
118
    if (
!IsLoopBranch && 118
AllowNonAffineSubRegions118
&&
559
118
        addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
560
118
      return true;
561
0
    return invalid<ReportInvalidCond>(Context, /*Assert=*/true, BI, &BB);
562
12.8k
  }
563
12.8k
564
12.8k
  ICmpInst *ICmp = cast<ICmpInst>(Condition);
565
12.7k
566
12.7k
  // Are both operands of the ICmp affine?
567
12.7k
  if (isa<UndefValue>(ICmp->getOperand(0)) ||
568
12.7k
      isa<UndefValue>(ICmp->getOperand(1)))
569
5
    return invalid<ReportUndefOperand>(Context, /*Assert=*/true, &BB, ICmp);
570
12.7k
571
12.7k
  Loop *L = LI.getLoopFor(&BB);
572
12.7k
  const SCEV *LHS = SE.getSCEVAtScope(ICmp->getOperand(0), L);
573
12.7k
  const SCEV *RHS = SE.getSCEVAtScope(ICmp->getOperand(1), L);
574
12.7k
575
12.7k
  // If unsigned operations are not allowed try to approximate the region.
576
12.7k
  if (
ICmp->isUnsigned() && 12.7k
!PollyAllowUnsignedOperations109
)
577
0
    
return !IsLoopBranch && 0
AllowNonAffineSubRegions0
&&
578
0
           addOverApproximatedRegion(RI.getRegionFor(&BB), Context);
579
12.7k
580
12.7k
  // Check for invalid usage of different pointers in one expression.
581
12.7k
  
if (12.7k
ICmp->isEquality() && 12.7k
involvesMultiplePtrs(LHS, nullptr, L)8.07k
&&
582
0
      involvesMultiplePtrs(RHS, nullptr, L))
583
0
    return false;
584
12.7k
585
12.7k
  // Check for invalid usage of different pointers in a relational comparison.
586
12.7k
  
if (12.7k
ICmp->isRelational() && 12.7k
involvesMultiplePtrs(LHS, RHS, L)4.65k
)
587
0
    return false;
588
12.7k
589
12.7k
  
if (12.7k
isAffine(LHS, L, Context) && 12.7k
isAffine(RHS, L, Context)12.4k
)
590
12.1k
    return true;
591
12.7k
592
603
  
if (603
!IsLoopBranch && 603
AllowNonAffineSubRegions487
&&
593
485
      addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
594
482
    return true;
595
603
596
121
  
if (121
IsLoopBranch121
)
597
116
    return false;
598
121
599
121
  return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, LHS, RHS,
600
121
                                     ICmp);
601
27.7k
}
602
603
bool ScopDetection::isValidCFG(BasicBlock &BB, bool IsLoopBranch,
604
                               bool AllowUnreachable,
605
27.7k
                               DetectionContext &Context) const {
606
27.7k
  Region &CurRegion = Context.CurRegion;
607
27.7k
608
27.7k
  TerminatorInst *TI = BB.getTerminator();
609
27.7k
610
27.7k
  if (
AllowUnreachable && 27.7k
isa<UnreachableInst>(TI)132
)
611
0
    return true;
612
27.7k
613
27.7k
  // Return instructions are only valid if the region is the top level region.
614
27.7k
  
if (27.7k
isa<ReturnInst>(TI) && 27.7k
CurRegion.isTopLevelRegion()10
)
615
10
    return true;
616
27.7k
617
27.7k
  Value *Condition = getConditionFromTerminator(TI);
618
27.7k
619
27.7k
  if (!Condition)
620
0
    return invalid<ReportInvalidTerminator>(Context, /*Assert=*/true, &BB);
621
27.7k
622
27.7k
  // UndefValue is not allowed as condition.
623
27.7k
  
if (27.7k
isa<UndefValue>(Condition)27.7k
)
624
158
    return invalid<ReportUndefCond>(Context, /*Assert=*/true, TI, &BB);
625
27.7k
626
27.5k
  
if (BranchInst *27.5k
BI27.5k
= dyn_cast<BranchInst>(TI))
627
27.5k
    return isValidBranch(BB, BI, Condition, IsLoopBranch, Context);
628
27.5k
629
27.5k
  SwitchInst *SI = dyn_cast<SwitchInst>(TI);
630
47
  assert(SI && "Terminator was neither branch nor switch");
631
47
632
27.5k
  return isValidSwitch(BB, SI, Condition, IsLoopBranch, Context);
633
27.7k
}
634
635
bool ScopDetection::isValidCallInst(CallInst &CI,
636
216
                                    DetectionContext &Context) const {
637
216
  if (CI.doesNotReturn())
638
0
    return false;
639
216
640
216
  
if (216
CI.doesNotAccessMemory()216
)
641
109
    return true;
642
216
643
107
  
if (auto *107
II107
= dyn_cast<IntrinsicInst>(&CI))
644
68
    
if (68
isValidIntrinsicInst(*II, Context)68
)
645
56
      return true;
646
107
647
107
  Function *CalledFunction = CI.getCalledFunction();
648
51
649
51
  // Indirect calls are not supported.
650
51
  if (CalledFunction == nullptr)
651
0
    return false;
652
51
653
51
  
if (51
AllowModrefCall51
)
{39
654
39
    switch (AA.getModRefBehavior(CalledFunction)) {
655
39
    case FMRB_UnknownModRefBehavior:
656
39
      return false;
657
39
    case FMRB_DoesNotAccessMemory:
658
21
    case FMRB_OnlyReadsMemory:
659
21
      // Implicitly disable delinearization since we have an unknown
660
21
      // accesses with an unknown access function.
661
21
      Context.HasUnknownAccess = true;
662
21
      Context.AST.add(&CI);
663
21
      return true;
664
21
    case FMRB_OnlyReadsArgumentPointees:
665
18
    case FMRB_OnlyAccessesArgumentPointees:
666
56
      for (const auto &Arg : CI.arg_operands()) {
667
56
        if (!Arg->getType()->isPointerTy())
668
28
          continue;
669
56
670
56
        // Bail if a pointer argument has a base address not known to
671
56
        // ScalarEvolution. Note that a zero pointer is acceptable.
672
56
        auto *ArgSCEV = SE.getSCEVAtScope(Arg, LI.getLoopFor(CI.getParent()));
673
28
        if (ArgSCEV->isZero())
674
14
          continue;
675
28
676
28
        auto *BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV));
677
14
        if (!BP)
678
0
          return false;
679
14
680
14
        // Implicitly disable delinearization since we have an unknown
681
14
        // accesses with an unknown access function.
682
14
        Context.HasUnknownAccess = true;
683
18
      }
684
18
685
18
      Context.AST.add(&CI);
686
18
      return true;
687
18
    case FMRB_DoesNotReadMemory:
688
0
    case FMRB_OnlyAccessesInaccessibleMem:
689
0
    case FMRB_OnlyAccessesInaccessibleOrArgMem:
690
39
      return false;
691
39
    }
692
51
  }
693
51
694
12
  return false;
695
216
}
696
697
bool ScopDetection::isValidIntrinsicInst(IntrinsicInst &II,
698
68
                                         DetectionContext &Context) const {
699
68
  if (isIgnoredIntrinsic(&II))
700
29
    return true;
701
68
702
68
  // The closest loop surrounding the call instruction.
703
68
  Loop *L = LI.getLoopFor(II.getParent());
704
39
705
39
  // The access function and base pointer for memory intrinsics.
706
39
  const SCEV *AF;
707
39
  const SCEVUnknown *BP;
708
39
709
39
  switch (II.getIntrinsicID()) {
710
39
  // Memory intrinsics that can be represented are supported.
711
39
  case llvm::Intrinsic::memmove:
712
16
  case llvm::Intrinsic::memcpy:
713
16
    AF = SE.getSCEVAtScope(cast<MemTransferInst>(II).getSource(), L);
714
16
    if (
!AF->isZero()16
)
{16
715
16
      BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF));
716
16
      // Bail if the source pointer is not valid.
717
16
      if (!isValidAccess(&II, AF, BP, Context))
718
0
        return false;
719
16
    }
720
16
  // Fall through
721
33
  case llvm::Intrinsic::memset:
722
33
    AF = SE.getSCEVAtScope(cast<MemIntrinsic>(II).getDest(), L);
723
33
    if (
!AF->isZero()33
)
{29
724
29
      BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF));
725
29
      // Bail if the destination pointer is not valid.
726
29
      if (!isValidAccess(&II, AF, BP, Context))
727
0
        return false;
728
33
    }
729
33
730
33
    // Bail if the length is not affine.
731
33
    
if (33
!isAffine(SE.getSCEVAtScope(cast<MemIntrinsic>(II).getLength(), L), L,33
732
33
                  Context))
733
6
      return false;
734
33
735
27
    return true;
736
33
  default:
737
39
    break;
738
39
  }
739
39
740
6
  return false;
741
68
}
742
743
bool ScopDetection::isInvariant(Value &Val, const Region &Reg,
744
11.9k
                                DetectionContext &Ctx) const {
745
11.9k
  // A reference to function argument or constant value is invariant.
746
11.9k
  if (
isa<Argument>(Val) || 11.9k
isa<Constant>(Val)3.17k
)
747
9.51k
    return true;
748
11.9k
749
11.9k
  Instruction *I = dyn_cast<Instruction>(&Val);
750
2.46k
  if (!I)
751
0
    return false;
752
2.46k
753
2.46k
  
if (2.46k
!Reg.contains(I)2.46k
)
754
1.98k
    return true;
755
2.46k
756
2.46k
  // Loads within the SCoP may read arbitrary values, need to hoist them. If it
757
2.46k
  // is not hoistable, it will be rejected later, but here we assume it is and
758
2.46k
  // that makes the value invariant.
759
480
  
if (auto 480
LI480
= dyn_cast<LoadInst>(I))
{470
760
470
    Ctx.RequiredILS.insert(LI);
761
470
    return true;
762
480
  }
763
480
764
10
  return false;
765
11.9k
}
766
767
/// Remove smax of smax(0, size) expressions from a SCEV expression and
768
/// register the '...' components.
769
///
770
/// Array access expressions as they are generated by GFortran contain smax(0,
771
/// size) expressions that confuse the 'normal' delinearization algorithm.
772
/// However, if we extract such expressions before the normal delinearization
773
/// takes place they can actually help to identify array size expressions in
774
/// Fortran accesses. For the subsequently following delinearization the smax(0,
775
/// size) component can be replaced by just 'size'. This is correct as we will
776
/// always add and verify the assumption that for all subscript expressions
777
/// 'exp' the inequality 0 <= exp < size holds. Hence, we will also verify
778
/// that 0 <= size, which means smax(0, size) == size.
779
class SCEVRemoveMax : public SCEVRewriteVisitor<SCEVRemoveMax> {
780
public:
781
  static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE,
782
1.41k
                             std::vector<const SCEV *> *Terms = nullptr) {
783
1.41k
    SCEVRemoveMax Rewriter(SE, Terms);
784
1.41k
    return Rewriter.visit(Scev);
785
1.41k
  }
786
787
  SCEVRemoveMax(ScalarEvolution &SE, std::vector<const SCEV *> *Terms)
788
1.41k
      : SCEVRewriteVisitor(SE), Terms(Terms) {}
789
790
56
  const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
791
56
    if (
(Expr->getNumOperands() == 2) && 56
Expr->getOperand(0)->isZero()56
)
{56
792
56
      auto Res = visit(Expr->getOperand(1));
793
56
      if (Terms)
794
28
        (*Terms).push_back(Res);
795
56
      return Res;
796
56
    }
797
56
798
0
    return Expr;
799
56
  }
800
801
private:
802
  std::vector<const SCEV *> *Terms;
803
};
804
805
SmallVector<const SCEV *, 4>
806
ScopDetection::getDelinearizationTerms(DetectionContext &Context,
807
443
                                       const SCEVUnknown *BasePointer) const {
808
443
  SmallVector<const SCEV *, 4> Terms;
809
715
  for (const auto &Pair : Context.Accesses[BasePointer]) {
810
715
    std::vector<const SCEV *> MaxTerms;
811
715
    SCEVRemoveMax::rewrite(Pair.second, SE, &MaxTerms);
812
715
    if (
MaxTerms.size() > 0715
)
{16
813
16
      Terms.insert(Terms.begin(), MaxTerms.begin(), MaxTerms.end());
814
16
      continue;
815
715
    }
816
715
    // In case the outermost expression is a plain add, we check if any of its
817
715
    // terms has the form 4 * %inst * %param * %param ..., aka a term that
818
715
    // contains a product between a parameter and an instruction that is
819
715
    // inside the scop. Such instructions, if allowed at all, are instructions
820
715
    // SCEV can not represent, but Polly is still looking through. As a
821
715
    // result, these instructions can depend on induction variables and are
822
715
    // most likely no array sizes. However, terms that are multiplied with
823
715
    // them are likely candidates for array sizes.
824
699
    
if (auto *699
AF699
= dyn_cast<SCEVAddExpr>(Pair.second))
{8
825
17
      for (auto Op : AF->operands()) {
826
17
        if (auto *AF2 = dyn_cast<SCEVAddRecExpr>(Op))
827
4
          SE.collectParametricTerms(AF2, Terms);
828
17
        if (auto *
AF217
= dyn_cast<SCEVMulExpr>(Op))
{8
829
8
          SmallVector<const SCEV *, 0> Operands;
830
8
831
24
          for (auto *MulOp : AF2->operands()) {
832
24
            if (auto *Const = dyn_cast<SCEVConstant>(MulOp))
833
6
              Operands.push_back(Const);
834
24
            if (auto *
Unknown24
= dyn_cast<SCEVUnknown>(MulOp))
{16
835
16
              if (auto *
Inst16
= dyn_cast<Instruction>(Unknown->getValue()))
{4
836
4
                if (!Context.CurRegion.contains(Inst))
837
0
                  Operands.push_back(MulOp);
838
4
839
16
              } else {
840
12
                Operands.push_back(MulOp);
841
16
              }
842
24
            }
843
24
          }
844
8
          if (Operands.size())
845
7
            Terms.push_back(SE.getMulExpr(Operands));
846
17
        }
847
17
      }
848
699
    }
849
699
    if (Terms.empty())
850
489
      SE.collectParametricTerms(Pair.second, Terms);
851
699
  }
852
443
  return Terms;
853
443
}
854
855
bool ScopDetection::hasValidArraySizes(DetectionContext &Context,
856
                                       SmallVectorImpl<const SCEV *> &Sizes,
857
                                       const SCEVUnknown *BasePointer,
858
443
                                       Loop *Scope) const {
859
443
  // If no sizes were found, all sizes are trivially valid. We allow this case
860
443
  // to make it possible to pass known-affine accesses to the delinearization to
861
443
  // try to recover some interesting multi-dimensional accesses, but to still
862
443
  // allow the already known to be affine access in case the delinearization
863
443
  // fails. In such situations, the delinearization will just return a Sizes
864
443
  // array of size zero.
865
443
  if (Sizes.size() == 0)
866
73
    return true;
867
443
868
443
  Value *BaseValue = BasePointer->getValue();
869
370
  Region &CurRegion = Context.CurRegion;
870
1.03k
  for (const SCEV *DelinearizedSize : Sizes) {
871
1.03k
    if (
!isAffine(DelinearizedSize, Scope, Context)1.03k
)
{10
872
10
      Sizes.clear();
873
10
      break;
874
1.03k
    }
875
1.02k
    
if (auto *1.02k
Unknown1.02k
= dyn_cast<SCEVUnknown>(DelinearizedSize))
{617
876
617
      auto *V = dyn_cast<Value>(Unknown->getValue());
877
617
      if (auto *
Load617
= dyn_cast<LoadInst>(V))
{176
878
176
        if (Context.CurRegion.contains(Load) &&
879
24
            isHoistableLoad(Load, CurRegion, LI, SE, DT))
880
24
          Context.RequiredILS.insert(Load);
881
176
        continue;
882
617
      }
883
1.02k
    }
884
851
    
if (851
hasScalarDepsInsideRegion(DelinearizedSize, &CurRegion, Scope, false,851
885
851
                                  Context.RequiredILS))
886
851
      return invalid<ReportNonAffineAccess>(
887
0
          Context, /*Assert=*/true, DelinearizedSize,
888
851
          Context.Accesses[BasePointer].front().first, BaseValue);
889
851
  }
890
370
891
370
  // No array shape derived.
892
370
  
if (370
Sizes.empty()370
)
{10
893
10
    if (AllowNonAffine)
894
0
      return true;
895
10
896
10
    
for (const auto &Pair : Context.Accesses[BasePointer]) 10
{11
897
11
      const Instruction *Insn = Pair.first;
898
11
      const SCEV *AF = Pair.second;
899
11
900
11
      if (
!isAffine(AF, Scope, Context)11
)
{10
901
10
        invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Insn,
902
10
                                       BaseValue);
903
10
        if (!KeepGoing)
904
10
          return false;
905
11
      }
906
11
    }
907
0
    return false;
908
370
  }
909
360
  return true;
910
443
}
911
912
// We first store the resulting memory accesses in TempMemoryAccesses. Only
913
// if the access functions for all memory accesses have been successfully
914
// delinearized we continue. Otherwise, we either report a failure or, if
915
// non-affine accesses are allowed, we drop the information. In case the
916
// information is dropped the memory accesses need to be overapproximated
917
// when translated to a polyhedral representation.
918
bool ScopDetection::computeAccessFunctions(
919
    DetectionContext &Context, const SCEVUnknown *BasePointer,
920
433
    std::shared_ptr<ArrayShape> Shape) const {
921
433
  Value *BaseValue = BasePointer->getValue();
922
433
  bool BasePtrHasNonAffine = false;
923
433
  MapInsnToMemAcc TempMemoryAccesses;
924
695
  for (const auto &Pair : Context.Accesses[BasePointer]) {
925
695
    const Instruction *Insn = Pair.first;
926
695
    auto *AF = Pair.second;
927
695
    AF = SCEVRemoveMax::rewrite(AF, SE);
928
695
    bool IsNonAffine = false;
929
695
    TempMemoryAccesses.insert(std::make_pair(Insn, MemAcc(Insn, Shape)));
930
695
    MemAcc *Acc = &TempMemoryAccesses.find(Insn)->second;
931
695
    auto *Scope = LI.getLoopFor(Insn->getParent());
932
695
933
695
    if (
!AF695
)
{0
934
0
      if (isAffine(Pair.second, Scope, Context))
935
0
        Acc->DelinearizedSubscripts.push_back(Pair.second);
936
0
      else
937
0
        IsNonAffine = true;
938
695
    } else {
939
695
      if (
Shape->DelinearizedSizes.size() == 0695
)
{117
940
117
        Acc->DelinearizedSubscripts.push_back(AF);
941
695
      } else {
942
578
        SE.computeAccessFunctions(AF, Acc->DelinearizedSubscripts,
943
578
                                  Shape->DelinearizedSizes);
944
578
        if (Acc->DelinearizedSubscripts.size() == 0)
945
6
          IsNonAffine = true;
946
695
      }
947
695
      for (const SCEV *S : Acc->DelinearizedSubscripts)
948
1.77k
        
if (1.77k
!isAffine(S, Scope, Context)1.77k
)
949
100
          IsNonAffine = true;
950
695
    }
951
695
952
695
    // (Possibly) report non affine access
953
695
    if (
IsNonAffine695
)
{106
954
106
      BasePtrHasNonAffine = true;
955
106
      if (!AllowNonAffine)
956
106
        invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Pair.second,
957
106
                                       Insn, BaseValue);
958
106
      if (
!KeepGoing && 106
!AllowNonAffine95
)
959
19
        return false;
960
695
    }
961
695
  }
962
433
963
414
  
if (414
!BasePtrHasNonAffine414
)
964
414
    Context.InsnToMemAcc.insert(TempMemoryAccesses.begin(),
965
414
                                TempMemoryAccesses.end());
966
414
967
433
  return true;
968
433
}
969
970
bool ScopDetection::hasBaseAffineAccesses(DetectionContext &Context,
971
                                          const SCEVUnknown *BasePointer,
972
443
                                          Loop *Scope) const {
973
443
  auto Shape = std::shared_ptr<ArrayShape>(new ArrayShape(BasePointer));
974
443
975
443
  auto Terms = getDelinearizationTerms(Context, BasePointer);
976
443
977
443
  SE.findArrayDimensions(Terms, Shape->DelinearizedSizes,
978
443
                         Context.ElementSize[BasePointer]);
979
443
980
443
  if (!hasValidArraySizes(Context, Shape->DelinearizedSizes, BasePointer,
981
443
                          Scope))
982
10
    return false;
983
443
984
433
  return computeAccessFunctions(Context, BasePointer, Shape);
985
443
}
986
987
3.11k
bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const {
988
3.11k
  // TODO: If we have an unknown access and other non-affine accesses we do
989
3.11k
  //       not try to delinearize them for now.
990
3.11k
  if (
Context.HasUnknownAccess && 3.11k
!Context.NonAffineAccesses.empty()35
)
991
12
    return AllowNonAffine;
992
3.11k
993
3.10k
  
for (auto &Pair : Context.NonAffineAccesses) 3.10k
{443
994
443
    auto *BasePointer = Pair.first;
995
443
    auto *Scope = Pair.second;
996
443
    if (
!hasBaseAffineAccesses(Context, BasePointer, Scope)443
)
{29
997
29
      if (KeepGoing)
998
0
        continue;
999
29
      else
1000
29
        return false;
1001
443
    }
1002
3.10k
  }
1003
3.07k
  return true;
1004
3.11k
}
1005
1006
bool ScopDetection::isValidAccess(Instruction *Inst, const SCEV *AF,
1007
                                  const SCEVUnknown *BP,
1008
11.9k
                                  DetectionContext &Context) const {
1009
11.9k
1010
11.9k
  if (!BP)
1011
0
    return invalid<ReportNoBasePtr>(Context, /*Assert=*/true, Inst);
1012
11.9k
1013
11.9k
  auto *BV = BP->getValue();
1014
11.9k
  if (isa<UndefValue>(BV))
1015
5
    return invalid<ReportUndefBasePtr>(Context, /*Assert=*/true, Inst);
1016
11.9k
1017
11.9k
  // FIXME: Think about allowing IntToPtrInst
1018
11.9k
  
if (IntToPtrInst *11.9k
Inst11.9k
= dyn_cast<IntToPtrInst>(BV))
1019
1
    return invalid<ReportIntToPtr>(Context, /*Assert=*/true, Inst);
1020
11.9k
1021
11.9k
  // Check that the base address of the access is invariant in the current
1022
11.9k
  // region.
1023
11.9k
  
if (11.9k
!isInvariant(*BV, Context.CurRegion, Context)11.9k
)
1024
10
    return invalid<ReportVariantBasePtr>(Context, /*Assert=*/true, BV, Inst);
1025
11.9k
1026
11.9k
  AF = SE.getMinusSCEV(AF, BP);
1027
11.9k
1028
11.9k
  const SCEV *Size;
1029
11.9k
  if (
!isa<MemIntrinsic>(Inst)11.9k
)
{11.9k
1030
11.9k
    Size = SE.getElementSize(Inst);
1031
11.9k
  } else {
1032
45
    auto *SizeTy =
1033
45
        SE.getEffectiveSCEVType(PointerType::getInt8PtrTy(SE.getContext()));
1034
45
    Size = SE.getConstant(SizeTy, 8);
1035
11.9k
  }
1036
11.9k
1037
11.9k
  if (
Context.ElementSize[BP]11.9k
)
{6.79k
1038
6.79k
    if (
!AllowDifferentTypes && 6.79k
Context.ElementSize[BP] != Size0
)
1039
6.79k
      return invalid<ReportDifferentArrayElementSize>(Context, /*Assert=*/true,
1040
6.79k
                                                      Inst, BV);
1041
6.79k
1042
6.79k
    Context.ElementSize[BP] = SE.getSMinExpr(Size, Context.ElementSize[BP]);
1043
11.9k
  } else {
1044
5.17k
    Context.ElementSize[BP] = Size;
1045
11.9k
  }
1046
11.9k
1047
11.9k
  bool IsVariantInNonAffineLoop = false;
1048
11.9k
  SetVector<const Loop *> Loops;
1049
11.9k
  findLoops(AF, Loops);
1050
11.9k
  for (const Loop *L : Loops)
1051
6.97k
    
if (6.97k
Context.BoxedLoopsSet.count(L)6.97k
)
1052
39
      IsVariantInNonAffineLoop = true;
1053
11.9k
1054
11.9k
  auto *Scope = LI.getLoopFor(Inst->getParent());
1055
11.9k
  bool IsAffine = !IsVariantInNonAffineLoop && isAffine(AF, Scope, Context);
1056
11.9k
  // Do not try to delinearize memory intrinsics and force them to be affine.
1057
11.9k
  if (
isa<MemIntrinsic>(Inst) && 11.9k
!IsAffine45
)
{0
1058
0
    return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst,
1059
0
                                          BV);
1060
11.9k
  } else 
if (11.9k
PollyDelinearize && 11.9k
!IsVariantInNonAffineLoop11.9k
)
{11.8k
1061
11.8k
    Context.Accesses[BP].push_back({Inst, AF});
1062
11.8k
1063
11.8k
    if (
!IsAffine || 11.8k
hasIVParams(AF)11.2k
)
1064
11.8k
      Context.NonAffineAccesses.insert(
1065
11.8k
          std::make_pair(BP, LI.getLoopFor(Inst->getParent())));
1066
11.9k
  } else 
if (68
!AllowNonAffine && 68
!IsAffine28
)
{22
1067
22
    return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst,
1068
22
                                          BV);
1069
11.9k
  }
1070
11.9k
1071
11.9k
  
if (11.9k
IgnoreAliasing11.9k
)
1072
442
    return true;
1073
11.9k
1074
11.9k
  // Check if the base pointer of the memory access does alias with
1075
11.9k
  // any other pointer. This cannot be handled at the moment.
1076
11.9k
  AAMDNodes AATags;
1077
11.5k
  Inst->getAAMetadata(AATags);
1078
11.5k
  AliasSet &AS = Context.AST.getAliasSetForPointer(
1079
11.5k
      BP->getValue(), MemoryLocation::UnknownSize, AATags);
1080
11.5k
1081
11.5k
  if (
!AS.isMustAlias()11.5k
)
{3.16k
1082
3.16k
    if (
PollyUseRuntimeAliasChecks3.16k
)
{3.16k
1083
3.16k
      bool CanBuildRunTimeCheck = true;
1084
3.16k
      // The run-time alias check places code that involves the base pointer at
1085
3.16k
      // the beginning of the SCoP. This breaks if the base pointer is defined
1086
3.16k
      // inside the scop. Hence, we can only create a run-time check if we are
1087
3.16k
      // sure the base pointer is not an instruction defined inside the scop.
1088
3.16k
      // However, we can ignore loads that will be hoisted.
1089
11.1k
      for (const auto &Ptr : AS) {
1090
11.1k
        Instruction *Inst = dyn_cast<Instruction>(Ptr.getValue());
1091
11.1k
        if (
Inst && 11.1k
Context.CurRegion.contains(Inst)2.32k
)
{2.17k
1092
2.17k
          auto *Load = dyn_cast<LoadInst>(Inst);
1093
2.17k
          if (
Load && 2.17k
isHoistableLoad(Load, Context.CurRegion, LI, SE, DT)2.17k
)
{2.17k
1094
2.17k
            Context.RequiredILS.insert(Load);
1095
2.17k
            continue;
1096
2.17k
          }
1097
2.17k
1098
2.17k
          CanBuildRunTimeCheck = false;
1099
2.17k
          break;
1100
11.1k
        }
1101
11.1k
      }
1102
3.16k
1103
3.16k
      if (CanBuildRunTimeCheck)
1104
3.16k
        return true;
1105
3.16k
    }
1106
3
    return invalid<ReportAlias>(Context, /*Assert=*/true, Inst, AS);
1107
11.5k
  }
1108
11.5k
1109
8.33k
  return true;
1110
11.9k
}
1111
1112
bool ScopDetection::isValidMemoryAccess(MemAccInst Inst,
1113
11.9k
                                        DetectionContext &Context) const {
1114
11.9k
  Value *Ptr = Inst.getPointerOperand();
1115
11.9k
  Loop *L = LI.getLoopFor(Inst->getParent());
1116
11.9k
  const SCEV *AccessFunction = SE.getSCEVAtScope(Ptr, L);
1117
11.9k
  const SCEVUnknown *BasePointer;
1118
11.9k
1119
11.9k
  BasePointer = dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
1120
11.9k
1121
11.9k
  return isValidAccess(Inst, AccessFunction, BasePointer, Context);
1122
11.9k
}
1123
1124
bool ScopDetection::isValidInstruction(Instruction &Inst,
1125
50.9k
                                       DetectionContext &Context) const {
1126
95.2k
  for (auto &Op : Inst.operands()) {
1127
95.2k
    auto *OpInst = dyn_cast<Instruction>(&Op);
1128
95.2k
1129
95.2k
    if (!OpInst)
1130
39.2k
      continue;
1131
95.2k
1132
56.0k
    
if (56.0k
isErrorBlock(*OpInst->getParent(), Context.CurRegion, LI, DT)56.0k
)
1133
2
      return false;
1134
56.0k
  }
1135
50.9k
1136
50.9k
  
if (50.9k
isa<LandingPadInst>(&Inst) || 50.9k
isa<ResumeInst>(&Inst)50.9k
)
1137
0
    return false;
1138
50.9k
1139
50.9k
  // We only check the call instruction but not invoke instruction.
1140
50.9k
  
if (CallInst *50.9k
CI50.9k
= dyn_cast<CallInst>(&Inst))
{216
1141
216
    if (isValidCallInst(*CI, Context))
1142
204
      return true;
1143
216
1144
12
    return invalid<ReportFuncCall>(Context, /*Assert=*/true, &Inst);
1145
50.9k
  }
1146
50.9k
1147
50.7k
  
if (50.7k
!Inst.mayReadOrWriteMemory()50.7k
)
{38.7k
1148
38.7k
    if (!isa<AllocaInst>(Inst))
1149
38.7k
      return true;
1150
38.7k
1151
0
    return invalid<ReportAlloca>(Context, /*Assert=*/true, &Inst);
1152
50.7k
  }
1153
50.7k
1154
50.7k
  // Check the access function.
1155
11.9k
  
if (auto 11.9k
MemInst11.9k
= MemAccInst::dyn_cast(Inst))
{11.9k
1156
11.9k
    Context.hasStores |= isa<StoreInst>(MemInst);
1157
11.9k
    Context.hasLoads |= isa<LoadInst>(MemInst);
1158
11.9k
    if (!MemInst.isSimple())
1159
11.9k
      return invalid<ReportNonSimpleMemoryAccess>(Context, /*Assert=*/true,
1160
11.9k
                                                  &Inst);
1161
11.9k
1162
11.9k
    return isValidMemoryAccess(MemInst, Context);
1163
11.9k
  }
1164
11.9k
1165
11.9k
  // We do not know this instruction, therefore we assume it is invalid.
1166
48
  return invalid<ReportUnknownInst>(Context, /*Assert=*/true, &Inst);
1167
50.9k
}
1168
1169
/// Check whether @p L has exiting blocks.
1170
///
1171
/// @param L The loop of interest
1172
///
1173
/// @return True if the loop has exiting blocks, false otherwise.
1174
5.07k
static bool hasExitingBlocks(Loop *L) {
1175
5.07k
  SmallVector<BasicBlock *, 4> ExitingBlocks;
1176
5.07k
  L->getExitingBlocks(ExitingBlocks);
1177
5.07k
  return !ExitingBlocks.empty();
1178
5.07k
}
1179
1180
bool ScopDetection::canUseISLTripCount(Loop *L,
1181
5.07k
                                       DetectionContext &Context) const {
1182
5.07k
  // Ensure the loop has valid exiting blocks as well as latches, otherwise we
1183
5.07k
  // need to overapproximate it as a boxed loop.
1184
5.07k
  SmallVector<BasicBlock *, 4> LoopControlBlocks;
1185
5.07k
  L->getExitingBlocks(LoopControlBlocks);
1186
5.07k
  L->getLoopLatches(LoopControlBlocks);
1187
10.0k
  for (BasicBlock *ControlBB : LoopControlBlocks) {
1188
10.0k
    if (!isValidCFG(*ControlBB, true, false, Context))
1189
220
      return false;
1190
10.0k
  }
1191
5.07k
1192
5.07k
  // We can use ISL to compute the trip count of L.
1193
4.85k
  return true;
1194
5.07k
}
1195
1196
5.07k
bool ScopDetection::isValidLoop(Loop *L, DetectionContext &Context) const {
1197
5.07k
  // Loops that contain part but not all of the blocks of a region cannot be
1198
5.07k
  // handled by the schedule generation. Such loop constructs can happen
1199
5.07k
  // because a region can contain BBs that have no path to the exit block
1200
5.07k
  // (Infinite loops, UnreachableInst), but such blocks are never part of a
1201
5.07k
  // loop.
1202
5.07k
  //
1203
5.07k
  // _______________
1204
5.07k
  // | Loop Header | <-----------.
1205
5.07k
  // ---------------             |
1206
5.07k
  //        |                    |
1207
5.07k
  // _______________       ______________
1208
5.07k
  // | RegionEntry |-----> | RegionExit |----->
1209
5.07k
  // ---------------       --------------
1210
5.07k
  //        |
1211
5.07k
  // _______________
1212
5.07k
  // | EndlessLoop | <--.
1213
5.07k
  // ---------------    |
1214
5.07k
  //       |            |
1215
5.07k
  //       \------------/
1216
5.07k
  //
1217
5.07k
  // In the example above, the loop (LoopHeader,RegionEntry,RegionExit) is
1218
5.07k
  // neither entirely contained in the region RegionEntry->RegionExit
1219
5.07k
  // (containing RegionEntry,EndlessLoop) nor is the region entirely contained
1220
5.07k
  // in the loop.
1221
5.07k
  // The block EndlessLoop is contained in the region because Region::contains
1222
5.07k
  // tests whether it is not dominated by RegionExit. This is probably to not
1223
5.07k
  // having to query the PostdominatorTree. Instead of an endless loop, a dead
1224
5.07k
  // end can also be formed by an UnreachableInst. This case is already caught
1225
5.07k
  // by isErrorBlock(). We hence only have to reject endless loops here.
1226
5.07k
  if (!hasExitingBlocks(L))
1227
0
    return invalid<ReportLoopHasNoExit>(Context, /*Assert=*/true, L);
1228
5.07k
1229
5.07k
  
if (5.07k
canUseISLTripCount(L, Context)5.07k
)
1230
4.85k
    return true;
1231
5.07k
1232
220
  
if (220
AllowNonAffineSubLoops && 220
AllowNonAffineSubRegions68
)
{68
1233
68
    Region *R = RI.getRegionFor(L->getHeader());
1234
79
    while (
R != &Context.CurRegion && 79
!R->contains(L)64
)
1235
11
      R = R->getParent();
1236
68
1237
68
    if (addOverApproximatedRegion(R, Context))
1238
68
      return true;
1239
220
  }
1240
220
1241
220
  const SCEV *LoopCount = SE.getBackedgeTakenCount(L);
1242
220
  return invalid<ReportLoopBound>(Context, /*Assert=*/true, L, LoopCount);
1243
5.07k
}
1244
1245
/// Return the number of loops in @p L (incl. @p L) that have a trip
1246
///        count that is not known to be less than @MinProfitableTrips.
1247
ScopDetection::LoopStats
1248
ScopDetection::countBeneficialSubLoops(Loop *L, ScalarEvolution &SE,
1249
5.19k
                                       unsigned MinProfitableTrips) {
1250
5.19k
  auto *TripCount = SE.getBackedgeTakenCount(L);
1251
5.19k
1252
5.19k
  int NumLoops = 1;
1253
5.19k
  int MaxLoopDepth = 1;
1254
5.19k
  if (auto *TripCountC = dyn_cast<SCEVConstant>(TripCount))
1255
2.84k
    
if (2.84k
TripCountC->getType()->getScalarSizeInBits() <= 642.84k
)
1256
2.84k
      
if (2.84k
TripCountC->getValue()->getZExtValue() <= MinProfitableTrips2.84k
)
1257
187
        NumLoops -= 1;
1258
5.19k
1259
5.19k
  for (auto &SubLoop : *L) {
1260
1.42k
    LoopStats Stats = countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips);
1261
1.42k
    NumLoops += Stats.NumLoops;
1262
1.42k
    MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth + 1);
1263
5.19k
  }
1264
5.19k
1265
5.19k
  return {NumLoops, MaxLoopDepth};
1266
5.19k
}
1267
1268
ScopDetection::LoopStats
1269
ScopDetection::countBeneficialLoops(Region *R, ScalarEvolution &SE,
1270
3.56k
                                    LoopInfo &LI, unsigned MinProfitableTrips) {
1271
3.56k
  int LoopNum = 0;
1272
3.56k
  int MaxLoopDepth = 0;
1273
3.56k
1274
3.56k
  auto L = LI.getLoopFor(R->getEntry());
1275
3.56k
  L = L ? 
R->outermostLoopInRegion(L)2.00k
:
nullptr1.56k
;
1276
3.56k
  L = L ? 
L->getParentLoop()1.93k
:
nullptr1.63k
;
1277
3.56k
1278
3.56k
  auto SubLoops =
1279
3.56k
      L ? 
L->getSubLoopsVector()51
:
std::vector<Loop *>(LI.begin(), LI.end())3.51k
;
1280
3.56k
1281
3.56k
  for (auto &SubLoop : SubLoops)
1282
3.97k
    
if (3.97k
R->contains(SubLoop)3.97k
)
{3.77k
1283
3.77k
      LoopStats Stats =
1284
3.77k
          countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips);
1285
3.77k
      LoopNum += Stats.NumLoops;
1286
3.77k
      MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth);
1287
3.77k
    }
1288
3.56k
1289
3.56k
  return {LoopNum, MaxLoopDepth};
1290
3.56k
}
1291
1292
1.19k
Region *ScopDetection::expandRegion(Region &R) {
1293
1.19k
  // Initial no valid region was found (greater than R)
1294
1.19k
  std::unique_ptr<Region> LastValidRegion;
1295
1.19k
  auto ExpandedRegion = std::unique_ptr<Region>(R.getExpandedRegion());
1296
1.19k
1297
1.19k
  DEBUG(dbgs() << "\tExpanding " << R.getNameStr() << "\n");
1298
1.19k
1299
1.75k
  while (
ExpandedRegion1.75k
)
{602
1300
602
    const auto &It = DetectionContextMap.insert(std::make_pair(
1301
602
        getBBPairForRegion(ExpandedRegion.get()),
1302
602
        DetectionContext(*ExpandedRegion, AA, false /*verifying*/)));
1303
602
    DetectionContext &Context = It.first->second;
1304
602
    DEBUG(dbgs() << "\t\tTrying " << ExpandedRegion->getNameStr() << "\n");
1305
602
    // Only expand when we did not collect errors.
1306
602
1307
602
    if (
!Context.Log.hasErrors()602
)
{602
1308
602
      // If the exit is valid check all blocks
1309
602
      //  - if true, a valid region was found => store it + keep expanding
1310
602
      //  - if false, .tbd. => stop  (should this really end the loop?)
1311
602
      if (
!allBlocksValid(Context) || 602
Context.Log.hasErrors()557
)
{46
1312
46
        removeCachedResults(*ExpandedRegion);
1313
46
        DetectionContextMap.erase(It.first);
1314
46
        break;
1315
602
      }
1316
602
1317
602
      // Store this region, because it is the greatest valid (encountered so
1318
602
      // far).
1319
556
      
if (556
LastValidRegion556
)
{256
1320
256
        removeCachedResults(*LastValidRegion);
1321
256
        DetectionContextMap.erase(getBBPairForRegion(LastValidRegion.get()));
1322
556
      }
1323
556
      LastValidRegion = std::move(ExpandedRegion);
1324
556
1325
556
      // Create and test the next greater region (if any)
1326
556
      ExpandedRegion =
1327
556
          std::unique_ptr<Region>(LastValidRegion->getExpandedRegion());
1328
556
1329
602
    } else {
1330
0
      // Create and test the next greater region (if any)
1331
0
      removeCachedResults(*ExpandedRegion);
1332
0
      DetectionContextMap.erase(It.first);
1333
0
      ExpandedRegion =
1334
0
          std::unique_ptr<Region>(ExpandedRegion->getExpandedRegion());
1335
602
    }
1336
1.19k
  }
1337
1.19k
1338
1.19k
  DEBUG({
1339
1.19k
    if (LastValidRegion)
1340
1.19k
      dbgs() << "\tto " << LastValidRegion->getNameStr() << "\n";
1341
1.19k
    else
1342
1.19k
      dbgs() << "\tExpanding " << R.getNameStr() << " failed\n";
1343
1.19k
  });
1344
1.19k
1345
1.19k
  return LastValidRegion.release();
1346
1.19k
}
1347
44
static bool regionWithoutLoops(Region &R, LoopInfo &LI) {
1348
44
  for (const BasicBlock *BB : R.blocks())
1349
48
    
if (48
R.contains(LI.getLoopFor(BB))48
)
1350
43
      return false;
1351
44
1352
1
  return true;
1353
44
}
1354
1355
728
void ScopDetection::removeCachedResultsRecursively(const Region &R) {
1356
728
  for (auto &SubRegion : R) {
1357
634
    if (
ValidRegions.count(SubRegion.get())634
)
{206
1358
206
      removeCachedResults(*SubRegion.get());
1359
634
    } else
1360
428
      removeCachedResultsRecursively(*SubRegion);
1361
728
  }
1362
728
}
1363
1364
2.51k
void ScopDetection::removeCachedResults(const Region &R) {
1365
2.51k
  ValidRegions.remove(&R);
1366
2.51k
}
1367
1368
3.11k
void ScopDetection::findScops(Region &R) {
1369
3.11k
  const auto &It = DetectionContextMap.insert(std::make_pair(
1370
3.11k
      getBBPairForRegion(&R), DetectionContext(R, AA, false /*verifying*/)));
1371
3.11k
  DetectionContext &Context = It.first->second;
1372
3.11k
1373
3.11k
  bool RegionIsValid = false;
1374
3.11k
  if (
!PollyProcessUnprofitable && 3.11k
regionWithoutLoops(R, LI)44
)
1375
1
    invalid<ReportUnprofitable>(Context, /*Assert=*/true, &R);
1376
3.11k
  else
1377
3.11k
    RegionIsValid = isValidRegion(Context);
1378
3.11k
1379
1.41k
  bool HasErrors = !RegionIsValid || Context.Log.size() > 0;
1380
3.11k
1381
3.11k
  if (
HasErrors3.11k
)
{1.70k
1382
1.70k
    removeCachedResults(R);
1383
3.11k
  } else {
1384
1.40k
    ValidRegions.insert(&R);
1385
1.40k
    return;
1386
3.11k
  }
1387
3.11k
1388
3.11k
  for (auto &SubRegion : R)
1389
1.81k
    findScops(*SubRegion);
1390
1.70k
1391
1.70k
  // Try to expand regions.
1392
1.70k
  //
1393
1.70k
  // As the region tree normally only contains canonical regions, non canonical
1394
1.70k
  // regions that form a Scop are not found. Therefore, those non canonical
1395
1.70k
  // regions are checked by expanding the canonical ones.
1396
1.70k
1397
1.70k
  std::vector<Region *> ToExpand;
1398
1.70k
1399
1.70k
  for (auto &SubRegion : R)
1400
1.81k
    ToExpand.push_back(SubRegion.get());
1401
1.70k
1402
1.81k
  for (Region *CurrentRegion : ToExpand) {
1403
1.81k
    // Skip invalid regions. Regions may become invalid, if they are element of
1404
1.81k
    // an already expanded region.
1405
1.81k
    if (!ValidRegions.count(CurrentRegion))
1406
620
      continue;
1407
1.81k
1408
1.81k
    // Skip regions that had errors.
1409
1.81k
    bool HadErrors = lookupRejectionLog(CurrentRegion)->hasErrors();
1410
1.19k
    if (HadErrors)
1411
0
      continue;
1412
1.19k
1413
1.19k
    Region *ExpandedR = expandRegion(*CurrentRegion);
1414
1.19k
1415
1.19k
    if (!ExpandedR)
1416
896
      continue;
1417
1.19k
1418
1.19k
    R.addSubRegion(ExpandedR, true);
1419
300
    ValidRegions.insert(ExpandedR);
1420
300
    removeCachedResults(*CurrentRegion);
1421
300
    removeCachedResultsRecursively(*ExpandedR);
1422
1.70k
  }
1423
3.11k
}
1424
1425
3.43k
bool ScopDetection::allBlocksValid(DetectionContext &Context) const {
1426
3.43k
  Region &CurRegion = Context.CurRegion;
1427
3.43k
1428
18.4k
  for (const BasicBlock *BB : CurRegion.blocks()) {
1429
18.4k
    Loop *L = LI.getLoopFor(BB);
1430
18.4k
    if (
L && 18.4k
L->getHeader() == BB14.8k
)
{5.12k
1431
5.12k
      if (
CurRegion.contains(L)5.12k
)
{5.07k
1432
5.07k
        if (
!isValidLoop(L, Context) && 5.07k
!KeepGoing152
)
1433
152
          return false;
1434
5.12k
      } else {
1435
50
        SmallVector<BasicBlock *, 1> Latches;
1436
50
        L->getLoopLatches(Latches);
1437
50
        for (BasicBlock *Latch : Latches)
1438
58
          
if (58
CurRegion.contains(Latch)58
)
1439
58
            return invalid<ReportLoopOnlySomeLatches>(Context, /*Assert=*/true,
1440
50
                                                      L);
1441
5.12k
      }
1442
18.4k
    }
1443
18.4k
  }
1444
3.43k
1445
3.27k
  
for (BasicBlock *BB : CurRegion.blocks()) 3.27k
{17.7k
1446
17.7k
    bool IsErrorBlock = isErrorBlock(*BB, CurRegion, LI, DT);
1447
17.7k
1448
17.7k
    // Also check exception blocks (and possibly register them as non-affine
1449
17.7k
    // regions). Even though exception blocks are not modeled, we use them
1450
17.7k
    // to forward-propagate domain constraints during ScopInfo construction.
1451
17.7k
    if (
!isValidCFG(*BB, false, IsErrorBlock, Context) && 17.7k
!KeepGoing65
)
1452
65
      return false;
1453
17.7k
1454
17.6k
    
if (17.6k
IsErrorBlock17.6k
)
1455
131
      continue;
1456
17.6k
1457
68.4k
    
for (BasicBlock::iterator I = BB->begin(), E = --BB->end(); 17.5k
I != E68.4k
;
++I50.8k
)
1458
50.9k
      
if (50.9k
!isValidInstruction(*I, Context) && 50.9k
!KeepGoing103
)
1459
94
        return false;
1460
17.5k
  }
1461
3.27k
1462
3.11k
  
if (3.11k
!hasAffineMemoryAccesses(Context)3.11k
)
1463
35
    return false;
1464
3.11k
1465
3.08k
  return true;
1466
3.43k
}
1467
1468
bool ScopDetection::hasSufficientCompute(DetectionContext &Context,
1469
8
                                         int NumLoops) const {
1470
8
  int InstCount = 0;
1471
8
1472
8
  if (NumLoops == 0)
1473
0
    return false;
1474
8
1475
8
  for (auto *BB : Context.CurRegion.blocks())
1476
36
    
if (36
Context.CurRegion.contains(LI.getLoopFor(BB))36
)
1477
36
      InstCount += BB->size();
1478
8
1479
8
  InstCount = InstCount / NumLoops;
1480
8
1481
8
  return InstCount >= ProfitabilityMinPerLoopInstructions;
1482
8
}
1483
1484
bool ScopDetection::hasPossiblyDistributableLoop(
1485
10
    DetectionContext &Context) const {
1486
10
  for (auto *BB : Context.CurRegion.blocks()) {
1487
10
    auto *L = LI.getLoopFor(BB);
1488
10
    if (!Context.CurRegion.contains(L))
1489
0
      continue;
1490
10
    
if (10
Context.BoxedLoopsSet.count(L)10
)
1491
0
      continue;
1492
10
    unsigned StmtsWithStoresInLoops = 0;
1493
74
    for (auto *LBB : L->blocks()) {
1494
74
      bool MemStore = false;
1495
74
      for (auto &I : *LBB)
1496
303
        MemStore |= isa<StoreInst>(&I);
1497
74
      StmtsWithStoresInLoops += MemStore;
1498
74
    }
1499
10
    return (StmtsWithStoresInLoops > 1);
1500
10
  }
1501
0
  return false;
1502
10
}
1503
1504
1.20k
bool ScopDetection::isProfitableRegion(DetectionContext &Context) const {
1505
1.20k
  Region &CurRegion = Context.CurRegion;
1506
1.20k
1507
1.20k
  if (PollyProcessUnprofitable)
1508
1.18k
    return true;
1509
1.20k
1510
1.20k
  // We can probably not do a lot on scops that only write or only read
1511
1.20k
  // data.
1512
21
  
if (21
!Context.hasStores || 21
!Context.hasLoads19
)
1513
4
    return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);
1514
21
1515
21
  int NumLoops =
1516
17
      countBeneficialLoops(&CurRegion, SE, LI, MIN_LOOP_TRIP_COUNT).NumLoops;
1517
17
  int NumAffineLoops = NumLoops - Context.BoxedLoopsSet.size();
1518
17
1519
17
  // Scops with at least two loops may allow either loop fusion or tiling and
1520
17
  // are consequently interesting to look at.
1521
17
  if (NumAffineLoops >= 2)
1522
3
    return true;
1523
17
1524
17
  // A loop with multiple non-trivial blocks might be amendable to distribution.
1525
14
  
if (14
NumAffineLoops == 1 && 14
hasPossiblyDistributableLoop(Context)10
)
1526
2
    return true;
1527
14
1528
14
  // Scops that contain a loop with a non-trivial amount of computation per
1529
14
  // loop-iteration are interesting as we may be able to parallelize such
1530
14
  // loops. Individual loops that have only a small amount of computation
1531
14
  // per-iteration are performance-wise very fragile as any change to the
1532
14
  // loop induction variables may affect performance. To not cause spurious
1533
14
  // performance regressions, we do not consider such loops.
1534
12
  
if (12
NumAffineLoops == 1 && 12
hasSufficientCompute(Context, NumLoops)8
)
1535
1
    return true;
1536
12
1537
11
  return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);
1538
1.20k
}
1539
1540
4.22k
bool ScopDetection::isValidRegion(DetectionContext &Context) const {
1541
4.22k
  Region &CurRegion = Context.CurRegion;
1542
4.22k
1543
4.22k
  DEBUG(dbgs() << "Checking region: " << CurRegion.getNameStr() << "\n\t");
1544
4.22k
1545
4.22k
  if (
!PollyAllowFullFunction && 4.22k
CurRegion.isTopLevelRegion()4.21k
)
{1.29k
1546
1.29k
    DEBUG(dbgs() << "Top level region is invalid\n");
1547
1.29k
    return false;
1548
4.22k
  }
1549
4.22k
1550
4.22k
  DebugLoc DbgLoc;
1551
2.93k
  if (CurRegion.getExit() &&
1552
2.93k
      
isa<UnreachableInst>(CurRegion.getExit()->getTerminator())2.92k
)
{23
1553
23
    DEBUG(dbgs() << "Unreachable in exit\n");
1554
23
    return invalid<ReportUnreachableInExit>(Context, /*Assert=*/true,
1555
23
                                            CurRegion.getExit(), DbgLoc);
1556
2.93k
  }
1557
2.93k
1558
2.91k
  
if (2.91k
!CurRegion.getEntry()->getName().count(OnlyRegion)2.91k
)
{0
1559
0
    DEBUG({
1560
0
      dbgs() << "Region entry does not match -polly-region-only";
1561
0
      dbgs() << "\n";
1562
0
    });
1563
0
    return false;
1564
2.91k
  }
1565
2.91k
1566
2.91k
  // SCoP cannot contain the entry block of the function, because we need
1567
2.91k
  // to insert alloca instruction there when translate scalar to array.
1568
2.91k
  
if (2.91k
!PollyAllowFullFunction &&2.91k
1569
2.91k
      CurRegion.getEntry() ==
1570
2.91k
          &(CurRegion.getEntry()->getParent()->getEntryBlock()))
1571
80
    return invalid<ReportEntry>(Context, /*Assert=*/true, CurRegion.getEntry());
1572
2.91k
1573
2.83k
  
if (2.83k
!allBlocksValid(Context)2.83k
)
1574
306
    return false;
1575
2.83k
1576
2.52k
  
if (2.52k
!isReducibleRegion(CurRegion, DbgLoc)2.52k
)
1577
2.52k
    return invalid<ReportIrreducibleRegion>(Context, /*Assert=*/true,
1578
2.52k
                                            &CurRegion, DbgLoc);
1579
2.52k
1580
2.52k
  
DEBUG2.52k
(dbgs() << "OK\n");2.52k
1581
2.52k
  return true;
1582
4.22k
}
1583
1584
0
void ScopDetection::markFunctionAsInvalid(Function *F) {
1585
0
  F->addFnAttr(PollySkipFnAttr);
1586
0
}
1587
1588
1.32k
bool ScopDetection::isValidFunction(llvm::Function &F) {
1589
1.32k
  return !F.hasFnAttribute(PollySkipFnAttr);
1590
1.32k
}
1591
1592
2
void ScopDetection::printLocations(llvm::Function &F) {
1593
2
  for (const Region *R : *this) {
1594
2
    unsigned LineEntry, LineExit;
1595
2
    std::string FileName;
1596
2
1597
2
    getDebugLocation(R, LineEntry, LineExit, FileName);
1598
2
    DiagnosticScopFound Diagnostic(F, FileName, LineEntry, LineExit);
1599
2
    F.getContext().diagnose(Diagnostic);
1600
2
  }
1601
2
}
1602
1603
1.29k
void ScopDetection::emitMissedRemarks(const Function &F) {
1604
3.41k
  for (auto &DIt : DetectionContextMap) {
1605
3.41k
    auto &DC = DIt.getSecond();
1606
3.41k
    if (DC.Log.hasErrors())
1607
423
      emitRejectionRemarks(DIt.getFirst(), DC.Log, ORE);
1608
3.41k
  }
1609
1.29k
}
1610
1611
2.52k
bool ScopDetection::isReducibleRegion(Region &R, DebugLoc &DbgLoc) const {
1612
2.52k
  /// Enum for coloring BBs in Region.
1613
2.52k
  ///
1614
2.52k
  /// WHITE - Unvisited BB in DFS walk.
1615
2.52k
  /// GREY - BBs which are currently on the DFS stack for processing.
1616
2.52k
  /// BLACK - Visited and completely processed BB.
1617
2.52k
  enum Color { WHITE, GREY, BLACK };
1618
2.52k
1619
2.52k
  BasicBlock *REntry = R.getEntry();
1620
2.52k
  BasicBlock *RExit = R.getExit();
1621
2.52k
  // Map to match the color of a BasicBlock during the DFS walk.
1622
2.52k
  DenseMap<const BasicBlock *, Color> BBColorMap;
1623
2.52k
  // Stack keeping track of current BB and index of next child to be processed.
1624
2.52k
  std::stack<std::pair<BasicBlock *, unsigned>> DFSStack;
1625
2.52k
1626
2.52k
  unsigned AdjacentBlockIndex = 0;
1627
2.52k
  BasicBlock *CurrBB, *SuccBB;
1628
2.52k
  CurrBB = REntry;
1629
2.52k
1630
2.52k
  // Initialize the map for all BB with WHITE color.
1631
2.52k
  for (auto *BB : R.blocks())
1632
11.1k
    BBColorMap[BB] = WHITE;
1633
2.52k
1634
2.52k
  // Process the entry block of the Region.
1635
2.52k
  BBColorMap[CurrBB] = GREY;
1636
2.52k
  DFSStack.push(std::make_pair(CurrBB, 0));
1637
2.52k
1638
22.2k
  while (
!DFSStack.empty()22.2k
)
{19.7k
1639
19.7k
    // Get next BB on stack to be processed.
1640
19.7k
    CurrBB = DFSStack.top().first;
1641
19.7k
    AdjacentBlockIndex = DFSStack.top().second;
1642
19.7k
    DFSStack.pop();
1643
19.7k
1644
19.7k
    // Loop to iterate over the successors of current BB.
1645
19.7k
    const TerminatorInst *TInst = CurrBB->getTerminator();
1646
19.7k
    unsigned NSucc = TInst->getNumSuccessors();
1647
26.7k
    for (unsigned I = AdjacentBlockIndex; I < NSucc;
1648
19.7k
         
++I, ++AdjacentBlockIndex7.05k
)
{15.6k
1649
15.6k
      SuccBB = TInst->getSuccessor(I);
1650
15.6k
1651
15.6k
      // Checks for region exit block and self-loops in BB.
1652
15.6k
      if (
SuccBB == RExit || 15.6k
SuccBB == CurrBB12.6k
)
1653
3.91k
        continue;
1654
15.6k
1655
15.6k
      // WHITE indicates an unvisited BB in DFS walk.
1656
11.7k
      
if (11.7k
BBColorMap[SuccBB] == WHITE11.7k
)
{8.60k
1657
8.60k
        // Push the current BB and the index of the next child to be visited.
1658
8.60k
        DFSStack.push(std::make_pair(CurrBB, I + 1));
1659
8.60k
        // Push the next BB to be processed.
1660
8.60k
        DFSStack.push(std::make_pair(SuccBB, 0));
1661
8.60k
        // First time the BB is being processed.
1662
8.60k
        BBColorMap[SuccBB] = GREY;
1663
8.60k
        break;
1664
11.7k
      } else 
if (3.14k
BBColorMap[SuccBB] == GREY3.14k
)
{2.44k
1665
2.44k
        // GREY indicates a loop in the control flow.
1666
2.44k
        // If the destination dominates the source, it is a natural loop
1667
2.44k
        // else, an irreducible control flow in the region is detected.
1668
2.44k
        if (
!DT.dominates(SuccBB, CurrBB)2.44k
)
{3
1669
3
          // Get debug info of instruction which causes irregular control flow.
1670
3
          DbgLoc = TInst->getDebugLoc();
1671
3
          return false;
1672
2.44k
        }
1673
11.7k
      }
1674
19.7k
    }
1675
19.7k
1676
19.7k
    // If all children of current BB have been processed,
1677
19.7k
    // then mark that BB as fully processed.
1678
19.7k
    
if (19.7k
AdjacentBlockIndex == NSucc19.7k
)
1679
11.1k
      BBColorMap[CurrBB] = BLACK;
1680
19.7k
  }
1681
2.52k
1682
2.52k
  return true;
1683
2.52k
}
1684
1685
static void updateLoopCountStatistic(ScopDetection::LoopStats Stats,
1686
2.38k
                                     bool OnlyProfitable) {
1687
2.38k
  if (
!OnlyProfitable2.38k
)
{1.20k
1688
1.20k
    NumLoopsInScop += Stats.NumLoops;
1689
1.20k
    MaxNumLoopsInScop =
1690
1.20k
        std::max(MaxNumLoopsInScop.getValue(), (unsigned)Stats.NumLoops);
1691
1.20k
    if (Stats.MaxDepth == 1)
1692
783
      NumScopsDepthOne++;
1693
419
    else 
if (419
Stats.MaxDepth == 2419
)
1694
235
      NumScopsDepthTwo++;
1695
184
    else 
if (184
Stats.MaxDepth == 3184
)
1696
77
      NumScopsDepthThree++;
1697
107
    else 
if (107
Stats.MaxDepth == 4107
)
1698
4
      NumScopsDepthFour++;
1699
103
    else 
if (103
Stats.MaxDepth == 5103
)
1700
0
      NumScopsDepthFive++;
1701
103
    else
1702
103
      NumScopsDepthLarger++;
1703
2.38k
  } else {
1704
1.18k
    NumLoopsInProfScop += Stats.NumLoops;
1705
1.18k
    MaxNumLoopsInProfScop =
1706
1.18k
        std::max(MaxNumLoopsInProfScop.getValue(), (unsigned)Stats.NumLoops);
1707
1.18k
    if (Stats.MaxDepth == 1)
1708
775
      NumProfScopsDepthOne++;
1709
412
    else 
if (412
Stats.MaxDepth == 2412
)
1710
229
      NumProfScopsDepthTwo++;
1711
183
    else 
if (183
Stats.MaxDepth == 3183
)
1712
77
      NumProfScopsDepthThree++;
1713
106
    else 
if (106
Stats.MaxDepth == 4106
)
1714
3
      NumProfScopsDepthFour++;
1715
103
    else 
if (103
Stats.MaxDepth == 5103
)
1716
0
      NumProfScopsDepthFive++;
1717
103
    else
1718
103
      NumProfScopsDepthLarger++;
1719
2.38k
  }
1720
2.38k
}
1721
1722
ScopDetection::DetectionContext *
1723
2.31k
ScopDetection::getDetectionContext(const Region *R) const {
1724
2.31k
  auto DCMIt = DetectionContextMap.find(getBBPairForRegion(R));
1725
2.31k
  if (DCMIt == DetectionContextMap.end())
1726
1
    return nullptr;
1727
2.30k
  return &DCMIt->second;
1728
2.31k
}
1729
1730
1.20k
const RejectLog *ScopDetection::lookupRejectionLog(const Region *R) const {
1731
1.20k
  const DetectionContext *DC = getDetectionContext(R);
1732
1.20k
  return DC ? 
&DC->Log1.19k
:
nullptr1
;
1733
1.20k
}
1734
1735
0
void polly::ScopDetection::verifyRegion(const Region &R) const {
1736
0
  assert(isMaxRegionInScop(R) && "Expect R is a valid region.");
1737
0
1738
0
  DetectionContext Context(const_cast<Region &>(R), AA, true /*verifying*/);
1739
0
  isValidRegion(Context);
1740
0
}
1741
1742
0
void polly::ScopDetection::verifyAnalysis() const {
1743
0
  if (!VerifyScops)
1744
0
    return;
1745
0
1746
0
  for (const Region *R : ValidRegions)
1747
0
    verifyRegion(*R);
1748
0
}
1749
1750
1.33k
bool ScopDetectionWrapperPass::runOnFunction(llvm::Function &F) {
1751
1.33k
  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1752
1.33k
  auto &RI = getAnalysis<RegionInfoPass>().getRegionInfo();
1753
1.33k
  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
1754
1.33k
  auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1755
1.33k
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1756
1.33k
  auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
1757
1.33k
  Result.reset(new ScopDetection(F, DT, SE, LI, RI, AA, ORE));
1758
1.33k
  return false;
1759
1.33k
}
1760
1761
1.19k
void ScopDetectionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
1762
1.19k
  AU.addRequired<LoopInfoWrapperPass>();
1763
1.19k
  AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
1764
1.19k
  AU.addRequired<DominatorTreeWrapperPass>();
1765
1.19k
  AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
1766
1.19k
  // We also need AA and RegionInfo when we are verifying analysis.
1767
1.19k
  AU.addRequiredTransitive<AAResultsWrapperPass>();
1768
1.19k
  AU.addRequiredTransitive<RegionInfoPass>();
1769
1.19k
  AU.setPreservesAll();
1770
1.19k
}
1771
1772
124
void ScopDetectionWrapperPass::print(raw_ostream &OS, const Module *) const {
1773
124
  for (const Region *R : Result->ValidRegions)
1774
73
    OS << "Valid Region for Scop: " << R->getNameStr() << '\n';
1775
124
1776
124
  OS << "\n";
1777
124
}
1778
1779
1.19k
ScopDetectionWrapperPass::ScopDetectionWrapperPass() : FunctionPass(ID) {
1780
1.19k
  // Disable runtime alias checks if we ignore aliasing all together.
1781
1.19k
  if (IgnoreAliasing)
1782
20
    PollyUseRuntimeAliasChecks = false;
1783
1.19k
}
1784
719
ScopAnalysis::ScopAnalysis() {
1785
719
  // Disable runtime alias checks if we ignore aliasing all together.
1786
719
  if (IgnoreAliasing)
1787
0
    PollyUseRuntimeAliasChecks = false;
1788
719
}
1789
1790
1.30k
void ScopDetectionWrapperPass::releaseMemory() { Result.reset(); }
1791
1792
char ScopDetectionWrapperPass::ID;
1793
1794
AnalysisKey ScopAnalysis::Key;
1795
1796
2
ScopDetection ScopAnalysis::run(Function &F, FunctionAnalysisManager &FAM) {
1797
2
  auto &LI = FAM.getResult<LoopAnalysis>(F);
1798
2
  auto &RI = FAM.getResult<RegionInfoAnalysis>(F);
1799
2
  auto &AA = FAM.getResult<AAManager>(F);
1800
2
  auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F);
1801
2
  auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
1802
2
  auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
1803
2
  return {F, DT, SE, LI, RI, AA, ORE};
1804
2
}
1805
1806
PreservedAnalyses ScopAnalysisPrinterPass::run(Function &F,
1807
0
                                               FunctionAnalysisManager &FAM) {
1808
0
  Stream << "Detected Scops in Function " << F.getName() << "\n";
1809
0
  auto &SD = FAM.getResult<ScopAnalysis>(F);
1810
0
  for (const Region *R : SD.ValidRegions)
1811
0
    Stream << "Valid Region for Scop: " << R->getNameStr() << '\n';
1812
0
1813
0
  Stream << "\n";
1814
0
  return PreservedAnalyses::all();
1815
0
}
1816
1817
0
Pass *polly::createScopDetectionWrapperPassPass() {
1818
0
  return new ScopDetectionWrapperPass();
1819
0
}
1820
1821
41.9k
INITIALIZE_PASS_BEGIN41.9k
(ScopDetectionWrapperPass, "polly-detect",41.9k
1822
41.9k
                      "Polly - Detect static control parts (SCoPs)", false,
1823
41.9k
                      false);
1824
41.9k
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass);
1825
41.9k
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
1826
41.9k
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
1827
41.9k
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
1828
41.9k
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
1829
41.9k
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass);
1830
41.9k
INITIALIZE_PASS_END(ScopDetectionWrapperPass, "polly-detect",
1831
                    "Polly - Detect static control parts (SCoPs)", false, false)