/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/tools/polly/lib/Analysis/ScopDetection.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- ScopDetection.cpp - Detect Scops -----------------------------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // Detect the maximal Scops of a function. |
10 | | // |
11 | | // A static control part (Scop) is a subgraph of the control flow graph (CFG) |
12 | | // that only has statically known control flow and can therefore be described |
13 | | // within the polyhedral model. |
14 | | // |
15 | | // Every Scop fulfills these restrictions: |
16 | | // |
17 | | // * It is a single entry single exit region |
18 | | // |
19 | | // * Only affine linear bounds in the loops |
20 | | // |
21 | | // Every natural loop in a Scop must have a number of loop iterations that can |
22 | | // be described as an affine linear function in surrounding loop iterators or |
23 | | // parameters. (A parameter is a scalar that does not change its value during |
24 | | // execution of the Scop). |
25 | | // |
26 | | // * Only comparisons of affine linear expressions in conditions |
27 | | // |
28 | | // * All loops and conditions perfectly nested |
29 | | // |
30 | | // The control flow needs to be structured such that it could be written using |
31 | | // just 'for' and 'if' statements, without the need for any 'goto', 'break' or |
32 | | // 'continue'. |
33 | | // |
34 | | // * Side effect free functions call |
35 | | // |
36 | | // Function calls and intrinsics that do not have side effects (readnone) |
37 | | // or memory intrinsics (memset, memcpy, memmove) are allowed. |
38 | | // |
39 | | // The Scop detection finds the largest Scops by checking if the largest |
40 | | // region is a Scop. If this is not the case, its canonical subregions are |
41 | | // checked until a region is a Scop. It is now tried to extend this Scop by |
42 | | // creating a larger non canonical region. |
43 | | // |
44 | | //===----------------------------------------------------------------------===// |
45 | | |
46 | | #include "polly/ScopDetection.h" |
47 | | #include "polly/LinkAllPasses.h" |
48 | | #include "polly/Options.h" |
49 | | #include "polly/ScopDetectionDiagnostic.h" |
50 | | #include "polly/Support/SCEVValidator.h" |
51 | | #include "polly/Support/ScopHelper.h" |
52 | | #include "polly/Support/ScopLocation.h" |
53 | | #include "llvm/ADT/SmallPtrSet.h" |
54 | | #include "llvm/ADT/Statistic.h" |
55 | | #include "llvm/Analysis/AliasAnalysis.h" |
56 | | #include "llvm/Analysis/Loads.h" |
57 | | #include "llvm/Analysis/LoopInfo.h" |
58 | | #include "llvm/Analysis/OptimizationRemarkEmitter.h" |
59 | | #include "llvm/Analysis/RegionInfo.h" |
60 | | #include "llvm/Analysis/ScalarEvolution.h" |
61 | | #include "llvm/Analysis/ScalarEvolutionExpressions.h" |
62 | | #include "llvm/IR/BasicBlock.h" |
63 | | #include "llvm/IR/DebugLoc.h" |
64 | | #include "llvm/IR/DerivedTypes.h" |
65 | | #include "llvm/IR/DiagnosticInfo.h" |
66 | | #include "llvm/IR/DiagnosticPrinter.h" |
67 | | #include "llvm/IR/Dominators.h" |
68 | | #include "llvm/IR/Function.h" |
69 | | #include "llvm/IR/InstrTypes.h" |
70 | | #include "llvm/IR/Instruction.h" |
71 | | #include "llvm/IR/Instructions.h" |
72 | | #include "llvm/IR/IntrinsicInst.h" |
73 | | #include "llvm/IR/Metadata.h" |
74 | | #include "llvm/IR/Module.h" |
75 | | #include "llvm/IR/PassManager.h" |
76 | | #include "llvm/IR/Value.h" |
77 | | #include "llvm/Pass.h" |
78 | | #include "llvm/Support/Debug.h" |
79 | | #include "llvm/Support/raw_ostream.h" |
80 | | #include <cassert> |
81 | | |
82 | | using namespace llvm; |
83 | | using namespace polly; |
84 | | |
85 | | #define DEBUG_TYPE "polly-detect" |
86 | | |
87 | | // This option is set to a very high value, as analyzing such loops increases |
88 | | // compile time on several cases. For experiments that enable this option, |
89 | | // a value of around 40 has been working to avoid run-time regressions with |
90 | | // Polly while still exposing interesting optimization opportunities. |
91 | | static cl::opt<int> ProfitabilityMinPerLoopInstructions( |
92 | | "polly-detect-profitability-min-per-loop-insts", |
93 | | cl::desc("The minimal number of per-loop instructions before a single loop " |
94 | | "region is considered profitable"), |
95 | | cl::Hidden, cl::ValueRequired, cl::init(100000000), cl::cat(PollyCategory)); |
96 | | |
97 | | bool polly::PollyProcessUnprofitable; |
98 | | |
99 | | static cl::opt<bool, true> XPollyProcessUnprofitable( |
100 | | "polly-process-unprofitable", |
101 | | cl::desc( |
102 | | "Process scops that are unlikely to benefit from Polly optimizations."), |
103 | | cl::location(PollyProcessUnprofitable), cl::init(false), cl::ZeroOrMore, |
104 | | cl::cat(PollyCategory)); |
105 | | |
106 | | static cl::list<std::string> OnlyFunctions( |
107 | | "polly-only-func", |
108 | | cl::desc("Only run on functions that match a regex. " |
109 | | "Multiple regexes can be comma separated. " |
110 | | "Scop detection will run on all functions that match " |
111 | | "ANY of the regexes provided."), |
112 | | cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory)); |
113 | | |
114 | | static cl::list<std::string> IgnoredFunctions( |
115 | | "polly-ignore-func", |
116 | | cl::desc("Ignore functions that match a regex. " |
117 | | "Multiple regexes can be comma separated. " |
118 | | "Scop detection will ignore all functions that match " |
119 | | "ANY of the regexes provided."), |
120 | | cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory)); |
121 | | |
122 | | bool polly::PollyAllowFullFunction; |
123 | | |
124 | | static cl::opt<bool, true> |
125 | | XAllowFullFunction("polly-detect-full-functions", |
126 | | cl::desc("Allow the detection of full functions"), |
127 | | cl::location(polly::PollyAllowFullFunction), |
128 | | cl::init(false), cl::cat(PollyCategory)); |
129 | | |
130 | | static cl::opt<std::string> OnlyRegion( |
131 | | "polly-only-region", |
132 | | cl::desc("Only run on certain regions (The provided identifier must " |
133 | | "appear in the name of the region's entry block"), |
134 | | cl::value_desc("identifier"), cl::ValueRequired, cl::init(""), |
135 | | cl::cat(PollyCategory)); |
136 | | |
137 | | static cl::opt<bool> |
138 | | IgnoreAliasing("polly-ignore-aliasing", |
139 | | cl::desc("Ignore possible aliasing of the array bases"), |
140 | | cl::Hidden, cl::init(false), cl::ZeroOrMore, |
141 | | cl::cat(PollyCategory)); |
142 | | |
143 | | bool polly::PollyAllowUnsignedOperations; |
144 | | |
145 | | static cl::opt<bool, true> XPollyAllowUnsignedOperations( |
146 | | "polly-allow-unsigned-operations", |
147 | | cl::desc("Allow unsigned operations such as comparisons or zero-extends."), |
148 | | cl::location(PollyAllowUnsignedOperations), cl::Hidden, cl::ZeroOrMore, |
149 | | cl::init(true), cl::cat(PollyCategory)); |
150 | | |
151 | | bool polly::PollyUseRuntimeAliasChecks; |
152 | | |
153 | | static cl::opt<bool, true> XPollyUseRuntimeAliasChecks( |
154 | | "polly-use-runtime-alias-checks", |
155 | | cl::desc("Use runtime alias checks to resolve possible aliasing."), |
156 | | cl::location(PollyUseRuntimeAliasChecks), cl::Hidden, cl::ZeroOrMore, |
157 | | cl::init(true), cl::cat(PollyCategory)); |
158 | | |
159 | | static cl::opt<bool> |
160 | | ReportLevel("polly-report", |
161 | | cl::desc("Print information about the activities of Polly"), |
162 | | cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); |
163 | | |
164 | | static cl::opt<bool> AllowDifferentTypes( |
165 | | "polly-allow-differing-element-types", |
166 | | cl::desc("Allow different element types for array accesses"), cl::Hidden, |
167 | | cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory)); |
168 | | |
169 | | static cl::opt<bool> |
170 | | AllowNonAffine("polly-allow-nonaffine", |
171 | | cl::desc("Allow non affine access functions in arrays"), |
172 | | cl::Hidden, cl::init(false), cl::ZeroOrMore, |
173 | | cl::cat(PollyCategory)); |
174 | | |
175 | | static cl::opt<bool> |
176 | | AllowModrefCall("polly-allow-modref-calls", |
177 | | cl::desc("Allow functions with known modref behavior"), |
178 | | cl::Hidden, cl::init(false), cl::ZeroOrMore, |
179 | | cl::cat(PollyCategory)); |
180 | | |
181 | | static cl::opt<bool> AllowNonAffineSubRegions( |
182 | | "polly-allow-nonaffine-branches", |
183 | | cl::desc("Allow non affine conditions for branches"), cl::Hidden, |
184 | | cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory)); |
185 | | |
186 | | static cl::opt<bool> |
187 | | AllowNonAffineSubLoops("polly-allow-nonaffine-loops", |
188 | | cl::desc("Allow non affine conditions for loops"), |
189 | | cl::Hidden, cl::init(false), cl::ZeroOrMore, |
190 | | cl::cat(PollyCategory)); |
191 | | |
192 | | static cl::opt<bool, true> |
193 | | TrackFailures("polly-detect-track-failures", |
194 | | cl::desc("Track failure strings in detecting scop regions"), |
195 | | cl::location(PollyTrackFailures), cl::Hidden, cl::ZeroOrMore, |
196 | | cl::init(true), cl::cat(PollyCategory)); |
197 | | |
198 | | static cl::opt<bool> KeepGoing("polly-detect-keep-going", |
199 | | cl::desc("Do not fail on the first error."), |
200 | | cl::Hidden, cl::ZeroOrMore, cl::init(false), |
201 | | cl::cat(PollyCategory)); |
202 | | |
203 | | static cl::opt<bool, true> |
204 | | PollyDelinearizeX("polly-delinearize", |
205 | | cl::desc("Delinearize array access functions"), |
206 | | cl::location(PollyDelinearize), cl::Hidden, |
207 | | cl::ZeroOrMore, cl::init(true), cl::cat(PollyCategory)); |
208 | | |
209 | | static cl::opt<bool> |
210 | | VerifyScops("polly-detect-verify", |
211 | | cl::desc("Verify the detected SCoPs after each transformation"), |
212 | | cl::Hidden, cl::init(false), cl::ZeroOrMore, |
213 | | cl::cat(PollyCategory)); |
214 | | |
215 | | bool polly::PollyInvariantLoadHoisting; |
216 | | |
217 | | static cl::opt<bool, true> XPollyInvariantLoadHoisting( |
218 | | "polly-invariant-load-hoisting", cl::desc("Hoist invariant loads."), |
219 | | cl::location(PollyInvariantLoadHoisting), cl::Hidden, cl::ZeroOrMore, |
220 | | cl::init(false), cl::cat(PollyCategory)); |
221 | | |
222 | | /// The minimal trip count under which loops are considered unprofitable. |
223 | | static const unsigned MIN_LOOP_TRIP_COUNT = 8; |
224 | | |
225 | | bool polly::PollyTrackFailures = false; |
226 | | bool polly::PollyDelinearize = false; |
227 | | StringRef polly::PollySkipFnAttr = "polly.skip.fn"; |
228 | | |
229 | | //===----------------------------------------------------------------------===// |
230 | | // Statistics. |
231 | | |
232 | | STATISTIC(NumScopRegions, "Number of scops"); |
233 | | STATISTIC(NumLoopsInScop, "Number of loops in scops"); |
234 | | STATISTIC(NumScopsDepthZero, "Number of scops with maximal loop depth 0"); |
235 | | STATISTIC(NumScopsDepthOne, "Number of scops with maximal loop depth 1"); |
236 | | STATISTIC(NumScopsDepthTwo, "Number of scops with maximal loop depth 2"); |
237 | | STATISTIC(NumScopsDepthThree, "Number of scops with maximal loop depth 3"); |
238 | | STATISTIC(NumScopsDepthFour, "Number of scops with maximal loop depth 4"); |
239 | | STATISTIC(NumScopsDepthFive, "Number of scops with maximal loop depth 5"); |
240 | | STATISTIC(NumScopsDepthLarger, |
241 | | "Number of scops with maximal loop depth 6 and larger"); |
242 | | STATISTIC(NumProfScopRegions, "Number of scops (profitable scops only)"); |
243 | | STATISTIC(NumLoopsInProfScop, |
244 | | "Number of loops in scops (profitable scops only)"); |
245 | | STATISTIC(NumLoopsOverall, "Number of total loops"); |
246 | | STATISTIC(NumProfScopsDepthZero, |
247 | | "Number of scops with maximal loop depth 0 (profitable scops only)"); |
248 | | STATISTIC(NumProfScopsDepthOne, |
249 | | "Number of scops with maximal loop depth 1 (profitable scops only)"); |
250 | | STATISTIC(NumProfScopsDepthTwo, |
251 | | "Number of scops with maximal loop depth 2 (profitable scops only)"); |
252 | | STATISTIC(NumProfScopsDepthThree, |
253 | | "Number of scops with maximal loop depth 3 (profitable scops only)"); |
254 | | STATISTIC(NumProfScopsDepthFour, |
255 | | "Number of scops with maximal loop depth 4 (profitable scops only)"); |
256 | | STATISTIC(NumProfScopsDepthFive, |
257 | | "Number of scops with maximal loop depth 5 (profitable scops only)"); |
258 | | STATISTIC(NumProfScopsDepthLarger, |
259 | | "Number of scops with maximal loop depth 6 and larger " |
260 | | "(profitable scops only)"); |
261 | | STATISTIC(MaxNumLoopsInScop, "Maximal number of loops in scops"); |
262 | | STATISTIC(MaxNumLoopsInProfScop, |
263 | | "Maximal number of loops in scops (profitable scops only)"); |
264 | | |
265 | | static void updateLoopCountStatistic(ScopDetection::LoopStats Stats, |
266 | | bool OnlyProfitable); |
267 | | |
268 | | namespace { |
269 | | |
270 | | class DiagnosticScopFound : public DiagnosticInfo { |
271 | | private: |
272 | | static int PluginDiagnosticKind; |
273 | | |
274 | | Function &F; |
275 | | std::string FileName; |
276 | | unsigned EntryLine, ExitLine; |
277 | | |
278 | | public: |
279 | | DiagnosticScopFound(Function &F, std::string FileName, unsigned EntryLine, |
280 | | unsigned ExitLine) |
281 | | : DiagnosticInfo(PluginDiagnosticKind, DS_Note), F(F), FileName(FileName), |
282 | 2 | EntryLine(EntryLine), ExitLine(ExitLine) {} |
283 | | |
284 | | void print(DiagnosticPrinter &DP) const override; |
285 | | |
286 | 0 | static bool classof(const DiagnosticInfo *DI) { |
287 | 0 | return DI->getKind() == PluginDiagnosticKind; |
288 | 0 | } |
289 | | }; |
290 | | } // namespace |
291 | | |
292 | | int DiagnosticScopFound::PluginDiagnosticKind = |
293 | | getNextAvailablePluginDiagnosticKind(); |
294 | | |
295 | 2 | void DiagnosticScopFound::print(DiagnosticPrinter &DP) const { |
296 | 2 | DP << "Polly detected an optimizable loop region (scop) in function '" << F |
297 | 2 | << "'\n"; |
298 | 2 | |
299 | 2 | if (FileName.empty()) { |
300 | 0 | DP << "Scop location is unknown. Compile with debug info " |
301 | 0 | "(-g) to get more precise information. "; |
302 | 0 | return; |
303 | 0 | } |
304 | 2 | |
305 | 2 | DP << FileName << ":" << EntryLine << ": Start of scop\n"; |
306 | 2 | DP << FileName << ":" << ExitLine << ": End of scop"; |
307 | 2 | } |
308 | | |
309 | | /// Check if a string matches any regex in a list of regexes. |
310 | | /// @param Str the input string to match against. |
311 | | /// @param RegexList a list of strings that are regular expressions. |
312 | | static bool doesStringMatchAnyRegex(StringRef Str, |
313 | 1.45k | const cl::list<std::string> &RegexList) { |
314 | 1.45k | for (auto RegexStr : RegexList) { |
315 | 17 | Regex R(RegexStr); |
316 | 17 | |
317 | 17 | std::string Err; |
318 | 17 | if (!R.isValid(Err)) |
319 | 0 | report_fatal_error("invalid regex given as input to polly: " + Err, true); |
320 | 17 | |
321 | 17 | if (R.match(Str)) |
322 | 8 | return true; |
323 | 17 | } |
324 | 1.45k | return false1.44k ; |
325 | 1.45k | } |
326 | | //===----------------------------------------------------------------------===// |
327 | | // ScopDetection. |
328 | | |
329 | | ScopDetection::ScopDetection(Function &F, const DominatorTree &DT, |
330 | | ScalarEvolution &SE, LoopInfo &LI, RegionInfo &RI, |
331 | | AliasAnalysis &AA, OptimizationRemarkEmitter &ORE) |
332 | 1.45k | : DT(DT), SE(SE), LI(LI), RI(RI), AA(AA), ORE(ORE) { |
333 | 1.45k | if (!PollyProcessUnprofitable && LI.empty()22 ) |
334 | 0 | return; |
335 | 1.45k | |
336 | 1.45k | Region *TopRegion = RI.getTopLevelRegion(); |
337 | 1.45k | |
338 | 1.45k | if (!OnlyFunctions.empty() && |
339 | 1.45k | !doesStringMatchAnyRegex(F.getName(), OnlyFunctions)7 ) |
340 | 2 | return; |
341 | 1.44k | |
342 | 1.44k | if (doesStringMatchAnyRegex(F.getName(), IgnoredFunctions)) |
343 | 3 | return; |
344 | 1.44k | |
345 | 1.44k | if (!isValidFunction(F)) |
346 | 42 | return; |
347 | 1.40k | |
348 | 1.40k | findScops(*TopRegion); |
349 | 1.40k | |
350 | 1.40k | NumScopRegions += ValidRegions.size(); |
351 | 1.40k | |
352 | 1.40k | // Prune non-profitable regions. |
353 | 3.69k | for (auto &DIt : DetectionContextMap) { |
354 | 3.69k | auto &DC = DIt.getSecond(); |
355 | 3.69k | if (DC.Log.hasErrors()) |
356 | 432 | continue; |
357 | 3.26k | if (!ValidRegions.count(&DC.CurRegion)) |
358 | 1.96k | continue; |
359 | 1.29k | LoopStats Stats = countBeneficialLoops(&DC.CurRegion, SE, LI, 0); |
360 | 1.29k | updateLoopCountStatistic(Stats, false /* OnlyProfitable */); |
361 | 1.29k | if (isProfitableRegion(DC)) { |
362 | 1.28k | updateLoopCountStatistic(Stats, true /* OnlyProfitable */); |
363 | 1.28k | continue; |
364 | 1.28k | } |
365 | 15 | |
366 | 15 | ValidRegions.remove(&DC.CurRegion); |
367 | 15 | } |
368 | 1.40k | |
369 | 1.40k | NumProfScopRegions += ValidRegions.size(); |
370 | 1.40k | NumLoopsOverall += countBeneficialLoops(TopRegion, SE, LI, 0).NumLoops; |
371 | 1.40k | |
372 | 1.40k | // Only makes sense when we tracked errors. |
373 | 1.40k | if (PollyTrackFailures) |
374 | 1.40k | emitMissedRemarks(F); |
375 | 1.40k | |
376 | 1.40k | if (ReportLevel) |
377 | 2 | printLocations(F); |
378 | 1.40k | |
379 | 1.40k | assert(ValidRegions.size() <= DetectionContextMap.size() && |
380 | 1.40k | "Cached more results than valid regions"); |
381 | 1.40k | } |
382 | | |
383 | | template <class RR, typename... Args> |
384 | | inline bool ScopDetection::invalid(DetectionContext &Context, bool Assert, |
385 | 604 | Args &&... Arguments) const { |
386 | 604 | if (!Context.Verifying) { |
387 | 604 | RejectLog &Log = Context.Log; |
388 | 604 | std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...); |
389 | 604 | |
390 | 604 | if (PollyTrackFailures) |
391 | 604 | Log.report(RejectReason); |
392 | 604 | |
393 | 604 | LLVM_DEBUG(dbgs() << RejectReason->getMessage()); |
394 | 604 | LLVM_DEBUG(dbgs() << "\n"); |
395 | 604 | } else { |
396 | 0 | assert(!Assert && "Verification of detected scop failed"); |
397 | 0 | } |
398 | 604 | |
399 | 604 | return false; |
400 | 604 | } Unexecuted instantiation: bool polly::ScopDetection::invalid<polly::ReportNonAffBranch, llvm::BasicBlock*, llvm::SCEV const*&, llvm::SCEV const*&, llvm::SwitchInst*&>(polly::ScopDetection::DetectionContext&, bool, llvm::BasicBlock*&&, llvm::SCEV const*&&&, llvm::SCEV const*&&&, llvm::SwitchInst*&&&) const bool polly::ScopDetection::invalid<polly::ReportInvalidCond, llvm::BranchInst*&, llvm::BasicBlock*>(polly::ScopDetection::DetectionContext&, bool, llvm::BranchInst*&&&, llvm::BasicBlock*&&) const Line | Count | Source | 385 | 1 | Args &&... Arguments) const { | 386 | 1 | if (!Context.Verifying) { | 387 | 1 | RejectLog &Log = Context.Log; | 388 | 1 | std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...); | 389 | 1 | | 390 | 1 | if (PollyTrackFailures) | 391 | 1 | Log.report(RejectReason); | 392 | 1 | | 393 | 1 | LLVM_DEBUG(dbgs() << RejectReason->getMessage()); | 394 | 1 | LLVM_DEBUG(dbgs() << "\n"); | 395 | 1 | } else { | 396 | 0 | assert(!Assert && "Verification of detected scop failed"); | 397 | 0 | } | 398 | 1 | | 399 | 1 | return false; | 400 | 1 | } |
bool polly::ScopDetection::invalid<polly::ReportUndefOperand, llvm::BasicBlock*, llvm::ICmpInst*&>(polly::ScopDetection::DetectionContext&, bool, llvm::BasicBlock*&&, llvm::ICmpInst*&&&) const Line | Count | Source | 385 | 5 | Args &&... Arguments) const { | 386 | 5 | if (!Context.Verifying) { | 387 | 5 | RejectLog &Log = Context.Log; | 388 | 5 | std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...); | 389 | 5 | | 390 | 5 | if (PollyTrackFailures) | 391 | 5 | Log.report(RejectReason); | 392 | 5 | | 393 | 5 | LLVM_DEBUG(dbgs() << RejectReason->getMessage()); | 394 | 5 | LLVM_DEBUG(dbgs() << "\n"); | 395 | 5 | } else { | 396 | 0 | assert(!Assert && "Verification of detected scop failed"); | 397 | 0 | } | 398 | 5 | | 399 | 5 | return false; | 400 | 5 | } |
bool polly::ScopDetection::invalid<polly::ReportNonAffBranch, llvm::BasicBlock*, llvm::SCEV const*&, llvm::SCEV const*&, llvm::ICmpInst*&>(polly::ScopDetection::DetectionContext&, bool, llvm::BasicBlock*&&, llvm::SCEV const*&&&, llvm::SCEV const*&&&, llvm::ICmpInst*&&&) const Line | Count | Source | 385 | 5 | Args &&... Arguments) const { | 386 | 5 | if (!Context.Verifying) { | 387 | 5 | RejectLog &Log = Context.Log; | 388 | 5 | std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...); | 389 | 5 | | 390 | 5 | if (PollyTrackFailures) | 391 | 5 | Log.report(RejectReason); | 392 | 5 | | 393 | 5 | LLVM_DEBUG(dbgs() << RejectReason->getMessage()); | 394 | 5 | LLVM_DEBUG(dbgs() << "\n"); | 395 | 5 | } else { | 396 | 0 | assert(!Assert && "Verification of detected scop failed"); | 397 | 0 | } | 398 | 5 | | 399 | 5 | return false; | 400 | 5 | } |
Unexecuted instantiation: bool polly::ScopDetection::invalid<polly::ReportInvalidTerminator, llvm::BasicBlock*>(polly::ScopDetection::DetectionContext&, bool, llvm::BasicBlock*&&) const bool polly::ScopDetection::invalid<polly::ReportUndefCond, llvm::Instruction*&, llvm::BasicBlock*>(polly::ScopDetection::DetectionContext&, bool, llvm::Instruction*&&&, llvm::BasicBlock*&&) const Line | Count | Source | 385 | 154 | Args &&... Arguments) const { | 386 | 154 | if (!Context.Verifying) { | 387 | 154 | RejectLog &Log = Context.Log; | 388 | 154 | std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...); | 389 | 154 | | 390 | 154 | if (PollyTrackFailures) | 391 | 154 | Log.report(RejectReason); | 392 | 154 | | 393 | 154 | LLVM_DEBUG(dbgs() << RejectReason->getMessage()); | 394 | 154 | LLVM_DEBUG(dbgs() << "\n"); | 395 | 154 | } else { | 396 | 0 | assert(!Assert && "Verification of detected scop failed"); | 397 | 0 | } | 398 | 154 | | 399 | 154 | return false; | 400 | 154 | } |
bool polly::ScopDetection::invalid<polly::ReportNonAffineAccess, llvm::SCEV const*&, llvm::Instruction const*&, llvm::Value*&>(polly::ScopDetection::DetectionContext&, bool, llvm::SCEV const*&&&, llvm::Instruction const*&&&, llvm::Value*&&&) const Line | Count | Source | 385 | 11 | Args &&... Arguments) const { | 386 | 11 | if (!Context.Verifying) { | 387 | 11 | RejectLog &Log = Context.Log; | 388 | 11 | std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...); | 389 | 11 | | 390 | 11 | if (PollyTrackFailures) | 391 | 11 | Log.report(RejectReason); | 392 | 11 | | 393 | 11 | LLVM_DEBUG(dbgs() << RejectReason->getMessage()); | 394 | 11 | LLVM_DEBUG(dbgs() << "\n"); | 395 | 11 | } else { | 396 | 0 | assert(!Assert && "Verification of detected scop failed"); | 397 | 0 | } | 398 | 11 | | 399 | 11 | return false; | 400 | 11 | } |
bool polly::ScopDetection::invalid<polly::ReportNonAffineAccess, llvm::SCEV const* const&, llvm::Instruction const*&, llvm::Value*&>(polly::ScopDetection::DetectionContext&, bool, llvm::SCEV const* const&&&, llvm::Instruction const*&&&, llvm::Value*&&&) const Line | Count | Source | 385 | 23 | Args &&... Arguments) const { | 386 | 23 | if (!Context.Verifying) { | 387 | 23 | RejectLog &Log = Context.Log; | 388 | 23 | std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...); | 389 | 23 | | 390 | 23 | if (PollyTrackFailures) | 391 | 23 | Log.report(RejectReason); | 392 | 23 | | 393 | 23 | LLVM_DEBUG(dbgs() << RejectReason->getMessage()); | 394 | 23 | LLVM_DEBUG(dbgs() << "\n"); | 395 | 23 | } else { | 396 | 0 | assert(!Assert && "Verification of detected scop failed"); | 397 | 0 | } | 398 | 23 | | 399 | 23 | return false; | 400 | 23 | } |
Unexecuted instantiation: bool polly::ScopDetection::invalid<polly::ReportNoBasePtr, llvm::Instruction*&>(polly::ScopDetection::DetectionContext&, bool, llvm::Instruction*&&&) const bool polly::ScopDetection::invalid<polly::ReportUndefBasePtr, llvm::Instruction*&>(polly::ScopDetection::DetectionContext&, bool, llvm::Instruction*&&&) const Line | Count | Source | 385 | 6 | Args &&... Arguments) const { | 386 | 6 | if (!Context.Verifying) { | 387 | 6 | RejectLog &Log = Context.Log; | 388 | 6 | std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...); | 389 | 6 | | 390 | 6 | if (PollyTrackFailures) | 391 | 6 | Log.report(RejectReason); | 392 | 6 | | 393 | 6 | LLVM_DEBUG(dbgs() << RejectReason->getMessage()); | 394 | 6 | LLVM_DEBUG(dbgs() << "\n"); | 395 | 6 | } else { | 396 | 0 | assert(!Assert && "Verification of detected scop failed"); | 397 | 0 | } | 398 | 6 | | 399 | 6 | return false; | 400 | 6 | } |
bool polly::ScopDetection::invalid<polly::ReportIntToPtr, llvm::IntToPtrInst*&>(polly::ScopDetection::DetectionContext&, bool, llvm::IntToPtrInst*&&&) const Line | Count | Source | 385 | 1 | Args &&... Arguments) const { | 386 | 1 | if (!Context.Verifying) { | 387 | 1 | RejectLog &Log = Context.Log; | 388 | 1 | std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...); | 389 | 1 | | 390 | 1 | if (PollyTrackFailures) | 391 | 1 | Log.report(RejectReason); | 392 | 1 | | 393 | 1 | LLVM_DEBUG(dbgs() << RejectReason->getMessage()); | 394 | 1 | LLVM_DEBUG(dbgs() << "\n"); | 395 | 1 | } else { | 396 | 0 | assert(!Assert && "Verification of detected scop failed"); | 397 | 0 | } | 398 | 1 | | 399 | 1 | return false; | 400 | 1 | } |
bool polly::ScopDetection::invalid<polly::ReportVariantBasePtr, llvm::Value*&, llvm::Instruction*&>(polly::ScopDetection::DetectionContext&, bool, llvm::Value*&&&, llvm::Instruction*&&&) const Line | Count | Source | 385 | 10 | Args &&... Arguments) const { | 386 | 10 | if (!Context.Verifying) { | 387 | 10 | RejectLog &Log = Context.Log; | 388 | 10 | std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...); | 389 | 10 | | 390 | 10 | if (PollyTrackFailures) | 391 | 10 | Log.report(RejectReason); | 392 | 10 | | 393 | 10 | LLVM_DEBUG(dbgs() << RejectReason->getMessage()); | 394 | 10 | LLVM_DEBUG(dbgs() << "\n"); | 395 | 10 | } else { | 396 | 0 | assert(!Assert && "Verification of detected scop failed"); | 397 | 0 | } | 398 | 10 | | 399 | 10 | return false; | 400 | 10 | } |
Unexecuted instantiation: bool polly::ScopDetection::invalid<polly::ReportDifferentArrayElementSize, llvm::Instruction*&, llvm::Value*&>(polly::ScopDetection::DetectionContext&, bool, llvm::Instruction*&&&, llvm::Value*&&&) const bool polly::ScopDetection::invalid<polly::ReportNonAffineAccess, llvm::SCEV const*&, llvm::Instruction*&, llvm::Value*&>(polly::ScopDetection::DetectionContext&, bool, llvm::SCEV const*&&&, llvm::Instruction*&&&, llvm::Value*&&&) const Line | Count | Source | 385 | 22 | Args &&... Arguments) const { | 386 | 22 | if (!Context.Verifying) { | 387 | 22 | RejectLog &Log = Context.Log; | 388 | 22 | std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...); | 389 | 22 | | 390 | 22 | if (PollyTrackFailures) | 391 | 22 | Log.report(RejectReason); | 392 | 22 | | 393 | 22 | LLVM_DEBUG(dbgs() << RejectReason->getMessage()); | 394 | 22 | LLVM_DEBUG(dbgs() << "\n"); | 395 | 22 | } else { | 396 | 0 | assert(!Assert && "Verification of detected scop failed"); | 397 | 0 | } | 398 | 22 | | 399 | 22 | return false; | 400 | 22 | } |
bool polly::ScopDetection::invalid<polly::ReportAlias, llvm::Instruction*&, llvm::AliasSet&>(polly::ScopDetection::DetectionContext&, bool, llvm::Instruction*&&&, llvm::AliasSet&&&) const Line | Count | Source | 385 | 3 | Args &&... Arguments) const { | 386 | 3 | if (!Context.Verifying) { | 387 | 3 | RejectLog &Log = Context.Log; | 388 | 3 | std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...); | 389 | 3 | | 390 | 3 | if (PollyTrackFailures) | 391 | 3 | Log.report(RejectReason); | 392 | 3 | | 393 | 3 | LLVM_DEBUG(dbgs() << RejectReason->getMessage()); | 394 | 3 | LLVM_DEBUG(dbgs() << "\n"); | 395 | 3 | } else { | 396 | 0 | assert(!Assert && "Verification of detected scop failed"); | 397 | 0 | } | 398 | 3 | | 399 | 3 | return false; | 400 | 3 | } |
bool polly::ScopDetection::invalid<polly::ReportFuncCall, llvm::Instruction*>(polly::ScopDetection::DetectionContext&, bool, llvm::Instruction*&&) const Line | Count | Source | 385 | 11 | Args &&... Arguments) const { | 386 | 11 | if (!Context.Verifying) { | 387 | 11 | RejectLog &Log = Context.Log; | 388 | 11 | std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...); | 389 | 11 | | 390 | 11 | if (PollyTrackFailures) | 391 | 11 | Log.report(RejectReason); | 392 | 11 | | 393 | 11 | LLVM_DEBUG(dbgs() << RejectReason->getMessage()); | 394 | 11 | LLVM_DEBUG(dbgs() << "\n"); | 395 | 11 | } else { | 396 | 0 | assert(!Assert && "Verification of detected scop failed"); | 397 | 0 | } | 398 | 11 | | 399 | 11 | return false; | 400 | 11 | } |
Unexecuted instantiation: bool polly::ScopDetection::invalid<polly::ReportAlloca, llvm::Instruction*>(polly::ScopDetection::DetectionContext&, bool, llvm::Instruction*&&) const Unexecuted instantiation: bool polly::ScopDetection::invalid<polly::ReportNonSimpleMemoryAccess, llvm::Instruction*>(polly::ScopDetection::DetectionContext&, bool, llvm::Instruction*&&) const bool polly::ScopDetection::invalid<polly::ReportUnknownInst, llvm::Instruction*>(polly::ScopDetection::DetectionContext&, bool, llvm::Instruction*&&) const Line | Count | Source | 385 | 49 | Args &&... Arguments) const { | 386 | 49 | if (!Context.Verifying) { | 387 | 49 | RejectLog &Log = Context.Log; | 388 | 49 | std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...); | 389 | 49 | | 390 | 49 | if (PollyTrackFailures) | 391 | 49 | Log.report(RejectReason); | 392 | 49 | | 393 | 49 | LLVM_DEBUG(dbgs() << RejectReason->getMessage()); | 394 | 49 | LLVM_DEBUG(dbgs() << "\n"); | 395 | 49 | } else { | 396 | 0 | assert(!Assert && "Verification of detected scop failed"); | 397 | 0 | } | 398 | 49 | | 399 | 49 | return false; | 400 | 49 | } |
Unexecuted instantiation: bool polly::ScopDetection::invalid<polly::ReportLoopHasNoExit, llvm::Loop*&>(polly::ScopDetection::DetectionContext&, bool, llvm::Loop*&&&) const bool polly::ScopDetection::invalid<polly::ReportLoopHasMultipleExits, llvm::Loop*&>(polly::ScopDetection::DetectionContext&, bool, llvm::Loop*&&&) const Line | Count | Source | 385 | 35 | Args &&... Arguments) const { | 386 | 35 | if (!Context.Verifying) { | 387 | 35 | RejectLog &Log = Context.Log; | 388 | 35 | std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...); | 389 | 35 | | 390 | 35 | if (PollyTrackFailures) | 391 | 35 | Log.report(RejectReason); | 392 | 35 | | 393 | 35 | LLVM_DEBUG(dbgs() << RejectReason->getMessage()); | 394 | 35 | LLVM_DEBUG(dbgs() << "\n"); | 395 | 35 | } else { | 396 | 0 | assert(!Assert && "Verification of detected scop failed"); | 397 | 0 | } | 398 | 35 | | 399 | 35 | return false; | 400 | 35 | } |
bool polly::ScopDetection::invalid<polly::ReportLoopBound, llvm::Loop*&, llvm::SCEV const*&>(polly::ScopDetection::DetectionContext&, bool, llvm::Loop*&&&, llvm::SCEV const*&&&) const Line | Count | Source | 385 | 134 | Args &&... Arguments) const { | 386 | 134 | if (!Context.Verifying) { | 387 | 134 | RejectLog &Log = Context.Log; | 388 | 134 | std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...); | 389 | 134 | | 390 | 134 | if (PollyTrackFailures) | 391 | 134 | Log.report(RejectReason); | 392 | 134 | | 393 | 134 | LLVM_DEBUG(dbgs() << RejectReason->getMessage()); | 394 | 134 | LLVM_DEBUG(dbgs() << "\n"); | 395 | 134 | } else { | 396 | 0 | assert(!Assert && "Verification of detected scop failed"); | 397 | 0 | } | 398 | 134 | | 399 | 134 | return false; | 400 | 134 | } |
bool polly::ScopDetection::invalid<polly::ReportUnprofitable, llvm::Region*>(polly::ScopDetection::DetectionContext&, bool, llvm::Region*&&) const Line | Count | Source | 385 | 16 | Args &&... Arguments) const { | 386 | 16 | if (!Context.Verifying) { | 387 | 16 | RejectLog &Log = Context.Log; | 388 | 16 | std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...); | 389 | 16 | | 390 | 16 | if (PollyTrackFailures) | 391 | 16 | Log.report(RejectReason); | 392 | 16 | | 393 | 16 | LLVM_DEBUG(dbgs() << RejectReason->getMessage()); | 394 | 16 | LLVM_DEBUG(dbgs() << "\n"); | 395 | 16 | } else { | 396 | 0 | assert(!Assert && "Verification of detected scop failed"); | 397 | 0 | } | 398 | 16 | | 399 | 16 | return false; | 400 | 16 | } |
bool polly::ScopDetection::invalid<polly::ReportLoopOnlySomeLatches, llvm::Loop*&>(polly::ScopDetection::DetectionContext&, bool, llvm::Loop*&&&) const Line | Count | Source | 385 | 5 | Args &&... Arguments) const { | 386 | 5 | if (!Context.Verifying) { | 387 | 5 | RejectLog &Log = Context.Log; | 388 | 5 | std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...); | 389 | 5 | | 390 | 5 | if (PollyTrackFailures) | 391 | 5 | Log.report(RejectReason); | 392 | 5 | | 393 | 5 | LLVM_DEBUG(dbgs() << RejectReason->getMessage()); | 394 | 5 | LLVM_DEBUG(dbgs() << "\n"); | 395 | 5 | } else { | 396 | 0 | assert(!Assert && "Verification of detected scop failed"); | 397 | 0 | } | 398 | 5 | | 399 | 5 | return false; | 400 | 5 | } |
bool polly::ScopDetection::invalid<polly::ReportUnreachableInExit, llvm::BasicBlock*, llvm::DebugLoc&>(polly::ScopDetection::DetectionContext&, bool, llvm::BasicBlock*&&, llvm::DebugLoc&&&) const Line | Count | Source | 385 | 26 | Args &&... Arguments) const { | 386 | 26 | if (!Context.Verifying) { | 387 | 26 | RejectLog &Log = Context.Log; | 388 | 26 | std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...); | 389 | 26 | | 390 | 26 | if (PollyTrackFailures) | 391 | 26 | Log.report(RejectReason); | 392 | 26 | | 393 | 26 | LLVM_DEBUG(dbgs() << RejectReason->getMessage()); | 394 | 26 | LLVM_DEBUG(dbgs() << "\n"); | 395 | 26 | } else { | 396 | 0 | assert(!Assert && "Verification of detected scop failed"); | 397 | 0 | } | 398 | 26 | | 399 | 26 | return false; | 400 | 26 | } |
bool polly::ScopDetection::invalid<polly::ReportEntry, llvm::BasicBlock*>(polly::ScopDetection::DetectionContext&, bool, llvm::BasicBlock*&&) const Line | Count | Source | 385 | 84 | Args &&... Arguments) const { | 386 | 84 | if (!Context.Verifying) { | 387 | 84 | RejectLog &Log = Context.Log; | 388 | 84 | std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...); | 389 | 84 | | 390 | 84 | if (PollyTrackFailures) | 391 | 84 | Log.report(RejectReason); | 392 | 84 | | 393 | 84 | LLVM_DEBUG(dbgs() << RejectReason->getMessage()); | 394 | 84 | LLVM_DEBUG(dbgs() << "\n"); | 395 | 84 | } else { | 396 | 0 | assert(!Assert && "Verification of detected scop failed"); | 397 | 0 | } | 398 | 84 | | 399 | 84 | return false; | 400 | 84 | } |
bool polly::ScopDetection::invalid<polly::ReportIrreducibleRegion, llvm::Region*, llvm::DebugLoc&>(polly::ScopDetection::DetectionContext&, bool, llvm::Region*&&, llvm::DebugLoc&&&) const Line | Count | Source | 385 | 3 | Args &&... Arguments) const { | 386 | 3 | if (!Context.Verifying) { | 387 | 3 | RejectLog &Log = Context.Log; | 388 | 3 | std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...); | 389 | 3 | | 390 | 3 | if (PollyTrackFailures) | 391 | 3 | Log.report(RejectReason); | 392 | 3 | | 393 | 3 | LLVM_DEBUG(dbgs() << RejectReason->getMessage()); | 394 | 3 | LLVM_DEBUG(dbgs() << "\n"); | 395 | 3 | } else { | 396 | 0 | assert(!Assert && "Verification of detected scop failed"); | 397 | 0 | } | 398 | 3 | | 399 | 3 | return false; | 400 | 3 | } |
|
401 | | |
402 | 4.34k | bool ScopDetection::isMaxRegionInScop(const Region &R, bool Verify) const { |
403 | 4.34k | if (!ValidRegions.count(&R)) |
404 | 3.13k | return false; |
405 | 1.20k | |
406 | 1.20k | if (Verify) { |
407 | 1.20k | DetectionContextMap.erase(getBBPairForRegion(&R)); |
408 | 1.20k | const auto &It = DetectionContextMap.insert(std::make_pair( |
409 | 1.20k | getBBPairForRegion(&R), |
410 | 1.20k | DetectionContext(const_cast<Region &>(R), AA, false /*verifying*/))); |
411 | 1.20k | DetectionContext &Context = It.first->second; |
412 | 1.20k | return isValidRegion(Context); |
413 | 1.20k | } |
414 | 0 | |
415 | 0 | return true; |
416 | 0 | } |
417 | | |
418 | 4 | std::string ScopDetection::regionIsInvalidBecause(const Region *R) const { |
419 | 4 | // Get the first error we found. Even in keep-going mode, this is the first |
420 | 4 | // reason that caused the candidate to be rejected. |
421 | 4 | auto *Log = lookupRejectionLog(R); |
422 | 4 | |
423 | 4 | // This can happen when we marked a region invalid, but didn't track |
424 | 4 | // an error for it. |
425 | 4 | if (!Log || !Log->hasErrors()3 ) |
426 | 4 | return ""; |
427 | 0 | |
428 | 0 | RejectReasonPtr RR = *Log->begin(); |
429 | 0 | return RR->getMessage(); |
430 | 0 | } |
431 | | |
432 | | bool ScopDetection::addOverApproximatedRegion(Region *AR, |
433 | 697 | DetectionContext &Context) const { |
434 | 697 | // If we already know about Ar we can exit. |
435 | 697 | if (!Context.NonAffineSubRegionSet.insert(AR)) |
436 | 63 | return true; |
437 | 634 | |
438 | 634 | // All loops in the region have to be overapproximated too if there |
439 | 634 | // are accesses that depend on the iteration count. |
440 | 634 | |
441 | 1.56k | for (BasicBlock *BB : AR->blocks())634 { |
442 | 1.56k | Loop *L = LI.getLoopFor(BB); |
443 | 1.56k | if (AR->contains(L)) |
444 | 224 | Context.BoxedLoopsSet.insert(L); |
445 | 1.56k | } |
446 | 634 | |
447 | 634 | return (AllowNonAffineSubLoops || Context.BoxedLoopsSet.empty()545 ); |
448 | 634 | } |
449 | | |
450 | | bool ScopDetection::onlyValidRequiredInvariantLoads( |
451 | 41.4k | InvariantLoadsSetTy &RequiredILS, DetectionContext &Context) const { |
452 | 41.4k | Region &CurRegion = Context.CurRegion; |
453 | 41.4k | const DataLayout &DL = CurRegion.getEntry()->getModule()->getDataLayout(); |
454 | 41.4k | |
455 | 41.4k | if (!PollyInvariantLoadHoisting && !RequiredILS.empty()33.5k ) |
456 | 371 | return false; |
457 | 41.0k | |
458 | 41.0k | for (LoadInst *Load : RequiredILS) { |
459 | 1.62k | // If we already know a load has been accepted as required invariant, we |
460 | 1.62k | // already run the validation below once and consequently don't need to |
461 | 1.62k | // run it again. Hence, we return early. For certain test cases (e.g., |
462 | 1.62k | // COSMO this avoids us spending 50% of scop-detection time in this |
463 | 1.62k | // very function (and its children). |
464 | 1.62k | if (Context.RequiredILS.count(Load)) |
465 | 843 | continue; |
466 | 786 | if (!isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS)) |
467 | 137 | return false; |
468 | 649 | |
469 | 649 | for (auto NonAffineRegion : Context.NonAffineSubRegionSet) { |
470 | 320 | if (isSafeToLoadUnconditionally(Load->getPointerOperand(), |
471 | 320 | Load->getType(), Load->getAlignment(), |
472 | 320 | DL)) |
473 | 6 | continue; |
474 | 314 | |
475 | 314 | if (NonAffineRegion->contains(Load) && |
476 | 314 | Load->getParent() != NonAffineRegion->getEntry()14 ) |
477 | 14 | return false; |
478 | 314 | } |
479 | 649 | } |
480 | 41.0k | |
481 | 41.0k | Context.RequiredILS.insert(RequiredILS.begin(), RequiredILS.end()); |
482 | 40.9k | |
483 | 40.9k | return true; |
484 | 41.0k | } |
485 | | |
486 | | bool ScopDetection::involvesMultiplePtrs(const SCEV *S0, const SCEV *S1, |
487 | 13.6k | Loop *Scope) const { |
488 | 13.6k | SetVector<Value *> Values; |
489 | 13.6k | findValues(S0, SE, Values); |
490 | 13.6k | if (S1) |
491 | 5.07k | findValues(S1, SE, Values); |
492 | 13.6k | |
493 | 13.6k | SmallPtrSet<Value *, 8> PtrVals; |
494 | 13.6k | for (auto *V : Values) { |
495 | 5.93k | if (auto *P2I = dyn_cast<PtrToIntInst>(V)) |
496 | 2 | V = P2I->getOperand(0); |
497 | 5.93k | |
498 | 5.93k | if (!V->getType()->isPointerTy()) |
499 | 5.86k | continue; |
500 | 70 | |
501 | 70 | auto *PtrSCEV = SE.getSCEVAtScope(V, Scope); |
502 | 70 | if (isa<SCEVConstant>(PtrSCEV)) |
503 | 2 | continue; |
504 | 68 | |
505 | 68 | auto *BasePtr = dyn_cast<SCEVUnknown>(SE.getPointerBase(PtrSCEV)); |
506 | 68 | if (!BasePtr) |
507 | 0 | return true; |
508 | 68 | |
509 | 68 | auto *BasePtrVal = BasePtr->getValue(); |
510 | 68 | if (PtrVals.insert(BasePtrVal).second) { |
511 | 68 | for (auto *PtrVal : PtrVals) |
512 | 68 | if (PtrVal != BasePtrVal && !AA.isNoAlias(PtrVal, BasePtrVal)0 ) |
513 | 0 | return true; |
514 | 68 | } |
515 | 68 | } |
516 | 13.6k | |
517 | 13.6k | return false; |
518 | 13.6k | } |
519 | | |
520 | | bool ScopDetection::isAffine(const SCEV *S, Loop *Scope, |
521 | 42.3k | DetectionContext &Context) const { |
522 | 42.3k | InvariantLoadsSetTy AccessILS; |
523 | 42.3k | if (!isAffineExpr(&Context.CurRegion, Scope, S, SE, &AccessILS)) |
524 | 886 | return false; |
525 | 41.4k | |
526 | 41.4k | if (!onlyValidRequiredInvariantLoads(AccessILS, Context)) |
527 | 522 | return false; |
528 | 40.9k | |
529 | 40.9k | return true; |
530 | 40.9k | } |
531 | | |
532 | | bool ScopDetection::isValidSwitch(BasicBlock &BB, SwitchInst *SI, |
533 | | Value *Condition, bool IsLoopBranch, |
534 | 39 | DetectionContext &Context) const { |
535 | 39 | Loop *L = LI.getLoopFor(&BB); |
536 | 39 | const SCEV *ConditionSCEV = SE.getSCEVAtScope(Condition, L); |
537 | 39 | |
538 | 39 | if (IsLoopBranch && L->isLoopLatch(&BB)1 ) |
539 | 1 | return false; |
540 | 38 | |
541 | 38 | // Check for invalid usage of different pointers in one expression. |
542 | 38 | if (involvesMultiplePtrs(ConditionSCEV, nullptr, L)) |
543 | 0 | return false; |
544 | 38 | |
545 | 38 | if (isAffine(ConditionSCEV, L, Context)) |
546 | 36 | return true; |
547 | 2 | |
548 | 2 | if (AllowNonAffineSubRegions && |
549 | 2 | addOverApproximatedRegion(RI.getRegionFor(&BB), Context)) |
550 | 2 | return true; |
551 | 0 | |
552 | 0 | return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, |
553 | 0 | ConditionSCEV, ConditionSCEV, SI); |
554 | 0 | } |
555 | | |
556 | | bool ScopDetection::isValidBranch(BasicBlock &BB, BranchInst *BI, |
557 | | Value *Condition, bool IsLoopBranch, |
558 | 29.4k | DetectionContext &Context) const { |
559 | 29.4k | // Constant integer conditions are always affine. |
560 | 29.4k | if (isa<ConstantInt>(Condition)) |
561 | 15.6k | return true; |
562 | 13.8k | |
563 | 13.8k | if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) { |
564 | 110 | auto Opcode = BinOp->getOpcode(); |
565 | 110 | if (Opcode == Instruction::And || Opcode == Instruction::Or95 ) { |
566 | 109 | Value *Op0 = BinOp->getOperand(0); |
567 | 109 | Value *Op1 = BinOp->getOperand(1); |
568 | 109 | return isValidBranch(BB, BI, Op0, IsLoopBranch, Context) && |
569 | 109 | isValidBranch(BB, BI, Op1, IsLoopBranch, Context); |
570 | 109 | } |
571 | 13.7k | } |
572 | 13.7k | |
573 | 13.7k | if (auto PHI = dyn_cast<PHINode>(Condition)) { |
574 | 10 | auto *Unique = dyn_cast_or_null<ConstantInt>( |
575 | 10 | getUniqueNonErrorValue(PHI, &Context.CurRegion, LI, DT)); |
576 | 10 | if (Unique && (2 Unique->isZero()2 || Unique->isOne()2 )) |
577 | 2 | return true; |
578 | 13.7k | } |
579 | 13.7k | |
580 | 13.7k | if (auto Load = dyn_cast<LoadInst>(Condition)) |
581 | 6 | if (!IsLoopBranch && Context.CurRegion.contains(Load)) { |
582 | 3 | Context.RequiredILS.insert(Load); |
583 | 3 | return true; |
584 | 3 | } |
585 | 13.7k | |
586 | 13.7k | // Non constant conditions of branches need to be ICmpInst. |
587 | 13.7k | if (!isa<ICmpInst>(Condition)) { |
588 | 136 | if (!IsLoopBranch && AllowNonAffineSubRegions && |
589 | 136 | addOverApproximatedRegion(RI.getRegionFor(&BB), Context)) |
590 | 135 | return true; |
591 | 1 | return invalid<ReportInvalidCond>(Context, /*Assert=*/true, BI, &BB); |
592 | 1 | } |
593 | 13.6k | |
594 | 13.6k | ICmpInst *ICmp = cast<ICmpInst>(Condition); |
595 | 13.6k | |
596 | 13.6k | // Are both operands of the ICmp affine? |
597 | 13.6k | if (isa<UndefValue>(ICmp->getOperand(0)) || |
598 | 13.6k | isa<UndefValue>(ICmp->getOperand(1))) |
599 | 5 | return invalid<ReportUndefOperand>(Context, /*Assert=*/true, &BB, ICmp); |
600 | 13.5k | |
601 | 13.5k | Loop *L = LI.getLoopFor(&BB); |
602 | 13.5k | const SCEV *LHS = SE.getSCEVAtScope(ICmp->getOperand(0), L); |
603 | 13.5k | const SCEV *RHS = SE.getSCEVAtScope(ICmp->getOperand(1), L); |
604 | 13.5k | |
605 | 13.5k | LHS = tryForwardThroughPHI(LHS, Context.CurRegion, SE, LI, DT); |
606 | 13.5k | RHS = tryForwardThroughPHI(RHS, Context.CurRegion, SE, LI, DT); |
607 | 13.5k | |
608 | 13.5k | // If unsigned operations are not allowed try to approximate the region. |
609 | 13.5k | if (ICmp->isUnsigned() && !PollyAllowUnsignedOperations167 ) |
610 | 0 | return !IsLoopBranch && AllowNonAffineSubRegions && |
611 | 0 | addOverApproximatedRegion(RI.getRegionFor(&BB), Context); |
612 | 13.5k | |
613 | 13.5k | // Check for invalid usage of different pointers in one expression. |
614 | 13.5k | if (ICmp->isEquality() && involvesMultiplePtrs(LHS, nullptr, L)8.51k && |
615 | 13.5k | involvesMultiplePtrs(RHS, nullptr, L)0 ) |
616 | 0 | return false; |
617 | 13.5k | |
618 | 13.5k | // Check for invalid usage of different pointers in a relational comparison. |
619 | 13.5k | if (ICmp->isRelational() && involvesMultiplePtrs(LHS, RHS, L)5.07k ) |
620 | 0 | return false; |
621 | 13.5k | |
622 | 13.5k | if (isAffine(LHS, L, Context) && isAffine(RHS, L, Context)13.2k ) |
623 | 13.0k | return true; |
624 | 596 | |
625 | 596 | if (!IsLoopBranch && AllowNonAffineSubRegions495 && |
626 | 596 | addOverApproximatedRegion(RI.getRegionFor(&BB), Context)493 ) |
627 | 490 | return true; |
628 | 106 | |
629 | 106 | if (IsLoopBranch) |
630 | 101 | return false; |
631 | 5 | |
632 | 5 | return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, LHS, RHS, |
633 | 5 | ICmp); |
634 | 5 | } |
635 | | |
636 | | bool ScopDetection::isValidCFG(BasicBlock &BB, bool IsLoopBranch, |
637 | | bool AllowUnreachable, |
638 | 29.4k | DetectionContext &Context) const { |
639 | 29.4k | Region &CurRegion = Context.CurRegion; |
640 | 29.4k | |
641 | 29.4k | Instruction *TI = BB.getTerminator(); |
642 | 29.4k | |
643 | 29.4k | if (AllowUnreachable && isa<UnreachableInst>(TI)138 ) |
644 | 0 | return true; |
645 | 29.4k | |
646 | 29.4k | // Return instructions are only valid if the region is the top level region. |
647 | 29.4k | if (isa<ReturnInst>(TI) && CurRegion.isTopLevelRegion()10 ) |
648 | 10 | return true; |
649 | 29.4k | |
650 | 29.4k | Value *Condition = getConditionFromTerminator(TI); |
651 | 29.4k | |
652 | 29.4k | if (!Condition) |
653 | 0 | return invalid<ReportInvalidTerminator>(Context, /*Assert=*/true, &BB); |
654 | 29.4k | |
655 | 29.4k | // UndefValue is not allowed as condition. |
656 | 29.4k | if (isa<UndefValue>(Condition)) |
657 | 154 | return invalid<ReportUndefCond>(Context, /*Assert=*/true, TI, &BB); |
658 | 29.3k | |
659 | 29.3k | if (BranchInst *BI = dyn_cast<BranchInst>(TI)) |
660 | 29.2k | return isValidBranch(BB, BI, Condition, IsLoopBranch, Context); |
661 | 39 | |
662 | 39 | SwitchInst *SI = dyn_cast<SwitchInst>(TI); |
663 | 39 | assert(SI && "Terminator was neither branch nor switch"); |
664 | 39 | |
665 | 39 | return isValidSwitch(BB, SI, Condition, IsLoopBranch, Context); |
666 | 39 | } |
667 | | |
668 | | bool ScopDetection::isValidCallInst(CallInst &CI, |
669 | 243 | DetectionContext &Context) const { |
670 | 243 | if (CI.doesNotReturn()) |
671 | 0 | return false; |
672 | 243 | |
673 | 243 | if (CI.doesNotAccessMemory()) |
674 | 116 | return true; |
675 | 127 | |
676 | 127 | if (auto *II = dyn_cast<IntrinsicInst>(&CI)) |
677 | 86 | if (isValidIntrinsicInst(*II, Context)) |
678 | 74 | return true; |
679 | 53 | |
680 | 53 | Function *CalledFunction = CI.getCalledFunction(); |
681 | 53 | |
682 | 53 | // Indirect calls are not supported. |
683 | 53 | if (CalledFunction == nullptr) |
684 | 0 | return false; |
685 | 53 | |
686 | 53 | if (isDebugCall(&CI)) { |
687 | 3 | LLVM_DEBUG(dbgs() << "Allow call to debug function: " |
688 | 3 | << CalledFunction->getName() << '\n'); |
689 | 3 | return true; |
690 | 3 | } |
691 | 50 | |
692 | 50 | if (AllowModrefCall) { |
693 | 39 | switch (AA.getModRefBehavior(CalledFunction)) { |
694 | 39 | case FMRB_UnknownModRefBehavior: |
695 | 0 | return false; |
696 | 39 | case FMRB_DoesNotAccessMemory: |
697 | 21 | case FMRB_OnlyReadsMemory: |
698 | 21 | // Implicitly disable delinearization since we have an unknown |
699 | 21 | // accesses with an unknown access function. |
700 | 21 | Context.HasUnknownAccess = true; |
701 | 21 | // Explicitly use addUnknown so we don't put a loop-variant |
702 | 21 | // pointer into the alias set. |
703 | 21 | Context.AST.addUnknown(&CI); |
704 | 21 | return true; |
705 | 21 | case FMRB_OnlyReadsArgumentPointees: |
706 | 18 | case FMRB_OnlyAccessesArgumentPointees: |
707 | 52 | for (const auto &Arg : CI.arg_operands()) { |
708 | 52 | if (!Arg->getType()->isPointerTy()) |
709 | 24 | continue; |
710 | 28 | |
711 | 28 | // Bail if a pointer argument has a base address not known to |
712 | 28 | // ScalarEvolution. Note that a zero pointer is acceptable. |
713 | 28 | auto *ArgSCEV = SE.getSCEVAtScope(Arg, LI.getLoopFor(CI.getParent())); |
714 | 28 | if (ArgSCEV->isZero()) |
715 | 14 | continue; |
716 | 14 | |
717 | 14 | auto *BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV)); |
718 | 14 | if (!BP) |
719 | 0 | return false; |
720 | 14 | |
721 | 14 | // Implicitly disable delinearization since we have an unknown |
722 | 14 | // accesses with an unknown access function. |
723 | 14 | Context.HasUnknownAccess = true; |
724 | 14 | } |
725 | 18 | |
726 | 18 | // Explicitly use addUnknown so we don't put a loop-variant |
727 | 18 | // pointer into the alias set. |
728 | 18 | Context.AST.addUnknown(&CI); |
729 | 18 | return true; |
730 | 18 | case FMRB_DoesNotReadMemory: |
731 | 0 | case FMRB_OnlyAccessesInaccessibleMem: |
732 | 0 | case FMRB_OnlyAccessesInaccessibleOrArgMem: |
733 | 0 | return false; |
734 | 11 | } |
735 | 11 | } |
736 | 11 | |
737 | 11 | return false; |
738 | 11 | } |
739 | | |
740 | | bool ScopDetection::isValidIntrinsicInst(IntrinsicInst &II, |
741 | 86 | DetectionContext &Context) const { |
742 | 86 | if (isIgnoredIntrinsic(&II)) |
743 | 29 | return true; |
744 | 57 | |
745 | 57 | // The closest loop surrounding the call instruction. |
746 | 57 | Loop *L = LI.getLoopFor(II.getParent()); |
747 | 57 | |
748 | 57 | // The access function and base pointer for memory intrinsics. |
749 | 57 | const SCEV *AF; |
750 | 57 | const SCEVUnknown *BP; |
751 | 57 | |
752 | 57 | switch (II.getIntrinsicID()) { |
753 | 57 | // Memory intrinsics that can be represented are supported. |
754 | 57 | case Intrinsic::memmove: |
755 | 19 | case Intrinsic::memcpy: |
756 | 19 | AF = SE.getSCEVAtScope(cast<MemTransferInst>(II).getSource(), L); |
757 | 19 | if (!AF->isZero()) { |
758 | 19 | BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF)); |
759 | 19 | // Bail if the source pointer is not valid. |
760 | 19 | if (!isValidAccess(&II, AF, BP, Context)) |
761 | 0 | return false; |
762 | 19 | } |
763 | 19 | LLVM_FALLTHROUGH; |
764 | 51 | case Intrinsic::memset: |
765 | 51 | AF = SE.getSCEVAtScope(cast<MemIntrinsic>(II).getDest(), L); |
766 | 51 | if (!AF->isZero()) { |
767 | 47 | BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF)); |
768 | 47 | // Bail if the destination pointer is not valid. |
769 | 47 | if (!isValidAccess(&II, AF, BP, Context)) |
770 | 0 | return false; |
771 | 51 | } |
772 | 51 | |
773 | 51 | // Bail if the length is not affine. |
774 | 51 | if (!isAffine(SE.getSCEVAtScope(cast<MemIntrinsic>(II).getLength(), L), L, |
775 | 51 | Context)) |
776 | 6 | return false; |
777 | 45 | |
778 | 45 | return true; |
779 | 45 | default: |
780 | 6 | break; |
781 | 6 | } |
782 | 6 | |
783 | 6 | return false; |
784 | 6 | } |
785 | | |
786 | | bool ScopDetection::isInvariant(Value &Val, const Region &Reg, |
787 | 12.5k | DetectionContext &Ctx) const { |
788 | 12.5k | // A reference to function argument or constant value is invariant. |
789 | 12.5k | if (isa<Argument>(Val) || isa<Constant>(Val)3.30k ) |
790 | 9.99k | return true; |
791 | 2.53k | |
792 | 2.53k | Instruction *I = dyn_cast<Instruction>(&Val); |
793 | 2.53k | if (!I) |
794 | 0 | return false; |
795 | 2.53k | |
796 | 2.53k | if (!Reg.contains(I)) |
797 | 2.03k | return true; |
798 | 499 | |
799 | 499 | // Loads within the SCoP may read arbitrary values, need to hoist them. If it |
800 | 499 | // is not hoistable, it will be rejected later, but here we assume it is and |
801 | 499 | // that makes the value invariant. |
802 | 499 | if (auto LI = dyn_cast<LoadInst>(I)) { |
803 | 489 | Ctx.RequiredILS.insert(LI); |
804 | 489 | return true; |
805 | 489 | } |
806 | 10 | |
807 | 10 | return false; |
808 | 10 | } |
809 | | |
810 | | namespace { |
811 | | |
812 | | /// Remove smax of smax(0, size) expressions from a SCEV expression and |
813 | | /// register the '...' components. |
814 | | /// |
815 | | /// Array access expressions as they are generated by GFortran contain smax(0, |
816 | | /// size) expressions that confuse the 'normal' delinearization algorithm. |
817 | | /// However, if we extract such expressions before the normal delinearization |
818 | | /// takes place they can actually help to identify array size expressions in |
819 | | /// Fortran accesses. For the subsequently following delinearization the smax(0, |
820 | | /// size) component can be replaced by just 'size'. This is correct as we will |
821 | | /// always add and verify the assumption that for all subscript expressions |
822 | | /// 'exp' the inequality 0 <= exp < size holds. Hence, we will also verify |
823 | | /// that 0 <= size, which means smax(0, size) == size. |
824 | | class SCEVRemoveMax : public SCEVRewriteVisitor<SCEVRemoveMax> { |
825 | | public: |
826 | | SCEVRemoveMax(ScalarEvolution &SE, std::vector<const SCEV *> *Terms) |
827 | 1.45k | : SCEVRewriteVisitor(SE), Terms(Terms) {} |
828 | | |
829 | | static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE, |
830 | 1.45k | std::vector<const SCEV *> *Terms = nullptr) { |
831 | 1.45k | SCEVRemoveMax Rewriter(SE, Terms); |
832 | 1.45k | return Rewriter.visit(Scev); |
833 | 1.45k | } |
834 | | |
835 | 69 | const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) { |
836 | 69 | if ((Expr->getNumOperands() == 2) && Expr->getOperand(0)->isZero()) { |
837 | 69 | auto Res = visit(Expr->getOperand(1)); |
838 | 69 | if (Terms) |
839 | 35 | (*Terms).push_back(Res); |
840 | 69 | return Res; |
841 | 69 | } |
842 | 0 | |
843 | 0 | return Expr; |
844 | 0 | } |
845 | | |
846 | | private: |
847 | | std::vector<const SCEV *> *Terms; |
848 | | }; |
849 | | } // namespace |
850 | | |
851 | | SmallVector<const SCEV *, 4> |
852 | | ScopDetection::getDelinearizationTerms(DetectionContext &Context, |
853 | 450 | const SCEVUnknown *BasePointer) const { |
854 | 450 | SmallVector<const SCEV *, 4> Terms; |
855 | 740 | for (const auto &Pair : Context.Accesses[BasePointer]) { |
856 | 740 | std::vector<const SCEV *> MaxTerms; |
857 | 740 | SCEVRemoveMax::rewrite(Pair.second, SE, &MaxTerms); |
858 | 740 | if (!MaxTerms.empty()) { |
859 | 23 | Terms.insert(Terms.begin(), MaxTerms.begin(), MaxTerms.end()); |
860 | 23 | continue; |
861 | 23 | } |
862 | 717 | // In case the outermost expression is a plain add, we check if any of its |
863 | 717 | // terms has the form 4 * %inst * %param * %param ..., aka a term that |
864 | 717 | // contains a product between a parameter and an instruction that is |
865 | 717 | // inside the scop. Such instructions, if allowed at all, are instructions |
866 | 717 | // SCEV can not represent, but Polly is still looking through. As a |
867 | 717 | // result, these instructions can depend on induction variables and are |
868 | 717 | // most likely no array sizes. However, terms that are multiplied with |
869 | 717 | // them are likely candidates for array sizes. |
870 | 717 | if (auto *AF = dyn_cast<SCEVAddExpr>(Pair.second)) { |
871 | 17 | for (auto Op : AF->operands()) { |
872 | 17 | if (auto *AF2 = dyn_cast<SCEVAddRecExpr>(Op)) |
873 | 4 | SE.collectParametricTerms(AF2, Terms); |
874 | 17 | if (auto *AF2 = dyn_cast<SCEVMulExpr>(Op)) { |
875 | 8 | SmallVector<const SCEV *, 0> Operands; |
876 | 8 | |
877 | 24 | for (auto *MulOp : AF2->operands()) { |
878 | 24 | if (auto *Const = dyn_cast<SCEVConstant>(MulOp)) |
879 | 6 | Operands.push_back(Const); |
880 | 24 | if (auto *Unknown = dyn_cast<SCEVUnknown>(MulOp)) { |
881 | 16 | if (auto *Inst = dyn_cast<Instruction>(Unknown->getValue())) { |
882 | 4 | if (!Context.CurRegion.contains(Inst)) |
883 | 0 | Operands.push_back(MulOp); |
884 | 4 | |
885 | 12 | } else { |
886 | 12 | Operands.push_back(MulOp); |
887 | 12 | } |
888 | 16 | } |
889 | 24 | } |
890 | 8 | if (Operands.size()) |
891 | 7 | Terms.push_back(SE.getMulExpr(Operands)); |
892 | 8 | } |
893 | 17 | } |
894 | 8 | } |
895 | 717 | if (Terms.empty()) |
896 | 499 | SE.collectParametricTerms(Pair.second, Terms); |
897 | 717 | } |
898 | 450 | return Terms; |
899 | 450 | } |
900 | | |
901 | | bool ScopDetection::hasValidArraySizes(DetectionContext &Context, |
902 | | SmallVectorImpl<const SCEV *> &Sizes, |
903 | | const SCEVUnknown *BasePointer, |
904 | 450 | Loop *Scope) const { |
905 | 450 | // If no sizes were found, all sizes are trivially valid. We allow this case |
906 | 450 | // to make it possible to pass known-affine accesses to the delinearization to |
907 | 450 | // try to recover some interesting multi-dimensional accesses, but to still |
908 | 450 | // allow the already known to be affine access in case the delinearization |
909 | 450 | // fails. In such situations, the delinearization will just return a Sizes |
910 | 450 | // array of size zero. |
911 | 450 | if (Sizes.size() == 0) |
912 | 65 | return true; |
913 | 385 | |
914 | 385 | Value *BaseValue = BasePointer->getValue(); |
915 | 385 | Region &CurRegion = Context.CurRegion; |
916 | 1.06k | for (const SCEV *DelinearizedSize : Sizes) { |
917 | 1.06k | // Don't pass down the scope to isAfffine; array dimensions must be |
918 | 1.06k | // invariant across the entire scop. |
919 | 1.06k | if (!isAffine(DelinearizedSize, nullptr, Context)) { |
920 | 11 | Sizes.clear(); |
921 | 11 | break; |
922 | 11 | } |
923 | 1.05k | if (auto *Unknown = dyn_cast<SCEVUnknown>(DelinearizedSize)) { |
924 | 627 | auto *V = dyn_cast<Value>(Unknown->getValue()); |
925 | 627 | if (auto *Load = dyn_cast<LoadInst>(V)) { |
926 | 178 | if (Context.CurRegion.contains(Load) && |
927 | 178 | isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS)26 ) |
928 | 26 | Context.RequiredILS.insert(Load); |
929 | 178 | continue; |
930 | 178 | } |
931 | 877 | } |
932 | 877 | if (hasScalarDepsInsideRegion(DelinearizedSize, &CurRegion, Scope, false, |
933 | 877 | Context.RequiredILS)) |
934 | 0 | return invalid<ReportNonAffineAccess>( |
935 | 0 | Context, /*Assert=*/true, DelinearizedSize, |
936 | 0 | Context.Accesses[BasePointer].front().first, BaseValue); |
937 | 877 | } |
938 | 385 | |
939 | 385 | // No array shape derived. |
940 | 385 | if (Sizes.empty()) { |
941 | 11 | if (AllowNonAffine) |
942 | 0 | return true; |
943 | 11 | |
944 | 12 | for (const auto &Pair : Context.Accesses[BasePointer])11 { |
945 | 12 | const Instruction *Insn = Pair.first; |
946 | 12 | const SCEV *AF = Pair.second; |
947 | 12 | |
948 | 12 | if (!isAffine(AF, Scope, Context)) { |
949 | 11 | invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Insn, |
950 | 11 | BaseValue); |
951 | 11 | if (!KeepGoing) |
952 | 11 | return false; |
953 | 11 | } |
954 | 12 | } |
955 | 11 | return false0 ; |
956 | 374 | } |
957 | 374 | return true; |
958 | 374 | } |
959 | | |
960 | | // We first store the resulting memory accesses in TempMemoryAccesses. Only |
961 | | // if the access functions for all memory accesses have been successfully |
962 | | // delinearized we continue. Otherwise, we either report a failure or, if |
963 | | // non-affine accesses are allowed, we drop the information. In case the |
964 | | // information is dropped the memory accesses need to be overapproximated |
965 | | // when translated to a polyhedral representation. |
966 | | bool ScopDetection::computeAccessFunctions( |
967 | | DetectionContext &Context, const SCEVUnknown *BasePointer, |
968 | 439 | std::shared_ptr<ArrayShape> Shape) const { |
969 | 439 | Value *BaseValue = BasePointer->getValue(); |
970 | 439 | bool BasePtrHasNonAffine = false; |
971 | 439 | MapInsnToMemAcc TempMemoryAccesses; |
972 | 719 | for (const auto &Pair : Context.Accesses[BasePointer]) { |
973 | 719 | const Instruction *Insn = Pair.first; |
974 | 719 | auto *AF = Pair.second; |
975 | 719 | AF = SCEVRemoveMax::rewrite(AF, SE); |
976 | 719 | bool IsNonAffine = false; |
977 | 719 | TempMemoryAccesses.insert(std::make_pair(Insn, MemAcc(Insn, Shape))); |
978 | 719 | MemAcc *Acc = &TempMemoryAccesses.find(Insn)->second; |
979 | 719 | auto *Scope = LI.getLoopFor(Insn->getParent()); |
980 | 719 | |
981 | 719 | if (!AF) { |
982 | 0 | if (isAffine(Pair.second, Scope, Context)) |
983 | 0 | Acc->DelinearizedSubscripts.push_back(Pair.second); |
984 | 0 | else |
985 | 0 | IsNonAffine = true; |
986 | 719 | } else { |
987 | 719 | if (Shape->DelinearizedSizes.size() == 0) { |
988 | 109 | Acc->DelinearizedSubscripts.push_back(AF); |
989 | 610 | } else { |
990 | 610 | SE.computeAccessFunctions(AF, Acc->DelinearizedSubscripts, |
991 | 610 | Shape->DelinearizedSizes); |
992 | 610 | if (Acc->DelinearizedSubscripts.size() == 0) |
993 | 6 | IsNonAffine = true; |
994 | 610 | } |
995 | 719 | for (const SCEV *S : Acc->DelinearizedSubscripts) |
996 | 1.83k | if (!isAffine(S, Scope, Context)) |
997 | 92 | IsNonAffine = true; |
998 | 719 | } |
999 | 719 | |
1000 | 719 | // (Possibly) report non affine access |
1001 | 719 | if (IsNonAffine) { |
1002 | 98 | BasePtrHasNonAffine = true; |
1003 | 98 | if (!AllowNonAffine) |
1004 | 23 | invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Pair.second, |
1005 | 23 | Insn, BaseValue); |
1006 | 98 | if (!KeepGoing && !AllowNonAffine87 ) |
1007 | 16 | return false; |
1008 | 98 | } |
1009 | 719 | } |
1010 | 439 | |
1011 | 439 | if (423 !BasePtrHasNonAffine423 ) |
1012 | 367 | Context.InsnToMemAcc.insert(TempMemoryAccesses.begin(), |
1013 | 367 | TempMemoryAccesses.end()); |
1014 | 423 | |
1015 | 423 | return true; |
1016 | 439 | } |
1017 | | |
1018 | | bool ScopDetection::hasBaseAffineAccesses(DetectionContext &Context, |
1019 | | const SCEVUnknown *BasePointer, |
1020 | 450 | Loop *Scope) const { |
1021 | 450 | auto Shape = std::shared_ptr<ArrayShape>(new ArrayShape(BasePointer)); |
1022 | 450 | |
1023 | 450 | auto Terms = getDelinearizationTerms(Context, BasePointer); |
1024 | 450 | |
1025 | 450 | SE.findArrayDimensions(Terms, Shape->DelinearizedSizes, |
1026 | 450 | Context.ElementSize[BasePointer]); |
1027 | 450 | |
1028 | 450 | if (!hasValidArraySizes(Context, Shape->DelinearizedSizes, BasePointer, |
1029 | 450 | Scope)) |
1030 | 11 | return false; |
1031 | 439 | |
1032 | 439 | return computeAccessFunctions(Context, BasePointer, Shape); |
1033 | 439 | } |
1034 | | |
1035 | 3.35k | bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const { |
1036 | 3.35k | // TODO: If we have an unknown access and other non-affine accesses we do |
1037 | 3.35k | // not try to delinearize them for now. |
1038 | 3.35k | if (Context.HasUnknownAccess && !Context.NonAffineAccesses.empty()35 ) |
1039 | 12 | return AllowNonAffine; |
1040 | 3.34k | |
1041 | 3.34k | for (auto &Pair : Context.NonAffineAccesses) { |
1042 | 450 | auto *BasePointer = Pair.first; |
1043 | 450 | auto *Scope = Pair.second; |
1044 | 450 | if (!hasBaseAffineAccesses(Context, BasePointer, Scope)) { |
1045 | 27 | if (KeepGoing) |
1046 | 0 | continue; |
1047 | 27 | else |
1048 | 27 | return false; |
1049 | 27 | } |
1050 | 450 | } |
1051 | 3.34k | return true3.31k ; |
1052 | 3.34k | } |
1053 | | |
1054 | | bool ScopDetection::isValidAccess(Instruction *Inst, const SCEV *AF, |
1055 | | const SCEVUnknown *BP, |
1056 | 12.5k | DetectionContext &Context) const { |
1057 | 12.5k | |
1058 | 12.5k | if (!BP) |
1059 | 0 | return invalid<ReportNoBasePtr>(Context, /*Assert=*/true, Inst); |
1060 | 12.5k | |
1061 | 12.5k | auto *BV = BP->getValue(); |
1062 | 12.5k | if (isa<UndefValue>(BV)) |
1063 | 6 | return invalid<ReportUndefBasePtr>(Context, /*Assert=*/true, Inst); |
1064 | 12.5k | |
1065 | 12.5k | // FIXME: Think about allowing IntToPtrInst |
1066 | 12.5k | if (IntToPtrInst *Inst = dyn_cast<IntToPtrInst>(BV)) |
1067 | 1 | return invalid<ReportIntToPtr>(Context, /*Assert=*/true, Inst); |
1068 | 12.5k | |
1069 | 12.5k | // Check that the base address of the access is invariant in the current |
1070 | 12.5k | // region. |
1071 | 12.5k | if (!isInvariant(*BV, Context.CurRegion, Context)) |
1072 | 10 | return invalid<ReportVariantBasePtr>(Context, /*Assert=*/true, BV, Inst); |
1073 | 12.5k | |
1074 | 12.5k | AF = SE.getMinusSCEV(AF, BP); |
1075 | 12.5k | |
1076 | 12.5k | const SCEV *Size; |
1077 | 12.5k | if (!isa<MemIntrinsic>(Inst)) { |
1078 | 12.4k | Size = SE.getElementSize(Inst); |
1079 | 12.4k | } else { |
1080 | 66 | auto *SizeTy = |
1081 | 66 | SE.getEffectiveSCEVType(PointerType::getInt8PtrTy(SE.getContext())); |
1082 | 66 | Size = SE.getConstant(SizeTy, 8); |
1083 | 66 | } |
1084 | 12.5k | |
1085 | 12.5k | if (Context.ElementSize[BP]) { |
1086 | 6.98k | if (!AllowDifferentTypes && Context.ElementSize[BP] != Size0 ) |
1087 | 0 | return invalid<ReportDifferentArrayElementSize>(Context, /*Assert=*/true, |
1088 | 0 | Inst, BV); |
1089 | 6.98k | |
1090 | 6.98k | Context.ElementSize[BP] = SE.getSMinExpr(Size, Context.ElementSize[BP]); |
1091 | 6.98k | } else { |
1092 | 5.53k | Context.ElementSize[BP] = Size; |
1093 | 5.53k | } |
1094 | 12.5k | |
1095 | 12.5k | bool IsVariantInNonAffineLoop = false; |
1096 | 12.5k | SetVector<const Loop *> Loops; |
1097 | 12.5k | findLoops(AF, Loops); |
1098 | 12.5k | for (const Loop *L : Loops) |
1099 | 7.36k | if (Context.BoxedLoopsSet.count(L)) |
1100 | 39 | IsVariantInNonAffineLoop = true; |
1101 | 12.5k | |
1102 | 12.5k | auto *Scope = LI.getLoopFor(Inst->getParent()); |
1103 | 12.5k | bool IsAffine = !IsVariantInNonAffineLoop && isAffine(AF, Scope, Context)12.4k ; |
1104 | 12.5k | // Do not try to delinearize memory intrinsics and force them to be affine. |
1105 | 12.5k | if (isa<MemIntrinsic>(Inst) && !IsAffine66 ) { |
1106 | 0 | return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst, |
1107 | 0 | BV); |
1108 | 12.5k | } else if (PollyDelinearize && !IsVariantInNonAffineLoop12.4k ) { |
1109 | 12.4k | Context.Accesses[BP].push_back({Inst, AF}); |
1110 | 12.4k | |
1111 | 12.4k | if (!IsAffine || hasIVParams(AF)11.7k ) |
1112 | 679 | Context.NonAffineAccesses.insert( |
1113 | 679 | std::make_pair(BP, LI.getLoopFor(Inst->getParent()))); |
1114 | 12.4k | } else if (68 !AllowNonAffine68 && !IsAffine28 ) { |
1115 | 22 | return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst, |
1116 | 22 | BV); |
1117 | 22 | } |
1118 | 12.4k | |
1119 | 12.4k | if (IgnoreAliasing) |
1120 | 440 | return true; |
1121 | 12.0k | |
1122 | 12.0k | // Check if the base pointer of the memory access does alias with |
1123 | 12.0k | // any other pointer. This cannot be handled at the moment. |
1124 | 12.0k | AAMDNodes AATags; |
1125 | 12.0k | Inst->getAAMetadata(AATags); |
1126 | 12.0k | AliasSet &AS = Context.AST.getAliasSetFor( |
1127 | 12.0k | MemoryLocation(BP->getValue(), MemoryLocation::UnknownSize, AATags)); |
1128 | 12.0k | |
1129 | 12.0k | if (!AS.isMustAlias()) { |
1130 | 3.32k | if (PollyUseRuntimeAliasChecks) { |
1131 | 3.32k | bool CanBuildRunTimeCheck = true; |
1132 | 3.32k | // The run-time alias check places code that involves the base pointer at |
1133 | 3.32k | // the beginning of the SCoP. This breaks if the base pointer is defined |
1134 | 3.32k | // inside the scop. Hence, we can only create a run-time check if we are |
1135 | 3.32k | // sure the base pointer is not an instruction defined inside the scop. |
1136 | 3.32k | // However, we can ignore loads that will be hoisted. |
1137 | 3.32k | |
1138 | 3.32k | InvariantLoadsSetTy VariantLS, InvariantLS; |
1139 | 3.32k | // In order to detect loads which are dependent on other invariant loads |
1140 | 3.32k | // as invariant, we use fixed-point iteration method here i.e we iterate |
1141 | 3.32k | // over the alias set for arbitrary number of times until it is safe to |
1142 | 3.32k | // assume that all the invariant loads have been detected |
1143 | 3.90k | while (1) { |
1144 | 3.90k | const unsigned int VariantSize = VariantLS.size(), |
1145 | 3.90k | InvariantSize = InvariantLS.size(); |
1146 | 3.90k | |
1147 | 14.4k | for (const auto &Ptr : AS) { |
1148 | 14.4k | Instruction *Inst = dyn_cast<Instruction>(Ptr.getValue()); |
1149 | 14.4k | if (Inst && Context.CurRegion.contains(Inst)4.69k ) { |
1150 | 4.41k | auto *Load = dyn_cast<LoadInst>(Inst); |
1151 | 4.41k | if (Load && InvariantLS.count(Load)) |
1152 | 2.20k | continue; |
1153 | 2.21k | if (Load && isHoistableLoad(Load, Context.CurRegion, LI, SE, DT, |
1154 | 2.21k | InvariantLS)) { |
1155 | 2.20k | if (VariantLS.count(Load)) |
1156 | 0 | VariantLS.remove(Load); |
1157 | 2.20k | Context.RequiredILS.insert(Load); |
1158 | 2.20k | InvariantLS.insert(Load); |
1159 | 2.20k | } else { |
1160 | 4 | CanBuildRunTimeCheck = false; |
1161 | 4 | VariantLS.insert(Load); |
1162 | 4 | } |
1163 | 2.21k | } |
1164 | 14.4k | } |
1165 | 3.90k | |
1166 | 3.90k | if (InvariantSize == InvariantLS.size() && |
1167 | 3.90k | VariantSize == VariantLS.size()3.32k ) |
1168 | 3.32k | break; |
1169 | 3.90k | } |
1170 | 3.32k | |
1171 | 3.32k | if (CanBuildRunTimeCheck) |
1172 | 3.32k | return true; |
1173 | 3 | } |
1174 | 3 | return invalid<ReportAlias>(Context, /*Assert=*/true, Inst, AS); |
1175 | 3 | } |
1176 | 8.73k | |
1177 | 8.73k | return true; |
1178 | 8.73k | } |
1179 | | |
1180 | | bool ScopDetection::isValidMemoryAccess(MemAccInst Inst, |
1181 | 12.4k | DetectionContext &Context) const { |
1182 | 12.4k | Value *Ptr = Inst.getPointerOperand(); |
1183 | 12.4k | Loop *L = LI.getLoopFor(Inst->getParent()); |
1184 | 12.4k | const SCEV *AccessFunction = SE.getSCEVAtScope(Ptr, L); |
1185 | 12.4k | const SCEVUnknown *BasePointer; |
1186 | 12.4k | |
1187 | 12.4k | BasePointer = dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction)); |
1188 | 12.4k | |
1189 | 12.4k | return isValidAccess(Inst, AccessFunction, BasePointer, Context); |
1190 | 12.4k | } |
1191 | | |
1192 | | bool ScopDetection::isValidInstruction(Instruction &Inst, |
1193 | 53.7k | DetectionContext &Context) const { |
1194 | 100k | for (auto &Op : Inst.operands()) { |
1195 | 100k | auto *OpInst = dyn_cast<Instruction>(&Op); |
1196 | 100k | |
1197 | 100k | if (!OpInst) |
1198 | 41.5k | continue; |
1199 | 59.1k | |
1200 | 59.1k | if (isErrorBlock(*OpInst->getParent(), Context.CurRegion, LI, DT)) { |
1201 | 2 | auto *PHI = dyn_cast<PHINode>(OpInst); |
1202 | 2 | if (PHI) { |
1203 | 0 | for (User *U : PHI->users()) { |
1204 | 0 | auto *UI = dyn_cast<Instruction>(U); |
1205 | 0 | if (!UI || !UI->isTerminator()) |
1206 | 0 | return false; |
1207 | 0 | } |
1208 | 2 | } else { |
1209 | 2 | return false; |
1210 | 2 | } |
1211 | 2 | } |
1212 | 59.1k | } |
1213 | 53.7k | |
1214 | 53.7k | if (53.7k isa<LandingPadInst>(&Inst)53.7k || isa<ResumeInst>(&Inst)53.7k ) |
1215 | 0 | return false; |
1216 | 53.7k | |
1217 | 53.7k | // We only check the call instruction but not invoke instruction. |
1218 | 53.7k | if (CallInst *CI = dyn_cast<CallInst>(&Inst)) { |
1219 | 243 | if (isValidCallInst(*CI, Context)) |
1220 | 232 | return true; |
1221 | 11 | |
1222 | 11 | return invalid<ReportFuncCall>(Context, /*Assert=*/true, &Inst); |
1223 | 11 | } |
1224 | 53.5k | |
1225 | 53.5k | if (!Inst.mayReadOrWriteMemory()) { |
1226 | 41.0k | if (!isa<AllocaInst>(Inst)) |
1227 | 41.0k | return true; |
1228 | 0 | |
1229 | 0 | return invalid<ReportAlloca>(Context, /*Assert=*/true, &Inst); |
1230 | 0 | } |
1231 | 12.5k | |
1232 | 12.5k | // Check the access function. |
1233 | 12.5k | if (auto MemInst = MemAccInst::dyn_cast(Inst)) { |
1234 | 12.4k | Context.hasStores |= isa<StoreInst>(MemInst); |
1235 | 12.4k | Context.hasLoads |= isa<LoadInst>(MemInst); |
1236 | 12.4k | if (!MemInst.isSimple()) |
1237 | 0 | return invalid<ReportNonSimpleMemoryAccess>(Context, /*Assert=*/true, |
1238 | 0 | &Inst); |
1239 | 12.4k | |
1240 | 12.4k | return isValidMemoryAccess(MemInst, Context); |
1241 | 12.4k | } |
1242 | 49 | |
1243 | 49 | // We do not know this instruction, therefore we assume it is invalid. |
1244 | 49 | return invalid<ReportUnknownInst>(Context, /*Assert=*/true, &Inst); |
1245 | 49 | } |
1246 | | |
1247 | | /// Check whether @p L has exiting blocks. |
1248 | | /// |
1249 | | /// @param L The loop of interest |
1250 | | /// |
1251 | | /// @return True if the loop has exiting blocks, false otherwise. |
1252 | 5.42k | static bool hasExitingBlocks(Loop *L) { |
1253 | 5.42k | SmallVector<BasicBlock *, 4> ExitingBlocks; |
1254 | 5.42k | L->getExitingBlocks(ExitingBlocks); |
1255 | 5.42k | return !ExitingBlocks.empty(); |
1256 | 5.42k | } |
1257 | | |
1258 | | bool ScopDetection::canUseISLTripCount(Loop *L, |
1259 | 5.38k | DetectionContext &Context) const { |
1260 | 5.38k | // Ensure the loop has valid exiting blocks as well as latches, otherwise we |
1261 | 5.38k | // need to overapproximate it as a boxed loop. |
1262 | 5.38k | SmallVector<BasicBlock *, 4> LoopControlBlocks; |
1263 | 5.38k | L->getExitingBlocks(LoopControlBlocks); |
1264 | 5.38k | L->getLoopLatches(LoopControlBlocks); |
1265 | 10.6k | for (BasicBlock *ControlBB : LoopControlBlocks) { |
1266 | 10.6k | if (!isValidCFG(*ControlBB, true, false, Context)) |
1267 | 200 | return false; |
1268 | 10.6k | } |
1269 | 5.38k | |
1270 | 5.38k | // We can use ISL to compute the trip count of L. |
1271 | 5.38k | return true5.18k ; |
1272 | 5.38k | } |
1273 | | |
1274 | 5.42k | bool ScopDetection::isValidLoop(Loop *L, DetectionContext &Context) const { |
1275 | 5.42k | // Loops that contain part but not all of the blocks of a region cannot be |
1276 | 5.42k | // handled by the schedule generation. Such loop constructs can happen |
1277 | 5.42k | // because a region can contain BBs that have no path to the exit block |
1278 | 5.42k | // (Infinite loops, UnreachableInst), but such blocks are never part of a |
1279 | 5.42k | // loop. |
1280 | 5.42k | // |
1281 | 5.42k | // _______________ |
1282 | 5.42k | // | Loop Header | <-----------. |
1283 | 5.42k | // --------------- | |
1284 | 5.42k | // | | |
1285 | 5.42k | // _______________ ______________ |
1286 | 5.42k | // | RegionEntry |-----> | RegionExit |-----> |
1287 | 5.42k | // --------------- -------------- |
1288 | 5.42k | // | |
1289 | 5.42k | // _______________ |
1290 | 5.42k | // | EndlessLoop | <--. |
1291 | 5.42k | // --------------- | |
1292 | 5.42k | // | | |
1293 | 5.42k | // \------------/ |
1294 | 5.42k | // |
1295 | 5.42k | // In the example above, the loop (LoopHeader,RegionEntry,RegionExit) is |
1296 | 5.42k | // neither entirely contained in the region RegionEntry->RegionExit |
1297 | 5.42k | // (containing RegionEntry,EndlessLoop) nor is the region entirely contained |
1298 | 5.42k | // in the loop. |
1299 | 5.42k | // The block EndlessLoop is contained in the region because Region::contains |
1300 | 5.42k | // tests whether it is not dominated by RegionExit. This is probably to not |
1301 | 5.42k | // having to query the PostdominatorTree. Instead of an endless loop, a dead |
1302 | 5.42k | // end can also be formed by an UnreachableInst. This case is already caught |
1303 | 5.42k | // by isErrorBlock(). We hence only have to reject endless loops here. |
1304 | 5.42k | if (!hasExitingBlocks(L)) |
1305 | 0 | return invalid<ReportLoopHasNoExit>(Context, /*Assert=*/true, L); |
1306 | 5.42k | |
1307 | 5.42k | // The algorithm for domain construction assumes that loops has only a single |
1308 | 5.42k | // exit block (and hence corresponds to a subregion). Note that we cannot use |
1309 | 5.42k | // L->getExitBlock() because it does not check whether all exiting edges point |
1310 | 5.42k | // to the same BB. |
1311 | 5.42k | SmallVector<BasicBlock *, 4> ExitBlocks; |
1312 | 5.42k | L->getExitBlocks(ExitBlocks); |
1313 | 5.42k | BasicBlock *TheExitBlock = ExitBlocks[0]; |
1314 | 5.46k | for (BasicBlock *ExitBB : ExitBlocks) { |
1315 | 5.46k | if (TheExitBlock != ExitBB) |
1316 | 35 | return invalid<ReportLoopHasMultipleExits>(Context, /*Assert=*/true, L); |
1317 | 5.46k | } |
1318 | 5.42k | |
1319 | 5.42k | if (5.38k canUseISLTripCount(L, Context)5.38k ) |
1320 | 5.18k | return true; |
1321 | 200 | |
1322 | 200 | if (AllowNonAffineSubLoops && AllowNonAffineSubRegions66 ) { |
1323 | 66 | Region *R = RI.getRegionFor(L->getHeader()); |
1324 | 77 | while (R != &Context.CurRegion && !R->contains(L)62 ) |
1325 | 11 | R = R->getParent(); |
1326 | 66 | |
1327 | 66 | if (addOverApproximatedRegion(R, Context)) |
1328 | 66 | return true; |
1329 | 134 | } |
1330 | 134 | |
1331 | 134 | const SCEV *LoopCount = SE.getBackedgeTakenCount(L); |
1332 | 134 | return invalid<ReportLoopBound>(Context, /*Assert=*/true, L, LoopCount); |
1333 | 134 | } |
1334 | | |
1335 | | /// Return the number of loops in @p L (incl. @p L) that have a trip |
1336 | | /// count that is not known to be less than @MinProfitableTrips. |
1337 | | ScopDetection::LoopStats |
1338 | | ScopDetection::countBeneficialSubLoops(Loop *L, ScalarEvolution &SE, |
1339 | 3.97k | unsigned MinProfitableTrips) { |
1340 | 3.97k | auto *TripCount = SE.getBackedgeTakenCount(L); |
1341 | 3.97k | |
1342 | 3.97k | int NumLoops = 1; |
1343 | 3.97k | int MaxLoopDepth = 1; |
1344 | 3.97k | if (MinProfitableTrips > 0) |
1345 | 38 | if (auto *TripCountC = dyn_cast<SCEVConstant>(TripCount)) |
1346 | 17 | if (TripCountC->getType()->getScalarSizeInBits() <= 64) |
1347 | 17 | if (TripCountC->getValue()->getZExtValue() <= MinProfitableTrips) |
1348 | 4 | NumLoops -= 1; |
1349 | 3.97k | |
1350 | 3.97k | for (auto &SubLoop : *L) { |
1351 | 1.10k | LoopStats Stats = countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips); |
1352 | 1.10k | NumLoops += Stats.NumLoops; |
1353 | 1.10k | MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth + 1); |
1354 | 1.10k | } |
1355 | 3.97k | |
1356 | 3.97k | return {NumLoops, MaxLoopDepth}; |
1357 | 3.97k | } |
1358 | | |
1359 | | ScopDetection::LoopStats |
1360 | | ScopDetection::countBeneficialLoops(Region *R, ScalarEvolution &SE, |
1361 | 2.72k | LoopInfo &LI, unsigned MinProfitableTrips) { |
1362 | 2.72k | int LoopNum = 0; |
1363 | 2.72k | int MaxLoopDepth = 0; |
1364 | 2.72k | |
1365 | 2.72k | auto L = LI.getLoopFor(R->getEntry()); |
1366 | 2.72k | |
1367 | 2.72k | // If L is fully contained in R, move to first loop surrounding R. Otherwise, |
1368 | 2.72k | // L is either nullptr or already surrounding R. |
1369 | 2.72k | if (L && R->contains(L)1.15k ) { |
1370 | 1.10k | L = R->outermostLoopInRegion(L); |
1371 | 1.10k | L = L->getParentLoop(); |
1372 | 1.10k | } |
1373 | 2.72k | |
1374 | 2.72k | auto SubLoops = |
1375 | 2.72k | L ? L->getSubLoopsVector()83 : std::vector<Loop *>(LI.begin(), LI.end())2.63k ; |
1376 | 2.72k | |
1377 | 2.72k | for (auto &SubLoop : SubLoops) |
1378 | 2.93k | if (R->contains(SubLoop)) { |
1379 | 2.86k | LoopStats Stats = |
1380 | 2.86k | countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips); |
1381 | 2.86k | LoopNum += Stats.NumLoops; |
1382 | 2.86k | MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth); |
1383 | 2.86k | } |
1384 | 2.72k | |
1385 | 2.72k | return {LoopNum, MaxLoopDepth}; |
1386 | 2.72k | } |
1387 | | |
1388 | 1.29k | Region *ScopDetection::expandRegion(Region &R) { |
1389 | 1.29k | // Initial no valid region was found (greater than R) |
1390 | 1.29k | std::unique_ptr<Region> LastValidRegion; |
1391 | 1.29k | auto ExpandedRegion = std::unique_ptr<Region>(R.getExpandedRegion()); |
1392 | 1.29k | |
1393 | 1.29k | LLVM_DEBUG(dbgs() << "\tExpanding " << R.getNameStr() << "\n"); |
1394 | 1.29k | |
1395 | 1.90k | while (ExpandedRegion) { |
1396 | 653 | const auto &It = DetectionContextMap.insert(std::make_pair( |
1397 | 653 | getBBPairForRegion(ExpandedRegion.get()), |
1398 | 653 | DetectionContext(*ExpandedRegion, AA, false /*verifying*/))); |
1399 | 653 | DetectionContext &Context = It.first->second; |
1400 | 653 | LLVM_DEBUG(dbgs() << "\t\tTrying " << ExpandedRegion->getNameStr() << "\n"); |
1401 | 653 | // Only expand when we did not collect errors. |
1402 | 653 | |
1403 | 653 | if (!Context.Log.hasErrors()) { |
1404 | 653 | // If the exit is valid check all blocks |
1405 | 653 | // - if true, a valid region was found => store it + keep expanding |
1406 | 653 | // - if false, .tbd. => stop (should this really end the loop?) |
1407 | 653 | if (!allBlocksValid(Context) || Context.Log.hasErrors()607 ) { |
1408 | 47 | removeCachedResults(*ExpandedRegion); |
1409 | 47 | DetectionContextMap.erase(It.first); |
1410 | 47 | break; |
1411 | 47 | } |
1412 | 606 | |
1413 | 606 | // Store this region, because it is the greatest valid (encountered so |
1414 | 606 | // far). |
1415 | 606 | if (LastValidRegion) { |
1416 | 256 | removeCachedResults(*LastValidRegion); |
1417 | 256 | DetectionContextMap.erase(getBBPairForRegion(LastValidRegion.get())); |
1418 | 256 | } |
1419 | 606 | LastValidRegion = std::move(ExpandedRegion); |
1420 | 606 | |
1421 | 606 | // Create and test the next greater region (if any) |
1422 | 606 | ExpandedRegion = |
1423 | 606 | std::unique_ptr<Region>(LastValidRegion->getExpandedRegion()); |
1424 | 606 | |
1425 | 606 | } else { |
1426 | 0 | // Create and test the next greater region (if any) |
1427 | 0 | removeCachedResults(*ExpandedRegion); |
1428 | 0 | DetectionContextMap.erase(It.first); |
1429 | 0 | ExpandedRegion = |
1430 | 0 | std::unique_ptr<Region>(ExpandedRegion->getExpandedRegion()); |
1431 | 0 | } |
1432 | 653 | } |
1433 | 1.29k | |
1434 | 1.29k | LLVM_DEBUG({ |
1435 | 1.29k | if (LastValidRegion) |
1436 | 1.29k | dbgs() << "\tto " << LastValidRegion->getNameStr() << "\n"; |
1437 | 1.29k | else |
1438 | 1.29k | dbgs() << "\tExpanding " << R.getNameStr() << " failed\n"; |
1439 | 1.29k | }); |
1440 | 1.29k | |
1441 | 1.29k | return LastValidRegion.release(); |
1442 | 1.29k | } |
1443 | | |
1444 | 47 | static bool regionWithoutLoops(Region &R, LoopInfo &LI) { |
1445 | 47 | for (const BasicBlock *BB : R.blocks()) |
1446 | 53 | if (R.contains(LI.getLoopFor(BB))) |
1447 | 46 | return false; |
1448 | 47 | |
1449 | 47 | return true1 ; |
1450 | 47 | } |
1451 | | |
1452 | 865 | void ScopDetection::removeCachedResultsRecursively(const Region &R) { |
1453 | 865 | for (auto &SubRegion : R) { |
1454 | 723 | if (ValidRegions.count(SubRegion.get())) { |
1455 | 208 | removeCachedResults(*SubRegion.get()); |
1456 | 208 | } else |
1457 | 515 | removeCachedResultsRecursively(*SubRegion); |
1458 | 723 | } |
1459 | 865 | } |
1460 | | |
1461 | 2.69k | void ScopDetection::removeCachedResults(const Region &R) { |
1462 | 2.69k | ValidRegions.remove(&R); |
1463 | 2.69k | } |
1464 | | |
1465 | 3.34k | void ScopDetection::findScops(Region &R) { |
1466 | 3.34k | const auto &It = DetectionContextMap.insert(std::make_pair( |
1467 | 3.34k | getBBPairForRegion(&R), DetectionContext(R, AA, false /*verifying*/))); |
1468 | 3.34k | DetectionContext &Context = It.first->second; |
1469 | 3.34k | |
1470 | 3.34k | bool RegionIsValid = false; |
1471 | 3.34k | if (!PollyProcessUnprofitable && regionWithoutLoops(R, LI)47 ) |
1472 | 1 | invalid<ReportUnprofitable>(Context, /*Assert=*/true, &R); |
1473 | 3.34k | else |
1474 | 3.34k | RegionIsValid = isValidRegion(Context); |
1475 | 3.34k | |
1476 | 3.34k | bool HasErrors = !RegionIsValid || Context.Log.size() > 01.51k ; |
1477 | 3.34k | |
1478 | 3.34k | if (HasErrors) { |
1479 | 1.83k | removeCachedResults(R); |
1480 | 1.83k | } else { |
1481 | 1.50k | ValidRegions.insert(&R); |
1482 | 1.50k | return; |
1483 | 1.50k | } |
1484 | 1.83k | |
1485 | 1.83k | for (auto &SubRegion : R) |
1486 | 1.93k | findScops(*SubRegion); |
1487 | 1.83k | |
1488 | 1.83k | // Try to expand regions. |
1489 | 1.83k | // |
1490 | 1.83k | // As the region tree normally only contains canonical regions, non canonical |
1491 | 1.83k | // regions that form a Scop are not found. Therefore, those non canonical |
1492 | 1.83k | // regions are checked by expanding the canonical ones. |
1493 | 1.83k | |
1494 | 1.83k | std::vector<Region *> ToExpand; |
1495 | 1.83k | |
1496 | 1.83k | for (auto &SubRegion : R) |
1497 | 1.93k | ToExpand.push_back(SubRegion.get()); |
1498 | 1.83k | |
1499 | 1.93k | for (Region *CurrentRegion : ToExpand) { |
1500 | 1.93k | // Skip invalid regions. Regions may become invalid, if they are element of |
1501 | 1.93k | // an already expanded region. |
1502 | 1.93k | if (!ValidRegions.count(CurrentRegion)) |
1503 | 644 | continue; |
1504 | 1.29k | |
1505 | 1.29k | // Skip regions that had errors. |
1506 | 1.29k | bool HadErrors = lookupRejectionLog(CurrentRegion)->hasErrors(); |
1507 | 1.29k | if (HadErrors) |
1508 | 0 | continue; |
1509 | 1.29k | |
1510 | 1.29k | Region *ExpandedR = expandRegion(*CurrentRegion); |
1511 | 1.29k | |
1512 | 1.29k | if (!ExpandedR) |
1513 | 944 | continue; |
1514 | 350 | |
1515 | 350 | R.addSubRegion(ExpandedR, true); |
1516 | 350 | ValidRegions.insert(ExpandedR); |
1517 | 350 | removeCachedResults(*CurrentRegion); |
1518 | 350 | removeCachedResultsRecursively(*ExpandedR); |
1519 | 350 | } |
1520 | 1.83k | } |
1521 | | |
1522 | 3.69k | bool ScopDetection::allBlocksValid(DetectionContext &Context) const { |
1523 | 3.69k | Region &CurRegion = Context.CurRegion; |
1524 | 3.69k | |
1525 | 19.5k | for (const BasicBlock *BB : CurRegion.blocks()) { |
1526 | 19.5k | Loop *L = LI.getLoopFor(BB); |
1527 | 19.5k | if (L && L->getHeader() == BB15.8k ) { |
1528 | 5.47k | if (CurRegion.contains(L)) { |
1529 | 5.42k | if (!isValidLoop(L, Context) && !KeepGoing169 ) |
1530 | 169 | return false; |
1531 | 54 | } else { |
1532 | 54 | SmallVector<BasicBlock *, 1> Latches; |
1533 | 54 | L->getLoopLatches(Latches); |
1534 | 54 | for (BasicBlock *Latch : Latches) |
1535 | 62 | if (CurRegion.contains(Latch)) |
1536 | 5 | return invalid<ReportLoopOnlySomeLatches>(Context, /*Assert=*/true, |
1537 | 5 | L); |
1538 | 54 | } |
1539 | 5.47k | } |
1540 | 19.5k | } |
1541 | 3.69k | |
1542 | 18.8k | for (BasicBlock *BB : CurRegion.blocks())3.52k { |
1543 | 18.8k | bool IsErrorBlock = isErrorBlock(*BB, CurRegion, LI, DT); |
1544 | 18.8k | |
1545 | 18.8k | // Also check exception blocks (and possibly register them as non-affine |
1546 | 18.8k | // regions). Even though exception blocks are not modeled, we use them |
1547 | 18.8k | // to forward-propagate domain constraints during ScopInfo construction. |
1548 | 18.8k | if (!isValidCFG(*BB, false, IsErrorBlock, Context) && !KeepGoing67 ) |
1549 | 67 | return false; |
1550 | 18.7k | |
1551 | 18.7k | if (IsErrorBlock) |
1552 | 137 | continue; |
1553 | 18.6k | |
1554 | 72.3k | for (BasicBlock::iterator I = BB->begin(), E = --BB->end(); 18.6k I != E; ++I53.6k ) |
1555 | 53.7k | if (!isValidInstruction(*I, Context) && !KeepGoing104 ) |
1556 | 95 | return false; |
1557 | 18.6k | } |
1558 | 3.52k | |
1559 | 3.52k | if (3.35k !hasAffineMemoryAccesses(Context)3.35k ) |
1560 | 33 | return false; |
1561 | 3.32k | |
1562 | 3.32k | return true; |
1563 | 3.32k | } |
1564 | | |
1565 | | bool ScopDetection::hasSufficientCompute(DetectionContext &Context, |
1566 | 8 | int NumLoops) const { |
1567 | 8 | int InstCount = 0; |
1568 | 8 | |
1569 | 8 | if (NumLoops == 0) |
1570 | 0 | return false; |
1571 | 8 | |
1572 | 8 | for (auto *BB : Context.CurRegion.blocks()) |
1573 | 36 | if (Context.CurRegion.contains(LI.getLoopFor(BB))) |
1574 | 36 | InstCount += BB->size(); |
1575 | 8 | |
1576 | 8 | InstCount = InstCount / NumLoops; |
1577 | 8 | |
1578 | 8 | return InstCount >= ProfitabilityMinPerLoopInstructions; |
1579 | 8 | } |
1580 | | |
1581 | | bool ScopDetection::hasPossiblyDistributableLoop( |
1582 | 10 | DetectionContext &Context) const { |
1583 | 10 | for (auto *BB : Context.CurRegion.blocks()) { |
1584 | 10 | auto *L = LI.getLoopFor(BB); |
1585 | 10 | if (!Context.CurRegion.contains(L)) |
1586 | 0 | continue; |
1587 | 10 | if (Context.BoxedLoopsSet.count(L)) |
1588 | 0 | continue; |
1589 | 10 | unsigned StmtsWithStoresInLoops = 0; |
1590 | 74 | for (auto *LBB : L->blocks()) { |
1591 | 74 | bool MemStore = false; |
1592 | 74 | for (auto &I : *LBB) |
1593 | 303 | MemStore |= isa<StoreInst>(&I); |
1594 | 74 | StmtsWithStoresInLoops += MemStore; |
1595 | 74 | } |
1596 | 10 | return (StmtsWithStoresInLoops > 1); |
1597 | 10 | } |
1598 | 10 | return false0 ; |
1599 | 10 | } |
1600 | | |
1601 | 1.29k | bool ScopDetection::isProfitableRegion(DetectionContext &Context) const { |
1602 | 1.29k | Region &CurRegion = Context.CurRegion; |
1603 | 1.29k | |
1604 | 1.29k | if (PollyProcessUnprofitable) |
1605 | 1.27k | return true; |
1606 | 22 | |
1607 | 22 | // We can probably not do a lot on scops that only write or only read |
1608 | 22 | // data. |
1609 | 22 | if (!Context.hasStores || !Context.hasLoads20 ) |
1610 | 4 | return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion); |
1611 | 18 | |
1612 | 18 | int NumLoops = |
1613 | 18 | countBeneficialLoops(&CurRegion, SE, LI, MIN_LOOP_TRIP_COUNT).NumLoops; |
1614 | 18 | int NumAffineLoops = NumLoops - Context.BoxedLoopsSet.size(); |
1615 | 18 | |
1616 | 18 | // Scops with at least two loops may allow either loop fusion or tiling and |
1617 | 18 | // are consequently interesting to look at. |
1618 | 18 | if (NumAffineLoops >= 2) |
1619 | 4 | return true; |
1620 | 14 | |
1621 | 14 | // A loop with multiple non-trivial blocks might be amendable to distribution. |
1622 | 14 | if (NumAffineLoops == 1 && hasPossiblyDistributableLoop(Context)10 ) |
1623 | 2 | return true; |
1624 | 12 | |
1625 | 12 | // Scops that contain a loop with a non-trivial amount of computation per |
1626 | 12 | // loop-iteration are interesting as we may be able to parallelize such |
1627 | 12 | // loops. Individual loops that have only a small amount of computation |
1628 | 12 | // per-iteration are performance-wise very fragile as any change to the |
1629 | 12 | // loop induction variables may affect performance. To not cause spurious |
1630 | 12 | // performance regressions, we do not consider such loops. |
1631 | 12 | if (NumAffineLoops == 1 && hasSufficientCompute(Context, NumLoops)8 ) |
1632 | 1 | return true; |
1633 | 11 | |
1634 | 11 | return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion); |
1635 | 11 | } |
1636 | | |
1637 | 4.54k | bool ScopDetection::isValidRegion(DetectionContext &Context) const { |
1638 | 4.54k | Region &CurRegion = Context.CurRegion; |
1639 | 4.54k | |
1640 | 4.54k | LLVM_DEBUG(dbgs() << "Checking region: " << CurRegion.getNameStr() << "\n\t"); |
1641 | 4.54k | |
1642 | 4.54k | if (!PollyAllowFullFunction && CurRegion.isTopLevelRegion()4.53k ) { |
1643 | 1.39k | LLVM_DEBUG(dbgs() << "Top level region is invalid\n"); |
1644 | 1.39k | return false; |
1645 | 1.39k | } |
1646 | 3.15k | |
1647 | 3.15k | DebugLoc DbgLoc; |
1648 | 3.15k | if (CurRegion.getExit() && |
1649 | 3.15k | isa<UnreachableInst>(CurRegion.getExit()->getTerminator())3.14k ) { |
1650 | 26 | LLVM_DEBUG(dbgs() << "Unreachable in exit\n"); |
1651 | 26 | return invalid<ReportUnreachableInExit>(Context, /*Assert=*/true, |
1652 | 26 | CurRegion.getExit(), DbgLoc); |
1653 | 26 | } |
1654 | 3.12k | |
1655 | 3.12k | if (!CurRegion.getEntry()->getName().count(OnlyRegion)) { |
1656 | 0 | LLVM_DEBUG({ |
1657 | 0 | dbgs() << "Region entry does not match -polly-region-only"; |
1658 | 0 | dbgs() << "\n"; |
1659 | 0 | }); |
1660 | 0 | return false; |
1661 | 0 | } |
1662 | 3.12k | |
1663 | 3.12k | // SCoP cannot contain the entry block of the function, because we need |
1664 | 3.12k | // to insert alloca instruction there when translate scalar to array. |
1665 | 3.12k | if (!PollyAllowFullFunction && |
1666 | 3.12k | CurRegion.getEntry() == |
1667 | 3.11k | &(CurRegion.getEntry()->getParent()->getEntryBlock())) |
1668 | 84 | return invalid<ReportEntry>(Context, /*Assert=*/true, CurRegion.getEntry()); |
1669 | 3.04k | |
1670 | 3.04k | if (!allBlocksValid(Context)) |
1671 | 323 | return false; |
1672 | 2.71k | |
1673 | 2.71k | if (!isReducibleRegion(CurRegion, DbgLoc)) |
1674 | 3 | return invalid<ReportIrreducibleRegion>(Context, /*Assert=*/true, |
1675 | 3 | &CurRegion, DbgLoc); |
1676 | 2.71k | |
1677 | 2.71k | LLVM_DEBUG(dbgs() << "OK\n"); |
1678 | 2.71k | return true; |
1679 | 2.71k | } |
1680 | | |
1681 | 0 | void ScopDetection::markFunctionAsInvalid(Function *F) { |
1682 | 0 | F->addFnAttr(PollySkipFnAttr); |
1683 | 0 | } |
1684 | | |
1685 | 1.44k | bool ScopDetection::isValidFunction(Function &F) { |
1686 | 1.44k | return !F.hasFnAttribute(PollySkipFnAttr); |
1687 | 1.44k | } |
1688 | | |
1689 | 2 | void ScopDetection::printLocations(Function &F) { |
1690 | 2 | for (const Region *R : *this) { |
1691 | 2 | unsigned LineEntry, LineExit; |
1692 | 2 | std::string FileName; |
1693 | 2 | |
1694 | 2 | getDebugLocation(R, LineEntry, LineExit, FileName); |
1695 | 2 | DiagnosticScopFound Diagnostic(F, FileName, LineEntry, LineExit); |
1696 | 2 | F.getContext().diagnose(Diagnostic); |
1697 | 2 | } |
1698 | 2 | } |
1699 | | |
1700 | 1.40k | void ScopDetection::emitMissedRemarks(const Function &F) { |
1701 | 3.69k | for (auto &DIt : DetectionContextMap) { |
1702 | 3.69k | auto &DC = DIt.getSecond(); |
1703 | 3.69k | if (DC.Log.hasErrors()) |
1704 | 447 | emitRejectionRemarks(DIt.getFirst(), DC.Log, ORE); |
1705 | 3.69k | } |
1706 | 1.40k | } |
1707 | | |
1708 | 2.71k | bool ScopDetection::isReducibleRegion(Region &R, DebugLoc &DbgLoc) const { |
1709 | 2.71k | /// Enum for coloring BBs in Region. |
1710 | 2.71k | /// |
1711 | 2.71k | /// WHITE - Unvisited BB in DFS walk. |
1712 | 2.71k | /// GREY - BBs which are currently on the DFS stack for processing. |
1713 | 2.71k | /// BLACK - Visited and completely processed BB. |
1714 | 2.71k | enum Color { WHITE, GREY, BLACK }; |
1715 | 2.71k | |
1716 | 2.71k | BasicBlock *REntry = R.getEntry(); |
1717 | 2.71k | BasicBlock *RExit = R.getExit(); |
1718 | 2.71k | // Map to match the color of a BasicBlock during the DFS walk. |
1719 | 2.71k | DenseMap<const BasicBlock *, Color> BBColorMap; |
1720 | 2.71k | // Stack keeping track of current BB and index of next child to be processed. |
1721 | 2.71k | std::stack<std::pair<BasicBlock *, unsigned>> DFSStack; |
1722 | 2.71k | |
1723 | 2.71k | unsigned AdjacentBlockIndex = 0; |
1724 | 2.71k | BasicBlock *CurrBB, *SuccBB; |
1725 | 2.71k | CurrBB = REntry; |
1726 | 2.71k | |
1727 | 2.71k | // Initialize the map for all BB with WHITE color. |
1728 | 2.71k | for (auto *BB : R.blocks()) |
1729 | 11.9k | BBColorMap[BB] = WHITE; |
1730 | 2.71k | |
1731 | 2.71k | // Process the entry block of the Region. |
1732 | 2.71k | BBColorMap[CurrBB] = GREY; |
1733 | 2.71k | DFSStack.push(std::make_pair(CurrBB, 0)); |
1734 | 2.71k | |
1735 | 23.8k | while (!DFSStack.empty()) { |
1736 | 21.1k | // Get next BB on stack to be processed. |
1737 | 21.1k | CurrBB = DFSStack.top().first; |
1738 | 21.1k | AdjacentBlockIndex = DFSStack.top().second; |
1739 | 21.1k | DFSStack.pop(); |
1740 | 21.1k | |
1741 | 21.1k | // Loop to iterate over the successors of current BB. |
1742 | 21.1k | const Instruction *TInst = CurrBB->getTerminator(); |
1743 | 21.1k | unsigned NSucc = TInst->getNumSuccessors(); |
1744 | 28.7k | for (unsigned I = AdjacentBlockIndex; I < NSucc; |
1745 | 21.1k | ++I, ++AdjacentBlockIndex7.58k ) { |
1746 | 16.8k | SuccBB = TInst->getSuccessor(I); |
1747 | 16.8k | |
1748 | 16.8k | // Checks for region exit block and self-loops in BB. |
1749 | 16.8k | if (SuccBB == RExit || SuccBB == CurrBB13.5k ) |
1750 | 4.19k | continue; |
1751 | 12.6k | |
1752 | 12.6k | // WHITE indicates an unvisited BB in DFS walk. |
1753 | 12.6k | if (BBColorMap[SuccBB] == WHITE) { |
1754 | 9.22k | // Push the current BB and the index of the next child to be visited. |
1755 | 9.22k | DFSStack.push(std::make_pair(CurrBB, I + 1)); |
1756 | 9.22k | // Push the next BB to be processed. |
1757 | 9.22k | DFSStack.push(std::make_pair(SuccBB, 0)); |
1758 | 9.22k | // First time the BB is being processed. |
1759 | 9.22k | BBColorMap[SuccBB] = GREY; |
1760 | 9.22k | break; |
1761 | 9.22k | } else if (3.38k BBColorMap[SuccBB] == GREY3.38k ) { |
1762 | 2.63k | // GREY indicates a loop in the control flow. |
1763 | 2.63k | // If the destination dominates the source, it is a natural loop |
1764 | 2.63k | // else, an irreducible control flow in the region is detected. |
1765 | 2.63k | if (!DT.dominates(SuccBB, CurrBB)) { |
1766 | 3 | // Get debug info of instruction which causes irregular control flow. |
1767 | 3 | DbgLoc = TInst->getDebugLoc(); |
1768 | 3 | return false; |
1769 | 3 | } |
1770 | 2.63k | } |
1771 | 12.6k | } |
1772 | 21.1k | |
1773 | 21.1k | // If all children of current BB have been processed, |
1774 | 21.1k | // then mark that BB as fully processed. |
1775 | 21.1k | if (21.1k AdjacentBlockIndex == NSucc21.1k ) |
1776 | 11.9k | BBColorMap[CurrBB] = BLACK; |
1777 | 21.1k | } |
1778 | 2.71k | |
1779 | 2.71k | return true2.71k ; |
1780 | 2.71k | } |
1781 | | |
1782 | | static void updateLoopCountStatistic(ScopDetection::LoopStats Stats, |
1783 | 2.58k | bool OnlyProfitable) { |
1784 | 2.58k | if (!OnlyProfitable) { |
1785 | 1.29k | NumLoopsInScop += Stats.NumLoops; |
1786 | 1.29k | MaxNumLoopsInScop = |
1787 | 1.29k | std::max(MaxNumLoopsInScop.getValue(), (unsigned)Stats.NumLoops); |
1788 | 1.29k | if (Stats.MaxDepth == 0) |
1789 | 104 | NumScopsDepthZero++; |
1790 | 1.19k | else if (Stats.MaxDepth == 1) |
1791 | 844 | NumScopsDepthOne++; |
1792 | 351 | else if (Stats.MaxDepth == 2) |
1793 | 266 | NumScopsDepthTwo++; |
1794 | 85 | else if (Stats.MaxDepth == 3) |
1795 | 81 | NumScopsDepthThree++; |
1796 | 4 | else if (Stats.MaxDepth == 4) |
1797 | 4 | NumScopsDepthFour++; |
1798 | 0 | else if (Stats.MaxDepth == 5) |
1799 | 0 | NumScopsDepthFive++; |
1800 | 0 | else |
1801 | 0 | NumScopsDepthLarger++; |
1802 | 1.29k | } else { |
1803 | 1.28k | NumLoopsInProfScop += Stats.NumLoops; |
1804 | 1.28k | MaxNumLoopsInProfScop = |
1805 | 1.28k | std::max(MaxNumLoopsInProfScop.getValue(), (unsigned)Stats.NumLoops); |
1806 | 1.28k | if (Stats.MaxDepth == 0) |
1807 | 104 | NumProfScopsDepthZero++; |
1808 | 1.18k | else if (Stats.MaxDepth == 1) |
1809 | 836 | NumProfScopsDepthOne++; |
1810 | 344 | else if (Stats.MaxDepth == 2) |
1811 | 260 | NumProfScopsDepthTwo++; |
1812 | 84 | else if (Stats.MaxDepth == 3) |
1813 | 81 | NumProfScopsDepthThree++; |
1814 | 3 | else if (Stats.MaxDepth == 4) |
1815 | 3 | NumProfScopsDepthFour++; |
1816 | 0 | else if (Stats.MaxDepth == 5) |
1817 | 0 | NumProfScopsDepthFive++; |
1818 | 0 | else |
1819 | 0 | NumProfScopsDepthLarger++; |
1820 | 1.28k | } |
1821 | 2.58k | } |
1822 | | |
1823 | | ScopDetection::DetectionContext * |
1824 | 2.50k | ScopDetection::getDetectionContext(const Region *R) const { |
1825 | 2.50k | auto DCMIt = DetectionContextMap.find(getBBPairForRegion(R)); |
1826 | 2.50k | if (DCMIt == DetectionContextMap.end()) |
1827 | 1 | return nullptr; |
1828 | 2.50k | return &DCMIt->second; |
1829 | 2.50k | } |
1830 | | |
1831 | 1.29k | const RejectLog *ScopDetection::lookupRejectionLog(const Region *R) const { |
1832 | 1.29k | const DetectionContext *DC = getDetectionContext(R); |
1833 | 1.29k | return DC ? &DC->Log1.29k : nullptr1 ; |
1834 | 1.29k | } |
1835 | | |
1836 | 0 | void ScopDetection::verifyRegion(const Region &R) const { |
1837 | 0 | assert(isMaxRegionInScop(R) && "Expect R is a valid region."); |
1838 | 0 |
|
1839 | 0 | DetectionContext Context(const_cast<Region &>(R), AA, true /*verifying*/); |
1840 | 0 | isValidRegion(Context); |
1841 | 0 | } |
1842 | | |
1843 | 0 | void ScopDetection::verifyAnalysis() const { |
1844 | 0 | if (!VerifyScops) |
1845 | 0 | return; |
1846 | 0 | |
1847 | 0 | for (const Region *R : ValidRegions) |
1848 | 0 | verifyRegion(*R); |
1849 | 0 | } |
1850 | | |
1851 | 1.44k | bool ScopDetectionWrapperPass::runOnFunction(Function &F) { |
1852 | 1.44k | auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); |
1853 | 1.44k | auto &RI = getAnalysis<RegionInfoPass>().getRegionInfo(); |
1854 | 1.44k | auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); |
1855 | 1.44k | auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); |
1856 | 1.44k | auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); |
1857 | 1.44k | auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); |
1858 | 1.44k | Result.reset(new ScopDetection(F, DT, SE, LI, RI, AA, ORE)); |
1859 | 1.44k | return false; |
1860 | 1.44k | } |
1861 | | |
1862 | 1.29k | void ScopDetectionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { |
1863 | 1.29k | AU.addRequired<LoopInfoWrapperPass>(); |
1864 | 1.29k | AU.addRequiredTransitive<ScalarEvolutionWrapperPass>(); |
1865 | 1.29k | AU.addRequired<DominatorTreeWrapperPass>(); |
1866 | 1.29k | AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); |
1867 | 1.29k | // We also need AA and RegionInfo when we are verifying analysis. |
1868 | 1.29k | AU.addRequiredTransitive<AAResultsWrapperPass>(); |
1869 | 1.29k | AU.addRequiredTransitive<RegionInfoPass>(); |
1870 | 1.29k | AU.setPreservesAll(); |
1871 | 1.29k | } |
1872 | | |
1873 | 128 | void ScopDetectionWrapperPass::print(raw_ostream &OS, const Module *) const { |
1874 | 128 | for (const Region *R : Result->ValidRegions) |
1875 | 76 | OS << "Valid Region for Scop: " << R->getNameStr() << '\n'; |
1876 | 128 | |
1877 | 128 | OS << "\n"; |
1878 | 128 | } |
1879 | | |
1880 | 1.29k | ScopDetectionWrapperPass::ScopDetectionWrapperPass() : FunctionPass(ID) { |
1881 | 1.29k | // Disable runtime alias checks if we ignore aliasing all together. |
1882 | 1.29k | if (IgnoreAliasing) |
1883 | 20 | PollyUseRuntimeAliasChecks = false; |
1884 | 1.29k | } |
1885 | | |
1886 | 1.25k | ScopAnalysis::ScopAnalysis() { |
1887 | 1.25k | // Disable runtime alias checks if we ignore aliasing all together. |
1888 | 1.25k | if (IgnoreAliasing) |
1889 | 0 | PollyUseRuntimeAliasChecks = false; |
1890 | 1.25k | } |
1891 | | |
1892 | 1.42k | void ScopDetectionWrapperPass::releaseMemory() { Result.reset(); } |
1893 | | |
1894 | | char ScopDetectionWrapperPass::ID; |
1895 | | |
1896 | | AnalysisKey ScopAnalysis::Key; |
1897 | | |
1898 | 3 | ScopDetection ScopAnalysis::run(Function &F, FunctionAnalysisManager &FAM) { |
1899 | 3 | auto &LI = FAM.getResult<LoopAnalysis>(F); |
1900 | 3 | auto &RI = FAM.getResult<RegionInfoAnalysis>(F); |
1901 | 3 | auto &AA = FAM.getResult<AAManager>(F); |
1902 | 3 | auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F); |
1903 | 3 | auto &DT = FAM.getResult<DominatorTreeAnalysis>(F); |
1904 | 3 | auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); |
1905 | 3 | return {F, DT, SE, LI, RI, AA, ORE}; |
1906 | 3 | } |
1907 | | |
1908 | | PreservedAnalyses ScopAnalysisPrinterPass::run(Function &F, |
1909 | 0 | FunctionAnalysisManager &FAM) { |
1910 | 0 | OS << "Detected Scops in Function " << F.getName() << "\n"; |
1911 | 0 | auto &SD = FAM.getResult<ScopAnalysis>(F); |
1912 | 0 | for (const Region *R : SD.ValidRegions) |
1913 | 0 | OS << "Valid Region for Scop: " << R->getNameStr() << '\n'; |
1914 | 0 |
|
1915 | 0 | OS << "\n"; |
1916 | 0 | return PreservedAnalyses::all(); |
1917 | 0 | } |
1918 | | |
1919 | 0 | Pass *polly::createScopDetectionWrapperPassPass() { |
1920 | 0 | return new ScopDetectionWrapperPass(); |
1921 | 0 | } |
1922 | | |
1923 | 48.2k | INITIALIZE_PASS_BEGIN(ScopDetectionWrapperPass, "polly-detect", |
1924 | 48.2k | "Polly - Detect static control parts (SCoPs)", false, |
1925 | 48.2k | false); |
1926 | 48.2k | INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass); |
1927 | 48.2k | INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass); |
1928 | 48.2k | INITIALIZE_PASS_DEPENDENCY(RegionInfoPass); |
1929 | 48.2k | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass); |
1930 | 48.2k | INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass); |
1931 | 48.2k | INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass); |
1932 | 48.2k | INITIALIZE_PASS_END(ScopDetectionWrapperPass, "polly-detect", |
1933 | | "Polly - Detect static control parts (SCoPs)", false, false) |