/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/tools/polly/lib/Transform/ForwardOpTree.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- ForwardOpTree.h ------------------------------------------*- C++ -*-===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | // Move instructions between statements. |
11 | | // |
12 | | //===----------------------------------------------------------------------===// |
13 | | |
14 | | #include "polly/ForwardOpTree.h" |
15 | | #include "polly/Options.h" |
16 | | #include "polly/ScopBuilder.h" |
17 | | #include "polly/ScopInfo.h" |
18 | | #include "polly/ScopPass.h" |
19 | | #include "polly/Support/GICHelper.h" |
20 | | #include "polly/Support/ISLOStream.h" |
21 | | #include "polly/Support/ISLTools.h" |
22 | | #include "polly/Support/VirtualInstruction.h" |
23 | | #include "polly/ZoneAlgo.h" |
24 | | #include "llvm/ADT/STLExtras.h" |
25 | | #include "llvm/ADT/SmallVector.h" |
26 | | #include "llvm/ADT/Statistic.h" |
27 | | #include "llvm/Analysis/LoopInfo.h" |
28 | | #include "llvm/Analysis/ValueTracking.h" |
29 | | #include "llvm/IR/Instruction.h" |
30 | | #include "llvm/IR/Instructions.h" |
31 | | #include "llvm/IR/Value.h" |
32 | | #include "llvm/Pass.h" |
33 | | #include "llvm/Support/Casting.h" |
34 | | #include "llvm/Support/CommandLine.h" |
35 | | #include "llvm/Support/Compiler.h" |
36 | | #include "llvm/Support/Debug.h" |
37 | | #include "llvm/Support/ErrorHandling.h" |
38 | | #include "llvm/Support/raw_ostream.h" |
39 | | #include "isl/ctx.h" |
40 | | #include "isl/isl-noexceptions.h" |
41 | | #include <cassert> |
42 | | #include <memory> |
43 | | |
44 | | #define DEBUG_TYPE "polly-optree" |
45 | | |
46 | | using namespace llvm; |
47 | | using namespace polly; |
48 | | |
49 | | static cl::opt<bool> |
50 | | AnalyzeKnown("polly-optree-analyze-known", |
51 | | cl::desc("Analyze array contents for load forwarding"), |
52 | | cl::cat(PollyCategory), cl::init(true), cl::Hidden); |
53 | | |
54 | | static cl::opt<bool> |
55 | | NormalizePHIs("polly-optree-normalize-phi", |
56 | | cl::desc("Replace PHIs by their incoming values"), |
57 | | cl::cat(PollyCategory), cl::init(false), cl::Hidden); |
58 | | |
59 | | static cl::opt<unsigned> |
60 | | MaxOps("polly-optree-max-ops", |
61 | | cl::desc("Maximum number of ISL operations to invest for known " |
62 | | "analysis; 0=no limit"), |
63 | | cl::init(1000000), cl::cat(PollyCategory), cl::Hidden); |
64 | | |
65 | | STATISTIC(KnownAnalyzed, "Number of successfully analyzed SCoPs"); |
66 | | STATISTIC(KnownOutOfQuota, |
67 | | "Analyses aborted because max_operations was reached"); |
68 | | |
69 | | STATISTIC(TotalInstructionsCopied, "Number of copied instructions"); |
70 | | STATISTIC(TotalKnownLoadsForwarded, |
71 | | "Number of forwarded loads because their value was known"); |
72 | | STATISTIC(TotalReloads, "Number of reloaded values"); |
73 | | STATISTIC(TotalReadOnlyCopied, "Number of copied read-only accesses"); |
74 | | STATISTIC(TotalForwardedTrees, "Number of forwarded operand trees"); |
75 | | STATISTIC(TotalModifiedStmts, |
76 | | "Number of statements with at least one forwarded tree"); |
77 | | |
78 | | STATISTIC(ScopsModified, "Number of SCoPs with at least one forwarded tree"); |
79 | | |
80 | | STATISTIC(NumValueWrites, "Number of scalar value writes after OpTree"); |
81 | | STATISTIC(NumValueWritesInLoops, |
82 | | "Number of scalar value writes nested in affine loops after OpTree"); |
83 | | STATISTIC(NumPHIWrites, "Number of scalar phi writes after OpTree"); |
84 | | STATISTIC(NumPHIWritesInLoops, |
85 | | "Number of scalar phi writes nested in affine loops after OpTree"); |
86 | | STATISTIC(NumSingletonWrites, "Number of singleton writes after OpTree"); |
87 | | STATISTIC(NumSingletonWritesInLoops, |
88 | | "Number of singleton writes nested in affine loops after OpTree"); |
89 | | |
90 | | namespace { |
91 | | |
92 | | /// The state of whether an operand tree was/can be forwarded. |
93 | | /// |
94 | | /// The items apply to an instructions and its operand tree with the instruction |
95 | | /// as the root element. If the value in question is not an instruction in the |
96 | | /// SCoP, it can be a leaf of an instruction's operand tree. |
97 | | enum ForwardingDecision { |
98 | | /// The root instruction or value cannot be forwarded at all. |
99 | | FD_CannotForward, |
100 | | |
101 | | /// The root instruction or value can be forwarded as a leaf of a larger |
102 | | /// operand tree. |
103 | | /// It does not make sense to move the value itself, it would just replace it |
104 | | /// by a use of itself. For instance, a constant "5" used in a statement can |
105 | | /// be forwarded, but it would just replace it by the same constant "5". |
106 | | /// However, it makes sense to move as an operand of |
107 | | /// |
108 | | /// %add = add 5, 5 |
109 | | /// |
110 | | /// where "5" is moved as part of a larger operand tree. "5" would be placed |
111 | | /// (disregarding for a moment that literal constants don't have a location |
112 | | /// and can be used anywhere) into the same statement as %add would. |
113 | | FD_CanForwardLeaf, |
114 | | |
115 | | /// The root instruction can be forwarded and doing so avoids a scalar |
116 | | /// dependency. |
117 | | /// |
118 | | /// This can be either because the operand tree can be moved to the target |
119 | | /// statement, or a memory access is redirected to read from a different |
120 | | /// location. |
121 | | FD_CanForwardProfitably, |
122 | | |
123 | | /// Used to indicate that a forwarding has be carried out successfully, and |
124 | | /// the forwarded memory access can be deleted. |
125 | | FD_DidForwardTree, |
126 | | |
127 | | /// Used to indicate that a forwarding has be carried out successfully, and |
128 | | /// the forwarded memory access is being reused. |
129 | | FD_DidForwardLeaf, |
130 | | |
131 | | /// A forwarding method cannot be applied to the operand tree. |
132 | | /// The difference to FD_CannotForward is that there might be other methods |
133 | | /// that can handle it. |
134 | | /// The conditions that make an operand tree applicable must be checked even |
135 | | /// with DoIt==true because a method following the one that returned |
136 | | /// FD_NotApplicable might have returned FD_CanForwardTree. |
137 | | FD_NotApplicable |
138 | | }; |
139 | | |
140 | | /// Implementation of operand tree forwarding for a specific SCoP. |
141 | | /// |
142 | | /// For a statement that requires a scalar value (through a value read |
143 | | /// MemoryAccess), see if its operand can be moved into the statement. If so, |
144 | | /// the MemoryAccess is removed and the all the operand tree instructions are |
145 | | /// moved into the statement. All original instructions are left in the source |
146 | | /// statements. The simplification pass can clean these up. |
147 | | class ForwardOpTreeImpl : ZoneAlgorithm { |
148 | | private: |
149 | | /// Scope guard to limit the number of isl operations for this pass. |
150 | | IslMaxOperationsGuard &MaxOpGuard; |
151 | | |
152 | | /// How many instructions have been copied to other statements. |
153 | | int NumInstructionsCopied = 0; |
154 | | |
155 | | /// Number of loads forwarded because their value was known. |
156 | | int NumKnownLoadsForwarded = 0; |
157 | | |
158 | | /// Number of values reloaded from known array elements. |
159 | | int NumReloads = 0; |
160 | | |
161 | | /// How many read-only accesses have been copied. |
162 | | int NumReadOnlyCopied = 0; |
163 | | |
164 | | /// How many operand trees have been forwarded. |
165 | | int NumForwardedTrees = 0; |
166 | | |
167 | | /// Number of statements with at least one forwarded operand tree. |
168 | | int NumModifiedStmts = 0; |
169 | | |
170 | | /// Whether we carried out at least one change to the SCoP. |
171 | | bool Modified = false; |
172 | | |
173 | | /// Contains the zones where array elements are known to contain a specific |
174 | | /// value. |
175 | | /// { [Element[] -> Zone[]] -> ValInst[] } |
176 | | /// @see computeKnown() |
177 | | isl::union_map Known; |
178 | | |
179 | | /// Translator for newly introduced ValInsts to already existing ValInsts such |
180 | | /// that new introduced load instructions can reuse the Known analysis of its |
181 | | /// original load. { ValInst[] -> ValInst[] } |
182 | | isl::union_map Translator; |
183 | | |
184 | | /// Get list of array elements that do contain the same ValInst[] at Domain[]. |
185 | | /// |
186 | | /// @param ValInst { Domain[] -> ValInst[] } |
187 | | /// The values for which we search for alternative locations, |
188 | | /// per statement instance. |
189 | | /// |
190 | | /// @return { Domain[] -> Element[] } |
191 | | /// For each statement instance, the array elements that contain the |
192 | | /// same ValInst. |
193 | 65 | isl::union_map findSameContentElements(isl::union_map ValInst) { |
194 | 65 | assert(!ValInst.is_single_valued().is_false()); |
195 | 65 | |
196 | 65 | // { Domain[] } |
197 | 65 | isl::union_set Domain = ValInst.domain(); |
198 | 65 | |
199 | 65 | // { Domain[] -> Scatter[] } |
200 | 65 | isl::union_map Schedule = getScatterFor(Domain); |
201 | 65 | |
202 | 65 | // { Element[] -> [Scatter[] -> ValInst[]] } |
203 | 65 | isl::union_map MustKnownCurried = |
204 | 65 | convertZoneToTimepoints(Known, isl::dim::in, false, true).curry(); |
205 | 65 | |
206 | 65 | // { [Domain[] -> ValInst[]] -> Scatter[] } |
207 | 65 | isl::union_map DomValSched = ValInst.domain_map().apply_range(Schedule); |
208 | 65 | |
209 | 65 | // { [Scatter[] -> ValInst[]] -> [Domain[] -> ValInst[]] } |
210 | 65 | isl::union_map SchedValDomVal = |
211 | 65 | DomValSched.range_product(ValInst.range_map()).reverse(); |
212 | 65 | |
213 | 65 | // { Element[] -> [Domain[] -> ValInst[]] } |
214 | 65 | isl::union_map MustKnownInst = MustKnownCurried.apply_range(SchedValDomVal); |
215 | 65 | |
216 | 65 | // { Domain[] -> Element[] } |
217 | 65 | isl::union_map MustKnownMap = |
218 | 65 | MustKnownInst.uncurry().domain().unwrap().reverse(); |
219 | 65 | simplify(MustKnownMap); |
220 | 65 | |
221 | 65 | return MustKnownMap; |
222 | 65 | } |
223 | | |
224 | | /// Find a single array element for each statement instance, within a single |
225 | | /// array. |
226 | | /// |
227 | | /// @param MustKnown { Domain[] -> Element[] } |
228 | | /// Set of candidate array elements. |
229 | | /// @param Domain { Domain[] } |
230 | | /// The statement instance for which we need elements for. |
231 | | /// |
232 | | /// @return { Domain[] -> Element[] } |
233 | | /// For each statement instance, an array element out of @p MustKnown. |
234 | | /// All array elements must be in the same array (Polly does not yet |
235 | | /// support reading from different accesses using the same |
236 | | /// MemoryAccess). If no mapping for all of @p Domain exists, returns |
237 | | /// null. |
238 | 65 | isl::map singleLocation(isl::union_map MustKnown, isl::set Domain) { |
239 | 65 | // { Domain[] -> Element[] } |
240 | 65 | isl::map Result; |
241 | 65 | |
242 | 65 | // MemoryAccesses can read only elements from a single array |
243 | 65 | // (i.e. not: { Dom[0] -> A[0]; Dom[1] -> B[1] }). |
244 | 65 | // Look through all spaces until we find one that contains at least the |
245 | 65 | // wanted statement instance.s |
246 | 65 | MustKnown.foreach_map([&](isl::map Map) -> isl::stat { |
247 | 54 | // Get the array this is accessing. |
248 | 54 | isl::id ArrayId = Map.get_tuple_id(isl::dim::out); |
249 | 54 | ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(ArrayId.get_user()); |
250 | 54 | |
251 | 54 | // No support for generation of indirect array accesses. |
252 | 54 | if (SAI->getBasePtrOriginSAI()) |
253 | 0 | return isl::stat::ok; // continue |
254 | 54 | |
255 | 54 | // Determine whether this map contains all wanted values. |
256 | 54 | isl::set MapDom = Map.domain(); |
257 | 54 | if (!Domain.is_subset(MapDom).is_true()) |
258 | 2 | return isl::stat::ok; // continue |
259 | 52 | |
260 | 52 | // There might be multiple array elements that contain the same value, but |
261 | 52 | // choose only one of them. lexmin is used because it returns a one-value |
262 | 52 | // mapping, we do not care about which one. |
263 | 52 | // TODO: Get the simplest access function. |
264 | 52 | Result = Map.lexmin(); |
265 | 52 | return isl::stat::error; // break |
266 | 52 | }); |
267 | 65 | |
268 | 65 | return Result; |
269 | 65 | } |
270 | | |
271 | | public: |
272 | | ForwardOpTreeImpl(Scop *S, LoopInfo *LI, IslMaxOperationsGuard &MaxOpGuard) |
273 | 34 | : ZoneAlgorithm("polly-optree", S, LI), MaxOpGuard(MaxOpGuard) {} |
274 | | |
275 | | /// Compute the zones of known array element contents. |
276 | | /// |
277 | | /// @return True if the computed #Known is usable. |
278 | 34 | bool computeKnownValues() { |
279 | 34 | isl::union_map MustKnown, KnownFromLoad, KnownFromInit; |
280 | 34 | |
281 | 34 | // Check that nothing strange occurs. |
282 | 34 | collectCompatibleElts(); |
283 | 34 | |
284 | 34 | { |
285 | 34 | IslQuotaScope QuotaScope = MaxOpGuard.enter(); |
286 | 34 | |
287 | 34 | computeCommon(); |
288 | 34 | if (NormalizePHIs) |
289 | 4 | computeNormalizedPHIs(); |
290 | 34 | Known = computeKnown(true, true); |
291 | 34 | |
292 | 34 | // Preexisting ValInsts use the known content analysis of themselves. |
293 | 34 | Translator = makeIdentityMap(Known.range(), false); |
294 | 34 | } |
295 | 34 | |
296 | 34 | if (!Known || !Translator || !NormalizeMap) { |
297 | 0 | assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota); |
298 | 0 | Known = nullptr; |
299 | 0 | Translator = nullptr; |
300 | 0 | NormalizeMap = nullptr; |
301 | 0 | DEBUG(dbgs() << "Known analysis exceeded max_operations\n"); |
302 | 0 | return false; |
303 | 0 | } |
304 | 34 | |
305 | 34 | KnownAnalyzed++; |
306 | 34 | DEBUG(dbgs() << "All known: " << Known << "\n"); |
307 | 34 | |
308 | 34 | return true; |
309 | 34 | } |
310 | | |
311 | 33 | void printStatistics(raw_ostream &OS, int Indent = 0) { |
312 | 33 | OS.indent(Indent) << "Statistics {\n"; |
313 | 33 | OS.indent(Indent + 4) << "Instructions copied: " << NumInstructionsCopied |
314 | 33 | << '\n'; |
315 | 33 | OS.indent(Indent + 4) << "Known loads forwarded: " << NumKnownLoadsForwarded |
316 | 33 | << '\n'; |
317 | 33 | OS.indent(Indent + 4) << "Reloads: " << NumReloads << '\n'; |
318 | 33 | OS.indent(Indent + 4) << "Read-only accesses copied: " << NumReadOnlyCopied |
319 | 33 | << '\n'; |
320 | 33 | OS.indent(Indent + 4) << "Operand trees forwarded: " << NumForwardedTrees |
321 | 33 | << '\n'; |
322 | 33 | OS.indent(Indent + 4) << "Statements with forwarded operand trees: " |
323 | 33 | << NumModifiedStmts << '\n'; |
324 | 33 | OS.indent(Indent) << "}\n"; |
325 | 33 | } |
326 | | |
327 | 25 | void printStatements(raw_ostream &OS, int Indent = 0) const { |
328 | 25 | OS.indent(Indent) << "After statements {\n"; |
329 | 62 | for (auto &Stmt : *S) { |
330 | 62 | OS.indent(Indent + 4) << Stmt.getBaseName() << "\n"; |
331 | 62 | for (auto *MA : Stmt) |
332 | 120 | MA->print(OS); |
333 | 62 | |
334 | 62 | OS.indent(Indent + 12); |
335 | 62 | Stmt.printInstructions(OS); |
336 | 62 | } |
337 | 25 | OS.indent(Indent) << "}\n"; |
338 | 25 | } |
339 | | |
340 | | /// Create a new MemoryAccess of type read and MemoryKind::Array. |
341 | | /// |
342 | | /// @param Stmt The statement in which the access occurs. |
343 | | /// @param LI The instruction that does the access. |
344 | | /// @param AccessRelation The array element that each statement instance |
345 | | /// accesses. |
346 | | /// |
347 | | /// @param The newly created access. |
348 | | MemoryAccess *makeReadArrayAccess(ScopStmt *Stmt, LoadInst *LI, |
349 | 16 | isl::map AccessRelation) { |
350 | 16 | isl::id ArrayId = AccessRelation.get_tuple_id(isl::dim::out); |
351 | 16 | ScopArrayInfo *SAI = reinterpret_cast<ScopArrayInfo *>(ArrayId.get_user()); |
352 | 16 | |
353 | 16 | // Create a dummy SCEV access, to be replaced anyway. |
354 | 16 | SmallVector<const SCEV *, 4> Sizes; |
355 | 16 | Sizes.reserve(SAI->getNumberOfDimensions()); |
356 | 16 | SmallVector<const SCEV *, 4> Subscripts; |
357 | 16 | Subscripts.reserve(SAI->getNumberOfDimensions()); |
358 | 32 | for (unsigned i = 0; i < SAI->getNumberOfDimensions(); i += 116 ) { |
359 | 16 | Sizes.push_back(SAI->getDimensionSize(i)); |
360 | 16 | Subscripts.push_back(nullptr); |
361 | 16 | } |
362 | 16 | |
363 | 16 | MemoryAccess *Access = |
364 | 16 | new MemoryAccess(Stmt, LI, MemoryAccess::READ, SAI->getBasePtr(), |
365 | 16 | LI->getType(), true, {}, Sizes, LI, MemoryKind::Array); |
366 | 16 | S->addAccessFunction(Access); |
367 | 16 | Stmt->addAccess(Access, true); |
368 | 16 | |
369 | 16 | Access->setNewAccessRelation(AccessRelation); |
370 | 16 | |
371 | 16 | return Access; |
372 | 16 | } |
373 | | |
374 | | /// For an llvm::Value defined in @p DefStmt, compute the RAW dependency for a |
375 | | /// use in every instance of @p UseStmt. |
376 | | /// |
377 | | /// @param UseStmt Statement a scalar is used in. |
378 | | /// @param DefStmt Statement a scalar is defined in. |
379 | | /// |
380 | | /// @return { DomainUse[] -> DomainDef[] } |
381 | 72 | isl::map computeUseToDefFlowDependency(ScopStmt *UseStmt, ScopStmt *DefStmt) { |
382 | 72 | // { DomainUse[] -> Scatter[] } |
383 | 72 | isl::map UseScatter = getScatterFor(UseStmt); |
384 | 72 | |
385 | 72 | // { Zone[] -> DomainDef[] } |
386 | 72 | isl::map ReachDefZone = getScalarReachingDefinition(DefStmt); |
387 | 72 | |
388 | 72 | // { Scatter[] -> DomainDef[] } |
389 | 72 | isl::map ReachDefTimepoints = |
390 | 72 | convertZoneToTimepoints(ReachDefZone, isl::dim::in, false, true); |
391 | 72 | |
392 | 72 | // { DomainUse[] -> DomainDef[] } |
393 | 72 | return UseScatter.apply_range(ReachDefTimepoints); |
394 | 72 | } |
395 | | |
396 | | /// Forward a load by reading from an array element that contains the same |
397 | | /// value. Typically the location it was loaded from. |
398 | | /// |
399 | | /// @param TargetStmt The statement the operand tree will be copied to. |
400 | | /// @param Inst The (possibly speculatable) instruction to forward. |
401 | | /// @param UseStmt The statement that uses @p Inst. |
402 | | /// @param UseLoop The loop @p Inst is used in. |
403 | | /// @param UseToTarget { DomainUse[] -> DomainTarget[] } |
404 | | /// A mapping from the statement instance @p Inst is used |
405 | | /// to the statement instance it is forwarded to. |
406 | | /// @param DefStmt The statement @p Inst is defined in. |
407 | | /// @param DefLoop The loop which contains @p Inst. |
408 | | /// @param DefToTarget { DomainDef[] -> DomainTarget[] } |
409 | | /// A mapping from the statement instance @p Inst is |
410 | | /// defined to the statement instance it is forwarded to. |
411 | | /// @param DoIt If false, only determine whether an operand tree can be |
412 | | /// forwarded. If true, carry out the forwarding. Do not |
413 | | /// use DoIt==true if an operand tree is not known to be |
414 | | /// forwardable. |
415 | | /// |
416 | | /// @return FD_NotApplicable if @p Inst cannot be forwarded by creating a new |
417 | | /// load. |
418 | | /// FD_CannotForward if the pointer operand cannot be forwarded. |
419 | | /// FD_CanForwardProfitably if @p Inst is forwardable. |
420 | | /// FD_DidForwardTree if @p DoIt was true. |
421 | | ForwardingDecision forwardKnownLoad(ScopStmt *TargetStmt, Instruction *Inst, |
422 | | ScopStmt *UseStmt, Loop *UseLoop, |
423 | | isl::map UseToTarget, ScopStmt *DefStmt, |
424 | | Loop *DefLoop, isl::map DefToTarget, |
425 | 66 | bool DoIt) { |
426 | 66 | // Cannot do anything without successful known analysis. |
427 | 66 | if (Known.is_null() || Translator.is_null() || UseToTarget.is_null() || |
428 | 66 | DefToTarget.is_null() || MaxOpGuard.hasQuotaExceeded()) |
429 | 0 | return FD_NotApplicable; |
430 | 66 | |
431 | 66 | LoadInst *LI = dyn_cast<LoadInst>(Inst); |
432 | 66 | if (!LI) |
433 | 21 | return FD_NotApplicable; |
434 | 45 | |
435 | 45 | // If the load is already in the statement, no forwarding is necessary. |
436 | 45 | // However, it might happen that the LoadInst is already present in the |
437 | 45 | // statement's instruction list. In that case we do as follows: |
438 | 45 | // - For the evaluation (DoIt==false), we can trivially forward it as it is |
439 | 45 | // benefit of forwarding an already present instruction. |
440 | 45 | // - For the execution (DoIt==true), prepend the instruction (to make it |
441 | 45 | // available to all instructions following in the instruction list), but |
442 | 45 | // do not add another MemoryAccess. |
443 | 45 | MemoryAccess *Access = TargetStmt->getArrayAccessOrNULLFor(LI); |
444 | 45 | if (Access && !DoIt9 ) |
445 | 4 | return FD_CanForwardProfitably; |
446 | 41 | |
447 | 41 | ForwardingDecision OpDecision = |
448 | 41 | forwardTree(TargetStmt, LI->getPointerOperand(), DefStmt, DefLoop, |
449 | 41 | DefToTarget, DoIt); |
450 | 41 | switch (OpDecision) { |
451 | 41 | case FD_CannotForward: |
452 | 0 | assert(!DoIt); |
453 | 0 | return OpDecision; |
454 | 41 | |
455 | 41 | case FD_CanForwardLeaf: |
456 | 20 | case FD_CanForwardProfitably: |
457 | 20 | assert(!DoIt); |
458 | 20 | break; |
459 | 20 | |
460 | 21 | case FD_DidForwardLeaf: |
461 | 21 | case FD_DidForwardTree: |
462 | 21 | assert(DoIt); |
463 | 21 | break; |
464 | 21 | |
465 | 21 | default: |
466 | 0 | llvm_unreachable("Shouldn't return this"); |
467 | 41 | } |
468 | 41 | |
469 | 41 | IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt); |
470 | 41 | |
471 | 41 | // { DomainDef[] -> ValInst[] } |
472 | 41 | isl::map ExpectedVal = makeValInst(Inst, UseStmt, UseLoop); |
473 | 41 | assert(isNormalized(ExpectedVal) && "LoadInsts are always normalized"); |
474 | 41 | |
475 | 41 | // { DomainTarget[] -> ValInst[] } |
476 | 41 | isl::map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget); |
477 | 41 | isl::union_map TranslatedExpectedVal = |
478 | 41 | isl::union_map(TargetExpectedVal).apply_range(Translator); |
479 | 41 | |
480 | 41 | // { DomainTarget[] -> Element[] } |
481 | 41 | isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal); |
482 | 41 | |
483 | 41 | isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt)); |
484 | 41 | if (!SameVal) |
485 | 3 | return FD_NotApplicable; |
486 | 38 | |
487 | 38 | if (DoIt) |
488 | 21 | TargetStmt->prependInstruction(LI); |
489 | 38 | |
490 | 38 | if (!DoIt) |
491 | 17 | return FD_CanForwardProfitably; |
492 | 21 | |
493 | 21 | if (Access) { |
494 | 5 | DEBUG(dbgs() << " forwarded known load with preexisting MemoryAccess" |
495 | 5 | << Access << "\n"); |
496 | 16 | } else { |
497 | 16 | Access = makeReadArrayAccess(TargetStmt, LI, SameVal); |
498 | 16 | DEBUG(dbgs() << " forwarded known load with new MemoryAccess" << Access |
499 | 16 | << "\n"); |
500 | 16 | |
501 | 16 | // { ValInst[] } |
502 | 16 | isl::space ValInstSpace = ExpectedVal.get_space().range(); |
503 | 16 | |
504 | 16 | // After adding a new load to the SCoP, also update the Known content |
505 | 16 | // about it. The new load will have a known ValInst of |
506 | 16 | // { [DomainTarget[] -> Value[]] } |
507 | 16 | // but which -- because it is a copy of it -- has same value as the |
508 | 16 | // { [DomainDef[] -> Value[]] } |
509 | 16 | // that it replicates. Instead of cloning the known content of |
510 | 16 | // [DomainDef[] -> Value[]] |
511 | 16 | // for DomainTarget[], we add a 'translator' that maps |
512 | 16 | // [DomainTarget[] -> Value[]] to [DomainDef[] -> Value[]] |
513 | 16 | // before comparing to the known content. |
514 | 16 | // TODO: 'Translator' could also be used to map PHINodes to their incoming |
515 | 16 | // ValInsts. |
516 | 16 | if (ValInstSpace.is_wrapping()) { |
517 | 16 | // { DefDomain[] -> Value[] } |
518 | 16 | isl::map ValInsts = ExpectedVal.range().unwrap(); |
519 | 16 | |
520 | 16 | // { DefDomain[] } |
521 | 16 | isl::set DefDomain = ValInsts.domain(); |
522 | 16 | |
523 | 16 | // { Value[] } |
524 | 16 | isl::space ValSpace = ValInstSpace.unwrap().range(); |
525 | 16 | |
526 | 16 | // { Value[] -> Value[] } |
527 | 16 | isl::map ValToVal = |
528 | 16 | isl::map::identity(ValSpace.map_from_domain_and_range(ValSpace)); |
529 | 16 | |
530 | 16 | // { [TargetDomain[] -> Value[]] -> [DefDomain[] -> Value] } |
531 | 16 | isl::map LocalTranslator = DefToTarget.reverse().product(ValToVal); |
532 | 16 | |
533 | 16 | Translator = Translator.add_map(LocalTranslator); |
534 | 16 | DEBUG(dbgs() << " local translator is " << LocalTranslator |
535 | 16 | << "\n"); |
536 | 16 | } |
537 | 16 | } |
538 | 21 | DEBUG(dbgs() << " expected values where " << TargetExpectedVal |
539 | 21 | << "\n"); |
540 | 21 | DEBUG(dbgs() << " candidate elements where " << Candidates << "\n"); |
541 | 21 | assert(Access); |
542 | 21 | |
543 | 21 | NumKnownLoadsForwarded++; |
544 | 21 | TotalKnownLoadsForwarded++; |
545 | 21 | return FD_DidForwardTree; |
546 | 21 | } |
547 | | |
548 | | /// Forward a scalar by redirecting the access to an array element that stores |
549 | | /// the same value. |
550 | | /// |
551 | | /// @param TargetStmt The statement the operand tree will be copied to. |
552 | | /// @param Inst The scalar to forward. |
553 | | /// @param UseStmt The statement that uses @p Inst. |
554 | | /// @param UseLoop The loop @p Inst is used in. |
555 | | /// @param UseToTarget { DomainUse[] -> DomainTarget[] } |
556 | | /// A mapping from the statement instance @p Inst is used |
557 | | /// in, to the statement instance it is forwarded to. |
558 | | /// @param DefStmt The statement @p Inst is defined in. |
559 | | /// @param DefLoop The loop which contains @p Inst. |
560 | | /// @param DefToTarget { DomainDef[] -> DomainTarget[] } |
561 | | /// A mapping from the statement instance @p Inst is |
562 | | /// defined in, to the statement instance it is forwarded |
563 | | /// to. |
564 | | /// @param DoIt If false, only determine whether an operand tree can be |
565 | | /// forwarded. If true, carry out the forwarding. Do not |
566 | | /// use DoIt==true if an operand tree is not known to be |
567 | | /// forwardable. |
568 | | /// |
569 | | /// @return FD_NotApplicable if @p Inst cannot be reloaded. |
570 | | /// FD_CanForwardLeaf if @p Inst can be reloaded. |
571 | | /// FD_CanForwardProfitably if @p Inst has been reloaded. |
572 | | /// FD_DidForwardLeaf if @p DoIt was true. |
573 | | ForwardingDecision reloadKnownContent(ScopStmt *TargetStmt, Instruction *Inst, |
574 | | ScopStmt *UseStmt, Loop *UseLoop, |
575 | | isl::map UseToTarget, ScopStmt *DefStmt, |
576 | | Loop *DefLoop, isl::map DefToTarget, |
577 | 24 | bool DoIt) { |
578 | 24 | // Cannot do anything without successful known analysis. |
579 | 24 | if (Known.is_null() || Translator.is_null() || UseToTarget.is_null() || |
580 | 24 | DefToTarget.is_null() || MaxOpGuard.hasQuotaExceeded()) |
581 | 0 | return FD_NotApplicable; |
582 | 24 | |
583 | 24 | MemoryAccess *Access = TargetStmt->lookupInputAccessOf(Inst); |
584 | 24 | if (Access && Access->isLatestArrayKind()22 ) { |
585 | 0 | if (DoIt) |
586 | 0 | return FD_DidForwardLeaf; |
587 | 0 | return FD_CanForwardLeaf; |
588 | 0 | } |
589 | 24 | |
590 | 24 | // Don't spend too much time analyzing whether it can be reloaded. When |
591 | 24 | // carrying-out the forwarding, we cannot bail-out in the middle of the |
592 | 24 | // transformation. It also shouldn't take as long because some results are |
593 | 24 | // cached. |
594 | 24 | IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt); |
595 | 24 | |
596 | 24 | // { DomainDef[] -> ValInst[] } |
597 | 24 | isl::union_map ExpectedVal = makeNormalizedValInst(Inst, UseStmt, UseLoop); |
598 | 24 | |
599 | 24 | // { DomainTarget[] -> ValInst[] } |
600 | 24 | isl::union_map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget); |
601 | 24 | isl::union_map TranslatedExpectedVal = |
602 | 24 | TargetExpectedVal.apply_range(Translator); |
603 | 24 | |
604 | 24 | // { DomainTarget[] -> Element[] } |
605 | 24 | isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal); |
606 | 24 | |
607 | 24 | isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt)); |
608 | 24 | if (!SameVal) |
609 | 10 | return FD_NotApplicable; |
610 | 14 | |
611 | 14 | if (!DoIt) |
612 | 7 | return FD_CanForwardProfitably; |
613 | 7 | |
614 | 7 | if (!Access) |
615 | 0 | Access = TargetStmt->ensureValueRead(Inst); |
616 | 7 | |
617 | 7 | simplify(SameVal); |
618 | 7 | Access->setNewAccessRelation(SameVal); |
619 | 7 | |
620 | 7 | TotalReloads++; |
621 | 7 | NumReloads++; |
622 | 7 | return FD_DidForwardLeaf; |
623 | 7 | } |
624 | | |
625 | | /// Forwards a speculatively executable instruction. |
626 | | /// |
627 | | /// @param TargetStmt The statement the operand tree will be copied to. |
628 | | /// @param UseInst The (possibly speculatable) instruction to forward. |
629 | | /// @param DefStmt The statement @p UseInst is defined in. |
630 | | /// @param DefLoop The loop which contains @p UseInst. |
631 | | /// @param DefToTarget { DomainDef[] -> DomainTarget[] } |
632 | | /// A mapping from the statement instance @p UseInst is |
633 | | /// defined to the statement instance it is forwarded to. |
634 | | /// @param DoIt If false, only determine whether an operand tree can be |
635 | | /// forwarded. If true, carry out the forwarding. Do not |
636 | | /// use DoIt==true if an operand tree is not known to be |
637 | | /// forwardable. |
638 | | /// |
639 | | /// @return FD_NotApplicable if @p UseInst is not speculatable. |
640 | | /// FD_CannotForward if one of @p UseInst's operands is not |
641 | | /// forwardable. |
642 | | /// FD_CanForwardTree if @p UseInst is forwardable. |
643 | | /// FD_DidForward if @p DoIt was true. |
644 | | ForwardingDecision forwardSpeculatable(ScopStmt *TargetStmt, |
645 | | Instruction *UseInst, |
646 | | ScopStmt *DefStmt, Loop *DefLoop, |
647 | 100 | isl::map DefToTarget, bool DoIt) { |
648 | 100 | // PHIs, unless synthesizable, are not yet supported. |
649 | 100 | if (isa<PHINode>(UseInst)) |
650 | 17 | return FD_NotApplicable; |
651 | 83 | |
652 | 83 | // Compatible instructions must satisfy the following conditions: |
653 | 83 | // 1. Idempotent (instruction will be copied, not moved; although its |
654 | 83 | // original instance might be removed by simplification) |
655 | 83 | // 2. Not access memory (There might be memory writes between) |
656 | 83 | // 3. Not cause undefined behaviour (we might copy to a location when the |
657 | 83 | // original instruction was no executed; this is currently not possible |
658 | 83 | // because we do not forward PHINodes) |
659 | 83 | // 4. Not leak memory if executed multiple times (i.e. malloc) |
660 | 83 | // |
661 | 83 | // Instruction::mayHaveSideEffects is not sufficient because it considers |
662 | 83 | // malloc to not have side-effects. llvm::isSafeToSpeculativelyExecute is |
663 | 83 | // not sufficient because it allows memory accesses. |
664 | 83 | if (mayBeMemoryDependent(*UseInst)) |
665 | 49 | return FD_NotApplicable; |
666 | 34 | |
667 | 34 | if (DoIt) { |
668 | 16 | // To ensure the right order, prepend this instruction before its |
669 | 16 | // operands. This ensures that its operands are inserted before the |
670 | 16 | // instruction using them. |
671 | 16 | // TODO: The operand tree is not really a tree, but a DAG. We should be |
672 | 16 | // able to handle DAGs without duplication. |
673 | 16 | TargetStmt->prependInstruction(UseInst); |
674 | 16 | NumInstructionsCopied++; |
675 | 16 | TotalInstructionsCopied++; |
676 | 16 | } |
677 | 34 | |
678 | 60 | for (Value *OpVal : UseInst->operand_values()) { |
679 | 60 | ForwardingDecision OpDecision = |
680 | 60 | forwardTree(TargetStmt, OpVal, DefStmt, DefLoop, DefToTarget, DoIt); |
681 | 60 | switch (OpDecision) { |
682 | 60 | case FD_CannotForward: |
683 | 2 | assert(!DoIt); |
684 | 2 | return FD_CannotForward; |
685 | 60 | |
686 | 60 | case FD_CanForwardLeaf: |
687 | 29 | case FD_CanForwardProfitably: |
688 | 29 | assert(!DoIt); |
689 | 29 | break; |
690 | 29 | |
691 | 29 | case FD_DidForwardLeaf: |
692 | 29 | case FD_DidForwardTree: |
693 | 29 | assert(DoIt); |
694 | 29 | break; |
695 | 29 | |
696 | 29 | case FD_NotApplicable: |
697 | 0 | llvm_unreachable("forwardTree should never return FD_NotApplicable"); |
698 | 60 | } |
699 | 60 | } |
700 | 34 | |
701 | 34 | if (32 DoIt32 ) |
702 | 16 | return FD_DidForwardTree; |
703 | 16 | return FD_CanForwardProfitably; |
704 | 16 | } |
705 | | |
706 | | /// Determines whether an operand tree can be forwarded or carries out a |
707 | | /// forwarding, depending on the @p DoIt flag. |
708 | | /// |
709 | | /// @param TargetStmt The statement the operand tree will be copied to. |
710 | | /// @param UseVal The value (usually an instruction) which is root of an |
711 | | /// operand tree. |
712 | | /// @param UseStmt The statement that uses @p UseVal. |
713 | | /// @param UseLoop The loop @p UseVal is used in. |
714 | | /// @param UseToTarget { DomainUse[] -> DomainTarget[] } |
715 | | /// A mapping from the statement instance @p UseVal is used |
716 | | /// to the statement instance it is forwarded to. |
717 | | /// @param DoIt If false, only determine whether an operand tree can be |
718 | | /// forwarded. If true, carry out the forwarding. Do not |
719 | | /// use DoIt==true if an operand tree is not known to be |
720 | | /// forwardable. |
721 | | /// |
722 | | /// @return If DoIt==false, return whether the operand tree can be forwarded. |
723 | | /// If DoIt==true, return FD_DidForward. |
724 | | ForwardingDecision forwardTree(ScopStmt *TargetStmt, Value *UseVal, |
725 | | ScopStmt *UseStmt, Loop *UseLoop, |
726 | 187 | isl::map UseToTarget, bool DoIt) { |
727 | 187 | ScopStmt *DefStmt = nullptr; |
728 | 187 | Loop *DefLoop = nullptr; |
729 | 187 | |
730 | 187 | // { DefDomain[] -> TargetDomain[] } |
731 | 187 | isl::map DefToTarget; |
732 | 187 | |
733 | 187 | VirtualUse VUse = VirtualUse::create(UseStmt, UseLoop, UseVal, true); |
734 | 187 | switch (VUse.getKind()) { |
735 | 187 | case VirtualUse::Constant: |
736 | 42 | case VirtualUse::Block: |
737 | 42 | case VirtualUse::Hoisted: |
738 | 42 | // These can be used anywhere without special considerations. |
739 | 42 | if (DoIt) |
740 | 21 | return FD_DidForwardTree; |
741 | 21 | return FD_CanForwardLeaf; |
742 | 21 | |
743 | 39 | case VirtualUse::Synthesizable: { |
744 | 39 | // ScopExpander will take care for of generating the code at the new |
745 | 39 | // location. |
746 | 39 | if (DoIt) |
747 | 20 | return FD_DidForwardTree; |
748 | 19 | |
749 | 19 | // Check if the value is synthesizable at the new location as well. This |
750 | 19 | // might be possible when leaving a loop for which ScalarEvolution is |
751 | 19 | // unable to derive the exit value for. |
752 | 19 | // TODO: If there is a LCSSA PHI at the loop exit, use that one. |
753 | 19 | // If the SCEV contains a SCEVAddRecExpr, we currently depend on that we |
754 | 19 | // do not forward past its loop header. This would require us to use a |
755 | 19 | // previous loop induction variable instead the current one. We currently |
756 | 19 | // do not allow forwarding PHI nodes, thus this should never occur (the |
757 | 19 | // only exception where no phi is necessary being an unreachable loop |
758 | 19 | // without edge from the outside). |
759 | 19 | VirtualUse TargetUse = VirtualUse::create( |
760 | 19 | S, TargetStmt, TargetStmt->getSurroundingLoop(), UseVal, true); |
761 | 19 | if (TargetUse.getKind() == VirtualUse::Synthesizable) |
762 | 19 | return FD_CanForwardLeaf; |
763 | 0 | |
764 | 0 | DEBUG(dbgs() << " Synthesizable would not be synthesizable anymore: " |
765 | 0 | << *UseVal << "\n"); |
766 | 0 | return FD_CannotForward; |
767 | 0 | } |
768 | 0 |
|
769 | 5 | case VirtualUse::ReadOnly: |
770 | 5 | // Note that we cannot return FD_CanForwardTree here. With a operand tree |
771 | 5 | // depth of 0, UseVal is the use in TargetStmt that we try to replace. |
772 | 5 | // With -polly-analyze-read-only-scalars=true we would ensure the |
773 | 5 | // existence of a MemoryAccess (which already exists for a leaf) and be |
774 | 5 | // removed again by tryForwardTree because it's goal is to remove this |
775 | 5 | // scalar MemoryAccess. It interprets FD_CanForwardTree as the permission |
776 | 5 | // to do so. |
777 | 5 | if (!DoIt) |
778 | 3 | return FD_CanForwardLeaf; |
779 | 2 | |
780 | 2 | // If we model read-only scalars, we need to create a MemoryAccess for it. |
781 | 2 | if (ModelReadOnlyScalars) |
782 | 1 | TargetStmt->ensureValueRead(UseVal); |
783 | 2 | |
784 | 2 | NumReadOnlyCopied++; |
785 | 2 | TotalReadOnlyCopied++; |
786 | 2 | return FD_DidForwardLeaf; |
787 | 2 | |
788 | 28 | case VirtualUse::Intra: |
789 | 28 | // Knowing that UseStmt and DefStmt are the same statement instance, just |
790 | 28 | // reuse the information about UseStmt for DefStmt |
791 | 28 | DefStmt = UseStmt; |
792 | 28 | DefToTarget = UseToTarget; |
793 | 28 | |
794 | 28 | LLVM_FALLTHROUGH; |
795 | 101 | case VirtualUse::Inter: |
796 | 101 | Instruction *Inst = cast<Instruction>(UseVal); |
797 | 101 | |
798 | 101 | if (!DefStmt) { |
799 | 73 | DefStmt = S->getStmtFor(Inst); |
800 | 73 | if (!DefStmt) |
801 | 1 | return FD_CannotForward; |
802 | 100 | } |
803 | 100 | |
804 | 100 | DefLoop = LI->getLoopFor(Inst->getParent()); |
805 | 100 | |
806 | 100 | if (DefToTarget.is_null() && !Known.is_null()72 ) { |
807 | 72 | IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt); |
808 | 72 | |
809 | 72 | // { UseDomain[] -> DefDomain[] } |
810 | 72 | isl::map UseToDef = computeUseToDefFlowDependency(UseStmt, DefStmt); |
811 | 72 | |
812 | 72 | // { DefDomain[] -> UseDomain[] -> TargetDomain[] } shortened to |
813 | 72 | // { DefDomain[] -> TargetDomain[] } |
814 | 72 | DefToTarget = UseToTarget.apply_domain(UseToDef); |
815 | 72 | simplify(DefToTarget); |
816 | 72 | } |
817 | 100 | |
818 | 100 | ForwardingDecision SpeculativeResult = forwardSpeculatable( |
819 | 100 | TargetStmt, Inst, DefStmt, DefLoop, DefToTarget, DoIt); |
820 | 100 | if (SpeculativeResult != FD_NotApplicable) |
821 | 34 | return SpeculativeResult; |
822 | 66 | |
823 | 66 | ForwardingDecision KnownResult = |
824 | 66 | forwardKnownLoad(TargetStmt, Inst, UseStmt, UseLoop, UseToTarget, |
825 | 66 | DefStmt, DefLoop, DefToTarget, DoIt); |
826 | 66 | if (KnownResult != FD_NotApplicable) |
827 | 42 | return KnownResult; |
828 | 24 | |
829 | 24 | ForwardingDecision ReloadResult = |
830 | 24 | reloadKnownContent(TargetStmt, Inst, UseStmt, UseLoop, UseToTarget, |
831 | 24 | DefStmt, DefLoop, DefToTarget, DoIt); |
832 | 24 | if (ReloadResult != FD_NotApplicable) |
833 | 14 | return ReloadResult; |
834 | 10 | |
835 | 10 | // When no method is found to forward the operand tree, we effectively |
836 | 10 | // cannot handle it. |
837 | 10 | DEBUG(dbgs() << " Cannot forward instruction: " << *Inst << "\n"); |
838 | 10 | return FD_CannotForward; |
839 | 0 | } |
840 | 0 | |
841 | 0 | llvm_unreachable("Case unhandled"); |
842 | 0 | } |
843 | | |
844 | | /// Try to forward an operand tree rooted in @p RA. |
845 | 49 | bool tryForwardTree(MemoryAccess *RA) { |
846 | 49 | assert(RA->isLatestScalarKind()); |
847 | 49 | DEBUG(dbgs() << "Trying to forward operand tree " << RA << "...\n"); |
848 | 49 | |
849 | 49 | ScopStmt *Stmt = RA->getStatement(); |
850 | 49 | Loop *InLoop = Stmt->getSurroundingLoop(); |
851 | 49 | |
852 | 49 | isl::map TargetToUse; |
853 | 49 | if (!Known.is_null()) { |
854 | 49 | isl::space DomSpace = Stmt->getDomainSpace(); |
855 | 49 | TargetToUse = |
856 | 49 | isl::map::identity(DomSpace.map_from_domain_and_range(DomSpace)); |
857 | 49 | } |
858 | 49 | |
859 | 49 | ForwardingDecision Assessment = forwardTree( |
860 | 49 | Stmt, RA->getAccessValue(), Stmt, InLoop, TargetToUse, false); |
861 | 49 | assert(Assessment != FD_DidForwardTree && Assessment != FD_DidForwardLeaf); |
862 | 49 | if (Assessment != FD_CanForwardProfitably) |
863 | 12 | return false; |
864 | 37 | |
865 | 37 | ForwardingDecision Execution = forwardTree(Stmt, RA->getAccessValue(), Stmt, |
866 | 37 | InLoop, TargetToUse, true); |
867 | 37 | assert(((Execution == FD_DidForwardTree) || |
868 | 37 | (Execution == FD_DidForwardLeaf)) && |
869 | 37 | "A previous positive assessment must also be executable"); |
870 | 37 | |
871 | 37 | if (Execution == FD_DidForwardTree) |
872 | 30 | Stmt->removeSingleMemoryAccess(RA); |
873 | 37 | return true; |
874 | 37 | } |
875 | | |
876 | | /// Return which SCoP this instance is processing. |
877 | 0 | Scop *getScop() const { return S; } |
878 | | |
879 | | /// Run the algorithm: Use value read accesses as operand tree roots and try |
880 | | /// to forward them into the statement. |
881 | 34 | bool forwardOperandTrees() { |
882 | 88 | for (ScopStmt &Stmt : *S) { |
883 | 88 | bool StmtModified = false; |
884 | 88 | |
885 | 88 | // Because we are modifying the MemoryAccess list, collect them first to |
886 | 88 | // avoid iterator invalidation. |
887 | 88 | SmallVector<MemoryAccess *, 16> Accs; |
888 | 179 | for (MemoryAccess *RA : Stmt) { |
889 | 179 | if (!RA->isRead()) |
890 | 106 | continue; |
891 | 73 | if (!RA->isLatestScalarKind()) |
892 | 24 | continue; |
893 | 49 | |
894 | 49 | Accs.push_back(RA); |
895 | 49 | } |
896 | 88 | |
897 | 88 | for (MemoryAccess *RA : Accs) { |
898 | 49 | if (tryForwardTree(RA)) { |
899 | 37 | Modified = true; |
900 | 37 | StmtModified = true; |
901 | 37 | NumForwardedTrees++; |
902 | 37 | TotalForwardedTrees++; |
903 | 37 | } |
904 | 49 | } |
905 | 88 | |
906 | 88 | if (StmtModified) { |
907 | 33 | NumModifiedStmts++; |
908 | 33 | TotalModifiedStmts++; |
909 | 33 | } |
910 | 88 | } |
911 | 34 | |
912 | 34 | if (Modified) |
913 | 26 | ScopsModified++; |
914 | 34 | return Modified; |
915 | 34 | } |
916 | | |
917 | | /// Print the pass result, performed transformations and the SCoP after the |
918 | | /// transformation. |
919 | 33 | void print(raw_ostream &OS, int Indent = 0) { |
920 | 33 | printStatistics(OS, Indent); |
921 | 33 | |
922 | 33 | if (!Modified) { |
923 | 8 | // This line can easily be checked in regression tests. |
924 | 8 | OS << "ForwardOpTree executed, but did not modify anything\n"; |
925 | 8 | return; |
926 | 8 | } |
927 | 25 | |
928 | 25 | printStatements(OS, Indent); |
929 | 25 | } |
930 | | }; |
931 | | |
932 | | /// Pass that redirects scalar reads to array elements that are known to contain |
933 | | /// the same value. |
934 | | /// |
935 | | /// This reduces the number of scalar accesses and therefore potentially |
936 | | /// increases the freedom of the scheduler. In the ideal case, all reads of a |
937 | | /// scalar definition are redirected (We currently do not care about removing |
938 | | /// the write in this case). This is also useful for the main DeLICM pass as |
939 | | /// there are less scalars to be mapped. |
940 | | class ForwardOpTree : public ScopPass { |
941 | | private: |
942 | | /// The pass implementation, also holding per-scop data. |
943 | | std::unique_ptr<ForwardOpTreeImpl> Impl; |
944 | | |
945 | | public: |
946 | | static char ID; |
947 | | |
948 | 33 | explicit ForwardOpTree() : ScopPass(ID) {} |
949 | | ForwardOpTree(const ForwardOpTree &) = delete; |
950 | | ForwardOpTree &operator=(const ForwardOpTree &) = delete; |
951 | | |
952 | 33 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
953 | 33 | AU.addRequiredTransitive<ScopInfoRegionPass>(); |
954 | 33 | AU.addRequired<LoopInfoWrapperPass>(); |
955 | 33 | AU.setPreservesAll(); |
956 | 33 | } |
957 | | |
958 | 34 | bool runOnScop(Scop &S) override { |
959 | 34 | // Free resources for previous SCoP's computation, if not yet done. |
960 | 34 | releaseMemory(); |
961 | 34 | |
962 | 34 | LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); |
963 | 34 | |
964 | 34 | { |
965 | 34 | IslMaxOperationsGuard MaxOpGuard(S.getIslCtx().get(), MaxOps, false); |
966 | 34 | Impl = llvm::make_unique<ForwardOpTreeImpl>(&S, &LI, MaxOpGuard); |
967 | 34 | |
968 | 34 | if (AnalyzeKnown) { |
969 | 34 | DEBUG(dbgs() << "Prepare forwarders...\n"); |
970 | 34 | Impl->computeKnownValues(); |
971 | 34 | } |
972 | 34 | |
973 | 34 | DEBUG(dbgs() << "Forwarding operand trees...\n"); |
974 | 34 | Impl->forwardOperandTrees(); |
975 | 34 | |
976 | 34 | if (MaxOpGuard.hasQuotaExceeded()) { |
977 | 0 | DEBUG(dbgs() << "Not all operations completed because of " |
978 | 0 | "max_operations exceeded\n"); |
979 | 0 | KnownOutOfQuota++; |
980 | 0 | } |
981 | 34 | } |
982 | 34 | |
983 | 34 | DEBUG(dbgs() << "\nFinal Scop:\n"); |
984 | 34 | DEBUG(dbgs() << S); |
985 | 34 | |
986 | 34 | // Update statistics |
987 | 34 | auto ScopStats = S.getStatistics(); |
988 | 34 | NumValueWrites += ScopStats.NumValueWrites; |
989 | 34 | NumValueWritesInLoops += ScopStats.NumValueWritesInLoops; |
990 | 34 | NumPHIWrites += ScopStats.NumPHIWrites; |
991 | 34 | NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops; |
992 | 34 | NumSingletonWrites += ScopStats.NumSingletonWrites; |
993 | 34 | NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops; |
994 | 34 | |
995 | 34 | return false; |
996 | 34 | } |
997 | | |
998 | 33 | void printScop(raw_ostream &OS, Scop &S) const override { |
999 | 33 | if (!Impl) |
1000 | 0 | return; |
1001 | 33 | |
1002 | 33 | assert(Impl->getScop() == &S); |
1003 | 33 | Impl->print(OS); |
1004 | 33 | } |
1005 | | |
1006 | 151 | void releaseMemory() override { Impl.reset(); } |
1007 | | }; // class ForwardOpTree |
1008 | | |
1009 | | char ForwardOpTree::ID; |
1010 | | } // namespace |
1011 | | |
1012 | 0 | ScopPass *polly::createForwardOpTreePass() { return new ForwardOpTree(); } |
1013 | | |
1014 | 43.0k | INITIALIZE_PASS_BEGIN(ForwardOpTree, "polly-optree", |
1015 | 43.0k | "Polly - Forward operand tree", false, false) |
1016 | 43.0k | INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) |
1017 | 43.0k | INITIALIZE_PASS_END(ForwardOpTree, "polly-optree", |
1018 | | "Polly - Forward operand tree", false, false) |