/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/tools/polly/lib/Transform/Simplify.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===------ Simplify.cpp ----------------------------------------*- C++ -*-===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // Simplify a SCoP by removing unnecessary statements and accesses. |
10 | | // |
11 | | //===----------------------------------------------------------------------===// |
12 | | |
13 | | #include "polly/Simplify.h" |
14 | | #include "polly/ScopInfo.h" |
15 | | #include "polly/ScopPass.h" |
16 | | #include "polly/Support/GICHelper.h" |
17 | | #include "polly/Support/ISLOStream.h" |
18 | | #include "polly/Support/ISLTools.h" |
19 | | #include "polly/Support/VirtualInstruction.h" |
20 | | #include "llvm/ADT/Statistic.h" |
21 | | #include "llvm/Support/Debug.h" |
22 | | #define DEBUG_TYPE "polly-simplify" |
23 | | |
24 | | using namespace llvm; |
25 | | using namespace polly; |
26 | | |
27 | | namespace { |
28 | | |
29 | | #define TWO_STATISTICS(VARNAME, DESC) \ |
30 | | static llvm::Statistic VARNAME[2] = { \ |
31 | | {DEBUG_TYPE, #VARNAME "0", DESC " (first)", {0}, {false}}, \ |
32 | | {DEBUG_TYPE, #VARNAME "1", DESC " (second)", {0}, {false}}} |
33 | | |
34 | | /// Number of max disjuncts we allow in removeOverwrites(). This is to avoid |
35 | | /// that the analysis of accesses in a statement is becoming too complex. Chosen |
36 | | /// to be relatively small because all the common cases should access only few |
37 | | /// array elements per statement. |
38 | | static int const SimplifyMaxDisjuncts = 4; |
39 | | |
40 | | TWO_STATISTICS(ScopsProcessed, "Number of SCoPs processed"); |
41 | | TWO_STATISTICS(ScopsModified, "Number of SCoPs simplified"); |
42 | | |
43 | | TWO_STATISTICS(TotalOverwritesRemoved, "Number of removed overwritten writes"); |
44 | | TWO_STATISTICS(TotalWritesCoalesced, "Number of writes coalesced with another"); |
45 | | TWO_STATISTICS(TotalRedundantWritesRemoved, |
46 | | "Number of writes of same value removed in any SCoP"); |
47 | | TWO_STATISTICS(TotalEmptyPartialAccessesRemoved, |
48 | | "Number of empty partial accesses removed"); |
49 | | TWO_STATISTICS(TotalDeadAccessesRemoved, "Number of dead accesses removed"); |
50 | | TWO_STATISTICS(TotalDeadInstructionsRemoved, |
51 | | "Number of unused instructions removed"); |
52 | | TWO_STATISTICS(TotalStmtsRemoved, "Number of statements removed in any SCoP"); |
53 | | |
54 | | TWO_STATISTICS(NumValueWrites, "Number of scalar value writes after Simplify"); |
55 | | TWO_STATISTICS( |
56 | | NumValueWritesInLoops, |
57 | | "Number of scalar value writes nested in affine loops after Simplify"); |
58 | | TWO_STATISTICS(NumPHIWrites, |
59 | | "Number of scalar phi writes after the first simplification"); |
60 | | TWO_STATISTICS( |
61 | | NumPHIWritesInLoops, |
62 | | "Number of scalar phi writes nested in affine loops after Simplify"); |
63 | | TWO_STATISTICS(NumSingletonWrites, "Number of singleton writes after Simplify"); |
64 | | TWO_STATISTICS( |
65 | | NumSingletonWritesInLoops, |
66 | | "Number of singleton writes nested in affine loops after Simplify"); |
67 | | |
68 | 812 | static bool isImplicitRead(MemoryAccess *MA) { |
69 | 812 | return MA->isRead() && MA->isOriginalScalarKind()339 ; |
70 | 812 | } |
71 | | |
72 | 838 | static bool isExplicitAccess(MemoryAccess *MA) { |
73 | 838 | return MA->isOriginalArrayKind(); |
74 | 838 | } |
75 | | |
76 | 812 | static bool isImplicitWrite(MemoryAccess *MA) { |
77 | 812 | return MA->isWrite() && MA->isOriginalScalarKind()473 ; |
78 | 812 | } |
79 | | |
80 | | /// Like isl::union_map::add_map, but may also return an underapproximated |
81 | | /// result if getting too complex. |
82 | | /// |
83 | | /// This is implemented by adding disjuncts to the results until the limit is |
84 | | /// reached. |
85 | | static isl::union_map underapproximatedAddMap(isl::union_map UMap, |
86 | 147 | isl::map Map) { |
87 | 147 | if (UMap.is_null() || Map.is_null()) |
88 | 0 | return {}; |
89 | 147 | |
90 | 147 | isl::map PrevMap = UMap.extract_map(Map.get_space()); |
91 | 147 | |
92 | 147 | // Fast path: If known that we cannot exceed the disjunct limit, just add |
93 | 147 | // them. |
94 | 147 | if (isl_map_n_basic_map(PrevMap.get()) + isl_map_n_basic_map(Map.get()) <= |
95 | 147 | SimplifyMaxDisjuncts) |
96 | 141 | return UMap.add_map(Map); |
97 | 6 | |
98 | 6 | isl::map Result = isl::map::empty(PrevMap.get_space()); |
99 | 24 | for (isl::basic_map BMap : PrevMap.get_basic_map_list()) { |
100 | 24 | if (Result.n_basic_map() > SimplifyMaxDisjuncts) |
101 | 0 | break; |
102 | 24 | Result = Result.unite(BMap); |
103 | 24 | } |
104 | 6 | for (isl::basic_map BMap : Map.get_basic_map_list()) { |
105 | 6 | if (isl_map_n_basic_map(Result.get()) > SimplifyMaxDisjuncts) |
106 | 0 | break; |
107 | 6 | Result = Result.unite(BMap); |
108 | 6 | } |
109 | 6 | |
110 | 6 | isl::union_map UResult = |
111 | 6 | UMap.subtract(isl::map::universe(PrevMap.get_space())); |
112 | 6 | UResult.add_map(Result); |
113 | 6 | |
114 | 6 | return UResult; |
115 | 6 | } |
116 | | |
117 | | class Simplify : public ScopPass { |
118 | | private: |
119 | | /// The invocation id (if there are multiple instances in the pass manager's |
120 | | /// pipeline) to determine which statistics to update. |
121 | | int CallNo; |
122 | | |
123 | | /// The last/current SCoP that is/has been processed. |
124 | | Scop *S; |
125 | | |
126 | | /// Number of writes that are overwritten anyway. |
127 | | int OverwritesRemoved = 0; |
128 | | |
129 | | /// Number of combined writes. |
130 | | int WritesCoalesced = 0; |
131 | | |
132 | | /// Number of redundant writes removed from this SCoP. |
133 | | int RedundantWritesRemoved = 0; |
134 | | |
135 | | /// Number of writes with empty access domain removed. |
136 | | int EmptyPartialAccessesRemoved = 0; |
137 | | |
138 | | /// Number of unused accesses removed from this SCoP. |
139 | | int DeadAccessesRemoved = 0; |
140 | | |
141 | | /// Number of unused instructions removed from this SCoP. |
142 | | int DeadInstructionsRemoved = 0; |
143 | | |
144 | | /// Number of unnecessary statements removed from the SCoP. |
145 | | int StmtsRemoved = 0; |
146 | | |
147 | | /// Return whether at least one simplification has been applied. |
148 | 86 | bool isModified() const { |
149 | 86 | return OverwritesRemoved > 0 || WritesCoalesced > 076 || |
150 | 86 | RedundantWritesRemoved > 066 || EmptyPartialAccessesRemoved > 053 || |
151 | 86 | DeadAccessesRemoved > 051 || DeadInstructionsRemoved > 036 || |
152 | 86 | StmtsRemoved > 032 ; |
153 | 86 | } |
154 | | |
155 | | /// Remove writes that are overwritten unconditionally later in the same |
156 | | /// statement. |
157 | | /// |
158 | | /// There must be no read of the same value between the write (that is to be |
159 | | /// removed) and the overwrite. |
160 | 44 | void removeOverwrites() { |
161 | 98 | for (auto &Stmt : *S) { |
162 | 98 | isl::set Domain = Stmt.getDomain(); |
163 | 98 | isl::union_map WillBeOverwritten = |
164 | 98 | isl::union_map::empty(S->getParamSpace()); |
165 | 98 | |
166 | 98 | SmallVector<MemoryAccess *, 32> Accesses(getAccessesInOrder(Stmt)); |
167 | 98 | |
168 | 98 | // Iterate in reverse order, so the overwrite comes before the write that |
169 | 98 | // is to be removed. |
170 | 225 | for (auto *MA : reverse(Accesses)) { |
171 | 225 | |
172 | 225 | // In region statements, the explicit accesses can be in blocks that are |
173 | 225 | // can be executed in any order. We therefore process only the implicit |
174 | 225 | // writes and stop after that. |
175 | 225 | if (Stmt.isRegionStmt() && isExplicitAccess(MA)13 ) |
176 | 8 | break; |
177 | 217 | |
178 | 217 | auto AccRel = MA->getAccessRelation(); |
179 | 217 | AccRel = AccRel.intersect_domain(Domain); |
180 | 217 | AccRel = AccRel.intersect_params(S->getContext()); |
181 | 217 | |
182 | 217 | // If a value is read in-between, do not consider it as overwritten. |
183 | 217 | if (MA->isRead()) { |
184 | 70 | // Invalidate all overwrites for the array it accesses to avoid too |
185 | 70 | // complex isl sets. |
186 | 70 | isl::map AccRelUniv = isl::map::universe(AccRel.get_space()); |
187 | 70 | WillBeOverwritten = WillBeOverwritten.subtract(AccRelUniv); |
188 | 70 | continue; |
189 | 70 | } |
190 | 147 | |
191 | 147 | // If all of a write's elements are overwritten, remove it. |
192 | 147 | isl::union_map AccRelUnion = AccRel; |
193 | 147 | if (AccRelUnion.is_subset(WillBeOverwritten)) { |
194 | 7 | LLVM_DEBUG(dbgs() << "Removing " << MA |
195 | 7 | << " which will be overwritten anyway\n"); |
196 | 7 | |
197 | 7 | Stmt.removeSingleMemoryAccess(MA); |
198 | 7 | OverwritesRemoved++; |
199 | 7 | TotalOverwritesRemoved[CallNo]++; |
200 | 7 | } |
201 | 147 | |
202 | 147 | // Unconditional writes overwrite other values. |
203 | 147 | if (MA->isMustWrite()) { |
204 | 147 | // Avoid too complex isl sets. If necessary, throw away some of the |
205 | 147 | // knowledge. |
206 | 147 | WillBeOverwritten = |
207 | 147 | underapproximatedAddMap(WillBeOverwritten, AccRel); |
208 | 147 | } |
209 | 147 | } |
210 | 98 | } |
211 | 44 | } |
212 | | |
213 | | /// Combine writes that write the same value if possible. |
214 | | /// |
215 | | /// This function is able to combine: |
216 | | /// - Partial writes with disjoint domain. |
217 | | /// - Writes that write to the same array element. |
218 | | /// |
219 | | /// In all cases, both writes must write the same values. |
220 | 44 | void coalesceWrites() { |
221 | 98 | for (auto &Stmt : *S) { |
222 | 98 | isl::set Domain = Stmt.getDomain().intersect_params(S->getContext()); |
223 | 98 | |
224 | 98 | // We let isl do the lookup for the same-value condition. For this, we |
225 | 98 | // wrap llvm::Value into an isl::set such that isl can do the lookup in |
226 | 98 | // its hashtable implementation. llvm::Values are only compared within a |
227 | 98 | // ScopStmt, so the map can be local to this scope. TODO: Refactor with |
228 | 98 | // ZoneAlgorithm::makeValueSet() |
229 | 98 | SmallDenseMap<Value *, isl::set> ValueSets; |
230 | 140 | auto makeValueSet = [&ValueSets, this](Value *V) -> isl::set { |
231 | 140 | assert(V); |
232 | 140 | isl::set &Result = ValueSets[V]; |
233 | 140 | if (Result.is_null()) { |
234 | 96 | isl::ctx Ctx = S->getIslCtx(); |
235 | 96 | std::string Name = |
236 | 96 | getIslCompatibleName("Val", V, ValueSets.size() - 1, |
237 | 96 | std::string(), UseInstructionNames); |
238 | 96 | isl::id Id = isl::id::alloc(Ctx, Name, V); |
239 | 96 | Result = isl::set::universe( |
240 | 96 | isl::space(Ctx, 0, 0).set_tuple_id(isl::dim::set, Id)); |
241 | 96 | } |
242 | 140 | return Result; |
243 | 140 | }; |
244 | 98 | |
245 | 98 | // List of all eligible (for coalescing) writes of the future. |
246 | 98 | // { [Domain[] -> Element[]] -> [Value[] -> MemoryAccess[]] } |
247 | 98 | isl::union_map FutureWrites = isl::union_map::empty(S->getParamSpace()); |
248 | 98 | |
249 | 98 | // Iterate over accesses from the last to the first. |
250 | 98 | SmallVector<MemoryAccess *, 32> Accesses(getAccessesInOrder(Stmt)); |
251 | 218 | for (MemoryAccess *MA : reverse(Accesses)) { |
252 | 218 | // In region statements, the explicit accesses can be in blocks that can |
253 | 218 | // be executed in any order. We therefore process only the implicit |
254 | 218 | // writes and stop after that. |
255 | 218 | if (Stmt.isRegionStmt() && isExplicitAccess(MA)13 ) |
256 | 8 | break; |
257 | 210 | |
258 | 210 | // { Domain[] -> Element[] } |
259 | 210 | isl::map AccRel = |
260 | 210 | MA->getLatestAccessRelation().intersect_domain(Domain); |
261 | 210 | |
262 | 210 | // { [Domain[] -> Element[]] } |
263 | 210 | isl::set AccRelWrapped = AccRel.wrap(); |
264 | 210 | |
265 | 210 | // { Value[] } |
266 | 210 | isl::set ValSet; |
267 | 210 | |
268 | 210 | if (MA->isMustWrite() && (140 MA->isOriginalScalarKind()140 || |
269 | 140 | isa<StoreInst>(MA->getAccessInstruction())112 )) { |
270 | 140 | // Normally, tryGetValueStored() should be used to determine which |
271 | 140 | // element is written, but it can return nullptr; For PHI accesses, |
272 | 140 | // getAccessValue() returns the PHI instead of the PHI's incoming |
273 | 140 | // value. In this case, where we only compare values of a single |
274 | 140 | // statement, this is fine, because within a statement, a PHI in a |
275 | 140 | // successor block has always the same value as the incoming write. We |
276 | 140 | // still preferably use the incoming value directly so we also catch |
277 | 140 | // direct uses of that. |
278 | 140 | Value *StoredVal = MA->tryGetValueStored(); |
279 | 140 | if (!StoredVal) |
280 | 1 | StoredVal = MA->getAccessValue(); |
281 | 140 | ValSet = makeValueSet(StoredVal); |
282 | 140 | |
283 | 140 | // { Domain[] } |
284 | 140 | isl::set AccDomain = AccRel.domain(); |
285 | 140 | |
286 | 140 | // Parts of the statement's domain that is not written by this access. |
287 | 140 | isl::set UndefDomain = Domain.subtract(AccDomain); |
288 | 140 | |
289 | 140 | // { Element[] } |
290 | 140 | isl::set ElementUniverse = |
291 | 140 | isl::set::universe(AccRel.get_space().range()); |
292 | 140 | |
293 | 140 | // { Domain[] -> Element[] } |
294 | 140 | isl::map UndefAnything = |
295 | 140 | isl::map::from_domain_and_range(UndefDomain, ElementUniverse); |
296 | 140 | |
297 | 140 | // We are looking a compatible write access. The other write can |
298 | 140 | // access these elements... |
299 | 140 | isl::map AllowedAccesses = AccRel.unite(UndefAnything); |
300 | 140 | |
301 | 140 | // ... and must write the same value. |
302 | 140 | // { [Domain[] -> Element[]] -> Value[] } |
303 | 140 | isl::map Filter = |
304 | 140 | isl::map::from_domain_and_range(AllowedAccesses.wrap(), ValSet); |
305 | 140 | |
306 | 140 | // Lookup future write that fulfills these conditions. |
307 | 140 | // { [[Domain[] -> Element[]] -> Value[]] -> MemoryAccess[] } |
308 | 140 | isl::union_map Filtered = |
309 | 140 | FutureWrites.uncurry().intersect_domain(Filter.wrap()); |
310 | 140 | |
311 | 140 | // Iterate through the candidates. |
312 | 140 | for (isl::map Map : Filtered.get_map_list()) { |
313 | 39 | MemoryAccess *OtherMA = (MemoryAccess *)Map.get_space() |
314 | 39 | .get_tuple_id(isl::dim::out) |
315 | 39 | .get_user(); |
316 | 39 | |
317 | 39 | isl::map OtherAccRel = |
318 | 39 | OtherMA->getLatestAccessRelation().intersect_domain(Domain); |
319 | 39 | |
320 | 39 | // The filter only guaranteed that some of OtherMA's accessed |
321 | 39 | // elements are allowed. Verify that it only accesses allowed |
322 | 39 | // elements. Otherwise, continue with the next candidate. |
323 | 39 | if (!OtherAccRel.is_subset(AllowedAccesses).is_true()) |
324 | 32 | continue; |
325 | 7 | |
326 | 7 | // The combined access relation. |
327 | 7 | // { Domain[] -> Element[] } |
328 | 7 | isl::map NewAccRel = AccRel.unite(OtherAccRel); |
329 | 7 | simplify(NewAccRel); |
330 | 7 | |
331 | 7 | // Carry out the coalescing. |
332 | 7 | Stmt.removeSingleMemoryAccess(MA); |
333 | 7 | OtherMA->setNewAccessRelation(NewAccRel); |
334 | 7 | |
335 | 7 | // We removed MA, OtherMA takes its role. |
336 | 7 | MA = OtherMA; |
337 | 7 | |
338 | 7 | TotalWritesCoalesced[CallNo]++; |
339 | 7 | WritesCoalesced++; |
340 | 7 | |
341 | 7 | // Don't look for more candidates. |
342 | 7 | break; |
343 | 7 | } |
344 | 140 | } |
345 | 210 | |
346 | 210 | // Two writes cannot be coalesced if there is another access (to some of |
347 | 210 | // the written elements) between them. Remove all visited write accesses |
348 | 210 | // from the list of eligible writes. Don't just remove the accessed |
349 | 210 | // elements, but any MemoryAccess that touches any of the invalidated |
350 | 210 | // elements. |
351 | 210 | SmallPtrSet<MemoryAccess *, 2> TouchedAccesses; |
352 | 210 | for (isl::map Map : |
353 | 210 | FutureWrites.intersect_domain(AccRelWrapped).get_map_list()) { |
354 | 86 | MemoryAccess *MA = (MemoryAccess *)Map.get_space() |
355 | 86 | .range() |
356 | 86 | .unwrap() |
357 | 86 | .get_tuple_id(isl::dim::out) |
358 | 86 | .get_user(); |
359 | 86 | TouchedAccesses.insert(MA); |
360 | 86 | } |
361 | 210 | isl::union_map NewFutureWrites = |
362 | 210 | isl::union_map::empty(FutureWrites.get_space()); |
363 | 210 | for (isl::map FutureWrite : FutureWrites.get_map_list()) { |
364 | 123 | MemoryAccess *MA = (MemoryAccess *)FutureWrite.get_space() |
365 | 123 | .range() |
366 | 123 | .unwrap() |
367 | 123 | .get_tuple_id(isl::dim::out) |
368 | 123 | .get_user(); |
369 | 123 | if (!TouchedAccesses.count(MA)) |
370 | 37 | NewFutureWrites = NewFutureWrites.add_map(FutureWrite); |
371 | 123 | } |
372 | 210 | FutureWrites = NewFutureWrites; |
373 | 210 | |
374 | 210 | if (MA->isMustWrite() && !ValSet.is_null()140 ) { |
375 | 140 | // { MemoryAccess[] } |
376 | 140 | auto AccSet = |
377 | 140 | isl::set::universe(isl::space(S->getIslCtx(), 0, 0) |
378 | 140 | .set_tuple_id(isl::dim::set, MA->getId())); |
379 | 140 | |
380 | 140 | // { Val[] -> MemoryAccess[] } |
381 | 140 | isl::map ValAccSet = isl::map::from_domain_and_range(ValSet, AccSet); |
382 | 140 | |
383 | 140 | // { [Domain[] -> Element[]] -> [Value[] -> MemoryAccess[]] } |
384 | 140 | isl::map AccRelValAcc = |
385 | 140 | isl::map::from_domain_and_range(AccRelWrapped, ValAccSet.wrap()); |
386 | 140 | FutureWrites = FutureWrites.add_map(AccRelValAcc); |
387 | 140 | } |
388 | 210 | } |
389 | 98 | } |
390 | 44 | } |
391 | | |
392 | | /// Remove writes that just write the same value already stored in the |
393 | | /// element. |
394 | 44 | void removeRedundantWrites() { |
395 | 98 | for (auto &Stmt : *S) { |
396 | 98 | SmallDenseMap<Value *, isl::set> ValueSets; |
397 | 213 | auto makeValueSet = [&ValueSets, this](Value *V) -> isl::set { |
398 | 213 | assert(V); |
399 | 213 | isl::set &Result = ValueSets[V]; |
400 | 213 | if (Result.is_null()) { |
401 | 121 | isl_ctx *Ctx = S->getIslCtx().get(); |
402 | 121 | std::string Name = |
403 | 121 | getIslCompatibleName("Val", V, ValueSets.size() - 1, |
404 | 121 | std::string(), UseInstructionNames); |
405 | 121 | isl::id Id = isl::manage(isl_id_alloc(Ctx, Name.c_str(), V)); |
406 | 121 | Result = isl::set::universe( |
407 | 121 | isl::space(Ctx, 0, 0).set_tuple_id(isl::dim::set, Id)); |
408 | 121 | } |
409 | 213 | return Result; |
410 | 213 | }; |
411 | 98 | |
412 | 98 | isl::set Domain = Stmt.getDomain(); |
413 | 98 | Domain = Domain.intersect_params(S->getContext()); |
414 | 98 | |
415 | 98 | // List of element reads that still have the same value while iterating |
416 | 98 | // through the MemoryAccesses. |
417 | 98 | // { [Domain[] -> Element[]] -> Val[] } |
418 | 98 | isl::union_map Known = isl::union_map::empty(S->getParamSpace()); |
419 | 98 | |
420 | 98 | SmallVector<MemoryAccess *, 32> Accesses(getAccessesInOrder(Stmt)); |
421 | 221 | for (MemoryAccess *MA : Accesses) { |
422 | 221 | // Is the memory access in a defined order relative to the other |
423 | 221 | // accesses? In region statements, only the first and the last accesses |
424 | 221 | // have defined order. Execution of those in the middle may depend on |
425 | 221 | // runtime conditions an therefore cannot be modified. |
426 | 221 | bool IsOrdered = |
427 | 221 | Stmt.isBlockStmt() || MA->isOriginalScalarKind()23 || |
428 | 221 | (14 !S->getBoxedLoops().size()14 && MA->getAccessInstruction()12 && |
429 | 14 | Stmt.getEntryBlock() == MA->getAccessInstruction()->getParent()12 ); |
430 | 221 | |
431 | 221 | isl::map AccRel = MA->getAccessRelation(); |
432 | 221 | AccRel = AccRel.intersect_domain(Domain); |
433 | 221 | isl::set AccRelWrapped = AccRel.wrap(); |
434 | 221 | |
435 | 221 | // Determine whether a write is redundant (stores only values that are |
436 | 221 | // already present in the written array elements) and remove it if this |
437 | 221 | // is the case. |
438 | 221 | if (IsOrdered && MA->isMustWrite()213 && |
439 | 221 | (133 isa<StoreInst>(MA->getAccessInstruction())133 || |
440 | 133 | MA->isOriginalScalarKind()27 )) { |
441 | 133 | Value *StoredVal = MA->tryGetValueStored(); |
442 | 133 | if (!StoredVal) |
443 | 1 | StoredVal = MA->getAccessValue(); |
444 | 133 | |
445 | 133 | if (StoredVal) { |
446 | 133 | // Lookup in the set of known values. |
447 | 133 | isl::map AccRelStoredVal = isl::map::from_domain_and_range( |
448 | 133 | AccRelWrapped, makeValueSet(StoredVal)); |
449 | 133 | if (isl::union_map(AccRelStoredVal).is_subset(Known)) { |
450 | 15 | LLVM_DEBUG(dbgs() << "Cleanup of " << MA << ":\n"); |
451 | 15 | LLVM_DEBUG(dbgs() << " Scalar: " << *StoredVal << "\n"); |
452 | 15 | LLVM_DEBUG(dbgs() << " AccRel: " << AccRel << "\n"); |
453 | 15 | |
454 | 15 | Stmt.removeSingleMemoryAccess(MA); |
455 | 15 | |
456 | 15 | RedundantWritesRemoved++; |
457 | 15 | TotalRedundantWritesRemoved[CallNo]++; |
458 | 15 | } |
459 | 133 | } |
460 | 133 | } |
461 | 221 | |
462 | 221 | // Update the know values set. |
463 | 221 | if (MA->isRead()) { |
464 | 80 | // Loaded values are the currently known values of the array element |
465 | 80 | // it was loaded from. |
466 | 80 | Value *LoadedVal = MA->getAccessValue(); |
467 | 80 | if (LoadedVal && IsOrdered) { |
468 | 80 | isl::map AccRelVal = isl::map::from_domain_and_range( |
469 | 80 | AccRelWrapped, makeValueSet(LoadedVal)); |
470 | 80 | |
471 | 80 | Known = Known.add_map(AccRelVal); |
472 | 80 | } |
473 | 141 | } else if (MA->isWrite()) { |
474 | 141 | // Remove (possibly) overwritten values from the known elements set. |
475 | 141 | // We remove all elements of the accessed array to avoid too complex |
476 | 141 | // isl sets. |
477 | 141 | isl::set AccRelUniv = isl::set::universe(AccRelWrapped.get_space()); |
478 | 141 | Known = Known.subtract_domain(AccRelUniv); |
479 | 141 | |
480 | 141 | // At this point, we could add the written value of must-writes. |
481 | 141 | // However, writing same values is already handled by |
482 | 141 | // coalesceWrites(). |
483 | 141 | } |
484 | 221 | } |
485 | 98 | } |
486 | 44 | } |
487 | | |
488 | | /// Remove statements without side effects. |
489 | 44 | void removeUnnecessaryStmts() { |
490 | 44 | auto NumStmtsBefore = S->getSize(); |
491 | 44 | S->simplifySCoP(true); |
492 | 44 | assert(NumStmtsBefore >= S->getSize()); |
493 | 44 | StmtsRemoved = NumStmtsBefore - S->getSize(); |
494 | 44 | LLVM_DEBUG(dbgs() << "Removed " << StmtsRemoved << " (of " << NumStmtsBefore |
495 | 44 | << ") statements\n"); |
496 | 44 | TotalStmtsRemoved[CallNo] += StmtsRemoved; |
497 | 44 | } |
498 | | |
499 | | /// Remove accesses that have an empty domain. |
500 | 44 | void removeEmptyPartialAccesses() { |
501 | 98 | for (ScopStmt &Stmt : *S) { |
502 | 98 | // Defer the actual removal to not invalidate iterators. |
503 | 98 | SmallVector<MemoryAccess *, 8> DeferredRemove; |
504 | 98 | |
505 | 236 | for (MemoryAccess *MA : Stmt) { |
506 | 236 | if (!MA->isWrite()) |
507 | 80 | continue; |
508 | 156 | |
509 | 156 | isl::map AccRel = MA->getAccessRelation(); |
510 | 156 | if (!AccRel.is_empty().is_true()) |
511 | 155 | continue; |
512 | 1 | |
513 | 1 | LLVM_DEBUG( |
514 | 1 | dbgs() << "Removing " << MA |
515 | 1 | << " because it's a partial access that never occurs\n"); |
516 | 1 | DeferredRemove.push_back(MA); |
517 | 1 | } |
518 | 98 | |
519 | 98 | for (MemoryAccess *MA : DeferredRemove) { |
520 | 1 | Stmt.removeSingleMemoryAccess(MA); |
521 | 1 | EmptyPartialAccessesRemoved++; |
522 | 1 | TotalEmptyPartialAccessesRemoved[CallNo]++; |
523 | 1 | } |
524 | 98 | } |
525 | 44 | } |
526 | | |
527 | | /// Mark all reachable instructions and access, and sweep those that are not |
528 | | /// reachable. |
529 | 44 | void markAndSweep(LoopInfo *LI) { |
530 | 44 | DenseSet<MemoryAccess *> UsedMA; |
531 | 44 | DenseSet<VirtualInstruction> UsedInsts; |
532 | 44 | |
533 | 44 | // Get all reachable instructions and accesses. |
534 | 44 | markReachable(S, LI, UsedInsts, UsedMA); |
535 | 44 | |
536 | 44 | // Remove all non-reachable accesses. |
537 | 44 | // We need get all MemoryAccesses first, in order to not invalidate the |
538 | 44 | // iterators when removing them. |
539 | 44 | SmallVector<MemoryAccess *, 64> AllMAs; |
540 | 44 | for (ScopStmt &Stmt : *S) |
541 | 98 | AllMAs.append(Stmt.begin(), Stmt.end()); |
542 | 44 | |
543 | 206 | for (MemoryAccess *MA : AllMAs) { |
544 | 206 | if (UsedMA.count(MA)) |
545 | 182 | continue; |
546 | 24 | LLVM_DEBUG(dbgs() << "Removing " << MA |
547 | 24 | << " because its value is not used\n"); |
548 | 24 | ScopStmt *Stmt = MA->getStatement(); |
549 | 24 | Stmt->removeSingleMemoryAccess(MA); |
550 | 24 | |
551 | 24 | DeadAccessesRemoved++; |
552 | 24 | TotalDeadAccessesRemoved[CallNo]++; |
553 | 24 | } |
554 | 44 | |
555 | 44 | // Remove all non-reachable instructions. |
556 | 98 | for (ScopStmt &Stmt : *S) { |
557 | 98 | // Note that for region statements, we can only remove the non-terminator |
558 | 98 | // instructions of the entry block. All other instructions are not in the |
559 | 98 | // instructions list, but implicitly always part of the statement. |
560 | 98 | |
561 | 98 | SmallVector<Instruction *, 32> AllInsts(Stmt.insts_begin(), |
562 | 98 | Stmt.insts_end()); |
563 | 98 | SmallVector<Instruction *, 32> RemainInsts; |
564 | 98 | |
565 | 209 | for (Instruction *Inst : AllInsts) { |
566 | 209 | auto It = UsedInsts.find({&Stmt, Inst}); |
567 | 209 | if (It == UsedInsts.end()) { |
568 | 36 | LLVM_DEBUG(dbgs() << "Removing "; Inst->print(dbgs()); |
569 | 36 | dbgs() << " because it is not used\n"); |
570 | 36 | DeadInstructionsRemoved++; |
571 | 36 | TotalDeadInstructionsRemoved[CallNo]++; |
572 | 36 | continue; |
573 | 36 | } |
574 | 173 | |
575 | 173 | RemainInsts.push_back(Inst); |
576 | 173 | |
577 | 173 | // If instructions appear multiple times, keep only the first. |
578 | 173 | UsedInsts.erase(It); |
579 | 173 | } |
580 | 98 | |
581 | 98 | // Set the new instruction list to be only those we did not remove. |
582 | 98 | Stmt.setInstructions(RemainInsts); |
583 | 98 | } |
584 | 44 | } |
585 | | |
586 | | /// Print simplification statistics to @p OS. |
587 | 42 | void printStatistics(llvm::raw_ostream &OS, int Indent = 0) const { |
588 | 42 | OS.indent(Indent) << "Statistics {\n"; |
589 | 42 | OS.indent(Indent + 4) << "Overwrites removed: " << OverwritesRemoved |
590 | 42 | << '\n'; |
591 | 42 | OS.indent(Indent + 4) << "Partial writes coalesced: " << WritesCoalesced |
592 | 42 | << "\n"; |
593 | 42 | OS.indent(Indent + 4) << "Redundant writes removed: " |
594 | 42 | << RedundantWritesRemoved << "\n"; |
595 | 42 | OS.indent(Indent + 4) << "Accesses with empty domains removed: " |
596 | 42 | << EmptyPartialAccessesRemoved << "\n"; |
597 | 42 | OS.indent(Indent + 4) << "Dead accesses removed: " << DeadAccessesRemoved |
598 | 42 | << '\n'; |
599 | 42 | OS.indent(Indent + 4) << "Dead instructions removed: " |
600 | 42 | << DeadInstructionsRemoved << '\n'; |
601 | 42 | OS.indent(Indent + 4) << "Stmts removed: " << StmtsRemoved << "\n"; |
602 | 42 | OS.indent(Indent) << "}\n"; |
603 | 42 | } |
604 | | |
605 | | /// Print the current state of all MemoryAccesses to @p OS. |
606 | 26 | void printAccesses(llvm::raw_ostream &OS, int Indent = 0) const { |
607 | 26 | OS.indent(Indent) << "After accesses {\n"; |
608 | 26 | for (auto &Stmt : *S) { |
609 | 26 | OS.indent(Indent + 4) << Stmt.getBaseName() << "\n"; |
610 | 26 | for (auto *MA : Stmt) |
611 | 40 | MA->print(OS); |
612 | 26 | } |
613 | 26 | OS.indent(Indent) << "}\n"; |
614 | 26 | } |
615 | | |
616 | | public: |
617 | | static char ID; |
618 | 44 | explicit Simplify(int CallNo = 0) : ScopPass(ID), CallNo(CallNo) {} |
619 | | |
620 | 44 | virtual void getAnalysisUsage(AnalysisUsage &AU) const override { |
621 | 44 | AU.addRequiredTransitive<ScopInfoRegionPass>(); |
622 | 44 | AU.addRequired<LoopInfoWrapperPass>(); |
623 | 44 | AU.setPreservesAll(); |
624 | 44 | } |
625 | | |
626 | 44 | virtual bool runOnScop(Scop &S) override { |
627 | 44 | // Reset statistics of last processed SCoP. |
628 | 44 | releaseMemory(); |
629 | 44 | assert(!isModified()); |
630 | 44 | |
631 | 44 | // Prepare processing of this SCoP. |
632 | 44 | this->S = &S; |
633 | 44 | ScopsProcessed[CallNo]++; |
634 | 44 | |
635 | 44 | LLVM_DEBUG(dbgs() << "Removing partial writes that never happen...\n"); |
636 | 44 | removeEmptyPartialAccesses(); |
637 | 44 | |
638 | 44 | LLVM_DEBUG(dbgs() << "Removing overwrites...\n"); |
639 | 44 | removeOverwrites(); |
640 | 44 | |
641 | 44 | LLVM_DEBUG(dbgs() << "Coalesce partial writes...\n"); |
642 | 44 | coalesceWrites(); |
643 | 44 | |
644 | 44 | LLVM_DEBUG(dbgs() << "Removing redundant writes...\n"); |
645 | 44 | removeRedundantWrites(); |
646 | 44 | |
647 | 44 | LLVM_DEBUG(dbgs() << "Cleanup unused accesses...\n"); |
648 | 44 | LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); |
649 | 44 | markAndSweep(LI); |
650 | 44 | |
651 | 44 | LLVM_DEBUG(dbgs() << "Removing statements without side effects...\n"); |
652 | 44 | removeUnnecessaryStmts(); |
653 | 44 | |
654 | 44 | if (isModified()) |
655 | 28 | ScopsModified[CallNo]++; |
656 | 44 | LLVM_DEBUG(dbgs() << "\nFinal Scop:\n"); |
657 | 44 | LLVM_DEBUG(dbgs() << S); |
658 | 44 | |
659 | 44 | auto ScopStats = S.getStatistics(); |
660 | 44 | NumValueWrites[CallNo] += ScopStats.NumValueWrites; |
661 | 44 | NumValueWritesInLoops[CallNo] += ScopStats.NumValueWritesInLoops; |
662 | 44 | NumPHIWrites[CallNo] += ScopStats.NumPHIWrites; |
663 | 44 | NumPHIWritesInLoops[CallNo] += ScopStats.NumPHIWritesInLoops; |
664 | 44 | NumSingletonWrites[CallNo] += ScopStats.NumSingletonWrites; |
665 | 44 | NumSingletonWritesInLoops[CallNo] += ScopStats.NumSingletonWritesInLoops; |
666 | 44 | |
667 | 44 | return false; |
668 | 44 | } |
669 | | |
670 | 42 | virtual void printScop(raw_ostream &OS, Scop &S) const override { |
671 | 42 | assert(&S == this->S && |
672 | 42 | "Can only print analysis for the last processed SCoP"); |
673 | 42 | printStatistics(OS); |
674 | 42 | |
675 | 42 | if (!isModified()) { |
676 | 16 | OS << "SCoP could not be simplified\n"; |
677 | 16 | return; |
678 | 16 | } |
679 | 26 | printAccesses(OS); |
680 | 26 | } |
681 | | |
682 | 194 | virtual void releaseMemory() override { |
683 | 194 | S = nullptr; |
684 | 194 | |
685 | 194 | OverwritesRemoved = 0; |
686 | 194 | WritesCoalesced = 0; |
687 | 194 | RedundantWritesRemoved = 0; |
688 | 194 | EmptyPartialAccessesRemoved = 0; |
689 | 194 | DeadAccessesRemoved = 0; |
690 | 194 | DeadInstructionsRemoved = 0; |
691 | 194 | StmtsRemoved = 0; |
692 | 194 | } |
693 | | }; |
694 | | |
695 | | char Simplify::ID; |
696 | | } // anonymous namespace |
697 | | |
698 | | namespace polly { |
699 | 323 | SmallVector<MemoryAccess *, 32> getAccessesInOrder(ScopStmt &Stmt) { |
700 | 323 | |
701 | 323 | SmallVector<MemoryAccess *, 32> Accesses; |
702 | 323 | |
703 | 323 | for (MemoryAccess *MemAcc : Stmt) |
704 | 812 | if (isImplicitRead(MemAcc)) |
705 | 97 | Accesses.push_back(MemAcc); |
706 | 323 | |
707 | 323 | for (MemoryAccess *MemAcc : Stmt) |
708 | 812 | if (isExplicitAccess(MemAcc)) |
709 | 629 | Accesses.push_back(MemAcc); |
710 | 323 | |
711 | 323 | for (MemoryAccess *MemAcc : Stmt) |
712 | 812 | if (isImplicitWrite(MemAcc)) |
713 | 86 | Accesses.push_back(MemAcc); |
714 | 323 | |
715 | 323 | return Accesses; |
716 | 323 | } |
717 | | } // namespace polly |
718 | | |
719 | 0 | Pass *polly::createSimplifyPass(int CallNo) { return new Simplify(CallNo); } |
720 | | |
721 | 48.2k | INITIALIZE_PASS_BEGIN(Simplify, "polly-simplify", "Polly - Simplify", false, |
722 | 48.2k | false) |
723 | 48.2k | INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) |
724 | 48.2k | INITIALIZE_PASS_END(Simplify, "polly-simplify", "Polly - Simplify", false, |
725 | | false) |