/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/tools/polly/lib/Transform/ZoneAlgo.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===------ ZoneAlgo.cpp ----------------------------------------*- C++ -*-===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // Derive information about array elements between statements ("Zones"). |
10 | | // |
11 | | // The algorithms here work on the scatter space - the image space of the |
12 | | // schedule returned by Scop::getSchedule(). We call an element in that space a |
13 | | // "timepoint". Timepoints are lexicographically ordered such that we can |
14 | | // define ranges in the scatter space. We use two flavors of such ranges:
15 | | // Timepoint sets and zones. A timepoint set is simply a subset of the scatter |
16 | | // space and is directly stored as isl_set. |
17 | | // |
18 | | // Zones are used to describe the space between timepoints as open sets, i.e. |
19 | | // they do not contain the extrema. Using isl rational sets to express these |
20 | | // would be overkill. We also cannot store them as the integer timepoints they |
21 | | // contain; the (nonempty) zone between 1 and 2 would be empty and |
22 | | // indistinguishable from e.g. the zone between 3 and 4. Also, we cannot store |
23 | | // the integer set including the extrema; the set ]1,2[ + ]3,4[ could be |
24 | | // coalesced to ]1,3[, although we defined the range [2,3] to be not in the set. |
25 | | // Instead, we store the "half-open" integer extrema, including the lower bound, |
26 | | // but excluding the upper bound. Examples: |
27 | | // |
28 | | // * The set { [i] : 1 <= i <= 3 } represents the zone ]0,3[ (which contains the |
29 | | // integer points 1 and 2, but not 0 or 3) |
30 | | // |
31 | | // * { [1] } represents the zone ]0,1[ |
32 | | // |
33 | | // * { [i] : i = 1 or i = 3 } represents the zone ]0,1[ + ]2,3[ |
34 | | // |
35 | | // Therefore, an integer i in the set represents the zone ]i-1,i[, i.e. strictly |
36 | | // speaking the integer points never belong to the zone. However, depending on
37 | | // the interpretation, one might want to include them. Part of the |
38 | | // interpretation may not be known when the zone is constructed. |
39 | | // |
40 | | // Reads are assumed to always take place before writes, hence we can think of |
41 | | // reads taking place at the beginning of a timepoint and writes at the end. |
42 | | // |
43 | | // Let's assume that the zone represents the lifetime of a variable. That is, |
44 | | // the zone begins with a write that defines the value during its lifetime and |
45 | | // ends with the last read of that value. In the following we consider whether a |
46 | | // read/write at the beginning/ending of the lifetime zone should be within the |
47 | | // zone or outside of it. |
48 | | // |
49 | | // * A read at the timepoint that starts the live-range loads the previous |
50 | | // value. Hence, exclude the timepoint starting the zone. |
51 | | // |
52 | | // * A write at the timepoint that starts the live-range is not defined whether |
53 | | // it occurs before or after the write that starts the lifetime. We do not |
54 | | // allow this situation to occur. Hence, we include the timepoint starting the |
55 | | // zone to determine whether they are conflicting. |
56 | | // |
57 | | // * A read at the timepoint that ends the live-range reads the same variable. |
58 | | // We include the timepoint at the end of the zone to include that read into |
59 | | // the live-range. Doing otherwise would mean that the two reads access |
60 | | // different values, which would mean that the value they read are both alive |
61 | | // at the same time but occupy the same variable. |
62 | | // |
63 | | // * A write at the timepoint that ends the live-range starts a new live-range. |
64 | | // It must not be included in the live-range of the previous definition. |
65 | | // |
66 | | // All combinations of reads and writes at the endpoints are possible, but most |
67 | | // of the time only the write->read (for instance, a live-range from definition |
68 | | // to last use) and read->write (for instance, an unused range from last use to |
69 | | // overwrite) and combinations are interesting (half-open ranges). write->write |
70 | | // zones might be useful as well in some context to represent |
71 | | // output-dependencies. |
72 | | // |
73 | | // @see convertZoneToTimepoints |
74 | | // |
75 | | // |
76 | | // The code makes use of maps and sets in many different spaces. To not lose
77 | | // track in which space a set or map is expected to be in, variables holding an |
78 | | // isl reference are usually annotated in the comments. They roughly follow isl |
79 | | // syntax for spaces, but only the tuples, not the dimensions. The tuples have a |
80 | | // meaning as follows: |
81 | | // |
82 | | // * Space[] - An unspecified tuple. Used for function parameters such that the |
83 | | // function caller can use it for anything they like. |
84 | | // |
85 | | // * Domain[] - A statement instance as returned by ScopStmt::getDomain() |
86 | | // isl_id_get_name: Stmt_<NameOfBasicBlock> |
87 | | // isl_id_get_user: Pointer to ScopStmt |
88 | | // |
89 | | // * Element[] - An array element as in the range part of |
90 | | // MemoryAccess::getAccessRelation() |
91 | | // isl_id_get_name: MemRef_<NameOfArrayVariable> |
92 | | // isl_id_get_user: Pointer to ScopArrayInfo |
93 | | // |
94 | | // * Scatter[] - Scatter space or space of timepoints |
95 | | // Has no tuple id |
96 | | // |
97 | | // * Zone[] - Range between timepoints as described above |
98 | | // Has no tuple id |
99 | | // |
100 | | // * ValInst[] - An llvm::Value as defined at a specific timepoint. |
101 | | // |
102 | | // A ValInst[] itself can be structured as one of: |
103 | | // |
104 | | // * [] - An unknown value. |
105 | | // Always zero dimensions |
106 | | // Has no tuple id |
107 | | // |
108 | | // * Value[] - An llvm::Value that is read-only in the SCoP, i.e. its |
109 | | // runtime content does not depend on the timepoint. |
110 | | // Always zero dimensions |
111 | | // isl_id_get_name: Val_<NameOfValue> |
112 | | // isl_id_get_user: A pointer to an llvm::Value |
113 | | // |
114 | | // * SCEV[...] - A synthesizable llvm::SCEV Expression. |
115 | | // In contrast to a Value[] is has at least one dimension per |
116 | | // SCEVAddRecExpr in the SCEV. |
117 | | // |
118 | | // * [Domain[] -> Value[]] - An llvm::Value that may change during the |
119 | | // Scop's execution. |
120 | | // The tuple itself has no id, but it wraps a map space holding a |
121 | | // statement instance which defines the llvm::Value as the map's domain |
122 | | // and llvm::Value itself as range. |
123 | | // |
124 | | // @see makeValInst() |
125 | | // |
126 | | // An annotation "{ Domain[] -> Scatter[] }" therefore means: A map from a |
127 | | // statement instance to a timepoint, aka a schedule. There is only one scatter |
128 | | // space, but most of the time multiple statements are processed in one set. |
129 | | // This is why most of the time isl_union_map has to be used. |
130 | | // |
131 | | // The basic algorithm works as follows: |
132 | | // At first we verify that the SCoP is compatible with this technique. For |
133 | | // instance, two writes cannot write to the same location at the same statement |
134 | | // instance because we cannot determine within the polyhedral model which one |
135 | | // comes first. Once this was verified, we compute zones at which an array |
136 | | // element is unused. This computation can fail if it takes too long. Then the |
137 | | // main algorithm is executed. Because every store potentially trails an unused |
138 | | // zone, we start at stores. We search for a scalar (MemoryKind::Value or |
139 | | // MemoryKind::PHI) that we can map to the array element overwritten by the |
140 | | // store, preferably one that is used by the store or at least the ScopStmt. |
141 | | // When it does not conflict with the lifetime of the values in the array |
142 | | // element, the map is applied and the unused zone updated as it is now used. We |
143 | | // continue to try to map scalars to the array element until there are no more |
144 | | // candidates to map. The algorithm is greedy in the sense that the first scalar |
145 | | // not conflicting will be mapped. Other scalars processed later that could have |
146 | | // fit the same unused zone will be rejected. As such the result depends on the |
147 | | // processing order. |
148 | | // |
149 | | //===----------------------------------------------------------------------===// |
150 | | |
151 | | #include "polly/ZoneAlgo.h" |
152 | | #include "polly/ScopInfo.h" |
153 | | #include "polly/Support/GICHelper.h" |
154 | | #include "polly/Support/ISLTools.h" |
155 | | #include "polly/Support/VirtualInstruction.h" |
156 | | #include "llvm/ADT/Statistic.h" |
157 | | #include "llvm/Support/raw_ostream.h" |
158 | | |
159 | | #define DEBUG_TYPE "polly-zone" |
160 | | |
161 | | STATISTIC(NumIncompatibleArrays, "Number of not zone-analyzable arrays"); |
162 | | STATISTIC(NumCompatibleArrays, "Number of zone-analyzable arrays"); |
163 | | STATISTIC(NumRecursivePHIs, "Number of recursive PHIs"); |
164 | | STATISTIC(NumNormalizablePHIs, "Number of normalizable PHIs"); |
165 | | STATISTIC(NumPHINormialization, "Number of PHI executed normalizations"); |
166 | | |
167 | | using namespace polly; |
168 | | using namespace llvm; |
169 | | |
170 | | static isl::union_map computeReachingDefinition(isl::union_map Schedule, |
171 | | isl::union_map Writes, |
172 | 183 | bool InclDef, bool InclRedef) { |
173 | 183 | return computeReachingWrite(Schedule, Writes, false, InclDef, InclRedef); |
174 | 183 | } |
175 | | |
176 | | /// Compute the reaching definition of a scalar. |
177 | | /// |
178 | | /// Compared to computeReachingDefinition, there is just one element which is |
179 | | /// accessed and therefore only a set of instances that access that element is
180 | | /// required. |
181 | | /// |
182 | | /// @param Schedule { DomainWrite[] -> Scatter[] } |
183 | | /// @param Writes { DomainWrite[] } |
184 | | /// @param InclDef Include the timepoint of the definition to the result. |
185 | | /// @param InclRedef Include the timepoint of the overwrite into the result. |
186 | | /// |
187 | | /// @return { Scatter[] -> DomainWrite[] } |
188 | | static isl::union_map computeScalarReachingDefinition(isl::union_map Schedule, |
189 | | isl::union_set Writes, |
190 | | bool InclDef, |
191 | 96 | bool InclRedef) { |
192 | 96 | // { DomainWrite[] -> Element[] } |
193 | 96 | isl::union_map Defs = isl::union_map::from_domain(Writes); |
194 | 96 | |
195 | 96 | // { [Element[] -> Scatter[]] -> DomainWrite[] } |
196 | 96 | auto ReachDefs = |
197 | 96 | computeReachingDefinition(Schedule, Defs, InclDef, InclRedef); |
198 | 96 | |
199 | 96 | // { Scatter[] -> DomainWrite[] } |
200 | 96 | return ReachDefs.curry().range().unwrap(); |
201 | 96 | } |
202 | | |
203 | | /// Compute the reaching definition of a scalar. |
204 | | /// |
205 | | /// This overload accepts only a single writing statement as an isl_map, |
206 | | /// consequently the result also is only a single isl_map. |
207 | | /// |
208 | | /// @param Schedule { DomainWrite[] -> Scatter[] } |
209 | | /// @param Writes { DomainWrite[] } |
210 | | /// @param InclDef Include the timepoint of the definition to the result. |
211 | | /// @param InclRedef Include the timepoint of the overwrite into the result. |
212 | | /// |
213 | | /// @return { Scatter[] -> DomainWrite[] } |
214 | | static isl::map computeScalarReachingDefinition(isl::union_map Schedule, |
215 | | isl::set Writes, bool InclDef, |
216 | 96 | bool InclRedef) { |
217 | 96 | isl::space DomainSpace = Writes.get_space(); |
218 | 96 | isl::space ScatterSpace = getScatterSpace(Schedule); |
219 | 96 | |
220 | 96 | // { Scatter[] -> DomainWrite[] } |
221 | 96 | isl::union_map UMap = computeScalarReachingDefinition( |
222 | 96 | Schedule, isl::union_set(Writes), InclDef, InclRedef); |
223 | 96 | |
224 | 96 | isl::space ResultSpace = ScatterSpace.map_from_domain_and_range(DomainSpace); |
225 | 96 | return singleton(UMap, ResultSpace); |
226 | 96 | } |
227 | | |
228 | 646 | isl::union_map polly::makeUnknownForDomain(isl::union_set Domain) { |
229 | 646 | return isl::union_map::from_domain(Domain); |
230 | 646 | } |
231 | | |
232 | | /// Create a domain-to-unknown value mapping. |
233 | | /// |
234 | | /// @see makeUnknownForDomain(isl::union_set) |
235 | | /// |
236 | | /// @param Domain { Domain[] } |
237 | | /// |
238 | | /// @return { Domain[] -> ValInst[] } |
239 | 16 | static isl::map makeUnknownForDomain(isl::set Domain) { |
240 | 16 | return isl::map::from_domain(Domain); |
241 | 16 | } |
242 | | |
243 | | /// Return whether @p Map maps to an unknown value. |
244 | | /// |
245 | | /// @param { [] -> ValInst[] } |
246 | 678 | static bool isMapToUnknown(const isl::map &Map) { |
247 | 678 | isl::space Space = Map.get_space().range(); |
248 | 678 | return Space.has_tuple_id(isl::dim::set).is_false() && |
249 | 678 | Space.is_wrapping().is_false()489 && Space.dim(isl::dim::set) == 089 ; |
250 | 678 | } |
251 | | |
252 | 638 | isl::union_map polly::filterKnownValInst(const isl::union_map &UMap) { |
253 | 638 | isl::union_map Result = isl::union_map::empty(UMap.get_space()); |
254 | 678 | for (isl::map Map : UMap.get_map_list()) { |
255 | 678 | if (!isMapToUnknown(Map)) |
256 | 589 | Result = Result.add_map(Map); |
257 | 678 | } |
258 | 638 | return Result; |
259 | 638 | } |
260 | | |
261 | | ZoneAlgorithm::ZoneAlgorithm(const char *PassName, Scop *S, LoopInfo *LI) |
262 | | : PassName(PassName), IslCtx(S->getSharedIslCtx()), S(S), LI(LI), |
263 | 87 | Schedule(S->getSchedule()) { |
264 | 87 | auto Domains = S->getDomains(); |
265 | 87 | |
266 | 87 | Schedule = Schedule.intersect_domain(Domains); |
267 | 87 | ParamSpace = Schedule.get_space(); |
268 | 87 | ScatterSpace = getScatterSpace(Schedule); |
269 | 87 | } |
270 | | |
271 | | /// Check if all stores in @p Stmt store the very same value. |
272 | | /// |
273 | | /// This covers a special situation occurring in Polybench's |
274 | | /// covariance/correlation (which is typical for algorithms that cover symmetric |
275 | | /// matrices): |
276 | | /// |
277 | | /// for (int i = 0; i < n; i += 1) |
278 | | /// for (int j = 0; j <= i; j += 1) { |
279 | | /// double x = ...; |
280 | | /// C[i][j] = x; |
281 | | /// C[j][i] = x; |
282 | | /// } |
283 | | /// |
284 | | /// For i == j, the same value is written twice to the same element. Double
285 | | /// writes to the same element are not allowed in DeLICM because its algorithm |
286 | | /// does not see which of the writes is effective. But if it's the same value
287 | | /// anyway, it doesn't matter. |
288 | | /// |
289 | | /// LLVM passes, however, cannot simplify this because the write is necessary |
290 | | /// for i != j (unless it would add a condition for one of the writes to occur |
291 | | /// only if i != j). |
292 | | /// |
293 | | /// TODO: In the future we may want to extend this to make the checks
294 | | /// specific to different memory locations. |
295 | 5 | static bool onlySameValueWrites(ScopStmt *Stmt) { |
296 | 5 | Value *V = nullptr; |
297 | 5 | |
298 | 16 | for (auto *MA : *Stmt) { |
299 | 16 | if (!MA->isLatestArrayKind() || !MA->isMustWrite()10 || |
300 | 16 | !MA->isOriginalArrayKind()8 ) |
301 | 8 | continue; |
302 | 8 | |
303 | 8 | if (!V) { |
304 | 4 | V = MA->getAccessValue(); |
305 | 4 | continue; |
306 | 4 | } |
307 | 4 | |
308 | 4 | if (V != MA->getAccessValue()) |
309 | 2 | return false; |
310 | 4 | } |
311 | 5 | return true3 ; |
312 | 5 | } |
313 | | |
314 | | /// Is @p InnerLoop nested inside @p OuterLoop? |
315 | 0 | static bool isInsideLoop(Loop *OuterLoop, Loop *InnerLoop) { |
316 | 0 | // If OuterLoop is nullptr, we cannot call its contains() method. In this case |
317 | 0 | // OuterLoop represents the 'top level' and therefore contains all loop. |
318 | 0 | return !OuterLoop || OuterLoop->contains(InnerLoop); |
319 | 0 | } |
320 | | |
321 | | void ZoneAlgorithm::collectIncompatibleElts(ScopStmt *Stmt, |
322 | | isl::union_set &IncompatibleElts, |
323 | 343 | isl::union_set &AllElts) { |
324 | 343 | auto Stores = makeEmptyUnionMap(); |
325 | 343 | auto Loads = makeEmptyUnionMap(); |
326 | 343 | |
327 | 343 | // This assumes that the MemoryKind::Array MemoryAccesses are iterated in |
328 | 343 | // order. |
329 | 684 | for (auto *MA : *Stmt) { |
330 | 684 | if (!MA->isOriginalArrayKind()) |
331 | 499 | continue; |
332 | 185 | |
333 | 185 | isl::map AccRelMap = getAccessRelationFor(MA); |
334 | 185 | isl::union_map AccRel = AccRelMap; |
335 | 185 | |
336 | 185 | // To avoid solving any ILP problems, always add entire arrays instead of |
337 | 185 | // just the elements that are accessed. |
338 | 185 | auto ArrayElts = isl::set::universe(AccRelMap.get_space().range()); |
339 | 185 | AllElts = AllElts.add_set(ArrayElts); |
340 | 185 | |
341 | 185 | if (MA->isRead()) { |
342 | 59 | // Reject load after store to same location. |
343 | 59 | if (!Stores.is_disjoint(AccRel)) { |
344 | 1 | LLVM_DEBUG( |
345 | 1 | dbgs() << "Load after store of same element in same statement\n"); |
346 | 1 | OptimizationRemarkMissed R(PassName, "LoadAfterStore", |
347 | 1 | MA->getAccessInstruction()); |
348 | 1 | R << "load after store of same element in same statement"; |
349 | 1 | R << " (previous stores: " << Stores; |
350 | 1 | R << ", loading: " << AccRel << ")"; |
351 | 1 | S->getFunction().getContext().diagnose(R); |
352 | 1 | |
353 | 1 | IncompatibleElts = IncompatibleElts.add_set(ArrayElts); |
354 | 1 | } |
355 | 59 | |
356 | 59 | Loads = Loads.unite(AccRel); |
357 | 59 | |
358 | 59 | continue; |
359 | 59 | } |
360 | 126 | |
361 | 126 | // In region statements the order is less clear, eg. the load and store |
362 | 126 | // might be in a boxed loop. |
363 | 126 | if (Stmt->isRegionStmt() && !Loads.is_disjoint(AccRel)9 ) { |
364 | 2 | LLVM_DEBUG(dbgs() << "WRITE in non-affine subregion not supported\n"); |
365 | 2 | OptimizationRemarkMissed R(PassName, "StoreInSubregion", |
366 | 2 | MA->getAccessInstruction()); |
367 | 2 | R << "store is in a non-affine subregion"; |
368 | 2 | S->getFunction().getContext().diagnose(R); |
369 | 2 | |
370 | 2 | IncompatibleElts = IncompatibleElts.add_set(ArrayElts); |
371 | 2 | } |
372 | 126 | |
373 | 126 | // Do not allow more than one store to the same location. |
374 | 126 | if (!Stores.is_disjoint(AccRel) && !onlySameValueWrites(Stmt)5 ) { |
375 | 2 | LLVM_DEBUG(dbgs() << "WRITE after WRITE to same element\n"); |
376 | 2 | OptimizationRemarkMissed R(PassName, "StoreAfterStore", |
377 | 2 | MA->getAccessInstruction()); |
378 | 2 | R << "store after store of same element in same statement"; |
379 | 2 | R << " (previous stores: " << Stores; |
380 | 2 | R << ", storing: " << AccRel << ")"; |
381 | 2 | S->getFunction().getContext().diagnose(R); |
382 | 2 | |
383 | 2 | IncompatibleElts = IncompatibleElts.add_set(ArrayElts); |
384 | 2 | } |
385 | 126 | |
386 | 126 | Stores = Stores.unite(AccRel); |
387 | 126 | } |
388 | 343 | } |
389 | | |
390 | 61 | void ZoneAlgorithm::addArrayReadAccess(MemoryAccess *MA) { |
391 | 61 | assert(MA->isLatestArrayKind()); |
392 | 61 | assert(MA->isRead()); |
393 | 61 | ScopStmt *Stmt = MA->getStatement(); |
394 | 61 | |
395 | 61 | // { DomainRead[] -> Element[] } |
396 | 61 | auto AccRel = intersectRange(getAccessRelationFor(MA), CompatibleElts); |
397 | 61 | AllReads = AllReads.add_map(AccRel); |
398 | 61 | |
399 | 61 | if (LoadInst *Load = dyn_cast_or_null<LoadInst>(MA->getAccessInstruction())) { |
400 | 59 | // { DomainRead[] -> ValInst[] } |
401 | 59 | isl::map LoadValInst = makeValInst( |
402 | 59 | Load, Stmt, LI->getLoopFor(Load->getParent()), Stmt->isBlockStmt()); |
403 | 59 | |
404 | 59 | // { DomainRead[] -> [Element[] -> DomainRead[]] } |
405 | 59 | isl::map IncludeElement = AccRel.domain_map().curry(); |
406 | 59 | |
407 | 59 | // { [Element[] -> DomainRead[]] -> ValInst[] } |
408 | 59 | isl::map EltLoadValInst = LoadValInst.apply_domain(IncludeElement); |
409 | 59 | |
410 | 59 | AllReadValInst = AllReadValInst.add_map(EltLoadValInst); |
411 | 59 | } |
412 | 61 | } |
413 | | |
414 | | isl::union_map ZoneAlgorithm::getWrittenValue(MemoryAccess *MA, |
415 | 126 | isl::map AccRel) { |
416 | 126 | if (!MA->isMustWrite()) |
417 | 9 | return {}; |
418 | 117 | |
419 | 117 | Value *AccVal = MA->getAccessValue(); |
420 | 117 | ScopStmt *Stmt = MA->getStatement(); |
421 | 117 | Instruction *AccInst = MA->getAccessInstruction(); |
422 | 117 | |
423 | 117 | // Write a value to a single element. |
424 | 117 | auto L = MA->isOriginalArrayKind() ? LI->getLoopFor(AccInst->getParent()) |
425 | 117 | : Stmt->getSurroundingLoop()0 ; |
426 | 117 | if (AccVal && |
427 | 117 | AccVal->getType() == MA->getLatestScopArrayInfo()->getElementType()111 && |
428 | 117 | AccRel.is_single_valued().is_true()108 ) |
429 | 106 | return makeNormalizedValInst(AccVal, Stmt, L); |
430 | 11 | |
431 | 11 | // memset(_, '0', ) is equivalent to writing the null value to all touched |
432 | 11 | // elements. isMustWrite() ensures that all of an element's bytes are |
433 | 11 | // overwritten. |
434 | 11 | if (auto *Memset = dyn_cast<MemSetInst>(AccInst)) { |
435 | 6 | auto *WrittenConstant = dyn_cast<Constant>(Memset->getValue()); |
436 | 6 | Type *Ty = MA->getLatestScopArrayInfo()->getElementType(); |
437 | 6 | if (WrittenConstant && WrittenConstant->isZeroValue()) { |
438 | 6 | Constant *Zero = Constant::getNullValue(Ty); |
439 | 6 | return makeNormalizedValInst(Zero, Stmt, L); |
440 | 6 | } |
441 | 5 | } |
442 | 5 | |
443 | 5 | return {}; |
444 | 5 | } |
445 | | |
446 | 126 | void ZoneAlgorithm::addArrayWriteAccess(MemoryAccess *MA) { |
447 | 126 | assert(MA->isLatestArrayKind()); |
448 | 126 | assert(MA->isWrite()); |
449 | 126 | auto *Stmt = MA->getStatement(); |
450 | 126 | |
451 | 126 | // { Domain[] -> Element[] } |
452 | 126 | isl::map AccRel = intersectRange(getAccessRelationFor(MA), CompatibleElts); |
453 | 126 | |
454 | 126 | if (MA->isMustWrite()) |
455 | 117 | AllMustWrites = AllMustWrites.add_map(AccRel); |
456 | 126 | |
457 | 126 | if (MA->isMayWrite()) |
458 | 9 | AllMayWrites = AllMayWrites.add_map(AccRel); |
459 | 126 | |
460 | 126 | // { Domain[] -> ValInst[] } |
461 | 126 | isl::union_map WriteValInstance = getWrittenValue(MA, AccRel); |
462 | 126 | if (!WriteValInstance) |
463 | 14 | WriteValInstance = makeUnknownForDomain(Stmt); |
464 | 126 | |
465 | 126 | // { Domain[] -> [Element[] -> Domain[]] } |
466 | 126 | isl::map IncludeElement = AccRel.domain_map().curry(); |
467 | 126 | |
468 | 126 | // { [Element[] -> DomainWrite[]] -> ValInst[] } |
469 | 126 | isl::union_map EltWriteValInst = |
470 | 126 | WriteValInstance.apply_domain(IncludeElement); |
471 | 126 | |
472 | 126 | AllWriteValInst = AllWriteValInst.unite(EltWriteValInst); |
473 | 126 | } |
474 | | |
475 | | /// For an llvm::Value defined in @p DefStmt, compute the RAW dependency for a |
476 | | /// use in every instance of @p UseStmt. |
477 | | /// |
478 | | /// @param UseStmt Statement a scalar is used in. |
479 | | /// @param DefStmt Statement a scalar is defined in. |
480 | | /// |
481 | | /// @return { DomainUse[] -> DomainDef[] } |
482 | | isl::map ZoneAlgorithm::computeUseToDefFlowDependency(ScopStmt *UseStmt, |
483 | 89 | ScopStmt *DefStmt) { |
484 | 89 | // { DomainUse[] -> Scatter[] } |
485 | 89 | isl::map UseScatter = getScatterFor(UseStmt); |
486 | 89 | |
487 | 89 | // { Zone[] -> DomainDef[] } |
488 | 89 | isl::map ReachDefZone = getScalarReachingDefinition(DefStmt); |
489 | 89 | |
490 | 89 | // { Scatter[] -> DomainDef[] } |
491 | 89 | isl::map ReachDefTimepoints = |
492 | 89 | convertZoneToTimepoints(ReachDefZone, isl::dim::in, false, true); |
493 | 89 | |
494 | 89 | // { DomainUse[] -> DomainDef[] } |
495 | 89 | return UseScatter.apply_range(ReachDefTimepoints); |
496 | 89 | } |
497 | | |
498 | | /// Return whether @p PHI refers (also transitively through other PHIs) to |
499 | | /// itself. |
500 | | /// |
501 | | /// loop: |
502 | | /// %phi1 = phi [0, %preheader], [%phi1, %loop] |
503 | | /// br i1 %c, label %loop, label %exit |
504 | | /// |
505 | | /// exit: |
506 | | /// %phi2 = phi [%phi1, %bb] |
507 | | /// |
508 | | /// In this example, %phi1 is recursive, but %phi2 is not. |
509 | 6 | static bool isRecursivePHI(const PHINode *PHI) { |
510 | 6 | SmallVector<const PHINode *, 8> Worklist; |
511 | 6 | SmallPtrSet<const PHINode *, 8> Visited; |
512 | 6 | Worklist.push_back(PHI); |
513 | 6 | |
514 | 12 | while (!Worklist.empty()) { |
515 | 7 | const PHINode *Cur = Worklist.pop_back_val(); |
516 | 7 | |
517 | 7 | if (Visited.count(Cur)) |
518 | 0 | continue; |
519 | 7 | Visited.insert(Cur); |
520 | 7 | |
521 | 13 | for (const Use &Incoming : Cur->incoming_values()) { |
522 | 13 | Value *IncomingVal = Incoming.get(); |
523 | 13 | auto *IncomingPHI = dyn_cast<PHINode>(IncomingVal); |
524 | 13 | if (!IncomingPHI) |
525 | 11 | continue; |
526 | 2 | |
527 | 2 | if (IncomingPHI == PHI) |
528 | 1 | return true; |
529 | 1 | Worklist.push_back(IncomingPHI); |
530 | 1 | } |
531 | 7 | } |
532 | 6 | return false5 ; |
533 | 6 | } |
534 | | |
535 | 41 | isl::union_map ZoneAlgorithm::computePerPHI(const ScopArrayInfo *SAI) { |
536 | 41 | // TODO: If the PHI has an incoming block from before the SCoP, it is not |
537 | 41 | // represented in any ScopStmt. |
538 | 41 | |
539 | 41 | auto *PHI = cast<PHINode>(SAI->getBasePtr()); |
540 | 41 | auto It = PerPHIMaps.find(PHI); |
541 | 41 | if (It != PerPHIMaps.end()) |
542 | 0 | return It->second; |
543 | 41 | |
544 | 41 | assert(SAI->isPHIKind()); |
545 | 41 | |
546 | 41 | // { DomainPHIWrite[] -> Scatter[] } |
547 | 41 | isl::union_map PHIWriteScatter = makeEmptyUnionMap(); |
548 | 41 | |
549 | 41 | // Collect all incoming block timepoints. |
550 | 81 | for (MemoryAccess *MA : S->getPHIIncomings(SAI)) { |
551 | 81 | isl::map Scatter = getScatterFor(MA); |
552 | 81 | PHIWriteScatter = PHIWriteScatter.add_map(Scatter); |
553 | 81 | } |
554 | 41 | |
555 | 41 | // { DomainPHIRead[] -> Scatter[] } |
556 | 41 | isl::map PHIReadScatter = getScatterFor(S->getPHIRead(SAI)); |
557 | 41 | |
558 | 41 | // { DomainPHIRead[] -> Scatter[] } |
559 | 41 | isl::map BeforeRead = beforeScatter(PHIReadScatter, true); |
560 | 41 | |
561 | 41 | // { Scatter[] } |
562 | 41 | isl::set WriteTimes = singleton(PHIWriteScatter.range(), ScatterSpace); |
563 | 41 | |
564 | 41 | // { DomainPHIRead[] -> Scatter[] } |
565 | 41 | isl::map PHIWriteTimes = BeforeRead.intersect_range(WriteTimes); |
566 | 41 | |
567 | 41 | // Remove instances outside the context. |
568 | 41 | PHIWriteTimes = PHIWriteTimes.intersect_params(S->getAssumedContext()); |
569 | 41 | PHIWriteTimes = subtractParams(PHIWriteTimes, S->getInvalidContext()); |
570 | 41 | |
571 | 41 | isl::map LastPerPHIWrites = PHIWriteTimes.lexmax(); |
572 | 41 | |
573 | 41 | // { DomainPHIRead[] -> DomainPHIWrite[] } |
574 | 41 | isl::union_map Result = |
575 | 41 | isl::union_map(LastPerPHIWrites).apply_range(PHIWriteScatter.reverse()); |
576 | 41 | assert(!Result.is_single_valued().is_false()); |
577 | 41 | assert(!Result.is_injective().is_false()); |
578 | 41 | |
579 | 41 | PerPHIMaps.insert({PHI, Result}); |
580 | 41 | return Result; |
581 | 41 | } |
582 | | |
583 | 230 | isl::union_set ZoneAlgorithm::makeEmptyUnionSet() const { |
584 | 230 | return isl::union_set::empty(ParamSpace); |
585 | 230 | } |
586 | | |
587 | 1.38k | isl::union_map ZoneAlgorithm::makeEmptyUnionMap() const { |
588 | 1.38k | return isl::union_map::empty(ParamSpace); |
589 | 1.38k | } |
590 | | |
591 | 87 | void ZoneAlgorithm::collectCompatibleElts() { |
592 | 87 | // First find all the incompatible elements, then take the complement. |
593 | 87 | // We compile the list of compatible (rather than incompatible) elements so |
594 | 87 | // users can intersect with the list, not requiring a subtract operation. It |
595 | 87 | // also allows us to define a 'universe' of all elements and makes it more |
596 | 87 | // explicit in which array elements can be used. |
597 | 87 | isl::union_set AllElts = makeEmptyUnionSet(); |
598 | 87 | isl::union_set IncompatibleElts = makeEmptyUnionSet(); |
599 | 87 | |
600 | 87 | for (auto &Stmt : *S) |
601 | 343 | collectIncompatibleElts(&Stmt, IncompatibleElts, AllElts); |
602 | 87 | |
603 | 87 | NumIncompatibleArrays += isl_union_set_n_set(IncompatibleElts.get()); |
604 | 87 | CompatibleElts = AllElts.subtract(IncompatibleElts); |
605 | 87 | NumCompatibleArrays += isl_union_set_n_set(CompatibleElts.get()); |
606 | 87 | } |
607 | | |
608 | 305 | isl::map ZoneAlgorithm::getScatterFor(ScopStmt *Stmt) const { |
609 | 305 | isl::space ResultSpace = |
610 | 305 | Stmt->getDomainSpace().map_from_domain_and_range(ScatterSpace); |
611 | 305 | return Schedule.extract_map(ResultSpace); |
612 | 305 | } |
613 | | |
614 | 216 | isl::map ZoneAlgorithm::getScatterFor(MemoryAccess *MA) const { |
615 | 216 | return getScatterFor(MA->getStatement()); |
616 | 216 | } |
617 | | |
618 | 177 | isl::union_map ZoneAlgorithm::getScatterFor(isl::union_set Domain) const { |
619 | 177 | return Schedule.intersect_domain(Domain); |
620 | 177 | } |
621 | | |
622 | 56 | isl::map ZoneAlgorithm::getScatterFor(isl::set Domain) const { |
623 | 56 | auto ResultSpace = Domain.get_space().map_from_domain_and_range(ScatterSpace); |
624 | 56 | auto UDomain = isl::union_set(Domain); |
625 | 56 | auto UResult = getScatterFor(std::move(UDomain)); |
626 | 56 | auto Result = singleton(std::move(UResult), std::move(ResultSpace)); |
627 | 56 | assert(!Result || Result.domain().is_equal(Domain) == isl_bool_true); |
628 | 56 | return Result; |
629 | 56 | } |
630 | | |
631 | 1.56k | isl::set ZoneAlgorithm::getDomainFor(ScopStmt *Stmt) const { |
632 | 1.56k | return Stmt->getDomain().remove_redundancies(); |
633 | 1.56k | } |
634 | | |
635 | 915 | isl::set ZoneAlgorithm::getDomainFor(MemoryAccess *MA) const { |
636 | 915 | return getDomainFor(MA->getStatement()); |
637 | 915 | } |
638 | | |
639 | 475 | isl::map ZoneAlgorithm::getAccessRelationFor(MemoryAccess *MA) const { |
640 | 475 | auto Domain = getDomainFor(MA); |
641 | 475 | auto AccRel = MA->getLatestAccessRelation(); |
642 | 475 | return AccRel.intersect_domain(Domain); |
643 | 475 | } |
644 | | |
645 | | isl::map ZoneAlgorithm::getDefToTarget(ScopStmt *DefStmt, |
646 | 211 | ScopStmt *TargetStmt) { |
647 | 211 | // No translation required if the definition is already at the target. |
648 | 211 | if (TargetStmt == DefStmt) |
649 | 53 | return isl::map::identity( |
650 | 53 | getDomainFor(TargetStmt).get_space().map_from_set()); |
651 | 158 | |
652 | 158 | isl::map &Result = DefToTargetCache[std::make_pair(TargetStmt, DefStmt)]; |
653 | 158 | |
654 | 158 | // This is a shortcut in case the schedule is still the original and |
655 | 158 | // TargetStmt is in the same or nested inside DefStmt's loop. With the |
656 | 158 | // additional assumption that operand trees do not cross DefStmt's loop |
657 | 158 | // header, then TargetStmt's instance shared coordinates are the same as |
658 | 158 | // DefStmt's coordinates. All TargetStmt instances with this prefix share |
659 | 158 | // the same DefStmt instance. |
660 | 158 | // Model: |
661 | 158 | // |
662 | 158 | // for (int i < 0; i < N; i+=1) { |
663 | 158 | // DefStmt: |
664 | 158 | // D = ...; |
665 | 158 | // for (int j < 0; j < N; j+=1) { |
666 | 158 | // TargetStmt: |
667 | 158 | // use(D); |
668 | 158 | // } |
669 | 158 | // } |
670 | 158 | // |
671 | 158 | // Here, the value used in TargetStmt is defined in the corresponding |
672 | 158 | // DefStmt, i.e. |
673 | 158 | // |
674 | 158 | // { DefStmt[i] -> TargetStmt[i,j] } |
675 | 158 | // |
676 | 158 | // In practice, this should cover the majority of cases. |
677 | 158 | if (!Result && S->isOriginalSchedule()89 && |
678 | 158 | isInsideLoop(DefStmt->getSurroundingLoop(), |
679 | 0 | TargetStmt->getSurroundingLoop())) { |
680 | 0 | isl::set DefDomain = getDomainFor(DefStmt); |
681 | 0 | isl::set TargetDomain = getDomainFor(TargetStmt); |
682 | 0 | assert(DefDomain.dim(isl::dim::set) <= TargetDomain.dim(isl::dim::set)); |
683 | 0 |
|
684 | 0 | Result = isl::map::from_domain_and_range(DefDomain, TargetDomain); |
685 | 0 | for (unsigned i = 0, DefDims = DefDomain.dim(isl::dim::set); i < DefDims; |
686 | 0 | i += 1) |
687 | 0 | Result = Result.equate(isl::dim::in, i, isl::dim::out, i); |
688 | 0 | } |
689 | 158 | |
690 | 158 | if (!Result) { |
691 | 89 | // { DomainDef[] -> DomainTarget[] } |
692 | 89 | Result = computeUseToDefFlowDependency(TargetStmt, DefStmt).reverse(); |
693 | 89 | simplify(Result); |
694 | 89 | } |
695 | 158 | |
696 | 158 | return Result; |
697 | 158 | } |
698 | | |
699 | 145 | isl::map ZoneAlgorithm::getScalarReachingDefinition(ScopStmt *Stmt) { |
700 | 145 | auto &Result = ScalarReachDefZone[Stmt]; |
701 | 145 | if (Result) |
702 | 49 | return Result; |
703 | 96 | |
704 | 96 | auto Domain = getDomainFor(Stmt); |
705 | 96 | Result = computeScalarReachingDefinition(Schedule, Domain, false, true); |
706 | 96 | simplify(Result); |
707 | 96 | |
708 | 96 | return Result; |
709 | 96 | } |
710 | | |
711 | 0 | isl::map ZoneAlgorithm::getScalarReachingDefinition(isl::set DomainDef) { |
712 | 0 | auto DomId = DomainDef.get_tuple_id(); |
713 | 0 | auto *Stmt = static_cast<ScopStmt *>(isl_id_get_user(DomId.get())); |
714 | 0 |
|
715 | 0 | auto StmtResult = getScalarReachingDefinition(Stmt); |
716 | 0 |
|
717 | 0 | return StmtResult.intersect_range(DomainDef); |
718 | 0 | } |
719 | | |
720 | 16 | isl::map ZoneAlgorithm::makeUnknownForDomain(ScopStmt *Stmt) const { |
721 | 16 | return ::makeUnknownForDomain(getDomainFor(Stmt)); |
722 | 16 | } |
723 | | |
724 | 371 | isl::id ZoneAlgorithm::makeValueId(Value *V) { |
725 | 371 | if (!V) |
726 | 0 | return nullptr; |
727 | 371 | |
728 | 371 | auto &Id = ValueIds[V]; |
729 | 371 | if (Id.is_null()) { |
730 | 204 | auto Name = getIslCompatibleName("Val_", V, ValueIds.size() - 1, |
731 | 204 | std::string(), UseInstructionNames); |
732 | 204 | Id = isl::id::alloc(IslCtx.get(), Name.c_str(), V); |
733 | 204 | } |
734 | 371 | return Id; |
735 | 371 | } |
736 | | |
737 | 371 | isl::space ZoneAlgorithm::makeValueSpace(Value *V) { |
738 | 371 | auto Result = ParamSpace.set_from_params(); |
739 | 371 | return Result.set_tuple_id(isl::dim::set, makeValueId(V)); |
740 | 371 | } |
741 | | |
742 | 371 | isl::set ZoneAlgorithm::makeValueSet(Value *V) { |
743 | 371 | auto Space = makeValueSpace(V); |
744 | 371 | return isl::set::universe(Space); |
745 | 371 | } |
746 | | |
747 | | isl::map ZoneAlgorithm::makeValInst(Value *Val, ScopStmt *UserStmt, Loop *Scope, |
748 | 376 | bool IsCertain) { |
749 | 376 | // If the definition/write is conditional, the value at the location could |
750 | 376 | // be either the written value or the old value. Since we cannot know which |
751 | 376 | // one, consider the value to be unknown. |
752 | 376 | if (!IsCertain) |
753 | 2 | return makeUnknownForDomain(UserStmt); |
754 | 374 | |
755 | 374 | auto DomainUse = getDomainFor(UserStmt); |
756 | 374 | auto VUse = VirtualUse::create(S, UserStmt, Scope, Val, true); |
757 | 374 | switch (VUse.getKind()) { |
758 | 374 | case VirtualUse::Constant: |
759 | 52 | case VirtualUse::Block: |
760 | 52 | case VirtualUse::Hoisted: |
761 | 52 | case VirtualUse::ReadOnly: { |
762 | 52 | // The definition does not depend on the statement which uses it. |
763 | 52 | auto ValSet = makeValueSet(Val); |
764 | 52 | return isl::map::from_domain_and_range(DomainUse, ValSet); |
765 | 52 | } |
766 | 52 | |
767 | 52 | case VirtualUse::Synthesizable: { |
768 | 3 | auto *ScevExpr = VUse.getScevExpr(); |
769 | 3 | auto UseDomainSpace = DomainUse.get_space(); |
770 | 3 | |
771 | 3 | // Construct the SCEV space. |
772 | 3 | // TODO: Add only the induction variables referenced in SCEVAddRecExpr |
773 | 3 | // expressions, not just all of them. |
774 | 3 | auto ScevId = isl::manage(isl_id_alloc( |
775 | 3 | UseDomainSpace.get_ctx().get(), nullptr, const_cast<SCEV *>(ScevExpr))); |
776 | 3 | |
777 | 3 | auto ScevSpace = UseDomainSpace.drop_dims(isl::dim::set, 0, 0); |
778 | 3 | ScevSpace = ScevSpace.set_tuple_id(isl::dim::set, ScevId); |
779 | 3 | |
780 | 3 | // { DomainUse[] -> ScevExpr[] } |
781 | 3 | auto ValInst = |
782 | 3 | isl::map::identity(UseDomainSpace.map_from_domain_and_range(ScevSpace)); |
783 | 3 | return ValInst; |
784 | 52 | } |
785 | 52 | |
786 | 189 | case VirtualUse::Intra: { |
787 | 189 | // Definition and use is in the same statement. We do not need to compute |
788 | 189 | // a reaching definition. |
789 | 189 | |
790 | 189 | // { llvm::Value } |
791 | 189 | auto ValSet = makeValueSet(Val); |
792 | 189 | |
793 | 189 | // { UserDomain[] -> llvm::Value } |
794 | 189 | auto ValInstSet = isl::map::from_domain_and_range(DomainUse, ValSet); |
795 | 189 | |
796 | 189 | // { UserDomain[] -> [UserDomain[] - >llvm::Value] } |
797 | 189 | auto Result = ValInstSet.domain_map().reverse(); |
798 | 189 | simplify(Result); |
799 | 189 | return Result; |
800 | 52 | } |
801 | 52 | |
802 | 130 | case VirtualUse::Inter: { |
803 | 130 | // The value is defined in a different statement. |
804 | 130 | |
805 | 130 | auto *Inst = cast<Instruction>(Val); |
806 | 130 | auto *ValStmt = S->getStmtFor(Inst); |
807 | 130 | |
808 | 130 | // If the llvm::Value is defined in a removed Stmt, we cannot derive its |
809 | 130 | // domain. We could use an arbitrary statement, but this could result in |
810 | 130 | // different ValInst[] for the same llvm::Value. |
811 | 130 | if (!ValStmt) |
812 | 0 | return ::makeUnknownForDomain(DomainUse); |
813 | 130 | |
814 | 130 | // { DomainUse[] -> DomainDef[] } |
815 | 130 | auto UsedInstance = getDefToTarget(ValStmt, UserStmt).reverse(); |
816 | 130 | |
817 | 130 | // { llvm::Value } |
818 | 130 | auto ValSet = makeValueSet(Val); |
819 | 130 | |
820 | 130 | // { DomainUse[] -> llvm::Value[] } |
821 | 130 | auto ValInstSet = isl::map::from_domain_and_range(DomainUse, ValSet); |
822 | 130 | |
823 | 130 | // { DomainUse[] -> [DomainDef[] -> llvm::Value] } |
824 | 130 | auto Result = UsedInstance.range_product(ValInstSet); |
825 | 130 | |
826 | 130 | simplify(Result); |
827 | 130 | return Result; |
828 | 130 | } |
829 | 0 | } |
830 | 0 | llvm_unreachable("Unhandled use type"); |
831 | 0 | } |
832 | | |
833 | | /// Remove all computed PHIs out of @p Input and replace by their incoming |
834 | | /// value. |
835 | | /// |
836 | | /// @param Input { [] -> ValInst[] } |
837 | | /// @param ComputedPHIs Set of PHIs that are replaced. Its ValInst must appear |
838 | | /// on the LHS of @p NormalizeMap. |
839 | | /// @param NormalizeMap { ValInst[] -> ValInst[] } |
840 | | static isl::union_map normalizeValInst(isl::union_map Input, |
841 | | const DenseSet<PHINode *> &ComputedPHIs, |
842 | 146 | isl::union_map NormalizeMap) { |
843 | 146 | isl::union_map Result = isl::union_map::empty(Input.get_space()); |
844 | 149 | for (isl::map Map : Input.get_map_list()) { |
845 | 149 | isl::space Space = Map.get_space(); |
846 | 149 | isl::space RangeSpace = Space.range(); |
847 | 149 | |
848 | 149 | // Instructions within the SCoP are always wrapped. Non-wrapped tuples |
849 | 149 | // are therefore invariant in the SCoP and don't need normalization. |
850 | 149 | if (!RangeSpace.is_wrapping()) { |
851 | 28 | Result = Result.add_map(Map); |
852 | 28 | continue; |
853 | 28 | } |
854 | 121 | |
855 | 121 | auto *PHI = dyn_cast<PHINode>(static_cast<Value *>( |
856 | 121 | RangeSpace.unwrap().get_tuple_id(isl::dim::out).get_user())); |
857 | 121 | |
858 | 121 | // If no normalization is necessary, then the ValInst stands for itself. |
859 | 121 | if (!ComputedPHIs.count(PHI)) { |
860 | 107 | Result = Result.add_map(Map); |
861 | 107 | continue; |
862 | 107 | } |
863 | 14 | |
864 | 14 | // Otherwise, apply the normalization. |
865 | 14 | isl::union_map Mapped = isl::union_map(Map).apply_range(NormalizeMap); |
866 | 14 | Result = Result.unite(Mapped); |
867 | 14 | NumPHINormialization++; |
868 | 14 | } |
869 | 146 | return Result; |
870 | 146 | } |
871 | | |
872 | | isl::union_map ZoneAlgorithm::makeNormalizedValInst(llvm::Value *Val, |
873 | | ScopStmt *UserStmt, |
874 | | llvm::Loop *Scope, |
875 | 136 | bool IsCertain) { |
876 | 136 | isl::map ValInst = makeValInst(Val, UserStmt, Scope, IsCertain); |
877 | 136 | isl::union_map Normalized = |
878 | 136 | normalizeValInst(ValInst, ComputedPHIs, NormalizeMap); |
879 | 136 | return Normalized; |
880 | 136 | } |
881 | | |
882 | 0 | bool ZoneAlgorithm::isCompatibleAccess(MemoryAccess *MA) { |
883 | 0 | if (!MA) |
884 | 0 | return false; |
885 | 0 | if (!MA->isLatestArrayKind()) |
886 | 0 | return false; |
887 | 0 | Instruction *AccInst = MA->getAccessInstruction(); |
888 | 0 | return isa<StoreInst>(AccInst) || isa<LoadInst>(AccInst); |
889 | 0 | } |
890 | | |
891 | 6 | bool ZoneAlgorithm::isNormalizable(MemoryAccess *MA) { |
892 | 6 | assert(MA->isRead()); |
893 | 6 | |
894 | 6 | // Exclude ExitPHIs, we are assuming that a normalizable PHI has a READ |
895 | 6 | // MemoryAccess. |
896 | 6 | if (!MA->isOriginalPHIKind()) |
897 | 0 | return false; |
898 | 6 | |
899 | 6 | // Exclude recursive PHIs, normalizing them would require a transitive |
900 | 6 | // closure. |
901 | 6 | auto *PHI = cast<PHINode>(MA->getAccessInstruction()); |
902 | 6 | if (RecursivePHIs.count(PHI)) |
903 | 1 | return false; |
904 | 5 | |
905 | 5 | // Ensure that each incoming value can be represented by a ValInst[]. |
906 | 5 | // We do represent values from statements associated to multiple incoming |
907 | 5 | // value by the PHI itself, but we do not handle this case yet (especially |
908 | 5 | // isNormalized()) when normalizing. |
909 | 5 | const ScopArrayInfo *SAI = MA->getOriginalScopArrayInfo(); |
910 | 5 | auto Incomings = S->getPHIIncomings(SAI); |
911 | 9 | for (MemoryAccess *Incoming : Incomings) { |
912 | 9 | if (Incoming->getIncoming().size() != 1) |
913 | 0 | return false; |
914 | 9 | } |
915 | 5 | |
916 | 5 | return true; |
917 | 5 | } |
918 | | |
919 | 0 | isl::boolean ZoneAlgorithm::isNormalized(isl::map Map) { |
920 | 0 | isl::space Space = Map.get_space(); |
921 | 0 | isl::space RangeSpace = Space.range(); |
922 | 0 |
|
923 | 0 | isl::boolean IsWrapping = RangeSpace.is_wrapping(); |
924 | 0 | if (!IsWrapping.is_true()) |
925 | 0 | return !IsWrapping; |
926 | 0 | isl::space Unwrapped = RangeSpace.unwrap(); |
927 | 0 |
|
928 | 0 | isl::id OutTupleId = Unwrapped.get_tuple_id(isl::dim::out); |
929 | 0 | if (OutTupleId.is_null()) |
930 | 0 | return isl::boolean(); |
931 | 0 | auto *PHI = dyn_cast<PHINode>(static_cast<Value *>(OutTupleId.get_user())); |
932 | 0 | if (!PHI) |
933 | 0 | return true; |
934 | 0 | |
935 | 0 | isl::id InTupleId = Unwrapped.get_tuple_id(isl::dim::in); |
936 | 0 | if (OutTupleId.is_null()) |
937 | 0 | return isl::boolean(); |
938 | 0 | auto *IncomingStmt = static_cast<ScopStmt *>(InTupleId.get_user()); |
939 | 0 | MemoryAccess *PHIRead = IncomingStmt->lookupPHIReadOf(PHI); |
940 | 0 | if (!isNormalizable(PHIRead)) |
941 | 0 | return true; |
942 | 0 | |
943 | 0 | return false; |
944 | 0 | } |
945 | | |
946 | 0 | isl::boolean ZoneAlgorithm::isNormalized(isl::union_map UMap) { |
947 | 0 | isl::boolean Result = true; |
948 | 0 | for (isl::map Map : UMap.get_map_list()) { |
949 | 0 | Result = isNormalized(Map); |
950 | 0 | if (Result.is_true()) |
951 | 0 | continue; |
952 | 0 | break; |
953 | 0 | } |
954 | 0 | return Result; |
955 | 0 | } |
956 | | |
957 | 87 | void ZoneAlgorithm::computeCommon() { |
958 | 87 | AllReads = makeEmptyUnionMap(); |
959 | 87 | AllMayWrites = makeEmptyUnionMap(); |
960 | 87 | AllMustWrites = makeEmptyUnionMap(); |
961 | 87 | AllWriteValInst = makeEmptyUnionMap(); |
962 | 87 | AllReadValInst = makeEmptyUnionMap(); |
963 | 87 | |
964 | 87 | // Default to empty, i.e. no normalization/replacement is taking place. Call |
965 | 87 | // computeNormalizedPHIs() to initialize. |
966 | 87 | NormalizeMap = makeEmptyUnionMap(); |
967 | 87 | ComputedPHIs.clear(); |
968 | 87 | |
969 | 343 | for (auto &Stmt : *S) { |
970 | 684 | for (auto *MA : Stmt) { |
971 | 684 | if (!MA->isLatestArrayKind()) |
972 | 497 | continue; |
973 | 187 | |
974 | 187 | if (MA->isRead()) |
975 | 61 | addArrayReadAccess(MA); |
976 | 187 | |
977 | 187 | if (MA->isWrite()) |
978 | 126 | addArrayWriteAccess(MA); |
979 | 187 | } |
980 | 343 | } |
981 | 87 | |
982 | 87 | // { DomainWrite[] -> Element[] } |
983 | 87 | AllWrites = AllMustWrites.unite(AllMayWrites); |
984 | 87 | |
985 | 87 | // { [Element[] -> Zone[]] -> DomainWrite[] } |
986 | 87 | WriteReachDefZone = |
987 | 87 | computeReachingDefinition(Schedule, AllWrites, false, true); |
988 | 87 | simplify(WriteReachDefZone); |
989 | 87 | } |
990 | | |
991 | 4 | void ZoneAlgorithm::computeNormalizedPHIs() { |
992 | 4 | // Determine which PHIs can reference themselves. They are excluded from |
993 | 4 | // normalization to avoid problems with transitive closures. |
994 | 13 | for (ScopStmt &Stmt : *S) { |
995 | 38 | for (MemoryAccess *MA : Stmt) { |
996 | 38 | if (!MA->isPHIKind()) |
997 | 21 | continue; |
998 | 17 | if (!MA->isRead()) |
999 | 11 | continue; |
1000 | 6 | |
1001 | 6 | // TODO: Can be more efficient since isRecursivePHI can theoretically |
1002 | 6 | // determine recursiveness for multiple values and/or cache results. |
1003 | 6 | auto *PHI = cast<PHINode>(MA->getAccessInstruction()); |
1004 | 6 | if (isRecursivePHI(PHI)) { |
1005 | 1 | NumRecursivePHIs++; |
1006 | 1 | RecursivePHIs.insert(PHI); |
1007 | 1 | } |
1008 | 6 | } |
1009 | 13 | } |
1010 | 4 | |
1011 | 4 | // { PHIValInst[] -> IncomingValInst[] } |
1012 | 4 | isl::union_map AllPHIMaps = makeEmptyUnionMap(); |
1013 | 4 | |
1014 | 4 | // Discover new PHIs and try to normalize them. |
1015 | 4 | DenseSet<PHINode *> AllPHIs; |
1016 | 13 | for (ScopStmt &Stmt : *S) { |
1017 | 38 | for (MemoryAccess *MA : Stmt) { |
1018 | 38 | if (!MA->isOriginalPHIKind()) |
1019 | 21 | continue; |
1020 | 17 | if (!MA->isRead()) |
1021 | 11 | continue; |
1022 | 6 | if (!isNormalizable(MA)) |
1023 | 1 | continue; |
1024 | 5 | |
1025 | 5 | auto *PHI = cast<PHINode>(MA->getAccessInstruction()); |
1026 | 5 | const ScopArrayInfo *SAI = MA->getOriginalScopArrayInfo(); |
1027 | 5 | |
1028 | 5 | // { PHIDomain[] -> PHIValInst[] } |
1029 | 5 | isl::map PHIValInst = makeValInst(PHI, &Stmt, Stmt.getSurroundingLoop()); |
1030 | 5 | |
1031 | 5 | // { IncomingDomain[] -> IncomingValInst[] } |
1032 | 5 | isl::union_map IncomingValInsts = makeEmptyUnionMap(); |
1033 | 5 | |
1034 | 5 | // Get all incoming values. |
1035 | 9 | for (MemoryAccess *MA : S->getPHIIncomings(SAI)) { |
1036 | 9 | ScopStmt *IncomingStmt = MA->getStatement(); |
1037 | 9 | |
1038 | 9 | auto Incoming = MA->getIncoming(); |
1039 | 9 | assert(Incoming.size() == 1 && "The incoming value must be " |
1040 | 9 | "representable by something else than " |
1041 | 9 | "the PHI itself"); |
1042 | 9 | Value *IncomingVal = Incoming[0].second; |
1043 | 9 | |
1044 | 9 | // { IncomingDomain[] -> IncomingValInst[] } |
1045 | 9 | isl::map IncomingValInst = makeValInst( |
1046 | 9 | IncomingVal, IncomingStmt, IncomingStmt->getSurroundingLoop()); |
1047 | 9 | |
1048 | 9 | IncomingValInsts = IncomingValInsts.add_map(IncomingValInst); |
1049 | 9 | } |
1050 | 5 | |
1051 | 5 | // Determine which instance of the PHI statement corresponds to which |
1052 | 5 | // incoming value. |
1053 | 5 | // { PHIDomain[] -> IncomingDomain[] } |
1054 | 5 | isl::union_map PerPHI = computePerPHI(SAI); |
1055 | 5 | |
1056 | 5 | // { PHIValInst[] -> IncomingValInst[] } |
1057 | 5 | isl::union_map PHIMap = |
1058 | 5 | PerPHI.apply_domain(PHIValInst).apply_range(IncomingValInsts); |
1059 | 5 | assert(!PHIMap.is_single_valued().is_false()); |
1060 | 5 | |
1061 | 5 | // Resolve transitiveness: The incoming value of the newly discovered PHI |
1062 | 5 | // may reference a previously normalized PHI. At the same time, already |
1063 | 5 | // normalized PHIs might be normalized to the new PHI. At the end, none of |
1064 | 5 | // the PHIs may appear on the right-hand-side of the normalization map. |
1065 | 5 | PHIMap = normalizeValInst(PHIMap, AllPHIs, AllPHIMaps); |
1066 | 5 | AllPHIs.insert(PHI); |
1067 | 5 | AllPHIMaps = normalizeValInst(AllPHIMaps, AllPHIs, PHIMap); |
1068 | 5 | |
1069 | 5 | AllPHIMaps = AllPHIMaps.unite(PHIMap); |
1070 | 5 | NumNormalizablePHIs++; |
1071 | 5 | } |
1072 | 13 | } |
1073 | 4 | simplify(AllPHIMaps); |
1074 | 4 | |
1075 | 4 | // Apply the normalization. |
1076 | 4 | ComputedPHIs = AllPHIs; |
1077 | 4 | NormalizeMap = AllPHIMaps; |
1078 | 4 | |
1079 | 4 | assert(!NormalizeMap || isNormalized(NormalizeMap)); |
1080 | 4 | } |
1081 | | |
1082 | 30 | void ZoneAlgorithm::printAccesses(llvm::raw_ostream &OS, int Indent) const { |
1083 | 30 | OS.indent(Indent) << "After accesses {\n"; |
1084 | 145 | for (auto &Stmt : *S) { |
1085 | 145 | OS.indent(Indent + 4) << Stmt.getBaseName() << "\n"; |
1086 | 145 | for (auto *MA : Stmt) |
1087 | 288 | MA->print(OS); |
1088 | 145 | } |
1089 | 30 | OS.indent(Indent) << "}\n"; |
1090 | 30 | } |
1091 | | |
1092 | 87 | isl::union_map ZoneAlgorithm::computeKnownFromMustWrites() const { |
1093 | 87 | // { [Element[] -> Zone[]] -> [Element[] -> DomainWrite[]] } |
1094 | 87 | isl::union_map EltReachdDef = distributeDomain(WriteReachDefZone.curry()); |
1095 | 87 | |
1096 | 87 | // { [Element[] -> DomainWrite[]] -> ValInst[] } |
1097 | 87 | isl::union_map AllKnownWriteValInst = filterKnownValInst(AllWriteValInst); |
1098 | 87 | |
1099 | 87 | // { [Element[] -> Zone[]] -> ValInst[] } |
1100 | 87 | return EltReachdDef.apply_range(AllKnownWriteValInst); |
1101 | 87 | } |
1102 | | |
1103 | 34 | isl::union_map ZoneAlgorithm::computeKnownFromLoad() const { |
1104 | 34 | // { Element[] } |
1105 | 34 | isl::union_set AllAccessedElts = AllReads.range().unite(AllWrites.range()); |
1106 | 34 | |
1107 | 34 | // { Element[] -> Scatter[] } |
1108 | 34 | isl::union_map EltZoneUniverse = isl::union_map::from_domain_and_range( |
1109 | 34 | AllAccessedElts, isl::set::universe(ScatterSpace)); |
1110 | 34 | |
1111 | 34 | // This assumes there are no "holes" in |
1112 | 34 | // isl_union_map_domain(WriteReachDefZone); alternatively, compute the zone |
1113 | 34 | // before the first write or that are not written at all. |
1114 | 34 | // { Element[] -> Scatter[] } |
1115 | 34 | isl::union_set NonReachDef = |
1116 | 34 | EltZoneUniverse.wrap().subtract(WriteReachDefZone.domain()); |
1117 | 34 | |
1118 | 34 | // { [Element[] -> Zone[]] -> ReachDefId[] } |
1119 | 34 | isl::union_map DefZone = |
1120 | 34 | WriteReachDefZone.unite(isl::union_map::from_domain(NonReachDef)); |
1121 | 34 | |
1122 | 34 | // { [Element[] -> Scatter[]] -> Element[] } |
1123 | 34 | isl::union_map EltZoneElt = EltZoneUniverse.domain_map(); |
1124 | 34 | |
1125 | 34 | // { [Element[] -> Zone[]] -> [Element[] -> ReachDefId[]] } |
1126 | 34 | isl::union_map DefZoneEltDefId = EltZoneElt.range_product(DefZone); |
1127 | 34 | |
1128 | 34 | // { Element[] -> [Zone[] -> ReachDefId[]] } |
1129 | 34 | isl::union_map EltDefZone = DefZone.curry(); |
1130 | 34 | |
1131 | 34 | // { [Element[] -> Zone[] -> [Element[] -> ReachDefId[]] } |
1132 | 34 | isl::union_map EltZoneEltDefid = distributeDomain(EltDefZone); |
1133 | 34 | |
1134 | 34 | // { [Element[] -> Scatter[]] -> DomainRead[] } |
1135 | 34 | isl::union_map Reads = AllReads.range_product(Schedule).reverse(); |
1136 | 34 | |
1137 | 34 | // { [Element[] -> Scatter[]] -> [Element[] -> DomainRead[]] } |
1138 | 34 | isl::union_map ReadsElt = EltZoneElt.range_product(Reads); |
1139 | 34 | |
1140 | 34 | // { [Element[] -> Scatter[]] -> ValInst[] } |
1141 | 34 | isl::union_map ScatterKnown = ReadsElt.apply_range(AllReadValInst); |
1142 | 34 | |
1143 | 34 | // { [Element[] -> ReachDefId[]] -> ValInst[] } |
1144 | 34 | isl::union_map DefidKnown = |
1145 | 34 | DefZoneEltDefId.apply_domain(ScatterKnown).reverse(); |
1146 | 34 | |
1147 | 34 | // { [Element[] -> Zone[]] -> ValInst[] } |
1148 | 34 | return DefZoneEltDefId.apply_range(DefidKnown); |
1149 | 34 | } |
1150 | | |
1151 | | isl::union_map ZoneAlgorithm::computeKnown(bool FromWrite, |
1152 | 87 | bool FromRead) const { |
1153 | 87 | isl::union_map Result = makeEmptyUnionMap(); |
1154 | 87 | |
1155 | 87 | if (FromWrite) |
1156 | 87 | Result = Result.unite(computeKnownFromMustWrites()); |
1157 | 87 | |
1158 | 87 | if (FromRead) |
1159 | 34 | Result = Result.unite(computeKnownFromLoad()); |
1160 | 87 | |
1161 | 87 | simplify(Result); |
1162 | 87 | return Result; |
1163 | 87 | } |