/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Transforms/Utils/MemorySSA.cpp
Line | Count | Source |
1 | | //===-- MemorySSA.cpp - Memory SSA Builder---------------------------===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------===// |
9 | | // |
10 | | // This file implements the MemorySSA class. |
11 | | // |
12 | | //===----------------------------------------------------------------===// |
13 | | #include "llvm/Transforms/Utils/MemorySSA.h" |
14 | | #include "llvm/ADT/DenseMap.h" |
15 | | #include "llvm/ADT/DenseSet.h" |
16 | | #include "llvm/ADT/DepthFirstIterator.h" |
17 | | #include "llvm/ADT/GraphTraits.h" |
18 | | #include "llvm/ADT/PostOrderIterator.h" |
19 | | #include "llvm/ADT/STLExtras.h" |
20 | | #include "llvm/ADT/SmallBitVector.h" |
21 | | #include "llvm/ADT/SmallPtrSet.h" |
22 | | #include "llvm/ADT/SmallSet.h" |
23 | | #include "llvm/ADT/Statistic.h" |
24 | | #include "llvm/Analysis/AliasAnalysis.h" |
25 | | #include "llvm/Analysis/CFG.h" |
26 | | #include "llvm/Analysis/GlobalsModRef.h" |
27 | | #include "llvm/Analysis/IteratedDominanceFrontier.h" |
28 | | #include "llvm/Analysis/MemoryLocation.h" |
29 | | #include "llvm/Analysis/PHITransAddr.h" |
30 | | #include "llvm/IR/AssemblyAnnotationWriter.h" |
31 | | #include "llvm/IR/DataLayout.h" |
32 | | #include "llvm/IR/Dominators.h" |
33 | | #include "llvm/IR/GlobalVariable.h" |
34 | | #include "llvm/IR/IRBuilder.h" |
35 | | #include "llvm/IR/IntrinsicInst.h" |
36 | | #include "llvm/IR/LLVMContext.h" |
37 | | #include "llvm/IR/Metadata.h" |
38 | | #include "llvm/IR/Module.h" |
39 | | #include "llvm/IR/PatternMatch.h" |
40 | | #include "llvm/Support/Debug.h" |
41 | | #include "llvm/Support/FormattedStream.h" |
42 | | #include "llvm/Transforms/Scalar.h" |
43 | | #include <algorithm> |
44 | | |
45 | | #define DEBUG_TYPE "memoryssa" |
46 | | using namespace llvm; |
47 | 22.0k | INITIALIZE_PASS_BEGIN(MemorySSAWrapperPass, "memoryssa", "Memory SSA", false, |
48 | 22.0k | true) |
49 | 22.0k | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) |
50 | 22.0k | INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) |
51 | 22.0k | INITIALIZE_PASS_END(MemorySSAWrapperPass, "memoryssa", "Memory SSA", false, |
52 | | true) |
53 | | |
54 | 7.02k | INITIALIZE_PASS_BEGIN(MemorySSAPrinterLegacyPass, "print-memoryssa", |
55 | 7.02k | "Memory SSA Printer", false, false) |
56 | 7.02k | INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) |
57 | 7.02k | INITIALIZE_PASS_END(MemorySSAPrinterLegacyPass, "print-memoryssa", |
58 | | "Memory SSA Printer", false, false) |
59 | | |
60 | | static cl::opt<unsigned> MaxCheckLimit( |
61 | | "memssa-check-limit", cl::Hidden, cl::init(100), |
62 | | cl::desc("The maximum number of stores/phis MemorySSA " |
63 | | "will consider trying to walk past (default = 100)")); |
64 | | |
65 | | static cl::opt<bool> |
66 | | VerifyMemorySSA("verify-memoryssa", cl::init(false), cl::Hidden, |
67 | | cl::desc("Verify MemorySSA in legacy printer pass.")); |
68 | | |
69 | | namespace llvm { |
70 | | /// \brief An assembly annotator class to print Memory SSA information in |
71 | | /// comments. |
72 | | class MemorySSAAnnotatedWriter : public AssemblyAnnotationWriter { |
73 | | friend class MemorySSA; |
74 | | const MemorySSA *MSSA; |
75 | | |
76 | | public: |
77 | 82 | MemorySSAAnnotatedWriter(const MemorySSA *M) : MSSA(M) {} |
78 | | |
79 | | virtual void emitBasicBlockStartAnnot(const BasicBlock *BB, |
80 | 228 | formatted_raw_ostream &OS) { |
81 | 228 | if (MemoryAccess *MA = MSSA->getMemoryAccess(BB)) |
82 | 69 | OS << "; " << *MA << "\n"; |
83 | 228 | } |
84 | | |
85 | | virtual void emitInstructionAnnot(const Instruction *I, |
86 | 724 | formatted_raw_ostream &OS) { |
87 | 724 | if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) |
88 | 363 | OS << "; " << *MA << "\n"; |
89 | 724 | } |
90 | | }; |
91 | | } |
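A minimal usage sketch (not part of this file's coverage data): an AssemblyAnnotationWriter such as the class above is handed to Function::print(), which invokes the emit*Annot hooks so every block and instruction is prefixed with its MemoryPhi/MemoryDef/MemoryUse comment. The helper name below is hypothetical.

void printFunctionWithMemorySSA(Function &F, const MemorySSA &MSSA,
                                raw_ostream &OS) {
  MemorySSAAnnotatedWriter Writer(&MSSA); // annotator defined above
  F.print(OS, &Writer);                   // emits "; MemoryUse(1)"-style comments
}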
92 | | |
93 | | namespace { |
94 | | /// Our current alias analysis API differentiates heavily between calls and |
95 | | /// non-calls, and functions called on one usually assert on the other. |
96 | | /// This class encapsulates the distinction to simplify other code that wants |
97 | | /// "Memory affecting instructions and related data" to use as a key. |
98 | | /// For example, this class is used as a densemap key in the use optimizer. |
99 | | class MemoryLocOrCall { |
100 | | public: |
101 | 0 | MemoryLocOrCall() : IsCall(false) {} |
102 | | MemoryLocOrCall(MemoryUseOrDef *MUD) |
103 | 822 | : MemoryLocOrCall(MUD->getMemoryInst()) {} |
104 | | MemoryLocOrCall(const MemoryUseOrDef *MUD) |
105 | 1 | : MemoryLocOrCall(MUD->getMemoryInst()) {} |
106 | | |
107 | 823 | MemoryLocOrCall(Instruction *Inst) { |
108 | 823 | if (ImmutableCallSite(Inst)) { |
109 | 60 | IsCall = true; |
110 | 60 | CS = ImmutableCallSite(Inst); |
111 | 763 | } else { |
112 | 763 | IsCall = false; |
113 | 763 | // There is no such thing as a memorylocation for a fence inst, and it is |
114 | 763 | // unique in that regard. |
115 | 763 | if (!isa<FenceInst>(Inst)) |
116 | 763 | Loc = MemoryLocation::get(Inst); |
117 | 763 | } |
118 | 823 | } |
119 | | |
120 | | explicit MemoryLocOrCall(const MemoryLocation &Loc) |
121 | 3.20k | : IsCall(false), Loc(Loc) {} |
122 | | |
123 | | bool IsCall; |
124 | 60 | ImmutableCallSite getCS() const { |
125 | 60 | assert(IsCall); |
126 | 60 | return CS; |
127 | 60 | } |
128 | 1.63k | MemoryLocation getLoc() const { |
129 | 1.63k | assert(!IsCall); |
130 | 1.63k | return Loc; |
131 | 1.63k | } |
132 | | |
133 | 23.9k | bool operator==(const MemoryLocOrCall &Other) const { |
134 | 23.9k | if (IsCall != Other.IsCall) |
135 | 122 | return false; |
136 | 23.9k | |
137 | 23.8k | if (IsCall) |
138 | 21 | return CS.getCalledValue() == Other.CS.getCalledValue(); |
139 | 23.7k | return Loc == Other.Loc; |
140 | 23.8k | } |
141 | | |
142 | | private: |
143 | | union { |
144 | | ImmutableCallSite CS; |
145 | | MemoryLocation Loc; |
146 | | }; |
147 | | }; |
148 | | } |
149 | | |
150 | | namespace llvm { |
151 | | template <> struct DenseMapInfo<MemoryLocOrCall> { |
152 | 2.04k | static inline MemoryLocOrCall getEmptyKey() { |
153 | 2.04k | return MemoryLocOrCall(DenseMapInfo<MemoryLocation>::getEmptyKey()); |
154 | 2.04k | } |
155 | 1.15k | static inline MemoryLocOrCall getTombstoneKey() { |
156 | 1.15k | return MemoryLocOrCall(DenseMapInfo<MemoryLocation>::getTombstoneKey()); |
157 | 1.15k | } |
158 | 822 | static unsigned getHashValue(const MemoryLocOrCall &MLOC) { |
159 | 822 | if (MLOC.IsCall) |
160 | 60 | return hash_combine(MLOC.IsCall, |
161 | 60 | DenseMapInfo<const Value *>::getHashValue( |
162 | 60 | MLOC.getCS().getCalledValue())); |
163 | 762 | return hash_combine( |
164 | 762 | MLOC.IsCall, DenseMapInfo<MemoryLocation>::getHashValue(MLOC.getLoc())); |
165 | 822 | } |
166 | 23.9k | static bool isEqual(const MemoryLocOrCall &LHS, const MemoryLocOrCall &RHS) { |
167 | 23.9k | return LHS == RHS; |
168 | 23.9k | } |
169 | | }; |
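As the class comment earlier notes, MemoryLocOrCall exists to serve as a DenseMap key in the use optimizer; the specialization above supplies the empty/tombstone keys, hash, and equality that requires. A minimal sketch of that usage (the map and helper below are hypothetical, for illustration only):

static void recordStackSlot(DenseMap<MemoryLocOrCall, unsigned> &Slots,
                            MemoryUseOrDef *MUD, unsigned Slot) {
  // Calls hash by their callee; everything else hashes by its MemoryLocation.
  Slots[MemoryLocOrCall(MUD)] = Slot;
}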
170 | | |
171 | | enum class Reorderability { Always, IfNoAlias, Never }; |
172 | | |
173 | | /// This does one-way checks to see if Use could theoretically be hoisted above |
174 | | /// MayClobber. This will not check the other way around. |
175 | | /// |
176 | | /// This assumes that, for the purposes of MemorySSA, Use comes directly after |
177 | | /// MayClobber, with no potentially clobbering operations in between them. |
178 | | /// (Where potentially clobbering ops are memory barriers, aliased stores, etc.) |
179 | | static Reorderability getLoadReorderability(const LoadInst *Use, |
180 | 34 | const LoadInst *MayClobber) { |
181 | 34 | bool VolatileUse = Use->isVolatile(); |
182 | 34 | bool VolatileClobber = MayClobber->isVolatile(); |
183 | 34 | // Volatile operations may never be reordered with other volatile operations. |
184 | 34 | if (VolatileUse && VolatileClobber) |
185 | 0 | return Reorderability::Never; |
186 | 34 | |
187 | 34 | // The lang ref allows reordering of volatile and non-volatile operations. |
188 | 34 | // Whether an aliasing nonvolatile load and volatile load can be reordered, |
189 | 34 | // though, is ambiguous. Because it may not be best to exploit this ambiguity, |
190 | 34 | // we only allow volatile/non-volatile reordering if the volatile and |
191 | 34 | // non-volatile operations don't alias. |
192 | 34 | Reorderability Result = VolatileUse || VolatileClobber |
193 | 24 | ? Reorderability::IfNoAlias |
194 | 10 | : Reorderability::Always; |
195 | 34 | |
196 | 34 | // If a load is seq_cst, it cannot be moved above other loads. If its ordering |
197 | 34 | // is weaker, it can be moved above other loads. We just need to be sure that |
198 | 34 | // MayClobber isn't an acquire load, because loads can't be moved above |
199 | 34 | // acquire loads. |
200 | 34 | // |
201 | 34 | // Note that this explicitly *does* allow the free reordering of monotonic (or |
202 | 34 | // weaker) loads of the same address. |
203 | 34 | bool SeqCstUse = Use->getOrdering() == AtomicOrdering::SequentiallyConsistent; |
204 | 34 | bool MayClobberIsAcquire = isAtLeastOrStrongerThan(MayClobber->getOrdering(), |
205 | 34 | AtomicOrdering::Acquire); |
206 | 34 | if (SeqCstUse || MayClobberIsAcquire) |
207 | 12 | return Reorderability::Never; |
208 | 22 | return Result; |
209 | 34 | } |
210 | | |
211 | | static bool instructionClobbersQuery(MemoryDef *MD, |
212 | | const MemoryLocation &UseLoc, |
213 | | const Instruction *UseInst, |
214 | 768 | AliasAnalysis &AA) { |
215 | 768 | Instruction *DefInst = MD->getMemoryInst(); |
216 | 768 | assert(DefInst && "Defining instruction not actually an instruction"); |
217 | 768 | ImmutableCallSite UseCS(UseInst); |
218 | 768 | |
219 | 768 | if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) { |
220 | 32 | // These intrinsics will show up as affecting memory, but they are just |
221 | 32 | // markers. |
222 | 32 | switch (II->getIntrinsicID()) { |
223 | 3 | case Intrinsic::lifetime_start: |
224 | 3 | if (UseCS) |
225 | 0 | return false; |
226 | 3 | return AA.isMustAlias(MemoryLocation(II->getArgOperand(1)), UseLoc); |
227 | 6 | case Intrinsic::lifetime_end: |
228 | 6 | case Intrinsic::invariant_start: |
229 | 6 | case Intrinsic::invariant_end: |
230 | 6 | case Intrinsic::assume: |
231 | 6 | return false; |
232 | 23 | default: |
233 | 23 | break; |
234 | 32 | } |
235 | 32 | } |
236 | 768 | |
237 | 759 | if (UseCS) { |
238 | 37 | ModRefInfo I = AA.getModRefInfo(DefInst, UseCS); |
239 | 37 | return I != MRI_NoModRef; |
240 | 37 | } |
241 | 759 | |
242 | 722 | if (auto *DefLoad = dyn_cast<LoadInst>(DefInst)) { |
243 | 34 | if (auto *UseLoad = dyn_cast<LoadInst>(UseInst)) { |
244 | 34 | switch (getLoadReorderability(UseLoad, DefLoad)) { |
245 | 0 | case Reorderability::Always: |
246 | 0 | return false; |
247 | 12 | case Reorderability::Never: |
248 | 12 | return true; |
249 | 22 | case Reorderability::IfNoAlias: |
250 | 22 | return !AA.isNoAlias(UseLoc, MemoryLocation::get(DefLoad)); |
251 | 34 | } |
252 | 34 | } |
253 | 34 | } |
254 | 722 | |
255 | 688 | return AA.getModRefInfo(DefInst, UseLoc) & MRI_Mod; |
256 | 722 | } |
257 | | |
258 | | static bool instructionClobbersQuery(MemoryDef *MD, const MemoryUseOrDef *MU, |
259 | | const MemoryLocOrCall &UseMLOC, |
260 | 468 | AliasAnalysis &AA) { |
261 | 468 | // FIXME: This is a temporary hack to allow a single instructionClobbersQuery |
262 | 468 | // to exist while MemoryLocOrCall is pushed through places. |
263 | 468 | if (UseMLOC.IsCall) |
264 | 34 | return instructionClobbersQuery(MD, MemoryLocation(), MU->getMemoryInst(), |
265 | 34 | AA); |
266 | 434 | return instructionClobbersQuery(MD, UseMLOC.getLoc(), MU->getMemoryInst(), |
267 | 434 | AA); |
268 | 468 | } |
269 | | |
270 | | // Return true when MD may alias MU, return false otherwise. |
271 | | bool MemorySSAUtil::defClobbersUseOrDef(MemoryDef *MD, const MemoryUseOrDef *MU, |
272 | 1 | AliasAnalysis &AA) { |
273 | 1 | return instructionClobbersQuery(MD, MU, MemoryLocOrCall(MU), AA); |
274 | 1 | } |
275 | | } |
276 | | |
277 | | namespace { |
278 | | struct UpwardsMemoryQuery { |
279 | | // True if our original query started off as a call |
280 | | bool IsCall; |
281 | | // The pointer location we started the query with. This will be empty if |
282 | | // IsCall is true. |
283 | | MemoryLocation StartingLoc; |
284 | | // This is the instruction we were querying about. |
285 | | const Instruction *Inst; |
286 | | // The MemoryAccess we actually got called with, used to test local domination |
287 | | const MemoryAccess *OriginalAccess; |
288 | | |
289 | | UpwardsMemoryQuery() |
290 | 2 | : IsCall(false), Inst(nullptr), OriginalAccess(nullptr) {} |
291 | | |
292 | | UpwardsMemoryQuery(const Instruction *Inst, const MemoryAccess *Access) |
293 | 188 | : IsCall(ImmutableCallSite(Inst)), Inst(Inst), OriginalAccess(Access) { |
294 | 188 | if (!IsCall) |
295 | 184 | StartingLoc = MemoryLocation::get(Inst); |
296 | 188 | } |
297 | | }; |
298 | | |
299 | | static bool lifetimeEndsAt(MemoryDef *MD, const MemoryLocation &Loc, |
300 | 436 | AliasAnalysis &AA) { |
301 | 436 | Instruction *Inst = MD->getMemoryInst(); |
302 | 436 | if (IntrinsicInst *II436 = dyn_cast<IntrinsicInst>(Inst)) {24 |
303 | 24 | switch (II->getIntrinsicID()) { |
304 | 5 | case Intrinsic::lifetime_end: |
305 | 5 | return AA.isMustAlias(MemoryLocation(II->getArgOperand(1)), Loc); |
306 | 19 | default: |
307 | 19 | return false; |
308 | 24 | } |
309 | 24 | } |
310 | 412 | return false; |
311 | 436 | } |
312 | | |
313 | | static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysis &AA, |
314 | 1.02k | const Instruction *I) { |
315 | 1.02k | // If the memory can't be changed, then loads of the memory can't be |
316 | 1.02k | // clobbered. |
317 | 1.02k | // |
318 | 1.02k | // FIXME: We should handle invariant groups, as well. It's a bit harder, |
319 | 1.02k | // because we need to pay close attention to invariant group barriers. |
320 | 952 | return isa<LoadInst>(I) && (I->getMetadata(LLVMContext::MD_invariant_load) || |
321 | 942 | AA.pointsToConstantMemory(cast<LoadInst>(I)-> |
322 | 942 | getPointerOperand())); |
323 | 1.02k | } |
324 | | |
325 | | /// Verifies that `Start` is clobbered by `ClobberAt`, and that nothing |
326 | | /// in between `Start` and `ClobberAt` can clobber `Start`. |
327 | | /// |
328 | | /// This is meant to be as simple and self-contained as possible. Because it |
329 | | /// uses no cache, etc., it can be relatively expensive. |
330 | | /// |
331 | | /// \param Start The MemoryAccess that we want to walk from. |
332 | | /// \param ClobberAt A clobber for Start. |
333 | | /// \param StartLoc The MemoryLocation for Start. |
334 | | /// \param MSSA The MemorySSA instance that Start and ClobberAt belong to. |
335 | | /// \param Query The UpwardsMemoryQuery we used for our search. |
336 | | /// \param AA The AliasAnalysis we used for our search. |
337 | | static void LLVM_ATTRIBUTE_UNUSED |
338 | | checkClobberSanity(MemoryAccess *Start, MemoryAccess *ClobberAt, |
339 | | const MemoryLocation &StartLoc, const MemorySSA &MSSA, |
340 | 0 | const UpwardsMemoryQuery &Query, AliasAnalysis &AA) { |
341 | 0 | assert(MSSA.dominates(ClobberAt, Start) && "Clobber doesn't dominate start?"); |
342 | 0 | |
343 | 0 | if (MSSA.isLiveOnEntryDef(Start)) { |
344 | 0 | assert(MSSA.isLiveOnEntryDef(ClobberAt) && |
345 | 0 | "liveOnEntry must clobber itself"); |
346 | 0 | return; |
347 | 0 | } |
348 | 0 | |
349 | 0 | bool FoundClobber = false; |
350 | 0 | DenseSet<MemoryAccessPair> VisitedPhis; |
351 | 0 | SmallVector<MemoryAccessPair, 8> Worklist; |
352 | 0 | Worklist.emplace_back(Start, StartLoc); |
353 | 0 | // Walk all paths from Start to ClobberAt, while looking for clobbers. If one |
354 | 0 | // is found, complain. |
355 | 0 | while (!Worklist.empty()) { |
356 | 0 | MemoryAccessPair MAP = Worklist.pop_back_val(); |
357 | 0 | // All we care about is that nothing from Start to ClobberAt clobbers Start. |
358 | 0 | // We learn nothing from revisiting nodes. |
359 | 0 | if (!VisitedPhis.insert(MAP).second) |
360 | 0 | continue; |
361 | 0 | |
362 | 0 | for (MemoryAccess *MA : def_chain(MAP.first)) { |
363 | 0 | if (MA == ClobberAt) { |
364 | 0 | if (auto *MD = dyn_cast<MemoryDef>(MA)) { |
365 | 0 | // instructionClobbersQuery isn't essentially free, so don't use `|=`, |
366 | 0 | // since it won't let us short-circuit. |
367 | 0 | // |
368 | 0 | // Also, note that this can't be hoisted out of the `Worklist` loop, |
369 | 0 | // since MD may only act as a clobber for 1 of N MemoryLocations. |
370 | 0 | FoundClobber = |
371 | 0 | FoundClobber || MSSA.isLiveOnEntryDef(MD) || |
372 | 0 | instructionClobbersQuery(MD, MAP.second, Query.Inst, AA); |
373 | 0 | } |
374 | 0 | break; |
375 | 0 | } |
376 | 0 | |
377 | 0 | // We should never hit liveOnEntry, unless it's the clobber. |
378 | 0 | assert(!MSSA.isLiveOnEntryDef(MA) && "Hit liveOnEntry before clobber?"); |
379 | 0 | |
380 | 0 | if (auto *MD = dyn_cast<MemoryDef>(MA)) { |
381 | 0 | (void)MD; |
382 | 0 | assert(!instructionClobbersQuery(MD, MAP.second, Query.Inst, AA) && |
383 | 0 | "Found clobber before reaching ClobberAt!"); |
384 | 0 | continue; |
385 | 0 | } |
386 | 0 | |
387 | 0 | assert(isa<MemoryPhi>(MA)); |
388 | 0 | Worklist.append(upward_defs_begin({MA, MAP.second}), upward_defs_end()); |
389 | 0 | } |
390 | 0 | } |
391 | 0 | |
392 | 0 | // If ClobberAt is a MemoryPhi, we can assume something above it acted as a |
393 | 0 | // clobber. Otherwise, `ClobberAt` should've acted as a clobber at some point. |
394 | 0 | assert((isa<MemoryPhi>(ClobberAt) || FoundClobber) && |
395 | 0 | "ClobberAt never acted as a clobber"); |
396 | 0 | } |
397 | | |
398 | | /// Our algorithm for walking (and trying to optimize) clobbers, all wrapped up |
399 | | /// in one class. |
400 | | class ClobberWalker { |
401 | | /// Save a few bytes by using unsigned instead of size_t. |
402 | | using ListIndex = unsigned; |
403 | | |
404 | | /// Represents a span of contiguous MemoryDefs, potentially ending in a |
405 | | /// MemoryPhi. |
406 | | struct DefPath { |
407 | | MemoryLocation Loc; |
408 | | // Note that, because we always walk in reverse, Last will always dominate |
409 | | // First. Also note that First and Last are inclusive. |
410 | | MemoryAccess *First; |
411 | | MemoryAccess *Last; |
412 | | Optional<ListIndex> Previous; |
413 | | |
414 | | DefPath(const MemoryLocation &Loc, MemoryAccess *First, MemoryAccess *Last, |
415 | | Optional<ListIndex> Previous) |
416 | 1.03k | : Loc(Loc), First(First), Last(Last), Previous(Previous) {} |
417 | | |
418 | | DefPath(const MemoryLocation &Loc, MemoryAccess *Init, |
419 | | Optional<ListIndex> Previous) |
420 | 675 | : DefPath(Loc, Init, Init, Previous) {} |
421 | | }; |
422 | | |
423 | | const MemorySSA &MSSA; |
424 | | AliasAnalysis &AA; |
425 | | DominatorTree &DT; |
426 | | UpwardsMemoryQuery *Query; |
427 | | |
428 | | // Phi optimization bookkeeping |
429 | | SmallVector<DefPath, 32> Paths; |
430 | | DenseSet<ConstMemoryAccessPair> VisitedPhis; |
431 | | |
432 | | /// Find the nearest def or phi that `From` can legally be optimized to. |
433 | 180 | const MemoryAccess *getWalkTarget(const MemoryPhi *From) const { |
434 | 180 | assert(From->getNumOperands() && "Phi with no operands?"); |
435 | 180 | |
436 | 180 | BasicBlock *BB = From->getBlock(); |
437 | 180 | MemoryAccess *Result = MSSA.getLiveOnEntryDef(); |
438 | 180 | DomTreeNode *Node = DT.getNode(BB); |
439 | 295 | while ((Node = Node->getIDom())) { |
440 | 223 | auto *Defs = MSSA.getBlockDefs(Node->getBlock()); |
441 | 223 | if (Defs) |
442 | 108 | return &*Defs->rbegin(); |
443 | 223 | } |
444 | 72 | return Result; |
445 | 180 | } |
446 | | |
447 | | /// Result of calling walkToPhiOrClobber. |
448 | | struct UpwardsWalkResult { |
449 | | /// The "Result" of the walk. Either a clobber, the last thing we walked, or |
450 | | /// both. |
451 | | MemoryAccess *Result; |
452 | | bool IsKnownClobber; |
453 | | }; |
454 | | |
455 | | /// Walk to the next Phi or Clobber in the def chain starting at Desc.Last. |
456 | | /// This will update Desc.Last as it walks. It will (optionally) also stop at |
457 | | /// StopAt. |
458 | | /// |
459 | | /// This does not test for whether StopAt is a clobber |
460 | | UpwardsWalkResult |
461 | | walkToPhiOrClobber(DefPath &Desc, |
462 | 560 | const MemoryAccess *StopAt = nullptr) const { |
463 | 560 | assert(!isa<MemoryUse>(Desc.Last) && "Uses don't exist in my world"); |
464 | 560 | |
465 | 704 | for (MemoryAccess *Current : def_chain(Desc.Last)) { |
466 | 704 | Desc.Last = Current; |
467 | 704 | if (Current == StopAt) |
468 | 71 | return {Current, false}; |
469 | 704 | |
470 | 633 | if (auto *MD = dyn_cast<MemoryDef>(Current)) |
471 | 339 | if (MSSA.isLiveOnEntryDef(MD) || |
472 | 300 | instructionClobbersQuery(MD, Desc.Loc, Query->Inst, AA)) |
473 | 195 | return {MD, true}; |
474 | 633 | } |
475 | 560 | |
476 | 294 | assert(isa<MemoryPhi>(Desc.Last) && |
477 | 294 | "Ended at a non-clobber that's not a phi?"); |
478 | 294 | return {Desc.Last, false}; |
479 | 560 | } |
480 | | |
481 | | void addSearches(MemoryPhi *Phi, SmallVectorImpl<ListIndex> &PausedSearches, |
482 | 294 | ListIndex PriorNode) { |
483 | 294 | auto UpwardDefs = make_range(upward_defs_begin({Phi, Paths[PriorNode].Loc}), |
484 | 294 | upward_defs_end()); |
485 | 675 | for (const MemoryAccessPair &P : UpwardDefs) { |
486 | 675 | PausedSearches.push_back(Paths.size()); |
487 | 675 | Paths.emplace_back(P.second, P.first, PriorNode); |
488 | 675 | } |
489 | 294 | } |
490 | | |
491 | | /// Represents a search that terminated after finding a clobber. This clobber |
492 | | /// may or may not be present in the path of defs from LastNode..SearchStart, |
493 | | /// since it may have been retrieved from cache. |
494 | | struct TerminatedPath { |
495 | | MemoryAccess *Clobber; |
496 | | ListIndex LastNode; |
497 | | }; |
498 | | |
499 | | /// Get an access that keeps us from optimizing to the given phi. |
500 | | /// |
501 | | /// PausedSearches is an array of indices into the Paths array. Its incoming |
502 | | /// value is the indices of searches that stopped at the last phi optimization |
503 | | /// target. It's left in an unspecified state. |
504 | | /// |
505 | | /// If this returns None, NewPaused is a vector of searches that terminated |
506 | | /// at StopWhere. Otherwise, NewPaused is left in an unspecified state. |
507 | | Optional<TerminatedPath> |
508 | | getBlockingAccess(const MemoryAccess *StopWhere, |
509 | | SmallVectorImpl<ListIndex> &PausedSearches, |
510 | | SmallVectorImpl<ListIndex> &NewPaused, |
511 | 180 | SmallVectorImpl<TerminatedPath> &Terminated) { |
512 | 180 | assert(!PausedSearches.empty() && "No searches to continue?"); |
513 | 180 | |
514 | 180 | // BFS vs DFS really doesn't make a difference here, so just do a DFS with |
515 | 180 | // PausedSearches as our stack. |
516 | 496 | while (!PausedSearches.empty()) { |
517 | 451 | ListIndex PathIndex = PausedSearches.pop_back_val(); |
518 | 451 | DefPath &Node = Paths[PathIndex]; |
519 | 451 | |
520 | 451 | // If we've already visited this path with this MemoryLocation, we don't |
521 | 451 | // need to do so again. |
522 | 451 | // |
523 | 451 | // NOTE: That we just drop these paths on the ground makes caching |
524 | 451 | // behavior sporadic. e.g. given a diamond: |
525 | 451 | // A |
526 | 451 | // B C |
527 | 451 | // D |
528 | 451 | // |
529 | 451 | // ...If we walk D, B, A, C, we'll only cache the result of phi |
530 | 451 | // optimization for A, B, and D; C will be skipped because it dies here. |
531 | 451 | // This arguably isn't the worst thing ever, since: |
532 | 451 | // - We generally query things in a top-down order, so if we got below D |
533 | 451 | // without needing cache entries for {C, MemLoc}, then chances are |
534 | 451 | // that those cache entries would end up ultimately unused. |
535 | 451 | // - We still cache things for A, so C only needs to walk up a bit. |
536 | 451 | // If this behavior becomes problematic, we can fix without a ton of extra |
537 | 451 | // work. |
538 | 451 | if (!VisitedPhis.insert({Node.Last, Node.Loc}).second) |
539 | 136 | continue; |
540 | 451 | |
541 | 315 | UpwardsWalkResult Res = walkToPhiOrClobber(Node, /*StopAt=*/StopWhere); |
542 | 315 | if (Res.IsKnownClobber) { |
543 | 135 | assert(Res.Result != StopWhere); |
544 | 135 | // If this wasn't a cache hit, we hit a clobber when walking. That's a |
545 | 135 | // failure. |
546 | 135 | TerminatedPath Term{Res.Result, PathIndex}; |
547 | 135 | if (!MSSA.dominates(Res.Result, StopWhere)) |
548 | 135 | return Term; |
549 | 135 | |
550 | 135 | // Otherwise, it's a valid thing to potentially optimize to. |
551 | 0 | Terminated.push_back(Term); |
552 | 0 | continue; |
553 | 135 | } |
554 | 315 | |
555 | 180 | if (Res.Result == StopWhere) { |
556 | 71 | // We've hit our target. Save this path off for if we want to continue |
557 | 71 | // walking. |
558 | 71 | NewPaused.push_back(PathIndex); |
559 | 71 | continue; |
560 | 71 | } |
561 | 180 | |
562 | 109 | assert(!MSSA.isLiveOnEntryDef(Res.Result) && "liveOnEntry is a clobber"); |
563 | 109 | addSearches(cast<MemoryPhi>(Res.Result), PausedSearches, PathIndex); |
564 | 109 | } |
565 | 180 | |
566 | 45 | return None; |
567 | 180 | } |
568 | | |
569 | | template <typename T, typename Walker> |
570 | | struct generic_def_path_iterator |
571 | | : public iterator_facade_base<generic_def_path_iterator<T, Walker>, |
572 | | std::forward_iterator_tag, T *> { |
573 | 135 | generic_def_path_iterator() : W(nullptr), N(None) {} |
574 | 135 | generic_def_path_iterator(Walker *W, ListIndex N) : W(W), N(N) {} |
575 | | |
576 | 445 | T &operator*() const { return curNode(); } |
577 | | |
578 | 175 | generic_def_path_iterator &operator++() { |
579 | 175 | N = curNode().Previous; |
580 | 175 | return *this; |
581 | 175 | } |
582 | | |
583 | 310 | bool operator==(const generic_def_path_iterator &O) const { |
584 | 310 | if (N.hasValue() != O.N.hasValue()) |
585 | 310 | return false; |
586 | 0 | return !N.hasValue() || *N == *O.N; |
587 | 310 | } |
588 | | |
589 | | private: |
590 | 620 | T &curNode() const { return W->Paths[*N]; } |
591 | | |
592 | | Walker *W; |
593 | | Optional<ListIndex> N; |
594 | | }; |
595 | | |
596 | | using def_path_iterator = generic_def_path_iterator<DefPath, ClobberWalker>; |
597 | | using const_def_path_iterator = |
598 | | generic_def_path_iterator<const DefPath, const ClobberWalker>; |
599 | | |
600 | 135 | iterator_range<def_path_iterator> def_path(ListIndex From) { |
601 | 135 | return make_range(def_path_iterator(this, From), def_path_iterator()); |
602 | 135 | } |
603 | | |
604 | 0 | iterator_range<const_def_path_iterator> const_def_path(ListIndex From) const { |
605 | 0 | return make_range(const_def_path_iterator(this, From), |
606 | 0 | const_def_path_iterator()); |
607 | 0 | } |
608 | | |
609 | | struct OptznResult { |
610 | | /// The path that contains our result. |
611 | | TerminatedPath PrimaryClobber; |
612 | | /// The paths that we can legally cache back from, but that aren't |
613 | | /// necessarily the result of the Phi optimization. |
614 | | SmallVector<TerminatedPath, 4> OtherClobbers; |
615 | | }; |
616 | | |
617 | 445 | ListIndex defPathIndex(const DefPath &N) const { |
618 | 445 | // The assert looks nicer if we don't need to do &N |
619 | 445 | const DefPath *NP = &N; |
620 | 445 | assert(!Paths.empty() && NP >= &Paths.front() && NP <= &Paths.back() && |
621 | 445 | "Out of bounds DefPath!"); |
622 | 445 | return NP - &Paths.front(); |
623 | 445 | } |
624 | | |
625 | | /// Try to optimize a phi as best as we can. Returns a SmallVector of Paths |
626 | | /// that act as legal clobbers. Note that this won't return *all* clobbers. |
627 | | /// |
628 | | /// Phi optimization algorithm tl;dr: |
629 | | /// - Find the earliest def/phi, A, we can optimize to |
630 | | /// - Find if all paths from the starting memory access ultimately reach A |
631 | | /// - If not, optimization isn't possible. |
632 | | /// - Otherwise, walk from A to another clobber or phi, A'. |
633 | | /// - If A' is a def, we're done. |
634 | | /// - If A' is a phi, try to optimize it. |
635 | | /// |
636 | | /// A path is a series of {MemoryAccess, MemoryLocation} pairs. A path |
637 | | /// terminates when a MemoryAccess that clobbers said MemoryLocation is found. |
638 | | OptznResult tryOptimizePhi(MemoryPhi *Phi, MemoryAccess *Start, |
639 | 172 | const MemoryLocation &Loc) { |
640 | 172 | assert(Paths.empty() && VisitedPhis.empty() && |
641 | 172 | "Reset the optimization state."); |
642 | 172 | |
643 | 172 | Paths.emplace_back(Loc, Start, Phi, None); |
644 | 172 | // Stores how many "valid" optimization nodes we had prior to calling |
645 | 172 | // addSearches/getBlockingAccess. Necessary for caching if we had a blocker. |
646 | 172 | auto PriorPathsSize = Paths.size(); |
647 | 172 | |
648 | 172 | SmallVector<ListIndex, 16> PausedSearches; |
649 | 172 | SmallVector<ListIndex, 8> NewPaused; |
650 | 172 | SmallVector<TerminatedPath, 4> TerminatedPaths; |
651 | 172 | |
652 | 172 | addSearches(Phi, PausedSearches, 0); |
653 | 172 | |
654 | 172 | // Moves the TerminatedPath with the "most dominated" Clobber to the end of |
655 | 172 | // Paths. |
656 | 37 | auto MoveDominatedPathToEnd = [&](SmallVectorImpl<TerminatedPath> &Paths) { |
657 | 37 | assert(!Paths.empty() && "Need a path to move"); |
658 | 37 | auto Dom = Paths.begin(); |
659 | 49 | for (auto I = std::next(Dom), E = Paths.end(); I != E; ++I) |
660 | 12 | if (!MSSA.dominates(I->Clobber, Dom->Clobber)) |
661 | 0 | Dom = I; |
662 | 37 | auto Last = Paths.end() - 1; |
663 | 37 | if (Last != Dom) |
664 | 12 | std::iter_swap(Last, Dom); |
665 | 37 | }; |
666 | 172 | |
667 | 172 | MemoryPhi *Current = Phi; |
668 | 180 | while (1) { |
669 | 180 | assert(!MSSA.isLiveOnEntryDef(Current) && |
670 | 180 | "liveOnEntry wasn't treated as a clobber?"); |
671 | 180 | |
672 | 180 | const auto *Target = getWalkTarget(Current); |
673 | 180 | // If a TerminatedPath doesn't dominate Target, then it wasn't a legal |
674 | 180 | // optimization for the prior phi. |
675 | 180 | assert(all_of(TerminatedPaths, [&](const TerminatedPath &P) { |
676 | 180 | return MSSA.dominates(P.Clobber, Target); |
677 | 180 | })); |
678 | 180 | |
679 | 180 | // FIXME: This is broken, because the Blocker may be reported to be |
680 | 180 | // liveOnEntry, and we'll happily wait for that to disappear (read: never) |
681 | 180 | // For the moment, this is fine, since we do nothing with blocker info. |
682 | 180 | if (Optional<TerminatedPath> Blocker = getBlockingAccess( |
683 | 135 | Target, PausedSearches, NewPaused, TerminatedPaths)) { |
684 | 135 | |
685 | 135 | // Find the node we started at. We can't search based on N->Last, since |
686 | 135 | // we may have gone around a loop with a different MemoryLocation. |
687 | 310 | auto Iter = find_if(def_path(Blocker->LastNode), [&](const DefPath &N) { |
688 | 310 | return defPathIndex(N) < PriorPathsSize; |
689 | 310 | }); |
690 | 135 | assert(Iter != def_path_iterator()); |
691 | 135 | |
692 | 135 | DefPath &CurNode = *Iter; |
693 | 135 | assert(CurNode.Last == Current); |
694 | 135 | |
695 | 135 | // Two things: |
696 | 135 | // A. We can't reliably cache all of NewPaused back. Consider a case |
697 | 135 | // where we have two paths in NewPaused; one of which can't optimize |
698 | 135 | // above this phi, whereas the other can. If we cache the second path |
699 | 135 | // back, we'll end up with suboptimal cache entries. We can handle |
700 | 135 | // cases like this a bit better when we either try to find all |
701 | 135 | // clobbers that block phi optimization, or when our cache starts |
702 | 135 | // supporting unfinished searches. |
703 | 135 | // B. We can't reliably cache TerminatedPaths back here without doing |
704 | 135 | // extra checks; consider a case like: |
705 | 135 | // T |
706 | 135 | // / \ |
707 | 135 | // D C |
708 | 135 | // \ / |
709 | 135 | // S |
710 | 135 | // Where T is our target, C is a node with a clobber on it, D is a |
711 | 135 | // diamond (with a clobber *only* on the left or right node, N), and |
712 | 135 | // S is our start. Say we walk to D, through the node opposite N |
713 | 135 | // (read: ignoring the clobber), and see a cache entry in the top |
714 | 135 | // node of D. That cache entry gets put into TerminatedPaths. We then |
715 | 135 | // walk up to C (N is later in our worklist), find the clobber, and |
716 | 135 | // quit. If we append TerminatedPaths to OtherClobbers, we'll cache |
717 | 135 | // the bottom part of D to the cached clobber, ignoring the clobber |
718 | 135 | // in N. Again, this problem goes away if we start tracking all |
719 | 135 | // blockers for a given phi optimization. |
720 | 135 | TerminatedPath Result{CurNode.Last, defPathIndex(CurNode)}; |
721 | 135 | return {Result, {}}; |
722 | 135 | } |
723 | 180 | |
724 | 180 | // If there's nothing left to search, then all paths led to valid clobbers |
725 | 180 | // that we got from our cache; pick the nearest to the start, and allow |
726 | 180 | // the rest to be cached back. |
727 | 45 | if (NewPaused.empty()) { |
728 | 0 | MoveDominatedPathToEnd(TerminatedPaths); |
729 | 0 | TerminatedPath Result = TerminatedPaths.pop_back_val(); |
730 | 0 | return {Result, std::move(TerminatedPaths)}; |
731 | 0 | } |
732 | 45 | |
733 | 45 | MemoryAccess *DefChainEnd = nullptr; |
734 | 45 | SmallVector<TerminatedPath, 4> Clobbers; |
735 | 62 | for (ListIndex Paused : NewPaused) { |
736 | 62 | UpwardsWalkResult WR = walkToPhiOrClobber(Paths[Paused]); |
737 | 62 | if (WR.IsKnownClobber) |
738 | 49 | Clobbers.push_back({WR.Result, Paused}); |
739 | 62 | else |
740 | 62 | // Micro-opt: If we hit the end of the chain, save it. |
741 | 13 | DefChainEnd = WR.Result; |
742 | 62 | } |
743 | 45 | |
744 | 45 | if (!TerminatedPaths.empty()) { |
745 | 0 | // If we couldn't find the dominating phi/liveOnEntry in the above loop, |
746 | 0 | // do it now. |
747 | 0 | if (!DefChainEnd) |
748 | 0 | for (auto *MA : def_chain(const_cast<MemoryAccess *>(Target))) |
749 | 0 | DefChainEnd = MA; |
750 | 0 | |
751 | 0 | // If any of the terminated paths don't dominate the phi we'll try to |
752 | 0 | // optimize, we need to figure out what they are and quit. |
753 | 0 | const BasicBlock *ChainBB = DefChainEnd->getBlock(); |
754 | 0 | for (const TerminatedPath &TP : TerminatedPaths) { |
755 | 0 | // Because we know that DefChainEnd is as "high" as we can go, we |
756 | 0 | // don't need local dominance checks; BB dominance is sufficient. |
757 | 0 | if (DT.dominates(ChainBB, TP.Clobber->getBlock())) |
758 | 0 | Clobbers.push_back(TP); |
759 | 0 | } |
760 | 0 | } |
761 | 45 | |
762 | 45 | // If we have clobbers in the def chain, find the one closest to Current |
763 | 45 | // and quit. |
764 | 45 | if (!Clobbers.empty()) { |
765 | 37 | MoveDominatedPathToEnd(Clobbers); |
766 | 37 | TerminatedPath Result = Clobbers.pop_back_val(); |
767 | 37 | return {Result, std::move(Clobbers)}; |
768 | 37 | } |
769 | 45 | |
770 | 8 | assert(all_of(NewPaused, |
771 | 8 | [&](ListIndex I) { return Paths[I].Last == DefChainEnd; })); |
772 | 8 | |
773 | 8 | // Because liveOnEntry is a clobber, this must be a phi. |
774 | 8 | auto *DefChainPhi = cast<MemoryPhi>(DefChainEnd); |
775 | 8 | |
776 | 8 | PriorPathsSize = Paths.size(); |
777 | 8 | PausedSearches.clear(); |
778 | 8 | for (ListIndex I : NewPaused) |
779 | 13 | addSearches(DefChainPhi, PausedSearches, I); |
780 | 8 | NewPaused.clear(); |
781 | 8 | |
782 | 8 | Current = DefChainPhi; |
783 | 8 | } |
784 | 172 | } |
785 | | |
786 | 172 | void verifyOptResult(const OptznResult &R) const { |
787 | 172 | assert(all_of(R.OtherClobbers, [&](const TerminatedPath &P) { |
788 | 172 | return MSSA.dominates(P.Clobber, R.PrimaryClobber.Clobber); |
789 | 172 | })); |
790 | 172 | } |
791 | | |
792 | 172 | void resetPhiOptznState() { |
793 | 172 | Paths.clear(); |
794 | 172 | VisitedPhis.clear(); |
795 | 172 | } |
796 | | |
797 | | public: |
798 | | ClobberWalker(const MemorySSA &MSSA, AliasAnalysis &AA, DominatorTree &DT) |
799 | 566 | : MSSA(MSSA), AA(AA), DT(DT) {} |
800 | | |
801 | 580 | void reset() {} |
802 | | |
803 | | /// Finds the nearest clobber for the given query, optimizing phis if |
804 | | /// possible. |
805 | 183 | MemoryAccess *findClobber(MemoryAccess *Start, UpwardsMemoryQuery &Q) { |
806 | 183 | Query = &Q; |
807 | 183 | |
808 | 183 | MemoryAccess *Current = Start; |
809 | 183 | // This walker pretends uses don't exist. If we're handed one, silently grab |
810 | 183 | // its def. (This has the nice side-effect of ensuring we never cache uses) |
811 | 183 | if (auto *MU = dyn_cast<MemoryUse>(Start)) |
812 | 0 | Current = MU->getDefiningAccess(); |
813 | 183 | |
814 | 183 | DefPath FirstDesc(Q.StartingLoc, Current, Current, None); |
815 | 183 | // Fast path for the overly-common case (no crazy phi optimization |
816 | 183 | // necessary) |
817 | 183 | UpwardsWalkResult WalkResult = walkToPhiOrClobber(FirstDesc); |
818 | 183 | MemoryAccess *Result; |
819 | 183 | if (WalkResult.IsKnownClobber) { |
820 | 11 | Result = WalkResult.Result; |
821 | 172 | } else { |
822 | 172 | OptznResult OptRes = tryOptimizePhi(cast<MemoryPhi>(FirstDesc.Last), |
823 | 172 | Current, Q.StartingLoc); |
824 | 172 | verifyOptResult(OptRes); |
825 | 172 | resetPhiOptznState(); |
826 | 172 | Result = OptRes.PrimaryClobber.Clobber; |
827 | 172 | } |
828 | 183 | |
829 | 183 | #ifdef EXPENSIVE_CHECKS |
830 | | checkClobberSanity(Current, Result, Q.StartingLoc, MSSA, Q, AA); |
831 | | #endif |
832 | 183 | return Result; |
833 | 183 | } |
834 | | |
835 | 91 | void verify(const MemorySSA *MSSA) { assert(MSSA == &this->MSSA); } |
836 | | }; |
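A brief sketch of how a walker is expected to drive findClobber() (the real driver, CachingWalker::getClobberingMemoryAccess, is defined further down in this file; the helper below is hypothetical): build an UpwardsMemoryQuery for the instruction and start the walk at its defining access.

static MemoryAccess *getNearestClobber(ClobberWalker &Walker,
                                       MemoryUseOrDef *MUD) {
  UpwardsMemoryQuery Q(MUD->getMemoryInst(), MUD);
  return Walker.findClobber(MUD->getDefiningAccess(), Q);
}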
837 | | |
838 | | struct RenamePassData { |
839 | | DomTreeNode *DTN; |
840 | | DomTreeNode::const_iterator ChildIt; |
841 | | MemoryAccess *IncomingVal; |
842 | | |
843 | | RenamePassData(DomTreeNode *D, DomTreeNode::const_iterator It, |
844 | | MemoryAccess *M) |
845 | 1.67k | : DTN(D), ChildIt(It), IncomingVal(M) {} |
846 | 0 | void swap(RenamePassData &RHS) { |
847 | 0 | std::swap(DTN, RHS.DTN); |
848 | 0 | std::swap(ChildIt, RHS.ChildIt); |
849 | 0 | std::swap(IncomingVal, RHS.IncomingVal); |
850 | 0 | } |
851 | | }; |
852 | | } // anonymous namespace |
853 | | |
854 | | namespace llvm { |
855 | | /// \brief A MemorySSAWalker that does AA walks to disambiguate accesses. It no |
856 | | /// longer does caching on its own, |
857 | | /// but the name has been retained for the moment. |
858 | | class MemorySSA::CachingWalker final : public MemorySSAWalker { |
859 | | ClobberWalker Walker; |
860 | | bool AutoResetWalker; |
861 | | |
862 | | MemoryAccess *getClobberingMemoryAccess(MemoryAccess *, UpwardsMemoryQuery &); |
863 | | void verifyRemoved(MemoryAccess *); |
864 | | |
865 | | public: |
866 | | CachingWalker(MemorySSA *, AliasAnalysis *, DominatorTree *); |
867 | | ~CachingWalker() override; |
868 | | |
869 | | using MemorySSAWalker::getClobberingMemoryAccess; |
870 | | MemoryAccess *getClobberingMemoryAccess(MemoryAccess *) override; |
871 | | MemoryAccess *getClobberingMemoryAccess(MemoryAccess *, |
872 | | const MemoryLocation &) override; |
873 | | void invalidateInfo(MemoryAccess *) override; |
874 | | |
875 | | /// Whether we call resetClobberWalker() after each time we *actually* walk to |
876 | | /// answer a clobber query. |
877 | 1.13k | void setAutoResetWalker(bool AutoReset) { AutoResetWalker = AutoReset; } |
878 | | |
879 | | /// Drop the walker's persistent data structures. |
880 | 580 | void resetClobberWalker() { Walker.reset(); } |
881 | | |
882 | 91 | void verify(const MemorySSA *MSSA) override { |
883 | 91 | MemorySSAWalker::verify(MSSA); |
884 | 91 | Walker.verify(MSSA); |
885 | 91 | } |
886 | | }; |
887 | | |
888 | | void MemorySSA::renameSuccessorPhis(BasicBlock *BB, MemoryAccess *IncomingVal, |
889 | 1.67k | bool RenameAllUses) { |
890 | 1.67k | // Pass through values to our successors |
891 | 1.59k | for (const BasicBlock *S : successors(BB)) { |
892 | 1.59k | auto It = PerBlockAccesses.find(S); |
893 | 1.59k | // Rename the phi nodes in our successor block |
894 | 1.59k | if (It == PerBlockAccesses.end() || 1.59k !isa<MemoryPhi>(It->second->front())908 ) |
895 | 1.09k | continue; |
896 | 503 | AccessList *Accesses = It->second.get(); |
897 | 503 | auto *Phi = cast<MemoryPhi>(&Accesses->front()); |
898 | 503 | if (RenameAllUses) { |
899 | 2 | int PhiIndex = Phi->getBasicBlockIndex(BB); |
900 | 2 | assert(PhiIndex != -1 && "Incomplete phi during partial rename"); |
901 | 2 | Phi->setIncomingValue(PhiIndex, IncomingVal); |
902 | 2 | } else |
903 | 501 | Phi->addIncoming(IncomingVal, BB); |
904 | 503 | } |
905 | 1.67k | } |
906 | | |
907 | | /// \brief Rename a single basic block into MemorySSA form. |
908 | | /// Uses the standard SSA renaming algorithm. |
909 | | /// \returns The new incoming value. |
910 | | MemoryAccess *MemorySSA::renameBlock(BasicBlock *BB, MemoryAccess *IncomingVal, |
911 | 1.67k | bool RenameAllUses) { |
912 | 1.67k | auto It = PerBlockAccesses.find(BB); |
913 | 1.67k | // Skip most processing if the list is empty. |
914 | 1.67k | if (It != PerBlockAccesses.end()) { |
915 | 915 | AccessList *Accesses = It->second.get(); |
916 | 2.04k | for (MemoryAccess &L : *Accesses) { |
917 | 2.04k | if (MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(&L)) { |
918 | 1.82k | if (MUD->getDefiningAccess() == nullptr || RenameAllUses) |
919 | 1.82k | MUD->setDefiningAccess(IncomingVal); |
920 | 1.82k | if (isa<MemoryDef>(&L)) |
921 | 982 | IncomingVal = &L; |
922 | 223 | } else { |
923 | 223 | IncomingVal = &L; |
924 | 223 | } |
925 | 2.04k | } |
926 | 915 | } |
927 | 1.67k | return IncomingVal; |
928 | 1.67k | } |
929 | | |
930 | | /// \brief This is the standard SSA renaming algorithm. |
931 | | /// |
932 | | /// We walk the dominator tree in preorder, renaming accesses, and then filling |
933 | | /// in phi nodes in our successors. |
934 | | void MemorySSA::renamePass(DomTreeNode *Root, MemoryAccess *IncomingVal, |
935 | | SmallPtrSetImpl<BasicBlock *> &Visited, |
936 | 567 | bool SkipVisited, bool RenameAllUses) { |
937 | 567 | SmallVector<RenamePassData, 32> WorkStack; |
938 | 567 | // Skip everything if we already renamed this block and we are skipping. |
939 | 567 | // Note: You can't sink this into the if, because we need it to occur |
940 | 567 | // regardless of whether we skip blocks or not. |
941 | 567 | bool AlreadyVisited = !Visited.insert(Root->getBlock()).second; |
942 | 567 | if (SkipVisited && AlreadyVisited) |
943 | 0 | return; |
944 | 567 | |
945 | 567 | IncomingVal = renameBlock(Root->getBlock(), IncomingVal, RenameAllUses); |
946 | 567 | renameSuccessorPhis(Root->getBlock(), IncomingVal, RenameAllUses); |
947 | 567 | WorkStack.push_back({Root, Root->begin(), IncomingVal}); |
948 | 567 | |
949 | 3.35k | while (!WorkStack.empty()) { |
950 | 2.78k | DomTreeNode *Node = WorkStack.back().DTN; |
951 | 2.78k | DomTreeNode::const_iterator ChildIt = WorkStack.back().ChildIt; |
952 | 2.78k | IncomingVal = WorkStack.back().IncomingVal; |
953 | 2.78k | |
954 | 2.78k | if (ChildIt == Node->end()) { |
955 | 1.67k | WorkStack.pop_back(); |
956 | 1.11k | } else { |
957 | 1.11k | DomTreeNode *Child = *ChildIt; |
958 | 1.11k | ++WorkStack.back().ChildIt; |
959 | 1.11k | BasicBlock *BB = Child->getBlock(); |
960 | 1.11k | // Note: You can't sink this into the if, because we need it to occur |
961 | 1.11k | // regardless of whether we skip blocks or not. |
962 | 1.11k | AlreadyVisited = !Visited.insert(BB).second; |
963 | 1.11k | if (SkipVisited && AlreadyVisited) { |
964 | 0 | // We already visited this during our renaming, which can happen when |
965 | 0 | // being asked to rename multiple blocks. Figure out the incoming val, |
966 | 0 | // which is the last def. |
967 | 0 | // Incoming value can only change if there is a block def, and in that |
968 | 0 | // case, it's the last block def in the list. |
969 | 0 | if (auto *BlockDefs = getWritableBlockDefs(BB)) |
970 | 0 | IncomingVal = &*BlockDefs->rbegin(); |
971 | 0 | } else |
972 | 1.11k | IncomingVal = renameBlock(BB, IncomingVal, RenameAllUses); |
973 | 1.11k | renameSuccessorPhis(BB, IncomingVal, RenameAllUses); |
974 | 1.11k | WorkStack.push_back({Child, Child->begin(), IncomingVal}); |
975 | 1.11k | } |
976 | 2.78k | } |
977 | 567 | } |
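For context, this rename walk is expected to be seeded exactly once at the dominator-tree root with the live-on-entry definition; a sketch of that call, roughly as buildMemorySSA() (past the end of this excerpt) would issue it from inside MemorySSA:

  SmallPtrSet<BasicBlock *, 16> Visited;
  renamePass(DT->getRootNode(), LiveOnEntryDef.get(), Visited);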
978 | | |
979 | | /// \brief This handles unreachable block accesses by deleting phi nodes in |
980 | | /// unreachable blocks, and marking all other unreachable MemoryAccess's as |
981 | | /// being uses of the live on entry definition. |
982 | 21 | void MemorySSA::markUnreachableAsLiveOnEntry(BasicBlock *BB) { |
983 | 21 | assert(!DT->isReachableFromEntry(BB) && |
984 | 21 | "Reachable block found while handling unreachable blocks"); |
985 | 21 | |
986 | 21 | // Make sure phi nodes in our reachable successors end up with a |
987 | 21 | // LiveOnEntryDef for our incoming edge, even though our block is forward |
988 | 21 | // unreachable. We could just disconnect these blocks from the CFG fully, |
989 | 21 | // but we do not right now. |
990 | 23 | for (const BasicBlock *S : successors(BB)) { |
991 | 23 | if (!DT->isReachableFromEntry(S)) |
992 | 11 | continue; |
993 | 12 | auto It = PerBlockAccesses.find(S); |
994 | 12 | // Rename the phi nodes in our successor block |
995 | 12 | if (It == PerBlockAccesses.end() || !isa<MemoryPhi>(It->second->front())) |
996 | 10 | continue; |
997 | 2 | AccessList *Accesses = It->second.get(); |
998 | 2 | auto *Phi = cast<MemoryPhi>(&Accesses->front()); |
999 | 2 | Phi->addIncoming(LiveOnEntryDef.get(), BB); |
1000 | 2 | } |
1001 | 21 | |
1002 | 21 | auto It = PerBlockAccesses.find(BB); |
1003 | 21 | if (It == PerBlockAccesses.end()) |
1004 | 16 | return; |
1005 | 21 | |
1006 | 5 | auto &Accesses = It->second; |
1007 | 11 | for (auto AI = Accesses->begin(), AE = Accesses->end(); AI != AE;) { |
1008 | 6 | auto Next = std::next(AI); |
1009 | 6 | // If we have a phi, just remove it. We are going to replace all |
1010 | 6 | // users with live on entry. |
1011 | 6 | if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(AI)) |
1012 | 6 | UseOrDef->setDefiningAccess(LiveOnEntryDef.get()); |
1013 | 6 | else |
1014 | 0 | Accesses->erase(AI); |
1015 | 6 | AI = Next; |
1016 | 6 | } |
1017 | 5 | } |
1018 | | |
1019 | | MemorySSA::MemorySSA(Function &Func, AliasAnalysis *AA, DominatorTree *DT) |
1020 | | : AA(AA), DT(DT), F(Func), LiveOnEntryDef(nullptr), Walker(nullptr), |
1021 | 566 | NextID(INVALID_MEMORYACCESS_ID) { |
1022 | 566 | buildMemorySSA(); |
1023 | 566 | } |
1024 | | |
1025 | 566 | MemorySSA::~MemorySSA() { |
1026 | 566 | // Drop all our references |
1027 | 566 | for (const auto &Pair : PerBlockAccesses) |
1028 | 890 | for (MemoryAccess &MA : *Pair.second) |
1029 | 1.91k | MA.dropAllReferences(); |
1030 | 566 | } |
1031 | | |
1032 | 1.10k | MemorySSA::AccessList *MemorySSA::getOrCreateAccessList(const BasicBlock *BB) { |
1033 | 1.10k | auto Res = PerBlockAccesses.insert(std::make_pair(BB, nullptr)); |
1034 | 1.10k | |
1035 | 1.10k | if (Res.second) |
1036 | 942 | Res.first->second = make_unique<AccessList>(); |
1037 | 1.10k | return Res.first->second.get(); |
1038 | 1.10k | } |
1039 | 851 | MemorySSA::DefsList *MemorySSA::getOrCreateDefsList(const BasicBlock *BB) { |
1040 | 851 | auto Res = PerBlockDefs.insert(std::make_pair(BB, nullptr)); |
1041 | 851 | |
1042 | 851 | if (Res.second) |
1043 | 767 | Res.first->second = make_unique<DefsList>(); |
1044 | 851 | return Res.first->second.get(); |
1045 | 851 | } |
1046 | | |
1047 | | /// This class is a batch walker of all MemoryUse's in the program, and points |
1048 | | /// their defining access at the thing that actually clobbers them. Because it |
1049 | | /// is a batch walker that touches everything, it does not operate like the |
1050 | | /// other walkers. This walker is basically performing a top-down SSA renaming |
1051 | | /// pass, where the version stack is used as the cache. This enables it to be |
1052 | | /// significantly more time and memory efficient than using the regular walker, |
1053 | | /// which is walking bottom-up. |
1054 | | class MemorySSA::OptimizeUses { |
1055 | | public: |
1056 | | OptimizeUses(MemorySSA *MSSA, MemorySSAWalker *Walker, AliasAnalysis *AA, |
1057 | | DominatorTree *DT) |
1058 | 566 | : MSSA(MSSA), Walker(Walker), AA(AA), DT(DT) { |
1059 | 566 | Walker = MSSA->getWalker(); |
1060 | 566 | } |
1061 | | |
1062 | | void optimizeUses(); |
1063 | | |
1064 | | private: |
1065 | | /// This represents where a given memorylocation is in the stack. |
1066 | | struct MemlocStackInfo { |
1067 | | // This essentially is keeping track of versions of the stack. Whenever |
1068 | | // the stack changes due to pushes or pops, these versions increase. |
1069 | | unsigned long StackEpoch; |
1070 | | unsigned long PopEpoch; |
1071 | | // This is the lower bound of places on the stack to check. It is equal to |
1072 | | // the place the last stack walk ended. |
1073 | | // Note: Correctness depends on this being initialized to 0, which densemap |
1074 | | // does |
1075 | | unsigned long LowerBound; |
1076 | | const BasicBlock *LowerBoundBlock; |
1077 | | // This is where the last walk for this memory location ended. |
1078 | | unsigned long LastKill; |
1079 | | bool LastKillValid; |
1080 | | }; |
1081 | | void optimizeUsesInBlock(const BasicBlock *, unsigned long &, unsigned long &, |
1082 | | SmallVectorImpl<MemoryAccess *> &, |
1083 | | DenseMap<MemoryLocOrCall, MemlocStackInfo> &); |
1084 | | MemorySSA *MSSA; |
1085 | | MemorySSAWalker *Walker; |
1086 | | AliasAnalysis *AA; |
1087 | | DominatorTree *DT; |
1088 | | }; |
1089 | | |
1090 | | /// Optimize the uses in a given block. This is basically the SSA renaming |
1091 | | /// algorithm, with one caveat: We are able to use a single stack for all |
1092 | | /// MemoryUses. This is because the set of *possible* reaching MemoryDefs is |
1093 | | /// the same for every MemoryUse. The *actual* clobbering MemoryDef is just |
1094 | | /// going to be some position in that stack of possible ones. |
1095 | | /// |
1096 | | /// We track the stack positions that each MemoryLocation needs |
1097 | | /// to check, and last ended at. This is because we only want to check the |
1098 | | /// things that changed since last time. The same MemoryLocation should |
1099 | | /// get clobbered by the same store (getModRefInfo does not use invariantness or |
1100 | | /// things like this, and if they start, we can modify MemoryLocOrCall to |
1101 | | /// include relevant data) |
1102 | | void MemorySSA::OptimizeUses::optimizeUsesInBlock( |
1103 | | const BasicBlock *BB, unsigned long &StackEpoch, unsigned long &PopEpoch, |
1104 | | SmallVectorImpl<MemoryAccess *> &VersionStack, |
1105 | 1.67k | DenseMap<MemoryLocOrCall, MemlocStackInfo> &LocStackInfo) { |
1106 | 1.67k | |
1107 | 1.67k | /// If no accesses, nothing to do. |
1108 | 1.67k | MemorySSA::AccessList *Accesses = MSSA->getWritableBlockAccesses(BB); |
1109 | 1.67k | if (Accesses == nullptr) |
1110 | 762 | return; |
1111 | 1.67k | |
1112 | 1.67k | // Pop everything that doesn't dominate the current block off the stack, |
1113 | 1.67k | // increment the PopEpoch to account for this. |
1114 | 1.15k | while (true) { |
1115 | 1.15k | assert( |
1116 | 1.15k | !VersionStack.empty() && |
1117 | 1.15k | "Version stack should have liveOnEntry sentinel dominating everything"); |
1118 | 1.15k | BasicBlock *BackBlock = VersionStack.back()->getBlock(); |
1119 | 1.15k | if (DT->dominates(BackBlock, BB)) |
1120 | 912 | break; |
1121 | 556 | while (VersionStack.back()->getBlock() == BackBlock) |
1122 | 318 | VersionStack.pop_back(); |
1123 | 238 | ++PopEpoch; |
1124 | 238 | } |
1125 | 912 | |
1126 | 2.04k | for (MemoryAccess &MA : *Accesses) { |
1127 | 2.04k | auto *MU = dyn_cast<MemoryUse>(&MA); |
1128 | 2.04k | if (!MU) { |
1129 | 1.20k | VersionStack.push_back(&MA); |
1130 | 1.20k | ++StackEpoch; |
1131 | 1.20k | continue; |
1132 | 1.20k | } |
1133 | 2.04k | |
1134 | 840 | if (isUseTriviallyOptimizableToLiveOnEntry(*AA, MU->getMemoryInst())) { |
1135 | 18 | MU->setDefiningAccess(MSSA->getLiveOnEntryDef(), true); |
1136 | 18 | continue; |
1137 | 18 | } |
1138 | 840 | |
1139 | 822 | MemoryLocOrCall UseMLOC(MU); |
1140 | 822 | auto &LocInfo = LocStackInfo[UseMLOC]; |
1141 | 822 | // If the pop epoch changed, it means we've removed stuff from top of |
1142 | 822 | // stack due to changing blocks. We may have to reset the lower bound or |
1143 | 822 | // last kill info. |
1144 | 822 | if (LocInfo.PopEpoch != PopEpoch) { |
1145 | 617 | LocInfo.PopEpoch = PopEpoch; |
1146 | 617 | LocInfo.StackEpoch = StackEpoch; |
1147 | 617 | // If the lower bound was in something that no longer dominates us, we |
1148 | 617 | // have to reset it. |
1149 | 617 | // We can't simply track stack size, because the stack may have had |
1150 | 617 | // pushes/pops in the meantime. |
1151 | 617 | // XXX: This is non-optimal, but only is slower cases with heavily |
1152 | 617 | // branching dominator trees. To get the optimal number of queries would |
1153 | 617 | // be to make lowerbound and lastkill a per-loc stack, and pop it until |
1154 | 617 | // the top of that stack dominates us. This does not seem worth it ATM. |
1155 | 617 | // A much cheaper optimization would be to always explore the deepest |
1156 | 617 | // branch of the dominator tree first. This will guarantee this resets on |
1157 | 617 | // the smallest set of blocks. |
1158 | 617 | if (LocInfo.LowerBoundBlock && LocInfo.LowerBoundBlock != BB && |
1159 | 66 | !DT->dominates(LocInfo.LowerBoundBlock, BB)) { |
1160 | 48 | // Reset the lower bound of things to check. |
1161 | 48 | // TODO: Some day we should be able to reset to last kill, rather than |
1162 | 48 | // 0. |
1163 | 48 | LocInfo.LowerBound = 0; |
1164 | 48 | LocInfo.LowerBoundBlock = VersionStack[0]->getBlock(); |
1165 | 48 | LocInfo.LastKillValid = false; |
1166 | 48 | } |
1167 | 205 | } else if (LocInfo.StackEpoch != StackEpoch) { |
1168 | 119 | // If all that has changed is the StackEpoch, we only have to check the |
1169 | 119 | // new things on the stack, because we've checked everything before. In |
1170 | 119 | // this case, the lower bound of things to check remains the same. |
1171 | 119 | LocInfo.PopEpoch = PopEpoch; |
1172 | 119 | LocInfo.StackEpoch = StackEpoch; |
1173 | 119 | } |
1174 | 822 | if (!LocInfo.LastKillValid) { |
1175 | 599 | LocInfo.LastKill = VersionStack.size() - 1; |
1176 | 599 | LocInfo.LastKillValid = true; |
1177 | 599 | } |
1178 | 822 | |
1179 | 822 | // At this point, we should have corrected last kill and LowerBound to be |
1180 | 822 | // in bounds. |
1181 | 822 | assert(LocInfo.LowerBound < VersionStack.size() && |
1182 | 822 | "Lower bound out of range"); |
1183 | 822 | assert(LocInfo.LastKill < VersionStack.size() && |
1184 | 822 | "Last kill info out of range"); |
1185 | 822 | // In any case, the new upper bound is the top of the stack. |
1186 | 822 | unsigned long UpperBound = VersionStack.size() - 1; |
1187 | 822 | |
1188 | 822 | if (UpperBound - LocInfo.LowerBound > MaxCheckLimit) { |
1189 | 0 | DEBUG(dbgs() << "MemorySSA skipping optimization of " << *MU << " (" |
1190 | 0 | << *(MU->getMemoryInst()) << ")" |
1191 | 0 | << " because there are " << UpperBound - LocInfo.LowerBound |
1192 | 0 | << " stores to disambiguate\n"); |
1193 | 0 | // Because we did not walk, LastKill is no longer valid, as this may |
1194 | 0 | // have been a kill. |
1195 | 0 | LocInfo.LastKillValid = false; |
1196 | 0 | continue; |
1197 | 0 | } |
1198 | 822 | bool FoundClobberResult = false; |
1199 | 983 | while (UpperBound > LocInfo.LowerBound) { |
1200 | 639 | if (isa<MemoryPhi>(VersionStack[UpperBound])) { |
1201 | 169 | // For phis, use the walker, see where we ended up, go there |
1202 | 169 | Instruction *UseInst = MU->getMemoryInst(); |
1203 | 169 | MemoryAccess *Result = Walker->getClobberingMemoryAccess(UseInst); |
1204 | 169 | // We are guaranteed to find it or something is wrong |
1205 | 220 | while (VersionStack[UpperBound] != Result) { |
1206 | 51 | assert(UpperBound != 0); |
1207 | 51 | --UpperBound; |
1208 | 51 | } |
1209 | 169 | FoundClobberResult = true; |
1210 | 169 | break; |
1211 | 169 | } |
1212 | 639 | |
1213 | 470 | MemoryDef *MD = cast<MemoryDef>(VersionStack[UpperBound]); |
1214 | 470 | // If the lifetime of the pointer ends at this instruction, it's live on |
1215 | 470 | // entry. |
1216 | 470 | if (!UseMLOC.IsCall && 470 lifetimeEndsAt(MD, UseMLOC.getLoc(), *AA)436 ) {3 |
1217 | 3 | // Reset UpperBound to liveOnEntryDef's place in the stack |
1218 | 3 | UpperBound = 0; |
1219 | 3 | FoundClobberResult = true; |
1220 | 3 | break; |
1221 | 3 | } |
1222 | 467 | if (467 instructionClobbersQuery(MD, MU, UseMLOC, *AA)467 ) {306 |
1223 | 306 | FoundClobberResult = true; |
1224 | 306 | break; |
1225 | 306 | } |
1226 | 161 | --UpperBound; |
1227 | 161 | } |
1228 | 822 | // At the end of this loop, UpperBound is either a clobber or the lower bound. |
1229 | 822 | // PHI walking may cause it to be < LowerBound, and in fact, < LastKill. |
1230 | 822 | if (FoundClobberResult || 822 UpperBound < LocInfo.LastKill344 ) {499 |
1231 | 499 | MU->setDefiningAccess(VersionStack[UpperBound], true); |
1232 | 499 | // We were last killed now by where we got to |
1233 | 499 | LocInfo.LastKill = UpperBound; |
1234 | 323 | } else { |
1235 | 323 | // Otherwise, we checked all the new ones, and now we know we can get to |
1236 | 323 | // LastKill. |
1237 | 323 | MU->setDefiningAccess(VersionStack[LocInfo.LastKill], true); |
1238 | 323 | } |
1239 | 822 | LocInfo.LowerBound = VersionStack.size() - 1; |
1240 | 822 | LocInfo.LowerBoundBlock = BB; |
1241 | 822 | } |
1242 | 912 | } |
1243 | | |
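
The block walk above amortizes use optimization with two per-location caches: LowerBound marks how much of the version stack has already been examined for a location, and LastKill remembers where the last clobber was found. The following stand-alone sketch (not part of this file; LocCacheSketch and findClobberIndex are hypothetical names, and a plain std::vector stands in for the version stack) illustrates that caching rule under those assumptions:

    #include <vector>

    // Per-location bookkeeping, mirroring the MemlocStackInfo fields used above.
    struct LocCacheSketch {
      unsigned long LowerBound = 0; // stack index already known to be checked
      unsigned long LastKill = 0;   // stack index of the last known clobber
      bool LastKillValid = false;
    };

    // Scan only the entries pushed since the last query for this location and
    // remember where the last clobber ("kill") was found.
    template <typename AccessT, typename IsClobberFn>
    unsigned long findClobberIndex(const std::vector<AccessT> &VersionStack,
                                   LocCacheSketch &C, IsClobberFn IsClobber) {
      if (!C.LastKillValid) {
        C.LastKill = VersionStack.size() - 1;
        C.LastKillValid = true;
      }
      unsigned long Upper = VersionStack.size() - 1;
      bool Found = false;
      while (Upper > C.LowerBound) {
        if (IsClobber(VersionStack[Upper])) { Found = true; break; }
        --Upper;
      }
      if (Found || Upper < C.LastKill)
        C.LastKill = Upper;                    // last killed by where we got to
      C.LowerBound = VersionStack.size() - 1;  // everything on the stack is now checked
      return C.LastKill;
    }
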
1244 | | /// Optimize uses to point to their actual clobbering definitions. |
1245 | 566 | void MemorySSA::OptimizeUses::optimizeUses() { |
1246 | 566 | SmallVector<MemoryAccess *, 16> VersionStack; |
1247 | 566 | DenseMap<MemoryLocOrCall, MemlocStackInfo> LocStackInfo; |
1248 | 566 | VersionStack.push_back(MSSA->getLiveOnEntryDef()); |
1249 | 566 | |
1250 | 566 | unsigned long StackEpoch = 1; |
1251 | 566 | unsigned long PopEpoch = 1; |
1252 | 566 | // We perform a non-recursive top-down dominator tree walk. |
1253 | 566 | for (const auto *DomNode : depth_first(DT->getRootNode())) |
1254 | 1.67k | optimizeUsesInBlock(DomNode->getBlock(), StackEpoch, PopEpoch, VersionStack, |
1255 | 1.67k | LocStackInfo); |
1256 | 566 | } |
1257 | | |
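
optimizeUses() below visits blocks with LLVM's depth_first iterator over the dominator tree, so every block is reached after its immediate dominator. A minimal sketch of that traversal pattern, assuming the usual LLVM headers (the function name is hypothetical):

    #include "llvm/ADT/DepthFirstIterator.h"
    #include "llvm/IR/Dominators.h"
    using namespace llvm;

    static void walkDomTreeTopDown(DominatorTree &DT) {
      // depth_first() uses GraphTraits<DomTreeNode *>, so each node's children
      // (the blocks it immediately dominates) are visited after the node itself.
      for (DomTreeNode *Node : depth_first(DT.getRootNode())) {
        BasicBlock *BB = Node->getBlock();
        (void)BB; // per-block work (pushing/popping a version stack) goes here
      }
    }
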
1258 | | void MemorySSA::placePHINodes( |
1259 | | const SmallPtrSetImpl<BasicBlock *> &DefiningBlocks, |
1260 | 566 | const DenseMap<const BasicBlock *, unsigned int> &BBNumbers) { |
1261 | 566 | // Determine where our MemoryPhi's should go |
1262 | 566 | ForwardIDFCalculator IDFs(*DT); |
1263 | 566 | IDFs.setDefiningBlocks(DefiningBlocks); |
1264 | 566 | SmallVector<BasicBlock *, 32> IDFBlocks; |
1265 | 566 | IDFs.calculate(IDFBlocks); |
1266 | 566 | |
1267 | 566 | std::sort(IDFBlocks.begin(), IDFBlocks.end(), |
1268 | 111 | [&BBNumbers](const BasicBlock *A, const BasicBlock *B) { |
1269 | 111 | return BBNumbers.lookup(A) < BBNumbers.lookup(B); |
1270 | 111 | }); |
1271 | 566 | |
1272 | 566 | // Now place MemoryPhi nodes. |
1273 | 566 | for (auto &BB : IDFBlocks) |
1274 | 222 | createMemoryPhi(BB); |
1275 | 566 | } |
1276 | | |
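
placePHINodes() above relies on the iterated dominance frontier to decide where MemoryPhis go, exactly as ordinary SSA construction places PHINodes. A small sketch of that calculation using the same IDF interface the function calls (the helper name is hypothetical):

    #include "llvm/ADT/SmallPtrSet.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/Analysis/IteratedDominanceFrontier.h"
    #include "llvm/IR/Dominators.h"
    using namespace llvm;

    static void phiBlocksFor(DominatorTree &DT,
                             const SmallPtrSetImpl<BasicBlock *> &DefiningBlocks,
                             SmallVectorImpl<BasicBlock *> &PhiBlocks) {
      ForwardIDFCalculator IDF(DT);
      IDF.setDefiningBlocks(DefiningBlocks);
      IDF.calculate(PhiBlocks); // every block in PhiBlocks needs a MemoryPhi
    }
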
1277 | 566 | void MemorySSA::buildMemorySSA() { |
1278 | 566 | // We create an access to represent "live on entry", for things like |
1279 | 566 | // arguments or users of globals, where the memory they use is defined before |
1280 | 566 | // the beginning of the function. We do not actually insert it into the IR. |
1281 | 566 | // We do not define a live on exit for the immediate uses, and thus our |
1282 | 566 | // semantics do *not* imply that something with no immediate uses can simply |
1283 | 566 | // be removed. |
1284 | 566 | BasicBlock &StartingPoint = F.getEntryBlock(); |
1285 | 566 | LiveOnEntryDef = make_unique<MemoryDef>(F.getContext(), nullptr, nullptr, |
1286 | 566 | &StartingPoint, NextID++); |
1287 | 566 | DenseMap<const BasicBlock *, unsigned int> BBNumbers; |
1288 | 566 | unsigned NextBBNum = 0; |
1289 | 566 | |
1290 | 566 | // We maintain lists of memory accesses per-block, trading memory for time. We |
1291 | 566 | // could just look up the memory access for every possible instruction in the |
1292 | 566 | // stream. |
1293 | 566 | SmallPtrSet<BasicBlock *, 32> DefiningBlocks; |
1294 | 566 | SmallPtrSet<BasicBlock *, 32> DefUseBlocks; |
1295 | 566 | // Go through each block, figure out where defs occur, and chain together all |
1296 | 566 | // the accesses. |
1297 | 1.69k | for (BasicBlock &B : F) { |
1298 | 1.69k | BBNumbers[&B] = NextBBNum++; |
1299 | 1.69k | bool InsertIntoDef = false; |
1300 | 1.69k | AccessList *Accesses = nullptr; |
1301 | 1.69k | DefsList *Defs = nullptr; |
1302 | 5.39k | for (Instruction &I : B) { |
1303 | 5.39k | MemoryUseOrDef *MUD = createNewAccess(&I); |
1304 | 5.39k | if (!MUD) |
1305 | 3.57k | continue; |
1306 | 5.39k | |
1307 | 1.82k | if (1.82k !Accesses1.82k ) |
1308 | 826 | Accesses = getOrCreateAccessList(&B); |
1309 | 1.82k | Accesses->push_back(MUD); |
1310 | 1.82k | if (isa<MemoryDef>(MUD)1.82k ) {983 |
1311 | 983 | InsertIntoDef = true; |
1312 | 983 | if (!Defs) |
1313 | 604 | Defs = getOrCreateDefsList(&B); |
1314 | 983 | Defs->push_back(*MUD); |
1315 | 983 | } |
1316 | 1.82k | } |
1317 | 1.69k | if (InsertIntoDef) |
1318 | 604 | DefiningBlocks.insert(&B); |
1319 | 1.69k | if (Accesses) |
1320 | 826 | DefUseBlocks.insert(&B); |
1321 | 1.69k | } |
1322 | 566 | placePHINodes(DefiningBlocks, BBNumbers); |
1323 | 566 | |
1324 | 566 | // Now do regular SSA renaming on the MemoryDef/MemoryUse. Visited will get |
1325 | 566 | // filled in with all blocks. |
1326 | 566 | SmallPtrSet<BasicBlock *, 16> Visited; |
1327 | 566 | renamePass(DT->getRootNode(), LiveOnEntryDef.get(), Visited); |
1328 | 566 | |
1329 | 566 | CachingWalker *Walker = getWalkerImpl(); |
1330 | 566 | |
1331 | 566 | // We're doing a batch of updates; don't drop useful caches between them. |
1332 | 566 | Walker->setAutoResetWalker(false); |
1333 | 566 | OptimizeUses(this, Walker, AA, DT).optimizeUses(); |
1334 | 566 | Walker->setAutoResetWalker(true); |
1335 | 566 | Walker->resetClobberWalker(); |
1336 | 566 | |
1337 | 566 | // Mark the uses in unreachable blocks as live on entry, so that they go |
1338 | 566 | // somewhere. |
1339 | 566 | for (auto &BB : F) |
1340 | 1.69k | if (1.69k !Visited.count(&BB)1.69k ) |
1341 | 21 | markUnreachableAsLiveOnEntry(&BB); |
1342 | 566 | } |
1343 | | |
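
Once buildMemorySSA() above has run, clients usually interact with the result through the walker. A minimal sketch of such a query, restricted to the public API visible in this file (getMemoryAccess, getWalker, getClobberingMemoryAccess); the helper name is hypothetical:

    #include "llvm/Transforms/Utils/MemorySSA.h"
    using namespace llvm;

    static MemoryAccess *clobberFor(MemorySSA &MSSA, Instruction *I) {
      // Not every instruction touches memory; those get no access at all.
      if (MemoryUseOrDef *MA = MSSA.getMemoryAccess(I))
        return MSSA.getWalker()->getClobberingMemoryAccess(MA);
      return nullptr;
    }
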
1344 | 853 | MemorySSAWalker *MemorySSA::getWalker() { return getWalkerImpl(); } |
1345 | | |
1346 | 1.41k | MemorySSA::CachingWalker *MemorySSA::getWalkerImpl() { |
1347 | 1.41k | if (Walker) |
1348 | 853 | return Walker.get(); |
1349 | 1.41k | |
1350 | 566 | Walker = make_unique<CachingWalker>(this, AA, DT); |
1351 | 566 | return Walker.get(); |
1352 | 1.41k | } |
1353 | | |
1354 | | // This is a helper function used by the creation routines. It places NewAccess |
1355 | | // into the access and defs lists for a given basic block, at the given |
1356 | | // insertion point. |
1357 | | void MemorySSA::insertIntoListsForBlock(MemoryAccess *NewAccess, |
1358 | | const BasicBlock *BB, |
1359 | 275 | InsertionPlace Point) { |
1360 | 275 | auto *Accesses = getOrCreateAccessList(BB); |
1361 | 275 | if (Point == Beginning275 ) {235 |
1362 | 235 | // If it's a phi node, it goes first, otherwise, it goes after any phi |
1363 | 235 | // nodes. |
1364 | 235 | if (isa<MemoryPhi>(NewAccess)235 ) {226 |
1365 | 226 | Accesses->push_front(NewAccess); |
1366 | 226 | auto *Defs = getOrCreateDefsList(BB); |
1367 | 226 | Defs->push_front(*NewAccess); |
1368 | 9 | } else { |
1369 | 9 | auto AI = find_if_not( |
1370 | 2 | *Accesses, [](const MemoryAccess &MA) { return isa<MemoryPhi>(MA); }); |
1371 | 9 | Accesses->insert(AI, NewAccess); |
1372 | 9 | if (!isa<MemoryUse>(NewAccess)9 ) {4 |
1373 | 4 | auto *Defs = getOrCreateDefsList(BB); |
1374 | 4 | auto DI = find_if_not( |
1375 | 0 | *Defs, [](const MemoryAccess &MA) { return isa<MemoryPhi>(MA); }); |
1376 | 4 | Defs->insert(DI, *NewAccess); |
1377 | 4 | } |
1378 | 9 | } |
1379 | 40 | } else { |
1380 | 40 | Accesses->push_back(NewAccess); |
1381 | 40 | if (!isa<MemoryUse>(NewAccess)40 ) {11 |
1382 | 11 | auto *Defs = getOrCreateDefsList(BB); |
1383 | 11 | Defs->push_back(*NewAccess); |
1384 | 11 | } |
1385 | 40 | } |
1386 | 275 | BlockNumberingValid.erase(BB); |
1387 | 275 | } |
1388 | | |
1389 | | void MemorySSA::insertIntoListsBefore(MemoryAccess *What, const BasicBlock *BB, |
1390 | 6 | AccessList::iterator InsertPt) { |
1391 | 6 | auto *Accesses = getWritableBlockAccesses(BB); |
1392 | 6 | bool WasEnd = InsertPt == Accesses->end(); |
1393 | 6 | Accesses->insert(AccessList::iterator(InsertPt), What); |
1394 | 6 | if (!isa<MemoryUse>(What)6 ) {6 |
1395 | 6 | auto *Defs = getOrCreateDefsList(BB); |
1396 | 6 | // If we got asked to insert at the end, we have an easy job, just shove it |
1397 | 6 | // at the end. If we got asked to insert before an existing def, we also get |
1398 | 6 | // an iterator. If we got asked to insert before a use, we have to hunt for |
1399 | 6 | // the next def. |
1400 | 6 | if (WasEnd6 ) {3 |
1401 | 3 | Defs->push_back(*What); |
1402 | 3 | } else if (3 isa<MemoryDef>(InsertPt)3 ) {2 |
1403 | 2 | Defs->insert(InsertPt->getDefsIterator(), *What); |
1404 | 1 | } else { |
1405 | 2 | while (InsertPt != Accesses->end() && 2 !isa<MemoryDef>(InsertPt)1 ) |
1406 | 1 | ++InsertPt; |
1407 | 1 | // Either we found a def, or we are inserting at the end |
1408 | 1 | if (InsertPt == Accesses->end()) |
1409 | 1 | Defs->push_back(*What); |
1410 | 1 | else |
1411 | 0 | Defs->insert(InsertPt->getDefsIterator(), *What); |
1412 | 1 | } |
1413 | 6 | } |
1414 | 6 | BlockNumberingValid.erase(BB); |
1415 | 6 | } |
1416 | | |
1417 | | // Move What before Where in the IR. The end result is that What will belong to |
1418 | | // the right lists and have the right Block set, but will not otherwise be |
1419 | | // correct. It will not have the right defining access, and if it is a def, |
1420 | | // things below it will not properly be updated. |
1421 | | void MemorySSA::moveTo(MemoryUseOrDef *What, BasicBlock *BB, |
1422 | 4 | AccessList::iterator Where) { |
1423 | 4 | // Keep it in the lookup tables, remove from the lists |
1424 | 4 | removeFromLists(What, false); |
1425 | 4 | What->setBlock(BB); |
1426 | 4 | insertIntoListsBefore(What, BB, Where); |
1427 | 4 | } |
1428 | | |
1429 | | void MemorySSA::moveTo(MemoryUseOrDef *What, BasicBlock *BB, |
1430 | 1 | InsertionPlace Point) { |
1431 | 1 | removeFromLists(What, false); |
1432 | 1 | What->setBlock(BB); |
1433 | 1 | insertIntoListsForBlock(What, BB, Point); |
1434 | 1 | } |
1435 | | |
1436 | 226 | MemoryPhi *MemorySSA::createMemoryPhi(BasicBlock *BB) { |
1437 | 226 | assert(!getMemoryAccess(BB) && "MemoryPhi already exists for this BB"); |
1438 | 226 | MemoryPhi *Phi = new MemoryPhi(BB->getContext(), BB, NextID++); |
1439 | 226 | // Phis are always placed at the front of the block. |
1440 | 226 | insertIntoListsForBlock(Phi, BB, Beginning); |
1441 | 226 | ValueToMemoryAccess[BB] = Phi; |
1442 | 226 | return Phi; |
1443 | 226 | } |
1444 | | |
1445 | | MemoryUseOrDef *MemorySSA::createDefinedAccess(Instruction *I, |
1446 | 50 | MemoryAccess *Definition) { |
1447 | 50 | assert(!isa<PHINode>(I) && "Cannot create a defined access for a PHI"); |
1448 | 50 | MemoryUseOrDef *NewAccess = createNewAccess(I); |
1449 | 50 | assert( |
1450 | 50 | NewAccess != nullptr && |
1451 | 50 | "Tried to create a memory access for a non-memory touching instruction"); |
1452 | 50 | NewAccess->setDefiningAccess(Definition); |
1453 | 50 | return NewAccess; |
1454 | 50 | } |
1455 | | |
1456 | | // Return true if the instruction has ordering constraints. |
1457 | | // Note specifically that this only considers stores and loads |
1458 | | // because others are still considered ModRef by getModRefInfo. |
1459 | 4.46k | static inline bool isOrdered(const Instruction *I) { |
1460 | 4.46k | if (auto *SI4.46k = dyn_cast<StoreInst>(I)) {0 |
1461 | 0 | if (!SI->isUnordered()) |
1462 | 0 | return true; |
1463 | 4.46k | } else if (auto *4.46k LI4.46k = dyn_cast<LoadInst>(I)) {861 |
1464 | 861 | if (!LI->isUnordered()) |
1465 | 45 | return true; |
1466 | 861 | } |
1467 | 4.42k | return false; |
1468 | 4.46k | } |
1469 | | /// \brief Helper function to create new memory accesses |
1470 | 5.44k | MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I) { |
1471 | 5.44k | // The assume intrinsic has a control dependency which we model by claiming |
1472 | 5.44k | // that it writes arbitrarily. Ignore that fake memory dependency here. |
1473 | 5.44k | // FIXME: Replace this special casing with a more accurate modelling of |
1474 | 5.44k | // assume's control dependency. |
1475 | 5.44k | if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) |
1476 | 143 | if (143 II->getIntrinsicID() == Intrinsic::assume143 ) |
1477 | 22 | return nullptr; |
1478 | 5.44k | |
1479 | 5.42k | // Find out what effect this instruction has on memory. |
1480 | 5.42k | ModRefInfo ModRef = AA->getModRefInfo(I); |
1481 | 5.42k | // The isOrdered check is used to ensure that volatiles end up as defs |
1482 | 5.42k | // (atomics end up as ModRef right now anyway). Until we separate the |
1483 | 5.42k | // ordering chain from the memory chain, this enables people to see at least |
1484 | 5.42k | // some relative ordering to volatiles. Note that getClobberingMemoryAccess |
1485 | 5.42k | // will still give an answer that bypasses other volatile loads. TODO: |
1486 | 5.42k | // Separate memory aliasing and ordering into two different chains so that we |
1487 | 5.42k | // can precisely represent both "what memory will this read/write/is clobbered |
1488 | 5.42k | // by" and "what instructions can I move this past". |
1489 | 4.46k | bool Def = bool(ModRef & MRI_Mod) || isOrdered(I); |
1490 | 5.42k | bool Use = bool(ModRef & MRI_Ref); |
1491 | 5.42k | |
1492 | 5.42k | // It's possible for an instruction to not modify memory at all. During |
1493 | 5.42k | // construction, we ignore them. |
1494 | 5.42k | if (!Def && 5.42k !Use4.42k ) |
1495 | 3.54k | return nullptr; |
1496 | 5.42k | |
1497 | 1.87k | assert((Def || Use) && |
1498 | 1.87k | "Trying to create a memory access with a non-memory instruction"); |
1499 | 1.87k | |
1500 | 1.87k | MemoryUseOrDef *MUD; |
1501 | 1.87k | if (Def) |
1502 | 999 | MUD = new MemoryDef(I->getContext(), nullptr, I, I->getParent(), NextID++); |
1503 | 1.87k | else |
1504 | 876 | MUD = new MemoryUse(I->getContext(), nullptr, I, I->getParent()); |
1505 | 1.87k | ValueToMemoryAccess[I] = MUD; |
1506 | 1.87k | return MUD; |
1507 | 5.42k | } |
1508 | | |
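
The Def/Use decision in createNewAccess() above boils down to three bits of information: may-write, may-read, and the isOrdered() check. A library-free sketch of that rule (all names hypothetical):

    enum class AccessKind { None, Use, Def };

    // Mirrors createNewAccess(): anything that may modify memory, or that has
    // ordering constraints (non-unordered loads and stores), becomes a Def;
    // a pure reader becomes a Use; everything else gets no access at all.
    static AccessKind classify(bool MayWrite, bool MayRead, bool Ordered) {
      if (MayWrite || Ordered)
        return AccessKind::Def;
      if (MayRead)
        return AccessKind::Use;
      return AccessKind::None;
    }
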
1509 | | /// \brief Returns true if \p Replacer dominates \p Replacee. |
1510 | | bool MemorySSA::dominatesUse(const MemoryAccess *Replacer, |
1511 | 0 | const MemoryAccess *Replacee) const { |
1512 | 0 | if (isa<MemoryUseOrDef>(Replacee)) |
1513 | 0 | return DT->dominates(Replacer->getBlock(), Replacee->getBlock()); |
1514 | 0 | const auto *MP = cast<MemoryPhi>(Replacee); |
1515 | 0 | // For a phi node, the use occurs in the predecessor block of the phi node. |
1516 | 0 | // Since we may occur multiple times in the phi node, we have to check each |
1517 | 0 | // operand to ensure Replacer dominates each operand where Replacee occurs. |
1518 | 0 | for (const Use &Arg : MP->operands()) { |
1519 | 0 | if (Arg.get() != Replacee && |
1520 | 0 | !DT->dominates(Replacer->getBlock(), MP->getIncomingBlock(Arg))) |
1521 | 0 | return false; |
1522 | 0 | } |
1523 | 0 | return true; |
1524 | 0 | } |
1525 | | |
1526 | | /// \brief Properly remove \p MA from all of MemorySSA's lookup tables. |
1527 | 182 | void MemorySSA::removeFromLookups(MemoryAccess *MA) { |
1528 | 182 | assert(MA->use_empty() && |
1529 | 182 | "Trying to remove memory access that still has uses"); |
1530 | 182 | BlockNumbering.erase(MA); |
1531 | 182 | if (MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(MA)) |
1532 | 169 | MUD->setDefiningAccess(nullptr); |
1533 | 182 | // Invalidate our walker's cache if necessary |
1534 | 182 | if (!isa<MemoryUse>(MA)) |
1535 | 67 | Walker->invalidateInfo(MA); |
1536 | 182 | // The call below to erase will destroy MA, so we can't change the order we |
1537 | 182 | // are doing things here |
1538 | 182 | Value *MemoryInst; |
1539 | 182 | if (MemoryUseOrDef *MUD182 = dyn_cast<MemoryUseOrDef>(MA)) {169 |
1540 | 169 | MemoryInst = MUD->getMemoryInst(); |
1541 | 13 | } else { |
1542 | 13 | MemoryInst = MA->getBlock(); |
1543 | 13 | } |
1544 | 182 | auto VMA = ValueToMemoryAccess.find(MemoryInst); |
1545 | 182 | if (VMA->second == MA) |
1546 | 142 | ValueToMemoryAccess.erase(VMA); |
1547 | 182 | } |
1548 | | |
1549 | | /// \brief Properly remove \p MA from all of MemorySSA's lists. |
1550 | | /// |
1551 | | /// Because of the way the intrusive list and use lists work, it is important to |
1552 | | /// do removal in the right order. |
1553 | | /// ShouldDelete defaults to true, and will cause the memory access to also be |
1554 | | /// deleted, not just removed. |
1555 | 187 | void MemorySSA::removeFromLists(MemoryAccess *MA, bool ShouldDelete) { |
1556 | 187 | // The access list owns the reference, so we erase it from the non-owning list |
1557 | 187 | // first. |
1558 | 187 | if (!isa<MemoryUse>(MA)187 ) {72 |
1559 | 72 | auto DefsIt = PerBlockDefs.find(MA->getBlock()); |
1560 | 72 | std::unique_ptr<DefsList> &Defs = DefsIt->second; |
1561 | 72 | Defs->remove(*MA); |
1562 | 72 | if (Defs->empty()) |
1563 | 42 | PerBlockDefs.erase(DefsIt); |
1564 | 72 | } |
1565 | 187 | |
1566 | 187 | // The erase call here will delete it. If we don't want it deleted, we call |
1567 | 187 | // remove instead. |
1568 | 187 | auto AccessIt = PerBlockAccesses.find(MA->getBlock()); |
1569 | 187 | std::unique_ptr<AccessList> &Accesses = AccessIt->second; |
1570 | 187 | if (ShouldDelete) |
1571 | 182 | Accesses->erase(MA); |
1572 | 187 | else |
1573 | 5 | Accesses->remove(MA); |
1574 | 187 | |
1575 | 187 | if (Accesses->empty()) |
1576 | 52 | PerBlockAccesses.erase(AccessIt); |
1577 | 187 | } |
1578 | | |
1579 | 82 | void MemorySSA::print(raw_ostream &OS) const { |
1580 | 82 | MemorySSAAnnotatedWriter Writer(this); |
1581 | 82 | F.print(OS, &Writer); |
1582 | 82 | } |
1583 | | |
1584 | | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1585 | | LLVM_DUMP_METHOD void MemorySSA::dump() const { print(dbgs()); } |
1586 | | #endif |
1587 | | |
1588 | 91 | void MemorySSA::verifyMemorySSA() const { |
1589 | 91 | verifyDefUses(F); |
1590 | 91 | verifyDomination(F); |
1591 | 91 | verifyOrdering(F); |
1592 | 91 | Walker->verify(this); |
1593 | 91 | } |
1594 | | |
1595 | | /// \brief Verify that the order and existence of MemoryAccesses matches the |
1596 | | /// order and existence of memory affecting instructions. |
1597 | 91 | void MemorySSA::verifyOrdering(Function &F) const { |
1598 | 91 | // Walk all the blocks, comparing what the lookups think and what the access |
1599 | 91 | // lists think, as well as the order in the blocks vs the order in the access |
1600 | 91 | // lists. |
1601 | 91 | SmallVector<MemoryAccess *, 32> ActualAccesses; |
1602 | 91 | SmallVector<MemoryAccess *, 32> ActualDefs; |
1603 | 276 | for (BasicBlock &B : F) { |
1604 | 276 | const AccessList *AL = getBlockAccesses(&B); |
1605 | 276 | const auto *DL = getBlockDefs(&B); |
1606 | 276 | MemoryAccess *Phi = getMemoryAccess(&B); |
1607 | 276 | if (Phi276 ) {81 |
1608 | 81 | ActualAccesses.push_back(Phi); |
1609 | 81 | ActualDefs.push_back(Phi); |
1610 | 81 | } |
1611 | 276 | |
1612 | 790 | for (Instruction &I : B) { |
1613 | 790 | MemoryAccess *MA = getMemoryAccess(&I); |
1614 | 790 | assert((!MA || (AL && (isa<MemoryUse>(MA) || DL))) && |
1615 | 790 | "We have memory affecting instructions " |
1616 | 790 | "in this block but they are not in the " |
1617 | 790 | "access list or defs list"); |
1618 | 790 | if (MA790 ) {390 |
1619 | 390 | ActualAccesses.push_back(MA); |
1620 | 390 | if (isa<MemoryDef>(MA)) |
1621 | 232 | ActualDefs.push_back(MA); |
1622 | 390 | } |
1623 | 790 | } |
1624 | 276 | // Either we hit the assert, really have no accesses, or we have both |
1625 | 276 | // accesses and an access list. |
1626 | 276 | // Same with defs. |
1627 | 276 | if (!AL && 276 !DL67 ) |
1628 | 67 | continue; |
1629 | 209 | assert(AL->size() == ActualAccesses.size() && |
1630 | 209 | "We don't have the same number of accesses in the block as on the " |
1631 | 209 | "access list"); |
1632 | 209 | assert((DL || ActualDefs.size() == 0) && |
1633 | 209 | "Either we should have a defs list, or we should have no defs"); |
1634 | 209 | assert((!DL || DL->size() == ActualDefs.size()) && |
1635 | 209 | "We don't have the same number of defs in the block as on the " |
1636 | 209 | "def list"); |
1637 | 209 | auto ALI = AL->begin(); |
1638 | 209 | auto AAI = ActualAccesses.begin(); |
1639 | 680 | while (ALI != AL->end() && 680 AAI != ActualAccesses.end()471 ) {471 |
1640 | 471 | assert(&*ALI == *AAI && "Not the same accesses in the same order"); |
1641 | 471 | ++ALI; |
1642 | 471 | ++AAI; |
1643 | 471 | } |
1644 | 209 | ActualAccesses.clear(); |
1645 | 209 | if (DL209 ) {203 |
1646 | 203 | auto DLI = DL->begin(); |
1647 | 203 | auto ADI = ActualDefs.begin(); |
1648 | 516 | while (DLI != DL->end() && 516 ADI != ActualDefs.end()313 ) {313 |
1649 | 313 | assert(&*DLI == *ADI && "Not the same defs in the same order"); |
1650 | 313 | ++DLI; |
1651 | 313 | ++ADI; |
1652 | 313 | } |
1653 | 203 | } |
1654 | 209 | ActualDefs.clear(); |
1655 | 209 | } |
1656 | 91 | } |
1657 | | |
1658 | | /// \brief Verify the domination properties of MemorySSA by checking that each |
1659 | | /// definition dominates all of its uses. |
1660 | 91 | void MemorySSA::verifyDomination(Function &F) const { |
1661 | 91 | #ifndef NDEBUG |
1662 | | for (BasicBlock &B : F) { |
1663 | | // Phi nodes are attached to basic blocks |
1664 | | if (MemoryPhi *MP = getMemoryAccess(&B)) |
1665 | | for (const Use &U : MP->uses()) |
1666 | | assert(dominates(MP, U) && "Memory PHI does not dominate its uses"); |
1667 | | |
1668 | | for (Instruction &I : B) { |
1669 | | MemoryAccess *MD = dyn_cast_or_null<MemoryDef>(getMemoryAccess(&I)); |
1670 | | if (!MD) |
1671 | | continue; |
1672 | | |
1673 | | for (const Use &U : MD->uses()) |
1674 | | assert(dominates(MD, U) && "Memory Def does not dominate its uses"); |
1675 | | } |
1676 | | } |
1677 | | #endif |
1678 | 91 | } |
1679 | | |
1680 | | /// \brief Verify the def-use lists in MemorySSA, by verifying that \p Use |
1681 | | /// appears in the use list of \p Def. |
1682 | | |
1683 | 573 | void MemorySSA::verifyUseInDefs(MemoryAccess *Def, MemoryAccess *Use) const { |
1684 | 573 | #ifndef NDEBUG |
1685 | | // The live on entry use may cause us to get a NULL def here |
1686 | | if (!Def) |
1687 | | assert(isLiveOnEntryDef(Use) && |
1688 | | "Null def but use not point to live on entry def"); |
1689 | | else |
1690 | | assert(is_contained(Def->users(), Use) && |
1691 | | "Did not find use in def's use list"); |
1692 | | #endif |
1693 | 573 | } |
1694 | | |
1695 | | /// \brief Verify the immediate use information, by walking all the memory |
1696 | | /// accesses and verifying that, for each use, it appears in the |
1697 | | /// appropriate def's use list |
1698 | 91 | void MemorySSA::verifyDefUses(Function &F) const { |
1699 | 276 | for (BasicBlock &B : F) { |
1700 | 276 | // Phi nodes are attached to basic blocks |
1701 | 276 | if (MemoryPhi *Phi276 = getMemoryAccess(&B)) {81 |
1702 | 81 | assert(Phi->getNumOperands() == static_cast<unsigned>(std::distance( |
1703 | 81 | pred_begin(&B), pred_end(&B))) && |
1704 | 81 | "Incomplete MemoryPhi Node"); |
1705 | 264 | for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E264 ; ++I183 ) |
1706 | 183 | verifyUseInDefs(Phi->getIncomingValue(I), Phi); |
1707 | 81 | } |
1708 | 276 | |
1709 | 790 | for (Instruction &I : B) { |
1710 | 790 | if (MemoryUseOrDef *MA790 = getMemoryAccess(&I)) {390 |
1711 | 390 | verifyUseInDefs(MA->getDefiningAccess(), MA); |
1712 | 390 | } |
1713 | 790 | } |
1714 | 276 | } |
1715 | 91 | } |
1716 | | |
1717 | 8.33k | MemoryUseOrDef *MemorySSA::getMemoryAccess(const Instruction *I) const { |
1718 | 8.33k | return cast_or_null<MemoryUseOrDef>(ValueToMemoryAccess.lookup(I)); |
1719 | 8.33k | } |
1720 | | |
1721 | 1.92k | MemoryPhi *MemorySSA::getMemoryAccess(const BasicBlock *BB) const { |
1722 | 1.92k | return cast_or_null<MemoryPhi>(ValueToMemoryAccess.lookup(cast<Value>(BB))); |
1723 | 1.92k | } |
1724 | | |
1725 | | /// Perform a local numbering on blocks so that instruction ordering can be |
1726 | | /// determined in constant time. |
1727 | | /// TODO: We currently just number in order. If we numbered by N, we could |
1728 | | /// allow at least N-1 sequences of insertBefore or insertAfter (and at least |
1729 | | /// log2(N) sequences of mixed before and after) without needing to invalidate |
1730 | | /// the numbering. |
1731 | 7 | void MemorySSA::renumberBlock(const BasicBlock *B) const { |
1732 | 7 | // The pre-increment ensures the numbers really start at 1. |
1733 | 7 | unsigned long CurrentNumber = 0; |
1734 | 7 | const AccessList *AL = getBlockAccesses(B); |
1735 | 7 | assert(AL != nullptr && "Asking to renumber an empty block"); |
1736 | 7 | for (const auto &I : *AL) |
1737 | 27 | BlockNumbering[&I] = ++CurrentNumber; |
1738 | 7 | BlockNumberingValid.insert(B); |
1739 | 7 | } |
1740 | | |
1741 | | /// \brief Determine, for two memory accesses in the same block, |
1742 | | /// whether \p Dominator dominates \p Dominatee. |
1743 | | /// \returns True if \p Dominator dominates \p Dominatee. |
1744 | | bool MemorySSA::locallyDominates(const MemoryAccess *Dominator, |
1745 | 16 | const MemoryAccess *Dominatee) const { |
1746 | 16 | |
1747 | 16 | const BasicBlock *DominatorBlock = Dominator->getBlock(); |
1748 | 16 | |
1749 | 16 | assert((DominatorBlock == Dominatee->getBlock()) && |
1750 | 16 | "Asking for local domination when accesses are in different blocks!"); |
1751 | 16 | // A node dominates itself. |
1752 | 16 | if (Dominatee == Dominator) |
1753 | 0 | return true; |
1754 | 16 | |
1755 | 16 | // When Dominatee is defined on function entry, it is not dominated by another |
1756 | 16 | // memory access. |
1757 | 16 | if (16 isLiveOnEntryDef(Dominatee)16 ) |
1758 | 0 | return false; |
1759 | 16 | |
1760 | 16 | // When Dominator is defined on function entry, it dominates the other memory |
1761 | 16 | // access. |
1762 | 16 | if (16 isLiveOnEntryDef(Dominator)16 ) |
1763 | 8 | return true; |
1764 | 16 | |
1765 | 8 | if (8 !BlockNumberingValid.count(DominatorBlock)8 ) |
1766 | 7 | renumberBlock(DominatorBlock); |
1767 | 8 | |
1768 | 8 | unsigned long DominatorNum = BlockNumbering.lookup(Dominator); |
1769 | 8 | // All numbers start with 1 |
1770 | 8 | assert(DominatorNum != 0 && "Block was not numbered properly"); |
1771 | 8 | unsigned long DominateeNum = BlockNumbering.lookup(Dominatee); |
1772 | 8 | assert(DominateeNum != 0 && "Block was not numbered properly"); |
1773 | 8 | return DominatorNum < DominateeNum; |
1774 | 16 | } |
1775 | | |
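
renumberBlock() and locallyDominates() above trade one linear renumbering pass per block for O(1) intra-block dominance queries. A stand-alone sketch of that scheme, using a plain unordered_map instead of the DenseMaps in this file (all names hypothetical):

    #include <cassert>
    #include <unordered_map>
    #include <vector>

    struct BlockNumberingSketch {
      std::unordered_map<const void *, unsigned long> Number; // access -> 1-based position

      // One linear pass; the pre-increment makes numbering start at 1 so that a
      // lookup result of 0 would mean "never numbered".
      void renumber(const std::vector<const void *> &AccessesInOrder) {
        unsigned long N = 0;
        for (const void *A : AccessesInOrder)
          Number[A] = ++N;
      }

      // Within one block, the earlier-numbered access dominates the later one.
      bool locallyDominates(const void *A, const void *B) const {
        unsigned long NA = Number.at(A), NB = Number.at(B);
        assert(NA != 0 && NB != 0 && "Block was not numbered properly");
        return NA < NB;
      }
    };
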
1776 | | bool MemorySSA::dominates(const MemoryAccess *Dominator, |
1777 | 161 | const MemoryAccess *Dominatee) const { |
1778 | 161 | if (Dominator == Dominatee) |
1779 | 12 | return true; |
1780 | 161 | |
1781 | 149 | if (149 isLiveOnEntryDef(Dominatee)149 ) |
1782 | 47 | return false; |
1783 | 149 | |
1784 | 102 | if (102 Dominator->getBlock() != Dominatee->getBlock()102 ) |
1785 | 88 | return DT->dominates(Dominator->getBlock(), Dominatee->getBlock()); |
1786 | 14 | return locallyDominates(Dominator, Dominatee); |
1787 | 102 | } |
1788 | | |
1789 | | bool MemorySSA::dominates(const MemoryAccess *Dominator, |
1790 | 0 | const Use &Dominatee) const { |
1791 | 0 | if (MemoryPhi *MP0 = dyn_cast<MemoryPhi>(Dominatee.getUser())) {0 |
1792 | 0 | BasicBlock *UseBB = MP->getIncomingBlock(Dominatee); |
1793 | 0 | // The def must dominate the incoming block of the phi. |
1794 | 0 | if (UseBB != Dominator->getBlock()) |
1795 | 0 | return DT->dominates(Dominator->getBlock(), UseBB); |
1796 | 0 | // If the UseBB and the DefBB are the same, compare locally. |
1797 | 0 | return locallyDominates(Dominator, cast<MemoryAccess>(Dominatee)); |
1798 | 0 | } |
1799 | 0 | // If it's not a PHI node use, the normal dominates can already handle it. |
1800 | 0 | return dominates(Dominator, cast<MemoryAccess>(Dominatee.getUser())); |
1801 | 0 | } |
1802 | | |
1803 | | const static char LiveOnEntryStr[] = "liveOnEntry"; |
1804 | | |
1805 | 216 | void MemoryDef::print(raw_ostream &OS) const { |
1806 | 216 | MemoryAccess *UO = getDefiningAccess(); |
1807 | 216 | |
1808 | 216 | OS << getID() << " = MemoryDef("; |
1809 | 216 | if (UO && 216 UO->getID()216 ) |
1810 | 144 | OS << UO->getID(); |
1811 | 216 | else |
1812 | 72 | OS << LiveOnEntryStr; |
1813 | 216 | OS << ')'; |
1814 | 216 | } |
1815 | | |
1816 | 69 | void MemoryPhi::print(raw_ostream &OS) const { |
1817 | 69 | bool First = true; |
1818 | 69 | OS << getID() << " = MemoryPhi("; |
1819 | 159 | for (const auto &Op : operands()) { |
1820 | 159 | BasicBlock *BB = getIncomingBlock(Op); |
1821 | 159 | MemoryAccess *MA = cast<MemoryAccess>(Op); |
1822 | 159 | if (!First) |
1823 | 90 | OS << ','; |
1824 | 159 | else |
1825 | 69 | First = false; |
1826 | 159 | |
1827 | 159 | OS << '{'; |
1828 | 159 | if (BB->hasName()) |
1829 | 145 | OS << BB->getName(); |
1830 | 159 | else |
1831 | 14 | BB->printAsOperand(OS, false); |
1832 | 159 | OS << ','; |
1833 | 159 | if (unsigned ID = MA->getID()) |
1834 | 143 | OS << ID; |
1835 | 159 | else |
1836 | 16 | OS << LiveOnEntryStr; |
1837 | 159 | OS << '}'; |
1838 | 159 | } |
1839 | 69 | OS << ')'; |
1840 | 69 | } |
1841 | | |
1842 | 2.66k | MemoryAccess::~MemoryAccess() {} |
1843 | | |
1844 | 147 | void MemoryUse::print(raw_ostream &OS) const { |
1845 | 147 | MemoryAccess *UO = getDefiningAccess(); |
1846 | 147 | OS << "MemoryUse("; |
1847 | 147 | if (UO && 147 UO->getID()147 ) |
1848 | 122 | OS << UO->getID(); |
1849 | 147 | else |
1850 | 25 | OS << LiveOnEntryStr; |
1851 | 147 | OS << ')'; |
1852 | 147 | } |
1853 | | |
1854 | 0 | void MemoryAccess::dump() const { |
1855 | 0 | // Cannot completely remove virtual function even in release mode. |
1856 | 0 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1857 | | print(dbgs()); |
1858 | | dbgs() << "\n"; |
1859 | | #endif |
1860 | 0 | } |
1861 | | |
1862 | | char MemorySSAPrinterLegacyPass::ID = 0; |
1863 | | |
1864 | 20 | MemorySSAPrinterLegacyPass::MemorySSAPrinterLegacyPass() : FunctionPass(ID) { |
1865 | 20 | initializeMemorySSAPrinterLegacyPassPass(*PassRegistry::getPassRegistry()); |
1866 | 20 | } |
1867 | | |
1868 | 20 | void MemorySSAPrinterLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const { |
1869 | 20 | AU.setPreservesAll(); |
1870 | 20 | AU.addRequired<MemorySSAWrapperPass>(); |
1871 | 20 | AU.addPreserved<MemorySSAWrapperPass>(); |
1872 | 20 | } |
1873 | | |
1874 | 45 | bool MemorySSAPrinterLegacyPass::runOnFunction(Function &F) { |
1875 | 45 | auto &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA(); |
1876 | 45 | MSSA.print(dbgs()); |
1877 | 45 | if (VerifyMemorySSA) |
1878 | 44 | MSSA.verifyMemorySSA(); |
1879 | 45 | return false; |
1880 | 45 | } |
1881 | | |
1882 | | AnalysisKey MemorySSAAnalysis::Key; |
1883 | | |
1884 | | MemorySSAAnalysis::Result MemorySSAAnalysis::run(Function &F, |
1885 | 101 | FunctionAnalysisManager &AM) { |
1886 | 101 | auto &DT = AM.getResult<DominatorTreeAnalysis>(F); |
1887 | 101 | auto &AA = AM.getResult<AAManager>(F); |
1888 | 101 | return MemorySSAAnalysis::Result(make_unique<MemorySSA>(F, &AA, &DT)); |
1889 | 101 | } |
1890 | | |
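
Under the new pass manager, other function passes reach the MemorySSA built by MemorySSAAnalysis::run() above through the analysis manager, just as the printer and verifier passes below do. A minimal sketch (the pass name is hypothetical):

    #include "llvm/IR/PassManager.h"
    #include "llvm/Transforms/Utils/MemorySSA.h"
    using namespace llvm;

    struct UsesMemorySSAPass : PassInfoMixin<UsesMemorySSAPass> {
      PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) {
        MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
        (void)MSSA; // query walkers/accesses here
        return PreservedAnalyses::all(); // this sketch changes nothing
      }
    };
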
1891 | | PreservedAnalyses MemorySSAPrinterPass::run(Function &F, |
1892 | 36 | FunctionAnalysisManager &AM) { |
1893 | 36 | OS << "MemorySSA for function: " << F.getName() << "\n"; |
1894 | 36 | AM.getResult<MemorySSAAnalysis>(F).getMSSA().print(OS); |
1895 | 36 | |
1896 | 36 | return PreservedAnalyses::all(); |
1897 | 36 | } |
1898 | | |
1899 | | PreservedAnalyses MemorySSAVerifierPass::run(Function &F, |
1900 | 33 | FunctionAnalysisManager &AM) { |
1901 | 33 | AM.getResult<MemorySSAAnalysis>(F).getMSSA().verifyMemorySSA(); |
1902 | 33 | |
1903 | 33 | return PreservedAnalyses::all(); |
1904 | 33 | } |
1905 | | |
1906 | | char MemorySSAWrapperPass::ID = 0; |
1907 | | |
1908 | 169 | MemorySSAWrapperPass::MemorySSAWrapperPass() : FunctionPass(ID) { |
1909 | 169 | initializeMemorySSAWrapperPassPass(*PassRegistry::getPassRegistry()); |
1910 | 169 | } |
1911 | | |
1912 | 448 | void MemorySSAWrapperPass::releaseMemory() { MSSA.reset(); } |
1913 | | |
1914 | 169 | void MemorySSAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { |
1915 | 169 | AU.setPreservesAll(); |
1916 | 169 | AU.addRequiredTransitive<DominatorTreeWrapperPass>(); |
1917 | 169 | AU.addRequiredTransitive<AAResultsWrapperPass>(); |
1918 | 169 | } |
1919 | | |
1920 | 448 | bool MemorySSAWrapperPass::runOnFunction(Function &F) { |
1921 | 448 | auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); |
1922 | 448 | auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); |
1923 | 448 | MSSA.reset(new MemorySSA(F, &AA, &DT)); |
1924 | 448 | return false; |
1925 | 448 | } |
1926 | | |
1927 | 0 | void MemorySSAWrapperPass::verifyAnalysis() const { MSSA->verifyMemorySSA(); } |
1928 | | |
1929 | 1 | void MemorySSAWrapperPass::print(raw_ostream &OS, const Module *M) const { |
1930 | 1 | MSSA->print(OS); |
1931 | 1 | } |
1932 | | |
1933 | 566 | MemorySSAWalker::MemorySSAWalker(MemorySSA *M) : MSSA(M) {} |
1934 | | |
1935 | | MemorySSA::CachingWalker::CachingWalker(MemorySSA *M, AliasAnalysis *A, |
1936 | | DominatorTree *D) |
1937 | 566 | : MemorySSAWalker(M), Walker(*M, *A, *D), AutoResetWalker(true) {} |
1938 | | |
1939 | 566 | MemorySSA::CachingWalker::~CachingWalker() {} |
1940 | | |
1941 | 68 | void MemorySSA::CachingWalker::invalidateInfo(MemoryAccess *MA) { |
1942 | 68 | if (auto *MUD = dyn_cast<MemoryUseOrDef>(MA)) |
1943 | 55 | MUD->resetOptimized(); |
1944 | 68 | } |
1945 | | |
1946 | | /// \brief Walk the use-def chains starting at \p MA and find |
1947 | | /// the MemoryAccess that actually clobbers Loc. |
1948 | | /// |
1949 | | /// \returns our clobbering memory access |
1950 | | MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess( |
1951 | 183 | MemoryAccess *StartingAccess, UpwardsMemoryQuery &Q) { |
1952 | 183 | MemoryAccess *New = Walker.findClobber(StartingAccess, Q); |
1953 | 183 | #ifdef EXPENSIVE_CHECKS |
1954 | | MemoryAccess *NewNoCache = Walker.findClobber(StartingAccess, Q); |
1955 | | assert(NewNoCache == New && "Cache made us hand back a different result?"); |
1956 | | #endif |
1957 | 183 | if (AutoResetWalker) |
1958 | 14 | resetClobberWalker(); |
1959 | 183 | return New; |
1960 | 183 | } |
1961 | | |
1962 | | MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess( |
1963 | 2 | MemoryAccess *StartingAccess, const MemoryLocation &Loc) { |
1964 | 2 | if (isa<MemoryPhi>(StartingAccess)) |
1965 | 0 | return StartingAccess; |
1966 | 2 | |
1967 | 2 | auto *StartingUseOrDef = cast<MemoryUseOrDef>(StartingAccess); |
1968 | 2 | if (MSSA->isLiveOnEntryDef(StartingUseOrDef)) |
1969 | 0 | return StartingUseOrDef; |
1970 | 2 | |
1971 | 2 | Instruction *I = StartingUseOrDef->getMemoryInst(); |
1972 | 2 | |
1973 | 2 | // Conservatively, fences are always clobbers, so don't perform the walk if we |
1974 | 2 | // hit a fence. |
1975 | 2 | if (!ImmutableCallSite(I) && 2 I->isFenceLike()2 ) |
1976 | 0 | return StartingUseOrDef; |
1977 | 2 | |
1978 | 2 | UpwardsMemoryQuery Q; |
1979 | 2 | Q.OriginalAccess = StartingUseOrDef; |
1980 | 2 | Q.StartingLoc = Loc; |
1981 | 2 | Q.Inst = I; |
1982 | 2 | Q.IsCall = false; |
1983 | 2 | |
1984 | 2 | // Unlike the other function, do not walk to the def of a def, because we are |
1985 | 2 | // handed something we already believe is the clobbering access. |
1986 | 2 | MemoryAccess *DefiningAccess = isa<MemoryUse>(StartingUseOrDef) |
1987 | 0 | ? StartingUseOrDef->getDefiningAccess() |
1988 | 2 | : StartingUseOrDef; |
1989 | 2 | |
1990 | 2 | MemoryAccess *Clobber = getClobberingMemoryAccess(DefiningAccess, Q); |
1991 | 2 | DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is "); |
1992 | 2 | DEBUG(dbgs() << *StartingUseOrDef << "\n"); |
1993 | 2 | DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is "); |
1994 | 2 | DEBUG(dbgs() << *Clobber << "\n"); |
1995 | 2 | return Clobber; |
1996 | 2 | } |
1997 | | |
1998 | | MemoryAccess * |
1999 | 609 | MemorySSA::CachingWalker::getClobberingMemoryAccess(MemoryAccess *MA) { |
2000 | 609 | auto *StartingAccess = dyn_cast<MemoryUseOrDef>(MA); |
2001 | 609 | // If this is a MemoryPhi, we can't do anything. |
2002 | 609 | if (!StartingAccess) |
2003 | 0 | return MA; |
2004 | 609 | |
2005 | 609 | // If this is an already optimized use or def, return the optimized result. |
2006 | 609 | // Note: Currently, we do not store the optimized def result because we'd need |
2007 | 609 | // a separate field, since we can't use it as the defining access. |
2008 | 609 | if (auto *609 MUD609 = dyn_cast<MemoryUseOrDef>(StartingAccess)) |
2009 | 609 | if (609 MUD->isOptimized()609 ) |
2010 | 421 | return MUD->getOptimized(); |
2011 | 609 | |
2012 | 188 | const Instruction *I = StartingAccess->getMemoryInst(); |
2013 | 188 | UpwardsMemoryQuery Q(I, StartingAccess); |
2014 | 188 | // We can't sanely do anything with fences; they conservatively |
2015 | 188 | // clobber all memory, and have no locations to get pointers from to |
2016 | 188 | // try to disambiguate. |
2017 | 188 | if (!Q.IsCall && 188 I->isFenceLike()184 ) |
2018 | 0 | return StartingAccess; |
2019 | 188 | |
2020 | 188 | if (188 isUseTriviallyOptimizableToLiveOnEntry(*MSSA->AA, I)188 ) {1 |
2021 | 1 | MemoryAccess *LiveOnEntry = MSSA->getLiveOnEntryDef(); |
2022 | 1 | if (auto *MUD = dyn_cast<MemoryUseOrDef>(StartingAccess)) |
2023 | 1 | MUD->setOptimized(LiveOnEntry); |
2024 | 1 | return LiveOnEntry; |
2025 | 1 | } |
2026 | 188 | |
2027 | 188 | // Start with the thing we already think clobbers this location |
2028 | 187 | MemoryAccess *DefiningAccess = StartingAccess->getDefiningAccess(); |
2029 | 187 | |
2030 | 187 | // At this point, DefiningAccess may be the live on entry def. |
2031 | 187 | // If it is, we will not get a better result. |
2032 | 187 | if (MSSA->isLiveOnEntryDef(DefiningAccess)) |
2033 | 6 | return DefiningAccess; |
2034 | 187 | |
2035 | 181 | MemoryAccess *Result = getClobberingMemoryAccess(DefiningAccess, Q); |
2036 | 181 | DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is "); |
2037 | 181 | DEBUG(dbgs() << *DefiningAccess << "\n"); |
2038 | 181 | DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is "); |
2039 | 181 | DEBUG(dbgs() << *Result << "\n"); |
2040 | 181 | if (auto *MUD = dyn_cast<MemoryUseOrDef>(StartingAccess)) |
2041 | 181 | MUD->setOptimized(Result); |
2042 | 181 | |
2043 | 181 | return Result; |
2044 | 187 | } |
2045 | | |
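
The overload above memoizes its answer on the use or def itself via setOptimized(), so repeated queries avoid re-walking. A small sketch of a caller taking advantage of that cache, using only members that appear in this file (the helper name is hypothetical):

    #include "llvm/Transforms/Utils/MemorySSA.h"
    using namespace llvm;

    static MemoryAccess *cachedClobber(MemorySSA &MSSA, MemoryUseOrDef *MUD) {
      if (MUD->isOptimized())
        return MUD->getOptimized();   // hit: reuse the stored clobber
      // Miss: walk once; the walker records the result on MUD for next time.
      return MSSA.getWalker()->getClobberingMemoryAccess(MUD);
    }
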
2046 | | MemoryAccess * |
2047 | 0 | DoNothingMemorySSAWalker::getClobberingMemoryAccess(MemoryAccess *MA) { |
2048 | 0 | if (auto *Use = dyn_cast<MemoryUseOrDef>(MA)) |
2049 | 0 | return Use->getDefiningAccess(); |
2050 | 0 | return MA; |
2051 | 0 | } |
2052 | | |
2053 | | MemoryAccess *DoNothingMemorySSAWalker::getClobberingMemoryAccess( |
2054 | 0 | MemoryAccess *StartingAccess, const MemoryLocation &) { |
2055 | 0 | if (auto *Use = dyn_cast<MemoryUseOrDef>(StartingAccess)) |
2056 | 0 | return Use->getDefiningAccess(); |
2057 | 0 | return StartingAccess; |
2058 | 0 | } |
2059 | | } // namespace llvm |