/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Transforms/Scalar/NewGVN.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===---- NewGVN.cpp - Global Value Numbering Pass --------------*- C++ -*-===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | /// \file |
10 | | /// This file implements the new LLVM's Global Value Numbering pass. |
11 | | /// GVN partitions values computed by a function into congruence classes. |
12 | | /// Values ending up in the same congruence class are guaranteed to be the same |
13 | | /// for every execution of the program. In that respect, congruency is a |
14 | | /// compile-time approximation of equivalence of values at runtime. |
15 | | /// The algorithm implemented here uses a sparse formulation and it's based |
16 | | /// on the ideas described in the paper: |
17 | | /// "A Sparse Algorithm for Predicated Global Value Numbering" from |
18 | | /// Karthik Gargi. |
19 | | /// |
20 | | /// A brief overview of the algorithm: The algorithm is essentially the same as |
21 | | /// the standard RPO value numbering algorithm (a good reference is the paper |
22 | | /// "SCC based value numbering" by L. Taylor Simpson) with one major difference: |
23 | | /// The RPO algorithm proceeds, on every iteration, to process every reachable |
24 | | /// block and every instruction in that block. This is because the standard RPO |
25 | | /// algorithm does not track what things have the same value number, it only |
26 | | /// tracks what the value number of a given operation is (the mapping is |
27 | | /// operation -> value number). Thus, when a value number of an operation |
28 | | /// changes, it must reprocess everything to ensure all uses of a value number |
29 | | /// get updated properly. In constrast, the sparse algorithm we use *also* |
30 | | /// tracks what operations have a given value number (IE it also tracks the |
31 | | /// reverse mapping from value number -> operations with that value number), so |
32 | | /// that it only needs to reprocess the instructions that are affected when |
33 | | /// something's value number changes. The vast majority of complexity and code |
34 | | /// in this file is devoted to tracking what value numbers could change for what |
35 | | /// instructions when various things happen. The rest of the algorithm is |
36 | | /// devoted to performing symbolic evaluation, forward propagation, and |
37 | | /// simplification of operations based on the value numbers deduced so far |
38 | | /// |
39 | | /// In order to make the GVN mostly-complete, we use a technique derived from |
40 | | /// "Detection of Redundant Expressions: A Complete and Polynomial-time |
41 | | /// Algorithm in SSA" by R.R. Pai. The source of incompleteness in most SSA |
42 | | /// based GVN algorithms is related to their inability to detect equivalence |
43 | | /// between phi of ops (IE phi(a+b, c+d)) and op of phis (phi(a,c) + phi(b, d)). |
44 | | /// We resolve this issue by generating the equivalent "phi of ops" form for |
45 | | /// each op of phis we see, in a way that only takes polynomial time to resolve. |
46 | | /// |
47 | | /// We also do not perform elimination by using any published algorithm. All |
48 | | /// published algorithms are O(Instructions). Instead, we use a technique that |
49 | | /// is O(number of operations with the same value number), enabling us to skip |
50 | | /// trying to eliminate things that have unique value numbers. |
51 | | //===----------------------------------------------------------------------===// |
52 | | |
53 | | #include "llvm/Transforms/Scalar/NewGVN.h" |
54 | | #include "llvm/ADT/BitVector.h" |
55 | | #include "llvm/ADT/DepthFirstIterator.h" |
56 | | #include "llvm/ADT/MapVector.h" |
57 | | #include "llvm/ADT/PostOrderIterator.h" |
58 | | #include "llvm/ADT/SmallSet.h" |
59 | | #include "llvm/ADT/Statistic.h" |
60 | | #include "llvm/Analysis/AliasAnalysis.h" |
61 | | #include "llvm/Analysis/AssumptionCache.h" |
62 | | #include "llvm/Analysis/CFG.h" |
63 | | #include "llvm/Analysis/CFGPrinter.h" |
64 | | #include "llvm/Analysis/ConstantFolding.h" |
65 | | #include "llvm/Analysis/GlobalsModRef.h" |
66 | | #include "llvm/Analysis/InstructionSimplify.h" |
67 | | #include "llvm/Analysis/MemoryBuiltins.h" |
68 | | #include "llvm/Analysis/MemorySSA.h" |
69 | | #include "llvm/IR/PatternMatch.h" |
70 | | #include "llvm/Support/DebugCounter.h" |
71 | | #include "llvm/Transforms/Scalar.h" |
72 | | #include "llvm/Transforms/Scalar/GVNExpression.h" |
73 | | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
74 | | #include "llvm/Transforms/Utils/Local.h" |
75 | | #include "llvm/Transforms/Utils/PredicateInfo.h" |
76 | | #include "llvm/Transforms/Utils/VNCoercion.h" |
77 | | #include <numeric> |
78 | | #include <unordered_map> |
79 | | using namespace llvm; |
80 | | using namespace PatternMatch; |
81 | | using namespace llvm::GVNExpression; |
82 | | using namespace llvm::VNCoercion; |
83 | | #define DEBUG_TYPE "newgvn" |
84 | | |
85 | | STATISTIC(NumGVNInstrDeleted, "Number of instructions deleted"); |
86 | | STATISTIC(NumGVNBlocksDeleted, "Number of blocks deleted"); |
87 | | STATISTIC(NumGVNOpsSimplified, "Number of Expressions simplified"); |
88 | | STATISTIC(NumGVNPhisAllSame, "Number of PHIs whos arguments are all the same"); |
89 | | STATISTIC(NumGVNMaxIterations, |
90 | | "Maximum Number of iterations it took to converge GVN"); |
91 | | STATISTIC(NumGVNLeaderChanges, "Number of leader changes"); |
92 | | STATISTIC(NumGVNSortedLeaderChanges, "Number of sorted leader changes"); |
93 | | STATISTIC(NumGVNAvoidedSortedLeaderChanges, |
94 | | "Number of avoided sorted leader changes"); |
95 | | STATISTIC(NumGVNDeadStores, "Number of redundant/dead stores eliminated"); |
96 | | STATISTIC(NumGVNPHIOfOpsCreated, "Number of PHI of ops created"); |
97 | | STATISTIC(NumGVNPHIOfOpsEliminations, |
98 | | "Number of things eliminated using PHI of ops"); |
99 | | DEBUG_COUNTER(VNCounter, "newgvn-vn", |
100 | | "Controls which instructions are value numbered"); |
101 | | DEBUG_COUNTER(PHIOfOpsCounter, "newgvn-phi", |
102 | | "Controls which instructions we create phi of ops for"); |
103 | | // Currently store defining access refinement is too slow due to basicaa being |
104 | | // egregiously slow. This flag lets us keep it working while we work on this |
105 | | // issue. |
106 | | static cl::opt<bool> EnableStoreRefinement("enable-store-refinement", |
107 | | cl::init(false), cl::Hidden); |
108 | | |
109 | | /// Currently, the generation "phi of ops" can result in correctness issues. |
110 | | static cl::opt<bool> EnablePhiOfOps("enable-phi-of-ops", cl::init(true), |
111 | | cl::Hidden); |
112 | | |
113 | | //===----------------------------------------------------------------------===// |
114 | | // GVN Pass |
115 | | //===----------------------------------------------------------------------===// |
116 | | |
117 | | // Anchor methods. |
118 | | namespace llvm { |
119 | | namespace GVNExpression { |
120 | 0 | Expression::~Expression() = default; |
121 | 0 | BasicExpression::~BasicExpression() = default; |
122 | 0 | CallExpression::~CallExpression() = default; |
123 | 0 | LoadExpression::~LoadExpression() = default; |
124 | 0 | StoreExpression::~StoreExpression() = default; |
125 | 0 | AggregateValueExpression::~AggregateValueExpression() = default; |
126 | 0 | PHIExpression::~PHIExpression() = default; |
127 | | } |
128 | | } |
129 | | |
130 | | namespace { |
131 | | // Tarjan's SCC finding algorithm with Nuutila's improvements |
132 | | // SCCIterator is actually fairly complex for the simple thing we want. |
133 | | // It also wants to hand us SCC's that are unrelated to the phi node we ask |
134 | | // about, and have us process them there or risk redoing work. |
135 | | // Graph traits over a filter iterator also doesn't work that well here. |
136 | | // This SCC finder is specialized to walk use-def chains, and only follows |
137 | | // instructions, |
138 | | // not generic values (arguments, etc). |
139 | | struct TarjanSCC { |
140 | | |
141 | 307 | TarjanSCC() : Components(1) {} |
142 | | |
143 | 137 | void Start(const Instruction *Start) { |
144 | 137 | if (Root.lookup(Start) == 0) |
145 | 91 | FindSCC(Start); |
146 | 137 | } |
147 | | |
148 | 137 | const SmallPtrSetImpl<const Value *> &getComponentFor(const Value *V) const { |
149 | 137 | unsigned ComponentID = ValueToComponent.lookup(V); |
150 | 137 | |
151 | 137 | assert(ComponentID > 0 && |
152 | 137 | "Asking for a component for a value we never processed"); |
153 | 137 | return Components[ComponentID]; |
154 | 137 | } |
155 | | |
156 | | private: |
157 | 343 | void FindSCC(const Instruction *I) { |
158 | 343 | Root[I] = ++DFSNum; |
159 | 343 | // Store the DFS Number we had before it possibly gets incremented. |
160 | 343 | unsigned int OurDFS = DFSNum; |
161 | 632 | for (auto &Op : I->operands()) { |
162 | 632 | if (auto *InstOp632 = dyn_cast<Instruction>(Op)) { |
163 | 355 | if (Root.lookup(Op) == 0) |
164 | 252 | FindSCC(InstOp); |
165 | 355 | if (!InComponent.count(Op)) |
166 | 131 | Root[I] = std::min(Root.lookup(I), Root.lookup(Op)); |
167 | 355 | } |
168 | 632 | } |
169 | 343 | // See if we really were the root of a component, by seeing if we still have |
170 | 343 | // our DFSNumber. If we do, we are the root of the component, and we have |
171 | 343 | // completed a component. If we do not, we are not the root of a component, |
172 | 343 | // and belong on the component stack. |
173 | 343 | if (Root.lookup(I) == OurDFS343 ) { |
174 | 270 | unsigned ComponentID = Components.size(); |
175 | 270 | Components.resize(Components.size() + 1); |
176 | 270 | auto &Component = Components.back(); |
177 | 270 | Component.insert(I); |
178 | 270 | DEBUG(dbgs() << "Component root is " << *I << "\n"); |
179 | 270 | InComponent.insert(I); |
180 | 270 | ValueToComponent[I] = ComponentID; |
181 | 270 | // Pop a component off the stack and label it. |
182 | 343 | while (!Stack.empty() && 343 Root.lookup(Stack.back()) >= OurDFS87 ) { |
183 | 73 | auto *Member = Stack.back(); |
184 | 73 | DEBUG(dbgs() << "Component member is " << *Member << "\n"); |
185 | 73 | Component.insert(Member); |
186 | 73 | InComponent.insert(Member); |
187 | 73 | ValueToComponent[Member] = ComponentID; |
188 | 73 | Stack.pop_back(); |
189 | 73 | } |
190 | 343 | } else { |
191 | 73 | // Part of a component, push to stack |
192 | 73 | Stack.push_back(I); |
193 | 73 | } |
194 | 343 | } |
195 | | unsigned int DFSNum = 1; |
196 | | SmallPtrSet<const Value *, 8> InComponent; |
197 | | DenseMap<const Value *, unsigned int> Root; |
198 | | SmallVector<const Value *, 8> Stack; |
199 | | // Store the components as vector of ptr sets, because we need the topo order |
200 | | // of SCC's, but not individual member order |
201 | | SmallVector<SmallPtrSet<const Value *, 8>, 8> Components; |
202 | | DenseMap<const Value *, unsigned> ValueToComponent; |
203 | | }; |
204 | | // Congruence classes represent the set of expressions/instructions |
205 | | // that are all the same *during some scope in the function*. |
206 | | // That is, because of the way we perform equality propagation, and |
207 | | // because of memory value numbering, it is not correct to assume |
208 | | // you can willy-nilly replace any member with any other at any |
209 | | // point in the function. |
210 | | // |
211 | | // For any Value in the Member set, it is valid to replace any dominated member |
212 | | // with that Value. |
213 | | // |
214 | | // Every congruence class has a leader, and the leader is used to symbolize |
215 | | // instructions in a canonical way (IE every operand of an instruction that is a |
216 | | // member of the same congruence class will always be replaced with leader |
217 | | // during symbolization). To simplify symbolization, we keep the leader as a |
218 | | // constant if class can be proved to be a constant value. Otherwise, the |
219 | | // leader is the member of the value set with the smallest DFS number. Each |
220 | | // congruence class also has a defining expression, though the expression may be |
221 | | // null. If it exists, it can be used for forward propagation and reassociation |
222 | | // of values. |
223 | | |
224 | | // For memory, we also track a representative MemoryAccess, and a set of memory |
225 | | // members for MemoryPhis (which have no real instructions). Note that for |
226 | | // memory, it seems tempting to try to split the memory members into a |
227 | | // MemoryCongruenceClass or something. Unfortunately, this does not work |
228 | | // easily. The value numbering of a given memory expression depends on the |
229 | | // leader of the memory congruence class, and the leader of memory congruence |
230 | | // class depends on the value numbering of a given memory expression. This |
231 | | // leads to wasted propagation, and in some cases, missed optimization. For |
232 | | // example: If we had value numbered two stores together before, but now do not, |
233 | | // we move them to a new value congruence class. This in turn will move at one |
234 | | // of the memorydefs to a new memory congruence class. Which in turn, affects |
235 | | // the value numbering of the stores we just value numbered (because the memory |
236 | | // congruence class is part of the value number). So while theoretically |
237 | | // possible to split them up, it turns out to be *incredibly* complicated to get |
238 | | // it to work right, because of the interdependency. While structurally |
239 | | // slightly messier, it is algorithmically much simpler and faster to do what we |
240 | | // do here, and track them both at once in the same class. |
241 | | // Note: The default iterators for this class iterate over values |
242 | | class CongruenceClass { |
243 | | public: |
244 | | using MemberType = Value; |
245 | | using MemberSet = SmallPtrSet<MemberType *, 4>; |
246 | | using MemoryMemberType = MemoryPhi; |
247 | | using MemoryMemberSet = SmallPtrSet<const MemoryMemberType *, 2>; |
248 | | |
249 | 0 | explicit CongruenceClass(unsigned ID) : ID(ID) {} |
250 | | CongruenceClass(unsigned ID, Value *Leader, const Expression *E) |
251 | 3.15k | : ID(ID), RepLeader(Leader), DefiningExpr(E) {} |
252 | 0 | unsigned getID() const { return ID; } |
253 | | // True if this class has no members left. This is mainly used for assertion |
254 | | // purposes, and for skipping empty classes. |
255 | 3.15k | bool isDead() const { |
256 | 3.15k | // If it's both dead from a value perspective, and dead from a memory |
257 | 3.15k | // perspective, it's really dead. |
258 | 921 | return empty() && memory_empty(); |
259 | 3.15k | } |
260 | | // Leader functions |
261 | 11.9k | Value *getLeader() const { return RepLeader; } |
262 | 2.07k | void setLeader(Value *Leader) { RepLeader = Leader; } |
263 | 2.68k | const std::pair<Value *, unsigned int> &getNextLeader() const { |
264 | 2.68k | return NextLeader; |
265 | 2.68k | } |
266 | 204 | void resetNextLeader() { NextLeader = {nullptr, ~0}; } |
267 | | |
268 | 948 | void addPossibleNextLeader(std::pair<Value *, unsigned int> LeaderPair) { |
269 | 948 | if (LeaderPair.second < NextLeader.second) |
270 | 700 | NextLeader = LeaderPair; |
271 | 948 | } |
272 | | |
273 | 9.93k | Value *getStoredValue() const { return RepStoredValue; } |
274 | 262 | void setStoredValue(Value *Leader) { RepStoredValue = Leader; } |
275 | 2.47k | const MemoryAccess *getMemoryLeader() const { return RepMemoryAccess; } |
276 | 1.21k | void setMemoryLeader(const MemoryAccess *Leader) { RepMemoryAccess = Leader; } |
277 | | |
278 | | // Forward propagation info |
279 | 542 | const Expression *getDefiningExpr() const { return DefiningExpr; } |
280 | | |
281 | | // Value member set |
282 | 8.14k | bool empty() const { return Members.empty(); } |
283 | 1.44k | unsigned size() const { return Members.size(); } |
284 | 3.61k | MemberSet::const_iterator begin() const { return Members.begin(); } |
285 | 3.57k | MemberSet::const_iterator end() const { return Members.end(); } |
286 | 5.41k | void insert(MemberType *M) { Members.insert(M); } |
287 | 2.67k | void erase(MemberType *M) { Members.erase(M); } |
288 | 2.93k | void swap(MemberSet &Other) { Members.swap(Other); } |
289 | | |
290 | | // Memory member set |
291 | 953 | bool memory_empty() const { return MemoryMembers.empty(); } |
292 | 8 | unsigned memory_size() const { return MemoryMembers.size(); } |
293 | 485 | MemoryMemberSet::const_iterator memory_begin() const { |
294 | 485 | return MemoryMembers.begin(); |
295 | 485 | } |
296 | 479 | MemoryMemberSet::const_iterator memory_end() const { |
297 | 479 | return MemoryMembers.end(); |
298 | 479 | } |
299 | 479 | iterator_range<MemoryMemberSet::const_iterator> memory() const { |
300 | 479 | return make_range(memory_begin(), memory_end()); |
301 | 479 | } |
302 | 313 | void memory_insert(const MemoryMemberType *M) { MemoryMembers.insert(M); } |
303 | 185 | void memory_erase(const MemoryMemberType *M) { MemoryMembers.erase(M); } |
304 | | |
305 | | // Store count |
306 | 2.50k | unsigned getStoreCount() const { return StoreCount; } |
307 | 533 | void incStoreCount() { ++StoreCount; } |
308 | 276 | void decStoreCount() { |
309 | 276 | assert(StoreCount != 0 && "Store count went negative"); |
310 | 276 | --StoreCount; |
311 | 276 | } |
312 | | |
313 | | // True if this class has no memory members. |
314 | 36 | bool definesNoMemory() const { return StoreCount == 0 && 36 memory_empty()32 ; } |
315 | | |
316 | | // Return true if two congruence classes are equivalent to each other. This |
317 | | // means |
318 | | // that every field but the ID number and the dead field are equivalent. |
319 | 0 | bool isEquivalentTo(const CongruenceClass *Other) const { |
320 | 0 | if (!Other) |
321 | 0 | return false; |
322 | 0 | if (this == Other) |
323 | 0 | return true; |
324 | 0 |
|
325 | 0 | if (std::tie(StoreCount, RepLeader, RepStoredValue, RepMemoryAccess) != |
326 | 0 | std::tie(Other->StoreCount, Other->RepLeader, Other->RepStoredValue, |
327 | 0 | Other->RepMemoryAccess)) |
328 | 0 | return false; |
329 | 0 | if (DefiningExpr != Other->DefiningExpr) |
330 | 0 | if (!DefiningExpr || !Other->DefiningExpr || |
331 | 0 | *DefiningExpr != *Other->DefiningExpr) |
332 | 0 | return false; |
333 | 0 | // We need some ordered set |
334 | 0 | std::set<Value *> AMembers(Members.begin(), Members.end()); |
335 | 0 | std::set<Value *> BMembers(Members.begin(), Members.end()); |
336 | 0 | return AMembers == BMembers; |
337 | 0 | } |
338 | | |
339 | | private: |
340 | | unsigned ID; |
341 | | // Representative leader. |
342 | | Value *RepLeader = nullptr; |
343 | | // The most dominating leader after our current leader, because the member set |
344 | | // is not sorted and is expensive to keep sorted all the time. |
345 | | std::pair<Value *, unsigned int> NextLeader = {nullptr, ~0U}; |
346 | | // If this is represented by a store, the value of the store. |
347 | | Value *RepStoredValue = nullptr; |
348 | | // If this class contains MemoryDefs or MemoryPhis, this is the leading memory |
349 | | // access. |
350 | | const MemoryAccess *RepMemoryAccess = nullptr; |
351 | | // Defining Expression. |
352 | | const Expression *DefiningExpr = nullptr; |
353 | | // Actual members of this class. |
354 | | MemberSet Members; |
355 | | // This is the set of MemoryPhis that exist in the class. MemoryDefs and |
356 | | // MemoryUses have real instructions representing them, so we only need to |
357 | | // track MemoryPhis here. |
358 | | MemoryMemberSet MemoryMembers; |
359 | | // Number of stores in this congruence class. |
360 | | // This is used so we can detect store equivalence changes properly. |
361 | | int StoreCount = 0; |
362 | | }; |
363 | | } // namespace |
364 | | |
365 | | namespace llvm { |
366 | | struct ExactEqualsExpression { |
367 | | const Expression &E; |
368 | 2.95k | explicit ExactEqualsExpression(const Expression &E) : E(E) {} |
369 | 743 | hash_code getComputedHash() const { return E.getComputedHash(); } |
370 | 350 | bool operator==(const Expression &Other) const { |
371 | 350 | return E.exactlyEquals(Other); |
372 | 350 | } |
373 | | }; |
374 | | |
375 | | template <> struct DenseMapInfo<const Expression *> { |
376 | 15.8k | static const Expression *getEmptyKey() { |
377 | 15.8k | auto Val = static_cast<uintptr_t>(-1); |
378 | 15.8k | Val <<= PointerLikeTypeTraits<const Expression *>::NumLowBitsAvailable; |
379 | 15.8k | return reinterpret_cast<const Expression *>(Val); |
380 | 15.8k | } |
381 | 15.0k | static const Expression *getTombstoneKey() { |
382 | 15.0k | auto Val = static_cast<uintptr_t>(~1U); |
383 | 15.0k | Val <<= PointerLikeTypeTraits<const Expression *>::NumLowBitsAvailable; |
384 | 15.0k | return reinterpret_cast<const Expression *>(Val); |
385 | 15.0k | } |
386 | 3.10k | static unsigned getHashValue(const Expression *E) { |
387 | 3.10k | return E->getComputedHash(); |
388 | 3.10k | } |
389 | 743 | static unsigned getHashValue(const ExactEqualsExpression &E) { |
390 | 743 | return E.getComputedHash(); |
391 | 743 | } |
392 | 845 | static bool isEqual(const ExactEqualsExpression &LHS, const Expression *RHS) { |
393 | 845 | if (RHS == getTombstoneKey() || 845 RHS == getEmptyKey()804 ) |
394 | 495 | return false; |
395 | 350 | return LHS == *RHS; |
396 | 350 | } |
397 | | |
398 | 33.7k | static bool isEqual(const Expression *LHS, const Expression *RHS) { |
399 | 33.7k | if (LHS == RHS) |
400 | 29.0k | return true; |
401 | 4.79k | if (4.79k LHS == getTombstoneKey() || 4.79k RHS == getTombstoneKey()4.65k || |
402 | 4.79k | LHS == getEmptyKey()4.06k || RHS == getEmptyKey()4.06k ) |
403 | 3.64k | return false; |
404 | 1.15k | // Compare hashes before equality. This is *not* what the hashtable does, |
405 | 1.15k | // since it is computing it modulo the number of buckets, whereas we are |
406 | 1.15k | // using the full hash keyspace. Since the hashes are precomputed, this |
407 | 1.15k | // check is *much* faster than equality. |
408 | 1.15k | if (1.15k LHS->getComputedHash() != RHS->getComputedHash()1.15k ) |
409 | 408 | return false; |
410 | 747 | return *LHS == *RHS; |
411 | 747 | } |
412 | | }; |
413 | | } // end namespace llvm |
414 | | |
415 | | namespace { |
416 | | class NewGVN { |
417 | | Function &F; |
418 | | DominatorTree *DT; |
419 | | const TargetLibraryInfo *TLI; |
420 | | AliasAnalysis *AA; |
421 | | MemorySSA *MSSA; |
422 | | MemorySSAWalker *MSSAWalker; |
423 | | const DataLayout &DL; |
424 | | std::unique_ptr<PredicateInfo> PredInfo; |
425 | | |
426 | | // These are the only two things the create* functions should have |
427 | | // side-effects on due to allocating memory. |
428 | | mutable BumpPtrAllocator ExpressionAllocator; |
429 | | mutable ArrayRecycler<Value *> ArgRecycler; |
430 | | mutable TarjanSCC SCCFinder; |
431 | | const SimplifyQuery SQ; |
432 | | |
433 | | // Number of function arguments, used by ranking |
434 | | unsigned int NumFuncArgs; |
435 | | |
436 | | // RPOOrdering of basic blocks |
437 | | DenseMap<const DomTreeNode *, unsigned> RPOOrdering; |
438 | | |
439 | | // Congruence class info. |
440 | | |
441 | | // This class is called INITIAL in the paper. It is the class everything |
442 | | // startsout in, and represents any value. Being an optimistic analysis, |
443 | | // anything in the TOP class has the value TOP, which is indeterminate and |
444 | | // equivalent to everything. |
445 | | CongruenceClass *TOPClass; |
446 | | std::vector<CongruenceClass *> CongruenceClasses; |
447 | | unsigned NextCongruenceNum; |
448 | | |
449 | | // Value Mappings. |
450 | | DenseMap<Value *, CongruenceClass *> ValueToClass; |
451 | | DenseMap<Value *, const Expression *> ValueToExpression; |
452 | | // Value PHI handling, used to make equivalence between phi(op, op) and |
453 | | // op(phi, phi). |
454 | | // These mappings just store various data that would normally be part of the |
455 | | // IR. |
456 | | DenseSet<const Instruction *> PHINodeUses; |
457 | | DenseMap<const Value *, bool> OpSafeForPHIOfOps; |
458 | | // Map a temporary instruction we created to a parent block. |
459 | | DenseMap<const Value *, BasicBlock *> TempToBlock; |
460 | | // Map between the already in-program instructions and the temporary phis we |
461 | | // created that they are known equivalent to. |
462 | | DenseMap<const Value *, PHINode *> RealToTemp; |
463 | | // In order to know when we should re-process instructions that have |
464 | | // phi-of-ops, we track the set of expressions that they needed as |
465 | | // leaders. When we discover new leaders for those expressions, we process the |
466 | | // associated phi-of-op instructions again in case they have changed. The |
467 | | // other way they may change is if they had leaders, and those leaders |
468 | | // disappear. However, at the point they have leaders, there are uses of the |
469 | | // relevant operands in the created phi node, and so they will get reprocessed |
470 | | // through the normal user marking we perform. |
471 | | mutable DenseMap<const Value *, SmallPtrSet<Value *, 2>> AdditionalUsers; |
472 | | DenseMap<const Expression *, SmallPtrSet<Instruction *, 2>> |
473 | | ExpressionToPhiOfOps; |
474 | | // Map from basic block to the temporary operations we created |
475 | | DenseMap<const BasicBlock *, SmallPtrSet<PHINode *, 2>> PHIOfOpsPHIs; |
476 | | // Map from temporary operation to MemoryAccess. |
477 | | DenseMap<const Instruction *, MemoryUseOrDef *> TempToMemory; |
478 | | // Set of all temporary instructions we created. |
479 | | // Note: This will include instructions that were just created during value |
480 | | // numbering. The way to test if something is using them is to check |
481 | | // RealToTemp. |
482 | | |
483 | | DenseSet<Instruction *> AllTempInstructions; |
484 | | |
485 | | // Mapping from predicate info we used to the instructions we used it with. |
486 | | // In order to correctly ensure propagation, we must keep track of what |
487 | | // comparisons we used, so that when the values of the comparisons change, we |
488 | | // propagate the information to the places we used the comparison. |
489 | | mutable DenseMap<const Value *, SmallPtrSet<Instruction *, 2>> |
490 | | PredicateToUsers; |
491 | | // the same reasoning as PredicateToUsers. When we skip MemoryAccesses for |
492 | | // stores, we no longer can rely solely on the def-use chains of MemorySSA. |
493 | | mutable DenseMap<const MemoryAccess *, SmallPtrSet<MemoryAccess *, 2>> |
494 | | MemoryToUsers; |
495 | | |
496 | | // A table storing which memorydefs/phis represent a memory state provably |
497 | | // equivalent to another memory state. |
498 | | // We could use the congruence class machinery, but the MemoryAccess's are |
499 | | // abstract memory states, so they can only ever be equivalent to each other, |
500 | | // and not to constants, etc. |
501 | | DenseMap<const MemoryAccess *, CongruenceClass *> MemoryAccessToClass; |
502 | | |
503 | | // We could, if we wanted, build MemoryPhiExpressions and |
504 | | // MemoryVariableExpressions, etc, and value number them the same way we value |
505 | | // number phi expressions. For the moment, this seems like overkill. They |
506 | | // can only exist in one of three states: they can be TOP (equal to |
507 | | // everything), Equivalent to something else, or unique. Because we do not |
508 | | // create expressions for them, we need to simulate leader change not just |
509 | | // when they change class, but when they change state. Note: We can do the |
510 | | // same thing for phis, and avoid having phi expressions if we wanted, We |
511 | | // should eventually unify in one direction or the other, so this is a little |
512 | | // bit of an experiment in which turns out easier to maintain. |
513 | | enum MemoryPhiState { MPS_Invalid, MPS_TOP, MPS_Equivalent, MPS_Unique }; |
514 | | DenseMap<const MemoryPhi *, MemoryPhiState> MemoryPhiState; |
515 | | |
516 | | enum InstCycleState { ICS_Unknown, ICS_CycleFree, ICS_Cycle }; |
517 | | mutable DenseMap<const Instruction *, InstCycleState> InstCycleState; |
518 | | // Expression to class mapping. |
519 | | using ExpressionClassMap = DenseMap<const Expression *, CongruenceClass *>; |
520 | | ExpressionClassMap ExpressionToClass; |
521 | | |
522 | | // We have a single expression that represents currently DeadExpressions. |
523 | | // For dead expressions we can prove will stay dead, we mark them with |
524 | | // DFS number zero. However, it's possible in the case of phi nodes |
525 | | // for us to assume/prove all arguments are dead during fixpointing. |
526 | | // We use DeadExpression for that case. |
527 | | DeadExpression *SingletonDeadExpression = nullptr; |
528 | | |
529 | | // Which values have changed as a result of leader changes. |
530 | | SmallPtrSet<Value *, 8> LeaderChanges; |
531 | | |
532 | | // Reachability info. |
533 | | using BlockEdge = BasicBlockEdge; |
534 | | DenseSet<BlockEdge> ReachableEdges; |
535 | | SmallPtrSet<const BasicBlock *, 8> ReachableBlocks; |
536 | | |
537 | | // This is a bitvector because, on larger functions, we may have |
538 | | // thousands of touched instructions at once (entire blocks, |
539 | | // instructions with hundreds of uses, etc). Even with optimization |
540 | | // for when we mark whole blocks as touched, when this was a |
541 | | // SmallPtrSet or DenseSet, for some functions, we spent >20% of all |
542 | | // the time in GVN just managing this list. The bitvector, on the |
543 | | // other hand, efficiently supports test/set/clear of both |
544 | | // individual and ranges, as well as "find next element" This |
545 | | // enables us to use it as a worklist with essentially 0 cost. |
546 | | BitVector TouchedInstructions; |
547 | | |
548 | | DenseMap<const BasicBlock *, std::pair<unsigned, unsigned>> BlockInstRange; |
549 | | |
550 | | #ifndef NDEBUG |
551 | | // Debugging for how many times each block and instruction got processed. |
552 | | DenseMap<const Value *, unsigned> ProcessedCount; |
553 | | #endif |
554 | | |
555 | | // DFS info. |
556 | | // This contains a mapping from Instructions to DFS numbers. |
557 | | // The numbering starts at 1. An instruction with DFS number zero |
558 | | // means that the instruction is dead. |
559 | | DenseMap<const Value *, unsigned> InstrDFS; |
560 | | |
561 | | // This contains the mapping DFS numbers to instructions. |
562 | | SmallVector<Value *, 32> DFSToInstr; |
563 | | |
564 | | // Deletion info. |
565 | | SmallPtrSet<Instruction *, 8> InstructionsToErase; |
566 | | |
567 | | public: |
568 | | NewGVN(Function &F, DominatorTree *DT, AssumptionCache *AC, |
569 | | TargetLibraryInfo *TLI, AliasAnalysis *AA, MemorySSA *MSSA, |
570 | | const DataLayout &DL) |
571 | | : F(F), DT(DT), TLI(TLI), AA(AA), MSSA(MSSA), DL(DL), |
572 | 307 | PredInfo(make_unique<PredicateInfo>(F, *DT, *AC)), SQ(DL, TLI, DT, AC) { |
573 | 307 | } |
574 | | bool runGVN(); |
575 | | |
576 | | private: |
577 | | // Expression handling. |
578 | | const Expression *createExpression(Instruction *) const; |
579 | | const Expression *createBinaryExpression(unsigned, Type *, Value *, Value *, |
580 | | Instruction *) const; |
581 | | PHIExpression *createPHIExpression(Instruction *, bool &HasBackEdge, |
582 | | bool &OriginalOpsConstant) const; |
583 | | const DeadExpression *createDeadExpression() const; |
584 | | const VariableExpression *createVariableExpression(Value *) const; |
585 | | const ConstantExpression *createConstantExpression(Constant *) const; |
586 | | const Expression *createVariableOrConstant(Value *V) const; |
587 | | const UnknownExpression *createUnknownExpression(Instruction *) const; |
588 | | const StoreExpression *createStoreExpression(StoreInst *, |
589 | | const MemoryAccess *) const; |
590 | | LoadExpression *createLoadExpression(Type *, Value *, LoadInst *, |
591 | | const MemoryAccess *) const; |
592 | | const CallExpression *createCallExpression(CallInst *, |
593 | | const MemoryAccess *) const; |
594 | | const AggregateValueExpression * |
595 | | createAggregateValueExpression(Instruction *) const; |
596 | | bool setBasicExpressionInfo(Instruction *, BasicExpression *) const; |
597 | | |
598 | | // Congruence class handling. |
599 | 3.15k | CongruenceClass *createCongruenceClass(Value *Leader, const Expression *E) { |
600 | 3.15k | auto *result = new CongruenceClass(NextCongruenceNum++, Leader, E); |
601 | 3.15k | CongruenceClasses.emplace_back(result); |
602 | 3.15k | return result; |
603 | 3.15k | } |
604 | | |
605 | 411 | CongruenceClass *createMemoryClass(MemoryAccess *MA) { |
606 | 411 | auto *CC = createCongruenceClass(nullptr, nullptr); |
607 | 411 | CC->setMemoryLeader(MA); |
608 | 411 | return CC; |
609 | 411 | } |
610 | 134 | CongruenceClass *ensureLeaderOfMemoryClass(MemoryAccess *MA) { |
611 | 134 | auto *CC = getMemoryClass(MA); |
612 | 134 | if (CC->getMemoryLeader() != MA) |
613 | 104 | CC = createMemoryClass(MA); |
614 | 134 | return CC; |
615 | 134 | } |
616 | | |
617 | 421 | CongruenceClass *createSingletonCongruenceClass(Value *Member) { |
618 | 421 | CongruenceClass *CClass = createCongruenceClass(Member, nullptr); |
619 | 421 | CClass->insert(Member); |
620 | 421 | ValueToClass[Member] = CClass; |
621 | 421 | return CClass; |
622 | 421 | } |
623 | | void initializeCongruenceClasses(Function &F); |
624 | | const Expression *makePossiblePhiOfOps(Instruction *, |
625 | | SmallPtrSetImpl<Value *> &); |
626 | | Value *findLeaderForInst(Instruction *ValueOp, |
627 | | SmallPtrSetImpl<Value *> &Visited, |
628 | | MemoryAccess *MemAccess, Instruction *OrigInst, |
629 | | BasicBlock *PredBB); |
630 | | |
631 | | bool OpIsSafeForPHIOfOps(Value *Op, Instruction *OrigInst, |
632 | | const BasicBlock *PHIBlock, |
633 | | SmallPtrSetImpl<const Value *> &); |
634 | | void addPhiOfOps(PHINode *Op, BasicBlock *BB, Instruction *ExistingValue); |
635 | | void removePhiOfOps(Instruction *I, PHINode *PHITemp); |
636 | | |
637 | | // Value number an Instruction or MemoryPhi. |
638 | | void valueNumberMemoryPhi(MemoryPhi *); |
639 | | void valueNumberInstruction(Instruction *); |
640 | | |
641 | | // Symbolic evaluation. |
642 | | const Expression *checkSimplificationResults(Expression *, Instruction *, |
643 | | Value *) const; |
644 | | const Expression *performSymbolicEvaluation(Value *, |
645 | | SmallPtrSetImpl<Value *> &) const; |
646 | | const Expression *performSymbolicLoadCoercion(Type *, Value *, LoadInst *, |
647 | | Instruction *, |
648 | | MemoryAccess *) const; |
649 | | const Expression *performSymbolicLoadEvaluation(Instruction *) const; |
650 | | const Expression *performSymbolicStoreEvaluation(Instruction *) const; |
651 | | const Expression *performSymbolicCallEvaluation(Instruction *) const; |
652 | | const Expression *performSymbolicPHIEvaluation(Instruction *) const; |
653 | | const Expression *performSymbolicAggrValueEvaluation(Instruction *) const; |
654 | | const Expression *performSymbolicCmpEvaluation(Instruction *) const; |
655 | | const Expression *performSymbolicPredicateInfoEvaluation(Instruction *) const; |
656 | | |
657 | | // Congruence finding. |
658 | | bool someEquivalentDominates(const Instruction *, const Instruction *) const; |
659 | | Value *lookupOperandLeader(Value *) const; |
660 | | CongruenceClass *getClassForExpression(const Expression *E) const; |
661 | | void performCongruenceFinding(Instruction *, const Expression *); |
662 | | void moveValueToNewCongruenceClass(Instruction *, const Expression *, |
663 | | CongruenceClass *, CongruenceClass *); |
664 | | void moveMemoryToNewCongruenceClass(Instruction *, MemoryAccess *, |
665 | | CongruenceClass *, CongruenceClass *); |
666 | | Value *getNextValueLeader(CongruenceClass *) const; |
667 | | const MemoryAccess *getNextMemoryLeader(CongruenceClass *) const; |
668 | | bool setMemoryClass(const MemoryAccess *From, CongruenceClass *To); |
669 | | CongruenceClass *getMemoryClass(const MemoryAccess *MA) const; |
670 | | const MemoryAccess *lookupMemoryLeader(const MemoryAccess *) const; |
671 | | bool isMemoryAccessTOP(const MemoryAccess *) const; |
672 | | |
673 | | // Ranking |
674 | | unsigned int getRank(const Value *) const; |
675 | | bool shouldSwapOperands(const Value *, const Value *) const; |
676 | | |
677 | | // Reachability handling. |
678 | | void updateReachableEdge(BasicBlock *, BasicBlock *); |
679 | | void processOutgoingEdges(TerminatorInst *, BasicBlock *); |
680 | | Value *findConditionEquivalence(Value *) const; |
681 | | |
682 | | // Elimination. |
683 | | struct ValueDFS; |
684 | | void convertClassToDFSOrdered(const CongruenceClass &, |
685 | | SmallVectorImpl<ValueDFS> &, |
686 | | DenseMap<const Value *, unsigned int> &, |
687 | | SmallPtrSetImpl<Instruction *> &) const; |
688 | | void convertClassToLoadsAndStores(const CongruenceClass &, |
689 | | SmallVectorImpl<ValueDFS> &) const; |
690 | | |
691 | | bool eliminateInstructions(Function &); |
692 | | void replaceInstruction(Instruction *, Value *); |
693 | | void markInstructionForDeletion(Instruction *); |
694 | | void deleteInstructionsInBlock(BasicBlock *); |
695 | | Value *findPHIOfOpsLeader(const Expression *, const Instruction *, |
696 | | const BasicBlock *) const; |
697 | | |
698 | | // New instruction creation. |
699 | 0 | void handleNewInstruction(Instruction *){}; |
700 | | |
701 | | // Various instruction touch utilities |
702 | | template <typename Map, typename KeyType, typename Func> |
703 | | void for_each_found(Map &, const KeyType &, Func); |
704 | | template <typename Map, typename KeyType> |
705 | | void touchAndErase(Map &, const KeyType &); |
706 | | void markUsersTouched(Value *); |
707 | | void markMemoryUsersTouched(const MemoryAccess *); |
708 | | void markMemoryDefTouched(const MemoryAccess *); |
709 | | void markPredicateUsersTouched(Instruction *); |
710 | | void markValueLeaderChangeTouched(CongruenceClass *CC); |
711 | | void markMemoryLeaderChangeTouched(CongruenceClass *CC); |
712 | | void markPhiOfOpsChanged(const Expression *E); |
713 | | void addPredicateUsers(const PredicateBase *, Instruction *) const; |
714 | | void addMemoryUsers(const MemoryAccess *To, MemoryAccess *U) const; |
715 | | void addAdditionalUsers(Value *To, Value *User) const; |
716 | | |
717 | | // Main loop of value numbering |
718 | | void iterateTouchedInstructions(); |
719 | | |
720 | | // Utilities. |
721 | | void cleanupTables(); |
722 | | std::pair<unsigned, unsigned> assignDFSNumbers(BasicBlock *, unsigned); |
723 | | void updateProcessedCount(const Value *V); |
724 | | void verifyMemoryCongruency() const; |
725 | | void verifyIterationSettled(Function &F); |
726 | | void verifyStoreExpressions() const; |
727 | | bool singleReachablePHIPath(SmallPtrSet<const MemoryAccess *, 8> &, |
728 | | const MemoryAccess *, const MemoryAccess *) const; |
729 | | BasicBlock *getBlockForValue(Value *V) const; |
730 | | void deleteExpression(const Expression *E) const; |
731 | | MemoryUseOrDef *getMemoryAccess(const Instruction *) const; |
732 | | MemoryAccess *getDefiningAccess(const MemoryAccess *) const; |
733 | | MemoryPhi *getMemoryAccess(const BasicBlock *) const; |
734 | | template <class T, class Range> T *getMinDFSOfRange(const Range &) const; |
735 | 7.89k | unsigned InstrToDFSNum(const Value *V) const { |
736 | 7.89k | assert(isa<Instruction>(V) && "This should not be used for MemoryAccesses"); |
737 | 7.89k | return InstrDFS.lookup(V); |
738 | 7.89k | } |
739 | | |
740 | 153 | unsigned InstrToDFSNum(const MemoryAccess *MA) const { |
741 | 153 | return MemoryToDFSNum(MA); |
742 | 153 | } |
743 | 4.52k | Value *InstrFromDFSNum(unsigned DFSNum) { return DFSToInstr[DFSNum]; } |
744 | | // Given a MemoryAccess, return the relevant instruction DFS number. Note: |
745 | | // This deliberately takes a value so it can be used with Use's, which will |
746 | | // auto-convert to Value's but not to MemoryAccess's. |
747 | 1.20k | unsigned MemoryToDFSNum(const Value *MA) const { |
748 | 1.20k | assert(isa<MemoryAccess>(MA) && |
749 | 1.20k | "This should not be used with instructions"); |
750 | 1.20k | return isa<MemoryUseOrDef>(MA) |
751 | 770 | ? InstrToDFSNum(cast<MemoryUseOrDef>(MA)->getMemoryInst()) |
752 | 435 | : InstrDFS.lookup(MA); |
753 | 1.20k | } |
754 | | bool isCycleFree(const Instruction *) const; |
755 | | bool isBackedge(BasicBlock *From, BasicBlock *To) const; |
756 | | // Debug counter info. When verifying, we have to reset the value numbering |
757 | | // debug counter to the same state it started in to get the same results. |
758 | | std::pair<int, int> StartingVNCounter; |
759 | | }; |
760 | | } // end anonymous namespace |
761 | | |
762 | | template <typename T> |
763 | 436 | static bool equalsLoadStoreHelper(const T &LHS, const Expression &RHS) { |
764 | 436 | if (!isa<LoadExpression>(RHS) && 436 !isa<StoreExpression>(RHS)243 ) |
765 | 0 | return false; |
766 | 436 | return LHS.MemoryExpression::equals(RHS); |
767 | 436 | } NewGVN.cpp:bool equalsLoadStoreHelper<llvm::GVNExpression::LoadExpression>(llvm::GVNExpression::LoadExpression const&, llvm::GVNExpression::Expression const&) Line | Count | Source | 763 | 217 | static bool equalsLoadStoreHelper(const T &LHS, const Expression &RHS) { | 764 | 217 | if (!isa<LoadExpression>(RHS) && 217 !isa<StoreExpression>(RHS)84 ) | 765 | 0 | return false; | 766 | 217 | return LHS.MemoryExpression::equals(RHS); | 767 | 217 | } |
NewGVN.cpp:bool equalsLoadStoreHelper<llvm::GVNExpression::StoreExpression>(llvm::GVNExpression::StoreExpression const&, llvm::GVNExpression::Expression const&) Line | Count | Source | 763 | 219 | static bool equalsLoadStoreHelper(const T &LHS, const Expression &RHS) { | 764 | 219 | if (!isa<LoadExpression>(RHS) && 219 !isa<StoreExpression>(RHS)159 ) | 765 | 0 | return false; | 766 | 219 | return LHS.MemoryExpression::equals(RHS); | 767 | 219 | } |
|
768 | | |
769 | 217 | bool LoadExpression::equals(const Expression &Other) const { |
770 | 217 | return equalsLoadStoreHelper(*this, Other); |
771 | 217 | } |
772 | | |
773 | 219 | bool StoreExpression::equals(const Expression &Other) const { |
774 | 219 | if (!equalsLoadStoreHelper(*this, Other)) |
775 | 31 | return false; |
776 | 188 | // Make sure that store vs store includes the value operand. |
777 | 188 | if (const auto *188 S188 = dyn_cast<StoreExpression>(&Other)) |
778 | 128 | if (128 getStoredValue() != S->getStoredValue()128 ) |
779 | 36 | return false; |
780 | 152 | return true; |
781 | 152 | } |
782 | | |
783 | | // Determine if the edge From->To is a backedge |
784 | 750 | bool NewGVN::isBackedge(BasicBlock *From, BasicBlock *To) const { |
785 | 750 | return From == To || |
786 | 713 | RPOOrdering.lookup(DT->getNode(From)) >= |
787 | 713 | RPOOrdering.lookup(DT->getNode(To)); |
788 | 750 | } |
789 | | |
790 | | #ifndef NDEBUG |
791 | | static std::string getBlockName(const BasicBlock *B) { |
792 | | return DOTGraphTraits<const Function *>::getSimpleNodeLabel(B, nullptr); |
793 | | } |
794 | | #endif |
795 | | |
796 | | // Get a MemoryAccess for an instruction, fake or real. |
797 | 6.94k | MemoryUseOrDef *NewGVN::getMemoryAccess(const Instruction *I) const { |
798 | 6.94k | auto *Result = MSSA->getMemoryAccess(I); |
799 | 6.94k | return Result ? Result2.83k : TempToMemory.lookup(I)4.11k ; |
800 | 6.94k | } |
801 | | |
802 | | // Get a MemoryPhi for a basic block. These are all real. |
803 | 1.45k | MemoryPhi *NewGVN::getMemoryAccess(const BasicBlock *BB) const { |
804 | 1.45k | return MSSA->getMemoryAccess(BB); |
805 | 1.45k | } |
806 | | |
807 | | // Get the basic block from an instruction/memory value. |
808 | 6.47k | BasicBlock *NewGVN::getBlockForValue(Value *V) const { |
809 | 6.47k | if (auto *I6.47k = dyn_cast<Instruction>(V)) { |
810 | 6.24k | auto *Parent = I->getParent(); |
811 | 6.24k | if (Parent) |
812 | 6.19k | return Parent; |
813 | 57 | Parent = TempToBlock.lookup(V); |
814 | 57 | assert(Parent && "Every fake instruction should have a block"); |
815 | 57 | return Parent; |
816 | 57 | } |
817 | 223 | |
818 | 223 | auto *MP = dyn_cast<MemoryPhi>(V); |
819 | 223 | assert(MP && "Should have been an instruction or a MemoryPhi"); |
820 | 223 | return MP->getBlock(); |
821 | 223 | } |
822 | | |
823 | | // Delete a definitely dead expression, so it can be reused by the expression |
824 | | // allocator. Some of these are not in creation functions, so we have to accept |
825 | | // const versions. |
826 | 780 | void NewGVN::deleteExpression(const Expression *E) const { |
827 | 780 | assert(isa<BasicExpression>(E)); |
828 | 780 | auto *BE = cast<BasicExpression>(E); |
829 | 780 | const_cast<BasicExpression *>(BE)->deallocateOperands(ArgRecycler); |
830 | 780 | ExpressionAllocator.Deallocate(E); |
831 | 780 | } |
832 | | |
833 | | // If V is a predicateinfo copy, get the thing it is a copy of. |
834 | 1.00k | static Value *getCopyOf(const Value *V) { |
835 | 1.00k | if (auto *II = dyn_cast<IntrinsicInst>(V)) |
836 | 65 | if (65 II->getIntrinsicID() == Intrinsic::ssa_copy65 ) |
837 | 65 | return II->getOperand(0); |
838 | 936 | return nullptr; |
839 | 936 | } |
840 | | |
841 | | // Return true if V is really PN, even accounting for predicateinfo copies. |
842 | 940 | static bool isCopyOfPHI(const Value *V, const PHINode *PN) { |
843 | 929 | return V == PN || getCopyOf(V) == PN; |
844 | 940 | } |
845 | | |
846 | 72 | static bool isCopyOfAPHI(const Value *V) { |
847 | 72 | auto *CO = getCopyOf(V); |
848 | 8 | return CO && isa<PHINode>(CO); |
849 | 72 | } |
850 | | |
851 | | PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge, |
852 | 468 | bool &OriginalOpsConstant) const { |
853 | 468 | BasicBlock *PHIBlock = getBlockForValue(I); |
854 | 468 | auto *PN = cast<PHINode>(I); |
855 | 468 | auto *E = |
856 | 468 | new (ExpressionAllocator) PHIExpression(PN->getNumOperands(), PHIBlock); |
857 | 468 | |
858 | 468 | E->allocateOperands(ArgRecycler, ExpressionAllocator); |
859 | 468 | E->setType(I->getType()); |
860 | 468 | E->setOpcode(I->getOpcode()); |
861 | 468 | |
862 | 468 | // NewGVN assumes the operands of a PHI node are in a consistent order across |
863 | 468 | // PHIs. LLVM doesn't seem to always guarantee this. While we need to fix |
864 | 468 | // this in LLVM at some point we don't want GVN to find wrong congruences. |
865 | 468 | // Therefore, here we sort uses in predecessor order. |
866 | 468 | // We're sorting the values by pointer. In theory this might be cause of |
867 | 468 | // non-determinism, but here we don't rely on the ordering for anything |
868 | 468 | // significant, e.g. we don't create new instructions based on it so we're |
869 | 468 | // fine. |
870 | 468 | SmallVector<const Use *, 4> PHIOperands; |
871 | 468 | for (const Use &U : PN->operands()) |
872 | 940 | PHIOperands.push_back(&U); |
873 | 468 | std::sort(PHIOperands.begin(), PHIOperands.end(), |
874 | 505 | [&](const Use *U1, const Use *U2) { |
875 | 505 | return PN->getIncomingBlock(*U1) < PN->getIncomingBlock(*U2); |
876 | 505 | }); |
877 | 468 | |
878 | 468 | // Filter out unreachable phi operands. |
879 | 940 | auto Filtered = make_filter_range(PHIOperands, [&](const Use *U) { |
880 | 940 | auto *BB = PN->getIncomingBlock(*U); |
881 | 940 | if (isCopyOfPHI(*U, PN)) |
882 | 13 | return false; |
883 | 927 | if (927 !ReachableEdges.count({BB, PHIBlock})927 ) |
884 | 169 | return false; |
885 | 758 | // Things in TOPClass are equivalent to everything. |
886 | 758 | if (758 ValueToClass.lookup(*U) == TOPClass758 ) |
887 | 0 | return false; |
888 | 758 | OriginalOpsConstant = OriginalOpsConstant && 758 isa<Constant>(*U)599 ; |
889 | 750 | HasBackedge = HasBackedge || isBackedge(BB, PHIBlock); |
890 | 940 | return lookupOperandLeader(*U) != PN; |
891 | 940 | }); |
892 | 468 | std::transform( |
893 | 468 | Filtered.begin(), Filtered.end(), op_inserter(E), |
894 | 749 | [&](const Use *U) -> Value * { return lookupOperandLeader(*U); }); |
895 | 468 | return E; |
896 | 468 | } |
897 | | |
898 | | // Set basic expression info (Arguments, type, opcode) for Expression |
899 | | // E from Instruction I in block B. |
900 | 1.44k | bool NewGVN::setBasicExpressionInfo(Instruction *I, BasicExpression *E) const { |
901 | 1.44k | bool AllConstant = true; |
902 | 1.44k | if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) |
903 | 226 | E->setType(GEP->getSourceElementType()); |
904 | 1.44k | else |
905 | 1.21k | E->setType(I->getType()); |
906 | 1.44k | E->setOpcode(I->getOpcode()); |
907 | 1.44k | E->allocateOperands(ArgRecycler, ExpressionAllocator); |
908 | 1.44k | |
909 | 1.44k | // Transform the operand array into an operand leader array, and keep track of |
910 | 1.44k | // whether all members are constant. |
911 | 2.97k | std::transform(I->op_begin(), I->op_end(), op_inserter(E), [&](Value *O) { |
912 | 2.97k | auto Operand = lookupOperandLeader(O); |
913 | 1.88k | AllConstant = AllConstant && isa<Constant>(Operand); |
914 | 2.97k | return Operand; |
915 | 2.97k | }); |
916 | 1.44k | |
917 | 1.44k | return AllConstant; |
918 | 1.44k | } |
919 | | |
920 | | const Expression *NewGVN::createBinaryExpression(unsigned Opcode, Type *T, |
921 | | Value *Arg1, Value *Arg2, |
922 | 8 | Instruction *I) const { |
923 | 8 | auto *E = new (ExpressionAllocator) BasicExpression(2); |
924 | 8 | |
925 | 8 | E->setType(T); |
926 | 8 | E->setOpcode(Opcode); |
927 | 8 | E->allocateOperands(ArgRecycler, ExpressionAllocator); |
928 | 8 | if (Instruction::isCommutative(Opcode)8 ) { |
929 | 6 | // Ensure that commutative instructions that only differ by a permutation |
930 | 6 | // of their operands get the same value number by sorting the operand value |
931 | 6 | // numbers. Since all commutative instructions have two operands it is more |
932 | 6 | // efficient to sort by hand rather than using, say, std::sort. |
933 | 6 | if (shouldSwapOperands(Arg1, Arg2)) |
934 | 0 | std::swap(Arg1, Arg2); |
935 | 6 | } |
936 | 8 | E->op_push_back(lookupOperandLeader(Arg1)); |
937 | 8 | E->op_push_back(lookupOperandLeader(Arg2)); |
938 | 8 | |
939 | 8 | Value *V = SimplifyBinOp(Opcode, E->getOperand(0), E->getOperand(1), SQ); |
940 | 8 | if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V)) |
941 | 2 | return SimplifiedE; |
942 | 6 | return E; |
943 | 6 | } |
944 | | |
945 | | // Take a Value returned by simplification of Expression E/Instruction |
946 | | // I, and see if it resulted in a simpler expression. If so, return |
947 | | // that expression. |
948 | | const Expression *NewGVN::checkSimplificationResults(Expression *E, |
949 | | Instruction *I, |
950 | 1.31k | Value *V) const { |
951 | 1.31k | if (!V) |
952 | 970 | return nullptr; |
953 | 347 | if (auto *347 C347 = dyn_cast<Constant>(V)) { |
954 | 287 | if (I) |
955 | 287 | DEBUG(dbgs() << "Simplified " << *I << " to " |
956 | 287 | << " constant " << *C << "\n"); |
957 | 287 | NumGVNOpsSimplified++; |
958 | 287 | assert(isa<BasicExpression>(E) && |
959 | 287 | "We should always have had a basic expression here"); |
960 | 287 | deleteExpression(E); |
961 | 287 | return createConstantExpression(C); |
962 | 60 | } else if (60 isa<Argument>(V) || 60 isa<GlobalVariable>(V)51 ) { |
963 | 9 | if (I) |
964 | 9 | DEBUG(dbgs() << "Simplified " << *I << " to " |
965 | 60 | << " variable " << *V << "\n"); |
966 | 60 | deleteExpression(E); |
967 | 60 | return createVariableExpression(V); |
968 | 60 | } |
969 | 51 | |
970 | 51 | CongruenceClass *CC = ValueToClass.lookup(V); |
971 | 51 | if (CC51 ) { |
972 | 51 | if (CC->getLeader() && 51 CC->getLeader() != I51 ) { |
973 | 51 | // Don't add temporary instructions to the user lists. |
974 | 51 | if (!AllTempInstructions.count(I)) |
975 | 50 | addAdditionalUsers(V, I); |
976 | 51 | return createVariableOrConstant(CC->getLeader()); |
977 | 51 | } |
978 | 0 | if (0 CC->getDefiningExpr()0 ) { |
979 | 0 | // If we simplified to something else, we need to communicate |
980 | 0 | // that we're users of the value we simplified to. |
981 | 0 | if (I != V0 ) { |
982 | 0 | // Don't add temporary instructions to the user lists. |
983 | 0 | if (!AllTempInstructions.count(I)) |
984 | 0 | addAdditionalUsers(V, I); |
985 | 0 | } |
986 | 0 |
|
987 | 0 | if (I) |
988 | 0 | DEBUG(dbgs() << "Simplified " << *I << " to " |
989 | 0 | << " expression " << *CC->getDefiningExpr() << "\n"); |
990 | 0 | NumGVNOpsSimplified++; |
991 | 0 | deleteExpression(E); |
992 | 0 | return CC->getDefiningExpr(); |
993 | 0 | } |
994 | 0 | } |
995 | 0 |
|
996 | 0 | return nullptr; |
997 | 0 | } |
998 | | |
999 | | // Create a value expression from the instruction I, replacing operands with |
1000 | | // their leaders. |
1001 | | |
1002 | 1.39k | const Expression *NewGVN::createExpression(Instruction *I) const { |
1003 | 1.39k | auto *E = new (ExpressionAllocator) BasicExpression(I->getNumOperands()); |
1004 | 1.39k | |
1005 | 1.39k | bool AllConstant = setBasicExpressionInfo(I, E); |
1006 | 1.39k | |
1007 | 1.39k | if (I->isCommutative()1.39k ) { |
1008 | 415 | // Ensure that commutative instructions that only differ by a permutation |
1009 | 415 | // of their operands get the same value number by sorting the operand value |
1010 | 415 | // numbers. Since all commutative instructions have two operands it is more |
1011 | 415 | // efficient to sort by hand rather than using, say, std::sort. |
1012 | 415 | assert(I->getNumOperands() == 2 && "Unsupported commutative instruction!"); |
1013 | 415 | if (shouldSwapOperands(E->getOperand(0), E->getOperand(1))) |
1014 | 226 | E->swapOperands(0, 1); |
1015 | 415 | } |
1016 | 1.39k | // Perform simplification. |
1017 | 1.39k | if (auto *CI1.39k = dyn_cast<CmpInst>(I)) { |
1018 | 441 | // Sort the operand value numbers so x<y and y>x get the same value |
1019 | 441 | // number. |
1020 | 441 | CmpInst::Predicate Predicate = CI->getPredicate(); |
1021 | 441 | if (shouldSwapOperands(E->getOperand(0), E->getOperand(1))441 ) { |
1022 | 318 | E->swapOperands(0, 1); |
1023 | 318 | Predicate = CmpInst::getSwappedPredicate(Predicate); |
1024 | 318 | } |
1025 | 441 | E->setOpcode((CI->getOpcode() << 8) | Predicate); |
1026 | 441 | // TODO: 25% of our time is spent in SimplifyCmpInst with pointer operands |
1027 | 441 | assert(I->getOperand(0)->getType() == I->getOperand(1)->getType() && |
1028 | 441 | "Wrong types on cmp instruction"); |
1029 | 441 | assert((E->getOperand(0)->getType() == I->getOperand(0)->getType() && |
1030 | 441 | E->getOperand(1)->getType() == I->getOperand(1)->getType())); |
1031 | 441 | Value *V = |
1032 | 441 | SimplifyCmpInst(Predicate, E->getOperand(0), E->getOperand(1), SQ); |
1033 | 441 | if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V)) |
1034 | 57 | return SimplifiedE; |
1035 | 951 | } else if (951 isa<SelectInst>(I)951 ) { |
1036 | 29 | if (isa<Constant>(E->getOperand(0)) || |
1037 | 29 | E->getOperand(1) == E->getOperand(2)23 ) { |
1038 | 13 | assert(E->getOperand(1)->getType() == I->getOperand(1)->getType() && |
1039 | 13 | E->getOperand(2)->getType() == I->getOperand(2)->getType()); |
1040 | 13 | Value *V = SimplifySelectInst(E->getOperand(0), E->getOperand(1), |
1041 | 13 | E->getOperand(2), SQ); |
1042 | 13 | if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V)) |
1043 | 13 | return SimplifiedE; |
1044 | 951 | } |
1045 | 922 | } else if (922 I->isBinaryOp()922 ) { |
1046 | 535 | Value *V = |
1047 | 535 | SimplifyBinOp(E->getOpcode(), E->getOperand(0), E->getOperand(1), SQ); |
1048 | 535 | if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V)) |
1049 | 194 | return SimplifiedE; |
1050 | 387 | } else if (auto *387 BI387 = dyn_cast<BitCastInst>(I)) { |
1051 | 84 | Value *V = |
1052 | 84 | SimplifyCastInst(BI->getOpcode(), BI->getOperand(0), BI->getType(), SQ); |
1053 | 84 | if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V)) |
1054 | 6 | return SimplifiedE; |
1055 | 303 | } else if (303 isa<GetElementPtrInst>(I)303 ) { |
1056 | 226 | Value *V = SimplifyGEPInst( |
1057 | 226 | E->getType(), ArrayRef<Value *>(E->op_begin(), E->op_end()), SQ); |
1058 | 226 | if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V)) |
1059 | 65 | return SimplifiedE; |
1060 | 77 | } else if (77 AllConstant77 ) { |
1061 | 12 | // We don't bother trying to simplify unless all of the operands |
1062 | 12 | // were constant. |
1063 | 12 | // TODO: There are a lot of Simplify*'s we could call here, if we |
1064 | 12 | // wanted to. The original motivating case for this code was a |
1065 | 12 | // zext i1 false to i8, which we don't have an interface to |
1066 | 12 | // simplify (IE there is no SimplifyZExt). |
1067 | 12 | |
1068 | 12 | SmallVector<Constant *, 8> C; |
1069 | 12 | for (Value *Arg : E->operands()) |
1070 | 14 | C.emplace_back(cast<Constant>(Arg)); |
1071 | 12 | |
1072 | 12 | if (Value *V = ConstantFoldInstOperands(I, C, DL, TLI)) |
1073 | 10 | if (const Expression *10 SimplifiedE10 = checkSimplificationResults(E, I, V)) |
1074 | 10 | return SimplifiedE; |
1075 | 1.04k | } |
1076 | 1.04k | return E; |
1077 | 1.04k | } |
1078 | | |
1079 | | const AggregateValueExpression * |
1080 | 2 | NewGVN::createAggregateValueExpression(Instruction *I) const { |
1081 | 2 | if (auto *II2 = dyn_cast<InsertValueInst>(I)) { |
1082 | 0 | auto *E = new (ExpressionAllocator) |
1083 | 0 | AggregateValueExpression(I->getNumOperands(), II->getNumIndices()); |
1084 | 0 | setBasicExpressionInfo(I, E); |
1085 | 0 | E->allocateIntOperands(ExpressionAllocator); |
1086 | 0 | std::copy(II->idx_begin(), II->idx_end(), int_op_inserter(E)); |
1087 | 0 | return E; |
1088 | 2 | } else if (auto *2 EI2 = dyn_cast<ExtractValueInst>(I)) { |
1089 | 2 | auto *E = new (ExpressionAllocator) |
1090 | 2 | AggregateValueExpression(I->getNumOperands(), EI->getNumIndices()); |
1091 | 2 | setBasicExpressionInfo(EI, E); |
1092 | 2 | E->allocateIntOperands(ExpressionAllocator); |
1093 | 2 | std::copy(EI->idx_begin(), EI->idx_end(), int_op_inserter(E)); |
1094 | 2 | return E; |
1095 | 2 | } |
1096 | 0 | llvm_unreachable0 ("Unhandled type of aggregate value operation"); |
1097 | 0 | } |
1098 | | |
1099 | 0 | const DeadExpression *NewGVN::createDeadExpression() const { |
1100 | 0 | // DeadExpression has no arguments and all DeadExpression's are the same, |
1101 | 0 | // so we only need one of them. |
1102 | 0 | return SingletonDeadExpression; |
1103 | 0 | } |
1104 | | |
1105 | 256 | const VariableExpression *NewGVN::createVariableExpression(Value *V) const { |
1106 | 256 | auto *E = new (ExpressionAllocator) VariableExpression(V); |
1107 | 256 | E->setOpcode(V->getValueID()); |
1108 | 256 | return E; |
1109 | 256 | } |
1110 | | |
1111 | 362 | const Expression *NewGVN::createVariableOrConstant(Value *V) const { |
1112 | 362 | if (auto *C = dyn_cast<Constant>(V)) |
1113 | 115 | return createConstantExpression(C); |
1114 | 247 | return createVariableExpression(V); |
1115 | 247 | } |
1116 | | |
1117 | 486 | const ConstantExpression *NewGVN::createConstantExpression(Constant *C) const { |
1118 | 486 | auto *E = new (ExpressionAllocator) ConstantExpression(C); |
1119 | 486 | E->setOpcode(C->getValueID()); |
1120 | 486 | return E; |
1121 | 486 | } |
1122 | | |
1123 | 324 | const UnknownExpression *NewGVN::createUnknownExpression(Instruction *I) const { |
1124 | 324 | auto *E = new (ExpressionAllocator) UnknownExpression(I); |
1125 | 324 | E->setOpcode(I->getOpcode()); |
1126 | 324 | return E; |
1127 | 324 | } |
1128 | | |
1129 | | const CallExpression * |
1130 | 50 | NewGVN::createCallExpression(CallInst *CI, const MemoryAccess *MA) const { |
1131 | 50 | // FIXME: Add operand bundles for calls. |
1132 | 50 | auto *E = |
1133 | 50 | new (ExpressionAllocator) CallExpression(CI->getNumOperands(), CI, MA); |
1134 | 50 | setBasicExpressionInfo(CI, E); |
1135 | 50 | return E; |
1136 | 50 | } |
1137 | | |
1138 | | // Return true if some equivalent of instruction Inst dominates instruction U. |
1139 | | bool NewGVN::someEquivalentDominates(const Instruction *Inst, |
1140 | 7 | const Instruction *U) const { |
1141 | 7 | auto *CC = ValueToClass.lookup(Inst); |
1142 | 7 | // This must be an instruction because we are only called from phi nodes |
1143 | 7 | // in the case that the value it needs to check against is an instruction. |
1144 | 7 | |
1145 | 7 | // The most likely candiates for dominance are the leader and the next leader. |
1146 | 7 | // The leader or nextleader will dominate in all cases where there is an |
1147 | 7 | // equivalent that is higher up in the dom tree. |
1148 | 7 | // We can't *only* check them, however, because the |
1149 | 7 | // dominator tree could have an infinite number of non-dominating siblings |
1150 | 7 | // with instructions that are in the right congruence class. |
1151 | 7 | // A |
1152 | 7 | // B C D E F G |
1153 | 7 | // | |
1154 | 7 | // H |
1155 | 7 | // Instruction U could be in H, with equivalents in every other sibling. |
1156 | 7 | // Depending on the rpo order picked, the leader could be the equivalent in |
1157 | 7 | // any of these siblings. |
1158 | 7 | if (!CC) |
1159 | 0 | return false; |
1160 | 7 | if (7 DT->dominates(cast<Instruction>(CC->getLeader()), U)7 ) |
1161 | 3 | return true; |
1162 | 4 | if (4 CC->getNextLeader().first && |
1163 | 2 | DT->dominates(cast<Instruction>(CC->getNextLeader().first), U)) |
1164 | 1 | return true; |
1165 | 3 | return llvm::any_of(*CC, [&](const Value *Member) 3 { |
1166 | 5 | return Member != CC->getLeader() && |
1167 | 2 | DT->dominates(cast<Instruction>(Member), U); |
1168 | 5 | }); |
1169 | 7 | } |
1170 | | |
1171 | | // See if we have a congruence class and leader for this operand, and if so, |
1172 | | // return it. Otherwise, return the operand itself. |
1173 | 7.61k | Value *NewGVN::lookupOperandLeader(Value *V) const { |
1174 | 7.61k | CongruenceClass *CC = ValueToClass.lookup(V); |
1175 | 7.61k | if (CC7.61k ) { |
1176 | 4.98k | // Everything in TOP is represented by undef, as it can be any value. |
1177 | 4.98k | // We do have to make sure we get the type right though, so we can't set the |
1178 | 4.98k | // RepLeader to undef. |
1179 | 4.98k | if (CC == TOPClass) |
1180 | 0 | return UndefValue::get(V->getType()); |
1181 | 4.98k | return CC->getStoredValue() ? 4.98k CC->getStoredValue()181 : CC->getLeader()4.80k ; |
1182 | 4.98k | } |
1183 | 2.63k | |
1184 | 2.63k | return V; |
1185 | 2.63k | } |
1186 | | |
1187 | 1.17k | const MemoryAccess *NewGVN::lookupMemoryLeader(const MemoryAccess *MA) const { |
1188 | 1.17k | auto *CC = getMemoryClass(MA); |
1189 | 1.17k | assert(CC->getMemoryLeader() && |
1190 | 1.17k | "Every MemoryAccess should be mapped to a congruence class with a " |
1191 | 1.17k | "representative memory access"); |
1192 | 1.17k | return CC->getMemoryLeader(); |
1193 | 1.17k | } |
1194 | | |
1195 | | // Return true if the MemoryAccess is really equivalent to everything. This is |
1196 | | // equivalent to the lattice value "TOP" in most lattices. This is the initial |
1197 | | // state of all MemoryAccesses. |
1198 | 464 | bool NewGVN::isMemoryAccessTOP(const MemoryAccess *MA) const { |
1199 | 464 | return getMemoryClass(MA) == TOPClass; |
1200 | 464 | } |
1201 | | |
1202 | | LoadExpression *NewGVN::createLoadExpression(Type *LoadType, Value *PointerOp, |
1203 | | LoadInst *LI, |
1204 | 482 | const MemoryAccess *MA) const { |
1205 | 482 | auto *E = |
1206 | 482 | new (ExpressionAllocator) LoadExpression(1, LI, lookupMemoryLeader(MA)); |
1207 | 482 | E->allocateOperands(ArgRecycler, ExpressionAllocator); |
1208 | 482 | E->setType(LoadType); |
1209 | 482 | |
1210 | 482 | // Give store and loads same opcode so they value number together. |
1211 | 482 | E->setOpcode(0); |
1212 | 482 | E->op_push_back(PointerOp); |
1213 | 482 | if (LI) |
1214 | 482 | E->setAlignment(LI->getAlignment()); |
1215 | 482 | |
1216 | 482 | // TODO: Value number heap versions. We may be able to discover |
1217 | 482 | // things alias analysis can't on it's own (IE that a store and a |
1218 | 482 | // load have the same value, and thus, it isn't clobbering the load). |
1219 | 482 | return E; |
1220 | 482 | } |
1221 | | |
1222 | | const StoreExpression * |
1223 | 591 | NewGVN::createStoreExpression(StoreInst *SI, const MemoryAccess *MA) const { |
1224 | 591 | auto *StoredValueLeader = lookupOperandLeader(SI->getValueOperand()); |
1225 | 591 | auto *E = new (ExpressionAllocator) |
1226 | 591 | StoreExpression(SI->getNumOperands(), SI, StoredValueLeader, MA); |
1227 | 591 | E->allocateOperands(ArgRecycler, ExpressionAllocator); |
1228 | 591 | E->setType(SI->getValueOperand()->getType()); |
1229 | 591 | |
1230 | 591 | // Give store and loads same opcode so they value number together. |
1231 | 591 | E->setOpcode(0); |
1232 | 591 | E->op_push_back(lookupOperandLeader(SI->getPointerOperand())); |
1233 | 591 | |
1234 | 591 | // TODO: Value number heap versions. We may be able to discover |
1235 | 591 | // things alias analysis can't on it's own (IE that a store and a |
1236 | 591 | // load have the same value, and thus, it isn't clobbering the load). |
1237 | 591 | return E; |
1238 | 591 | } |
1239 | | |
1240 | 315 | const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I) const { |
1241 | 315 | // Unlike loads, we never try to eliminate stores, so we do not check if they |
1242 | 315 | // are simple and avoid value numbering them. |
1243 | 315 | auto *SI = cast<StoreInst>(I); |
1244 | 315 | auto *StoreAccess = getMemoryAccess(SI); |
1245 | 315 | // Get the expression, if any, for the RHS of the MemoryDef. |
1246 | 315 | const MemoryAccess *StoreRHS = StoreAccess->getDefiningAccess(); |
1247 | 315 | if (EnableStoreRefinement) |
1248 | 2 | StoreRHS = MSSAWalker->getClobberingMemoryAccess(StoreAccess); |
1249 | 315 | // If we bypassed the use-def chains, make sure we add a use. |
1250 | 315 | StoreRHS = lookupMemoryLeader(StoreRHS); |
1251 | 315 | if (StoreRHS != StoreAccess->getDefiningAccess()) |
1252 | 49 | addMemoryUsers(StoreRHS, StoreAccess); |
1253 | 315 | // If we are defined by ourselves, use the live on entry def. |
1254 | 315 | if (StoreRHS == StoreAccess) |
1255 | 1 | StoreRHS = MSSA->getLiveOnEntryDef(); |
1256 | 315 | |
1257 | 315 | if (SI->isSimple()315 ) { |
1258 | 307 | // See if we are defined by a previous store expression, it already has a |
1259 | 307 | // value, and it's the same value as our current store. FIXME: Right now, we |
1260 | 307 | // only do this for simple stores, we should expand to cover memcpys, etc. |
1261 | 307 | const auto *LastStore = createStoreExpression(SI, StoreRHS); |
1262 | 307 | const auto *LastCC = ExpressionToClass.lookup(LastStore); |
1263 | 307 | // We really want to check whether the expression we matched was a store. No |
1264 | 307 | // easy way to do that. However, we can check that the class we found has a |
1265 | 307 | // store, which, assuming the value numbering state is not corrupt, is |
1266 | 307 | // sufficient, because we must also be equivalent to that store's expression |
1267 | 307 | // for it to be in the same class as the load. |
1268 | 307 | if (LastCC && 307 LastCC->getStoredValue() == LastStore->getStoredValue()58 ) |
1269 | 23 | return LastStore; |
1270 | 284 | // Also check if our value operand is defined by a load of the same memory |
1271 | 284 | // location, and the memory state is the same as it was then (otherwise, it |
1272 | 284 | // could have been overwritten later. See test32 in |
1273 | 284 | // transforms/DeadStoreElimination/simple.ll). |
1274 | 284 | if (auto *284 LI284 = dyn_cast<LoadInst>(LastStore->getStoredValue())) |
1275 | 33 | if (33 (lookupOperandLeader(LI->getPointerOperand()) == |
1276 | 33 | LastStore->getOperand(0)) && |
1277 | 9 | (lookupMemoryLeader(getMemoryAccess(LI)->getDefiningAccess()) == |
1278 | 9 | StoreRHS)) |
1279 | 8 | return LastStore; |
1280 | 276 | deleteExpression(LastStore); |
1281 | 276 | } |
1282 | 315 | |
1283 | 315 | // If the store is not equivalent to anything, value number it as a store that |
1284 | 315 | // produces a unique memory state (instead of using it's MemoryUse, we use |
1285 | 315 | // it's MemoryDef). |
1286 | 284 | return createStoreExpression(SI, StoreAccess); |
1287 | 315 | } |
1288 | | |
1289 | | // See if we can extract the value of a loaded pointer from a load, a store, or |
1290 | | // a memory instruction. |
1291 | | const Expression * |
1292 | | NewGVN::performSymbolicLoadCoercion(Type *LoadType, Value *LoadPtr, |
1293 | | LoadInst *LI, Instruction *DepInst, |
1294 | 193 | MemoryAccess *DefiningAccess) const { |
1295 | 193 | assert((!LI || LI->isSimple()) && "Not a simple load"); |
1296 | 193 | if (auto *DepSI193 = dyn_cast<StoreInst>(DepInst)) { |
1297 | 96 | // Can't forward from non-atomic to atomic without violating memory model. |
1298 | 96 | // Also don't need to coerce if they are the same type, we will just |
1299 | 96 | // propagate. |
1300 | 96 | if (LI->isAtomic() > DepSI->isAtomic() || |
1301 | 96 | LoadType == DepSI->getValueOperand()->getType()) |
1302 | 77 | return nullptr; |
1303 | 19 | int Offset = analyzeLoadFromClobberingStore(LoadType, LoadPtr, DepSI, DL); |
1304 | 19 | if (Offset >= 019 ) { |
1305 | 5 | if (auto *C = dyn_cast<Constant>( |
1306 | 1 | lookupOperandLeader(DepSI->getValueOperand()))) { |
1307 | 1 | DEBUG(dbgs() << "Coercing load from store " << *DepSI << " to constant " |
1308 | 1 | << *C << "\n"); |
1309 | 1 | return createConstantExpression( |
1310 | 1 | getConstantStoreValueForLoad(C, Offset, LoadType, DL)); |
1311 | 1 | } |
1312 | 193 | } |
1313 | 96 | |
1314 | 97 | } else if (auto *97 DepLI97 = dyn_cast<LoadInst>(DepInst)) { |
1315 | 0 | // Can't forward from non-atomic to atomic without violating memory model. |
1316 | 0 | if (LI->isAtomic() > DepLI->isAtomic()) |
1317 | 0 | return nullptr; |
1318 | 0 | int Offset = analyzeLoadFromClobberingLoad(LoadType, LoadPtr, DepLI, DL); |
1319 | 0 | if (Offset >= 00 ) { |
1320 | 0 | // We can coerce a constant load into a load. |
1321 | 0 | if (auto *C = dyn_cast<Constant>(lookupOperandLeader(DepLI))) |
1322 | 0 | if (auto *0 PossibleConstant0 = |
1323 | 0 | getConstantLoadValueForLoad(C, Offset, LoadType, DL)) { |
1324 | 0 | DEBUG(dbgs() << "Coercing load from load " << *LI << " to constant " |
1325 | 0 | << *PossibleConstant << "\n"); |
1326 | 0 | return createConstantExpression(PossibleConstant); |
1327 | 0 | } |
1328 | 97 | } |
1329 | 0 |
|
1330 | 97 | } else if (auto *97 DepMI97 = dyn_cast<MemIntrinsic>(DepInst)) { |
1331 | 10 | int Offset = analyzeLoadFromClobberingMemInst(LoadType, LoadPtr, DepMI, DL); |
1332 | 10 | if (Offset >= 010 ) { |
1333 | 10 | if (auto *PossibleConstant = |
1334 | 10 | getConstantMemInstValueForLoad(DepMI, Offset, LoadType, DL)) { |
1335 | 10 | DEBUG(dbgs() << "Coercing load from meminst " << *DepMI |
1336 | 10 | << " to constant " << *PossibleConstant << "\n"); |
1337 | 10 | return createConstantExpression(PossibleConstant); |
1338 | 10 | } |
1339 | 105 | } |
1340 | 97 | } |
1341 | 105 | |
1342 | 105 | // All of the below are only true if the loaded pointer is produced |
1343 | 105 | // by the dependent instruction. |
1344 | 105 | if (105 LoadPtr != lookupOperandLeader(DepInst) && |
1345 | 99 | !AA->isMustAlias(LoadPtr, DepInst)) |
1346 | 97 | return nullptr; |
1347 | 8 | // If this load really doesn't depend on anything, then we must be loading an |
1348 | 8 | // undef value. This can happen when loading for a fresh allocation with no |
1349 | 8 | // intervening stores, for example. Note that this is only true in the case |
1350 | 8 | // that the result of the allocation is pointer equal to the load ptr. |
1351 | 8 | if (8 isa<AllocaInst>(DepInst) || 8 isMallocLikeFn(DepInst, TLI)8 ) { |
1352 | 2 | return createConstantExpression(UndefValue::get(LoadType)); |
1353 | 2 | } |
1354 | 8 | // If this load occurs either right after a lifetime begin, |
1355 | 8 | // then the loaded value is undefined. |
1356 | 6 | else if (auto *6 II6 = dyn_cast<IntrinsicInst>(DepInst)) { |
1357 | 0 | if (II->getIntrinsicID() == Intrinsic::lifetime_start) |
1358 | 0 | return createConstantExpression(UndefValue::get(LoadType)); |
1359 | 6 | } |
1360 | 6 | // If this load follows a calloc (which zero initializes memory), |
1361 | 6 | // then the loaded value is zero |
1362 | 6 | else if (6 isCallocLikeFn(DepInst, TLI)6 ) { |
1363 | 1 | return createConstantExpression(Constant::getNullValue(LoadType)); |
1364 | 1 | } |
1365 | 5 | |
1366 | 5 | return nullptr; |
1367 | 5 | } |
1368 | | |
1369 | 511 | const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I) const { |
1370 | 511 | auto *LI = cast<LoadInst>(I); |
1371 | 511 | |
1372 | 511 | // We can eliminate in favor of non-simple loads, but we won't be able to |
1373 | 511 | // eliminate the loads themselves. |
1374 | 511 | if (!LI->isSimple()) |
1375 | 8 | return nullptr; |
1376 | 503 | |
1377 | 503 | Value *LoadAddressLeader = lookupOperandLeader(LI->getPointerOperand()); |
1378 | 503 | // Load of undef is undef. |
1379 | 503 | if (isa<UndefValue>(LoadAddressLeader)) |
1380 | 7 | return createConstantExpression(UndefValue::get(LI->getType())); |
1381 | 496 | MemoryAccess *OriginalAccess = getMemoryAccess(I); |
1382 | 496 | MemoryAccess *DefiningAccess = |
1383 | 496 | MSSAWalker->getClobberingMemoryAccess(OriginalAccess); |
1384 | 496 | |
1385 | 496 | if (!MSSA->isLiveOnEntryDef(DefiningAccess)496 ) { |
1386 | 331 | if (auto *MD331 = dyn_cast<MemoryDef>(DefiningAccess)) { |
1387 | 193 | Instruction *DefiningInst = MD->getMemoryInst(); |
1388 | 193 | // If the defining instruction is not reachable, replace with undef. |
1389 | 193 | if (!ReachableBlocks.count(DefiningInst->getParent())) |
1390 | 0 | return createConstantExpression(UndefValue::get(LI->getType())); |
1391 | 193 | // This will handle stores and memory insts. We only do if it the |
1392 | 193 | // defining access has a different type, or it is a pointer produced by |
1393 | 193 | // certain memory operations that cause the memory to have a fixed value |
1394 | 193 | // (IE things like calloc). |
1395 | 193 | if (const auto *193 CoercionResult193 = |
1396 | 193 | performSymbolicLoadCoercion(LI->getType(), LoadAddressLeader, LI, |
1397 | 193 | DefiningInst, DefiningAccess)) |
1398 | 14 | return CoercionResult; |
1399 | 482 | } |
1400 | 331 | } |
1401 | 482 | |
1402 | 482 | const auto *LE = createLoadExpression(LI->getType(), LoadAddressLeader, LI, |
1403 | 482 | DefiningAccess); |
1404 | 482 | // If our MemoryLeader is not our defining access, add a use to the |
1405 | 482 | // MemoryLeader, so that we get reprocessed when it changes. |
1406 | 482 | if (LE->getMemoryLeader() != DefiningAccess) |
1407 | 76 | addMemoryUsers(LE->getMemoryLeader(), OriginalAccess); |
1408 | 511 | return LE; |
1409 | 511 | } |
1410 | | |
1411 | | const Expression * |
1412 | 123 | NewGVN::performSymbolicPredicateInfoEvaluation(Instruction *I) const { |
1413 | 123 | auto *PI = PredInfo->getPredicateInfoFor(I); |
1414 | 123 | if (!PI) |
1415 | 0 | return nullptr; |
1416 | 123 | |
1417 | 123 | DEBUG123 (dbgs() << "Found predicate info from instruction !\n"); |
1418 | 123 | |
1419 | 123 | auto *PWC = dyn_cast<PredicateWithCondition>(PI); |
1420 | 123 | if (!PWC) |
1421 | 0 | return nullptr; |
1422 | 123 | |
1423 | 123 | auto *CopyOf = I->getOperand(0); |
1424 | 123 | auto *Cond = PWC->Condition; |
1425 | 123 | |
1426 | 123 | // If this a copy of the condition, it must be either true or false depending |
1427 | 123 | // on the predicate info type and edge. |
1428 | 123 | if (CopyOf == Cond123 ) { |
1429 | 18 | // We should not need to add predicate users because the predicate info is |
1430 | 18 | // already a use of this operand. |
1431 | 18 | if (isa<PredicateAssume>(PI)) |
1432 | 12 | return createConstantExpression(ConstantInt::getTrue(Cond->getType())); |
1433 | 6 | if (auto *6 PBranch6 = dyn_cast<PredicateBranch>(PI)) { |
1434 | 4 | if (PBranch->TrueEdge) |
1435 | 3 | return createConstantExpression(ConstantInt::getTrue(Cond->getType())); |
1436 | 1 | return createConstantExpression(ConstantInt::getFalse(Cond->getType())); |
1437 | 1 | } |
1438 | 2 | if (auto *2 PSwitch2 = dyn_cast<PredicateSwitch>(PI)) |
1439 | 2 | return createConstantExpression(cast<Constant>(PSwitch->CaseValue)); |
1440 | 105 | } |
1441 | 105 | |
1442 | 105 | // Not a copy of the condition, so see what the predicates tell us about this |
1443 | 105 | // value. First, though, we check to make sure the value is actually a copy |
1444 | 105 | // of one of the condition operands. It's possible, in certain cases, for it |
1445 | 105 | // to be a copy of a predicateinfo copy. In particular, if two branch |
1446 | 105 | // operations use the same condition, and one branch dominates the other, we |
1447 | 105 | // will end up with a copy of a copy. This is currently a small deficiency in |
1448 | 105 | // predicateinfo. What will end up happening here is that we will value |
1449 | 105 | // number both copies the same anyway. |
1450 | 105 | |
1451 | 105 | // Everything below relies on the condition being a comparison. |
1452 | 105 | auto *Cmp = dyn_cast<CmpInst>(Cond); |
1453 | 105 | if (!Cmp) |
1454 | 0 | return nullptr; |
1455 | 105 | |
1456 | 105 | if (105 CopyOf != Cmp->getOperand(0) && 105 CopyOf != Cmp->getOperand(1)22 ) { |
1457 | 2 | DEBUG(dbgs() << "Copy is not of any condition operands!\n"); |
1458 | 2 | return nullptr; |
1459 | 2 | } |
1460 | 103 | Value *FirstOp = lookupOperandLeader(Cmp->getOperand(0)); |
1461 | 103 | Value *SecondOp = lookupOperandLeader(Cmp->getOperand(1)); |
1462 | 103 | bool SwappedOps = false; |
1463 | 103 | // Sort the ops. |
1464 | 103 | if (shouldSwapOperands(FirstOp, SecondOp)103 ) { |
1465 | 64 | std::swap(FirstOp, SecondOp); |
1466 | 64 | SwappedOps = true; |
1467 | 64 | } |
1468 | 103 | CmpInst::Predicate Predicate = |
1469 | 103 | SwappedOps ? Cmp->getSwappedPredicate()64 : Cmp->getPredicate()39 ; |
1470 | 103 | |
1471 | 103 | if (isa<PredicateAssume>(PI)103 ) { |
1472 | 8 | // If the comparison is true when the operands are equal, then we know the |
1473 | 8 | // operands are equal, because assumes must always be true. |
1474 | 8 | if (CmpInst::isTrueWhenEqual(Predicate)8 ) { |
1475 | 7 | addPredicateUsers(PI, I); |
1476 | 7 | addAdditionalUsers(Cmp->getOperand(0), I); |
1477 | 7 | return createVariableOrConstant(FirstOp); |
1478 | 7 | } |
1479 | 96 | } |
1480 | 96 | if (const auto *96 PBranch96 = dyn_cast<PredicateBranch>(PI)) { |
1481 | 95 | // If we are *not* a copy of the comparison, we may equal to the other |
1482 | 95 | // operand when the predicate implies something about equality of |
1483 | 95 | // operations. In particular, if the comparison is true/false when the |
1484 | 95 | // operands are equal, and we are on the right edge, we know this operation |
1485 | 95 | // is equal to something. |
1486 | 95 | if ((PBranch->TrueEdge && 95 Predicate == CmpInst::ICMP_EQ48 ) || |
1487 | 95 | (!PBranch->TrueEdge && 77 Predicate == CmpInst::ICMP_NE47 )) { |
1488 | 19 | addPredicateUsers(PI, I); |
1489 | 19 | addAdditionalUsers(SwappedOps ? Cmp->getOperand(1)11 : Cmp->getOperand(0)8 , |
1490 | 19 | I); |
1491 | 19 | return createVariableOrConstant(FirstOp); |
1492 | 19 | } |
1493 | 76 | // Handle the special case of floating point. |
1494 | 76 | if (76 ((PBranch->TrueEdge && 76 Predicate == CmpInst::FCMP_OEQ30 ) || |
1495 | 73 | (!PBranch->TrueEdge && 73 Predicate == CmpInst::FCMP_UNE46 )) && |
1496 | 76 | isa<ConstantFP>(FirstOp)6 && !cast<ConstantFP>(FirstOp)->isZero()4 ) { |
1497 | 2 | addPredicateUsers(PI, I); |
1498 | 2 | addAdditionalUsers(SwappedOps ? Cmp->getOperand(1)2 : Cmp->getOperand(0)0 , |
1499 | 2 | I); |
1500 | 2 | return createConstantExpression(cast<Constant>(FirstOp)); |
1501 | 2 | } |
1502 | 75 | } |
1503 | 75 | return nullptr; |
1504 | 75 | } |
1505 | | |
1506 | | // Evaluate read only and pure calls, and create an expression result. |
1507 | 407 | const Expression *NewGVN::performSymbolicCallEvaluation(Instruction *I) const { |
1508 | 407 | auto *CI = cast<CallInst>(I); |
1509 | 407 | if (auto *II407 = dyn_cast<IntrinsicInst>(I)) { |
1510 | 188 | // Instrinsics with the returned attribute are copies of arguments. |
1511 | 188 | if (auto *ReturnedValue188 = II->getReturnedArgOperand()) { |
1512 | 123 | if (II->getIntrinsicID() == Intrinsic::ssa_copy) |
1513 | 123 | if (const auto *123 Result123 = performSymbolicPredicateInfoEvaluation(I)) |
1514 | 46 | return Result; |
1515 | 77 | return createVariableOrConstant(ReturnedValue); |
1516 | 77 | } |
1517 | 188 | } |
1518 | 284 | if (284 AA->doesNotAccessMemory(CI)284 ) { |
1519 | 17 | return createCallExpression(CI, TOPClass->getMemoryLeader()); |
1520 | 267 | } else if (267 AA->onlyReadsMemory(CI)267 ) { |
1521 | 33 | MemoryAccess *DefiningAccess = MSSAWalker->getClobberingMemoryAccess(CI); |
1522 | 33 | return createCallExpression(CI, DefiningAccess); |
1523 | 33 | } |
1524 | 234 | return nullptr; |
1525 | 234 | } |
1526 | | |
1527 | | // Retrieve the memory class for a given MemoryAccess. |
1528 | 1.86k | CongruenceClass *NewGVN::getMemoryClass(const MemoryAccess *MA) const { |
1529 | 1.86k | |
1530 | 1.86k | auto *Result = MemoryAccessToClass.lookup(MA); |
1531 | 1.86k | assert(Result && "Should have found memory class"); |
1532 | 1.86k | return Result; |
1533 | 1.86k | } |
1534 | | |
1535 | | // Update the MemoryAccess equivalence table to say that From is equal to To, |
1536 | | // and return true if this is different from what already existed in the table. |
1537 | | bool NewGVN::setMemoryClass(const MemoryAccess *From, |
1538 | 716 | CongruenceClass *NewClass) { |
1539 | 716 | assert(NewClass && |
1540 | 716 | "Every MemoryAccess should be getting mapped to a non-null class"); |
1541 | 716 | DEBUG(dbgs() << "Setting " << *From); |
1542 | 716 | DEBUG(dbgs() << " equivalent to congruence class "); |
1543 | 716 | DEBUG(dbgs() << NewClass->getID() << " with current MemoryAccess leader "); |
1544 | 716 | DEBUG(dbgs() << *NewClass->getMemoryLeader() << "\n"); |
1545 | 716 | |
1546 | 716 | auto LookupResult = MemoryAccessToClass.find(From); |
1547 | 716 | bool Changed = false; |
1548 | 716 | // If it's already in the table, see if the value changed. |
1549 | 716 | if (LookupResult != MemoryAccessToClass.end()716 ) { |
1550 | 716 | auto *OldClass = LookupResult->second; |
1551 | 716 | if (OldClass != NewClass716 ) { |
1552 | 678 | // If this is a phi, we have to handle memory member updates. |
1553 | 678 | if (auto *MP678 = dyn_cast<MemoryPhi>(From)) { |
1554 | 185 | OldClass->memory_erase(MP); |
1555 | 185 | NewClass->memory_insert(MP); |
1556 | 185 | // This may have killed the class if it had no non-memory members |
1557 | 185 | if (OldClass->getMemoryLeader() == From185 ) { |
1558 | 3 | if (OldClass->definesNoMemory()3 ) { |
1559 | 3 | OldClass->setMemoryLeader(nullptr); |
1560 | 3 | } else { |
1561 | 0 | OldClass->setMemoryLeader(getNextMemoryLeader(OldClass)); |
1562 | 0 | DEBUG(dbgs() << "Memory class leader change for class " |
1563 | 0 | << OldClass->getID() << " to " |
1564 | 0 | << *OldClass->getMemoryLeader() |
1565 | 0 | << " due to removal of a memory member " << *From |
1566 | 0 | << "\n"); |
1567 | 0 | markMemoryLeaderChangeTouched(OldClass); |
1568 | 0 | } |
1569 | 3 | } |
1570 | 185 | } |
1571 | 678 | // It wasn't equivalent before, and now it is. |
1572 | 678 | LookupResult->second = NewClass; |
1573 | 678 | Changed = true; |
1574 | 678 | } |
1575 | 716 | } |
1576 | 716 | |
1577 | 716 | return Changed; |
1578 | 716 | } |
1579 | | |
1580 | | // Determine if a instruction is cycle-free. That means the values in the |
1581 | | // instruction don't depend on any expressions that can change value as a result |
1582 | | // of the instruction. For example, a non-cycle free instruction would be v = |
1583 | | // phi(0, v+1). |
1584 | 184 | bool NewGVN::isCycleFree(const Instruction *I) const { |
1585 | 184 | // In order to compute cycle-freeness, we do SCC finding on the instruction, |
1586 | 184 | // and see what kind of SCC it ends up in. If it is a singleton, it is |
1587 | 184 | // cycle-free. If it is not in a singleton, it is only cycle free if the |
1588 | 184 | // other members are all phi nodes (as they do not compute anything, they are |
1589 | 184 | // copies). |
1590 | 184 | auto ICS = InstCycleState.lookup(I); |
1591 | 184 | if (ICS == ICS_Unknown184 ) { |
1592 | 137 | SCCFinder.Start(I); |
1593 | 137 | auto &SCC = SCCFinder.getComponentFor(I); |
1594 | 137 | // It's cycle free if it's size 1 or or the SCC is *only* phi nodes. |
1595 | 137 | if (SCC.size() == 1) |
1596 | 65 | InstCycleState.insert({I, ICS_CycleFree}); |
1597 | 72 | else { |
1598 | 103 | bool AllPhis = llvm::all_of(SCC, [](const Value *V) { |
1599 | 72 | return isa<PHINode>(V) || isCopyOfAPHI(V); |
1600 | 103 | }); |
1601 | 72 | ICS = AllPhis ? ICS_CycleFree2 : ICS_Cycle70 ; |
1602 | 72 | for (auto *Member : SCC) |
1603 | 176 | if (auto *176 MemberPhi176 = dyn_cast<PHINode>(Member)) |
1604 | 87 | InstCycleState.insert({MemberPhi, ICS}); |
1605 | 72 | } |
1606 | 137 | } |
1607 | 184 | if (ICS == ICS_Cycle) |
1608 | 73 | return false; |
1609 | 111 | return true; |
1610 | 111 | } |
1611 | | |
1612 | | // Evaluate PHI nodes symbolically and create an expression result. |
1613 | 468 | const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) const { |
1614 | 468 | // True if one of the incoming phi edges is a backedge. |
1615 | 468 | bool HasBackedge = false; |
1616 | 468 | // All constant tracks the state of whether all the *original* phi operands |
1617 | 468 | // This is really shorthand for "this phi cannot cycle due to forward |
1618 | 468 | // change in value of the phi is guaranteed not to later change the value of |
1619 | 468 | // the phi. IE it can't be v = phi(undef, v+1) |
1620 | 468 | bool OriginalOpsConstant = true; |
1621 | 468 | auto *E = cast<PHIExpression>( |
1622 | 468 | createPHIExpression(I, HasBackedge, OriginalOpsConstant)); |
1623 | 468 | // We match the semantics of SimplifyPhiNode from InstructionSimplify here. |
1624 | 468 | // See if all arguments are the same. |
1625 | 468 | // We track if any were undef because they need special handling. |
1626 | 468 | bool HasUndef = false; |
1627 | 990 | auto Filtered = make_filter_range(E->operands(), [&](Value *Arg) { |
1628 | 990 | if (isa<UndefValue>(Arg)990 ) { |
1629 | 60 | HasUndef = true; |
1630 | 60 | return false; |
1631 | 60 | } |
1632 | 930 | return true; |
1633 | 930 | }); |
1634 | 468 | // If we are left with no operands, it's dead. |
1635 | 468 | if (Filtered.begin() == Filtered.end()468 ) { |
1636 | 24 | // If it has undef at this point, it means there are no-non-undef arguments, |
1637 | 24 | // and thus, the value of the phi node must be undef. |
1638 | 24 | if (HasUndef24 ) { |
1639 | 24 | DEBUG(dbgs() << "PHI Node " << *I |
1640 | 24 | << " has no non-undef arguments, valuing it as undef\n"); |
1641 | 24 | return createConstantExpression(UndefValue::get(I->getType())); |
1642 | 24 | } |
1643 | 0 |
|
1644 | 0 | DEBUG0 (dbgs() << "No arguments of PHI node " << *I << " are live\n"); |
1645 | 0 | deleteExpression(E); |
1646 | 0 | return createDeadExpression(); |
1647 | 0 | } |
1648 | 444 | Value *AllSameValue = *(Filtered.begin()); |
1649 | 444 | ++Filtered.begin(); |
1650 | 444 | // Can't use std::equal here, sadly, because filter.begin moves. |
1651 | 687 | if (llvm::all_of(Filtered, [&](Value *Arg) 444 { return Arg == AllSameValue; }687 )) { |
1652 | 225 | // In LLVM's non-standard representation of phi nodes, it's possible to have |
1653 | 225 | // phi nodes with cycles (IE dependent on other phis that are .... dependent |
1654 | 225 | // on the original phi node), especially in weird CFG's where some arguments |
1655 | 225 | // are unreachable, or uninitialized along certain paths. This can cause |
1656 | 225 | // infinite loops during evaluation. We work around this by not trying to |
1657 | 225 | // really evaluate them independently, but instead using a variable |
1658 | 225 | // expression to say if one is equivalent to the other. |
1659 | 225 | // We also special case undef, so that if we have an undef, we can't use the |
1660 | 225 | // common value unless it dominates the phi block. |
1661 | 225 | if (HasUndef225 ) { |
1662 | 26 | // If we have undef and at least one other value, this is really a |
1663 | 26 | // multivalued phi, and we need to know if it's cycle free in order to |
1664 | 26 | // evaluate whether we can ignore the undef. The other parts of this are |
1665 | 26 | // just shortcuts. If there is no backedge, or all operands are |
1666 | 26 | // constants, it also must be cycle free. |
1667 | 26 | if (HasBackedge && 26 !OriginalOpsConstant17 && |
1668 | 26 | !isa<UndefValue>(AllSameValue)15 && !isCycleFree(I)15 ) |
1669 | 5 | return E; |
1670 | 21 | |
1671 | 21 | // Only have to check for instructions |
1672 | 21 | if (auto *21 AllSameInst21 = dyn_cast<Instruction>(AllSameValue)) |
1673 | 7 | if (7 !someEquivalentDominates(AllSameInst, I)7 ) |
1674 | 3 | return E; |
1675 | 217 | } |
1676 | 217 | // Can't simplify to something that comes later in the iteration. |
1677 | 217 | // Otherwise, when and if it changes congruence class, we will never catch |
1678 | 217 | // up. We will always be a class behind it. |
1679 | 217 | if (217 isa<Instruction>(AllSameValue) && |
1680 | 90 | InstrToDFSNum(AllSameValue) > InstrToDFSNum(I)) |
1681 | 9 | return E; |
1682 | 208 | NumGVNPhisAllSame++; |
1683 | 208 | DEBUG(dbgs() << "Simplified PHI node " << *I << " to " << *AllSameValue |
1684 | 225 | << "\n"); |
1685 | 225 | deleteExpression(E); |
1686 | 225 | return createVariableOrConstant(AllSameValue); |
1687 | 225 | } |
1688 | 219 | return E; |
1689 | 219 | } |
1690 | | |
1691 | | const Expression * |
1692 | 10 | NewGVN::performSymbolicAggrValueEvaluation(Instruction *I) const { |
1693 | 10 | if (auto *EI10 = dyn_cast<ExtractValueInst>(I)) { |
1694 | 10 | auto *II = dyn_cast<IntrinsicInst>(EI->getAggregateOperand()); |
1695 | 10 | if (II && 10 EI->getNumIndices() == 18 && *EI->idx_begin() == 08 ) { |
1696 | 8 | unsigned Opcode = 0; |
1697 | 8 | // EI might be an extract from one of our recognised intrinsics. If it |
1698 | 8 | // is we'll synthesize a semantically equivalent expression instead on |
1699 | 8 | // an extract value expression. |
1700 | 8 | switch (II->getIntrinsicID()) { |
1701 | 4 | case Intrinsic::sadd_with_overflow: |
1702 | 4 | case Intrinsic::uadd_with_overflow: |
1703 | 4 | Opcode = Instruction::Add; |
1704 | 4 | break; |
1705 | 2 | case Intrinsic::ssub_with_overflow: |
1706 | 2 | case Intrinsic::usub_with_overflow: |
1707 | 2 | Opcode = Instruction::Sub; |
1708 | 2 | break; |
1709 | 2 | case Intrinsic::smul_with_overflow: |
1710 | 2 | case Intrinsic::umul_with_overflow: |
1711 | 2 | Opcode = Instruction::Mul; |
1712 | 2 | break; |
1713 | 0 | default: |
1714 | 0 | break; |
1715 | 8 | } |
1716 | 8 | |
1717 | 8 | if (8 Opcode != 08 ) { |
1718 | 8 | // Intrinsic recognized. Grab its args to finish building the |
1719 | 8 | // expression. |
1720 | 8 | assert(II->getNumArgOperands() == 2 && |
1721 | 8 | "Expect two args for recognised intrinsics."); |
1722 | 8 | return createBinaryExpression(Opcode, EI->getType(), |
1723 | 8 | II->getArgOperand(0), |
1724 | 8 | II->getArgOperand(1), I); |
1725 | 8 | } |
1726 | 2 | } |
1727 | 10 | } |
1728 | 2 | |
1729 | 2 | return createAggregateValueExpression(I); |
1730 | 2 | } |
1731 | 309 | const Expression *NewGVN::performSymbolicCmpEvaluation(Instruction *I) const { |
1732 | 309 | assert(isa<CmpInst>(I) && "Expected a cmp instruction."); |
1733 | 309 | |
1734 | 309 | auto *CI = cast<CmpInst>(I); |
1735 | 309 | // See if our operands are equal to those of a previous predicate, and if so, |
1736 | 309 | // if it implies true or false. |
1737 | 309 | auto Op0 = lookupOperandLeader(CI->getOperand(0)); |
1738 | 309 | auto Op1 = lookupOperandLeader(CI->getOperand(1)); |
1739 | 309 | auto OurPredicate = CI->getPredicate(); |
1740 | 309 | if (shouldSwapOperands(Op0, Op1)309 ) { |
1741 | 209 | std::swap(Op0, Op1); |
1742 | 209 | OurPredicate = CI->getSwappedPredicate(); |
1743 | 209 | } |
1744 | 309 | |
1745 | 309 | // Avoid processing the same info twice. |
1746 | 309 | const PredicateBase *LastPredInfo = nullptr; |
1747 | 309 | // See if we know something about the comparison itself, like it is the target |
1748 | 309 | // of an assume. |
1749 | 309 | auto *CmpPI = PredInfo->getPredicateInfoFor(I); |
1750 | 309 | if (dyn_cast_or_null<PredicateAssume>(CmpPI)) |
1751 | 0 | return createConstantExpression(ConstantInt::getTrue(CI->getType())); |
1752 | 309 | |
1753 | 309 | if (309 Op0 == Op1309 ) { |
1754 | 12 | // This condition does not depend on predicates, no need to add users |
1755 | 12 | if (CI->isTrueWhenEqual()) |
1756 | 8 | return createConstantExpression(ConstantInt::getTrue(CI->getType())); |
1757 | 4 | else if (4 CI->isFalseWhenEqual()4 ) |
1758 | 4 | return createConstantExpression(ConstantInt::getFalse(CI->getType())); |
1759 | 297 | } |
1760 | 297 | |
1761 | 297 | // NOTE: Because we are comparing both operands here and below, and using |
1762 | 297 | // previous comparisons, we rely on fact that predicateinfo knows to mark |
1763 | 297 | // comparisons that use renamed operands as users of the earlier comparisons. |
1764 | 297 | // It is *not* enough to just mark predicateinfo renamed operands as users of |
1765 | 297 | // the earlier comparisons, because the *other* operand may have changed in a |
1766 | 297 | // previous iteration. |
1767 | 297 | // Example: |
1768 | 297 | // icmp slt %a, %b |
1769 | 297 | // %b.0 = ssa.copy(%b) |
1770 | 297 | // false branch: |
1771 | 297 | // icmp slt %c, %b.0 |
1772 | 297 | |
1773 | 297 | // %c and %a may start out equal, and thus, the code below will say the second |
1774 | 297 | // %icmp is false. c may become equal to something else, and in that case the |
1775 | 297 | // %second icmp *must* be reexamined, but would not if only the renamed |
1776 | 297 | // %operands are considered users of the icmp. |
1777 | 297 | |
1778 | 297 | // *Currently* we only check one level of comparisons back, and only mark one |
1779 | 297 | // level back as touched when changes happen. If you modify this code to look |
1780 | 297 | // back farther through comparisons, you *must* mark the appropriate |
1781 | 297 | // comparisons as users in PredicateInfo.cpp, or you will cause bugs. See if |
1782 | 297 | // we know something just from the operands themselves |
1783 | 297 | |
1784 | 297 | // See if our operands have predicate info, so that we may be able to derive |
1785 | 297 | // something from a previous comparison. |
1786 | 297 | for (const auto &Op : CI->operands()) 297 { |
1787 | 587 | auto *PI = PredInfo->getPredicateInfoFor(Op); |
1788 | 587 | if (const auto *PBranch587 = dyn_cast_or_null<PredicateBranch>(PI)) { |
1789 | 41 | if (PI == LastPredInfo) |
1790 | 0 | continue; |
1791 | 41 | LastPredInfo = PI; |
1792 | 41 | // In phi of ops cases, we may have predicate info that we are evaluating |
1793 | 41 | // in a different context. |
1794 | 41 | if (!DT->dominates(PBranch->To, getBlockForValue(I))) |
1795 | 5 | continue; |
1796 | 36 | // TODO: Along the false edge, we may know more things too, like |
1797 | 36 | // icmp of |
1798 | 36 | // same operands is false. |
1799 | 36 | // TODO: We only handle actual comparison conditions below, not |
1800 | 36 | // and/or. |
1801 | 36 | auto *BranchCond = dyn_cast<CmpInst>(PBranch->Condition); |
1802 | 36 | if (!BranchCond) |
1803 | 0 | continue; |
1804 | 36 | auto *BranchOp0 = lookupOperandLeader(BranchCond->getOperand(0)); |
1805 | 36 | auto *BranchOp1 = lookupOperandLeader(BranchCond->getOperand(1)); |
1806 | 36 | auto BranchPredicate = BranchCond->getPredicate(); |
1807 | 36 | if (shouldSwapOperands(BranchOp0, BranchOp1)36 ) { |
1808 | 22 | std::swap(BranchOp0, BranchOp1); |
1809 | 22 | BranchPredicate = BranchCond->getSwappedPredicate(); |
1810 | 22 | } |
1811 | 36 | if (BranchOp0 == Op0 && 36 BranchOp1 == Op116 ) { |
1812 | 8 | if (PBranch->TrueEdge8 ) { |
1813 | 3 | // If we know the previous predicate is true and we are in the true |
1814 | 3 | // edge then we may be implied true or false. |
1815 | 3 | if (CmpInst::isImpliedTrueByMatchingCmp(BranchPredicate, |
1816 | 3 | OurPredicate)) { |
1817 | 0 | addPredicateUsers(PI, I); |
1818 | 0 | return createConstantExpression( |
1819 | 0 | ConstantInt::getTrue(CI->getType())); |
1820 | 0 | } |
1821 | 3 | |
1822 | 3 | if (3 CmpInst::isImpliedFalseByMatchingCmp(BranchPredicate, |
1823 | 3 | OurPredicate)) { |
1824 | 2 | addPredicateUsers(PI, I); |
1825 | 2 | return createConstantExpression( |
1826 | 2 | ConstantInt::getFalse(CI->getType())); |
1827 | 2 | } |
1828 | 8 | |
1829 | 5 | } else { |
1830 | 5 | // Just handle the ne and eq cases, where if we have the same |
1831 | 5 | // operands, we may know something. |
1832 | 5 | if (BranchPredicate == OurPredicate5 ) { |
1833 | 5 | addPredicateUsers(PI, I); |
1834 | 5 | // Same predicate, same ops,we know it was false, so this is false. |
1835 | 5 | return createConstantExpression( |
1836 | 5 | ConstantInt::getFalse(CI->getType())); |
1837 | 0 | } else if (0 BranchPredicate == |
1838 | 0 | CmpInst::getInversePredicate(OurPredicate)) { |
1839 | 0 | addPredicateUsers(PI, I); |
1840 | 0 | // Inverse predicate, we know the other was false, so this is true. |
1841 | 0 | return createConstantExpression( |
1842 | 0 | ConstantInt::getTrue(CI->getType())); |
1843 | 0 | } |
1844 | 290 | } |
1845 | 8 | } |
1846 | 41 | } |
1847 | 587 | } |
1848 | 290 | // Create expression will take care of simplifyCmpInst |
1849 | 290 | return createExpression(I); |
1850 | 290 | } |
1851 | | |
1852 | | // Return true if V is a value that will always be available (IE can |
1853 | | // be placed anywhere) in the function. We don't do globals here |
1854 | | // because they are often worse to put in place. |
1855 | 2.21k | static bool alwaysAvailable(Value *V) { |
1856 | 1.89k | return isa<Constant>(V) || isa<Argument>(V); |
1857 | 2.21k | } |
1858 | | |
1859 | | // Substitute and symbolize the value before value numbering. |
1860 | | const Expression * |
1861 | | NewGVN::performSymbolicEvaluation(Value *V, |
1862 | 3.04k | SmallPtrSetImpl<Value *> &Visited) const { |
1863 | 3.04k | const Expression *E = nullptr; |
1864 | 3.04k | if (auto *C = dyn_cast<Constant>(V)) |
1865 | 0 | E = createConstantExpression(C); |
1866 | 3.04k | else if (3.04k isa<Argument>(V) || 3.04k isa<GlobalVariable>(V)3.04k ) { |
1867 | 0 | E = createVariableExpression(V); |
1868 | 3.04k | } else { |
1869 | 3.04k | // TODO: memory intrinsics. |
1870 | 3.04k | // TODO: Some day, we should do the forward propagation and reassociation |
1871 | 3.04k | // parts of the algorithm. |
1872 | 3.04k | auto *I = cast<Instruction>(V); |
1873 | 3.04k | switch (I->getOpcode()) { |
1874 | 10 | case Instruction::ExtractValue: |
1875 | 10 | case Instruction::InsertValue: |
1876 | 10 | E = performSymbolicAggrValueEvaluation(I); |
1877 | 10 | break; |
1878 | 468 | case Instruction::PHI: |
1879 | 468 | E = performSymbolicPHIEvaluation(I); |
1880 | 468 | break; |
1881 | 407 | case Instruction::Call: |
1882 | 407 | E = performSymbolicCallEvaluation(I); |
1883 | 407 | break; |
1884 | 315 | case Instruction::Store: |
1885 | 315 | E = performSymbolicStoreEvaluation(I); |
1886 | 315 | break; |
1887 | 511 | case Instruction::Load: |
1888 | 511 | E = performSymbolicLoadEvaluation(I); |
1889 | 511 | break; |
1890 | 84 | case Instruction::BitCast: { |
1891 | 84 | E = createExpression(I); |
1892 | 84 | } break; |
1893 | 309 | case Instruction::ICmp: |
1894 | 309 | case Instruction::FCmp: { |
1895 | 309 | E = performSymbolicCmpEvaluation(I); |
1896 | 309 | } break; |
1897 | 859 | case Instruction::Add: |
1898 | 859 | case Instruction::FAdd: |
1899 | 859 | case Instruction::Sub: |
1900 | 859 | case Instruction::FSub: |
1901 | 859 | case Instruction::Mul: |
1902 | 859 | case Instruction::FMul: |
1903 | 859 | case Instruction::UDiv: |
1904 | 859 | case Instruction::SDiv: |
1905 | 859 | case Instruction::FDiv: |
1906 | 859 | case Instruction::URem: |
1907 | 859 | case Instruction::SRem: |
1908 | 859 | case Instruction::FRem: |
1909 | 859 | case Instruction::Shl: |
1910 | 859 | case Instruction::LShr: |
1911 | 859 | case Instruction::AShr: |
1912 | 859 | case Instruction::And: |
1913 | 859 | case Instruction::Or: |
1914 | 859 | case Instruction::Xor: |
1915 | 859 | case Instruction::Trunc: |
1916 | 859 | case Instruction::ZExt: |
1917 | 859 | case Instruction::SExt: |
1918 | 859 | case Instruction::FPToUI: |
1919 | 859 | case Instruction::FPToSI: |
1920 | 859 | case Instruction::UIToFP: |
1921 | 859 | case Instruction::SIToFP: |
1922 | 859 | case Instruction::FPTrunc: |
1923 | 859 | case Instruction::FPExt: |
1924 | 859 | case Instruction::PtrToInt: |
1925 | 859 | case Instruction::IntToPtr: |
1926 | 859 | case Instruction::Select: |
1927 | 859 | case Instruction::ExtractElement: |
1928 | 859 | case Instruction::InsertElement: |
1929 | 859 | case Instruction::ShuffleVector: |
1930 | 859 | case Instruction::GetElementPtr: |
1931 | 859 | E = createExpression(I); |
1932 | 859 | break; |
1933 | 80 | default: |
1934 | 80 | return nullptr; |
1935 | 2.96k | } |
1936 | 2.96k | } |
1937 | 2.96k | return E; |
1938 | 2.96k | } |
1939 | | |
1940 | | // Look up a container in a map, and then call a function for each thing in the |
1941 | | // found container. |
1942 | | template <typename Map, typename KeyType, typename Func> |
1943 | 295 | void NewGVN::for_each_found(Map &M, const KeyType &Key, Func F) { |
1944 | 295 | const auto Result = M.find_as(Key); |
1945 | 295 | if (Result != M.end()) |
1946 | 3 | for (typename Map::mapped_type::value_type Mapped : Result->second) |
1947 | 3 | F(Mapped); |
1948 | 295 | } NewGVN.cpp:void (anonymous namespace)::NewGVN::for_each_found<llvm::DenseMap<llvm::BasicBlock const*, llvm::SmallPtrSet<llvm::PHINode*, 2u>, llvm::DenseMapInfo<llvm::BasicBlock const*>, llvm::detail::DenseMapPair<llvm::BasicBlock const*, llvm::SmallPtrSet<llvm::PHINode*, 2u> > >, llvm::BasicBlock*, (anonymous namespace)::NewGVN::updateReachableEdge(llvm::BasicBlock*, llvm::BasicBlock*)::$_9>(llvm::DenseMap<llvm::BasicBlock const*, llvm::SmallPtrSet<llvm::PHINode*, 2u>, llvm::DenseMapInfo<llvm::BasicBlock const*>, llvm::detail::DenseMapPair<llvm::BasicBlock const*, llvm::SmallPtrSet<llvm::PHINode*, 2u> > >&, llvm::BasicBlock* const&, (anonymous namespace)::NewGVN::updateReachableEdge(llvm::BasicBlock*, llvm::BasicBlock*)::$_9) Line | Count | Source | 1943 | 267 | void NewGVN::for_each_found(Map &M, const KeyType &Key, Func F) { | 1944 | 267 | const auto Result = M.find_as(Key); | 1945 | 267 | if (Result != M.end()) | 1946 | 2 | for (typename Map::mapped_type::value_type Mapped : Result->second) | 1947 | 2 | F(Mapped); | 1948 | 267 | } |
NewGVN.cpp:void (anonymous namespace)::NewGVN::for_each_found<llvm::DenseMap<llvm::BasicBlock const*, llvm::SmallPtrSet<llvm::PHINode*, 2u>, llvm::DenseMapInfo<llvm::BasicBlock const*>, llvm::detail::DenseMapPair<llvm::BasicBlock const*, llvm::SmallPtrSet<llvm::PHINode*, 2u> > >, llvm::BasicBlock*, (anonymous namespace)::NewGVN::eliminateInstructions(llvm::Function&)::$_16>(llvm::DenseMap<llvm::BasicBlock const*, llvm::SmallPtrSet<llvm::PHINode*, 2u>, llvm::DenseMapInfo<llvm::BasicBlock const*>, llvm::detail::DenseMapPair<llvm::BasicBlock const*, llvm::SmallPtrSet<llvm::PHINode*, 2u> > >&, llvm::BasicBlock* const&, (anonymous namespace)::NewGVN::eliminateInstructions(llvm::Function&)::$_16) Line | Count | Source | 1943 | 28 | void NewGVN::for_each_found(Map &M, const KeyType &Key, Func F) { | 1944 | 28 | const auto Result = M.find_as(Key); | 1945 | 28 | if (Result != M.end()) | 1946 | 1 | for (typename Map::mapped_type::value_type Mapped : Result->second) | 1947 | 1 | F(Mapped); | 1948 | 28 | } |
|
1949 | | |
1950 | | // Look up a container of values/instructions in a map, and touch all the |
1951 | | // instructions in the container. Then erase value from the map. |
1952 | | template <typename Map, typename KeyType> |
1953 | 6.26k | void NewGVN::touchAndErase(Map &M, const KeyType &Key) { |
1954 | 6.26k | const auto Result = M.find_as(Key); |
1955 | 6.26k | if (Result != M.end()6.26k ) { |
1956 | 45 | for (const typename Map::mapped_type::value_type Mapped : Result->second) |
1957 | 55 | TouchedInstructions.set(InstrToDFSNum(Mapped)); |
1958 | 45 | M.erase(Result); |
1959 | 45 | } |
1960 | 6.26k | } NewGVN.cpp:void (anonymous namespace)::NewGVN::touchAndErase<llvm::DenseMap<llvm::Value const*, llvm::SmallPtrSet<llvm::Instruction*, 2u>, llvm::DenseMapInfo<llvm::Value const*>, llvm::detail::DenseMapPair<llvm::Value const*, llvm::SmallPtrSet<llvm::Instruction*, 2u> > >, llvm::Instruction*>(llvm::DenseMap<llvm::Value const*, llvm::SmallPtrSet<llvm::Instruction*, 2u>, llvm::DenseMapInfo<llvm::Value const*>, llvm::detail::DenseMapPair<llvm::Value const*, llvm::SmallPtrSet<llvm::Instruction*, 2u> > >&, llvm::Instruction* const&) Line | Count | Source | 1953 | 250 | void NewGVN::touchAndErase(Map &M, const KeyType &Key) { | 1954 | 250 | const auto Result = M.find_as(Key); | 1955 | 250 | if (Result != M.end()250 ) { | 1956 | 1 | for (const typename Map::mapped_type::value_type Mapped : Result->second) | 1957 | 1 | TouchedInstructions.set(InstrToDFSNum(Mapped)); | 1958 | 1 | M.erase(Result); | 1959 | 1 | } | 1960 | 250 | } |
NewGVN.cpp:void (anonymous namespace)::NewGVN::touchAndErase<llvm::DenseMap<llvm::MemoryAccess const*, llvm::SmallPtrSet<llvm::MemoryAccess*, 2u>, llvm::DenseMapInfo<llvm::MemoryAccess const*>, llvm::detail::DenseMapPair<llvm::MemoryAccess const*, llvm::SmallPtrSet<llvm::MemoryAccess*, 2u> > >, llvm::MemoryAccess const*>(llvm::DenseMap<llvm::MemoryAccess const*, llvm::SmallPtrSet<llvm::MemoryAccess*, 2u>, llvm::DenseMapInfo<llvm::MemoryAccess const*>, llvm::detail::DenseMapPair<llvm::MemoryAccess const*, llvm::SmallPtrSet<llvm::MemoryAccess*, 2u> > >&, llvm::MemoryAccess const* const&) Line | Count | Source | 1953 | 689 | void NewGVN::touchAndErase(Map &M, const KeyType &Key) { | 1954 | 689 | const auto Result = M.find_as(Key); | 1955 | 689 | if (Result != M.end()689 ) { | 1956 | 7 | for (const typename Map::mapped_type::value_type Mapped : Result->second) | 1957 | 16 | TouchedInstructions.set(InstrToDFSNum(Mapped)); | 1958 | 7 | M.erase(Result); | 1959 | 7 | } | 1960 | 689 | } |
NewGVN.cpp:void (anonymous namespace)::NewGVN::touchAndErase<llvm::DenseMap<llvm::GVNExpression::Expression const*, llvm::SmallPtrSet<llvm::Instruction*, 2u>, llvm::DenseMapInfo<llvm::GVNExpression::Expression const*>, llvm::detail::DenseMapPair<llvm::GVNExpression::Expression const*, llvm::SmallPtrSet<llvm::Instruction*, 2u> > >, llvm::ExactEqualsExpression>(llvm::DenseMap<llvm::GVNExpression::Expression const*, llvm::SmallPtrSet<llvm::Instruction*, 2u>, llvm::DenseMapInfo<llvm::GVNExpression::Expression const*>, llvm::detail::DenseMapPair<llvm::GVNExpression::Expression const*, llvm::SmallPtrSet<llvm::Instruction*, 2u> > >&, llvm::ExactEqualsExpression const&) Line | Count | Source | 1953 | 2.65k | void NewGVN::touchAndErase(Map &M, const KeyType &Key) { | 1954 | 2.65k | const auto Result = M.find_as(Key); | 1955 | 2.65k | if (Result != M.end()2.65k ) { | 1956 | 18 | for (const typename Map::mapped_type::value_type Mapped : Result->second) | 1957 | 18 | TouchedInstructions.set(InstrToDFSNum(Mapped)); | 1958 | 18 | M.erase(Result); | 1959 | 18 | } | 1960 | 2.65k | } |
NewGVN.cpp:void (anonymous namespace)::NewGVN::touchAndErase<llvm::DenseMap<llvm::Value const*, llvm::SmallPtrSet<llvm::Value*, 2u>, llvm::DenseMapInfo<llvm::Value const*>, llvm::detail::DenseMapPair<llvm::Value const*, llvm::SmallPtrSet<llvm::Value*, 2u> > >, llvm::Value*>(llvm::DenseMap<llvm::Value const*, llvm::SmallPtrSet<llvm::Value*, 2u>, llvm::DenseMapInfo<llvm::Value const*>, llvm::detail::DenseMapPair<llvm::Value const*, llvm::SmallPtrSet<llvm::Value*, 2u> > >&, llvm::Value* const&) Line | Count | Source | 1953 | 2.66k | void NewGVN::touchAndErase(Map &M, const KeyType &Key) { | 1954 | 2.66k | const auto Result = M.find_as(Key); | 1955 | 2.66k | if (Result != M.end()2.66k ) { | 1956 | 19 | for (const typename Map::mapped_type::value_type Mapped : Result->second) | 1957 | 20 | TouchedInstructions.set(InstrToDFSNum(Mapped)); | 1958 | 19 | M.erase(Result); | 1959 | 19 | } | 1960 | 2.66k | } |
|
1961 | | |
1962 | 340 | void NewGVN::addAdditionalUsers(Value *To, Value *User) const { |
1963 | 340 | assert(User && To != User); |
1964 | 340 | if (isa<Instruction>(To)) |
1965 | 192 | AdditionalUsers[To].insert(User); |
1966 | 340 | } |
1967 | | |
1968 | 2.66k | void NewGVN::markUsersTouched(Value *V) { |
1969 | 2.66k | // Now mark the users as touched. |
1970 | 2.89k | for (auto *User : V->users()) { |
1971 | 2.89k | assert(isa<Instruction>(User) && "Use of value not within an instruction?"); |
1972 | 2.89k | TouchedInstructions.set(InstrToDFSNum(User)); |
1973 | 2.89k | } |
1974 | 2.66k | touchAndErase(AdditionalUsers, V); |
1975 | 2.66k | } |
1976 | | |
1977 | 125 | void NewGVN::addMemoryUsers(const MemoryAccess *To, MemoryAccess *U) const { |
1978 | 125 | DEBUG(dbgs() << "Adding memory user " << *U << " to " << *To << "\n"); |
1979 | 125 | MemoryToUsers[To].insert(U); |
1980 | 125 | } |
1981 | | |
1982 | 11 | void NewGVN::markMemoryDefTouched(const MemoryAccess *MA) { |
1983 | 11 | TouchedInstructions.set(MemoryToDFSNum(MA)); |
1984 | 11 | } |
1985 | | |
1986 | 1.20k | void NewGVN::markMemoryUsersTouched(const MemoryAccess *MA) { |
1987 | 1.20k | if (isa<MemoryUse>(MA)) |
1988 | 514 | return; |
1989 | 689 | for (auto U : MA->users()) |
1990 | 1.04k | TouchedInstructions.set(MemoryToDFSNum(U)); |
1991 | 1.20k | touchAndErase(MemoryToUsers, MA); |
1992 | 1.20k | } |
1993 | | |
1994 | | // Add I to the set of users of a given predicate. |
1995 | 35 | void NewGVN::addPredicateUsers(const PredicateBase *PB, Instruction *I) const { |
1996 | 35 | // Don't add temporary instructions to the user lists. |
1997 | 35 | if (AllTempInstructions.count(I)) |
1998 | 2 | return; |
1999 | 33 | |
2000 | 33 | if (auto *33 PBranch33 = dyn_cast<PredicateBranch>(PB)) |
2001 | 26 | PredicateToUsers[PBranch->Condition].insert(I); |
2002 | 7 | else if (auto *7 PAssume7 = dyn_cast<PredicateBranch>(PB)) |
2003 | 0 | PredicateToUsers[PAssume->Condition].insert(I); |
2004 | 35 | } |
2005 | | |
2006 | | // Touch all the predicates that depend on this instruction. |
2007 | 250 | void NewGVN::markPredicateUsersTouched(Instruction *I) { |
2008 | 250 | touchAndErase(PredicateToUsers, I); |
2009 | 250 | } |
2010 | | |
2011 | | // Mark users affected by a memory leader change. |
2012 | 477 | void NewGVN::markMemoryLeaderChangeTouched(CongruenceClass *CC) { |
2013 | 477 | for (auto M : CC->memory()) |
2014 | 11 | markMemoryDefTouched(M); |
2015 | 477 | } |
2016 | | |
2017 | | // Touch the instructions that need to be updated after a congruence class has a |
2018 | | // leader change, and mark changed values. |
2019 | 59 | void NewGVN::markValueLeaderChangeTouched(CongruenceClass *CC) { |
2020 | 88 | for (auto M : *CC) { |
2021 | 88 | if (auto *I = dyn_cast<Instruction>(M)) |
2022 | 88 | TouchedInstructions.set(InstrToDFSNum(I)); |
2023 | 88 | LeaderChanges.insert(M); |
2024 | 88 | } |
2025 | 59 | } |
2026 | | |
2027 | | // Give a range of things that have instruction DFS numbers, this will return |
2028 | | // the member of the range with the smallest dfs number. |
2029 | | template <class T, class Range> |
2030 | 7 | T *NewGVN::getMinDFSOfRange(const Range &R) const { |
2031 | 7 | std::pair<T *, unsigned> MinDFS = {nullptr, ~0U}; |
2032 | 14 | for (const auto X : R) { |
2033 | 14 | auto DFSNum = InstrToDFSNum(X); |
2034 | 14 | if (DFSNum < MinDFS.second) |
2035 | 8 | MinDFS = {X, DFSNum}; |
2036 | 14 | } |
2037 | 7 | return MinDFS.first; |
2038 | 7 | } NewGVN.cpp:llvm::Value* (anonymous namespace)::NewGVN::getMinDFSOfRange<llvm::Value, llvm::iterator_range<llvm::filter_iterator<llvm::SmallPtrSetIterator<llvm::Value*>, (anonymous namespace)::NewGVN::getNextMemoryLeader((anonymous namespace)::CongruenceClass*) const::$_8> > >(llvm::iterator_range<llvm::filter_iterator<llvm::SmallPtrSetIterator<llvm::Value*>, (anonymous namespace)::NewGVN::getNextMemoryLeader((anonymous namespace)::CongruenceClass*) const::$_8> > const&) const Line | Count | Source | 2030 | 1 | T *NewGVN::getMinDFSOfRange(const Range &R) const { | 2031 | 1 | std::pair<T *, unsigned> MinDFS = {nullptr, ~0U}; | 2032 | 1 | for (const auto X : R) { | 2033 | 1 | auto DFSNum = InstrToDFSNum(X); | 2034 | 1 | if (DFSNum < MinDFS.second) | 2035 | 1 | MinDFS = {X, DFSNum}; | 2036 | 1 | } | 2037 | 1 | return MinDFS.first; | 2038 | 1 | } |
NewGVN.cpp:llvm::Value* (anonymous namespace)::NewGVN::getMinDFSOfRange<llvm::Value, (anonymous namespace)::CongruenceClass>((anonymous namespace)::CongruenceClass const&) const Line | Count | Source | 2030 | 4 | T *NewGVN::getMinDFSOfRange(const Range &R) const { | 2031 | 4 | std::pair<T *, unsigned> MinDFS = {nullptr, ~0U}; | 2032 | 9 | for (const auto X : R) { | 2033 | 9 | auto DFSNum = InstrToDFSNum(X); | 2034 | 9 | if (DFSNum < MinDFS.second) | 2035 | 5 | MinDFS = {X, DFSNum}; | 2036 | 9 | } | 2037 | 4 | return MinDFS.first; | 2038 | 4 | } |
NewGVN.cpp:llvm::MemoryPhi const* (anonymous namespace)::NewGVN::getMinDFSOfRange<llvm::MemoryPhi const, llvm::iterator_range<llvm::SmallPtrSetIterator<llvm::MemoryPhi const*> > >(llvm::iterator_range<llvm::SmallPtrSetIterator<llvm::MemoryPhi const*> > const&) const Line | Count | Source | 2030 | 2 | T *NewGVN::getMinDFSOfRange(const Range &R) const { | 2031 | 2 | std::pair<T *, unsigned> MinDFS = {nullptr, ~0U}; | 2032 | 4 | for (const auto X : R) { | 2033 | 4 | auto DFSNum = InstrToDFSNum(X); | 2034 | 4 | if (DFSNum < MinDFS.second) | 2035 | 2 | MinDFS = {X, DFSNum}; | 2036 | 4 | } | 2037 | 2 | return MinDFS.first; | 2038 | 2 | } |
|
2039 | | |
2040 | | // This function returns the MemoryAccess that should be the next leader of |
2041 | | // congruence class CC, under the assumption that the current leader is going to |
2042 | | // disappear. |
2043 | 12 | const MemoryAccess *NewGVN::getNextMemoryLeader(CongruenceClass *CC) const { |
2044 | 12 | // TODO: If this ends up to slow, we can maintain a next memory leader like we |
2045 | 12 | // do for regular leaders. |
2046 | 12 | // Make sure there will be a leader to find. |
2047 | 12 | assert(!CC->definesNoMemory() && "Can't get next leader if there is none"); |
2048 | 12 | if (CC->getStoreCount() > 012 ) { |
2049 | 4 | if (auto *NL = dyn_cast_or_null<StoreInst>(CC->getNextLeader().first)) |
2050 | 3 | return getMemoryAccess(NL); |
2051 | 1 | // Find the store with the minimum DFS number. |
2052 | 1 | auto *V = getMinDFSOfRange<Value>(make_filter_range( |
2053 | 2 | *CC, [&](const Value *V) { return isa<StoreInst>(V); })); |
2054 | 4 | return getMemoryAccess(cast<StoreInst>(V)); |
2055 | 4 | } |
2056 | 12 | assert(CC->getStoreCount() == 0); |
2057 | 8 | |
2058 | 8 | // Given our assertion, hitting this part must mean |
2059 | 8 | // !OldClass->memory_empty() |
2060 | 8 | if (CC->memory_size() == 1) |
2061 | 6 | return *CC->memory_begin(); |
2062 | 2 | return getMinDFSOfRange<const MemoryPhi>(CC->memory()); |
2063 | 2 | } |
2064 | | |
2065 | | // This function returns the next value leader of a congruence class, under the |
2066 | | // assumption that the current leader is going away. This should end up being |
2067 | | // the next most dominating member. |
2068 | 50 | Value *NewGVN::getNextValueLeader(CongruenceClass *CC) const { |
2069 | 50 | // We don't need to sort members if there is only 1, and we don't care about |
2070 | 50 | // sorting the TOP class because everything either gets out of it or is |
2071 | 50 | // unreachable. |
2072 | 50 | |
2073 | 50 | if (CC->size() == 1 || 50 CC == TOPClass11 ) { |
2074 | 39 | return *(CC->begin()); |
2075 | 11 | } else if (11 CC->getNextLeader().first11 ) { |
2076 | 7 | ++NumGVNAvoidedSortedLeaderChanges; |
2077 | 7 | return CC->getNextLeader().first; |
2078 | 0 | } else { |
2079 | 4 | ++NumGVNSortedLeaderChanges; |
2080 | 4 | // NOTE: If this ends up to slow, we can maintain a dual structure for |
2081 | 4 | // member testing/insertion, or keep things mostly sorted, and sort only |
2082 | 4 | // here, or use SparseBitVector or .... |
2083 | 4 | return getMinDFSOfRange<Value>(*CC); |
2084 | 4 | } |
2085 | 0 | } |
2086 | | |
2087 | | // Move a MemoryAccess, currently in OldClass, to NewClass, including updates to |
2088 | | // the memory members, etc for the move. |
2089 | | // |
2090 | | // The invariants of this function are: |
2091 | | // |
2092 | | // - I must be moving to NewClass from OldClass |
2093 | | // - The StoreCount of OldClass and NewClass is expected to have been updated |
2094 | | // for I already if it is is a store. |
2095 | | // - The OldClass memory leader has not been updated yet if I was the leader. |
2096 | | void NewGVN::moveMemoryToNewCongruenceClass(Instruction *I, |
2097 | | MemoryAccess *InstMA, |
2098 | | CongruenceClass *OldClass, |
2099 | 485 | CongruenceClass *NewClass) { |
2100 | 485 | // If the leader is I, and we had a represenative MemoryAccess, it should |
2101 | 485 | // be the MemoryAccess of OldClass. |
2102 | 485 | assert((!InstMA || !OldClass->getMemoryLeader() || |
2103 | 485 | OldClass->getLeader() != I || |
2104 | 485 | MemoryAccessToClass.lookup(OldClass->getMemoryLeader()) == |
2105 | 485 | MemoryAccessToClass.lookup(InstMA)) && |
2106 | 485 | "Representative MemoryAccess mismatch"); |
2107 | 485 | // First, see what happens to the new class |
2108 | 485 | if (!NewClass->getMemoryLeader()485 ) { |
2109 | 465 | // Should be a new class, or a store becoming a leader of a new class. |
2110 | 465 | assert(NewClass->size() == 1 || |
2111 | 465 | (isa<StoreInst>(I) && NewClass->getStoreCount() == 1)); |
2112 | 465 | NewClass->setMemoryLeader(InstMA); |
2113 | 465 | // Mark it touched if we didn't just create a singleton |
2114 | 465 | DEBUG(dbgs() << "Memory class leader change for class " << NewClass->getID() |
2115 | 465 | << " due to new memory instruction becoming leader\n"); |
2116 | 465 | markMemoryLeaderChangeTouched(NewClass); |
2117 | 465 | } |
2118 | 485 | setMemoryClass(InstMA, NewClass); |
2119 | 485 | // Now, fixup the old class if necessary |
2120 | 485 | if (OldClass->getMemoryLeader() == InstMA485 ) { |
2121 | 33 | if (!OldClass->definesNoMemory()33 ) { |
2122 | 12 | OldClass->setMemoryLeader(getNextMemoryLeader(OldClass)); |
2123 | 12 | DEBUG(dbgs() << "Memory class leader change for class " |
2124 | 12 | << OldClass->getID() << " to " |
2125 | 12 | << *OldClass->getMemoryLeader() |
2126 | 12 | << " due to removal of old leader " << *InstMA << "\n"); |
2127 | 12 | markMemoryLeaderChangeTouched(OldClass); |
2128 | 12 | } else |
2129 | 21 | OldClass->setMemoryLeader(nullptr); |
2130 | 33 | } |
2131 | 485 | } |
2132 | | |
2133 | | // Move a value, currently in OldClass, to be part of NewClass |
2134 | | // Update OldClass and NewClass for the move (including changing leaders, etc). |
2135 | | void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E, |
2136 | | CongruenceClass *OldClass, |
2137 | 2.65k | CongruenceClass *NewClass) { |
2138 | 2.65k | if (I == OldClass->getNextLeader().first) |
2139 | 154 | OldClass->resetNextLeader(); |
2140 | 2.65k | |
2141 | 2.65k | OldClass->erase(I); |
2142 | 2.65k | NewClass->insert(I); |
2143 | 2.65k | |
2144 | 2.65k | if (NewClass->getLeader() != I) |
2145 | 948 | NewClass->addPossibleNextLeader({I, InstrToDFSNum(I)}); |
2146 | 2.65k | // Handle our special casing of stores. |
2147 | 2.65k | if (auto *SI2.65k = dyn_cast<StoreInst>(I)) { |
2148 | 276 | OldClass->decStoreCount(); |
2149 | 276 | // Okay, so when do we want to make a store a leader of a class? |
2150 | 276 | // If we have a store defined by an earlier load, we want the earlier load |
2151 | 276 | // to lead the class. |
2152 | 276 | // If we have a store defined by something else, we want the store to lead |
2153 | 276 | // the class so everything else gets the "something else" as a value. |
2154 | 276 | // If we have a store as the single member of the class, we want the store |
2155 | 276 | // as the leader |
2156 | 276 | if (NewClass->getStoreCount() == 0 && 276 !NewClass->getStoredValue()257 ) { |
2157 | 9 | // If it's a store expression we are using, it means we are not equivalent |
2158 | 9 | // to something earlier. |
2159 | 9 | if (auto *SE9 = dyn_cast<StoreExpression>(E)) { |
2160 | 9 | NewClass->setStoredValue(SE->getStoredValue()); |
2161 | 9 | markValueLeaderChangeTouched(NewClass); |
2162 | 9 | // Shift the new class leader to be the store |
2163 | 9 | DEBUG(dbgs() << "Changing leader of congruence class " |
2164 | 9 | << NewClass->getID() << " from " << *NewClass->getLeader() |
2165 | 9 | << " to " << *SI << " because store joined class\n"); |
2166 | 9 | // If we changed the leader, we have to mark it changed because we don't |
2167 | 9 | // know what it will do to symbolic evaluation. |
2168 | 9 | NewClass->setLeader(SI); |
2169 | 9 | } |
2170 | 9 | // We rely on the code below handling the MemoryAccess change. |
2171 | 9 | } |
2172 | 276 | NewClass->incStoreCount(); |
2173 | 276 | } |
2174 | 2.65k | // True if there is no memory instructions left in a class that had memory |
2175 | 2.65k | // instructions before. |
2176 | 2.65k | |
2177 | 2.65k | // If it's not a memory use, set the MemoryAccess equivalence |
2178 | 2.65k | auto *InstMA = dyn_cast_or_null<MemoryDef>(getMemoryAccess(I)); |
2179 | 2.65k | if (InstMA) |
2180 | 485 | moveMemoryToNewCongruenceClass(I, InstMA, OldClass, NewClass); |
2181 | 2.65k | ValueToClass[I] = NewClass; |
2182 | 2.65k | // See if we destroyed the class or need to swap leaders. |
2183 | 2.65k | if (OldClass->empty() && 2.65k OldClass != TOPClass494 ) { |
2184 | 271 | if (OldClass->getDefiningExpr()271 ) { |
2185 | 271 | DEBUG(dbgs() << "Erasing expression " << *OldClass->getDefiningExpr() |
2186 | 271 | << " from table\n"); |
2187 | 271 | // We erase it as an exact expression to make sure we don't just erase an |
2188 | 271 | // equivalent one. |
2189 | 271 | auto Iter = ExpressionToClass.find_as( |
2190 | 271 | ExactEqualsExpression(*OldClass->getDefiningExpr())); |
2191 | 271 | if (Iter != ExpressionToClass.end()) |
2192 | 264 | ExpressionToClass.erase(Iter); |
2193 | | #ifdef EXPENSIVE_CHECKS |
2194 | | assert( |
2195 | | (*OldClass->getDefiningExpr() != *E || ExpressionToClass.lookup(E)) && |
2196 | | "We erased the expression we just inserted, which should not happen"); |
2197 | | #endif |
2198 | | } |
2199 | 2.65k | } else if (2.38k OldClass->getLeader() == I2.38k ) { |
2200 | 50 | // When the leader changes, the value numbering of |
2201 | 50 | // everything may change due to symbolization changes, so we need to |
2202 | 50 | // reprocess. |
2203 | 50 | DEBUG(dbgs() << "Value class leader change for class " << OldClass->getID() |
2204 | 50 | << "\n"); |
2205 | 50 | ++NumGVNLeaderChanges; |
2206 | 50 | // Destroy the stored value if there are no more stores to represent it. |
2207 | 50 | // Note that this is basically clean up for the expression removal that |
2208 | 50 | // happens below. If we remove stores from a class, we may leave it as a |
2209 | 50 | // class of equivalent memory phis. |
2210 | 50 | if (OldClass->getStoreCount() == 050 ) { |
2211 | 45 | if (OldClass->getStoredValue()) |
2212 | 5 | OldClass->setStoredValue(nullptr); |
2213 | 45 | } |
2214 | 2.38k | OldClass->setLeader(getNextValueLeader(OldClass)); |
2215 | 2.38k | OldClass->resetNextLeader(); |
2216 | 2.38k | markValueLeaderChangeTouched(OldClass); |
2217 | 2.38k | } |
2218 | 2.65k | } |
2219 | | |
2220 | | // For a given expression, mark the phi of ops instructions that could have |
2221 | | // changed as a result. |
2222 | 2.65k | void NewGVN::markPhiOfOpsChanged(const Expression *E) { |
2223 | 2.65k | touchAndErase(ExpressionToPhiOfOps, ExactEqualsExpression(*E)); |
2224 | 2.65k | } |
2225 | | |
2226 | | // Perform congruence finding on a given value numbering expression. |
2227 | 2.89k | void NewGVN::performCongruenceFinding(Instruction *I, const Expression *E) { |
2228 | 2.89k | // This is guaranteed to return something, since it will at least find |
2229 | 2.89k | // TOP. |
2230 | 2.89k | |
2231 | 2.89k | CongruenceClass *IClass = ValueToClass.lookup(I); |
2232 | 2.89k | assert(IClass && "Should have found a IClass"); |
2233 | 2.89k | // Dead classes should have been eliminated from the mapping. |
2234 | 2.89k | assert(!IClass->isDead() && "Found a dead class"); |
2235 | 2.89k | |
2236 | 2.89k | CongruenceClass *EClass = nullptr; |
2237 | 2.89k | if (const auto *VE2.89k = dyn_cast<VariableExpression>(E)) { |
2238 | 243 | EClass = ValueToClass.lookup(VE->getVariableValue()); |
2239 | 2.89k | } else if (2.65k isa<DeadExpression>(E)2.65k ) { |
2240 | 0 | EClass = TOPClass; |
2241 | 0 | } |
2242 | 2.89k | if (!EClass2.89k ) { |
2243 | 2.65k | auto lookupResult = ExpressionToClass.insert({E, nullptr}); |
2244 | 2.65k | |
2245 | 2.65k | // If it's not in the value table, create a new congruence class. |
2246 | 2.65k | if (lookupResult.second2.65k ) { |
2247 | 2.01k | CongruenceClass *NewClass = createCongruenceClass(nullptr, E); |
2248 | 2.01k | auto place = lookupResult.first; |
2249 | 2.01k | place->second = NewClass; |
2250 | 2.01k | |
2251 | 2.01k | // Constants and variables should always be made the leader. |
2252 | 2.01k | if (const auto *CE2.01k = dyn_cast<ConstantExpression>(E)) { |
2253 | 310 | NewClass->setLeader(CE->getConstantValue()); |
2254 | 2.01k | } else if (const auto *1.70k SE1.70k = dyn_cast<StoreExpression>(E)) { |
2255 | 248 | StoreInst *SI = SE->getStoreInst(); |
2256 | 248 | NewClass->setLeader(SI); |
2257 | 248 | NewClass->setStoredValue(SE->getStoredValue()); |
2258 | 248 | // The RepMemoryAccess field will be filled in properly by the |
2259 | 248 | // moveValueToNewCongruenceClass call. |
2260 | 1.70k | } else { |
2261 | 1.45k | NewClass->setLeader(I); |
2262 | 1.45k | } |
2263 | 2.01k | assert(!isa<VariableExpression>(E) && |
2264 | 2.01k | "VariableExpression should have been handled already"); |
2265 | 2.01k | |
2266 | 2.01k | EClass = NewClass; |
2267 | 2.01k | DEBUG(dbgs() << "Created new congruence class for " << *I |
2268 | 2.01k | << " using expression " << *E << " at " << NewClass->getID() |
2269 | 2.01k | << " and leader " << *(NewClass->getLeader())); |
2270 | 2.01k | if (NewClass->getStoredValue()) |
2271 | 2.01k | DEBUG(dbgs() << " and stored value " << *(NewClass->getStoredValue())); |
2272 | 2.01k | DEBUG(dbgs() << "\n"); |
2273 | 2.65k | } else { |
2274 | 637 | EClass = lookupResult.first->second; |
2275 | 637 | if (isa<ConstantExpression>(E)) |
2276 | 637 | assert((isa<Constant>(EClass->getLeader()) || |
2277 | 637 | (EClass->getStoredValue() && |
2278 | 637 | isa<Constant>(EClass->getStoredValue()))) && |
2279 | 637 | "Any class with a constant expression should have a " |
2280 | 637 | "constant leader"); |
2281 | 637 | |
2282 | 637 | assert(EClass && "Somehow don't have an eclass"); |
2283 | 637 | |
2284 | 637 | assert(!EClass->isDead() && "We accidentally looked up a dead class"); |
2285 | 637 | } |
2286 | 2.65k | } |
2287 | 2.89k | bool ClassChanged = IClass != EClass; |
2288 | 2.89k | bool LeaderChanged = LeaderChanges.erase(I); |
2289 | 2.89k | if (ClassChanged || 2.89k LeaderChanged242 ) { |
2290 | 2.66k | DEBUG(dbgs() << "New class " << EClass->getID() << " for expression " << *E |
2291 | 2.66k | << "\n"); |
2292 | 2.66k | if (ClassChanged2.66k ) { |
2293 | 2.65k | moveValueToNewCongruenceClass(I, E, IClass, EClass); |
2294 | 2.65k | markPhiOfOpsChanged(E); |
2295 | 2.65k | } |
2296 | 2.66k | |
2297 | 2.66k | markUsersTouched(I); |
2298 | 2.66k | if (MemoryAccess *MA = getMemoryAccess(I)) |
2299 | 1.00k | markMemoryUsersTouched(MA); |
2300 | 2.66k | if (auto *CI = dyn_cast<CmpInst>(I)) |
2301 | 250 | markPredicateUsersTouched(CI); |
2302 | 2.66k | } |
2303 | 2.89k | // If we changed the class of the store, we want to ensure nothing finds the |
2304 | 2.89k | // old store expression. In particular, loads do not compare against stored |
2305 | 2.89k | // value, so they will find old store expressions (and associated class |
2306 | 2.89k | // mappings) if we leave them in the table. |
2307 | 2.89k | if (ClassChanged && 2.89k isa<StoreInst>(I)2.65k ) { |
2308 | 276 | auto *OldE = ValueToExpression.lookup(I); |
2309 | 276 | // It could just be that the old class died. We don't want to erase it if we |
2310 | 276 | // just moved classes. |
2311 | 276 | if (OldE && 276 isa<StoreExpression>(OldE)35 && *E != *OldE35 ) { |
2312 | 35 | // Erase this as an exact expression to ensure we don't erase expressions |
2313 | 35 | // equivalent to it. |
2314 | 35 | auto Iter = ExpressionToClass.find_as(ExactEqualsExpression(*OldE)); |
2315 | 35 | if (Iter != ExpressionToClass.end()) |
2316 | 7 | ExpressionToClass.erase(Iter); |
2317 | 35 | } |
2318 | 276 | } |
2319 | 2.89k | ValueToExpression[I] = E; |
2320 | 2.89k | } |
2321 | | |
2322 | | // Process the fact that Edge (from, to) is reachable, including marking |
2323 | | // any newly reachable blocks and instructions for processing. |
2324 | 1.05k | void NewGVN::updateReachableEdge(BasicBlock *From, BasicBlock *To) { |
2325 | 1.05k | // Check if the Edge was reachable before. |
2326 | 1.05k | if (ReachableEdges.insert({From, To}).second1.05k ) { |
2327 | 985 | // If this block wasn't reachable before, all instructions are touched. |
2328 | 985 | if (ReachableBlocks.insert(To).second985 ) { |
2329 | 718 | DEBUG(dbgs() << "Block " << getBlockName(To) << " marked reachable\n"); |
2330 | 718 | const auto &InstRange = BlockInstRange.lookup(To); |
2331 | 718 | TouchedInstructions.set(InstRange.first, InstRange.second); |
2332 | 985 | } else { |
2333 | 267 | DEBUG(dbgs() << "Block " << getBlockName(To) |
2334 | 267 | << " was reachable, but new edge {" << getBlockName(From) |
2335 | 267 | << "," << getBlockName(To) << "} to it found\n"); |
2336 | 267 | |
2337 | 267 | // We've made an edge reachable to an existing block, which may |
2338 | 267 | // impact predicates. Otherwise, only mark the phi nodes as touched, as |
2339 | 267 | // they are the only thing that depend on new edges. Anything using their |
2340 | 267 | // values will get propagated to if necessary. |
2341 | 267 | if (MemoryAccess *MemPhi = getMemoryAccess(To)) |
2342 | 133 | TouchedInstructions.set(InstrToDFSNum(MemPhi)); |
2343 | 267 | |
2344 | 267 | auto BI = To->begin(); |
2345 | 451 | while (isa<PHINode>(BI)451 ) { |
2346 | 184 | TouchedInstructions.set(InstrToDFSNum(&*BI)); |
2347 | 184 | ++BI; |
2348 | 184 | } |
2349 | 2 | for_each_found(PHIOfOpsPHIs, To, [&](const PHINode *I) { |
2350 | 2 | TouchedInstructions.set(InstrToDFSNum(I)); |
2351 | 2 | }); |
2352 | 267 | } |
2353 | 985 | } |
2354 | 1.05k | } |
2355 | | |
2356 | | // Given a predicate condition (from a switch, cmp, or whatever) and a block, |
2357 | | // see if we know some constant value for it already. |
2358 | 364 | Value *NewGVN::findConditionEquivalence(Value *Cond) const { |
2359 | 364 | auto Result = lookupOperandLeader(Cond); |
2360 | 364 | return isa<Constant>(Result) ? Result181 : nullptr183 ; |
2361 | 364 | } |
2362 | | |
2363 | | // Process the outgoing edges of a block for reachability. |
2364 | 1.06k | void NewGVN::processOutgoingEdges(TerminatorInst *TI, BasicBlock *B) { |
2365 | 1.06k | // Evaluate reachability of terminator instruction. |
2366 | 1.06k | BranchInst *BR; |
2367 | 1.06k | if ((BR = dyn_cast<BranchInst>(TI)) && 1.06k BR->isConditional()712 ) { |
2368 | 347 | Value *Cond = BR->getCondition(); |
2369 | 347 | Value *CondEvaluated = findConditionEquivalence(Cond); |
2370 | 347 | if (!CondEvaluated347 ) { |
2371 | 178 | if (auto *I178 = dyn_cast<Instruction>(Cond)) { |
2372 | 159 | const Expression *E = createExpression(I); |
2373 | 159 | if (const auto *CE159 = dyn_cast<ConstantExpression>(E)) { |
2374 | 0 | CondEvaluated = CE->getConstantValue(); |
2375 | 0 | } |
2376 | 178 | } else if (19 isa<ConstantInt>(Cond)19 ) { |
2377 | 0 | CondEvaluated = Cond; |
2378 | 0 | } |
2379 | 178 | } |
2380 | 347 | ConstantInt *CI; |
2381 | 347 | BasicBlock *TrueSucc = BR->getSuccessor(0); |
2382 | 347 | BasicBlock *FalseSucc = BR->getSuccessor(1); |
2383 | 347 | if (CondEvaluated && 347 (CI = dyn_cast<ConstantInt>(CondEvaluated))169 ) { |
2384 | 80 | if (CI->isOne()80 ) { |
2385 | 30 | DEBUG(dbgs() << "Condition for Terminator " << *TI |
2386 | 30 | << " evaluated to true\n"); |
2387 | 30 | updateReachableEdge(B, TrueSucc); |
2388 | 80 | } else if (50 CI->isZero()50 ) { |
2389 | 50 | DEBUG(dbgs() << "Condition for Terminator " << *TI |
2390 | 50 | << " evaluated to false\n"); |
2391 | 50 | updateReachableEdge(B, FalseSucc); |
2392 | 50 | } |
2393 | 347 | } else { |
2394 | 267 | updateReachableEdge(B, TrueSucc); |
2395 | 267 | updateReachableEdge(B, FalseSucc); |
2396 | 267 | } |
2397 | 1.06k | } else if (auto *720 SI720 = dyn_cast<SwitchInst>(TI)) { |
2398 | 17 | // For switches, propagate the case values into the case |
2399 | 17 | // destinations. |
2400 | 17 | |
2401 | 17 | // Remember how many outgoing edges there are to every successor. |
2402 | 17 | SmallDenseMap<BasicBlock *, unsigned, 16> SwitchEdges; |
2403 | 17 | |
2404 | 17 | Value *SwitchCond = SI->getCondition(); |
2405 | 17 | Value *CondEvaluated = findConditionEquivalence(SwitchCond); |
2406 | 17 | // See if we were able to turn this switch statement into a constant. |
2407 | 17 | if (CondEvaluated && 17 isa<ConstantInt>(CondEvaluated)12 ) { |
2408 | 5 | auto *CondVal = cast<ConstantInt>(CondEvaluated); |
2409 | 5 | // We should be able to get case value for this. |
2410 | 5 | auto Case = *SI->findCaseValue(CondVal); |
2411 | 5 | if (Case.getCaseSuccessor() == SI->getDefaultDest()5 ) { |
2412 | 4 | // We proved the value is outside of the range of the case. |
2413 | 4 | // We can't do anything other than mark the default dest as reachable, |
2414 | 4 | // and go home. |
2415 | 4 | updateReachableEdge(B, SI->getDefaultDest()); |
2416 | 4 | return; |
2417 | 4 | } |
2418 | 1 | // Now get where it goes and mark it reachable. |
2419 | 1 | BasicBlock *TargetBlock = Case.getCaseSuccessor(); |
2420 | 1 | updateReachableEdge(B, TargetBlock); |
2421 | 17 | } else { |
2422 | 60 | for (unsigned i = 0, e = SI->getNumSuccessors(); i != e60 ; ++i48 ) { |
2423 | 48 | BasicBlock *TargetBlock = SI->getSuccessor(i); |
2424 | 48 | ++SwitchEdges[TargetBlock]; |
2425 | 48 | updateReachableEdge(B, TargetBlock); |
2426 | 48 | } |
2427 | 12 | } |
2428 | 720 | } else { |
2429 | 703 | // Otherwise this is either unconditional, or a type we have no |
2430 | 703 | // idea about. Just mark successors as reachable. |
2431 | 1.08k | for (unsigned i = 0, e = TI->getNumSuccessors(); i != e1.08k ; ++i383 ) { |
2432 | 383 | BasicBlock *TargetBlock = TI->getSuccessor(i); |
2433 | 383 | updateReachableEdge(B, TargetBlock); |
2434 | 383 | } |
2435 | 703 | |
2436 | 703 | // This also may be a memory defining terminator, in which case, set it |
2437 | 703 | // equivalent only to itself. |
2438 | 703 | // |
2439 | 703 | auto *MA = getMemoryAccess(TI); |
2440 | 703 | if (MA && 703 !isa<MemoryUse>(MA)9 ) { |
2441 | 9 | auto *CC = ensureLeaderOfMemoryClass(MA); |
2442 | 9 | if (setMemoryClass(MA, CC)) |
2443 | 8 | markMemoryUsersTouched(MA); |
2444 | 9 | } |
2445 | 720 | } |
2446 | 1.06k | } |
2447 | | |
2448 | | // Remove the PHI of Ops PHI for I |
2449 | 6 | void NewGVN::removePhiOfOps(Instruction *I, PHINode *PHITemp) { |
2450 | 6 | InstrDFS.erase(PHITemp); |
2451 | 6 | // It's still a temp instruction. We keep it in the array so it gets erased. |
2452 | 6 | // However, it's no longer used by I, or in the block/ |
2453 | 6 | PHIOfOpsPHIs[getBlockForValue(PHITemp)].erase(PHITemp); |
2454 | 6 | TempToBlock.erase(PHITemp); |
2455 | 6 | RealToTemp.erase(I); |
2456 | 6 | } |
2457 | | |
2458 | | // Add PHI Op in BB as a PHI of operations version of ExistingValue. |
2459 | | void NewGVN::addPhiOfOps(PHINode *Op, BasicBlock *BB, |
2460 | 21 | Instruction *ExistingValue) { |
2461 | 21 | InstrDFS[Op] = InstrToDFSNum(ExistingValue); |
2462 | 21 | AllTempInstructions.insert(Op); |
2463 | 21 | PHIOfOpsPHIs[BB].insert(Op); |
2464 | 21 | TempToBlock[Op] = BB; |
2465 | 21 | RealToTemp[ExistingValue] = Op; |
2466 | 21 | } |
2467 | | |
2468 | 468 | static bool okayForPHIOfOps(const Instruction *I) { |
2469 | 468 | if (!EnablePhiOfOps) |
2470 | 0 | return false; |
2471 | 468 | return isa<BinaryOperator>(I) || 468 isa<SelectInst>(I)279 || isa<CmpInst>(I)265 || |
2472 | 208 | isa<LoadInst>(I); |
2473 | 468 | } |
2474 | | |
2475 | | // Return true if this operand will be safe to use for phi of ops. |
2476 | | // |
2477 | | // The reason some operands are unsafe is that we are not trying to recursively |
2478 | | // translate everything back through phi nodes. We actually expect some lookups |
2479 | | // of expressions to fail. In particular, a lookup where the expression cannot |
2480 | | // exist in the predecessor. This is true even if the expression, as shown, can |
2481 | | // be determined to be constant. |
2482 | | bool NewGVN::OpIsSafeForPHIOfOps(Value *V, Instruction *OrigInst, |
2483 | | const BasicBlock *PHIBlock, |
2484 | 133 | SmallPtrSetImpl<const Value *> &Visited) { |
2485 | 133 | if (!isa<Instruction>(V)) |
2486 | 59 | return true; |
2487 | 74 | auto OISIt = OpSafeForPHIOfOps.find(V); |
2488 | 74 | if (OISIt != OpSafeForPHIOfOps.end()) |
2489 | 39 | return OISIt->second; |
2490 | 35 | // Keep walking until we either dominate the phi block, or hit a phi, or run |
2491 | 35 | // out of things to check. |
2492 | 35 | if (35 DT->properlyDominates(getBlockForValue(V), PHIBlock)35 ) { |
2493 | 15 | OpSafeForPHIOfOps.insert({V, true}); |
2494 | 15 | return true; |
2495 | 15 | } |
2496 | 20 | // PHI in the same block. |
2497 | 20 | if (20 isa<PHINode>(V) && 20 getBlockForValue(V) == PHIBlock6 ) { |
2498 | 5 | OpSafeForPHIOfOps.insert({V, false}); |
2499 | 5 | return false; |
2500 | 5 | } |
2501 | 15 | for (auto Op : cast<Instruction>(V)->operand_values()) 15 { |
2502 | 21 | if (!isa<Instruction>(Op)) |
2503 | 9 | continue; |
2504 | 12 | // See if we already know the answer for this node. |
2505 | 12 | auto OISIt = OpSafeForPHIOfOps.find(Op); |
2506 | 12 | if (OISIt != OpSafeForPHIOfOps.end()12 ) { |
2507 | 3 | if (!OISIt->second3 ) { |
2508 | 0 | OpSafeForPHIOfOps.insert({V, false}); |
2509 | 0 | return false; |
2510 | 0 | } |
2511 | 12 | } |
2512 | 12 | if (12 !Visited.insert(Op).second12 ) |
2513 | 0 | continue; |
2514 | 12 | if (12 !OpIsSafeForPHIOfOps(Op, OrigInst, PHIBlock, Visited)12 ) { |
2515 | 6 | OpSafeForPHIOfOps.insert({V, false}); |
2516 | 6 | return false; |
2517 | 6 | } |
2518 | 9 | } |
2519 | 9 | OpSafeForPHIOfOps.insert({V, true}); |
2520 | 9 | return true; |
2521 | 9 | } |
2522 | | |
2523 | | // Try to find a leader for instruction TransInst, which is a phi translated |
2524 | | // version of something in our original program. Visited is used to ensure we |
2525 | | // don't infinite loop during translations of cycles. OrigInst is the |
2526 | | // instruction in the original program, and PredBB is the predecessor we |
2527 | | // translated it through. |
2528 | | Value *NewGVN::findLeaderForInst(Instruction *TransInst, |
2529 | | SmallPtrSetImpl<Value *> &Visited, |
2530 | | MemoryAccess *MemAccess, Instruction *OrigInst, |
2531 | 126 | BasicBlock *PredBB) { |
2532 | 126 | unsigned IDFSNum = InstrToDFSNum(OrigInst); |
2533 | 126 | // Make sure it's marked as a temporary instruction. |
2534 | 126 | AllTempInstructions.insert(TransInst); |
2535 | 126 | // and make sure anything that tries to add it's DFS number is |
2536 | 126 | // redirected to the instruction we are making a phi of ops |
2537 | 126 | // for. |
2538 | 126 | TempToBlock.insert({TransInst, PredBB}); |
2539 | 126 | InstrDFS.insert({TransInst, IDFSNum}); |
2540 | 126 | |
2541 | 126 | const Expression *E = performSymbolicEvaluation(TransInst, Visited); |
2542 | 126 | InstrDFS.erase(TransInst); |
2543 | 126 | AllTempInstructions.erase(TransInst); |
2544 | 126 | TempToBlock.erase(TransInst); |
2545 | 126 | if (MemAccess) |
2546 | 13 | TempToMemory.erase(TransInst); |
2547 | 126 | if (!E) |
2548 | 0 | return nullptr; |
2549 | 126 | auto *FoundVal = findPHIOfOpsLeader(E, OrigInst, PredBB); |
2550 | 126 | if (!FoundVal126 ) { |
2551 | 69 | ExpressionToPhiOfOps[E].insert(OrigInst); |
2552 | 69 | DEBUG(dbgs() << "Cannot find phi of ops operand for " << *TransInst |
2553 | 69 | << " in block " << getBlockName(PredBB) << "\n"); |
2554 | 69 | return nullptr; |
2555 | 69 | } |
2556 | 57 | if (auto *57 SI57 = dyn_cast<StoreInst>(FoundVal)) |
2557 | 1 | FoundVal = SI->getValueOperand(); |
2558 | 126 | return FoundVal; |
2559 | 126 | } |
2560 | | |
2561 | | // When we see an instruction that is an op of phis, generate the equivalent phi |
2562 | | // of ops form. |
2563 | | const Expression * |
2564 | | NewGVN::makePossiblePhiOfOps(Instruction *I, |
2565 | 169 | SmallPtrSetImpl<Value *> &Visited) { |
2566 | 169 | if (!okayForPHIOfOps(I)) |
2567 | 0 | return nullptr; |
2568 | 169 | |
2569 | 169 | if (169 !Visited.insert(I).second169 ) |
2570 | 0 | return nullptr; |
2571 | 169 | // For now, we require the instruction be cycle free because we don't |
2572 | 169 | // *always* create a phi of ops for instructions that could be done as phi |
2573 | 169 | // of ops, we only do it if we think it is useful. If we did do it all the |
2574 | 169 | // time, we could remove the cycle free check. |
2575 | 169 | if (169 !isCycleFree(I)169 ) |
2576 | 68 | return nullptr; |
2577 | 101 | |
2578 | 101 | SmallPtrSet<const Value *, 8> ProcessedPHIs; |
2579 | 101 | // TODO: We don't do phi translation on memory accesses because it's |
2580 | 101 | // complicated. For a load, we'd need to be able to simulate a new memoryuse, |
2581 | 101 | // which we don't have a good way of doing ATM. |
2582 | 101 | auto *MemAccess = getMemoryAccess(I); |
2583 | 101 | // If the memory operation is defined by a memory operation this block that |
2584 | 101 | // isn't a MemoryPhi, transforming the pointer backwards through a scalar phi |
2585 | 101 | // can't help, as it would still be killed by that memory operation. |
2586 | 101 | if (MemAccess && 101 !isa<MemoryPhi>(MemAccess->getDefiningAccess())13 && |
2587 | 6 | MemAccess->getDefiningAccess()->getBlock() == I->getParent()) |
2588 | 0 | return nullptr; |
2589 | 101 | |
2590 | 101 | SmallPtrSet<const Value *, 10> VisitedOps; |
2591 | 101 | // Convert op of phis to phi of ops |
2592 | 121 | for (auto &Op : I->operands()) { |
2593 | 121 | if (!isa<PHINode>(Op)) |
2594 | 20 | continue; |
2595 | 101 | auto *OpPHI = cast<PHINode>(Op); |
2596 | 101 | // No point in doing this for one-operand phis. |
2597 | 101 | if (OpPHI->getNumOperands() == 1) |
2598 | 1 | continue; |
2599 | 100 | if (100 !DebugCounter::shouldExecute(PHIOfOpsCounter)100 ) |
2600 | 0 | return nullptr; |
2601 | 100 | SmallVector<std::pair<Value *, BasicBlock *>, 4> Ops; |
2602 | 100 | auto *PHIBlock = getBlockForValue(OpPHI); |
2603 | 144 | for (auto PredBB : OpPHI->blocks()) { |
2604 | 144 | Value *FoundVal = nullptr; |
2605 | 144 | // We could just skip unreachable edges entirely but it's tricky to do |
2606 | 144 | // with rewriting existing phi nodes. |
2607 | 144 | if (ReachableEdges.count({PredBB, PHIBlock})144 ) { |
2608 | 133 | // Clone the instruction, create an expression from it, and see if we |
2609 | 133 | // have a leader. |
2610 | 133 | Instruction *ValueOp = I->clone(); |
2611 | 133 | if (MemAccess) |
2612 | 13 | TempToMemory.insert({ValueOp, MemAccess}); |
2613 | 133 | bool SafeForPHIOfOps = true; |
2614 | 133 | VisitedOps.clear(); |
2615 | 262 | for (auto &Op : ValueOp->operands()) { |
2616 | 262 | auto *OrigOp = &*Op; |
2617 | 262 | Op = Op->DoPHITranslation(PHIBlock, PredBB); |
2618 | 262 | // When this operand changes, it could change whether there is a |
2619 | 262 | // leader for us or not. |
2620 | 262 | addAdditionalUsers(Op, I); |
2621 | 262 | // If we phi-translated the op, it must be safe. |
2622 | 262 | SafeForPHIOfOps = SafeForPHIOfOps && |
2623 | 255 | (Op != OrigOp || |
2624 | 255 | OpIsSafeForPHIOfOps(Op, I, PHIBlock, VisitedOps)); |
2625 | 262 | } |
2626 | 133 | // FIXME: For those things that are not safe We could generate |
2627 | 133 | // expressions all the way down, and see if this comes out to a |
2628 | 133 | // constant. For anything where that is true, and unsafe, we should |
2629 | 133 | // have made a phi-of-ops (or value numbered it equivalent to something) |
2630 | 133 | // for the pieces already. |
2631 | 7 | FoundVal = !SafeForPHIOfOps ? nullptr |
2632 | 126 | : findLeaderForInst(ValueOp, Visited, |
2633 | 126 | MemAccess, I, PredBB); |
2634 | 133 | ValueOp->deleteValue(); |
2635 | 133 | if (!FoundVal) |
2636 | 76 | return nullptr; |
2637 | 11 | } else { |
2638 | 11 | DEBUG(dbgs() << "Skipping phi of ops operand for incoming block " |
2639 | 11 | << getBlockName(PredBB) |
2640 | 11 | << " because the block is unreachable\n"); |
2641 | 11 | FoundVal = UndefValue::get(I->getType()); |
2642 | 11 | } |
2643 | 144 | |
2644 | 68 | Ops.push_back({FoundVal, PredBB}); |
2645 | 68 | DEBUG(dbgs() << "Found phi of ops operand " << *FoundVal << " in " |
2646 | 144 | << getBlockName(PredBB) << "\n"); |
2647 | 144 | } |
2648 | 24 | auto *ValuePHI = RealToTemp.lookup(I); |
2649 | 24 | bool NewPHI = false; |
2650 | 24 | if (!ValuePHI24 ) { |
2651 | 21 | ValuePHI = |
2652 | 21 | PHINode::Create(I->getType(), OpPHI->getNumOperands(), "phiofops"); |
2653 | 21 | addPhiOfOps(ValuePHI, PHIBlock, I); |
2654 | 21 | NewPHI = true; |
2655 | 21 | NumGVNPHIOfOpsCreated++; |
2656 | 21 | } |
2657 | 24 | if (NewPHI24 ) { |
2658 | 21 | for (auto PHIOp : Ops) |
2659 | 42 | ValuePHI->addIncoming(PHIOp.first, PHIOp.second); |
2660 | 24 | } else { |
2661 | 3 | unsigned int i = 0; |
2662 | 6 | for (auto PHIOp : Ops) { |
2663 | 6 | ValuePHI->setIncomingValue(i, PHIOp.first); |
2664 | 6 | ValuePHI->setIncomingBlock(i, PHIOp.second); |
2665 | 6 | ++i; |
2666 | 6 | } |
2667 | 3 | } |
2668 | 24 | |
2669 | 24 | DEBUG(dbgs() << "Created phi of ops " << *ValuePHI << " for " << *I |
2670 | 24 | << "\n"); |
2671 | 24 | return performSymbolicEvaluation(ValuePHI, Visited); |
2672 | 1 | } |
2673 | 1 | return nullptr; |
2674 | 1 | } |
2675 | | |
2676 | | // The algorithm initially places the values of the routine in the TOP |
2677 | | // congruence class. The leader of TOP is the undetermined value `undef`. |
2678 | | // When the algorithm has finished, values still in TOP are unreachable. |
2679 | 307 | void NewGVN::initializeCongruenceClasses(Function &F) { |
2680 | 307 | NextCongruenceNum = 0; |
2681 | 307 | |
2682 | 307 | // Note that even though we use the live on entry def as a representative |
2683 | 307 | // MemoryAccess, it is *not* the same as the actual live on entry def. We |
2684 | 307 | // have no real equivalemnt to undef for MemoryAccesses, and so we really |
2685 | 307 | // should be checking whether the MemoryAccess is top if we want to know if it |
2686 | 307 | // is equivalent to everything. Otherwise, what this really signifies is that |
2687 | 307 | // the access "it reaches all the way back to the beginning of the function" |
2688 | 307 | |
2689 | 307 | // Initialize all other instructions to be in TOP class. |
2690 | 307 | TOPClass = createCongruenceClass(nullptr, nullptr); |
2691 | 307 | TOPClass->setMemoryLeader(MSSA->getLiveOnEntryDef()); |
2692 | 307 | // The live on entry def gets put into it's own class |
2693 | 307 | MemoryAccessToClass[MSSA->getLiveOnEntryDef()] = |
2694 | 307 | createMemoryClass(MSSA->getLiveOnEntryDef()); |
2695 | 307 | |
2696 | 1.18k | for (auto DTN : nodes(DT)) { |
2697 | 1.18k | BasicBlock *BB = DTN->getBlock(); |
2698 | 1.18k | // All MemoryAccesses are equivalent to live on entry to start. They must |
2699 | 1.18k | // be initialized to something so that initial changes are noticed. For |
2700 | 1.18k | // the maximal answer, we initialize them all to be the same as |
2701 | 1.18k | // liveOnEntry. |
2702 | 1.18k | auto *MemoryBlockDefs = MSSA->getBlockDefs(BB); |
2703 | 1.18k | if (MemoryBlockDefs) |
2704 | 386 | for (const auto &Def : *MemoryBlockDefs) 386 { |
2705 | 606 | MemoryAccessToClass[&Def] = TOPClass; |
2706 | 606 | auto *MD = dyn_cast<MemoryDef>(&Def); |
2707 | 606 | // Insert the memory phis into the member list. |
2708 | 606 | if (!MD606 ) { |
2709 | 128 | const MemoryPhi *MP = cast<MemoryPhi>(&Def); |
2710 | 128 | TOPClass->memory_insert(MP); |
2711 | 128 | MemoryPhiState.insert({MP, MPS_TOP}); |
2712 | 128 | } |
2713 | 606 | |
2714 | 606 | if (MD && 606 isa<StoreInst>(MD->getMemoryInst())478 ) |
2715 | 257 | TOPClass->incStoreCount(); |
2716 | 386 | } |
2717 | 3.52k | for (auto &I : *BB) { |
2718 | 3.52k | // TODO: Move to helper |
2719 | 3.52k | if (isa<PHINode>(&I)) |
2720 | 212 | for (auto *U : I.users()) |
2721 | 308 | if (auto *308 UInst308 = dyn_cast<Instruction>(U)) |
2722 | 308 | if (308 InstrToDFSNum(UInst) != 0 && 308 okayForPHIOfOps(UInst)299 ) |
2723 | 118 | PHINodeUses.insert(UInst); |
2724 | 3.52k | // Don't insert void terminators into the class. We don't value number |
2725 | 3.52k | // them, and they just end up sitting in TOP. |
2726 | 3.52k | if (isa<TerminatorInst>(I) && 3.52k I.getType()->isVoidTy()1.18k ) |
2727 | 1.18k | continue; |
2728 | 2.34k | TOPClass->insert(&I); |
2729 | 2.34k | ValueToClass[&I] = TOPClass; |
2730 | 2.34k | } |
2731 | 1.18k | } |
2732 | 307 | |
2733 | 307 | // Initialize arguments to be in their own unique congruence classes |
2734 | 307 | for (auto &FA : F.args()) |
2735 | 421 | createSingletonCongruenceClass(&FA); |
2736 | 307 | } |
2737 | | |
2738 | 307 | void NewGVN::cleanupTables() { |
2739 | 3.46k | for (unsigned i = 0, e = CongruenceClasses.size(); i != e3.46k ; ++i3.15k ) { |
2740 | 3.15k | DEBUG(dbgs() << "Congruence class " << CongruenceClasses[i]->getID() |
2741 | 3.15k | << " has " << CongruenceClasses[i]->size() << " members\n"); |
2742 | 3.15k | // Make sure we delete the congruence class (probably worth switching to |
2743 | 3.15k | // a unique_ptr at some point. |
2744 | 3.15k | delete CongruenceClasses[i]; |
2745 | 3.15k | CongruenceClasses[i] = nullptr; |
2746 | 3.15k | } |
2747 | 307 | |
2748 | 307 | // Destroy the value expressions |
2749 | 307 | SmallVector<Instruction *, 8> TempInst(AllTempInstructions.begin(), |
2750 | 307 | AllTempInstructions.end()); |
2751 | 307 | AllTempInstructions.clear(); |
2752 | 307 | |
2753 | 307 | // We have to drop all references for everything first, so there are no uses |
2754 | 307 | // left as we delete them. |
2755 | 8 | for (auto *I : TempInst) { |
2756 | 8 | I->dropAllReferences(); |
2757 | 8 | } |
2758 | 307 | |
2759 | 315 | while (!TempInst.empty()315 ) { |
2760 | 8 | auto *I = TempInst.back(); |
2761 | 8 | TempInst.pop_back(); |
2762 | 8 | I->deleteValue(); |
2763 | 8 | } |
2764 | 307 | |
2765 | 307 | ValueToClass.clear(); |
2766 | 307 | ArgRecycler.clear(ExpressionAllocator); |
2767 | 307 | ExpressionAllocator.Reset(); |
2768 | 307 | CongruenceClasses.clear(); |
2769 | 307 | ExpressionToClass.clear(); |
2770 | 307 | ValueToExpression.clear(); |
2771 | 307 | RealToTemp.clear(); |
2772 | 307 | AdditionalUsers.clear(); |
2773 | 307 | ExpressionToPhiOfOps.clear(); |
2774 | 307 | TempToBlock.clear(); |
2775 | 307 | TempToMemory.clear(); |
2776 | 307 | PHIOfOpsPHIs.clear(); |
2777 | 307 | PHINodeUses.clear(); |
2778 | 307 | OpSafeForPHIOfOps.clear(); |
2779 | 307 | ReachableBlocks.clear(); |
2780 | 307 | ReachableEdges.clear(); |
2781 | | #ifndef NDEBUG |
2782 | | ProcessedCount.clear(); |
2783 | | #endif |
2784 | | InstrDFS.clear(); |
2785 | 307 | InstructionsToErase.clear(); |
2786 | 307 | DFSToInstr.clear(); |
2787 | 307 | BlockInstRange.clear(); |
2788 | 307 | TouchedInstructions.clear(); |
2789 | 307 | MemoryAccessToClass.clear(); |
2790 | 307 | PredicateToUsers.clear(); |
2791 | 307 | MemoryToUsers.clear(); |
2792 | 307 | } |
2793 | | |
2794 | | // Assign local DFS number mapping to instructions, and leave space for Value |
2795 | | // PHI's. |
2796 | | std::pair<unsigned, unsigned> NewGVN::assignDFSNumbers(BasicBlock *B, |
2797 | 1.18k | unsigned Start) { |
2798 | 1.18k | unsigned End = Start; |
2799 | 1.18k | if (MemoryAccess *MemPhi1.18k = getMemoryAccess(B)) { |
2800 | 128 | InstrDFS[MemPhi] = End++; |
2801 | 128 | DFSToInstr.emplace_back(MemPhi); |
2802 | 128 | } |
2803 | 1.18k | |
2804 | 1.18k | // Then the real block goes next. |
2805 | 3.52k | for (auto &I : *B) { |
2806 | 3.52k | // There's no need to call isInstructionTriviallyDead more than once on |
2807 | 3.52k | // an instruction. Therefore, once we know that an instruction is dead |
2808 | 3.52k | // we change its DFS number so that it doesn't get value numbered. |
2809 | 3.52k | if (isInstructionTriviallyDead(&I, TLI)3.52k ) { |
2810 | 110 | InstrDFS[&I] = 0; |
2811 | 110 | DEBUG(dbgs() << "Skipping trivially dead instruction " << I << "\n"); |
2812 | 110 | markInstructionForDeletion(&I); |
2813 | 110 | continue; |
2814 | 110 | } |
2815 | 3.41k | InstrDFS[&I] = End++; |
2816 | 3.41k | DFSToInstr.emplace_back(&I); |
2817 | 3.41k | } |
2818 | 1.18k | |
2819 | 1.18k | // All of the range functions taken half-open ranges (open on the end side). |
2820 | 1.18k | // So we do not subtract one from count, because at this point it is one |
2821 | 1.18k | // greater than the last instruction. |
2822 | 1.18k | return std::make_pair(Start, End); |
2823 | 1.18k | } |
2824 | | |
2825 | 5.21k | void NewGVN::updateProcessedCount(const Value *V) { |
2826 | | #ifndef NDEBUG |
2827 | | if (ProcessedCount.count(V) == 0) { |
2828 | | ProcessedCount.insert({V, 1}); |
2829 | | } else { |
2830 | | ++ProcessedCount[V]; |
2831 | | assert(ProcessedCount[V] < 100 && |
2832 | | "Seem to have processed the same Value a lot"); |
2833 | | } |
2834 | | #endif |
2835 | | } |
2836 | | // Evaluate MemoryPhi nodes symbolically, just like PHI nodes |
2837 | 222 | void NewGVN::valueNumberMemoryPhi(MemoryPhi *MP) { |
2838 | 222 | // If all the arguments are the same, the MemoryPhi has the same value as the |
2839 | 222 | // argument. Filter out unreachable blocks and self phis from our operands. |
2840 | 222 | // TODO: We could do cycle-checking on the memory phis to allow valueizing for |
2841 | 222 | // self-phi checking. |
2842 | 222 | const BasicBlock *PHIBlock = MP->getBlock(); |
2843 | 468 | auto Filtered = make_filter_range(MP->operands(), [&](const Use &U) { |
2844 | 468 | return cast<MemoryAccess>(U) != MP && |
2845 | 464 | !isMemoryAccessTOP(cast<MemoryAccess>(U)) && |
2846 | 385 | ReachableEdges.count({MP->getIncomingBlock(U), PHIBlock}); |
2847 | 468 | }); |
2848 | 222 | // If all that is left is nothing, our memoryphi is undef. We keep it as |
2849 | 222 | // InitialClass. Note: The only case this should happen is if we have at |
2850 | 222 | // least one self-argument. |
2851 | 222 | if (Filtered.begin() == Filtered.end()222 ) { |
2852 | 0 | if (setMemoryClass(MP, TOPClass)) |
2853 | 0 | markMemoryUsersTouched(MP); |
2854 | 0 | return; |
2855 | 0 | } |
2856 | 222 | |
2857 | 222 | // Transform the remaining operands into operand leaders. |
2858 | 222 | // FIXME: mapped_iterator should have a range version. |
2859 | 222 | auto LookupFunc = [&](const Use &U) 222 { |
2860 | 367 | return lookupMemoryLeader(cast<MemoryAccess>(U)); |
2861 | 367 | }; |
2862 | 222 | auto MappedBegin = map_iterator(Filtered.begin(), LookupFunc); |
2863 | 222 | auto MappedEnd = map_iterator(Filtered.end(), LookupFunc); |
2864 | 222 | |
2865 | 222 | // and now check if all the elements are equal. |
2866 | 222 | // Sadly, we can't use std::equals since these are random access iterators. |
2867 | 222 | const auto *AllSameValue = *MappedBegin; |
2868 | 222 | ++MappedBegin; |
2869 | 222 | bool AllEqual = std::all_of( |
2870 | 222 | MappedBegin, MappedEnd, |
2871 | 145 | [&AllSameValue](const MemoryAccess *V) { return V == AllSameValue; }); |
2872 | 222 | |
2873 | 222 | if (AllEqual) |
2874 | 222 | DEBUG(dbgs() << "Memory Phi value numbered to " << *AllSameValue << "\n"); |
2875 | 222 | else |
2876 | 222 | DEBUG(dbgs() << "Memory Phi value numbered to itself\n"); |
2877 | 222 | // If it's equal to something, it's in that class. Otherwise, it has to be in |
2878 | 222 | // a class where it is the leader (other things may be equivalent to it, but |
2879 | 222 | // it needs to start off in its own class, which means it must have been the |
2880 | 222 | // leader, and it can't have stopped being the leader because it was never |
2881 | 222 | // removed). |
2882 | 222 | CongruenceClass *CC = |
2883 | 222 | AllEqual ? getMemoryClass(AllSameValue)97 : ensureLeaderOfMemoryClass(MP)125 ; |
2884 | 222 | auto OldState = MemoryPhiState.lookup(MP); |
2885 | 222 | assert(OldState != MPS_Invalid && "Invalid memory phi state"); |
2886 | 222 | auto NewState = AllEqual ? MPS_Equivalent97 : MPS_Unique125 ; |
2887 | 222 | MemoryPhiState[MP] = NewState; |
2888 | 222 | if (setMemoryClass(MP, CC) || 222 OldState != NewState37 ) |
2889 | 191 | markMemoryUsersTouched(MP); |
2890 | 222 | } |
2891 | | |
2892 | | // Value number a single instruction, symbolically evaluating, performing |
2893 | | // congruence finding, and updating mappings. |
2894 | 3.96k | void NewGVN::valueNumberInstruction(Instruction *I) { |
2895 | 3.96k | DEBUG(dbgs() << "Processing instruction " << *I << "\n"); |
2896 | 3.96k | if (!I->isTerminator()3.96k ) { |
2897 | 2.89k | const Expression *Symbolized = nullptr; |
2898 | 2.89k | SmallPtrSet<Value *, 2> Visited; |
2899 | 2.89k | if (DebugCounter::shouldExecute(VNCounter)2.89k ) { |
2900 | 2.89k | Symbolized = performSymbolicEvaluation(I, Visited); |
2901 | 2.89k | // Make a phi of ops if necessary |
2902 | 2.89k | if (Symbolized && 2.89k !isa<ConstantExpression>(Symbolized)2.57k && |
2903 | 2.89k | !isa<VariableExpression>(Symbolized)2.13k && PHINodeUses.count(I)1.89k ) { |
2904 | 169 | auto *PHIE = makePossiblePhiOfOps(I, Visited); |
2905 | 169 | // If we created a phi of ops, use it. |
2906 | 169 | // If we couldn't create one, make sure we don't leave one lying around |
2907 | 169 | if (PHIE169 ) { |
2908 | 24 | Symbolized = PHIE; |
2909 | 169 | } else if (auto *145 Op145 = RealToTemp.lookup(I)) { |
2910 | 6 | removePhiOfOps(I, Op); |
2911 | 6 | } |
2912 | 169 | } |
2913 | 2.89k | |
2914 | 0 | } else { |
2915 | 0 | // Mark the instruction as unused so we don't value number it again. |
2916 | 0 | InstrDFS[I] = 0; |
2917 | 0 | } |
2918 | 2.89k | // If we couldn't come up with a symbolic expression, use the unknown |
2919 | 2.89k | // expression |
2920 | 2.89k | if (Symbolized == nullptr) |
2921 | 322 | Symbolized = createUnknownExpression(I); |
2922 | 2.89k | performCongruenceFinding(I, Symbolized); |
2923 | 3.96k | } else { |
2924 | 1.06k | // Handle terminators that return values. All of them produce values we |
2925 | 1.06k | // don't currently understand. We don't place non-value producing |
2926 | 1.06k | // terminators in a class. |
2927 | 1.06k | if (!I->getType()->isVoidTy()1.06k ) { |
2928 | 2 | auto *Symbolized = createUnknownExpression(I); |
2929 | 2 | performCongruenceFinding(I, Symbolized); |
2930 | 2 | } |
2931 | 1.06k | processOutgoingEdges(dyn_cast<TerminatorInst>(I), I->getParent()); |
2932 | 1.06k | } |
2933 | 3.96k | } |
2934 | | |
2935 | | // Check if there is a path, using single or equal argument phi nodes, from |
2936 | | // First to Second. |
2937 | | bool NewGVN::singleReachablePHIPath( |
2938 | | SmallPtrSet<const MemoryAccess *, 8> &Visited, const MemoryAccess *First, |
2939 | 0 | const MemoryAccess *Second) const { |
2940 | 0 | if (First == Second) |
2941 | 0 | return true; |
2942 | 0 | if (MSSA->isLiveOnEntryDef(First)) |
2943 | 0 | return false; |
2944 | 0 |
|
2945 | 0 | // This is not perfect, but as we're just verifying here, we can live with |
2946 | 0 | // the loss of precision. The real solution would be that of doing strongly |
2947 | 0 | // connected component finding in this routine, and it's probably not worth |
2948 | 0 | // the complexity for the time being. So, we just keep a set of visited |
2949 | 0 | // MemoryAccess and return true when we hit a cycle. |
2950 | 0 | if (Visited.count(First)) |
2951 | 0 | return true; |
2952 | 0 | Visited.insert(First); |
2953 | 0 |
|
2954 | 0 | const auto *EndDef = First; |
2955 | 0 | for (auto *ChainDef : optimized_def_chain(First)) { |
2956 | 0 | if (ChainDef == Second) |
2957 | 0 | return true; |
2958 | 0 | if (MSSA->isLiveOnEntryDef(ChainDef)) |
2959 | 0 | return false; |
2960 | 0 | EndDef = ChainDef; |
2961 | 0 | } |
2962 | 0 | auto *MP = cast<MemoryPhi>(EndDef); |
2963 | 0 | auto ReachableOperandPred = [&](const Use &U) { |
2964 | 0 | return ReachableEdges.count({MP->getIncomingBlock(U), MP->getBlock()}); |
2965 | 0 | }; |
2966 | 0 | auto FilteredPhiArgs = |
2967 | 0 | make_filter_range(MP->operands(), ReachableOperandPred); |
2968 | 0 | SmallVector<const Value *, 32> OperandList; |
2969 | 0 | std::copy(FilteredPhiArgs.begin(), FilteredPhiArgs.end(), |
2970 | 0 | std::back_inserter(OperandList)); |
2971 | 0 | bool Okay = OperandList.size() == 1; |
2972 | 0 | if (!Okay) |
2973 | 0 | Okay = |
2974 | 0 | std::equal(OperandList.begin(), OperandList.end(), OperandList.begin()); |
2975 | 0 | if (Okay) |
2976 | 0 | return singleReachablePHIPath(Visited, cast<MemoryAccess>(OperandList[0]), |
2977 | 0 | Second); |
2978 | 0 | return false; |
2979 | 0 | } |
2980 | | |
2981 | | // Verify the that the memory equivalence table makes sense relative to the |
2982 | | // congruence classes. Note that this checking is not perfect, and is currently |
2983 | | // subject to very rare false negatives. It is only useful for |
2984 | | // testing/debugging. |
2985 | 307 | void NewGVN::verifyMemoryCongruency() const { |
2986 | | #ifndef NDEBUG |
2987 | | // Verify that the memory table equivalence and memory member set match |
2988 | | for (const auto *CC : CongruenceClasses) { |
2989 | | if (CC == TOPClass || CC->isDead()) |
2990 | | continue; |
2991 | | if (CC->getStoreCount() != 0) { |
2992 | | assert((CC->getStoredValue() || !isa<StoreInst>(CC->getLeader())) && |
2993 | | "Any class with a store as a leader should have a " |
2994 | | "representative stored value"); |
2995 | | assert(CC->getMemoryLeader() && |
2996 | | "Any congruence class with a store should have a " |
2997 | | "representative access"); |
2998 | | } |
2999 | | |
3000 | | if (CC->getMemoryLeader()) |
3001 | | assert(MemoryAccessToClass.lookup(CC->getMemoryLeader()) == CC && |
3002 | | "Representative MemoryAccess does not appear to be reverse " |
3003 | | "mapped properly"); |
3004 | | for (auto M : CC->memory()) |
3005 | | assert(MemoryAccessToClass.lookup(M) == CC && |
3006 | | "Memory member does not appear to be reverse mapped properly"); |
3007 | | } |
3008 | | |
3009 | | // Anything equivalent in the MemoryAccess table should be in the same |
3010 | | // congruence class. |
3011 | | |
3012 | | // Filter out the unreachable and trivially dead entries, because they may |
3013 | | // never have been updated if the instructions were not processed. |
3014 | | auto ReachableAccessPred = |
3015 | | [&](const std::pair<const MemoryAccess *, CongruenceClass *> Pair) { |
3016 | | bool Result = ReachableBlocks.count(Pair.first->getBlock()); |
3017 | | if (!Result || MSSA->isLiveOnEntryDef(Pair.first) || |
3018 | | MemoryToDFSNum(Pair.first) == 0) |
3019 | | return false; |
3020 | | if (auto *MemDef = dyn_cast<MemoryDef>(Pair.first)) |
3021 | | return !isInstructionTriviallyDead(MemDef->getMemoryInst()); |
3022 | | |
3023 | | // We could have phi nodes which operands are all trivially dead, |
3024 | | // so we don't process them. |
3025 | | if (auto *MemPHI = dyn_cast<MemoryPhi>(Pair.first)) { |
3026 | | for (auto &U : MemPHI->incoming_values()) { |
3027 | | if (Instruction *I = dyn_cast<Instruction>(U.get())) { |
3028 | | if (!isInstructionTriviallyDead(I)) |
3029 | | return true; |
3030 | | } |
3031 | | } |
3032 | | return false; |
3033 | | } |
3034 | | |
3035 | | return true; |
3036 | | }; |
3037 | | |
3038 | | auto Filtered = make_filter_range(MemoryAccessToClass, ReachableAccessPred); |
3039 | | for (auto KV : Filtered) { |
3040 | | if (auto *FirstMUD = dyn_cast<MemoryUseOrDef>(KV.first)) { |
3041 | | auto *SecondMUD = dyn_cast<MemoryUseOrDef>(KV.second->getMemoryLeader()); |
3042 | | if (FirstMUD && SecondMUD) { |
3043 | | SmallPtrSet<const MemoryAccess *, 8> VisitedMAS; |
3044 | | assert((singleReachablePHIPath(VisitedMAS, FirstMUD, SecondMUD) || |
3045 | | ValueToClass.lookup(FirstMUD->getMemoryInst()) == |
3046 | | ValueToClass.lookup(SecondMUD->getMemoryInst())) && |
3047 | | "The instructions for these memory operations should have " |
3048 | | "been in the same congruence class or reachable through" |
3049 | | "a single argument phi"); |
3050 | | } |
3051 | | } else if (auto *FirstMP = dyn_cast<MemoryPhi>(KV.first)) { |
3052 | | // We can only sanely verify that MemoryDefs in the operand list all have |
3053 | | // the same class. |
3054 | | auto ReachableOperandPred = [&](const Use &U) { |
3055 | | return ReachableEdges.count( |
3056 | | {FirstMP->getIncomingBlock(U), FirstMP->getBlock()}) && |
3057 | | isa<MemoryDef>(U); |
3058 | | |
3059 | | }; |
3060 | | // All arguments should in the same class, ignoring unreachable arguments |
3061 | | auto FilteredPhiArgs = |
3062 | | make_filter_range(FirstMP->operands(), ReachableOperandPred); |
3063 | | SmallVector<const CongruenceClass *, 16> PhiOpClasses; |
3064 | | std::transform(FilteredPhiArgs.begin(), FilteredPhiArgs.end(), |
3065 | | std::back_inserter(PhiOpClasses), [&](const Use &U) { |
3066 | | const MemoryDef *MD = cast<MemoryDef>(U); |
3067 | | return ValueToClass.lookup(MD->getMemoryInst()); |
3068 | | }); |
3069 | | assert(std::equal(PhiOpClasses.begin(), PhiOpClasses.end(), |
3070 | | PhiOpClasses.begin()) && |
3071 | | "All MemoryPhi arguments should be in the same class"); |
3072 | | } |
3073 | | } |
3074 | | #endif |
3075 | | } |
3076 | | |
3077 | | // Verify that the sparse propagation we did actually found the maximal fixpoint |
3078 | | // We do this by storing the value to class mapping, touching all instructions, |
3079 | | // and redoing the iteration to see if anything changed. |
3080 | 307 | void NewGVN::verifyIterationSettled(Function &F) { |
3081 | | #ifndef NDEBUG |
3082 | | DEBUG(dbgs() << "Beginning iteration verification\n"); |
3083 | | if (DebugCounter::isCounterSet(VNCounter)) |
3084 | | DebugCounter::setCounterValue(VNCounter, StartingVNCounter); |
3085 | | |
3086 | | // Note that we have to store the actual classes, as we may change existing |
3087 | | // classes during iteration. This is because our memory iteration propagation |
3088 | | // is not perfect, and so may waste a little work. But it should generate |
3089 | | // exactly the same congruence classes we have now, with different IDs. |
3090 | | std::map<const Value *, CongruenceClass> BeforeIteration; |
3091 | | |
3092 | | for (auto &KV : ValueToClass) { |
3093 | | if (auto *I = dyn_cast<Instruction>(KV.first)) |
3094 | | // Skip unused/dead instructions. |
3095 | | if (InstrToDFSNum(I) == 0) |
3096 | | continue; |
3097 | | BeforeIteration.insert({KV.first, *KV.second}); |
3098 | | } |
3099 | | |
3100 | | TouchedInstructions.set(); |
3101 | | TouchedInstructions.reset(0); |
3102 | | iterateTouchedInstructions(); |
3103 | | DenseSet<std::pair<const CongruenceClass *, const CongruenceClass *>> |
3104 | | EqualClasses; |
3105 | | for (const auto &KV : ValueToClass) { |
3106 | | if (auto *I = dyn_cast<Instruction>(KV.first)) |
3107 | | // Skip unused/dead instructions. |
3108 | | if (InstrToDFSNum(I) == 0) |
3109 | | continue; |
3110 | | // We could sink these uses, but i think this adds a bit of clarity here as |
3111 | | // to what we are comparing. |
3112 | | auto *BeforeCC = &BeforeIteration.find(KV.first)->second; |
3113 | | auto *AfterCC = KV.second; |
3114 | | // Note that the classes can't change at this point, so we memoize the set |
3115 | | // that are equal. |
3116 | | if (!EqualClasses.count({BeforeCC, AfterCC})) { |
3117 | | assert(BeforeCC->isEquivalentTo(AfterCC) && |
3118 | | "Value number changed after main loop completed!"); |
3119 | | EqualClasses.insert({BeforeCC, AfterCC}); |
3120 | | } |
3121 | | } |
3122 | | #endif |
3123 | | } |
3124 | | |
3125 | | // Verify that for each store expression in the expression to class mapping, |
3126 | | // only the latest appears, and multiple ones do not appear. |
3127 | | // Because loads do not use the stored value when doing equality with stores, |
3128 | | // if we don't erase the old store expressions from the table, a load can find |
3129 | | // a no-longer valid StoreExpression. |
3130 | 307 | void NewGVN::verifyStoreExpressions() const { |
3131 | | #ifndef NDEBUG |
3132 | | // This is the only use of this, and it's not worth defining a complicated |
3133 | | // densemapinfo hash/equality function for it. |
3134 | | std::set< |
3135 | | std::pair<const Value *, |
3136 | | std::tuple<const Value *, const CongruenceClass *, Value *>>> |
3137 | | StoreExpressionSet; |
3138 | | for (const auto &KV : ExpressionToClass) { |
3139 | | if (auto *SE = dyn_cast<StoreExpression>(KV.first)) { |
3140 | | // Make sure a version that will conflict with loads is not already there |
3141 | | auto Res = StoreExpressionSet.insert( |
3142 | | {SE->getOperand(0), std::make_tuple(SE->getMemoryLeader(), KV.second, |
3143 | | SE->getStoredValue())}); |
3144 | | bool Okay = Res.second; |
3145 | | // It's okay to have the same expression already in there if it is |
3146 | | // identical in nature. |
3147 | | // This can happen when the leader of the stored value changes over time. |
3148 | | if (!Okay) |
3149 | | Okay = (std::get<1>(Res.first->second) == KV.second) && |
3150 | | (lookupOperandLeader(std::get<2>(Res.first->second)) == |
3151 | | lookupOperandLeader(SE->getStoredValue())); |
3152 | | assert(Okay && "Stored expression conflict exists in expression table"); |
3153 | | auto *ValueExpr = ValueToExpression.lookup(SE->getStoreInst()); |
3154 | | assert(ValueExpr && ValueExpr->equals(*SE) && |
3155 | | "StoreExpression in ExpressionToClass is not latest " |
3156 | | "StoreExpression for value"); |
3157 | | } |
3158 | | } |
3159 | | #endif |
3160 | | } |
3161 | | |
3162 | | // This is the main value numbering loop, it iterates over the initial touched |
3163 | | // instruction set, propagating value numbers, marking things touched, etc, |
3164 | | // until the set of touched instructions is completely empty. |
3165 | 307 | void NewGVN::iterateTouchedInstructions() { |
3166 | 307 | unsigned int Iterations = 0; |
3167 | 307 | // Figure out where touchedinstructions starts |
3168 | 307 | int FirstInstr = TouchedInstructions.find_first(); |
3169 | 307 | // Nothing set, nothing to iterate, just return. |
3170 | 307 | if (FirstInstr == -1) |
3171 | 0 | return; |
3172 | 307 | const BasicBlock *LastBlock = getBlockForValue(InstrFromDFSNum(FirstInstr)); |
3173 | 772 | while (TouchedInstructions.any()772 ) { |
3174 | 465 | ++Iterations; |
3175 | 465 | // Walk through all the instructions in all the blocks in RPO. |
3176 | 465 | // TODO: As we hit a new block, we should push and pop equalities into a |
3177 | 465 | // table lookupOperandLeader can use, to catch things PredicateInfo |
3178 | 465 | // might miss, like edge-only equivalences. |
3179 | 4.27k | for (unsigned InstrNum : TouchedInstructions.set_bits()) { |
3180 | 4.27k | |
3181 | 4.27k | // This instruction was found to be dead. We don't bother looking |
3182 | 4.27k | // at it again. |
3183 | 4.27k | if (InstrNum == 04.27k ) { |
3184 | 58 | TouchedInstructions.reset(InstrNum); |
3185 | 58 | continue; |
3186 | 58 | } |
3187 | 4.21k | |
3188 | 4.21k | Value *V = InstrFromDFSNum(InstrNum); |
3189 | 4.21k | const BasicBlock *CurrBlock = getBlockForValue(V); |
3190 | 4.21k | |
3191 | 4.21k | // If we hit a new block, do reachability processing. |
3192 | 4.21k | if (CurrBlock != LastBlock4.21k ) { |
3193 | 1.06k | LastBlock = CurrBlock; |
3194 | 1.06k | bool BlockReachable = ReachableBlocks.count(CurrBlock); |
3195 | 1.06k | const auto &CurrInstRange = BlockInstRange.lookup(CurrBlock); |
3196 | 1.06k | |
3197 | 1.06k | // If it's not reachable, erase any touched instructions and move on. |
3198 | 1.06k | if (!BlockReachable1.06k ) { |
3199 | 32 | TouchedInstructions.reset(CurrInstRange.first, CurrInstRange.second); |
3200 | 32 | DEBUG(dbgs() << "Skipping instructions in block " |
3201 | 32 | << getBlockName(CurrBlock) |
3202 | 32 | << " because it is unreachable\n"); |
3203 | 32 | continue; |
3204 | 32 | } |
3205 | 1.03k | updateProcessedCount(CurrBlock); |
3206 | 1.03k | } |
3207 | 4.21k | // Reset after processing (because we may mark ourselves as touched when |
3208 | 4.21k | // we propagate equalities). |
3209 | 4.18k | TouchedInstructions.reset(InstrNum); |
3210 | 4.18k | |
3211 | 4.18k | if (auto *MP4.18k = dyn_cast<MemoryPhi>(V)) { |
3212 | 222 | DEBUG(dbgs() << "Processing MemoryPhi " << *MP << "\n"); |
3213 | 222 | valueNumberMemoryPhi(MP); |
3214 | 4.18k | } else if (auto *3.96k I3.96k = dyn_cast<Instruction>(V)) { |
3215 | 3.96k | valueNumberInstruction(I); |
3216 | 3.96k | } else { |
3217 | 0 | llvm_unreachable("Should have been a MemoryPhi or Instruction"); |
3218 | 3.96k | } |
3219 | 4.18k | updateProcessedCount(V); |
3220 | 4.18k | } |
3221 | 465 | } |
3222 | 307 | NumGVNMaxIterations = std::max(NumGVNMaxIterations.getValue(), Iterations); |
3223 | 307 | } |
3224 | | |
3225 | | // This is the main transformation entry point. |
3226 | 307 | bool NewGVN::runGVN() { |
3227 | 307 | if (DebugCounter::isCounterSet(VNCounter)) |
3228 | 0 | StartingVNCounter = DebugCounter::getCounterValue(VNCounter); |
3229 | 307 | bool Changed = false; |
3230 | 307 | NumFuncArgs = F.arg_size(); |
3231 | 307 | MSSAWalker = MSSA->getWalker(); |
3232 | 307 | SingletonDeadExpression = new (ExpressionAllocator) DeadExpression(); |
3233 | 307 | |
3234 | 307 | // Count number of instructions for sizing of hash tables, and come |
3235 | 307 | // up with a global dfs numbering for instructions. |
3236 | 307 | unsigned ICount = 1; |
3237 | 307 | // Add an empty instruction to account for the fact that we start at 1 |
3238 | 307 | DFSToInstr.emplace_back(nullptr); |
3239 | 307 | // Note: We want ideal RPO traversal of the blocks, which is not quite the |
3240 | 307 | // same as dominator tree order, particularly with regard whether backedges |
3241 | 307 | // get visited first or second, given a block with multiple successors. |
3242 | 307 | // If we visit in the wrong order, we will end up performing N times as many |
3243 | 307 | // iterations. |
3244 | 307 | // The dominator tree does guarantee that, for a given dom tree node, it's |
3245 | 307 | // parent must occur before it in the RPO ordering. Thus, we only need to sort |
3246 | 307 | // the siblings. |
3247 | 307 | ReversePostOrderTraversal<Function *> RPOT(&F); |
3248 | 307 | unsigned Counter = 0; |
3249 | 1.18k | for (auto &B : RPOT) { |
3250 | 1.18k | auto *Node = DT->getNode(B); |
3251 | 1.18k | assert(Node && "RPO and Dominator tree should have same reachability"); |
3252 | 1.18k | RPOOrdering[Node] = ++Counter; |
3253 | 1.18k | } |
3254 | 307 | // Sort dominator tree children arrays into RPO. |
3255 | 1.18k | for (auto &B : RPOT) { |
3256 | 1.18k | auto *Node = DT->getNode(B); |
3257 | 1.18k | if (Node->getChildren().size() > 1) |
3258 | 278 | std::sort(Node->begin(), Node->end(), |
3259 | 529 | [&](const DomTreeNode *A, const DomTreeNode *B) { |
3260 | 529 | return RPOOrdering[A] < RPOOrdering[B]; |
3261 | 278 | }); |
3262 | 1.18k | } |
3263 | 307 | |
3264 | 307 | // Now a standard depth first ordering of the domtree is equivalent to RPO. |
3265 | 1.18k | for (auto DTN : depth_first(DT->getRootNode())) { |
3266 | 1.18k | BasicBlock *B = DTN->getBlock(); |
3267 | 1.18k | const auto &BlockRange = assignDFSNumbers(B, ICount); |
3268 | 1.18k | BlockInstRange.insert({B, BlockRange}); |
3269 | 1.18k | ICount += BlockRange.second - BlockRange.first; |
3270 | 1.18k | } |
3271 | 307 | initializeCongruenceClasses(F); |
3272 | 307 | |
3273 | 307 | TouchedInstructions.resize(ICount); |
3274 | 307 | // Ensure we don't end up resizing the expressionToClass map, as |
3275 | 307 | // that can be quite expensive. At most, we have one expression per |
3276 | 307 | // instruction. |
3277 | 307 | ExpressionToClass.reserve(ICount); |
3278 | 307 | |
3279 | 307 | // Initialize the touched instructions to include the entry block. |
3280 | 307 | const auto &InstRange = BlockInstRange.lookup(&F.getEntryBlock()); |
3281 | 307 | TouchedInstructions.set(InstRange.first, InstRange.second); |
3282 | 307 | DEBUG(dbgs() << "Block " << getBlockName(&F.getEntryBlock()) |
3283 | 307 | << " marked reachable\n"); |
3284 | 307 | ReachableBlocks.insert(&F.getEntryBlock()); |
3285 | 307 | |
3286 | 307 | iterateTouchedInstructions(); |
3287 | 307 | verifyMemoryCongruency(); |
3288 | 307 | verifyIterationSettled(F); |
3289 | 307 | verifyStoreExpressions(); |
3290 | 307 | |
3291 | 307 | Changed |= eliminateInstructions(F); |
3292 | 307 | |
3293 | 307 | // Delete all instructions marked for deletion. |
3294 | 762 | for (Instruction *ToErase : InstructionsToErase) { |
3295 | 762 | if (!ToErase->use_empty()) |
3296 | 21 | ToErase->replaceAllUsesWith(UndefValue::get(ToErase->getType())); |
3297 | 762 | |
3298 | 762 | if (ToErase->getParent()) |
3299 | 762 | ToErase->eraseFromParent(); |
3300 | 762 | } |
3301 | 307 | |
3302 | 307 | // Delete all unreachable blocks. |
3303 | 1.20k | auto UnreachableBlockPred = [&](const BasicBlock &BB) { |
3304 | 1.20k | return !ReachableBlocks.count(&BB); |
3305 | 1.20k | }; |
3306 | 307 | |
3307 | 182 | for (auto &BB : make_filter_range(F, UnreachableBlockPred)) { |
3308 | 182 | DEBUG(dbgs() << "We believe block " << getBlockName(&BB) |
3309 | 182 | << " is unreachable\n"); |
3310 | 182 | deleteInstructionsInBlock(&BB); |
3311 | 182 | Changed = true; |
3312 | 182 | } |
3313 | 307 | |
3314 | 307 | cleanupTables(); |
3315 | 307 | return Changed; |
3316 | 307 | } |
3317 | | |
3318 | | struct NewGVN::ValueDFS { |
3319 | | int DFSIn = 0; |
3320 | | int DFSOut = 0; |
3321 | | int LocalNum = 0; |
3322 | | // Only one of Def and U will be set. |
3323 | | // The bool in the Def tells us whether the Def is the stored value of a |
3324 | | // store. |
3325 | | PointerIntPair<Value *, 1, bool> Def; |
3326 | | Use *U = nullptr; |
3327 | 1.38k | bool operator<(const ValueDFS &Other) const { |
3328 | 1.38k | // It's not enough that any given field be less than - we have sets |
3329 | 1.38k | // of fields that need to be evaluated together to give a proper ordering. |
3330 | 1.38k | // For example, if you have; |
3331 | 1.38k | // DFS (1, 3) |
3332 | 1.38k | // Val 0 |
3333 | 1.38k | // DFS (1, 2) |
3334 | 1.38k | // Val 50 |
3335 | 1.38k | // We want the second to be less than the first, but if we just go field |
3336 | 1.38k | // by field, we will get to Val 0 < Val 50 and say the first is less than |
3337 | 1.38k | // the second. We only want it to be less than if the DFS orders are equal. |
3338 | 1.38k | // |
3339 | 1.38k | // Each LLVM instruction only produces one value, and thus the lowest-level |
3340 | 1.38k | // differentiator that really matters for the stack (and what we use as as a |
3341 | 1.38k | // replacement) is the local dfs number. |
3342 | 1.38k | // Everything else in the structure is instruction level, and only affects |
3343 | 1.38k | // the order in which we will replace operands of a given instruction. |
3344 | 1.38k | // |
3345 | 1.38k | // For a given instruction (IE things with equal dfsin, dfsout, localnum), |
3346 | 1.38k | // the order of replacement of uses does not matter. |
3347 | 1.38k | // IE given, |
3348 | 1.38k | // a = 5 |
3349 | 1.38k | // b = a + a |
3350 | 1.38k | // When you hit b, you will have two valuedfs with the same dfsin, out, and |
3351 | 1.38k | // localnum. |
3352 | 1.38k | // The .val will be the same as well. |
3353 | 1.38k | // The .u's will be different. |
3354 | 1.38k | // You will replace both, and it does not matter what order you replace them |
3355 | 1.38k | // in (IE whether you replace operand 2, then operand 1, or operand 1, then |
3356 | 1.38k | // operand 2). |
3357 | 1.38k | // Similarly for the case of same dfsin, dfsout, localnum, but different |
3358 | 1.38k | // .val's |
3359 | 1.38k | // a = 5 |
3360 | 1.38k | // b = 6 |
3361 | 1.38k | // c = a + b |
3362 | 1.38k | // in c, we will a valuedfs for a, and one for b,with everything the same |
3363 | 1.38k | // but .val and .u. |
3364 | 1.38k | // It does not matter what order we replace these operands in. |
3365 | 1.38k | // You will always end up with the same IR, and this is guaranteed. |
3366 | 1.38k | return std::tie(DFSIn, DFSOut, LocalNum, Def, U) < |
3367 | 1.38k | std::tie(Other.DFSIn, Other.DFSOut, Other.LocalNum, Other.Def, |
3368 | 1.38k | Other.U); |
3369 | 1.38k | } |
3370 | | }; |
3371 | | |
3372 | | // This function converts the set of members for a congruence class from values, |
3373 | | // to sets of defs and uses with associated DFS info. The total number of |
3374 | | // reachable uses for each value is stored in UseCount, and instructions that |
3375 | | // seem |
3376 | | // dead (have no non-dead uses) are stored in ProbablyDead. |
3377 | | void NewGVN::convertClassToDFSOrdered( |
3378 | | const CongruenceClass &Dense, SmallVectorImpl<ValueDFS> &DFSOrderedSet, |
3379 | | DenseMap<const Value *, unsigned int> &UseCounts, |
3380 | 253 | SmallPtrSetImpl<Instruction *> &ProbablyDead) const { |
3381 | 555 | for (auto D : Dense) { |
3382 | 555 | // First add the value. |
3383 | 555 | BasicBlock *BB = getBlockForValue(D); |
3384 | 555 | // Constants are handled prior to ever calling this function, so |
3385 | 555 | // we should only be left with instructions as members. |
3386 | 555 | assert(BB && "Should have figured out a basic block for value"); |
3387 | 555 | ValueDFS VDDef; |
3388 | 555 | DomTreeNode *DomNode = DT->getNode(BB); |
3389 | 555 | VDDef.DFSIn = DomNode->getDFSNumIn(); |
3390 | 555 | VDDef.DFSOut = DomNode->getDFSNumOut(); |
3391 | 555 | // If it's a store, use the leader of the value operand, if it's always |
3392 | 555 | // available, or the value operand. TODO: We could do dominance checks to |
3393 | 555 | // find a dominating leader, but not worth it ATM. |
3394 | 555 | if (auto *SI555 = dyn_cast<StoreInst>(D)) { |
3395 | 26 | auto Leader = lookupOperandLeader(SI->getValueOperand()); |
3396 | 26 | if (alwaysAvailable(Leader)26 ) { |
3397 | 1 | VDDef.Def.setPointer(Leader); |
3398 | 26 | } else { |
3399 | 25 | VDDef.Def.setPointer(SI->getValueOperand()); |
3400 | 25 | VDDef.Def.setInt(true); |
3401 | 25 | } |
3402 | 555 | } else { |
3403 | 529 | VDDef.Def.setPointer(D); |
3404 | 529 | } |
3405 | 555 | assert(isa<Instruction>(D) && |
3406 | 555 | "The dense set member should always be an instruction"); |
3407 | 555 | Instruction *Def = cast<Instruction>(D); |
3408 | 555 | VDDef.LocalNum = InstrToDFSNum(D); |
3409 | 555 | DFSOrderedSet.push_back(VDDef); |
3410 | 555 | // If there is a phi node equivalent, add it |
3411 | 555 | if (auto *PN555 = RealToTemp.lookup(Def)) { |
3412 | 13 | auto *PHIE = |
3413 | 13 | dyn_cast_or_null<PHIExpression>(ValueToExpression.lookup(Def)); |
3414 | 13 | if (PHIE13 ) { |
3415 | 13 | VDDef.Def.setInt(false); |
3416 | 13 | VDDef.Def.setPointer(PN); |
3417 | 13 | VDDef.LocalNum = 0; |
3418 | 13 | DFSOrderedSet.push_back(VDDef); |
3419 | 13 | } |
3420 | 13 | } |
3421 | 555 | |
3422 | 555 | unsigned int UseCount = 0; |
3423 | 555 | // Now add the uses. |
3424 | 641 | for (auto &U : Def->uses()) { |
3425 | 641 | if (auto *I641 = dyn_cast<Instruction>(U.getUser())) { |
3426 | 641 | // Don't try to replace into dead uses |
3427 | 641 | if (InstructionsToErase.count(I)) |
3428 | 90 | continue; |
3429 | 551 | ValueDFS VDUse; |
3430 | 551 | // Put the phi node uses in the incoming block. |
3431 | 551 | BasicBlock *IBlock; |
3432 | 551 | if (auto *P551 = dyn_cast<PHINode>(I)) { |
3433 | 87 | IBlock = P->getIncomingBlock(U); |
3434 | 87 | // Make phi node users appear last in the incoming block |
3435 | 87 | // they are from. |
3436 | 87 | VDUse.LocalNum = InstrDFS.size() + 1; |
3437 | 551 | } else { |
3438 | 464 | IBlock = getBlockForValue(I); |
3439 | 464 | VDUse.LocalNum = InstrToDFSNum(I); |
3440 | 464 | } |
3441 | 551 | |
3442 | 551 | // Skip uses in unreachable blocks, as we're going |
3443 | 551 | // to delete them. |
3444 | 551 | if (ReachableBlocks.count(IBlock) == 0) |
3445 | 2 | continue; |
3446 | 549 | |
3447 | 549 | DomTreeNode *DomNode = DT->getNode(IBlock); |
3448 | 549 | VDUse.DFSIn = DomNode->getDFSNumIn(); |
3449 | 549 | VDUse.DFSOut = DomNode->getDFSNumOut(); |
3450 | 549 | VDUse.U = &U; |
3451 | 549 | ++UseCount; |
3452 | 549 | DFSOrderedSet.emplace_back(VDUse); |
3453 | 549 | } |
3454 | 641 | } |
3455 | 555 | |
3456 | 555 | // If there are no uses, it's probably dead (but it may have side-effects, |
3457 | 555 | // so not definitely dead. Otherwise, store the number of uses so we can |
3458 | 555 | // track if it becomes dead later). |
3459 | 555 | if (UseCount == 0) |
3460 | 100 | ProbablyDead.insert(Def); |
3461 | 555 | else |
3462 | 455 | UseCounts[Def] = UseCount; |
3463 | 555 | } |
3464 | 253 | } |
3465 | | |
3466 | | // This function converts the set of members for a congruence class from values, |
3467 | | // to the set of defs for loads and stores, with associated DFS info. |
3468 | | void NewGVN::convertClassToLoadsAndStores( |
3469 | | const CongruenceClass &Dense, |
3470 | 227 | SmallVectorImpl<ValueDFS> &LoadsAndStores) const { |
3471 | 246 | for (auto D : Dense) { |
3472 | 246 | if (!isa<LoadInst>(D) && 246 !isa<StoreInst>(D)241 ) |
3473 | 0 | continue; |
3474 | 246 | |
3475 | 246 | BasicBlock *BB = getBlockForValue(D); |
3476 | 246 | ValueDFS VD; |
3477 | 246 | DomTreeNode *DomNode = DT->getNode(BB); |
3478 | 246 | VD.DFSIn = DomNode->getDFSNumIn(); |
3479 | 246 | VD.DFSOut = DomNode->getDFSNumOut(); |
3480 | 246 | VD.Def.setPointer(D); |
3481 | 246 | |
3482 | 246 | // If it's an instruction, use the real local dfs number. |
3483 | 246 | if (auto *I = dyn_cast<Instruction>(D)) |
3484 | 246 | VD.LocalNum = InstrToDFSNum(I); |
3485 | 246 | else |
3486 | 0 | llvm_unreachable("Should have been an instruction"); |
3487 | 246 | |
3488 | 246 | LoadsAndStores.emplace_back(VD); |
3489 | 246 | } |
3490 | 227 | } |
3491 | | |
3492 | 553 | static void patchReplacementInstruction(Instruction *I, Value *Repl) { |
3493 | 553 | auto *ReplInst = dyn_cast<Instruction>(Repl); |
3494 | 553 | if (!ReplInst) |
3495 | 339 | return; |
3496 | 214 | |
3497 | 214 | // Patch the replacement so that it is not more restrictive than the value |
3498 | 214 | // being replaced. |
3499 | 214 | // Note that if 'I' is a load being replaced by some operation, |
3500 | 214 | // for example, by an arithmetic operation, then andIRFlags() |
3501 | 214 | // would just erase all math flags from the original arithmetic |
3502 | 214 | // operation, which is clearly not wanted and not needed. |
3503 | 214 | if (214 !isa<LoadInst>(I)214 ) |
3504 | 148 | ReplInst->andIRFlags(I); |
3505 | 553 | |
3506 | 553 | // FIXME: If both the original and replacement value are part of the |
3507 | 553 | // same control-flow region (meaning that the execution of one |
3508 | 553 | // guarantees the execution of the other), then we can combine the |
3509 | 553 | // noalias scopes here and do better than the general conservative |
3510 | 553 | // answer used in combineMetadata(). |
3511 | 553 | |
3512 | 553 | // In general, GVN unifies expressions over different control-flow |
3513 | 553 | // regions, and so we need a conservative combination of the noalias |
3514 | 553 | // scopes. |
3515 | 553 | static const unsigned KnownIDs[] = { |
3516 | 553 | LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, |
3517 | 553 | LLVMContext::MD_noalias, LLVMContext::MD_range, |
3518 | 553 | LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load, |
3519 | 553 | LLVMContext::MD_invariant_group}; |
3520 | 553 | combineMetadata(ReplInst, I, KnownIDs); |
3521 | 553 | } |
3522 | | |
3523 | 337 | static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) { |
3524 | 337 | patchReplacementInstruction(I, Repl); |
3525 | 337 | I->replaceAllUsesWith(Repl); |
3526 | 337 | } |
3527 | | |
3528 | 182 | void NewGVN::deleteInstructionsInBlock(BasicBlock *BB) { |
3529 | 182 | DEBUG(dbgs() << " BasicBlock Dead:" << *BB); |
3530 | 182 | ++NumGVNBlocksDeleted; |
3531 | 182 | |
3532 | 182 | // Delete the instructions backwards, as it has a reduced likelihood of having |
3533 | 182 | // to update as many def-use and use-def chains. Start after the terminator. |
3534 | 182 | auto StartPoint = BB->rbegin(); |
3535 | 182 | ++StartPoint; |
3536 | 182 | // Note that we explicitly recalculate BB->rend() on each iteration, |
3537 | 182 | // as it may change when we remove the first instruction. |
3538 | 241 | for (BasicBlock::reverse_iterator I(StartPoint); I != BB->rend()241 ;) { |
3539 | 59 | Instruction &Inst = *I++; |
3540 | 59 | if (!Inst.use_empty()) |
3541 | 10 | Inst.replaceAllUsesWith(UndefValue::get(Inst.getType())); |
3542 | 59 | if (isa<LandingPadInst>(Inst)) |
3543 | 0 | continue; |
3544 | 59 | |
3545 | 59 | Inst.eraseFromParent(); |
3546 | 59 | ++NumGVNInstrDeleted; |
3547 | 59 | } |
3548 | 182 | // Now insert something that simplifycfg will turn into an unreachable. |
3549 | 182 | Type *Int8Ty = Type::getInt8Ty(BB->getContext()); |
3550 | 182 | new StoreInst(UndefValue::get(Int8Ty), |
3551 | 182 | Constant::getNullValue(Int8Ty->getPointerTo()), |
3552 | 182 | BB->getTerminator()); |
3553 | 182 | } |
3554 | | |
3555 | 819 | void NewGVN::markInstructionForDeletion(Instruction *I) { |
3556 | 819 | DEBUG(dbgs() << "Marking " << *I << " for deletion\n"); |
3557 | 819 | InstructionsToErase.insert(I); |
3558 | 819 | } |
3559 | | |
3560 | 337 | void NewGVN::replaceInstruction(Instruction *I, Value *V) { |
3561 | 337 | |
3562 | 337 | DEBUG(dbgs() << "Replacing " << *I << " with " << *V << "\n"); |
3563 | 337 | patchAndReplaceAllUsesWith(I, V); |
3564 | 337 | // We save the actual erasing to avoid invalidating memory |
3565 | 337 | // dependencies until we are done with everything. |
3566 | 337 | markInstructionForDeletion(I); |
3567 | 337 | } |
3568 | | |
3569 | | namespace { |
3570 | | |
3571 | | // This is a stack that contains both the value and dfs info of where |
3572 | | // that value is valid. |
3573 | | class ValueDFSStack { |
3574 | | public: |
3575 | 1.13k | Value *back() const { return ValueStack.back(); } |
3576 | 0 | std::pair<int, int> dfs_back() const { return DFSStack.back(); } |
3577 | | |
3578 | 508 | void push_back(Value *V, int DFSIn, int DFSOut) { |
3579 | 508 | ValueStack.emplace_back(V); |
3580 | 508 | DFSStack.emplace_back(DFSIn, DFSOut); |
3581 | 508 | } |
3582 | 4.69k | bool empty() const { return DFSStack.empty(); } |
3583 | 1.13k | bool isInScope(int DFSIn, int DFSOut) const { |
3584 | 1.13k | if (empty()) |
3585 | 253 | return false; |
3586 | 883 | return DFSIn >= DFSStack.back().first && 883 DFSOut <= DFSStack.back().second883 ; |
3587 | 1.13k | } |
3588 | | |
3589 | 508 | void popUntilDFSScope(int DFSIn, int DFSOut) { |
3590 | 508 | |
3591 | 508 | // These two should always be in sync at this point. |
3592 | 508 | assert(ValueStack.size() == DFSStack.size() && |
3593 | 508 | "Mismatch between ValueStack and DFSStack"); |
3594 | 508 | while ( |
3595 | 536 | !DFSStack.empty() && |
3596 | 536 | !(DFSIn >= DFSStack.back().first && 28 DFSOut <= DFSStack.back().second28 )) { |
3597 | 28 | DFSStack.pop_back(); |
3598 | 28 | ValueStack.pop_back(); |
3599 | 28 | } |
3600 | 508 | } |
3601 | | |
3602 | | private: |
3603 | | SmallVector<Value *, 8> ValueStack; |
3604 | | SmallVector<std::pair<int, int>, 8> DFSStack; |
3605 | | }; |
3606 | | } |
3607 | | |
3608 | | // Given an expression, get the congruence class for it. |
3609 | 77 | CongruenceClass *NewGVN::getClassForExpression(const Expression *E) const { |
3610 | 77 | if (auto *VE = dyn_cast<VariableExpression>(E)) |
3611 | 1 | return ValueToClass.lookup(VE->getVariableValue()); |
3612 | 76 | else if (76 isa<DeadExpression>(E)76 ) |
3613 | 0 | return TOPClass; |
3614 | 76 | return ExpressionToClass.lookup(E); |
3615 | 76 | } |
3616 | | |
3617 | | // Given a value and a basic block we are trying to see if it is available in, |
3618 | | // see if the value has a leader available in that block. |
3619 | | Value *NewGVN::findPHIOfOpsLeader(const Expression *E, |
3620 | | const Instruction *OrigInst, |
3621 | 126 | const BasicBlock *BB) const { |
3622 | 126 | // It would already be constant if we could make it constant |
3623 | 126 | if (auto *CE = dyn_cast<ConstantExpression>(E)) |
3624 | 49 | return CE->getConstantValue(); |
3625 | 77 | if (auto *77 VE77 = dyn_cast<VariableExpression>(E)) { |
3626 | 1 | auto *V = VE->getVariableValue(); |
3627 | 1 | if (alwaysAvailable(V) || 1 DT->dominates(getBlockForValue(V), BB)1 ) |
3628 | 0 | return VE->getVariableValue(); |
3629 | 77 | } |
3630 | 77 | |
3631 | 77 | auto *CC = getClassForExpression(E); |
3632 | 77 | if (!CC) |
3633 | 50 | return nullptr; |
3634 | 27 | if (27 alwaysAvailable(CC->getLeader())27 ) |
3635 | 0 | return CC->getLeader(); |
3636 | 27 | |
3637 | 27 | for (auto Member : *CC) 27 { |
3638 | 30 | auto *MemberInst = dyn_cast<Instruction>(Member); |
3639 | 30 | if (MemberInst == OrigInst) |
3640 | 16 | continue; |
3641 | 14 | // Anything that isn't an instruction is always available. |
3642 | 14 | if (14 !MemberInst14 ) |
3643 | 0 | return Member; |
3644 | 14 | if (14 DT->dominates(getBlockForValue(MemberInst), BB)14 ) |
3645 | 8 | return Member; |
3646 | 19 | } |
3647 | 19 | return nullptr; |
3648 | 19 | } |
3649 | | |
3650 | 307 | bool NewGVN::eliminateInstructions(Function &F) { |
3651 | 307 | // This is a non-standard eliminator. The normal way to eliminate is |
3652 | 307 | // to walk the dominator tree in order, keeping track of available |
3653 | 307 | // values, and eliminating them. However, this is mildly |
3654 | 307 | // pointless. It requires doing lookups on every instruction, |
3655 | 307 | // regardless of whether we will ever eliminate it. For |
3656 | 307 | // instructions part of most singleton congruence classes, we know we |
3657 | 307 | // will never eliminate them. |
3658 | 307 | |
3659 | 307 | // Instead, this eliminator looks at the congruence classes directly, sorts |
3660 | 307 | // them into a DFS ordering of the dominator tree, and then we just |
3661 | 307 | // perform elimination straight on the sets by walking the congruence |
3662 | 307 | // class member uses in order, and eliminate the ones dominated by the |
3663 | 307 | // last member. This is worst case O(E log E) where E = number of |
3664 | 307 | // instructions in a single congruence class. In theory, this is all |
3665 | 307 | // instructions. In practice, it is much faster, as most instructions are |
3666 | 307 | // either in singleton congruence classes or can't possibly be eliminated |
3667 | 307 | // anyway (if there are no overlapping DFS ranges in class). |
3668 | 307 | // When we find something not dominated, it becomes the new leader |
3669 | 307 | // for elimination purposes. |
3670 | 307 | // TODO: If we wanted to be faster, We could remove any members with no |
3671 | 307 | // overlapping ranges while sorting, as we will never eliminate anything |
3672 | 307 | // with those members, as they don't dominate anything else in our set. |
3673 | 307 | |
3674 | 307 | bool AnythingReplaced = false; |
3675 | 307 | |
3676 | 307 | // Since we are going to walk the domtree anyway, and we can't guarantee the |
3677 | 307 | // DFS numbers are updated, we compute some ourselves. |
3678 | 307 | DT->updateDFSNumbers(); |
3679 | 307 | |
3680 | 307 | // Go through all of our phi nodes, and kill the arguments associated with |
3681 | 307 | // unreachable edges. |
3682 | 32 | auto ReplaceUnreachablePHIArgs = [&](PHINode &PHI, BasicBlock *BB) { |
3683 | 32 | for (auto &Operand : PHI.incoming_values()) |
3684 | 73 | if (73 !ReachableEdges.count({PHI.getIncomingBlock(Operand), BB})73 ) { |
3685 | 38 | DEBUG(dbgs() << "Replacing incoming value of " << PHI << " for block " |
3686 | 73 | << getBlockName(PHI.getIncomingBlock(Operand)) |
3687 | 73 | << " with undef due to it being unreachable\n"); |
3688 | 73 | Operand.set(UndefValue::get(PHI.getType())); |
3689 | 73 | } |
3690 | 32 | }; |
3691 | 307 | SmallPtrSet<BasicBlock *, 8> BlocksWithPhis; |
3692 | 307 | for (auto &B : F) |
3693 | 1.20k | if (1.20k (!B.empty() && 1.20k isa<PHINode>(*B.begin())1.20k ) || |
3694 | 1.03k | (PHIOfOpsPHIs.find(&B) != PHIOfOpsPHIs.end())) |
3695 | 172 | BlocksWithPhis.insert(&B); |
3696 | 307 | DenseMap<const BasicBlock *, unsigned> ReachablePredCount; |
3697 | 307 | for (auto KV : ReachableEdges) |
3698 | 985 | ReachablePredCount[KV.getEnd()]++; |
3699 | 307 | for (auto *BB : BlocksWithPhis) |
3700 | 307 | // TODO: It would be faster to use getNumIncomingBlocks() on a phi node in |
3701 | 307 | // the block and subtract the pred count, but it's more complicated. |
3702 | 172 | if (172 ReachablePredCount.lookup(BB) != |
3703 | 172 | unsigned(std::distance(pred_begin(BB), pred_end(BB)))) { |
3704 | 59 | for (auto II = BB->begin(); isa<PHINode>(II)59 ; ++II31 ) { |
3705 | 31 | auto &PHI = cast<PHINode>(*II); |
3706 | 31 | ReplaceUnreachablePHIArgs(PHI, BB); |
3707 | 31 | } |
3708 | 1 | for_each_found(PHIOfOpsPHIs, BB, [&](PHINode *PHI) { |
3709 | 1 | ReplaceUnreachablePHIArgs(*PHI, BB); |
3710 | 1 | }); |
3711 | 172 | } |
3712 | 307 | |
3713 | 307 | // Map to store the use counts |
3714 | 307 | DenseMap<const Value *, unsigned int> UseCounts; |
3715 | 3.15k | for (auto *CC : reverse(CongruenceClasses)) { |
3716 | 3.15k | DEBUG(dbgs() << "Eliminating in congruence class " << CC->getID() << "\n"); |
3717 | 3.15k | // Track the equivalent store info so we can decide whether to try |
3718 | 3.15k | // dead store elimination. |
3719 | 3.15k | SmallVector<ValueDFS, 8> PossibleDeadStores; |
3720 | 3.15k | SmallPtrSet<Instruction *, 8> ProbablyDead; |
3721 | 3.15k | if (CC->isDead() || 3.15k CC->empty()2.33k ) |
3722 | 921 | continue; |
3723 | 2.23k | // Everything still in the TOP class is unreachable or dead. |
3724 | 2.23k | if (2.23k CC == TOPClass2.23k ) { |
3725 | 152 | for (auto M : *CC) { |
3726 | 152 | auto *VTE = ValueToExpression.lookup(M); |
3727 | 152 | if (VTE && 152 isa<DeadExpression>(VTE)0 ) |
3728 | 0 | markInstructionForDeletion(cast<Instruction>(M)); |
3729 | 152 | assert((!ReachableBlocks.count(cast<Instruction>(M)->getParent()) || |
3730 | 152 | InstructionsToErase.count(cast<Instruction>(M))) && |
3731 | 152 | "Everything in TOP should be unreachable or dead at this " |
3732 | 152 | "point"); |
3733 | 152 | } |
3734 | 68 | continue; |
3735 | 68 | } |
3736 | 2.16k | |
3737 | 2.23k | assert(CC->getLeader() && "We should have had a leader"); |
3738 | 2.16k | // If this is a leader that is always available, and it's a |
3739 | 2.16k | // constant or has no equivalences, just replace everything with |
3740 | 2.16k | // it. We then update the congruence class with whatever members |
3741 | 2.16k | // are left. |
3742 | 2.16k | Value *Leader = |
3743 | 2.16k | CC->getStoredValue() ? CC->getStoredValue()227 : CC->getLeader()1.93k ; |
3744 | 2.16k | if (alwaysAvailable(Leader)2.16k ) { |
3745 | 766 | CongruenceClass::MemberSet MembersLeft; |
3746 | 910 | for (auto M : *CC) { |
3747 | 910 | Value *Member = M; |
3748 | 910 | // Void things have no uses we can replace. |
3749 | 910 | if (Member == Leader || 910 !isa<Instruction>(Member)489 || |
3750 | 910 | Member->getType()->isVoidTy()489 ) { |
3751 | 573 | MembersLeft.insert(Member); |
3752 | 573 | continue; |
3753 | 573 | } |
3754 | 337 | DEBUG337 (dbgs() << "Found replacement " << *(Leader) << " for " << *Member |
3755 | 337 | << "\n"); |
3756 | 337 | auto *I = cast<Instruction>(Member); |
3757 | 337 | assert(Leader != I && "About to accidentally remove our leader"); |
3758 | 337 | replaceInstruction(I, Leader); |
3759 | 337 | AnythingReplaced = true; |
3760 | 337 | } |
3761 | 766 | CC->swap(MembersLeft); |
3762 | 2.16k | } else { |
3763 | 1.39k | // If this is a singleton, we can skip it. |
3764 | 1.39k | if (CC->size() != 1 || 1.39k RealToTemp.count(Leader)1.16k ) { |
3765 | 253 | // This is a stack because equality replacement/etc may place |
3766 | 253 | // constants in the middle of the member list, and we want to use |
3767 | 253 | // those constant values in preference to the current leader, over |
3768 | 253 | // the scope of those constants. |
3769 | 253 | ValueDFSStack EliminationStack; |
3770 | 253 | |
3771 | 253 | // Convert the members to DFS ordered sets and then merge them. |
3772 | 253 | SmallVector<ValueDFS, 8> DFSOrderedSet; |
3773 | 253 | convertClassToDFSOrdered(*CC, DFSOrderedSet, UseCounts, ProbablyDead); |
3774 | 253 | |
3775 | 253 | // Sort the whole thing. |
3776 | 253 | std::sort(DFSOrderedSet.begin(), DFSOrderedSet.end()); |
3777 | 1.11k | for (auto &VD : DFSOrderedSet) { |
3778 | 1.11k | int MemberDFSIn = VD.DFSIn; |
3779 | 1.11k | int MemberDFSOut = VD.DFSOut; |
3780 | 1.11k | Value *Def = VD.Def.getPointer(); |
3781 | 1.11k | bool FromStore = VD.Def.getInt(); |
3782 | 1.11k | Use *U = VD.U; |
3783 | 1.11k | // We ignore void things because we can't get a value from them. |
3784 | 1.11k | if (Def && 1.11k Def->getType()->isVoidTy()568 ) |
3785 | 0 | continue; |
3786 | 1.11k | auto *DefInst = dyn_cast_or_null<Instruction>(Def); |
3787 | 1.11k | if (DefInst && 1.11k AllTempInstructions.count(DefInst)567 ) { |
3788 | 13 | auto *PN = cast<PHINode>(DefInst); |
3789 | 13 | |
3790 | 13 | // If this is a value phi and that's the expression we used, insert |
3791 | 13 | // it into the program |
3792 | 13 | // remove from temp instruction list. |
3793 | 13 | AllTempInstructions.erase(PN); |
3794 | 13 | auto *DefBlock = getBlockForValue(Def); |
3795 | 13 | DEBUG(dbgs() << "Inserting fully real phi of ops" << *Def |
3796 | 13 | << " into block " |
3797 | 13 | << getBlockName(getBlockForValue(Def)) << "\n"); |
3798 | 13 | PN->insertBefore(&DefBlock->front()); |
3799 | 13 | Def = PN; |
3800 | 13 | NumGVNPHIOfOpsEliminations++; |
3801 | 13 | } |
3802 | 1.11k | |
3803 | 1.11k | if (EliminationStack.empty()1.11k ) { |
3804 | 253 | DEBUG(dbgs() << "Elimination Stack is empty\n"); |
3805 | 1.11k | } else { |
3806 | 864 | DEBUG(dbgs() << "Elimination Stack Top DFS numbers are (" |
3807 | 864 | << EliminationStack.dfs_back().first << "," |
3808 | 864 | << EliminationStack.dfs_back().second << ")\n"); |
3809 | 864 | } |
3810 | 1.11k | |
3811 | 1.11k | DEBUG(dbgs() << "Current DFS numbers are (" << MemberDFSIn << "," |
3812 | 1.11k | << MemberDFSOut << ")\n"); |
3813 | 1.11k | // First, we see if we are out of scope or empty. If so, |
3814 | 1.11k | // and there equivalences, we try to replace the top of |
3815 | 1.11k | // stack with equivalences (if it's on the stack, it must |
3816 | 1.11k | // not have been eliminated yet). |
3817 | 1.11k | // Then we synchronize to our current scope, by |
3818 | 1.11k | // popping until we are back within a DFS scope that |
3819 | 1.11k | // dominates the current member. |
3820 | 1.11k | // Then, what happens depends on a few factors |
3821 | 1.11k | // If the stack is now empty, we need to push |
3822 | 1.11k | // If we have a constant or a local equivalence we want to |
3823 | 1.11k | // start using, we also push. |
3824 | 1.11k | // Otherwise, we walk along, processing members who are |
3825 | 1.11k | // dominated by this scope, and eliminate them. |
3826 | 568 | bool ShouldPush = Def && EliminationStack.empty(); |
3827 | 1.11k | bool OutOfScope = |
3828 | 1.11k | !EliminationStack.isInScope(MemberDFSIn, MemberDFSOut); |
3829 | 1.11k | |
3830 | 1.11k | if (OutOfScope || 1.11k ShouldPush837 ) { |
3831 | 280 | // Sync to our current scope. |
3832 | 280 | EliminationStack.popUntilDFSScope(MemberDFSIn, MemberDFSOut); |
3833 | 280 | bool ShouldPush = Def && EliminationStack.empty(); |
3834 | 280 | if (ShouldPush280 ) { |
3835 | 280 | EliminationStack.push_back(Def, MemberDFSIn, MemberDFSOut); |
3836 | 280 | } |
3837 | 280 | } |
3838 | 1.11k | |
3839 | 1.11k | // Skip the Def's, we only want to eliminate on their uses. But mark |
3840 | 1.11k | // dominated defs as dead. |
3841 | 1.11k | if (Def1.11k ) { |
3842 | 568 | // For anything in this case, what and how we value number |
3843 | 568 | // guarantees that any side-effets that would have occurred (ie |
3844 | 568 | // throwing, etc) can be proven to either still occur (because it's |
3845 | 568 | // dominated by something that has the same side-effects), or never |
3846 | 568 | // occur. Otherwise, we would not have been able to prove it value |
3847 | 568 | // equivalent to something else. For these things, we can just mark |
3848 | 568 | // it all dead. Note that this is different from the "ProbablyDead" |
3849 | 568 | // set, which may not be dominated by anything, and thus, are only |
3850 | 568 | // easy to prove dead if they are also side-effect free. Note that |
3851 | 568 | // because stores are put in terms of the stored value, we skip |
3852 | 568 | // stored values here. If the stored value is really dead, it will |
3853 | 568 | // still be marked for deletion when we process it in its own class. |
3854 | 568 | if (!EliminationStack.empty() && 568 Def != EliminationStack.back()568 && |
3855 | 568 | isa<Instruction>(Def)284 && !FromStore284 ) |
3856 | 280 | markInstructionForDeletion(cast<Instruction>(Def)); |
3857 | 568 | continue; |
3858 | 568 | } |
3859 | 549 | // At this point, we know it is a Use we are trying to possibly |
3860 | 549 | // replace. |
3861 | 549 | |
3862 | 1.11k | assert(isa<Instruction>(U->get()) && |
3863 | 549 | "Current def should have been an instruction"); |
3864 | 549 | assert(isa<Instruction>(U->getUser()) && |
3865 | 549 | "Current user should have been an instruction"); |
3866 | 549 | |
3867 | 549 | // If the thing we are replacing into is already marked to be dead, |
3868 | 549 | // this use is dead. Note that this is true regardless of whether |
3869 | 549 | // we have anything dominating the use or not. We do this here |
3870 | 549 | // because we are already walking all the uses anyway. |
3871 | 549 | Instruction *InstUse = cast<Instruction>(U->getUser()); |
3872 | 549 | if (InstructionsToErase.count(InstUse)549 ) { |
3873 | 10 | auto &UseCount = UseCounts[U->get()]; |
3874 | 10 | if (--UseCount == 010 ) { |
3875 | 3 | ProbablyDead.insert(cast<Instruction>(U->get())); |
3876 | 3 | } |
3877 | 10 | } |
3878 | 549 | |
3879 | 549 | // If we get to this point, and the stack is empty we must have a use |
3880 | 549 | // with nothing we can use to eliminate this use, so just skip it. |
3881 | 549 | if (EliminationStack.empty()) |
3882 | 0 | continue; |
3883 | 549 | |
3884 | 549 | Value *DominatingLeader = EliminationStack.back(); |
3885 | 549 | |
3886 | 549 | auto *II = dyn_cast<IntrinsicInst>(DominatingLeader); |
3887 | 549 | if (II && 549 II->getIntrinsicID() == Intrinsic::ssa_copy6 ) |
3888 | 1 | DominatingLeader = II->getOperand(0); |
3889 | 549 | |
3890 | 549 | // Don't replace our existing users with ourselves. |
3891 | 549 | if (U->get() == DominatingLeader) |
3892 | 299 | continue; |
3893 | 250 | DEBUG250 (dbgs() << "Found replacement " << *DominatingLeader << " for " |
3894 | 250 | << *U->get() << " in " << *(U->getUser()) << "\n"); |
3895 | 250 | |
3896 | 250 | // If we replaced something in an instruction, handle the patching of |
3897 | 250 | // metadata. Skip this if we are replacing predicateinfo with its |
3898 | 250 | // original operand, as we already know we can just drop it. |
3899 | 250 | auto *ReplacedInst = cast<Instruction>(U->get()); |
3900 | 250 | auto *PI = PredInfo->getPredicateInfoFor(ReplacedInst); |
3901 | 250 | if (!PI || 250 DominatingLeader != PI->OriginalOp38 ) |
3902 | 216 | patchReplacementInstruction(ReplacedInst, DominatingLeader); |
3903 | 250 | U->set(DominatingLeader); |
3904 | 250 | // This is now a use of the dominating leader, which means if the |
3905 | 250 | // dominating leader was dead, it's now live! |
3906 | 250 | auto &LeaderUseCount = UseCounts[DominatingLeader]; |
3907 | 250 | // It's about to be alive again. |
3908 | 250 | if (LeaderUseCount == 0 && 250 isa<Instruction>(DominatingLeader)28 ) |
3909 | 27 | ProbablyDead.erase(cast<Instruction>(DominatingLeader)); |
3910 | 250 | if (LeaderUseCount == 0 && 250 II28 ) |
3911 | 1 | ProbablyDead.insert(II); |
3912 | 1.11k | ++LeaderUseCount; |
3913 | 1.11k | AnythingReplaced = true; |
3914 | 1.11k | } |
3915 | 253 | } |
3916 | 1.39k | } |
3917 | 2.16k | |
3918 | 2.16k | // At this point, anything still in the ProbablyDead set is actually dead if |
3919 | 2.16k | // would be trivially dead. |
3920 | 2.16k | for (auto *I : ProbablyDead) |
3921 | 102 | if (102 wouldInstructionBeTriviallyDead(I)102 ) |
3922 | 74 | markInstructionForDeletion(I); |
3923 | 2.16k | |
3924 | 2.16k | // Cleanup the congruence class. |
3925 | 2.16k | CongruenceClass::MemberSet MembersLeft; |
3926 | 2.16k | for (auto *Member : *CC) |
3927 | 2.27k | if (2.27k !isa<Instruction>(Member) || |
3928 | 1.85k | !InstructionsToErase.count(cast<Instruction>(Member))) |
3929 | 1.97k | MembersLeft.insert(Member); |
3930 | 2.16k | CC->swap(MembersLeft); |
3931 | 2.16k | |
3932 | 2.16k | // If we have possible dead stores to look at, try to eliminate them. |
3933 | 2.16k | if (CC->getStoreCount() > 02.16k ) { |
3934 | 227 | convertClassToLoadsAndStores(*CC, PossibleDeadStores); |
3935 | 227 | std::sort(PossibleDeadStores.begin(), PossibleDeadStores.end()); |
3936 | 227 | ValueDFSStack EliminationStack; |
3937 | 246 | for (auto &VD : PossibleDeadStores) { |
3938 | 246 | int MemberDFSIn = VD.DFSIn; |
3939 | 246 | int MemberDFSOut = VD.DFSOut; |
3940 | 246 | Instruction *Member = cast<Instruction>(VD.Def.getPointer()); |
3941 | 246 | if (EliminationStack.empty() || |
3942 | 246 | !EliminationStack.isInScope(MemberDFSIn, MemberDFSOut)19 ) { |
3943 | 228 | // Sync to our current scope. |
3944 | 228 | EliminationStack.popUntilDFSScope(MemberDFSIn, MemberDFSOut); |
3945 | 228 | if (EliminationStack.empty()228 ) { |
3946 | 228 | EliminationStack.push_back(Member, MemberDFSIn, MemberDFSOut); |
3947 | 228 | continue; |
3948 | 228 | } |
3949 | 18 | } |
3950 | 18 | // We already did load elimination, so nothing to do here. |
3951 | 18 | if (18 isa<LoadInst>(Member)18 ) |
3952 | 0 | continue; |
3953 | 18 | assert(!EliminationStack.empty()); |
3954 | 18 | Instruction *Leader = cast<Instruction>(EliminationStack.back()); |
3955 | 18 | (void)Leader; |
3956 | 18 | assert(DT->dominates(Leader->getParent(), Member->getParent())); |
3957 | 18 | // Member is dominater by Leader, and thus dead |
3958 | 18 | DEBUG(dbgs() << "Marking dead store " << *Member |
3959 | 246 | << " that is dominated by " << *Leader << "\n"); |
3960 | 246 | markInstructionForDeletion(Member); |
3961 | 246 | CC->erase(Member); |
3962 | 246 | ++NumGVNDeadStores; |
3963 | 246 | } |
3964 | 227 | } |
3965 | 3.15k | } |
3966 | 307 | return AnythingReplaced; |
3967 | 307 | } |
3968 | | |
3969 | | // This function provides global ranking of operations so that we can place them |
3970 | | // in a canonical order. Note that rank alone is not necessarily enough for a |
3971 | | // complete ordering, as constants all have the same rank. However, generally, |
3972 | | // we will simplify an operation with all constants so that it doesn't matter |
3973 | | // what order they appear in. |
3974 | 2.62k | unsigned int NewGVN::getRank(const Value *V) const { |
3975 | 2.62k | // Prefer constants to undef to anything else |
3976 | 2.62k | // Undef is a constant, have to check it first. |
3977 | 2.62k | // Prefer smaller constants to constantexprs |
3978 | 2.62k | if (isa<ConstantExpr>(V)) |
3979 | 46 | return 2; |
3980 | 2.57k | if (2.57k isa<UndefValue>(V)2.57k ) |
3981 | 24 | return 1; |
3982 | 2.55k | if (2.55k isa<Constant>(V)2.55k ) |
3983 | 1.02k | return 0; |
3984 | 1.52k | else if (auto *1.52k A1.52k = dyn_cast<Argument>(V)) |
3985 | 473 | return 3 + A->getArgNo(); |
3986 | 1.05k | |
3987 | 1.05k | // Need to shift the instruction DFS by number of arguments + 3 to account for |
3988 | 1.05k | // the constant and argument ranking above. |
3989 | 1.05k | unsigned Result = InstrToDFSNum(V); |
3990 | 1.05k | if (Result > 0) |
3991 | 1.05k | return 4 + NumFuncArgs + Result; |
3992 | 0 | // Unreachable or something else, just return a really large number. |
3993 | 0 | return ~0; |
3994 | 0 | } |
3995 | | |
3996 | | // This is a function that says whether two commutative operations should |
3997 | | // have their order swapped when canonicalizing. |
3998 | 1.31k | bool NewGVN::shouldSwapOperands(const Value *A, const Value *B) const { |
3999 | 1.31k | // Because we only care about a total ordering, and don't rewrite expressions |
4000 | 1.31k | // in this order, we order by rank, which will give a strict weak ordering to |
4001 | 1.31k | // everything but constants, and then we order by pointer address. |
4002 | 1.31k | return std::make_pair(getRank(A), A) > std::make_pair(getRank(B), B); |
4003 | 1.31k | } |
4004 | | |
4005 | | namespace { |
4006 | | class NewGVNLegacyPass : public FunctionPass { |
4007 | | public: |
4008 | | static char ID; // Pass identification, replacement for typeid. |
4009 | 142 | NewGVNLegacyPass() : FunctionPass(ID) { |
4010 | 142 | initializeNewGVNLegacyPassPass(*PassRegistry::getPassRegistry()); |
4011 | 142 | } |
4012 | | bool runOnFunction(Function &F) override; |
4013 | | |
4014 | | private: |
4015 | 142 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
4016 | 142 | AU.addRequired<AssumptionCacheTracker>(); |
4017 | 142 | AU.addRequired<DominatorTreeWrapperPass>(); |
4018 | 142 | AU.addRequired<TargetLibraryInfoWrapperPass>(); |
4019 | 142 | AU.addRequired<MemorySSAWrapperPass>(); |
4020 | 142 | AU.addRequired<AAResultsWrapperPass>(); |
4021 | 142 | AU.addPreserved<DominatorTreeWrapperPass>(); |
4022 | 142 | AU.addPreserved<GlobalsAAWrapperPass>(); |
4023 | 142 | } |
4024 | | }; |
4025 | | } // namespace |
4026 | | |
4027 | 304 | bool NewGVNLegacyPass::runOnFunction(Function &F) { |
4028 | 304 | if (skipFunction(F)) |
4029 | 0 | return false; |
4030 | 304 | return NewGVN(F, &getAnalysis<DominatorTreeWrapperPass>().getDomTree(), |
4031 | 304 | &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F), |
4032 | 304 | &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(), |
4033 | 304 | &getAnalysis<AAResultsWrapperPass>().getAAResults(), |
4034 | 304 | &getAnalysis<MemorySSAWrapperPass>().getMSSA(), |
4035 | 304 | F.getParent()->getDataLayout()) |
4036 | 304 | .runGVN(); |
4037 | 304 | } |
4038 | | |
4039 | 24.6k | INITIALIZE_PASS_BEGIN24.6k (NewGVNLegacyPass, "newgvn", "Global Value Numbering",
|
4040 | 24.6k | false, false) |
4041 | 24.6k | INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) |
4042 | 24.6k | INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) |
4043 | 24.6k | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) |
4044 | 24.6k | INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) |
4045 | 24.6k | INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) |
4046 | 24.6k | INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) |
4047 | 24.6k | INITIALIZE_PASS_END(NewGVNLegacyPass, "newgvn", "Global Value Numbering", false, |
4048 | | false) |
4049 | | |
4050 | | char NewGVNLegacyPass::ID = 0; |
4051 | | |
4052 | | // createGVNPass - The public interface to this file. |
4053 | 0 | FunctionPass *llvm::createNewGVNPass() { return new NewGVNLegacyPass(); } |
4054 | | |
4055 | 3 | PreservedAnalyses NewGVNPass::run(Function &F, AnalysisManager<Function> &AM) { |
4056 | 3 | // Apparently the order in which we get these results matter for |
4057 | 3 | // the old GVN (see Chandler's comment in GVN.cpp). I'll keep |
4058 | 3 | // the same order here, just in case. |
4059 | 3 | auto &AC = AM.getResult<AssumptionAnalysis>(F); |
4060 | 3 | auto &DT = AM.getResult<DominatorTreeAnalysis>(F); |
4061 | 3 | auto &TLI = AM.getResult<TargetLibraryAnalysis>(F); |
4062 | 3 | auto &AA = AM.getResult<AAManager>(F); |
4063 | 3 | auto &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA(); |
4064 | 3 | bool Changed = |
4065 | 3 | NewGVN(F, &DT, &AC, &TLI, &AA, &MSSA, F.getParent()->getDataLayout()) |
4066 | 3 | .runGVN(); |
4067 | 3 | if (!Changed) |
4068 | 0 | return PreservedAnalyses::all(); |
4069 | 3 | PreservedAnalyses PA; |
4070 | 3 | PA.preserve<DominatorTreeAnalysis>(); |
4071 | 3 | PA.preserve<GlobalsAA>(); |
4072 | 3 | return PA; |
4073 | 3 | } |