/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Transforms/Scalar/GVNSink.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- GVNSink.cpp - sink expressions into successors -------------------===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | /// \file GVNSink.cpp |
11 | | /// This pass attempts to sink instructions into successors, reducing static |
12 | | /// instruction count and enabling if-conversion. |
13 | | /// |
14 | | /// We use a variant of global value numbering to decide what can be sunk. |
15 | | /// Consider: |
16 | | /// |
17 | | /// [ %a1 = add i32 %b, 1 ] [ %c1 = add i32 %d, 1 ] |
18 | | /// [ %a2 = xor i32 %a1, 1 ] [ %c2 = xor i32 %c1, 1 ] |
19 | | /// \ / |
20 | | /// [ %e = phi i32 %a2, %c2 ] |
21 | | /// [ add i32 %e, 4 ] |
22 | | /// |
23 | | /// |
24 | | /// GVN would number %a1 and %c1 differently because they compute different |
25 | | /// results - the VN of an instruction is a function of its opcode and the |
26 | | /// transitive closure of its operands. This is the key property for hoisting |
27 | | /// and CSE. |
28 | | /// |
29 | | /// What we want when sinking however is for a numbering that is a function of |
30 | | /// the *uses* of an instruction, which allows us to answer the question "if I |
31 | | /// replace %a1 with %c1, will it contribute in an equivalent way to all |
32 | | /// successive instructions?". The PostValueTable class in GVN provides this |
33 | | /// mapping. |
34 | | /// |
35 | | //===----------------------------------------------------------------------===// |
36 | | |
37 | | #include "llvm/ADT/DenseMap.h" |
38 | | #include "llvm/ADT/DenseMapInfo.h" |
39 | | #include "llvm/ADT/DenseSet.h" |
40 | | #include "llvm/ADT/Hashing.h" |
41 | | #include "llvm/ADT/Optional.h" |
42 | | #include "llvm/ADT/PostOrderIterator.h" |
43 | | #include "llvm/ADT/SCCIterator.h" |
44 | | #include "llvm/ADT/SmallPtrSet.h" |
45 | | #include "llvm/ADT/Statistic.h" |
46 | | #include "llvm/ADT/StringExtras.h" |
47 | | #include "llvm/Analysis/GlobalsModRef.h" |
48 | | #include "llvm/Analysis/MemorySSA.h" |
49 | | #include "llvm/Analysis/PostDominators.h" |
50 | | #include "llvm/Analysis/TargetTransformInfo.h" |
51 | | #include "llvm/Analysis/ValueTracking.h" |
52 | | #include "llvm/IR/Instructions.h" |
53 | | #include "llvm/IR/Verifier.h" |
54 | | #include "llvm/Support/MathExtras.h" |
55 | | #include "llvm/Transforms/Scalar.h" |
56 | | #include "llvm/Transforms/Scalar/GVN.h" |
57 | | #include "llvm/Transforms/Scalar/GVNExpression.h" |
58 | | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
59 | | #include "llvm/Transforms/Utils/Local.h" |
60 | | #include <unordered_set> |
61 | | using namespace llvm; |
62 | | |
63 | | #define DEBUG_TYPE "gvn-sink" |
64 | | |
65 | | STATISTIC(NumRemoved, "Number of instructions removed"); |
66 | | |
67 | | namespace llvm { |
68 | | namespace GVNExpression { |
69 | | |
70 | 0 | LLVM_DUMP_METHOD void Expression::dump() const { |
71 | 0 | print(dbgs()); |
72 | 0 | dbgs() << "\n"; |
73 | 0 | } |
74 | | |
75 | | } |
76 | | } |
77 | | |
78 | | namespace { |
79 | | |
80 | 197 | static bool isMemoryInst(const Instruction *I) { |
81 | 182 | return isa<LoadInst>(I) || isa<StoreInst>(I) || |
82 | 144 | (isa<InvokeInst>(I) && 144 !cast<InvokeInst>(I)->doesNotAccessMemory()0 ) || |
83 | 144 | (isa<CallInst>(I) && 144 !cast<CallInst>(I)->doesNotAccessMemory()25 ); |
84 | 197 | } |
85 | | |
86 | | /// Iterates through instructions in a set of blocks in reverse order from the |
87 | | /// first non-terminator. For example (assume all blocks have size n): |
88 | | /// LockstepReverseIterator I([B1, B2, B3]); |
89 | | /// *I-- = [B1[n], B2[n], B3[n]]; |
90 | | /// *I-- = [B1[n-1], B2[n-1], B3[n-1]]; |
91 | | /// *I-- = [B1[n-2], B2[n-2], B3[n-2]]; |
92 | | /// ... |
93 | | /// |
94 | | /// It continues until all blocks have been exhausted. Use \c getActiveBlocks() |
95 | | /// to |
96 | | /// determine which blocks are still going and the order they appear in the |
97 | | /// list returned by operator*. |
98 | | class LockstepReverseIterator { |
99 | | ArrayRef<BasicBlock *> Blocks; |
100 | | SmallPtrSet<BasicBlock *, 4> ActiveBlocks; |
101 | | SmallVector<Instruction *, 4> Insts; |
102 | | bool Fail; |
103 | | |
104 | | public: |
105 | 30 | LockstepReverseIterator(ArrayRef<BasicBlock *> Blocks) : Blocks(Blocks) { |
106 | 30 | reset(); |
107 | 30 | } |
108 | | |
109 | 30 | void reset() { |
110 | 30 | Fail = false; |
111 | 30 | ActiveBlocks.clear(); |
112 | 30 | for (BasicBlock *BB : Blocks) |
113 | 63 | ActiveBlocks.insert(BB); |
114 | 30 | Insts.clear(); |
115 | 63 | for (BasicBlock *BB : Blocks) { |
116 | 63 | if (BB->size() <= 163 ) { |
117 | 0 | // Block wasn't big enough - only contained a terminator. |
118 | 0 | ActiveBlocks.erase(BB); |
119 | 0 | continue; |
120 | 0 | } |
121 | 63 | Insts.push_back(BB->getTerminator()->getPrevNode()); |
122 | 63 | } |
123 | 30 | if (Insts.empty()) |
124 | 0 | Fail = true; |
125 | 30 | } |
126 | | |
127 | 75 | bool isValid() const { return !Fail; } |
128 | 63 | ArrayRef<Instruction *> operator*() const { return Insts; } |
129 | 57 | SmallPtrSet<BasicBlock *, 4> &getActiveBlocks() { return ActiveBlocks; } |
130 | | |
131 | 2 | void restrictToBlocks(SmallPtrSetImpl<BasicBlock *> &Blocks) { |
132 | 9 | for (auto II = Insts.begin(); II != Insts.end()9 ;) { |
133 | 7 | if (std::find(Blocks.begin(), Blocks.end(), (*II)->getParent()) == |
134 | 7 | Blocks.end()) { |
135 | 2 | ActiveBlocks.erase((*II)->getParent()); |
136 | 2 | II = Insts.erase(II); |
137 | 7 | } else { |
138 | 5 | ++II; |
139 | 5 | } |
140 | 7 | } |
141 | 2 | } |
142 | | |
143 | 45 | void operator--() { |
144 | 45 | if (Fail) |
145 | 0 | return; |
146 | 45 | SmallVector<Instruction *, 4> NewInsts; |
147 | 92 | for (auto *Inst : Insts) { |
148 | 92 | if (Inst == &Inst->getParent()->front()) |
149 | 25 | ActiveBlocks.erase(Inst->getParent()); |
150 | 92 | else |
151 | 67 | NewInsts.push_back(Inst->getPrevNode()); |
152 | 92 | } |
153 | 45 | if (NewInsts.empty()45 ) { |
154 | 12 | Fail = true; |
155 | 12 | return; |
156 | 12 | } |
157 | 33 | Insts = NewInsts; |
158 | 33 | } |
159 | | }; |
160 | | |
161 | | //===----------------------------------------------------------------------===// |
162 | | |
163 | | /// Candidate solution for sinking. There may be different ways to |
164 | | /// sink instructions, differing in the number of instructions sunk, |
165 | | /// the number of predecessors sunk from and the number of PHIs |
166 | | /// required. |
167 | | struct SinkingInstructionCandidate { |
168 | | unsigned NumBlocks; |
169 | | unsigned NumInstructions; |
170 | | unsigned NumPHIs; |
171 | | unsigned NumMemoryInsts; |
172 | | int Cost = -1; |
173 | | SmallVector<BasicBlock *, 4> Blocks; |
174 | | |
175 | 45 | void calculateCost(unsigned NumOrigPHIs, unsigned NumOrigBlocks) { |
176 | 45 | unsigned NumExtraPHIs = NumPHIs - NumOrigPHIs; |
177 | 45 | unsigned SplitEdgeCost = (NumOrigBlocks > NumBlocks) ? 23 : 042 ; |
178 | 45 | Cost = (NumInstructions * (NumBlocks - 1)) - |
179 | 45 | (NumExtraPHIs * |
180 | 45 | NumExtraPHIs) // PHIs are expensive, so make sure they're worth it. |
181 | 45 | - SplitEdgeCost; |
182 | 45 | } |
183 | 31 | bool operator>(const SinkingInstructionCandidate &Other) const { |
184 | 31 | return Cost > Other.Cost; |
185 | 31 | } |
186 | | }; |
187 | | |
188 | | #ifndef NDEBUG |
189 | | llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, |
190 | | const SinkingInstructionCandidate &C) { |
191 | | OS << "<Candidate Cost=" << C.Cost << " #Blocks=" << C.NumBlocks |
192 | | << " #Insts=" << C.NumInstructions << " #PHIs=" << C.NumPHIs << ">"; |
193 | | return OS; |
194 | | } |
195 | | #endif |
196 | | |
197 | | //===----------------------------------------------------------------------===// |
198 | | |
199 | | /// Describes a PHI node that may or may not exist. These track the PHIs |
200 | | /// that must be created if we sunk a sequence of instructions. It provides |
201 | | /// a hash function for efficient equality comparisons. |
202 | | class ModelledPHI { |
203 | | SmallVector<Value *, 4> Values; |
204 | | SmallVector<BasicBlock *, 4> Blocks; |
205 | | |
206 | | public: |
207 | 8 | ModelledPHI() {} |
208 | 22 | ModelledPHI(const PHINode *PN) { |
209 | 22 | // BasicBlock comes first so we sort by basic block pointer order, then by value pointer order. |
210 | 22 | SmallVector<std::pair<BasicBlock *, Value *>, 4> Ops; |
211 | 72 | for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E72 ; ++I50 ) |
212 | 50 | Ops.push_back({PN->getIncomingBlock(I), PN->getIncomingValue(I)}); |
213 | 22 | std::sort(Ops.begin(), Ops.end()); |
214 | 50 | for (auto &P : Ops) { |
215 | 50 | Blocks.push_back(P.first); |
216 | 50 | Values.push_back(P.second); |
217 | 50 | } |
218 | 22 | } |
219 | | /// Create a dummy ModelledPHI that will compare unequal to any other ModelledPHI |
220 | | /// without the same ID. |
221 | | /// \note This is specifically for DenseMapInfo - do not use this! |
222 | 8 | static ModelledPHI createDummy(size_t ID) { |
223 | 8 | ModelledPHI M; |
224 | 8 | M.Values.push_back(reinterpret_cast<Value*>(ID)); |
225 | 8 | return M; |
226 | 8 | } |
227 | | |
228 | | /// Create a PHI from an array of incoming values and incoming blocks. |
229 | | template <typename VArray, typename BArray> |
230 | 57 | ModelledPHI(const VArray &V, const BArray &B) { |
231 | 57 | std::copy(V.begin(), V.end(), std::back_inserter(Values)); |
232 | 57 | std::copy(B.begin(), B.end(), std::back_inserter(Blocks)); |
233 | 57 | } |
234 | | |
235 | | /// Create a PHI from [I[OpNum] for I in Insts]. |
236 | | template <typename BArray> |
237 | 105 | ModelledPHI(ArrayRef<Instruction *> Insts, unsigned OpNum, const BArray &B) { |
238 | 105 | std::copy(B.begin(), B.end(), std::back_inserter(Blocks)); |
239 | 105 | for (auto *I : Insts) |
240 | 213 | Values.push_back(I->getOperand(OpNum)); |
241 | 105 | } |
242 | | |
243 | | /// Restrict the PHI's contents down to only \c NewBlocks. |
244 | | /// \c NewBlocks must be a subset of \c this->Blocks. |
245 | 2 | void restrictToBlocks(const SmallPtrSetImpl<BasicBlock *> &NewBlocks) { |
246 | 2 | auto BI = Blocks.begin(); |
247 | 2 | auto VI = Values.begin(); |
248 | 9 | while (BI != Blocks.end()9 ) { |
249 | 7 | assert(VI != Values.end()); |
250 | 7 | if (std::find(NewBlocks.begin(), NewBlocks.end(), *BI) == |
251 | 7 | NewBlocks.end()) { |
252 | 2 | BI = Blocks.erase(BI); |
253 | 2 | VI = Values.erase(VI); |
254 | 7 | } else { |
255 | 5 | ++BI; |
256 | 5 | ++VI; |
257 | 5 | } |
258 | 7 | } |
259 | 2 | assert(Blocks.size() == NewBlocks.size()); |
260 | 2 | } |
261 | | |
262 | 187 | ArrayRef<Value *> getValues() const { return Values; } |
263 | | |
264 | 105 | bool areAllIncomingValuesSame() const { |
265 | 211 | return all_of(Values, [&](Value *V) { return V == Values[0]; }); |
266 | 105 | } |
267 | 45 | bool areAllIncomingValuesSameType() const { |
268 | 45 | return all_of( |
269 | 93 | Values, [&](Value *V) { return V->getType() == Values[0]->getType(); }); |
270 | 45 | } |
271 | 2 | bool areAnyIncomingValuesConstant() const { |
272 | 3 | return any_of(Values, [&](Value *V) { return isa<Constant>(V); }); |
273 | 2 | } |
274 | | // Hash functor |
275 | 194 | unsigned hash() const { |
276 | 194 | return (unsigned)hash_combine_range(Values.begin(), Values.end()); |
277 | 194 | } |
278 | 3.49k | bool operator==(const ModelledPHI &Other) const { |
279 | 3.22k | return Values == Other.Values && Blocks == Other.Blocks; |
280 | 3.49k | } |
281 | | }; |
282 | | |
283 | | template <typename ModelledPHI> struct DenseMapInfo { |
284 | 478 | static inline ModelledPHI &getEmptyKey() { |
285 | 478 | static ModelledPHI Dummy = ModelledPHI::createDummy(0); |
286 | 478 | return Dummy; |
287 | 478 | } |
288 | 292 | static inline ModelledPHI &getTombstoneKey() { |
289 | 292 | static ModelledPHI Dummy = ModelledPHI::createDummy(1); |
290 | 292 | return Dummy; |
291 | 292 | } |
292 | 194 | static unsigned getHashValue(const ModelledPHI &V) { return V.hash(); } |
293 | 3.49k | static bool isEqual(const ModelledPHI &LHS, const ModelledPHI &RHS) { |
294 | 3.49k | return LHS == RHS; |
295 | 3.49k | } |
296 | | }; |
297 | | |
298 | | typedef DenseSet<ModelledPHI, DenseMapInfo<ModelledPHI>> ModelledPHISet; |
299 | | |
300 | | //===----------------------------------------------------------------------===// |
301 | | // ValueTable |
302 | | //===----------------------------------------------------------------------===// |
303 | | // This is a value number table where the value number is a function of the |
304 | | // *uses* of a value, rather than its operands. Thus, if VN(A) == VN(B) we know |
305 | | // that the program would be equivalent if we replaced A with PHI(A, B). |
306 | | //===----------------------------------------------------------------------===// |
307 | | |
308 | | /// A GVN expression describing how an instruction is used. The operands |
309 | | /// field of BasicExpression is used to store uses, not operands. |
310 | | /// |
311 | | /// This class also contains fields for discriminators used when determining |
312 | | /// equivalence of instructions with sideeffects. |
313 | | class InstructionUseExpr : public GVNExpression::BasicExpression { |
314 | | unsigned MemoryUseOrder = -1; |
315 | | bool Volatile = false; |
316 | | |
317 | | public: |
318 | | InstructionUseExpr(Instruction *I, ArrayRecycler<Value *> &R, |
319 | | BumpPtrAllocator &A) |
320 | 130 | : GVNExpression::BasicExpression(I->getNumUses()) { |
321 | 130 | allocateOperands(R, A); |
322 | 130 | setOpcode(I->getOpcode()); |
323 | 130 | setType(I->getType()); |
324 | 130 | |
325 | 130 | for (auto &U : I->uses()) |
326 | 100 | op_push_back(U.getUser()); |
327 | 130 | std::sort(op_begin(), op_end()); |
328 | 130 | } |
329 | 40 | void setMemoryUseOrder(unsigned MUO) { MemoryUseOrder = MUO; } |
330 | 32 | void setVolatile(bool V) { Volatile = V; } |
331 | | |
332 | 0 | virtual hash_code getHashValue() const { |
333 | 0 | return hash_combine(GVNExpression::BasicExpression::getHashValue(), |
334 | 0 | MemoryUseOrder, Volatile); |
335 | 0 | } |
336 | | |
337 | 130 | template <typename Function> hash_code getHashValue(Function MapFn) { |
338 | 130 | hash_code H = |
339 | 130 | hash_combine(getOpcode(), getType(), MemoryUseOrder, Volatile); |
340 | 130 | for (auto *V : operands()) |
341 | 100 | H = hash_combine(H, MapFn(V)); |
342 | 130 | return H; |
343 | 130 | } |
344 | | }; |
345 | | |
346 | | class ValueTable { |
347 | | DenseMap<Value *, uint32_t> ValueNumbering; |
348 | | DenseMap<GVNExpression::Expression *, uint32_t> ExpressionNumbering; |
349 | | DenseMap<size_t, uint32_t> HashNumbering; |
350 | | BumpPtrAllocator Allocator; |
351 | | ArrayRecycler<Value *> Recycler; |
352 | | uint32_t nextValueNumber; |
353 | | |
354 | | /// Create an expression for I based on its opcode and its uses. If I |
355 | | /// touches or reads memory, the expression is also based upon its memory |
356 | | /// order - see \c getMemoryUseOrder(). |
357 | 130 | InstructionUseExpr *createExpr(Instruction *I) { |
358 | 130 | InstructionUseExpr *E = |
359 | 130 | new (Allocator) InstructionUseExpr(I, Recycler, Allocator); |
360 | 130 | if (isMemoryInst(I)) |
361 | 40 | E->setMemoryUseOrder(getMemoryUseOrder(I)); |
362 | 130 | |
363 | 130 | if (CmpInst *C130 = dyn_cast<CmpInst>(I)) { |
364 | 15 | CmpInst::Predicate Predicate = C->getPredicate(); |
365 | 15 | E->setOpcode((C->getOpcode() << 8) | Predicate); |
366 | 15 | } |
367 | 130 | return E; |
368 | 130 | } |
369 | | |
370 | | /// Helper to compute the value number for a memory instruction |
371 | | /// (LoadInst/StoreInst), including checking the memory ordering and |
372 | | /// volatility. |
373 | 32 | template <class Inst> InstructionUseExpr *createMemoryExpr(Inst *I) { |
374 | 32 | if (isStrongerThanUnordered(I->getOrdering()) || 32 I->isAtomic()32 ) |
375 | 0 | return nullptr; |
376 | 32 | InstructionUseExpr *E = createExpr(I); |
377 | 32 | E->setVolatile(I->isVolatile()); |
378 | 32 | return E; |
379 | 32 | } GVNSink.cpp:(anonymous namespace)::InstructionUseExpr* (anonymous namespace)::ValueTable::createMemoryExpr<llvm::LoadInst>(llvm::LoadInst*) Line | Count | Source | 373 | 11 | template <class Inst> InstructionUseExpr *createMemoryExpr(Inst *I) { | 374 | 11 | if (isStrongerThanUnordered(I->getOrdering()) || 11 I->isAtomic()11 ) | 375 | 0 | return nullptr; | 376 | 11 | InstructionUseExpr *E = createExpr(I); | 377 | 11 | E->setVolatile(I->isVolatile()); | 378 | 11 | return E; | 379 | 11 | } |
GVNSink.cpp:(anonymous namespace)::InstructionUseExpr* (anonymous namespace)::ValueTable::createMemoryExpr<llvm::StoreInst>(llvm::StoreInst*) Line | Count | Source | 373 | 21 | template <class Inst> InstructionUseExpr *createMemoryExpr(Inst *I) { | 374 | 21 | if (isStrongerThanUnordered(I->getOrdering()) || 21 I->isAtomic()21 ) | 375 | 0 | return nullptr; | 376 | 21 | InstructionUseExpr *E = createExpr(I); | 377 | 21 | E->setVolatile(I->isVolatile()); | 378 | 21 | return E; | 379 | 21 | } |
|
380 | | |
381 | | public: |
382 | | /// Returns the value number for the specified value, assigning |
383 | | /// it a new number if it did not have one before. |
384 | 238 | uint32_t lookupOrAdd(Value *V) { |
385 | 238 | auto VI = ValueNumbering.find(V); |
386 | 238 | if (VI != ValueNumbering.end()) |
387 | 89 | return VI->second; |
388 | 149 | |
389 | 149 | if (149 !isa<Instruction>(V)149 ) { |
390 | 0 | ValueNumbering[V] = nextValueNumber; |
391 | 0 | return nextValueNumber++; |
392 | 0 | } |
393 | 149 | |
394 | 149 | Instruction *I = cast<Instruction>(V); |
395 | 149 | InstructionUseExpr *exp = nullptr; |
396 | 149 | switch (I->getOpcode()) { |
397 | 11 | case Instruction::Load: |
398 | 11 | exp = createMemoryExpr(cast<LoadInst>(I)); |
399 | 11 | break; |
400 | 21 | case Instruction::Store: |
401 | 21 | exp = createMemoryExpr(cast<StoreInst>(I)); |
402 | 21 | break; |
403 | 98 | case Instruction::Call: |
404 | 98 | case Instruction::Invoke: |
405 | 98 | case Instruction::Add: |
406 | 98 | case Instruction::FAdd: |
407 | 98 | case Instruction::Sub: |
408 | 98 | case Instruction::FSub: |
409 | 98 | case Instruction::Mul: |
410 | 98 | case Instruction::FMul: |
411 | 98 | case Instruction::UDiv: |
412 | 98 | case Instruction::SDiv: |
413 | 98 | case Instruction::FDiv: |
414 | 98 | case Instruction::URem: |
415 | 98 | case Instruction::SRem: |
416 | 98 | case Instruction::FRem: |
417 | 98 | case Instruction::Shl: |
418 | 98 | case Instruction::LShr: |
419 | 98 | case Instruction::AShr: |
420 | 98 | case Instruction::And: |
421 | 98 | case Instruction::Or: |
422 | 98 | case Instruction::Xor: |
423 | 98 | case Instruction::ICmp: |
424 | 98 | case Instruction::FCmp: |
425 | 98 | case Instruction::Trunc: |
426 | 98 | case Instruction::ZExt: |
427 | 98 | case Instruction::SExt: |
428 | 98 | case Instruction::FPToUI: |
429 | 98 | case Instruction::FPToSI: |
430 | 98 | case Instruction::UIToFP: |
431 | 98 | case Instruction::SIToFP: |
432 | 98 | case Instruction::FPTrunc: |
433 | 98 | case Instruction::FPExt: |
434 | 98 | case Instruction::PtrToInt: |
435 | 98 | case Instruction::IntToPtr: |
436 | 98 | case Instruction::BitCast: |
437 | 98 | case Instruction::Select: |
438 | 98 | case Instruction::ExtractElement: |
439 | 98 | case Instruction::InsertElement: |
440 | 98 | case Instruction::ShuffleVector: |
441 | 98 | case Instruction::InsertValue: |
442 | 98 | case Instruction::GetElementPtr: |
443 | 98 | exp = createExpr(I); |
444 | 98 | break; |
445 | 19 | default: |
446 | 19 | break; |
447 | 149 | } |
448 | 149 | |
449 | 149 | if (149 !exp149 ) { |
450 | 19 | ValueNumbering[V] = nextValueNumber; |
451 | 19 | return nextValueNumber++; |
452 | 19 | } |
453 | 130 | |
454 | 130 | uint32_t e = ExpressionNumbering[exp]; |
455 | 130 | if (!e130 ) { |
456 | 100 | hash_code H = exp->getHashValue([=](Value *V) { return lookupOrAdd(V); }); |
457 | 130 | auto I = HashNumbering.find(H); |
458 | 130 | if (I != HashNumbering.end()130 ) { |
459 | 60 | e = I->second; |
460 | 130 | } else { |
461 | 70 | e = nextValueNumber++; |
462 | 70 | HashNumbering[H] = e; |
463 | 70 | ExpressionNumbering[exp] = e; |
464 | 70 | } |
465 | 130 | } |
466 | 238 | ValueNumbering[V] = e; |
467 | 238 | return e; |
468 | 238 | } |
469 | | |
470 | | /// Returns the value number of the specified value. Fails if the value has |
471 | | /// not yet been numbered. |
472 | 119 | uint32_t lookup(Value *V) const { |
473 | 119 | auto VI = ValueNumbering.find(V); |
474 | 119 | assert(VI != ValueNumbering.end() && "Value not numbered?"); |
475 | 119 | return VI->second; |
476 | 119 | } |
477 | | |
478 | | /// Removes all value numberings and resets the value table. |
479 | 0 | void clear() { |
480 | 0 | ValueNumbering.clear(); |
481 | 0 | ExpressionNumbering.clear(); |
482 | 0 | HashNumbering.clear(); |
483 | 0 | Recycler.clear(Allocator); |
484 | 0 | nextValueNumber = 1; |
485 | 0 | } |
486 | | |
487 | 30 | ValueTable() : nextValueNumber(1) {} |
488 | | |
489 | | /// \c Inst uses or touches memory. Return an ID describing the memory state |
490 | | /// at \c Inst such that if getMemoryUseOrder(I1) == getMemoryUseOrder(I2), |
491 | | /// the exact same memory operations happen after I1 and I2. |
492 | | /// |
493 | | /// This is a very hard problem in general, so we use domain-specific |
494 | | /// knowledge that we only ever check for equivalence between blocks sharing a |
495 | | /// single immediate successor that is common, and when determining if I1 == |
496 | | /// I2 we will have already determined that next(I1) == next(I2). This |
497 | | /// inductive property allows us to simply return the value number of the next |
498 | | /// instruction that defines memory. |
499 | 40 | uint32_t getMemoryUseOrder(Instruction *Inst) { |
500 | 40 | auto *BB = Inst->getParent(); |
501 | 40 | for (auto I = std::next(Inst->getIterator()), E = BB->end(); |
502 | 54 | I != E && 54 !I->isTerminator()54 ; ++I14 ) { |
503 | 22 | if (!isMemoryInst(&*I)) |
504 | 14 | continue; |
505 | 8 | if (8 isa<LoadInst>(&*I)8 ) |
506 | 0 | continue; |
507 | 8 | CallInst *CI = dyn_cast<CallInst>(&*I); |
508 | 8 | if (CI && 8 CI->onlyReadsMemory()0 ) |
509 | 0 | continue; |
510 | 8 | InvokeInst *II = dyn_cast<InvokeInst>(&*I); |
511 | 8 | if (II && 8 II->onlyReadsMemory()0 ) |
512 | 0 | continue; |
513 | 8 | return lookupOrAdd(&*I); |
514 | 8 | } |
515 | 32 | return 0; |
516 | 40 | } |
517 | | }; |
518 | | |
519 | | //===----------------------------------------------------------------------===// |
520 | | |
521 | | class GVNSink { |
522 | | public: |
523 | 30 | GVNSink() : VN() {} |
524 | 30 | bool run(Function &F) { |
525 | 30 | DEBUG(dbgs() << "GVNSink: running on function @" << F.getName() << "\n"); |
526 | 30 | |
527 | 30 | unsigned NumSunk = 0; |
528 | 30 | ReversePostOrderTraversal<Function*> RPOT(&F); |
529 | 30 | for (auto *N : RPOT) |
530 | 128 | NumSunk += sinkBB(N); |
531 | 30 | |
532 | 30 | return NumSunk > 0; |
533 | 30 | } |
534 | | |
535 | | private: |
536 | | ValueTable VN; |
537 | | |
538 | 117 | bool isInstructionBlacklisted(Instruction *I) { |
539 | 117 | // These instructions may change or break semantics if moved. |
540 | 117 | if (isa<PHINode>(I) || 117 I->isEHPad()117 || isa<AllocaInst>(I)117 || |
541 | 117 | I->getType()->isTokenTy()) |
542 | 0 | return true; |
543 | 117 | return false; |
544 | 117 | } |
545 | | |
546 | | /// The main heuristic function. Analyze the set of instructions pointed to by |
547 | | /// LRI and return a candidate solution if these instructions can be sunk, or |
548 | | /// None otherwise. |
549 | | Optional<SinkingInstructionCandidate> analyzeInstructionForSinking( |
550 | | LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum, |
551 | | ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents); |
552 | | |
553 | | /// Create a ModelledPHI for each PHI in BB, adding to PHIs. |
554 | | void analyzeInitialPHIs(BasicBlock *BB, ModelledPHISet &PHIs, |
555 | 30 | SmallPtrSetImpl<Value *> &PHIContents) { |
556 | 52 | for (auto &I : *BB) { |
557 | 52 | auto *PN = dyn_cast<PHINode>(&I); |
558 | 52 | if (!PN) |
559 | 30 | return; |
560 | 22 | |
561 | 22 | auto MPHI = ModelledPHI(PN); |
562 | 22 | PHIs.insert(MPHI); |
563 | 22 | for (auto *V : MPHI.getValues()) |
564 | 50 | PHIContents.insert(V); |
565 | 52 | } |
566 | 30 | } |
567 | | |
568 | | /// The main instruction sinking driver. Set up state and try and sink |
569 | | /// instructions into BBEnd from its predecessors. |
570 | | unsigned sinkBB(BasicBlock *BBEnd); |
571 | | |
572 | | /// Perform the actual mechanics of sinking an instruction from Blocks into |
573 | | /// BBEnd, which is their only successor. |
574 | | void sinkLastInstruction(ArrayRef<BasicBlock *> Blocks, BasicBlock *BBEnd); |
575 | | |
576 | | /// Remove PHIs that all have the same incoming value. |
577 | 33 | void foldPointlessPHINodes(BasicBlock *BB) { |
578 | 33 | auto I = BB->begin(); |
579 | 101 | while (PHINode *PN101 = dyn_cast<PHINode>(I++)) { |
580 | 68 | if (!all_of(PN->incoming_values(), |
581 | 137 | [&](const Value *V) { return V == PN->getIncomingValue(0); })) |
582 | 42 | continue; |
583 | 26 | if (26 PN->getIncomingValue(0) != PN26 ) |
584 | 26 | PN->replaceAllUsesWith(PN->getIncomingValue(0)); |
585 | 26 | else |
586 | 0 | PN->replaceAllUsesWith(UndefValue::get(PN->getType())); |
587 | 68 | PN->eraseFromParent(); |
588 | 68 | } |
589 | 33 | } |
590 | | }; |
591 | | |
592 | | Optional<SinkingInstructionCandidate> GVNSink::analyzeInstructionForSinking( |
593 | | LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum, |
594 | 63 | ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents) { |
595 | 63 | auto Insts = *LRI; |
596 | 63 | DEBUG(dbgs() << " -- Analyzing instruction set: [\n"; for (auto *I |
597 | 63 | : Insts) { |
598 | 63 | I->dump(); |
599 | 63 | } dbgs() << " ]\n";); |
600 | 63 | |
601 | 63 | DenseMap<uint32_t, unsigned> VNums; |
602 | 130 | for (auto *I : Insts) { |
603 | 130 | uint32_t N = VN.lookupOrAdd(I); |
604 | 130 | DEBUG(dbgs() << " VN=" << utohexstr(N) << " for" << *I << "\n"); |
605 | 130 | if (N == ~0U) |
606 | 0 | return None; |
607 | 130 | VNums[N]++; |
608 | 130 | } |
609 | 63 | unsigned VNumToSink = |
610 | 63 | std::max_element(VNums.begin(), VNums.end(), |
611 | 63 | [](const std::pair<uint32_t, unsigned> &I, |
612 | 7 | const std::pair<uint32_t, unsigned> &J) { |
613 | 7 | return I.second < J.second; |
614 | 7 | }) |
615 | 63 | ->first; |
616 | 63 | |
617 | 63 | if (VNums[VNumToSink] == 1) |
618 | 63 | // Can't sink anything! |
619 | 6 | return None; |
620 | 57 | |
621 | 57 | // Now restrict the number of incoming blocks down to only those with |
622 | 57 | // VNumToSink. |
623 | 57 | auto &ActivePreds = LRI.getActiveBlocks(); |
624 | 57 | unsigned InitialActivePredSize = ActivePreds.size(); |
625 | 57 | SmallVector<Instruction *, 4> NewInsts; |
626 | 119 | for (auto *I : Insts) { |
627 | 119 | if (VN.lookup(I) != VNumToSink) |
628 | 2 | ActivePreds.erase(I->getParent()); |
629 | 119 | else |
630 | 117 | NewInsts.push_back(I); |
631 | 119 | } |
632 | 57 | for (auto *I : NewInsts) |
633 | 117 | if (117 isInstructionBlacklisted(I)117 ) |
634 | 0 | return None; |
635 | 57 | |
636 | 57 | // If we've restricted the incoming blocks, restrict all needed PHIs also |
637 | 57 | // to that set. |
638 | 57 | bool RecomputePHIContents = false; |
639 | 57 | if (ActivePreds.size() != InitialActivePredSize57 ) { |
640 | 2 | ModelledPHISet NewNeededPHIs; |
641 | 2 | for (auto P : NeededPHIs) { |
642 | 2 | P.restrictToBlocks(ActivePreds); |
643 | 2 | NewNeededPHIs.insert(P); |
644 | 2 | } |
645 | 2 | NeededPHIs = NewNeededPHIs; |
646 | 2 | LRI.restrictToBlocks(ActivePreds); |
647 | 2 | RecomputePHIContents = true; |
648 | 2 | } |
649 | 57 | |
650 | 57 | // The sunk instruction's results. |
651 | 57 | ModelledPHI NewPHI(NewInsts, ActivePreds); |
652 | 57 | |
653 | 57 | // Does sinking this instruction render previous PHIs redundant? |
654 | 57 | if (NeededPHIs.find(NewPHI) != NeededPHIs.end()57 ) { |
655 | 39 | NeededPHIs.erase(NewPHI); |
656 | 39 | RecomputePHIContents = true; |
657 | 39 | } |
658 | 57 | |
659 | 57 | if (RecomputePHIContents57 ) { |
660 | 39 | // The needed PHIs have changed, so recompute the set of all needed |
661 | 39 | // values. |
662 | 39 | PHIContents.clear(); |
663 | 39 | for (auto &PHI : NeededPHIs) |
664 | 10 | PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end()); |
665 | 39 | } |
666 | 57 | |
667 | 57 | // Is this instruction required by a later PHI that doesn't match this PHI? |
668 | 57 | // if so, we can't sink this instruction. |
669 | 57 | for (auto *V : NewPHI.getValues()) |
670 | 111 | if (111 PHIContents.count(V)111 ) |
671 | 111 | // V exists in this PHI, but the whole PHI is different to NewPHI |
672 | 111 | // (else it would have been removed earlier). We cannot continue |
673 | 111 | // because this isn't representable. |
674 | 5 | return None; |
675 | 52 | |
676 | 52 | // Which operands need PHIs? |
677 | 52 | // FIXME: If any of these fail, we should partition up the candidates to |
678 | 52 | // try and continue making progress. |
679 | 52 | Instruction *I0 = NewInsts[0]; |
680 | 150 | for (unsigned OpNum = 0, E = I0->getNumOperands(); OpNum != E150 ; ++OpNum98 ) { |
681 | 105 | ModelledPHI PHI(NewInsts, OpNum, ActivePreds); |
682 | 105 | if (PHI.areAllIncomingValuesSame()) |
683 | 51 | continue; |
684 | 54 | if (54 !canReplaceOperandWithVariable(I0, OpNum)54 ) |
685 | 54 | // We can 't create a PHI from this instruction! |
686 | 6 | return None; |
687 | 48 | if (48 NeededPHIs.count(PHI)48 ) |
688 | 3 | continue; |
689 | 45 | if (45 !PHI.areAllIncomingValuesSameType()45 ) |
690 | 0 | return None; |
691 | 45 | // Don't create indirect calls! The called value is the final operand. |
692 | 45 | if (45 (isa<CallInst>(I0) || 45 isa<InvokeInst>(I0)40 ) && OpNum == E - 15 && |
693 | 2 | PHI.areAnyIncomingValuesConstant()) |
694 | 1 | return None; |
695 | 44 | |
696 | 44 | NeededPHIs.reserve(NeededPHIs.size()); |
697 | 44 | NeededPHIs.insert(PHI); |
698 | 44 | PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end()); |
699 | 44 | } |
700 | 52 | |
701 | 45 | if (45 isMemoryInst(NewInsts[0])45 ) |
702 | 16 | ++MemoryInstNum; |
703 | 45 | |
704 | 45 | SinkingInstructionCandidate Cand; |
705 | 45 | Cand.NumInstructions = ++InstNum; |
706 | 45 | Cand.NumMemoryInsts = MemoryInstNum; |
707 | 45 | Cand.NumBlocks = ActivePreds.size(); |
708 | 45 | Cand.NumPHIs = NeededPHIs.size(); |
709 | 45 | for (auto *C : ActivePreds) |
710 | 92 | Cand.Blocks.push_back(C); |
711 | 45 | |
712 | 45 | return Cand; |
713 | 63 | } |
714 | | |
715 | 128 | unsigned GVNSink::sinkBB(BasicBlock *BBEnd) { |
716 | 128 | DEBUG(dbgs() << "GVNSink: running on basic block "; |
717 | 128 | BBEnd->printAsOperand(dbgs()); dbgs() << "\n"); |
718 | 128 | SmallVector<BasicBlock *, 4> Preds; |
719 | 136 | for (auto *B : predecessors(BBEnd)) { |
720 | 136 | auto *T = B->getTerminator(); |
721 | 136 | if (isa<BranchInst>(T) || 136 isa<SwitchInst>(T)6 ) |
722 | 136 | Preds.push_back(B); |
723 | 136 | else |
724 | 0 | return 0; |
725 | 128 | } |
726 | 128 | if (128 Preds.size() < 2128 ) |
727 | 98 | return 0; |
728 | 30 | std::sort(Preds.begin(), Preds.end()); |
729 | 30 | |
730 | 30 | unsigned NumOrigPreds = Preds.size(); |
731 | 30 | // We can only sink instructions through unconditional branches. |
732 | 98 | for (auto I = Preds.begin(); I != Preds.end()98 ;) { |
733 | 68 | if ((*I)->getTerminator()->getNumSuccessors() != 1) |
734 | 5 | I = Preds.erase(I); |
735 | 68 | else |
736 | 63 | ++I; |
737 | 68 | } |
738 | 30 | |
739 | 30 | LockstepReverseIterator LRI(Preds); |
740 | 30 | SmallVector<SinkingInstructionCandidate, 4> Candidates; |
741 | 30 | unsigned InstNum = 0, MemoryInstNum = 0; |
742 | 30 | ModelledPHISet NeededPHIs; |
743 | 30 | SmallPtrSet<Value *, 4> PHIContents; |
744 | 30 | analyzeInitialPHIs(BBEnd, NeededPHIs, PHIContents); |
745 | 30 | unsigned NumOrigPHIs = NeededPHIs.size(); |
746 | 30 | |
747 | 75 | while (LRI.isValid()75 ) { |
748 | 63 | auto Cand = analyzeInstructionForSinking(LRI, InstNum, MemoryInstNum, |
749 | 63 | NeededPHIs, PHIContents); |
750 | 63 | if (!Cand) |
751 | 18 | break; |
752 | 45 | Cand->calculateCost(NumOrigPHIs, Preds.size()); |
753 | 45 | Candidates.emplace_back(*Cand); |
754 | 45 | --LRI; |
755 | 45 | } |
756 | 30 | |
757 | 30 | std::stable_sort( |
758 | 30 | Candidates.begin(), Candidates.end(), |
759 | 30 | [](const SinkingInstructionCandidate &A, |
760 | 31 | const SinkingInstructionCandidate &B) { return A > B; }); |
761 | 30 | DEBUG(dbgs() << " -- Sinking candidates:\n"; for (auto &C |
762 | 30 | : Candidates) dbgs() |
763 | 30 | << " " << C << "\n";); |
764 | 30 | |
765 | 30 | // Pick the top candidate, as long it is positive! |
766 | 30 | if (Candidates.empty() || 30 Candidates.front().Cost <= 021 ) |
767 | 14 | return 0; |
768 | 16 | auto C = Candidates.front(); |
769 | 16 | |
770 | 16 | DEBUG(dbgs() << " -- Sinking: " << C << "\n"); |
771 | 16 | BasicBlock *InsertBB = BBEnd; |
772 | 16 | if (C.Blocks.size() < NumOrigPreds16 ) { |
773 | 2 | DEBUG(dbgs() << " -- Splitting edge to "; BBEnd->printAsOperand(dbgs()); |
774 | 2 | dbgs() << "\n"); |
775 | 2 | InsertBB = SplitBlockPredecessors(BBEnd, C.Blocks, ".gvnsink.split"); |
776 | 2 | if (!InsertBB2 ) { |
777 | 0 | DEBUG(dbgs() << " -- FAILED to split edge!\n"); |
778 | 0 | // Edge couldn't be split. |
779 | 0 | return 0; |
780 | 0 | } |
781 | 16 | } |
782 | 16 | |
783 | 49 | for (unsigned I = 0; 16 I < C.NumInstructions49 ; ++I33 ) |
784 | 33 | sinkLastInstruction(C.Blocks, InsertBB); |
785 | 128 | |
786 | 128 | return C.NumInstructions; |
787 | 128 | } |
788 | | |
789 | | void GVNSink::sinkLastInstruction(ArrayRef<BasicBlock *> Blocks, |
790 | 33 | BasicBlock *BBEnd) { |
791 | 33 | SmallVector<Instruction *, 4> Insts; |
792 | 33 | for (BasicBlock *BB : Blocks) |
793 | 67 | Insts.push_back(BB->getTerminator()->getPrevNode()); |
794 | 33 | Instruction *I0 = Insts.front(); |
795 | 33 | |
796 | 33 | SmallVector<Value *, 4> NewOperands; |
797 | 91 | for (unsigned O = 0, E = I0->getNumOperands(); O != E91 ; ++O58 ) { |
798 | 116 | bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) { |
799 | 116 | return I->getOperand(O) != I0->getOperand(O); |
800 | 116 | }); |
801 | 58 | if (!NeedPHI58 ) { |
802 | 25 | NewOperands.push_back(I0->getOperand(O)); |
803 | 25 | continue; |
804 | 25 | } |
805 | 33 | |
806 | 33 | // Create a new PHI in the successor block and populate it. |
807 | 33 | auto *Op = I0->getOperand(O); |
808 | 33 | assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!"); |
809 | 33 | auto *PN = PHINode::Create(Op->getType(), Insts.size(), |
810 | 33 | Op->getName() + ".sink", &BBEnd->front()); |
811 | 33 | for (auto *I : Insts) |
812 | 67 | PN->addIncoming(I->getOperand(O), I->getParent()); |
813 | 58 | NewOperands.push_back(PN); |
814 | 58 | } |
815 | 33 | |
816 | 33 | // Arbitrarily use I0 as the new "common" instruction; remap its operands |
817 | 33 | // and move it to the start of the successor block. |
818 | 91 | for (unsigned O = 0, E = I0->getNumOperands(); O != E91 ; ++O58 ) |
819 | 58 | I0->getOperandUse(O).set(NewOperands[O]); |
820 | 33 | I0->moveBefore(&*BBEnd->getFirstInsertionPt()); |
821 | 33 | |
822 | 33 | // Update metadata and IR flags. |
823 | 33 | for (auto *I : Insts) |
824 | 67 | if (67 I != I067 ) { |
825 | 34 | combineMetadataForCSE(I0, I); |
826 | 34 | I0->andIRFlags(I); |
827 | 34 | } |
828 | 33 | |
829 | 33 | for (auto *I : Insts) |
830 | 67 | if (67 I != I067 ) |
831 | 34 | I->replaceAllUsesWith(I0); |
832 | 33 | foldPointlessPHINodes(BBEnd); |
833 | 33 | |
834 | 33 | // Finally nuke all instructions apart from the common instruction. |
835 | 33 | for (auto *I : Insts) |
836 | 67 | if (67 I != I067 ) |
837 | 34 | I->eraseFromParent(); |
838 | 33 | |
839 | 33 | NumRemoved += Insts.size() - 1; |
840 | 33 | } |
841 | | |
842 | | //////////////////////////////////////////////////////////////////////////////// |
843 | | // Pass machinery / boilerplate |
844 | | |
845 | | class GVNSinkLegacyPass : public FunctionPass { |
846 | | public: |
847 | | static char ID; |
848 | | |
849 | 4 | GVNSinkLegacyPass() : FunctionPass(ID) { |
850 | 4 | initializeGVNSinkLegacyPassPass(*PassRegistry::getPassRegistry()); |
851 | 4 | } |
852 | | |
853 | 30 | bool runOnFunction(Function &F) override { |
854 | 30 | if (skipFunction(F)) |
855 | 0 | return false; |
856 | 30 | GVNSink G; |
857 | 30 | return G.run(F); |
858 | 30 | } |
859 | | |
860 | 4 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
861 | 4 | AU.addPreserved<GlobalsAAWrapperPass>(); |
862 | 4 | } |
863 | | }; |
864 | | } // namespace |
865 | | |
866 | 0 | PreservedAnalyses GVNSinkPass::run(Function &F, FunctionAnalysisManager &AM) { |
867 | 0 | GVNSink G; |
868 | 0 | if (!G.run(F)) |
869 | 0 | return PreservedAnalyses::all(); |
870 | 0 |
|
871 | 0 | PreservedAnalyses PA; |
872 | 0 | PA.preserve<GlobalsAA>(); |
873 | 0 | return PA; |
874 | 0 | } |
875 | | |
876 | | char GVNSinkLegacyPass::ID = 0; |
877 | 24.6k | INITIALIZE_PASS_BEGIN24.6k (GVNSinkLegacyPass, "gvn-sink",
|
878 | 24.6k | "Early GVN sinking of Expressions", false, false) |
879 | 24.6k | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) |
880 | 24.6k | INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) |
881 | 24.6k | INITIALIZE_PASS_END(GVNSinkLegacyPass, "gvn-sink", |
882 | | "Early GVN sinking of Expressions", false, false) |
883 | | |
884 | 0 | FunctionPass *llvm::createGVNSinkPass() { return new GVNSinkLegacyPass(); } |