/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/CodeGen/MachineCSE.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- MachineCSE.cpp - Machine Common Subexpression Elimination Pass -----===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | // This pass performs global common subexpression elimination on machine |
11 | | // instructions using a scoped hash table based value numbering scheme. It |
12 | | // must be run while the machine function is still in SSA form. |
13 | | // |
14 | | //===----------------------------------------------------------------------===// |
15 | | |
16 | | #include "llvm/ADT/DenseMap.h" |
17 | | #include "llvm/ADT/ScopedHashTable.h" |
18 | | #include "llvm/ADT/SmallPtrSet.h" |
19 | | #include "llvm/ADT/SmallSet.h" |
20 | | #include "llvm/ADT/SmallVector.h" |
21 | | #include "llvm/ADT/Statistic.h" |
22 | | #include "llvm/Analysis/AliasAnalysis.h" |
23 | | #include "llvm/CodeGen/MachineBasicBlock.h" |
24 | | #include "llvm/CodeGen/MachineDominators.h" |
25 | | #include "llvm/CodeGen/MachineFunction.h" |
26 | | #include "llvm/CodeGen/MachineFunctionPass.h" |
27 | | #include "llvm/CodeGen/MachineInstr.h" |
28 | | #include "llvm/CodeGen/MachineOperand.h" |
29 | | #include "llvm/CodeGen/MachineRegisterInfo.h" |
30 | | #include "llvm/CodeGen/Passes.h" |
31 | | #include "llvm/MC/MCInstrDesc.h" |
32 | | #include "llvm/MC/MCRegisterInfo.h" |
33 | | #include "llvm/Pass.h" |
34 | | #include "llvm/Support/Allocator.h" |
35 | | #include "llvm/Support/Debug.h" |
36 | | #include "llvm/Support/RecyclingAllocator.h" |
37 | | #include "llvm/Support/raw_ostream.h" |
38 | | #include "llvm/Target/TargetInstrInfo.h" |
39 | | #include "llvm/Target/TargetOpcodes.h" |
40 | | #include "llvm/Target/TargetRegisterInfo.h" |
41 | | #include "llvm/Target/TargetSubtargetInfo.h" |
42 | | #include <cassert> |
43 | | #include <iterator> |
44 | | #include <utility> |
45 | | #include <vector> |
46 | | |
47 | | using namespace llvm; |
48 | | |
49 | | #define DEBUG_TYPE "machine-cse" |
50 | | |
51 | | STATISTIC(NumCoalesces, "Number of copies coalesced"); |
52 | | STATISTIC(NumCSEs, "Number of common subexpression eliminated"); |
53 | | STATISTIC(NumPhysCSEs, |
54 | | "Number of physreg referencing common subexpr eliminated"); |
55 | | STATISTIC(NumCrossBBCSEs, |
56 | | "Number of cross-MBB physreg referencing CS eliminated"); |
57 | | STATISTIC(NumCommutes, "Number of copies coalesced after commuting"); |
58 | | |
59 | | namespace { |
60 | | |
61 | | class MachineCSE : public MachineFunctionPass { |
62 | | const TargetInstrInfo *TII; |
63 | | const TargetRegisterInfo *TRI; |
64 | | AliasAnalysis *AA; |
65 | | MachineDominatorTree *DT; |
66 | | MachineRegisterInfo *MRI; |
67 | | |
68 | | public: |
69 | | static char ID; // Pass identification |
70 | | |
71 | 33.5k | MachineCSE() : MachineFunctionPass(ID) { |
72 | 33.5k | initializeMachineCSEPass(*PassRegistry::getPassRegistry()); |
73 | 33.5k | } |
74 | | |
75 | | bool runOnMachineFunction(MachineFunction &MF) override; |
76 | | |
77 | 33.4k | void getAnalysisUsage(AnalysisUsage &AU) const override { |
78 | 33.4k | AU.setPreservesCFG(); |
79 | 33.4k | MachineFunctionPass::getAnalysisUsage(AU); |
80 | 33.4k | AU.addRequired<AAResultsWrapperPass>(); |
81 | 33.4k | AU.addPreservedID(MachineLoopInfoID); |
82 | 33.4k | AU.addRequired<MachineDominatorTree>(); |
83 | 33.4k | AU.addPreserved<MachineDominatorTree>(); |
84 | 33.4k | } |
85 | | |
86 | 603k | void releaseMemory() override { |
87 | 603k | ScopeMap.clear(); |
88 | 603k | Exps.clear(); |
89 | 603k | } |
90 | | |
91 | | private: |
92 | | using AllocatorTy = RecyclingAllocator<BumpPtrAllocator, |
93 | | ScopedHashTableVal<MachineInstr *, unsigned>>; |
94 | | using ScopedHTType = |
95 | | ScopedHashTable<MachineInstr *, unsigned, MachineInstrExpressionTrait, |
96 | | AllocatorTy>; |
97 | | using ScopeType = ScopedHTType::ScopeTy; |
98 | | |
99 | | unsigned LookAheadLimit = 0; |
100 | | DenseMap<MachineBasicBlock *, ScopeType *> ScopeMap; |
101 | | ScopedHTType VNT; |
102 | | SmallVector<MachineInstr *, 64> Exps; |
103 | | unsigned CurrVN = 0; |
104 | | |
105 | | bool PerformTrivialCopyPropagation(MachineInstr *MI, |
106 | | MachineBasicBlock *MBB); |
107 | | bool isPhysDefTriviallyDead(unsigned Reg, |
108 | | MachineBasicBlock::const_iterator I, |
109 | | MachineBasicBlock::const_iterator E) const; |
110 | | bool hasLivePhysRegDefUses(const MachineInstr *MI, |
111 | | const MachineBasicBlock *MBB, |
112 | | SmallSet<unsigned,8> &PhysRefs, |
113 | | SmallVectorImpl<unsigned> &PhysDefs, |
114 | | bool &PhysUseDef) const; |
115 | | bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, |
116 | | SmallSet<unsigned,8> &PhysRefs, |
117 | | SmallVectorImpl<unsigned> &PhysDefs, |
118 | | bool &NonLocal) const; |
119 | | bool isCSECandidate(MachineInstr *MI); |
120 | | bool isProfitableToCSE(unsigned CSReg, unsigned Reg, |
121 | | MachineInstr *CSMI, MachineInstr *MI); |
122 | | void EnterScope(MachineBasicBlock *MBB); |
123 | | void ExitScope(MachineBasicBlock *MBB); |
124 | | bool ProcessBlock(MachineBasicBlock *MBB); |
125 | | void ExitScopeIfDone(MachineDomTreeNode *Node, |
126 | | DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren); |
127 | | bool PerformCSE(MachineDomTreeNode *Node); |
128 | | }; |
129 | | |
130 | | } // end anonymous namespace |
131 | | |
132 | | char MachineCSE::ID = 0; |
133 | | |
134 | | char &llvm::MachineCSEID = MachineCSE::ID; |
135 | | |
136 | 36.7k | INITIALIZE_PASS_BEGIN36.7k (MachineCSE, DEBUG_TYPE,
|
137 | 36.7k | "Machine Common Subexpression Elimination", false, false) |
138 | 36.7k | INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) |
139 | 36.7k | INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) |
140 | 36.7k | INITIALIZE_PASS_END(MachineCSE, DEBUG_TYPE, |
141 | | "Machine Common Subexpression Elimination", false, false) |
142 | | |
143 | | /// The source register of a COPY machine instruction can be propagated to all |
144 | | /// its users, and this propagation could increase the probability of finding |
145 | | /// common subexpressions. If the COPY has only one user, the COPY itself can |
146 | | /// be removed. |
147 | | bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI, |
148 | 7.58M | MachineBasicBlock *MBB) { |
149 | 7.58M | bool Changed = false; |
150 | 27.0M | for (MachineOperand &MO : MI->operands()) { |
151 | 27.0M | if (!MO.isReg() || 27.0M !MO.isUse()17.9M ) |
152 | 18.2M | continue; |
153 | 8.79M | unsigned Reg = MO.getReg(); |
154 | 8.79M | if (!TargetRegisterInfo::isVirtualRegister(Reg)) |
155 | 1.24M | continue; |
156 | 7.55M | bool OnlyOneUse = MRI->hasOneNonDBGUse(Reg); |
157 | 7.55M | MachineInstr *DefMI = MRI->getVRegDef(Reg); |
158 | 7.55M | if (!DefMI->isCopy()) |
159 | 5.29M | continue; |
160 | 2.25M | unsigned SrcReg = DefMI->getOperand(1).getReg(); |
161 | 2.25M | if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) |
162 | 830k | continue; |
163 | 1.42M | if (1.42M DefMI->getOperand(0).getSubReg()1.42M ) |
164 | 12 | continue; |
165 | 1.42M | // FIXME: We should trivially coalesce subregister copies to expose CSE |
166 | 1.42M | // opportunities on instructions with truncated operands (see |
167 | 1.42M | // cse-add-with-overflow.ll). This can be done here as follows: |
168 | 1.42M | // if (SrcSubReg) |
169 | 1.42M | // RC = TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), RC, |
170 | 1.42M | // SrcSubReg); |
171 | 1.42M | // MO.substVirtReg(SrcReg, SrcSubReg, *TRI); |
172 | 1.42M | // |
173 | 1.42M | // The 2-addr pass has been updated to handle coalesced subregs. However, |
174 | 1.42M | // some machine-specific code still can't handle it. |
175 | 1.42M | // To handle it properly we also need a way find a constrained subregister |
176 | 1.42M | // class given a super-reg class and subreg index. |
177 | 1.42M | if (1.42M DefMI->getOperand(1).getSubReg()1.42M ) |
178 | 415k | continue; |
179 | 1.01M | const TargetRegisterClass *RC = MRI->getRegClass(Reg); |
180 | 1.01M | if (!MRI->constrainRegClass(SrcReg, RC)) |
181 | 58.2k | continue; |
182 | 952k | DEBUG952k (dbgs() << "Coalescing: " << *DefMI); |
183 | 952k | DEBUG(dbgs() << "*** to: " << *MI); |
184 | 952k | // Propagate SrcReg of copies to MI. |
185 | 952k | MO.setReg(SrcReg); |
186 | 952k | MRI->clearKillFlags(SrcReg); |
187 | 952k | // Coalesce single use copies. |
188 | 952k | if (OnlyOneUse952k ) { |
189 | 417k | DefMI->eraseFromParent(); |
190 | 417k | ++NumCoalesces; |
191 | 417k | } |
192 | 27.0M | Changed = true; |
193 | 27.0M | } |
194 | 7.58M | |
195 | 7.58M | return Changed; |
196 | 7.58M | } |
197 | | |
198 | | bool |
199 | | MachineCSE::isPhysDefTriviallyDead(unsigned Reg, |
200 | | MachineBasicBlock::const_iterator I, |
201 | 42.6k | MachineBasicBlock::const_iterator E) const { |
202 | 42.6k | unsigned LookAheadLeft = LookAheadLimit; |
203 | 57.6k | while (LookAheadLeft57.6k ) { |
204 | 57.3k | // Skip over dbg_value's. |
205 | 57.3k | I = skipDebugInstructionsForward(I, E); |
206 | 57.3k | |
207 | 57.3k | if (I == E) |
208 | 57.3k | // Reached end of block, we don't know if register is dead or not. |
209 | 268 | return false; |
210 | 57.1k | |
211 | 57.1k | bool SeenDef = false; |
212 | 196k | for (const MachineOperand &MO : I->operands()) { |
213 | 196k | if (MO.isRegMask() && 196k MO.clobbersPhysReg(Reg)177 ) |
214 | 177 | SeenDef = true; |
215 | 196k | if (!MO.isReg() || 196k !MO.getReg()113k ) |
216 | 83.9k | continue; |
217 | 112k | if (112k !TRI->regsOverlap(MO.getReg(), Reg)112k ) |
218 | 69.8k | continue; |
219 | 42.4k | if (42.4k MO.isUse()42.4k ) |
220 | 42.4k | // Found a use! |
221 | 40.4k | return false; |
222 | 2.05k | SeenDef = true; |
223 | 2.05k | } |
224 | 16.6k | if (16.6k SeenDef16.6k ) |
225 | 16.6k | // See a def of Reg (or an alias) before encountering any use, it's |
226 | 16.6k | // trivially dead. |
227 | 1.68k | return true; |
228 | 15.0k | |
229 | 15.0k | --LookAheadLeft; |
230 | 15.0k | ++I; |
231 | 15.0k | } |
232 | 318 | return false; |
233 | 42.6k | } |
234 | | |
235 | | /// hasLivePhysRegDefUses - Return true if the specified instruction read/write |
236 | | /// physical registers (except for dead defs of physical registers). It also |
237 | | /// returns the physical register def by reference if it's the only one and the |
238 | | /// instruction does not uses a physical register. |
239 | | bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, |
240 | | const MachineBasicBlock *MBB, |
241 | | SmallSet<unsigned,8> &PhysRefs, |
242 | | SmallVectorImpl<unsigned> &PhysDefs, |
243 | 564k | bool &PhysUseDef) const{ |
244 | 564k | // First, add all uses to PhysRefs. |
245 | 1.72M | for (const MachineOperand &MO : MI->operands()) { |
246 | 1.72M | if (!MO.isReg() || 1.72M MO.isDef()999k ) |
247 | 1.36M | continue; |
248 | 364k | unsigned Reg = MO.getReg(); |
249 | 364k | if (!Reg) |
250 | 22.5k | continue; |
251 | 342k | if (342k TargetRegisterInfo::isVirtualRegister(Reg)342k ) |
252 | 111k | continue; |
253 | 230k | // Reading constant physregs is ok. |
254 | 230k | if (230k !MRI->isConstantPhysReg(Reg)230k ) |
255 | 289k | for (MCRegAliasIterator AI(Reg, TRI, true); 99.2k AI.isValid()289k ; ++AI190k ) |
256 | 190k | PhysRefs.insert(*AI); |
257 | 1.72M | } |
258 | 564k | |
259 | 564k | // Next, collect all defs into PhysDefs. If any is already in PhysRefs |
260 | 564k | // (which currently contains only uses), set the PhysUseDef flag. |
261 | 564k | PhysUseDef = false; |
262 | 564k | MachineBasicBlock::const_iterator I = MI; I = std::next(I); |
263 | 1.72M | for (const MachineOperand &MO : MI->operands()) { |
264 | 1.72M | if (!MO.isReg() || 1.72M !MO.isDef()999k ) |
265 | 1.09M | continue; |
266 | 634k | unsigned Reg = MO.getReg(); |
267 | 634k | if (!Reg) |
268 | 0 | continue; |
269 | 634k | if (634k TargetRegisterInfo::isVirtualRegister(Reg)634k ) |
270 | 523k | continue; |
271 | 110k | // Check against PhysRefs even if the def is "dead". |
272 | 110k | if (110k PhysRefs.count(Reg)110k ) |
273 | 30.1k | PhysUseDef = true; |
274 | 110k | // If the def is dead, it's ok. But the def may not marked "dead". That's |
275 | 110k | // common since this pass is run before livevariables. We can scan |
276 | 110k | // forward a few instructions and check if it is obviously dead. |
277 | 110k | if (!MO.isDead() && 110k !isPhysDefTriviallyDead(Reg, I, MBB->end())42.6k ) |
278 | 40.9k | PhysDefs.push_back(Reg); |
279 | 1.72M | } |
280 | 564k | |
281 | 564k | // Finally, add all defs to PhysRefs as well. |
282 | 605k | for (unsigned i = 0, e = PhysDefs.size(); i != e605k ; ++i40.9k ) |
283 | 93.3k | for (MCRegAliasIterator AI(PhysDefs[i], TRI, true); 40.9k AI.isValid()93.3k ; ++AI52.3k ) |
284 | 52.3k | PhysRefs.insert(*AI); |
285 | 564k | |
286 | 564k | return !PhysRefs.empty(); |
287 | 564k | } |
288 | | |
289 | | bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, |
290 | | SmallSet<unsigned,8> &PhysRefs, |
291 | | SmallVectorImpl<unsigned> &PhysDefs, |
292 | 106k | bool &NonLocal) const { |
293 | 106k | // For now conservatively returns false if the common subexpression is |
294 | 106k | // not in the same basic block as the given instruction. The only exception |
295 | 106k | // is if the common subexpression is in the sole predecessor block. |
296 | 106k | const MachineBasicBlock *MBB = MI->getParent(); |
297 | 106k | const MachineBasicBlock *CSMBB = CSMI->getParent(); |
298 | 106k | |
299 | 106k | bool CrossMBB = false; |
300 | 106k | if (CSMBB != MBB106k ) { |
301 | 76.0k | if (MBB->pred_size() != 1 || 76.0k *MBB->pred_begin() != CSMBB60.8k ) |
302 | 33.3k | return false; |
303 | 42.7k | |
304 | 54.6k | for (unsigned i = 0, e = PhysDefs.size(); 42.7k i != e54.6k ; ++i11.9k ) { |
305 | 12.1k | if (MRI->isAllocatable(PhysDefs[i]) || 12.1k MRI->isReserved(PhysDefs[i])12.0k ) |
306 | 12.1k | // Avoid extending live range of physical registers if they are |
307 | 12.1k | //allocatable or reserved. |
308 | 210 | return false; |
309 | 12.1k | } |
310 | 42.5k | CrossMBB = true; |
311 | 42.5k | } |
312 | 73.2k | MachineBasicBlock::const_iterator I = CSMI; I = std::next(I); |
313 | 73.2k | MachineBasicBlock::const_iterator E = MI; |
314 | 73.2k | MachineBasicBlock::const_iterator EE = CSMBB->end(); |
315 | 73.2k | unsigned LookAheadLeft = LookAheadLimit; |
316 | 918k | while (LookAheadLeft918k ) { |
317 | 886k | // Skip over dbg_value's. |
318 | 886k | while (I != E && 886k I != EE876k && I->isDebugValue()844k ) |
319 | 6 | ++I; |
320 | 886k | |
321 | 886k | if (I == EE886k ) { |
322 | 31.9k | assert(CrossMBB && "Reaching end-of-MBB without finding MI?"); |
323 | 31.9k | (void)CrossMBB; |
324 | 31.9k | CrossMBB = false; |
325 | 31.9k | NonLocal = true; |
326 | 31.9k | I = MBB->begin(); |
327 | 31.9k | EE = MBB->end(); |
328 | 31.9k | continue; |
329 | 31.9k | } |
330 | 854k | |
331 | 854k | if (854k I == E854k ) |
332 | 9.48k | return true; |
333 | 844k | |
334 | 844k | for (const MachineOperand &MO : I->operands()) 844k { |
335 | 3.02M | // RegMasks go on instructions like calls that clobber lots of physregs. |
336 | 3.02M | // Don't attempt to CSE across such an instruction. |
337 | 3.02M | if (MO.isRegMask()) |
338 | 98 | return false; |
339 | 3.02M | if (3.02M !MO.isReg() || 3.02M !MO.isDef()2.22M ) |
340 | 2.25M | continue; |
341 | 769k | unsigned MOReg = MO.getReg(); |
342 | 769k | if (TargetRegisterInfo::isVirtualRegister(MOReg)) |
343 | 707k | continue; |
344 | 61.3k | if (61.3k PhysRefs.count(MOReg)61.3k ) |
345 | 31.5k | return false; |
346 | 812k | } |
347 | 812k | |
348 | 812k | --LookAheadLeft; |
349 | 812k | ++I; |
350 | 812k | } |
351 | 73.2k | |
352 | 32.0k | return false; |
353 | 106k | } |
354 | | |
355 | 37.6M | bool MachineCSE::isCSECandidate(MachineInstr *MI) { |
356 | 37.6M | if (MI->isPosition() || 37.6M MI->isPHI()37.5M || MI->isImplicitDef()36.6M || MI->isKill()36.4M || |
357 | 37.6M | MI->isInlineAsm()36.4M || MI->isDebugValue()36.4M ) |
358 | 1.20M | return false; |
359 | 36.4M | |
360 | 36.4M | // Ignore copies. |
361 | 36.4M | if (36.4M MI->isCopyLike()36.4M ) |
362 | 12.7M | return false; |
363 | 23.7M | |
364 | 23.7M | // Ignore stuff that we obviously can't move. |
365 | 23.7M | if (23.7M MI->mayStore() || 23.7M MI->isCall()21.7M || MI->isTerminator()19.3M || |
366 | 14.6M | MI->hasUnmodeledSideEffects()) |
367 | 13.3M | return false; |
368 | 10.3M | |
369 | 10.3M | if (10.3M MI->mayLoad()10.3M ) { |
370 | 2.48M | // Okay, this instruction does a load. As a refinement, we allow the target |
371 | 2.48M | // to decide whether the loaded value is actually a constant. If so, we can |
372 | 2.48M | // actually use it as a load. |
373 | 2.48M | if (!MI->isDereferenceableInvariantLoad(AA)) |
374 | 2.48M | // FIXME: we should be able to hoist loads with no other side effects if |
375 | 2.48M | // there are no other instructions which can change memory in this loop. |
376 | 2.48M | // This is a trivial form of alias analysis. |
377 | 2.26M | return false; |
378 | 8.12M | } |
379 | 8.12M | |
380 | 8.12M | // Ignore stack guard loads, otherwise the register that holds CSEed value may |
381 | 8.12M | // be spilled and get loaded back with corrupted data. |
382 | 8.12M | if (8.12M MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD8.12M ) |
383 | 6.45k | return false; |
384 | 8.11M | |
385 | 8.11M | return true; |
386 | 8.11M | } |
387 | | |
388 | | /// isProfitableToCSE - Return true if it's profitable to eliminate MI with a |
389 | | /// common expression that defines Reg. |
390 | | bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, |
391 | 429k | MachineInstr *CSMI, MachineInstr *MI) { |
392 | 429k | // FIXME: Heuristics that works around the lack the live range splitting. |
393 | 429k | |
394 | 429k | // If CSReg is used at all uses of Reg, CSE should not increase register |
395 | 429k | // pressure of CSReg. |
396 | 429k | bool MayIncreasePressure = true; |
397 | 429k | if (TargetRegisterInfo::isVirtualRegister(CSReg) && |
398 | 429k | TargetRegisterInfo::isVirtualRegister(Reg)429k ) { |
399 | 429k | MayIncreasePressure = false; |
400 | 429k | SmallPtrSet<MachineInstr*, 8> CSUses; |
401 | 1.80M | for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) { |
402 | 1.80M | CSUses.insert(&MI); |
403 | 1.80M | } |
404 | 427k | for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) { |
405 | 427k | if (!CSUses.count(&MI)427k ) { |
406 | 426k | MayIncreasePressure = true; |
407 | 426k | break; |
408 | 426k | } |
409 | 429k | } |
410 | 429k | } |
411 | 429k | if (!MayIncreasePressure429k ) return true2.90k ; |
412 | 426k | |
413 | 426k | // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in |
414 | 426k | // an immediate predecessor. We don't want to increase register pressure and |
415 | 426k | // end up causing other computation to be spilled. |
416 | 426k | if (426k TII->isAsCheapAsAMove(*MI)426k ) { |
417 | 219k | MachineBasicBlock *CSBB = CSMI->getParent(); |
418 | 219k | MachineBasicBlock *BB = MI->getParent(); |
419 | 219k | if (CSBB != BB && 219k !CSBB->isSuccessor(BB)198k ) |
420 | 134k | return false; |
421 | 291k | } |
422 | 291k | |
423 | 291k | // Heuristics #2: If the expression doesn't not use a vr and the only use |
424 | 291k | // of the redundant computation are copies, do not cse. |
425 | 291k | bool HasVRegUse = false; |
426 | 660k | for (const MachineOperand &MO : MI->operands()) { |
427 | 660k | if (MO.isReg() && 660k MO.isUse()347k && |
428 | 660k | TargetRegisterInfo::isVirtualRegister(MO.getReg())52.5k ) { |
429 | 40.1k | HasVRegUse = true; |
430 | 40.1k | break; |
431 | 40.1k | } |
432 | 291k | } |
433 | 291k | if (!HasVRegUse291k ) { |
434 | 251k | bool HasNonCopyUse = false; |
435 | 268k | for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) { |
436 | 268k | // Ignore copies. |
437 | 268k | if (!MI.isCopyLike()268k ) { |
438 | 190k | HasNonCopyUse = true; |
439 | 190k | break; |
440 | 190k | } |
441 | 251k | } |
442 | 251k | if (!HasNonCopyUse) |
443 | 61.2k | return false; |
444 | 230k | } |
445 | 230k | |
446 | 230k | // Heuristics #3: If the common subexpression is used by PHIs, do not reuse |
447 | 230k | // it unless the defined value is already used in the BB of the new use. |
448 | 230k | bool HasPHI = false; |
449 | 230k | SmallPtrSet<MachineBasicBlock*, 4> CSBBs; |
450 | 1.40M | for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) { |
451 | 1.40M | HasPHI |= MI.isPHI(); |
452 | 1.40M | CSBBs.insert(MI.getParent()); |
453 | 1.40M | } |
454 | 230k | |
455 | 230k | if (!HasPHI) |
456 | 225k | return true; |
457 | 4.35k | return CSBBs.count(MI->getParent()); |
458 | 4.35k | } |
459 | | |
460 | 3.82M | void MachineCSE::EnterScope(MachineBasicBlock *MBB) { |
461 | 3.82M | DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n'); |
462 | 3.82M | ScopeType *Scope = new ScopeType(VNT); |
463 | 3.82M | ScopeMap[MBB] = Scope; |
464 | 3.82M | } |
465 | | |
466 | 3.82M | void MachineCSE::ExitScope(MachineBasicBlock *MBB) { |
467 | 3.82M | DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n'); |
468 | 3.82M | DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB); |
469 | 3.82M | assert(SI != ScopeMap.end()); |
470 | 3.82M | delete SI->second; |
471 | 3.82M | ScopeMap.erase(SI); |
472 | 3.82M | } |
473 | | |
474 | 3.82M | bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { |
475 | 3.82M | bool Changed = false; |
476 | 3.82M | |
477 | 3.82M | SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs; |
478 | 3.82M | SmallVector<unsigned, 2> ImplicitDefsToUpdate; |
479 | 3.82M | SmallVector<unsigned, 2> ImplicitDefs; |
480 | 41.4M | for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E41.4M ; ) { |
481 | 37.6M | MachineInstr *MI = &*I; |
482 | 37.6M | ++I; |
483 | 37.6M | |
484 | 37.6M | if (!isCSECandidate(MI)) |
485 | 29.5M | continue; |
486 | 8.11M | |
487 | 8.11M | bool FoundCSE = VNT.count(MI); |
488 | 8.11M | if (!FoundCSE8.11M ) { |
489 | 7.58M | // Using trivial copy propagation to find more CSE opportunities. |
490 | 7.58M | if (PerformTrivialCopyPropagation(MI, MBB)7.58M ) { |
491 | 806k | Changed = true; |
492 | 806k | |
493 | 806k | // After coalescing MI itself may become a copy. |
494 | 806k | if (MI->isCopyLike()) |
495 | 0 | continue; |
496 | 806k | |
497 | 806k | // Try again to see if CSE is possible. |
498 | 806k | FoundCSE = VNT.count(MI); |
499 | 806k | } |
500 | 7.58M | } |
501 | 8.11M | |
502 | 8.11M | // Commute commutable instructions. |
503 | 8.11M | bool Commuted = false; |
504 | 8.11M | if (!FoundCSE && 8.11M MI->isCommutable()7.55M ) { |
505 | 208k | if (MachineInstr *NewMI208k = TII->commuteInstruction(*MI)) { |
506 | 175k | Commuted = true; |
507 | 175k | FoundCSE = VNT.count(NewMI); |
508 | 175k | if (NewMI != MI175k ) { |
509 | 0 | // New instruction. It doesn't need to be kept. |
510 | 0 | NewMI->eraseFromParent(); |
511 | 0 | Changed = true; |
512 | 175k | } else if (175k !FoundCSE175k ) |
513 | 175k | // MI was changed but it didn't help, commute it back! |
514 | 175k | (void)TII->commuteInstruction(*MI); |
515 | 175k | } |
516 | 208k | } |
517 | 8.11M | |
518 | 8.11M | // If the instruction defines physical registers and the values *may* be |
519 | 8.11M | // used, then it's not safe to replace it with a common subexpression. |
520 | 8.11M | // It's also not safe if the instruction uses physical registers. |
521 | 8.11M | bool CrossMBBPhysDef = false; |
522 | 8.11M | SmallSet<unsigned, 8> PhysRefs; |
523 | 8.11M | SmallVector<unsigned, 2> PhysDefs; |
524 | 8.11M | bool PhysUseDef = false; |
525 | 8.11M | if (FoundCSE && 8.11M hasLivePhysRegDefUses(MI, MBB, PhysRefs, |
526 | 8.11M | PhysDefs, PhysUseDef)) { |
527 | 136k | FoundCSE = false; |
528 | 136k | |
529 | 136k | // ... Unless the CS is local or is in the sole predecessor block |
530 | 136k | // and it also defines the physical register which is not clobbered |
531 | 136k | // in between and the physical register uses were not clobbered. |
532 | 136k | // This can never be the case if the instruction both uses and |
533 | 136k | // defines the same physical register, which was detected above. |
534 | 136k | if (!PhysUseDef136k ) { |
535 | 106k | unsigned CSVN = VNT.lookup(MI); |
536 | 106k | MachineInstr *CSMI = Exps[CSVN]; |
537 | 106k | if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef)) |
538 | 9.48k | FoundCSE = true; |
539 | 106k | } |
540 | 136k | } |
541 | 8.11M | |
542 | 8.11M | if (!FoundCSE8.11M ) { |
543 | 7.67M | VNT.insert(MI, CurrVN++); |
544 | 7.67M | Exps.push_back(MI); |
545 | 7.67M | continue; |
546 | 7.67M | } |
547 | 437k | |
548 | 437k | // Found a common subexpression, eliminate it. |
549 | 437k | unsigned CSVN = VNT.lookup(MI); |
550 | 437k | MachineInstr *CSMI = Exps[CSVN]; |
551 | 437k | DEBUG(dbgs() << "Examining: " << *MI); |
552 | 437k | DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI); |
553 | 437k | |
554 | 437k | // Check if it's profitable to perform this CSE. |
555 | 437k | bool DoCSE = true; |
556 | 437k | unsigned NumDefs = MI->getDesc().getNumDefs() + |
557 | 437k | MI->getDesc().getNumImplicitDefs(); |
558 | 437k | |
559 | 710k | for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && 710k i != e473k ; ++i273k ) { |
560 | 473k | MachineOperand &MO = MI->getOperand(i); |
561 | 473k | if (!MO.isReg() || 473k !MO.isDef()460k ) |
562 | 26.9k | continue; |
563 | 446k | unsigned OldReg = MO.getReg(); |
564 | 446k | unsigned NewReg = CSMI->getOperand(i).getReg(); |
565 | 446k | |
566 | 446k | // Go through implicit defs of CSMI and MI, if a def is not dead at MI, |
567 | 446k | // we should make sure it is not dead at CSMI. |
568 | 446k | if (MO.isImplicit() && 446k !MO.isDead()10.5k && CSMI->getOperand(i).isDead()3.31k ) |
569 | 52 | ImplicitDefsToUpdate.push_back(i); |
570 | 446k | |
571 | 446k | // Keep track of implicit defs of CSMI and MI, to clear possibly |
572 | 446k | // made-redundant kill flags. |
573 | 446k | if (MO.isImplicit() && 446k !MO.isDead()10.5k && OldReg == NewReg3.31k ) |
574 | 3.31k | ImplicitDefs.push_back(OldReg); |
575 | 446k | |
576 | 446k | if (OldReg == NewReg446k ) { |
577 | 17.6k | --NumDefs; |
578 | 17.6k | continue; |
579 | 17.6k | } |
580 | 429k | |
581 | 446k | assert(TargetRegisterInfo::isVirtualRegister(OldReg) && |
582 | 429k | TargetRegisterInfo::isVirtualRegister(NewReg) && |
583 | 429k | "Do not CSE physical register defs!"); |
584 | 429k | |
585 | 429k | if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)429k ) { |
586 | 200k | DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n"); |
587 | 200k | DoCSE = false; |
588 | 200k | break; |
589 | 200k | } |
590 | 229k | |
591 | 229k | // Don't perform CSE if the result of the old instruction cannot exist |
592 | 229k | // within the register class of the new instruction. |
593 | 229k | const TargetRegisterClass *OldRC = MRI->getRegClass(OldReg); |
594 | 229k | if (!MRI->constrainRegClass(NewReg, OldRC)229k ) { |
595 | 3 | DEBUG(dbgs() << "*** Not the same register class, avoid CSE!\n"); |
596 | 3 | DoCSE = false; |
597 | 3 | break; |
598 | 3 | } |
599 | 229k | |
600 | 229k | CSEPairs.push_back(std::make_pair(OldReg, NewReg)); |
601 | 229k | --NumDefs; |
602 | 229k | } |
603 | 437k | |
604 | 437k | // Actually perform the elimination. |
605 | 437k | if (DoCSE437k ) { |
606 | 229k | for (std::pair<unsigned, unsigned> &CSEPair : CSEPairs) { |
607 | 229k | unsigned OldReg = CSEPair.first; |
608 | 229k | unsigned NewReg = CSEPair.second; |
609 | 229k | // OldReg may have been unused but is used now, clear the Dead flag |
610 | 229k | MachineInstr *Def = MRI->getUniqueVRegDef(NewReg); |
611 | 229k | assert(Def != nullptr && "CSEd register has no unique definition?"); |
612 | 229k | Def->clearRegisterDeads(NewReg); |
613 | 229k | // Replace with NewReg and clear kill flags which may be wrong now. |
614 | 229k | MRI->replaceRegWith(OldReg, NewReg); |
615 | 229k | MRI->clearKillFlags(NewReg); |
616 | 229k | } |
617 | 236k | |
618 | 236k | // Go through implicit defs of CSMI and MI, if a def is not dead at MI, |
619 | 236k | // we should make sure it is not dead at CSMI. |
620 | 236k | for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate) |
621 | 52 | CSMI->getOperand(ImplicitDefToUpdate).setIsDead(false); |
622 | 236k | |
623 | 236k | // Go through implicit defs of CSMI and MI, and clear the kill flags on |
624 | 236k | // their uses in all the instructions between CSMI and MI. |
625 | 236k | // We might have made some of the kill flags redundant, consider: |
626 | 236k | // subs ... %NZCV<imp-def> <- CSMI |
627 | 236k | // csinc ... %NZCV<imp-use,kill> <- this kill flag isn't valid anymore |
628 | 236k | // subs ... %NZCV<imp-def> <- MI, to be eliminated |
629 | 236k | // csinc ... %NZCV<imp-use,kill> |
630 | 236k | // Since we eliminated MI, and reused a register imp-def'd by CSMI |
631 | 236k | // (here %NZCV), that register, if it was killed before MI, should have |
632 | 236k | // that kill flag removed, because it's lifetime was extended. |
633 | 236k | if (CSMI->getParent() == MI->getParent()236k ) { |
634 | 1.05M | for (MachineBasicBlock::iterator II = CSMI, IE = MI; II != IE1.05M ; ++II1.03M ) |
635 | 1.03M | for (auto ImplicitDef : ImplicitDefs) |
636 | 4.94k | if (MachineOperand *4.94k MO4.94k = II->findRegisterUseOperand( |
637 | 4.94k | ImplicitDef, /*isKill=*/true, TRI)) |
638 | 0 | MO->setIsKill(false); |
639 | 236k | } else { |
640 | 214k | // If the instructions aren't in the same BB, bail out and clear the |
641 | 214k | // kill flag on all uses of the imp-def'd register. |
642 | 214k | for (auto ImplicitDef : ImplicitDefs) |
643 | 1.63k | MRI->clearKillFlags(ImplicitDef); |
644 | 214k | } |
645 | 236k | |
646 | 236k | if (CrossMBBPhysDef236k ) { |
647 | 1.63k | // Add physical register defs now coming in from a predecessor to MBB |
648 | 1.63k | // livein list. |
649 | 3.25k | while (!PhysDefs.empty()3.25k ) { |
650 | 1.62k | unsigned LiveIn = PhysDefs.pop_back_val(); |
651 | 1.62k | if (!MBB->isLiveIn(LiveIn)) |
652 | 1.62k | MBB->addLiveIn(LiveIn); |
653 | 1.62k | } |
654 | 1.63k | ++NumCrossBBCSEs; |
655 | 1.63k | } |
656 | 236k | |
657 | 236k | MI->eraseFromParent(); |
658 | 236k | ++NumCSEs; |
659 | 236k | if (!PhysRefs.empty()) |
660 | 9.45k | ++NumPhysCSEs; |
661 | 236k | if (Commuted) |
662 | 1 | ++NumCommutes; |
663 | 236k | Changed = true; |
664 | 437k | } else { |
665 | 200k | VNT.insert(MI, CurrVN++); |
666 | 200k | Exps.push_back(MI); |
667 | 200k | } |
668 | 37.6M | CSEPairs.clear(); |
669 | 37.6M | ImplicitDefsToUpdate.clear(); |
670 | 37.6M | ImplicitDefs.clear(); |
671 | 37.6M | } |
672 | 3.82M | |
673 | 3.82M | return Changed; |
674 | 3.82M | } |
675 | | |
676 | | /// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given |
677 | | /// dominator tree node if its a leaf or all of its children are done. Walk |
678 | | /// up the dominator tree to destroy ancestors which are now done. |
679 | | void |
680 | | MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node, |
681 | 3.82M | DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren) { |
682 | 3.82M | if (OpenChildren[Node]) |
683 | 1.99M | return; |
684 | 1.82M | |
685 | 1.82M | // Pop scope. |
686 | 1.82M | ExitScope(Node->getBlock()); |
687 | 1.82M | |
688 | 1.82M | // Now traverse upwards to pop ancestors whose offsprings are all done. |
689 | 3.82M | while (MachineDomTreeNode *Parent3.82M = Node->getIDom()) { |
690 | 3.21M | unsigned Left = --OpenChildren[Parent]; |
691 | 3.21M | if (Left != 0) |
692 | 1.21M | break; |
693 | 1.99M | ExitScope(Parent->getBlock()); |
694 | 1.99M | Node = Parent; |
695 | 1.99M | } |
696 | 3.82M | } |
697 | | |
698 | 603k | bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) { |
699 | 603k | SmallVector<MachineDomTreeNode*, 32> Scopes; |
700 | 603k | SmallVector<MachineDomTreeNode*, 8> WorkList; |
701 | 603k | DenseMap<MachineDomTreeNode*, unsigned> OpenChildren; |
702 | 603k | |
703 | 603k | CurrVN = 0; |
704 | 603k | |
705 | 603k | // Perform a DFS walk to determine the order of visit. |
706 | 603k | WorkList.push_back(Node); |
707 | 3.82M | do { |
708 | 3.82M | Node = WorkList.pop_back_val(); |
709 | 3.82M | Scopes.push_back(Node); |
710 | 3.82M | const std::vector<MachineDomTreeNode*> &Children = Node->getChildren(); |
711 | 3.82M | OpenChildren[Node] = Children.size(); |
712 | 3.82M | for (MachineDomTreeNode *Child : Children) |
713 | 3.21M | WorkList.push_back(Child); |
714 | 3.82M | } while (!WorkList.empty()); |
715 | 603k | |
716 | 603k | // Now perform CSE. |
717 | 603k | bool Changed = false; |
718 | 3.82M | for (MachineDomTreeNode *Node : Scopes) { |
719 | 3.82M | MachineBasicBlock *MBB = Node->getBlock(); |
720 | 3.82M | EnterScope(MBB); |
721 | 3.82M | Changed |= ProcessBlock(MBB); |
722 | 3.82M | // If it's a leaf node, it's done. Traverse upwards to pop ancestors. |
723 | 3.82M | ExitScopeIfDone(Node, OpenChildren); |
724 | 3.82M | } |
725 | 603k | |
726 | 603k | return Changed; |
727 | 603k | } |
728 | | |
729 | 603k | bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { |
730 | 603k | if (skipFunction(*MF.getFunction())) |
731 | 45 | return false; |
732 | 603k | |
733 | 603k | TII = MF.getSubtarget().getInstrInfo(); |
734 | 603k | TRI = MF.getSubtarget().getRegisterInfo(); |
735 | 603k | MRI = &MF.getRegInfo(); |
736 | 603k | AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); |
737 | 603k | DT = &getAnalysis<MachineDominatorTree>(); |
738 | 603k | LookAheadLimit = TII->getMachineCSELookAheadLimit(); |
739 | 603k | return PerformCSE(DT->getRootNode()); |
740 | 603k | } |