/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/CodeGen/InlineSpiller.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- InlineSpiller.cpp - Insert spills and restores inline --------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // The inline spiller modifies the machine function directly instead of |
10 | | // inserting spills and restores in VirtRegMap. |
11 | | // |
12 | | //===----------------------------------------------------------------------===// |
13 | | |
14 | | #include "LiveRangeCalc.h" |
15 | | #include "Spiller.h" |
16 | | #include "SplitKit.h" |
17 | | #include "llvm/ADT/ArrayRef.h" |
18 | | #include "llvm/ADT/DenseMap.h" |
19 | | #include "llvm/ADT/MapVector.h" |
20 | | #include "llvm/ADT/None.h" |
21 | | #include "llvm/ADT/STLExtras.h" |
22 | | #include "llvm/ADT/SetVector.h" |
23 | | #include "llvm/ADT/SmallPtrSet.h" |
24 | | #include "llvm/ADT/SmallVector.h" |
25 | | #include "llvm/ADT/Statistic.h" |
26 | | #include "llvm/Analysis/AliasAnalysis.h" |
27 | | #include "llvm/CodeGen/LiveInterval.h" |
28 | | #include "llvm/CodeGen/LiveIntervals.h" |
29 | | #include "llvm/CodeGen/LiveRangeEdit.h" |
30 | | #include "llvm/CodeGen/LiveStacks.h" |
31 | | #include "llvm/CodeGen/MachineBasicBlock.h" |
32 | | #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" |
33 | | #include "llvm/CodeGen/MachineDominators.h" |
34 | | #include "llvm/CodeGen/MachineFunction.h" |
35 | | #include "llvm/CodeGen/MachineFunctionPass.h" |
36 | | #include "llvm/CodeGen/MachineInstr.h" |
37 | | #include "llvm/CodeGen/MachineInstrBuilder.h" |
38 | | #include "llvm/CodeGen/MachineInstrBundle.h" |
39 | | #include "llvm/CodeGen/MachineLoopInfo.h" |
40 | | #include "llvm/CodeGen/MachineOperand.h" |
41 | | #include "llvm/CodeGen/MachineRegisterInfo.h" |
42 | | #include "llvm/CodeGen/SlotIndexes.h" |
43 | | #include "llvm/CodeGen/TargetInstrInfo.h" |
44 | | #include "llvm/CodeGen/TargetOpcodes.h" |
45 | | #include "llvm/CodeGen/TargetRegisterInfo.h" |
46 | | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
47 | | #include "llvm/CodeGen/VirtRegMap.h" |
48 | | #include "llvm/Config/llvm-config.h" |
49 | | #include "llvm/Support/BlockFrequency.h" |
50 | | #include "llvm/Support/BranchProbability.h" |
51 | | #include "llvm/Support/CommandLine.h" |
52 | | #include "llvm/Support/Compiler.h" |
53 | | #include "llvm/Support/Debug.h" |
54 | | #include "llvm/Support/ErrorHandling.h" |
55 | | #include "llvm/Support/raw_ostream.h" |
56 | | #include <cassert> |
57 | | #include <iterator> |
58 | | #include <tuple> |
59 | | #include <utility> |
60 | | #include <vector> |
61 | | |
62 | | using namespace llvm; |
63 | | |
64 | | #define DEBUG_TYPE "regalloc" |
65 | | |
66 | | STATISTIC(NumSpilledRanges, "Number of spilled live ranges"); |
67 | | STATISTIC(NumSnippets, "Number of spilled snippets"); |
68 | | STATISTIC(NumSpills, "Number of spills inserted"); |
69 | | STATISTIC(NumSpillsRemoved, "Number of spills removed"); |
70 | | STATISTIC(NumReloads, "Number of reloads inserted"); |
71 | | STATISTIC(NumReloadsRemoved, "Number of reloads removed"); |
72 | | STATISTIC(NumFolded, "Number of folded stack accesses"); |
73 | | STATISTIC(NumFoldedLoads, "Number of folded loads"); |
74 | | STATISTIC(NumRemats, "Number of rematerialized defs for spilling"); |
75 | | |
76 | | static cl::opt<bool> DisableHoisting("disable-spill-hoist", cl::Hidden, |
77 | | cl::desc("Disable inline spill hoisting")); |
78 | | static cl::opt<bool> |
79 | | RestrictStatepointRemat("restrict-statepoint-remat", |
80 | | cl::init(false), cl::Hidden, |
81 | | cl::desc("Restrict remat for statepoint operands")); |
82 | | |
83 | | namespace { |
84 | | |
85 | | class HoistSpillHelper : private LiveRangeEdit::Delegate { |
86 | | MachineFunction &MF; |
87 | | LiveIntervals &LIS; |
88 | | LiveStacks &LSS; |
89 | | AliasAnalysis *AA; |
90 | | MachineDominatorTree &MDT; |
91 | | MachineLoopInfo &Loops; |
92 | | VirtRegMap &VRM; |
93 | | MachineRegisterInfo &MRI; |
94 | | const TargetInstrInfo &TII; |
95 | | const TargetRegisterInfo &TRI; |
96 | | const MachineBlockFrequencyInfo &MBFI; |
97 | | |
98 | | InsertPointAnalysis IPA; |
99 | | |
100 | | // Map from StackSlot to the LiveInterval of the original register. |
101 | | // Note the LiveInterval of the original register may have been deleted |
102 | | // after it is spilled. We keep a copy here to track the range where |
103 | | // spills can be moved. |
104 | | DenseMap<int, std::unique_ptr<LiveInterval>> StackSlotToOrigLI; |
105 | | |
106 | | // Map from pair of (StackSlot and Original VNI) to a set of spills which |
107 | | // have the same stackslot and have equal values defined by Original VNI. |
108 | | // These spills are mergeable and are hoist candidates. |
109 | | using MergeableSpillsMap = |
110 | | MapVector<std::pair<int, VNInfo *>, SmallPtrSet<MachineInstr *, 16>>; |
111 | | MergeableSpillsMap MergeableSpills; |
112 | | |
113 | | /// This is the map from original register to a set containing all its |
114 | | /// siblings. To hoist a spill to another BB, we need to find out a live |
115 | | /// sibling there and use it as the source of the new spill. |
116 | | DenseMap<unsigned, SmallSetVector<unsigned, 16>> Virt2SiblingsMap; |
117 | | |
118 | | bool isSpillCandBB(LiveInterval &OrigLI, VNInfo &OrigVNI, |
119 | | MachineBasicBlock &BB, unsigned &LiveReg); |
120 | | |
121 | | void rmRedundantSpills( |
122 | | SmallPtrSet<MachineInstr *, 16> &Spills, |
123 | | SmallVectorImpl<MachineInstr *> &SpillsToRm, |
124 | | DenseMap<MachineDomTreeNode *, MachineInstr *> &SpillBBToSpill); |
125 | | |
126 | | void getVisitOrders( |
127 | | MachineBasicBlock *Root, SmallPtrSet<MachineInstr *, 16> &Spills, |
128 | | SmallVectorImpl<MachineDomTreeNode *> &Orders, |
129 | | SmallVectorImpl<MachineInstr *> &SpillsToRm, |
130 | | DenseMap<MachineDomTreeNode *, unsigned> &SpillsToKeep, |
131 | | DenseMap<MachineDomTreeNode *, MachineInstr *> &SpillBBToSpill); |
132 | | |
133 | | void runHoistSpills(LiveInterval &OrigLI, VNInfo &OrigVNI, |
134 | | SmallPtrSet<MachineInstr *, 16> &Spills, |
135 | | SmallVectorImpl<MachineInstr *> &SpillsToRm, |
136 | | DenseMap<MachineBasicBlock *, unsigned> &SpillsToIns); |
137 | | |
138 | | public: |
139 | | HoistSpillHelper(MachineFunctionPass &pass, MachineFunction &mf, |
140 | | VirtRegMap &vrm) |
141 | | : MF(mf), LIS(pass.getAnalysis<LiveIntervals>()), |
142 | | LSS(pass.getAnalysis<LiveStacks>()), |
143 | | AA(&pass.getAnalysis<AAResultsWrapperPass>().getAAResults()), |
144 | | MDT(pass.getAnalysis<MachineDominatorTree>()), |
145 | | Loops(pass.getAnalysis<MachineLoopInfo>()), VRM(vrm), |
146 | | MRI(mf.getRegInfo()), TII(*mf.getSubtarget().getInstrInfo()), |
147 | | TRI(*mf.getSubtarget().getRegisterInfo()), |
148 | | MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()), |
149 | 484k | IPA(LIS, mf.getNumBlockIDs()) {} |
150 | | |
151 | | void addToMergeableSpills(MachineInstr &Spill, int StackSlot, |
152 | | unsigned Original); |
153 | | bool rmFromMergeableSpills(MachineInstr &Spill, int StackSlot); |
154 | | void hoistAllSpills(); |
155 | | void LRE_DidCloneVirtReg(unsigned, unsigned) override; |
156 | | }; |
157 | | |
158 | | class InlineSpiller : public Spiller { |
159 | | MachineFunction &MF; |
160 | | LiveIntervals &LIS; |
161 | | LiveStacks &LSS; |
162 | | AliasAnalysis *AA; |
163 | | MachineDominatorTree &MDT; |
164 | | MachineLoopInfo &Loops; |
165 | | VirtRegMap &VRM; |
166 | | MachineRegisterInfo &MRI; |
167 | | const TargetInstrInfo &TII; |
168 | | const TargetRegisterInfo &TRI; |
169 | | const MachineBlockFrequencyInfo &MBFI; |
170 | | |
171 | | // Variables that are valid during spill(), but used by multiple methods. |
172 | | LiveRangeEdit *Edit; |
173 | | LiveInterval *StackInt; |
174 | | int StackSlot; |
175 | | unsigned Original; |
176 | | |
177 | | // All registers to spill to StackSlot, including the main register. |
178 | | SmallVector<unsigned, 8> RegsToSpill; |
179 | | |
180 | | // All COPY instructions to/from snippets. |
181 | | // They are ignored since both operands refer to the same stack slot. |
182 | | SmallPtrSet<MachineInstr*, 8> SnippetCopies; |
183 | | |
184 | | // Values that failed to remat at some point. |
185 | | SmallPtrSet<VNInfo*, 8> UsedValues; |
186 | | |
187 | | // Dead defs generated during spilling. |
188 | | SmallVector<MachineInstr*, 8> DeadDefs; |
189 | | |
190 | | // Object records spills information and does the hoisting. |
191 | | HoistSpillHelper HSpiller; |
192 | | |
193 | 484k | ~InlineSpiller() override = default; |
194 | | |
195 | | public: |
196 | | InlineSpiller(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm) |
197 | | : MF(mf), LIS(pass.getAnalysis<LiveIntervals>()), |
198 | | LSS(pass.getAnalysis<LiveStacks>()), |
199 | | AA(&pass.getAnalysis<AAResultsWrapperPass>().getAAResults()), |
200 | | MDT(pass.getAnalysis<MachineDominatorTree>()), |
201 | | Loops(pass.getAnalysis<MachineLoopInfo>()), VRM(vrm), |
202 | | MRI(mf.getRegInfo()), TII(*mf.getSubtarget().getInstrInfo()), |
203 | | TRI(*mf.getSubtarget().getRegisterInfo()), |
204 | | MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()), |
205 | 484k | HSpiller(pass, mf, vrm) {} |
206 | | |
207 | | void spill(LiveRangeEdit &) override; |
208 | | void postOptimization() override; |
209 | | |
210 | | private: |
211 | | bool isSnippet(const LiveInterval &SnipLI); |
212 | | void collectRegsToSpill(); |
213 | | |
214 | 282k | bool isRegToSpill(unsigned Reg) { return is_contained(RegsToSpill, Reg); } |
215 | | |
216 | | bool isSibling(unsigned Reg); |
217 | | bool hoistSpillInsideBB(LiveInterval &SpillLI, MachineInstr &CopyMI); |
218 | | void eliminateRedundantSpills(LiveInterval &LI, VNInfo *VNI); |
219 | | |
220 | | void markValueUsed(LiveInterval*, VNInfo*); |
221 | | bool canGuaranteeAssignmentAfterRemat(unsigned VReg, MachineInstr &MI); |
222 | | bool reMaterializeFor(LiveInterval &, MachineInstr &MI); |
223 | | void reMaterializeAll(); |
224 | | |
225 | | bool coalesceStackAccess(MachineInstr *MI, unsigned Reg); |
226 | | bool foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>>, |
227 | | MachineInstr *LoadMI = nullptr); |
228 | | void insertReload(unsigned VReg, SlotIndex, MachineBasicBlock::iterator MI); |
229 | | void insertSpill(unsigned VReg, bool isKill, MachineBasicBlock::iterator MI); |
230 | | |
231 | | void spillAroundUses(unsigned Reg); |
232 | | void spillAll(); |
233 | | }; |
234 | | |
235 | | } // end anonymous namespace |
236 | | |
237 | 484k | Spiller::~Spiller() = default; |
238 | | |
239 | 0 | void Spiller::anchor() {} |
240 | | |
241 | | Spiller *llvm::createInlineSpiller(MachineFunctionPass &pass, |
242 | | MachineFunction &mf, |
243 | 484k | VirtRegMap &vrm) { |
244 | 484k | return new InlineSpiller(pass, mf, vrm); |
245 | 484k | } |
246 | | |
247 | | //===----------------------------------------------------------------------===// |
248 | | // Snippets |
249 | | //===----------------------------------------------------------------------===// |
250 | | |
251 | | // When spilling a virtual register, we also spill any snippets it is connected |
252 | | // to. The snippets are small live ranges that only have a single real use, |
253 | | // leftovers from live range splitting. Spilling them enables memory operand |
254 | | // folding or tightens the live range around the single use. |
255 | | // |
256 | | // This minimizes register pressure and maximizes the store-to-load distance for |
257 | | // spill slots which can be important in tight loops. |
258 | | |
259 | | /// isFullCopyOf - If MI is a COPY to or from Reg, return the other register, |
260 | | /// otherwise return 0. |
261 | 968k | static unsigned isFullCopyOf(const MachineInstr &MI, unsigned Reg) { |
262 | 968k | if (!MI.isFullCopy()) |
263 | 391k | return 0; |
264 | 576k | if (MI.getOperand(0).getReg() == Reg) |
265 | 219k | return MI.getOperand(1).getReg(); |
266 | 357k | if (MI.getOperand(1).getReg() == Reg) |
267 | 326k | return MI.getOperand(0).getReg(); |
268 | 31.2k | return 0; |
269 | 31.2k | } |
270 | | |
271 | | /// isSnippet - Identify if a live interval is a snippet that should be spilled. |
272 | | /// It is assumed that SnipLI is a virtual register with the same original as |
273 | | /// Edit->getReg(). |
274 | 229k | bool InlineSpiller::isSnippet(const LiveInterval &SnipLI) { |
275 | 229k | unsigned Reg = Edit->getReg(); |
276 | 229k | |
277 | 229k | // A snippet is a tiny live range with only a single instruction using it |
278 | 229k | // besides copies to/from Reg or spills/fills. We accept: |
279 | 229k | // |
280 | 229k | // %snip = COPY %Reg / FILL fi# |
281 | 229k | // %snip = USE %snip |
282 | 229k | // %Reg = COPY %snip / SPILL %snip, fi# |
283 | 229k | // |
284 | 229k | if (SnipLI.getNumValNums() > 2 || !LIS.intervalIsInOneMBB(SnipLI)120k ) |
285 | 161k | return false; |
286 | 68.1k | |
287 | 68.1k | MachineInstr *UseMI = nullptr; |
288 | 68.1k | |
289 | 68.1k | // Check that all uses satisfy our criteria. |
290 | 68.1k | for (MachineRegisterInfo::reg_instr_nodbg_iterator |
291 | 68.1k | RI = MRI.reg_instr_nodbg_begin(SnipLI.reg), |
292 | 169k | E = MRI.reg_instr_nodbg_end(); RI != E; ) { |
293 | 165k | MachineInstr &MI = *RI++; |
294 | 165k | |
295 | 165k | // Allow copies to/from Reg. |
296 | 165k | if (isFullCopyOf(MI, Reg)) |
297 | 29.9k | continue; |
298 | 135k | |
299 | 135k | // Allow stack slot loads. |
300 | 135k | int FI; |
301 | 135k | if (SnipLI.reg == TII.isLoadFromStackSlot(MI, FI) && FI == StackSlot1.96k ) |
302 | 1.15k | continue; |
303 | 133k | |
304 | 133k | // Allow stack slot stores. |
305 | 133k | if (SnipLI.reg == TII.isStoreToStackSlot(MI, FI) && FI == StackSlot1.32k ) |
306 | 958 | continue; |
307 | 132k | |
308 | 132k | // Allow a single additional instruction. |
309 | 132k | if (UseMI && &MI != UseMI65.3k ) |
310 | 63.5k | return false; |
311 | 69.4k | UseMI = &MI; |
312 | 69.4k | } |
313 | 68.1k | return true4.59k ; |
314 | 68.1k | } |
315 | | |
316 | | /// collectRegsToSpill - Collect live range snippets that only have a single |
317 | | /// real use. |
318 | 279k | void InlineSpiller::collectRegsToSpill() { |
319 | 279k | unsigned Reg = Edit->getReg(); |
320 | 279k | |
321 | 279k | // Main register always spills. |
322 | 279k | RegsToSpill.assign(1, Reg); |
323 | 279k | SnippetCopies.clear(); |
324 | 279k | |
325 | 279k | // Snippets all have the same original, so there can't be any for an original |
326 | 279k | // register. |
327 | 279k | if (Original == Reg) |
328 | 154k | return; |
329 | 124k | |
330 | 124k | for (MachineRegisterInfo::reg_instr_iterator |
331 | 501k | RI = MRI.reg_instr_begin(Reg), E = MRI.reg_instr_end(); RI != E; ) { |
332 | 376k | MachineInstr &MI = *RI++; |
333 | 376k | unsigned SnipReg = isFullCopyOf(MI, Reg); |
334 | 376k | if (!isSibling(SnipReg)) |
335 | 147k | continue; |
336 | 229k | LiveInterval &SnipLI = LIS.getInterval(SnipReg); |
337 | 229k | if (!isSnippet(SnipLI)) |
338 | 224k | continue; |
339 | 4.59k | SnippetCopies.insert(&MI); |
340 | 4.59k | if (isRegToSpill(SnipReg)) |
341 | 257 | continue; |
342 | 4.33k | RegsToSpill.push_back(SnipReg); |
343 | 4.33k | LLVM_DEBUG(dbgs() << "\talso spill snippet " << SnipLI << '\n'); |
344 | 4.33k | ++NumSnippets; |
345 | 4.33k | } |
346 | 124k | } |
347 | | |
348 | 633k | bool InlineSpiller::isSibling(unsigned Reg) { |
349 | 633k | return TargetRegisterInfo::isVirtualRegister(Reg) && |
350 | 633k | VRM.getOriginal(Reg) == Original425k ; |
351 | 633k | } |
352 | | |
353 | | /// It is beneficial to spill to earlier place in the same BB in case |
354 | | /// as follows: |
355 | | /// There is an alternative def earlier in the same MBB. |
356 | | /// Hoist the spill as far as possible in SpillMBB. This can ease |
357 | | /// register pressure: |
358 | | /// |
359 | | /// x = def |
360 | | /// y = use x |
361 | | /// s = copy x |
362 | | /// |
363 | | /// Hoisting the spill of s to immediately after the def removes the |
364 | | /// interference between x and y: |
365 | | /// |
366 | | /// x = def |
367 | | /// spill x |
368 | | /// y = use killed x |
369 | | /// |
370 | | /// This hoist only helps when the copy kills its source. |
371 | | /// |
372 | | bool InlineSpiller::hoistSpillInsideBB(LiveInterval &SpillLI, |
373 | 84.9k | MachineInstr &CopyMI) { |
374 | 84.9k | SlotIndex Idx = LIS.getInstructionIndex(CopyMI); |
375 | | #ifndef NDEBUG |
376 | | VNInfo *VNI = SpillLI.getVNInfoAt(Idx.getRegSlot()); |
377 | | assert(VNI && VNI->def == Idx.getRegSlot() && "Not defined by copy"); |
378 | | #endif |
379 | | |
380 | 84.9k | unsigned SrcReg = CopyMI.getOperand(1).getReg(); |
381 | 84.9k | LiveInterval &SrcLI = LIS.getInterval(SrcReg); |
382 | 84.9k | VNInfo *SrcVNI = SrcLI.getVNInfoAt(Idx); |
383 | 84.9k | LiveQueryResult SrcQ = SrcLI.Query(Idx); |
384 | 84.9k | MachineBasicBlock *DefMBB = LIS.getMBBFromIndex(SrcVNI->def); |
385 | 84.9k | if (DefMBB != CopyMI.getParent() || !SrcQ.isKill()43.1k ) |
386 | 58.3k | return false; |
387 | 26.5k | |
388 | 26.5k | // Conservatively extend the stack slot range to the range of the original |
389 | 26.5k | // value. We may be able to do better with stack slot coloring by being more |
390 | 26.5k | // careful here. |
391 | 26.5k | assert(StackInt && "No stack slot assigned yet."); |
392 | 26.5k | LiveInterval &OrigLI = LIS.getInterval(Original); |
393 | 26.5k | VNInfo *OrigVNI = OrigLI.getVNInfoAt(Idx); |
394 | 26.5k | StackInt->MergeValueInAsValue(OrigLI, OrigVNI, StackInt->getValNumInfo(0)); |
395 | 26.5k | LLVM_DEBUG(dbgs() << "\tmerged orig valno " << OrigVNI->id << ": " |
396 | 26.5k | << *StackInt << '\n'); |
397 | 26.5k | |
398 | 26.5k | // We are going to spill SrcVNI immediately after its def, so clear out |
399 | 26.5k | // any later spills of the same value. |
400 | 26.5k | eliminateRedundantSpills(SrcLI, SrcVNI); |
401 | 26.5k | |
402 | 26.5k | MachineBasicBlock *MBB = LIS.getMBBFromIndex(SrcVNI->def); |
403 | 26.5k | MachineBasicBlock::iterator MII; |
404 | 26.5k | if (SrcVNI->isPHIDef()) |
405 | 5.08k | MII = MBB->SkipPHIsLabelsAndDebug(MBB->begin()); |
406 | 21.4k | else { |
407 | 21.4k | MachineInstr *DefMI = LIS.getInstructionFromIndex(SrcVNI->def); |
408 | 21.4k | assert(DefMI && "Defining instruction disappeared"); |
409 | 21.4k | MII = DefMI; |
410 | 21.4k | ++MII; |
411 | 21.4k | } |
412 | 26.5k | // Insert spill without kill flag immediately after def. |
413 | 26.5k | TII.storeRegToStackSlot(*MBB, MII, SrcReg, false, StackSlot, |
414 | 26.5k | MRI.getRegClass(SrcReg), &TRI); |
415 | 26.5k | --MII; // Point to store instruction. |
416 | 26.5k | LIS.InsertMachineInstrInMaps(*MII); |
417 | 26.5k | LLVM_DEBUG(dbgs() << "\thoisted: " << SrcVNI->def << '\t' << *MII); |
418 | 26.5k | |
419 | 26.5k | HSpiller.addToMergeableSpills(*MII, StackSlot, Original); |
420 | 26.5k | ++NumSpills; |
421 | 26.5k | return true; |
422 | 26.5k | } |
423 | | |
424 | | /// eliminateRedundantSpills - SLI:VNI is known to be on the stack. Remove any |
425 | | /// redundant spills of this value in SLI.reg and sibling copies. |
426 | 93.4k | void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) { |
427 | 93.4k | assert(VNI && "Missing value"); |
428 | 93.4k | SmallVector<std::pair<LiveInterval*, VNInfo*>, 8> WorkList; |
429 | 93.4k | WorkList.push_back(std::make_pair(&SLI, VNI)); |
430 | 93.4k | assert(StackInt && "No stack slot assigned yet."); |
431 | 93.4k | |
432 | 126k | do { |
433 | 126k | LiveInterval *LI; |
434 | 126k | std::tie(LI, VNI) = WorkList.pop_back_val(); |
435 | 126k | unsigned Reg = LI->reg; |
436 | 126k | LLVM_DEBUG(dbgs() << "Checking redundant spills for " << VNI->id << '@' |
437 | 126k | << VNI->def << " in " << *LI << '\n'); |
438 | 126k | |
439 | 126k | // Regs to spill are taken care of. |
440 | 126k | if (isRegToSpill(Reg)) |
441 | 26.6k | continue; |
442 | 99.3k | |
443 | 99.3k | // Add all of VNI's live range to StackInt. |
444 | 99.3k | StackInt->MergeValueInAsValue(*LI, VNI, StackInt->getValNumInfo(0)); |
445 | 99.3k | LLVM_DEBUG(dbgs() << "Merged to stack int: " << *StackInt << '\n'); |
446 | 99.3k | |
447 | 99.3k | // Find all spills and copies of VNI. |
448 | 99.3k | for (MachineRegisterInfo::use_instr_nodbg_iterator |
449 | 99.3k | UI = MRI.use_instr_nodbg_begin(Reg), E = MRI.use_instr_nodbg_end(); |
450 | 701k | UI != E; ) { |
451 | 601k | MachineInstr &MI = *UI++; |
452 | 601k | if (!MI.isCopy() && !MI.mayStore()422k ) |
453 | 319k | continue; |
454 | 282k | SlotIndex Idx = LIS.getInstructionIndex(MI); |
455 | 282k | if (LI->getVNInfoAt(Idx) != VNI) |
456 | 215k | continue; |
457 | 66.4k | |
458 | 66.4k | // Follow sibling copies down the dominator tree. |
459 | 66.4k | if (unsigned DstReg = isFullCopyOf(MI, Reg)) { |
460 | 52.5k | if (isSibling(DstReg)) { |
461 | 32.5k | LiveInterval &DstLI = LIS.getInterval(DstReg); |
462 | 32.5k | VNInfo *DstVNI = DstLI.getVNInfoAt(Idx.getRegSlot()); |
463 | 32.5k | assert(DstVNI && "Missing defined value"); |
464 | 32.5k | assert(DstVNI->def == Idx.getRegSlot() && "Wrong copy def slot"); |
465 | 32.5k | WorkList.push_back(std::make_pair(&DstLI, DstVNI)); |
466 | 32.5k | } |
467 | 52.5k | continue; |
468 | 52.5k | } |
469 | 13.9k | |
470 | 13.9k | // Erase spills. |
471 | 13.9k | int FI; |
472 | 13.9k | if (Reg == TII.isStoreToStackSlot(MI, FI) && FI == StackSlot1.78k ) { |
473 | 1.60k | LLVM_DEBUG(dbgs() << "Redundant spill " << Idx << '\t' << MI); |
474 | 1.60k | // eliminateDeadDefs won't normally remove stores, so switch opcode. |
475 | 1.60k | MI.setDesc(TII.get(TargetOpcode::KILL)); |
476 | 1.60k | DeadDefs.push_back(&MI); |
477 | 1.60k | ++NumSpillsRemoved; |
478 | 1.60k | if (HSpiller.rmFromMergeableSpills(MI, StackSlot)) |
479 | 1.60k | --NumSpills; |
480 | 1.60k | } |
481 | 13.9k | } |
482 | 126k | } while (!WorkList.empty()); |
483 | 93.4k | } |
484 | | |
485 | | //===----------------------------------------------------------------------===// |
486 | | // Rematerialization |
487 | | //===----------------------------------------------------------------------===// |
488 | | |
489 | | /// markValueUsed - Remember that VNI failed to rematerialize, so its defining |
490 | | /// instruction cannot be eliminated. See through snippet copies |
491 | 6.89k | void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) { |
492 | 6.89k | SmallVector<std::pair<LiveInterval*, VNInfo*>, 8> WorkList; |
493 | 6.89k | WorkList.push_back(std::make_pair(LI, VNI)); |
494 | 30.1k | do { |
495 | 30.1k | std::tie(LI, VNI) = WorkList.pop_back_val(); |
496 | 30.1k | if (!UsedValues.insert(VNI).second) |
497 | 14.8k | continue; |
498 | 15.3k | |
499 | 15.3k | if (VNI->isPHIDef()) { |
500 | 5.60k | MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def); |
501 | 23.0k | for (MachineBasicBlock *P : MBB->predecessors()) { |
502 | 23.0k | VNInfo *PVNI = LI->getVNInfoBefore(LIS.getMBBEndIdx(P)); |
503 | 23.0k | if (PVNI) |
504 | 23.0k | WorkList.push_back(std::make_pair(LI, PVNI)); |
505 | 23.0k | } |
506 | 5.60k | continue; |
507 | 5.60k | } |
508 | 9.76k | |
509 | 9.76k | // Follow snippet copies. |
510 | 9.76k | MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def); |
511 | 9.76k | if (!SnippetCopies.count(MI)) |
512 | 9.51k | continue; |
513 | 247 | LiveInterval &SnipLI = LIS.getInterval(MI->getOperand(1).getReg()); |
514 | 247 | assert(isRegToSpill(SnipLI.reg) && "Unexpected register in copy"); |
515 | 247 | VNInfo *SnipVNI = SnipLI.getVNInfoAt(VNI->def.getRegSlot(true)); |
516 | 247 | assert(SnipVNI && "Snippet undefined before copy"); |
517 | 247 | WorkList.push_back(std::make_pair(&SnipLI, SnipVNI)); |
518 | 30.1k | } while (!WorkList.empty()); |
519 | 6.89k | } |
520 | | |
521 | | bool InlineSpiller::canGuaranteeAssignmentAfterRemat(unsigned VReg, |
522 | 227k | MachineInstr &MI) { |
523 | 227k | if (!RestrictStatepointRemat) |
524 | 227k | return true; |
525 | 60 | // Here's a quick explanation of the problem we're trying to handle here: |
526 | 60 | // * There are some pseudo instructions with more vreg uses than there are |
527 | 60 | // physical registers on the machine. |
528 | 60 | // * This is normally handled by spilling the vreg, and folding the reload |
529 | 60 | // into the user instruction. (Thus decreasing the number of used vregs |
530 | 60 | // until the remainder can be assigned to physregs.) |
531 | 60 | // * However, since we may try to spill vregs in any order, we can end up |
532 | 60 | // trying to spill each operand to the instruction, and then rematting it |
533 | 60 | // instead. When that happens, the new live intervals (for the remats) are |
534 | 60 | // expected to be trivially assignable (i.e. RS_Done). However, since we |
535 | 60 | // may have more remats than physregs, we're guaranteed to fail to assign |
536 | 60 | // one. |
537 | 60 | // At the moment, we only handle this for STATEPOINTs since they're the only |
538 | 60 | // pseudo op where we've seen this. If we start seeing other instructions |
539 | 60 | // with the same problem, we need to revisit this. |
540 | 60 | return (MI.getOpcode() != TargetOpcode::STATEPOINT); |
541 | 60 | } |
542 | | |
543 | | /// reMaterializeFor - Attempt to rematerialize before MI instead of reloading. |
544 | 401k | bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) { |
545 | 401k | // Analyze instruction |
546 | 401k | SmallVector<std::pair<MachineInstr *, unsigned>, 8> Ops; |
547 | 401k | MIBundleOperands::VirtRegInfo RI = |
548 | 401k | MIBundleOperands(MI).analyzeVirtReg(VirtReg.reg, &Ops); |
549 | 401k | |
550 | 401k | if (!RI.Reads) |
551 | 164k | return false; |
552 | 236k | |
553 | 236k | SlotIndex UseIdx = LIS.getInstructionIndex(MI).getRegSlot(true); |
554 | 236k | VNInfo *ParentVNI = VirtReg.getVNInfoAt(UseIdx.getBaseIndex()); |
555 | 236k | |
556 | 236k | if (!ParentVNI) { |
557 | 0 | LLVM_DEBUG(dbgs() << "\tadding <undef> flags: "); |
558 | 0 | for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { |
559 | 0 | MachineOperand &MO = MI.getOperand(i); |
560 | 0 | if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg) |
561 | 0 | MO.setIsUndef(); |
562 | 0 | } |
563 | 0 | LLVM_DEBUG(dbgs() << UseIdx << '\t' << MI); |
564 | 0 | return true; |
565 | 0 | } |
566 | 236k | |
567 | 236k | if (SnippetCopies.count(&MI)) |
568 | 1.68k | return false; |
569 | 235k | |
570 | 235k | LiveInterval &OrigLI = LIS.getInterval(Original); |
571 | 235k | VNInfo *OrigVNI = OrigLI.getVNInfoAt(UseIdx); |
572 | 235k | LiveRangeEdit::Remat RM(ParentVNI); |
573 | 235k | RM.OrigMI = LIS.getInstructionFromIndex(OrigVNI->def); |
574 | 235k | |
575 | 235k | if (!Edit->canRematerializeAt(RM, OrigVNI, UseIdx, false)) { |
576 | 6.57k | markValueUsed(&VirtReg, ParentVNI); |
577 | 6.57k | LLVM_DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << MI); |
578 | 6.57k | return false; |
579 | 6.57k | } |
580 | 228k | |
581 | 228k | // If the instruction also writes VirtReg.reg, it had better not require the |
582 | 228k | // same register for uses and defs. |
583 | 228k | if (RI.Tied) { |
584 | 271 | markValueUsed(&VirtReg, ParentVNI); |
585 | 271 | LLVM_DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << MI); |
586 | 271 | return false; |
587 | 271 | } |
588 | 228k | |
589 | 228k | // Before rematerializing into a register for a single instruction, try to |
590 | 228k | // fold a load into the instruction. That avoids allocating a new register. |
591 | 228k | if (RM.OrigMI->canFoldAsLoad() && |
592 | 228k | foldMemoryOperand(Ops, RM.OrigMI)7.19k ) { |
593 | 1.06k | Edit->markRematerialized(RM.ParentVNI); |
594 | 1.06k | ++NumFoldedLoads; |
595 | 1.06k | return true; |
596 | 1.06k | } |
597 | 227k | |
598 | 227k | // If we can't guarantee that we'll be able to actually assign the new vreg, |
599 | 227k | // we can't remat. |
600 | 227k | if (!canGuaranteeAssignmentAfterRemat(VirtReg.reg, MI)) { |
601 | 40 | markValueUsed(&VirtReg, ParentVNI); |
602 | 40 | LLVM_DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << MI); |
603 | 40 | return false; |
604 | 40 | } |
605 | 227k | |
606 | 227k | // Allocate a new register for the remat. |
607 | 227k | unsigned NewVReg = Edit->createFrom(Original); |
608 | 227k | |
609 | 227k | // Finally we can rematerialize OrigMI before MI. |
610 | 227k | SlotIndex DefIdx = |
611 | 227k | Edit->rematerializeAt(*MI.getParent(), MI, NewVReg, RM, TRI); |
612 | 227k | |
613 | 227k | // We take the DebugLoc from MI, since OrigMI may be attributed to a |
614 | 227k | // different source location. |
615 | 227k | auto *NewMI = LIS.getInstructionFromIndex(DefIdx); |
616 | 227k | NewMI->setDebugLoc(MI.getDebugLoc()); |
617 | 227k | |
618 | 227k | (void)DefIdx; |
619 | 227k | LLVM_DEBUG(dbgs() << "\tremat: " << DefIdx << '\t' |
620 | 227k | << *LIS.getInstructionFromIndex(DefIdx)); |
621 | 227k | |
622 | 227k | // Replace operands |
623 | 227k | for (const auto &OpPair : Ops) { |
624 | 227k | MachineOperand &MO = OpPair.first->getOperand(OpPair.second); |
625 | 227k | if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg227k ) { |
626 | 227k | MO.setReg(NewVReg); |
627 | 227k | MO.setIsKill(); |
628 | 227k | } |
629 | 227k | } |
630 | 227k | LLVM_DEBUG(dbgs() << "\t " << UseIdx << '\t' << MI << '\n'); |
631 | 227k | |
632 | 227k | ++NumRemats; |
633 | 227k | return true; |
634 | 227k | } |
635 | | |
636 | | /// reMaterializeAll - Try to rematerialize as many uses as possible, |
637 | | /// and trim the live ranges after. |
638 | 279k | void InlineSpiller::reMaterializeAll() { |
639 | 279k | if (!Edit->anyRematerializable(AA)) |
640 | 130k | return; |
641 | 148k | |
642 | 148k | UsedValues.clear(); |
643 | 148k | |
644 | 148k | // Try to remat before all uses of snippets. |
645 | 148k | bool anyRemat = false; |
646 | 150k | for (unsigned Reg : RegsToSpill) { |
647 | 150k | LiveInterval &LI = LIS.getInterval(Reg); |
648 | 150k | for (MachineRegisterInfo::reg_bundle_iterator |
649 | 150k | RegI = MRI.reg_bundle_begin(Reg), E = MRI.reg_bundle_end(); |
650 | 551k | RegI != E; ) { |
651 | 401k | MachineInstr &MI = *RegI++; |
652 | 401k | |
653 | 401k | // Debug values are not allowed to affect codegen. |
654 | 401k | if (MI.isDebugValue()) |
655 | 0 | continue; |
656 | 401k | |
657 | 401k | assert(!MI.isDebugInstr() && "Did not expect to find a use in debug " |
658 | 401k | "instruction that isn't a DBG_VALUE"); |
659 | 401k | |
660 | 401k | anyRemat |= reMaterializeFor(LI, MI); |
661 | 401k | } |
662 | 150k | } |
663 | 148k | if (!anyRemat) |
664 | 2.62k | return; |
665 | 146k | |
666 | 146k | // Remove any values that were completely rematted. |
667 | 147k | for (unsigned Reg : RegsToSpill)146k { |
668 | 147k | LiveInterval &LI = LIS.getInterval(Reg); |
669 | 147k | for (LiveInterval::vni_iterator I = LI.vni_begin(), E = LI.vni_end(); |
670 | 316k | I != E; ++I169k ) { |
671 | 169k | VNInfo *VNI = *I; |
672 | 169k | if (VNI->isUnused() || VNI->isPHIDef() || UsedValues.count(VNI)155k ) |
673 | 13.9k | continue; |
674 | 155k | MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def); |
675 | 155k | MI->addRegisterDead(Reg, &TRI); |
676 | 155k | if (!MI->allDefsAreDead()) |
677 | 1 | continue; |
678 | 155k | LLVM_DEBUG(dbgs() << "All defs dead: " << *MI); |
679 | 155k | DeadDefs.push_back(MI); |
680 | 155k | } |
681 | 147k | } |
682 | 146k | |
683 | 146k | // Eliminate dead code after remat. Note that some snippet copies may be |
684 | 146k | // deleted here. |
685 | 146k | if (DeadDefs.empty()) |
686 | 97 | return; |
687 | 146k | LLVM_DEBUG(dbgs() << "Remat created " << DeadDefs.size() << " dead defs.\n"); |
688 | 146k | Edit->eliminateDeadDefs(DeadDefs, RegsToSpill, AA); |
689 | 146k | |
690 | 146k | // LiveRangeEdit::eliminateDeadDef is used to remove dead define instructions |
691 | 146k | // after rematerialization. To remove a VNI for a vreg from its LiveInterval, |
692 | 146k | // LiveIntervals::removeVRegDefAt is used. However, after non-PHI VNIs are all |
693 | 146k | // removed, PHI VNI are still left in the LiveInterval. |
694 | 146k | // So to get rid of unused reg, we need to check whether it has non-dbg |
695 | 146k | // reference instead of whether it has non-empty interval. |
696 | 146k | unsigned ResultPos = 0; |
697 | 147k | for (unsigned Reg : RegsToSpill) { |
698 | 147k | if (MRI.reg_nodbg_empty(Reg)) { |
699 | 147k | Edit->eraseVirtReg(Reg); |
700 | 147k | continue; |
701 | 147k | } |
702 | 14 | |
703 | 14 | assert(LIS.hasInterval(Reg) && |
704 | 14 | (!LIS.getInterval(Reg).empty() || !MRI.reg_nodbg_empty(Reg)) && |
705 | 14 | "Empty and not used live-range?!"); |
706 | 14 | |
707 | 14 | RegsToSpill[ResultPos++] = Reg; |
708 | 14 | } |
709 | 146k | RegsToSpill.erase(RegsToSpill.begin() + ResultPos, RegsToSpill.end()); |
710 | 146k | LLVM_DEBUG(dbgs() << RegsToSpill.size() |
711 | 146k | << " registers to spill after remat.\n"); |
712 | 146k | } |
713 | | |
714 | | //===----------------------------------------------------------------------===// |
715 | | // Spilling |
716 | | //===----------------------------------------------------------------------===// |
717 | | |
718 | | /// If MI is a load or store of StackSlot, it can be removed. |
719 | 363k | bool InlineSpiller::coalesceStackAccess(MachineInstr *MI, unsigned Reg) { |
720 | 363k | int FI = 0; |
721 | 363k | unsigned InstrReg = TII.isLoadFromStackSlot(*MI, FI); |
722 | 363k | bool IsLoad = InstrReg; |
723 | 363k | if (!IsLoad) |
724 | 360k | InstrReg = TII.isStoreToStackSlot(*MI, FI); |
725 | 363k | |
726 | 363k | // We have a stack access. Is it the right register and slot? |
727 | 363k | if (InstrReg != Reg || FI != StackSlot4.83k ) |
728 | 360k | return false; |
729 | 2.84k | |
730 | 2.84k | if (!IsLoad) |
731 | 1.42k | HSpiller.rmFromMergeableSpills(*MI, StackSlot); |
732 | 2.84k | |
733 | 2.84k | LLVM_DEBUG(dbgs() << "Coalescing stack access: " << *MI); |
734 | 2.84k | LIS.RemoveMachineInstrFromMaps(*MI); |
735 | 2.84k | MI->eraseFromParent(); |
736 | 2.84k | |
737 | 2.84k | if (IsLoad) { |
738 | 1.41k | ++NumReloadsRemoved; |
739 | 1.41k | --NumReloads; |
740 | 1.42k | } else { |
741 | 1.42k | ++NumSpillsRemoved; |
742 | 1.42k | --NumSpills; |
743 | 1.42k | } |
744 | 2.84k | |
745 | 2.84k | return true; |
746 | 2.84k | } |
747 | | |
748 | | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
749 | | LLVM_DUMP_METHOD |
750 | | // Dump the range of instructions from B to E with their slot indexes. |
751 | | static void dumpMachineInstrRangeWithSlotIndex(MachineBasicBlock::iterator B, |
752 | | MachineBasicBlock::iterator E, |
753 | | LiveIntervals const &LIS, |
754 | | const char *const header, |
755 | | unsigned VReg =0) { |
756 | | char NextLine = '\n'; |
757 | | char SlotIndent = '\t'; |
758 | | |
759 | | if (std::next(B) == E) { |
760 | | NextLine = ' '; |
761 | | SlotIndent = ' '; |
762 | | } |
763 | | |
764 | | dbgs() << '\t' << header << ": " << NextLine; |
765 | | |
766 | | for (MachineBasicBlock::iterator I = B; I != E; ++I) { |
767 | | SlotIndex Idx = LIS.getInstructionIndex(*I).getRegSlot(); |
768 | | |
769 | | // If a register was passed in and this instruction has it as a |
770 | | // destination that is marked as an early clobber, print the |
771 | | // early-clobber slot index. |
772 | | if (VReg) { |
773 | | MachineOperand *MO = I->findRegisterDefOperand(VReg); |
774 | | if (MO && MO->isEarlyClobber()) |
775 | | Idx = Idx.getRegSlot(true); |
776 | | } |
777 | | |
778 | | dbgs() << SlotIndent << Idx << '\t' << *I; |
779 | | } |
780 | | } |
781 | | #endif |
782 | | |
/// foldMemoryOperand - Try folding stack slot references in Ops into their
/// instructions.
///
/// @param Ops Operand indices from analyzeVirtReg().
/// @param LoadMI Load instruction to use instead of stack slot when non-null.
/// @return True on success.
bool InlineSpiller::
foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,
                  MachineInstr *LoadMI) {
  if (Ops.empty())
    return false;
  // Don't attempt folding in bundles.
  MachineInstr *MI = Ops.front().first;
  // All entries in Ops must refer to the same instruction; a mismatch means
  // the operands span a bundle, which folding does not handle.
  if (Ops.back().first != MI || MI->isBundled())
    return false;

  // Remember whether MI was a COPY before folding, so the statistics below
  // can classify the fold as a spill/reload rather than a generic fold.
  bool WasCopy = MI->isCopy();
  unsigned ImpReg = 0;

  // Spill subregs if the target allows it.
  // We always want to spill subregs for stackmap/patchpoint pseudos.
  bool SpillSubRegs = TII.isSubregFoldable() ||
                      MI->getOpcode() == TargetOpcode::STATEPOINT ||
                      MI->getOpcode() == TargetOpcode::PATCHPOINT ||
                      MI->getOpcode() == TargetOpcode::STACKMAP;

  // TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied
  // operands.
  SmallVector<unsigned, 8> FoldOps;
  for (const auto &OpPair : Ops) {
    unsigned Idx = OpPair.second;
    assert(MI == OpPair.first && "Instruction conflict during operand folding");
    MachineOperand &MO = MI->getOperand(Idx);
    if (MO.isImplicit()) {
      // Implicit operands are not folded; remember the register so any
      // leftover implicit operands on the folded instruction can be stripped.
      ImpReg = MO.getReg();
      continue;
    }

    if (!SpillSubRegs && MO.getSubReg())
      return false;
    // We cannot fold a load instruction into a def.
    if (LoadMI && MO.isDef())
      return false;
    // Tied use operands should not be passed to foldMemoryOperand.
    if (!MI->isRegTiedToDefOperand(Idx))
      FoldOps.push_back(Idx);
  }

  // If we only have implicit uses, we won't be able to fold that.
  // Moreover, TargetInstrInfo::foldMemoryOperand will assert if we try!
  if (FoldOps.empty())
    return false;

  // Span the insertion point so any instructions the target emits around the
  // fold can be found and registered in the slot-index maps afterwards.
  MachineInstrSpan MIS(MI, MI->getParent());

  MachineInstr *FoldMI =
      LoadMI ? TII.foldMemoryOperand(*MI, FoldOps, *LoadMI, &LIS)
             : TII.foldMemoryOperand(*MI, FoldOps, StackSlot, &LIS, &VRM);
  if (!FoldMI)
    return false;

  // Remove LIS for any dead defs in the original MI not in FoldMI.
  for (MIBundleOperands MO(*MI); MO.isValid(); ++MO) {
    if (!MO->isReg())
      continue;
    unsigned Reg = MO->getReg();
    // Only physical, non-reserved registers need their live ranges patched.
    if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) ||
        MRI.isReserved(Reg)) {
      continue;
    }
    // Skip non-Defs, including undef uses and internal reads.
    if (MO->isUse())
      continue;
    MIBundleOperands::PhysRegInfo RI =
        MIBundleOperands(*FoldMI).analyzePhysReg(Reg, &TRI);
    if (RI.FullyDefined)
      continue;
    // FoldMI does not define this physreg. Remove the LI segment.
    assert(MO->isDead() && "Cannot fold physreg def");
    SlotIndex Idx = LIS.getInstructionIndex(*MI).getRegSlot();
    LIS.removePhysRegDefAt(Reg, Idx);
  }

  // If the original instruction was itself a tracked mergeable spill, drop it
  // from the hoisting bookkeeping before it disappears.
  int FI;
  if (TII.isStoreToStackSlot(*MI, FI) &&
      HSpiller.rmFromMergeableSpills(*MI, FI))
    --NumSpills;
  LIS.ReplaceMachineInstrInMaps(*MI, *FoldMI);
  if (MI->isCall())
    MI->getMF()->updateCallSiteInfo(MI, FoldMI);
  MI->eraseFromParent();

  // Insert any new instructions other than FoldMI into the LIS maps.
  assert(!MIS.empty() && "Unexpected empty span of instructions!");
  for (MachineInstr &MI : MIS)
    if (&MI != FoldMI)
      LIS.InsertMachineInstrInMaps(MI);

  // TII.foldMemoryOperand may have left some implicit operands on the
  // instruction. Strip them.
  // Implicit operands sit at the tail of the operand list, so walk backwards
  // and stop at the first explicit operand.
  if (ImpReg)
    for (unsigned i = FoldMI->getNumOperands(); i; --i) {
      MachineOperand &MO = FoldMI->getOperand(i - 1);
      if (!MO.isReg() || !MO.isImplicit())
        break;
      if (MO.getReg() == ImpReg)
        FoldMI->RemoveOperand(i - 1);
    }

  LLVM_DEBUG(dumpMachineInstrRangeWithSlotIndex(MIS.begin(), MIS.end(), LIS,
                                                "folded"));

  // Statistics: a folded COPY whose first analyzed operand is the def (index
  // 0) became a spill; otherwise it became a reload.
  if (!WasCopy)
    ++NumFolded;
  else if (Ops.front().second == 0) {
    ++NumSpills;
    HSpiller.addToMergeableSpills(*FoldMI, StackSlot, Original);
  } else
    ++NumReloads;
  return true;
}
904 | | |
/// Insert a reload of NewVReg from StackSlot before MI and record the new
/// instruction(s) in the slot-index maps.
/// Note: the Idx parameter is not referenced in this body.
void InlineSpiller::insertReload(unsigned NewVReg,
                                 SlotIndex Idx,
                                 MachineBasicBlock::iterator MI) {
  MachineBasicBlock &MBB = *MI->getParent();

  // Span [MIS.begin(), MI) will cover whatever loadRegFromStackSlot emits.
  MachineInstrSpan MIS(MI, &MBB);
  TII.loadRegFromStackSlot(MBB, MI, NewVReg, StackSlot,
                           MRI.getRegClass(NewVReg), &TRI);

  // Register the newly created reload instruction(s) with LiveIntervals.
  LIS.InsertMachineInstrRangeInMaps(MIS.begin(), MI);

  LLVM_DEBUG(dumpMachineInstrRangeWithSlotIndex(MIS.begin(), MI, LIS, "reload",
                                                NewVReg));
  ++NumReloads;
}
920 | | |
921 | | /// Check if \p Def fully defines a VReg with an undefined value. |
922 | | /// If that's the case, that means the value of VReg is actually |
923 | | /// not relevant. |
924 | 48.6k | static bool isFullUndefDef(const MachineInstr &Def) { |
925 | 48.6k | if (!Def.isImplicitDef()) |
926 | 48.2k | return false; |
927 | 437 | assert(Def.getNumOperands() == 1 && |
928 | 437 | "Implicit def with more than one definition"); |
929 | 437 | // We can say that the VReg defined by Def is undef, only if it is |
930 | 437 | // fully defined by Def. Otherwise, some of the lanes may not be |
931 | 437 | // undef and the value of the VReg matters. |
932 | 437 | return !Def.getOperand(0).getSubReg(); |
933 | 437 | } |
934 | | |
/// insertSpill - Insert a spill of NewVReg after MI.
void InlineSpiller::insertSpill(unsigned NewVReg, bool isKill,
                                MachineBasicBlock::iterator MI) {
  MachineBasicBlock &MBB = *MI->getParent();

  // Span covers MI plus whatever gets inserted after it below.
  MachineInstrSpan MIS(MI, &MBB);
  bool IsRealSpill = true;
  if (isFullUndefDef(*MI)) {
    // Don't spill undef value.
    // Anything works for undef, in particular keeping the memory
    // uninitialized is a viable option and it saves code size and
    // run time.
    // A KILL still ends the live range where the spill would have been.
    BuildMI(MBB, std::next(MI), MI->getDebugLoc(), TII.get(TargetOpcode::KILL))
        .addReg(NewVReg, getKillRegState(isKill));
    IsRealSpill = false;
  } else
    TII.storeRegToStackSlot(MBB, std::next(MI), NewVReg, isKill, StackSlot,
                            MRI.getRegClass(NewVReg), &TRI);

  // Register everything inserted after MI with LiveIntervals.
  LIS.InsertMachineInstrRangeInMaps(std::next(MI), MIS.end());

  LLVM_DEBUG(dumpMachineInstrRangeWithSlotIndex(std::next(MI), MIS.end(), LIS,
                                                "spill"));
  ++NumSpills;
  // Only genuine stores are candidates for later spill hoisting/merging;
  // the KILL stand-in for undef values is not.
  if (IsRealSpill)
    HSpiller.addToMergeableSpills(*std::next(MI), StackSlot, Original);
}
962 | | |
/// spillAroundUses - insert spill code around each use of Reg.
void InlineSpiller::spillAroundUses(unsigned Reg) {
  LLVM_DEBUG(dbgs() << "spillAroundUses " << printReg(Reg) << '\n');
  LiveInterval &OldLI = LIS.getInterval(Reg);

  // Iterate over instructions using Reg.
  // The iterator is advanced before the body runs because the body may erase
  // or rewrite the current instruction, invalidating its use-list position.
  for (MachineRegisterInfo::reg_bundle_iterator
           RegI = MRI.reg_bundle_begin(Reg), E = MRI.reg_bundle_end();
       RegI != E; ) {
    MachineInstr *MI = &*(RegI++);

    // Debug values are not allowed to affect codegen.
    if (MI->isDebugValue()) {
      // Modify DBG_VALUE now that the value is in a spill slot.
      MachineBasicBlock *MBB = MI->getParent();
      LLVM_DEBUG(dbgs() << "Modifying debug info due to spill:\t" << *MI);
      buildDbgValueForSpill(*MBB, MI, *MI, StackSlot);
      MBB->erase(MI);
      continue;
    }

    assert(!MI->isDebugInstr() && "Did not expect to find a use in debug "
           "instruction that isn't a DBG_VALUE");

    // Ignore copies to/from snippets. We'll delete them.
    if (SnippetCopies.count(MI))
      continue;

    // Stack slot accesses may coalesce away.
    if (coalesceStackAccess(MI, Reg))
      continue;

    // Analyze instruction.
    SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops;
    MIBundleOperands::VirtRegInfo RI =
        MIBundleOperands(*MI).analyzeVirtReg(Reg, &Ops);

    // Find the slot index where this instruction reads and writes OldLI.
    // This is usually the def slot, except for tied early clobbers.
    SlotIndex Idx = LIS.getInstructionIndex(*MI).getRegSlot();
    if (VNInfo *VNI = OldLI.getVNInfoAt(Idx.getRegSlot(true)))
      if (SlotIndex::isSameInstr(Idx, VNI->def))
        Idx = VNI->def;

    // Check for a sibling copy.
    unsigned SibReg = isFullCopyOf(*MI, Reg);
    if (SibReg && isSibling(SibReg)) {
      // This may actually be a copy between snippets.
      if (isRegToSpill(SibReg)) {
        LLVM_DEBUG(dbgs() << "Found new snippet copy: " << *MI);
        SnippetCopies.insert(MI);
        continue;
      }
      if (RI.Writes) {
        if (hoistSpillInsideBB(OldLI, *MI)) {
          // This COPY is now dead, the value is already in the stack slot.
          MI->getOperand(0).setIsDead();
          DeadDefs.push_back(MI);
          continue;
        }
      } else {
        // This is a reload for a sib-reg copy. Drop spills downstream.
        LiveInterval &SibLI = LIS.getInterval(SibReg);
        eliminateRedundantSpills(SibLI, SibLI.getVNInfoAt(Idx));
        // The COPY will fold to a reload below.
      }
    }

    // Attempt to fold memory ops.
    if (foldMemoryOperand(Ops))
      continue;

    // Create a new virtual register for spill/fill.
    // FIXME: Infer regclass from instruction alone.
    unsigned NewVReg = Edit->createFrom(Reg);

    if (RI.Reads)
      insertReload(NewVReg, Idx, MI);

    // Rewrite instruction operands.
    // Point every analyzed operand at the short-lived NewVReg; track whether
    // any def is live so we know if a spill after MI is actually needed.
    bool hasLiveDef = false;
    for (const auto &OpPair : Ops) {
      MachineOperand &MO = OpPair.first->getOperand(OpPair.second);
      MO.setReg(NewVReg);
      if (MO.isUse()) {
        // Untied uses of NewVReg end at this instruction.
        if (!OpPair.first->isRegTiedToDefOperand(OpPair.second))
          MO.setIsKill();
      } else {
        if (!MO.isDead())
          hasLiveDef = true;
      }
    }
    LLVM_DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << *MI << '\n');

    // FIXME: Use a second vreg if instruction has no tied ops.
    if (RI.Writes)
      if (hasLiveDef)
        insertSpill(NewVReg, true, MI);
  }
}
1063 | | |
/// spillAll - Spill all registers remaining after rematerialization.
void InlineSpiller::spillAll() {
  // Update LiveStacks now that we are committed to spilling.
  // Create the stack slot (and its single-valued stack interval) lazily on
  // the first spill of Original; later siblings reuse it.
  if (StackSlot == VirtRegMap::NO_STACK_SLOT) {
    StackSlot = VRM.assignVirt2StackSlot(Original);
    StackInt = &LSS.getOrCreateInterval(StackSlot, MRI.getRegClass(Original));
    StackInt->getNextValue(SlotIndex(), LSS.getVNInfoAllocator());
  } else
    StackInt = &LSS.getInterval(StackSlot);

  // Map the register being edited to the shared slot as well.
  if (Original != Edit->getReg())
    VRM.assignVirt2StackSlot(Edit->getReg(), StackSlot);

  // Merge the live ranges of all regs to spill into the stack interval; the
  // slot has exactly one value number covering all of them.
  assert(StackInt->getNumValNums() == 1 && "Bad stack interval values");
  for (unsigned Reg : RegsToSpill)
    StackInt->MergeSegmentsInAsValue(LIS.getInterval(Reg),
                                     StackInt->getValNumInfo(0));
  LLVM_DEBUG(dbgs() << "Merged spilled regs: " << *StackInt << '\n');

  // Spill around uses of all RegsToSpill.
  for (unsigned Reg : RegsToSpill)
    spillAroundUses(Reg);

  // Hoisted spills may cause dead code.
  if (!DeadDefs.empty()) {
    LLVM_DEBUG(dbgs() << "Eliminating " << DeadDefs.size() << " dead defs\n");
    Edit->eliminateDeadDefs(DeadDefs, RegsToSpill, AA);
  }

  // Finally delete the SnippetCopies.
  // By this point spillAroundUses has handled every other use, so any
  // remaining reference must be a snippet copy (asserted below).
  for (unsigned Reg : RegsToSpill) {
    for (MachineRegisterInfo::reg_instr_iterator
         RI = MRI.reg_instr_begin(Reg), E = MRI.reg_instr_end();
         RI != E; ) {
      MachineInstr &MI = *(RI++);
      assert(SnippetCopies.count(&MI) && "Remaining use wasn't a snippet copy");
      // FIXME: Do this with a LiveRangeEdit callback.
      LIS.RemoveMachineInstrFromMaps(MI);
      MI.eraseFromParent();
    }
  }

  // Delete all spilled registers.
  for (unsigned Reg : RegsToSpill)
    Edit->eraseVirtReg(Reg);
}
1110 | | |
/// Main entry point: spill the live range being edited, first trying
/// rematerialization and falling back to stack spills for what remains.
void InlineSpiller::spill(LiveRangeEdit &edit) {
  ++NumSpilledRanges;
  Edit = &edit;
  assert(!TargetRegisterInfo::isStackSlot(edit.getReg())
         && "Trying to spill a stack slot.");
  // Share a stack slot among all descendants of Original.
  Original = VRM.getOriginal(edit.getReg());
  StackSlot = VRM.getStackSlot(Original);
  StackInt = nullptr;

  LLVM_DEBUG(dbgs() << "Inline spilling "
                    << TRI.getRegClassName(MRI.getRegClass(edit.getReg()))
                    << ':' << edit.getParent() << "\nFrom original "
                    << printReg(Original) << '\n');
  assert(edit.getParent().isSpillable() &&
         "Attempting to spill already spilled value.");
  assert(DeadDefs.empty() && "Previous spill didn't remove dead defs");

  // Gather Reg plus its snippet siblings, then rematerialize what we can.
  collectRegsToSpill();
  reMaterializeAll();

  // Remat may handle everything.
  if (!RegsToSpill.empty())
    spillAll();

  Edit->calculateRegClassAndHint(MF, Loops, MBFI);
}
1138 | | |
/// Optimizations after all the reg selections and spills are done.
/// Delegates to HoistSpillHelper, which merges/hoists the spills collected
/// via addToMergeableSpills during spilling.
void InlineSpiller::postOptimization() { HSpiller.hoistAllSpills(); }
1141 | | |
1142 | | /// When a spill is inserted, add the spill to MergeableSpills map. |
1143 | | void HoistSpillHelper::addToMergeableSpills(MachineInstr &Spill, int StackSlot, |
1144 | 151k | unsigned Original) { |
1145 | 151k | BumpPtrAllocator &Allocator = LIS.getVNInfoAllocator(); |
1146 | 151k | LiveInterval &OrigLI = LIS.getInterval(Original); |
1147 | 151k | // save a copy of LiveInterval in StackSlotToOrigLI because the original |
1148 | 151k | // LiveInterval may be cleared after all its references are spilled. |
1149 | 151k | if (StackSlotToOrigLI.find(StackSlot) == StackSlotToOrigLI.end()) { |
1150 | 121k | auto LI = llvm::make_unique<LiveInterval>(OrigLI.reg, OrigLI.weight); |
1151 | 121k | LI->assign(OrigLI, Allocator); |
1152 | 121k | StackSlotToOrigLI[StackSlot] = std::move(LI); |
1153 | 121k | } |
1154 | 151k | SlotIndex Idx = LIS.getInstructionIndex(Spill); |
1155 | 151k | VNInfo *OrigVNI = StackSlotToOrigLI[StackSlot]->getVNInfoAt(Idx.getRegSlot()); |
1156 | 151k | std::pair<int, VNInfo *> MIdx = std::make_pair(StackSlot, OrigVNI); |
1157 | 151k | MergeableSpills[MIdx].insert(&Spill); |
1158 | 151k | } |
1159 | | |
1160 | | /// When a spill is removed, remove the spill from MergeableSpills map. |
1161 | | /// Return true if the spill is removed successfully. |
1162 | | bool HoistSpillHelper::rmFromMergeableSpills(MachineInstr &Spill, |
1163 | 3.03k | int StackSlot) { |
1164 | 3.03k | auto It = StackSlotToOrigLI.find(StackSlot); |
1165 | 3.03k | if (It == StackSlotToOrigLI.end()) |
1166 | 0 | return false; |
1167 | 3.03k | SlotIndex Idx = LIS.getInstructionIndex(Spill); |
1168 | 3.03k | VNInfo *OrigVNI = It->second->getVNInfoAt(Idx.getRegSlot()); |
1169 | 3.03k | std::pair<int, VNInfo *> MIdx = std::make_pair(StackSlot, OrigVNI); |
1170 | 3.03k | return MergeableSpills[MIdx].erase(&Spill); |
1171 | 3.03k | } |
1172 | | |
1173 | | /// Check BB to see if it is a possible target BB to place a hoisted spill, |
1174 | | /// i.e., there should be a living sibling of OrigReg at the insert point. |
1175 | | bool HoistSpillHelper::isSpillCandBB(LiveInterval &OrigLI, VNInfo &OrigVNI, |
1176 | 95.0k | MachineBasicBlock &BB, unsigned &LiveReg) { |
1177 | 95.0k | SlotIndex Idx; |
1178 | 95.0k | unsigned OrigReg = OrigLI.reg; |
1179 | 95.0k | MachineBasicBlock::iterator MI = IPA.getLastInsertPointIter(OrigLI, BB); |
1180 | 95.0k | if (MI != BB.end()) |
1181 | 83.0k | Idx = LIS.getInstructionIndex(*MI); |
1182 | 12.0k | else |
1183 | 12.0k | Idx = LIS.getMBBEndIdx(&BB).getPrevSlot(); |
1184 | 95.0k | SmallSetVector<unsigned, 16> &Siblings = Virt2SiblingsMap[OrigReg]; |
1185 | 95.0k | assert(OrigLI.getVNInfoAt(Idx) == &OrigVNI && "Unexpected VNI"); |
1186 | 95.0k | |
1187 | 148k | for (auto const SibReg : Siblings) { |
1188 | 148k | LiveInterval &LI = LIS.getInterval(SibReg); |
1189 | 148k | VNInfo *VNI = LI.getVNInfoAt(Idx); |
1190 | 148k | if (VNI) { |
1191 | 94.3k | LiveReg = SibReg; |
1192 | 94.3k | return true; |
1193 | 94.3k | } |
1194 | 148k | } |
1195 | 95.0k | return false687 ; |
1196 | 95.0k | } |
1197 | | |
1198 | | /// Remove redundant spills in the same BB. Save those redundant spills in |
1199 | | /// SpillsToRm, and save the spill to keep and its BB in SpillBBToSpill map. |
1200 | | void HoistSpillHelper::rmRedundantSpills( |
1201 | | SmallPtrSet<MachineInstr *, 16> &Spills, |
1202 | | SmallVectorImpl<MachineInstr *> &SpillsToRm, |
1203 | 130k | DenseMap<MachineDomTreeNode *, MachineInstr *> &SpillBBToSpill) { |
1204 | 130k | // For each spill saw, check SpillBBToSpill[] and see if its BB already has |
1205 | 130k | // another spill inside. If a BB contains more than one spill, only keep the |
1206 | 130k | // earlier spill with smaller SlotIndex. |
1207 | 148k | for (const auto CurrentSpill : Spills) { |
1208 | 148k | MachineBasicBlock *Block = CurrentSpill->getParent(); |
1209 | 148k | MachineDomTreeNode *Node = MDT.getBase().getNode(Block); |
1210 | 148k | MachineInstr *PrevSpill = SpillBBToSpill[Node]; |
1211 | 148k | if (PrevSpill) { |
1212 | 668 | SlotIndex PIdx = LIS.getInstructionIndex(*PrevSpill); |
1213 | 668 | SlotIndex CIdx = LIS.getInstructionIndex(*CurrentSpill); |
1214 | 668 | MachineInstr *SpillToRm = (CIdx > PIdx) ? CurrentSpill456 : PrevSpill212 ; |
1215 | 668 | MachineInstr *SpillToKeep = (CIdx > PIdx) ? PrevSpill456 : CurrentSpill212 ; |
1216 | 668 | SpillsToRm.push_back(SpillToRm); |
1217 | 668 | SpillBBToSpill[MDT.getBase().getNode(Block)] = SpillToKeep; |
1218 | 147k | } else { |
1219 | 147k | SpillBBToSpill[MDT.getBase().getNode(Block)] = CurrentSpill; |
1220 | 147k | } |
1221 | 148k | } |
1222 | 130k | for (const auto SpillToRm : SpillsToRm) |
1223 | 668 | Spills.erase(SpillToRm); |
1224 | 130k | } |
1225 | | |
/// Starting from \p Root find a top-down traversal order of the dominator
/// tree to visit all basic blocks containing the elements of \p Spills.
/// Redundant spills will be found and put into \p SpillsToRm at the same
/// time. \p SpillBBToSpill will be populated as part of the process and
/// maps a basic block to the first store occurring in the basic block.
/// \post SpillsToRm.union(Spills\@post) == Spills\@pre
void HoistSpillHelper::getVisitOrders(
    MachineBasicBlock *Root, SmallPtrSet<MachineInstr *, 16> &Spills,
    SmallVectorImpl<MachineDomTreeNode *> &Orders,
    SmallVectorImpl<MachineInstr *> &SpillsToRm,
    DenseMap<MachineDomTreeNode *, unsigned> &SpillsToKeep,
    DenseMap<MachineDomTreeNode *, MachineInstr *> &SpillBBToSpill) {
  // The set contains all the possible BB nodes to which we may hoist
  // original spills.
  SmallPtrSet<MachineDomTreeNode *, 8> WorkSet;
  // Save the BB nodes on the path from the first BB node containing
  // non-redundant spill to the Root node.
  SmallPtrSet<MachineDomTreeNode *, 8> NodesOnPath;
  // All the spills to be hoisted must originate from a single def instruction
  // to the OrigReg. It means the def instruction should dominate all the spills
  // to be hoisted. We choose the BB where the def instruction is located as
  // the Root.
  MachineDomTreeNode *RootIDomNode = MDT[Root]->getIDom();
  // For every node on the dominator tree with spill, walk up on the dominator
  // tree towards the Root node until it is reached. If there is other node
  // containing spill in the middle of the path, the previous spill saw will
  // be redundant and the node containing it will be removed. All the nodes on
  // the path starting from the first node with non-redundant spill to the Root
  // node will be added to the WorkSet, which will contain all the possible
  // locations where spills may be hoisted to after the loop below is done.
  for (const auto Spill : Spills) {
    MachineBasicBlock *Block = Spill->getParent();
    MachineDomTreeNode *Node = MDT[Block];
    MachineInstr *SpillToRm = nullptr;
    // Walk from the spill's own node up to (but not past) Root; the loop
    // terminates because RootIDomNode is Root's immediate dominator.
    while (Node != RootIDomNode) {
      // If Node dominates Block, and it already contains a spill, the spill in
      // Block will be redundant.
      if (Node != MDT[Block] && SpillBBToSpill[Node]) {
        SpillToRm = SpillBBToSpill[MDT[Block]];
        break;
        /// If we see the Node already in WorkSet, the path from the Node to
        /// the Root node must already be traversed by another spill.
        /// Then no need to repeat.
      } else if (WorkSet.count(Node)) {
        break;
      } else {
        NodesOnPath.insert(Node);
      }
      Node = Node->getIDom();
    }
    if (SpillToRm) {
      SpillsToRm.push_back(SpillToRm);
    } else {
      // Add a BB containing the original spills to SpillsToKeep -- i.e.,
      // set the initial status before hoisting start. The value of BBs
      // containing original spills is set to 0, in order to descriminate
      // with BBs containing hoisted spills which will be inserted to
      // SpillsToKeep later during hoisting.
      SpillsToKeep[MDT[Block]] = 0;
      WorkSet.insert(NodesOnPath.begin(), NodesOnPath.end());
    }
    // NodesOnPath is scratch state per spill; only committed paths survive
    // (copied into WorkSet above).
    NodesOnPath.clear();
  }

  // Sort the nodes in WorkSet in top-down order and save the nodes
  // in Orders. Orders will be used for hoisting in runHoistSpills.
  // This is a BFS over the dominator tree restricted to WorkSet members,
  // using Orders itself as the queue (idx is the read cursor).
  unsigned idx = 0;
  Orders.push_back(MDT.getBase().getNode(Root));
  do {
    MachineDomTreeNode *Node = Orders[idx++];
    const std::vector<MachineDomTreeNode *> &Children = Node->getChildren();
    unsigned NumChildren = Children.size();
    for (unsigned i = 0; i != NumChildren; ++i) {
      MachineDomTreeNode *Child = Children[i];
      if (WorkSet.count(Child))
        Orders.push_back(Child);
    }
  } while (idx != Orders.size());
  assert(Orders.size() == WorkSet.size() &&
         "Orders have different size with WorkSet");

#ifndef NDEBUG
  LLVM_DEBUG(dbgs() << "Orders size is " << Orders.size() << "\n");
  SmallVector<MachineDomTreeNode *, 32>::reverse_iterator RIt = Orders.rbegin();
  for (; RIt != Orders.rend(); RIt++)
    LLVM_DEBUG(dbgs() << "BB" << (*RIt)->getBlock()->getNumber() << ",");
  LLVM_DEBUG(dbgs() << "\n");
#endif
}
1315 | | |
/// Try to hoist spills according to BB hotness. The spills to be removed will
/// be saved in \p SpillsToRm. The spills to be inserted will be saved in
/// \p SpillsToIns.
///
/// \param OrigLI     Live interval of the original (pre-split) register.
/// \param OrigVNI    Value number of the spilled value; the block of its def
///                   is the root of the dominator subtree walked here.
/// \param Spills     Set of equal-valued spill instructions considered.
/// \param SpillsToRm [out] Spills made redundant by hoisting.
/// \param SpillsToIns [out] Map from basic block to the vreg that should be
///                   spilled there (the source of the new hoisted spill).
void HoistSpillHelper::runHoistSpills(
    LiveInterval &OrigLI, VNInfo &OrigVNI,
    SmallPtrSet<MachineInstr *, 16> &Spills,
    SmallVectorImpl<MachineInstr *> &SpillsToRm,
    DenseMap<MachineBasicBlock *, unsigned> &SpillsToIns) {
  // Visit order of dominator tree nodes.
  SmallVector<MachineDomTreeNode *, 32> Orders;
  // SpillsToKeep contains all the nodes where spills are to be inserted
  // during hoisting. If the spill to be inserted is an original spill
  // (not a hoisted one), the value of the map entry is 0. If the spill
  // is a hoisted spill, the value of the map entry is the VReg to be used
  // as the source of the spill.
  DenseMap<MachineDomTreeNode *, unsigned> SpillsToKeep;
  // Map from BB to the first spill inside of it.
  DenseMap<MachineDomTreeNode *, MachineInstr *> SpillBBToSpill;

  // First delete spills that are trivially redundant within a single block.
  rmRedundantSpills(Spills, SpillsToRm, SpillBBToSpill);

  // The block defining OrigVNI dominates all the equal-valued spills, so it
  // is the root of the subtree considered for hoisting.
  MachineBasicBlock *Root = LIS.getMBBFromIndex(OrigVNI.def);
  getVisitOrders(Root, Spills, Orders, SpillsToRm, SpillsToKeep,
                 SpillBBToSpill);

  // SpillsInSubTreeMap keeps the map from a dom tree node to a pair of
  // nodes set and the cost of all the spills inside those nodes.
  // The nodes set are the locations where spills are to be inserted
  // in the subtree of current node.
  using NodesCostPair =
      std::pair<SmallPtrSet<MachineDomTreeNode *, 16>, BlockFrequency>;
  DenseMap<MachineDomTreeNode *, NodesCostPair> SpillsInSubTreeMap;

  // Iterate Orders set in reverse order, which will be a bottom-up order
  // in the dominator tree. Once we visit a dom tree node, we know its
  // children have already been visited and the spill locations in the
  // subtrees of all the children have been determined.
  SmallVector<MachineDomTreeNode *, 32>::reverse_iterator RIt = Orders.rbegin();
  for (; RIt != Orders.rend(); RIt++) {
    MachineBasicBlock *Block = (*RIt)->getBlock();

    // If Block contains an original spill, simply continue.
    if (SpillsToKeep.find(*RIt) != SpillsToKeep.end() && !SpillsToKeep[*RIt]) {
      SpillsInSubTreeMap[*RIt].first.insert(*RIt);
      // SpillsInSubTreeMap[*RIt].second contains the cost of spill.
      SpillsInSubTreeMap[*RIt].second = MBFI.getBlockFreq(Block);
      continue;
    }

    // Collect spills in subtree of current node (*RIt) to
    // SpillsInSubTreeMap[*RIt].first.
    const std::vector<MachineDomTreeNode *> &Children = (*RIt)->getChildren();
    unsigned NumChildren = Children.size();
    for (unsigned i = 0; i != NumChildren; ++i) {
      MachineDomTreeNode *Child = Children[i];
      if (SpillsInSubTreeMap.find(Child) == SpillsInSubTreeMap.end())
        continue;
      // The stmt "SpillsInSubTree = SpillsInSubTreeMap[*RIt].first" below
      // should be placed before getting the begin and end iterators of
      // SpillsInSubTreeMap[Child].first, or else the iterators may be
      // invalidated when SpillsInSubTreeMap[*RIt] is seen the first time
      // and the map grows and then the original buckets in the map are moved.
      SmallPtrSet<MachineDomTreeNode *, 16> &SpillsInSubTree =
          SpillsInSubTreeMap[*RIt].first;
      BlockFrequency &SubTreeCost = SpillsInSubTreeMap[*RIt].second;
      // Fold the child's accumulated spill set and cost into this node, then
      // drop the child entry -- each subtree result is consumed exactly once.
      SubTreeCost += SpillsInSubTreeMap[Child].second;
      auto BI = SpillsInSubTreeMap[Child].first.begin();
      auto EI = SpillsInSubTreeMap[Child].first.end();
      SpillsInSubTree.insert(BI, EI);
      SpillsInSubTreeMap.erase(Child);
    }

    SmallPtrSet<MachineDomTreeNode *, 16> &SpillsInSubTree =
        SpillsInSubTreeMap[*RIt].first;
    BlockFrequency &SubTreeCost = SpillsInSubTreeMap[*RIt].second;
    // No spills in subtree, simply continue.
    if (SpillsInSubTree.empty())
      continue;

    // Check whether Block is a possible candidate to insert spill.
    unsigned LiveReg = 0;
    if (!isSpillCandBB(OrigLI, OrigVNI, *Block, LiveReg))
      continue;

    // If there are multiple spills that could be merged, bias a little
    // to hoist the spill.
    BranchProbability MarginProb = (SpillsInSubTree.size() > 1)
                                       ? BranchProbability(9, 10)
                                       : BranchProbability(1, 1);
    // Hoist only when the cost of the spills in the subtree exceeds the
    // (possibly biased) cost of a single spill placed in this block.
    if (SubTreeCost > MBFI.getBlockFreq(Block) * MarginProb) {
      // Hoist: Move spills to current Block.
      for (const auto SpillBB : SpillsInSubTree) {
        // When SpillBB is a BB contains original spill, insert the spill
        // to SpillsToRm.
        if (SpillsToKeep.find(SpillBB) != SpillsToKeep.end() &&
            !SpillsToKeep[SpillBB]) {
          MachineInstr *SpillToRm = SpillBBToSpill[SpillBB];
          SpillsToRm.push_back(SpillToRm);
        }
        // SpillBB will not contain spill anymore, remove it from SpillsToKeep.
        SpillsToKeep.erase(SpillBB);
      }
      // Current Block is the BB containing the new hoisted spill. Add it to
      // SpillsToKeep. LiveReg is the source of the new spill.
      SpillsToKeep[*RIt] = LiveReg;
      LLVM_DEBUG({
        dbgs() << "spills in BB: ";
        for (const auto Rspill : SpillsInSubTree)
          dbgs() << Rspill->getBlock()->getNumber() << " ";
        dbgs() << "were promoted to BB" << (*RIt)->getBlock()->getNumber()
               << "\n";
      });
      // The whole subtree is now represented by a single spill in Block.
      SpillsInSubTree.clear();
      SpillsInSubTree.insert(*RIt);
      SubTreeCost = MBFI.getBlockFreq(Block);
    }
  }
  // For spills in SpillsToKeep with LiveReg set (i.e., not original spill),
  // save them to SpillsToIns.
  for (const auto Ent : SpillsToKeep) {
    if (Ent.second)
      SpillsToIns[Ent.first->getBlock()] = Ent.second;
  }
}
1440 | | |
/// For spills with equal values, remove redundant spills and hoist those left
/// to less hot spots.
///
/// Spills with equal values will be collected into the same set in
/// MergeableSpills when spill is inserted. These equal spills are originated
/// from the same defining instruction and are dominated by the instruction.
/// Before hoisting all the equal spills, redundant spills inside the same
/// BB are first marked to be deleted. Then starting from the spills left, walk
/// up on the dominator tree towards the Root node where the define instruction
/// is located, mark the dominated spills to be deleted along the way and
/// collect the BB nodes on the path from non-dominated spills to the define
/// instruction into a WorkSet. The nodes in WorkSet are the candidate places
/// where we are considering to hoist the spills. We iterate the WorkSet in
/// bottom-up order, and for each node, we will decide whether to hoist spills
/// inside its subtree to that node. In this way, we can get benefit locally
/// even if hoisting all the equal spills to one cold place is impossible.
void HoistSpillHelper::hoistAllSpills() {
  SmallVector<unsigned, 4> NewVRegs;
  LiveRangeEdit Edit(nullptr, NewVRegs, MF, LIS, &VRM, this);

  // Group every virtual register that still has a def with the siblings
  // split from the same original register.
  for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
    unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
    unsigned Original = VRM.getPreSplitReg(Reg);
    if (!MRI.def_empty(Reg))
      Virt2SiblingsMap[Original].insert(Reg);
  }

  // Each entry in MergeableSpills contains a spill set with equal values.
  for (auto &Ent : MergeableSpills) {
    int Slot = Ent.first.first;
    LiveInterval &OrigLI = *StackSlotToOrigLI[Slot];
    VNInfo *OrigVNI = Ent.first.second;
    SmallPtrSet<MachineInstr *, 16> &EqValSpills = Ent.second;
    if (Ent.second.empty())
      continue;

    LLVM_DEBUG({
      dbgs() << "\nFor Slot" << Slot << " and VN" << OrigVNI->id << ":\n"
             << "Equal spills in BB: ";
      for (const auto spill : EqValSpills)
        dbgs() << spill->getParent()->getNumber() << " ";
      dbgs() << "\n";
    });

    // SpillsToRm is the spill set to be removed from EqValSpills.
    SmallVector<MachineInstr *, 16> SpillsToRm;
    // SpillsToIns is the spill set to be newly inserted after hoisting.
    DenseMap<MachineBasicBlock *, unsigned> SpillsToIns;

    runHoistSpills(OrigLI, *OrigVNI, EqValSpills, SpillsToRm, SpillsToIns);

    LLVM_DEBUG({
      dbgs() << "Finally inserted spills in BB: ";
      for (const auto Ispill : SpillsToIns)
        dbgs() << Ispill.first->getNumber() << " ";
      dbgs() << "\nFinally removed spills in BB: ";
      for (const auto Rspill : SpillsToRm)
        dbgs() << Rspill->getParent()->getNumber() << " ";
      dbgs() << "\n";
    });

    // Stack live range update.
    LiveInterval &StackIntvl = LSS.getInterval(Slot);
    if (!SpillsToIns.empty() || !SpillsToRm.empty())
      StackIntvl.MergeValueInAsValue(OrigLI, OrigVNI,
                                     StackIntvl.getValNumInfo(0));

    // Insert hoisted spills.
    for (auto const Insert : SpillsToIns) {
      MachineBasicBlock *BB = Insert.first;
      unsigned LiveReg = Insert.second;
      MachineBasicBlock::iterator MI = IPA.getLastInsertPointIter(OrigLI, *BB);
      TII.storeRegToStackSlot(*BB, MI, LiveReg, false, Slot,
                              MRI.getRegClass(LiveReg), &TRI);
      // storeRegToStackSlot inserted immediately before MI; register the new
      // instruction range with LiveIntervals.
      LIS.InsertMachineInstrRangeInMaps(std::prev(MI), MI);
      ++NumSpills;
    }

    // Remove redundant spills or change them to dead instructions.
    NumSpills -= SpillsToRm.size();
    for (auto const RMEnt : SpillsToRm) {
      // Turn the spill into a KILL so eliminateDeadDefs below can delete it;
      // strip implicit non-dead defs that would keep it alive.
      RMEnt->setDesc(TII.get(TargetOpcode::KILL));
      for (unsigned i = RMEnt->getNumOperands(); i; --i) {
        MachineOperand &MO = RMEnt->getOperand(i - 1);
        if (MO.isReg() && MO.isImplicit() && MO.isDef() && !MO.isDead())
          RMEnt->RemoveOperand(i - 1);
      }
    }
    Edit.eliminateDeadDefs(SpillsToRm, None, AA);
  }
}
1532 | | |
1533 | | /// For VirtReg clone, the \p New register should have the same physreg or |
1534 | | /// stackslot as the \p old register. |
1535 | 2.30k | void HoistSpillHelper::LRE_DidCloneVirtReg(unsigned New, unsigned Old) { |
1536 | 2.30k | if (VRM.hasPhys(Old)) |
1537 | 2.30k | VRM.assignVirt2Phys(New, VRM.getPhys(Old)); |
1538 | 0 | else if (VRM.getStackSlot(Old) != VirtRegMap::NO_STACK_SLOT) |
1539 | 0 | VRM.assignVirt2StackSlot(New, VRM.getStackSlot(Old)); |
1540 | 0 | else |
1541 | 0 | llvm_unreachable("VReg should be assigned either physreg or stackslot"); |
1542 | 2.30k | } |